xref: /haiku/src/system/kernel/vm/vm.cpp (revision d123849688fadac4a8cebe5925c034fdfff971db)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 namespace {
77 
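// Locking policy used by AreaCacheLocker below: caches are always handed in
// already locked (via vm_area_get_locked_cache()), so Lock() is never invoked;
// Unlock() returns the cache through vm_area_put_locked_cache().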
78 class AreaCacheLocking {
79 public:
80 	inline bool Lock(VMCache* lockable)
81 	{
82 		return false;
83 	}
84 
85 	inline void Unlock(VMCache* lockable)
86 	{
87 		vm_area_put_locked_cache(lockable);
88 	}
89 };
90 
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 	inline AreaCacheLocker(VMCache* cache = NULL)
94 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95 	{
96 	}
97 
98 	inline AreaCacheLocker(VMArea* area)
99 		: AutoLocker<VMCache, AreaCacheLocking>()
100 	{
101 		SetTo(area);
102 	}
103 
104 	inline void SetTo(VMCache* cache, bool alreadyLocked)
105 	{
106 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107 	}
108 
109 	inline void SetTo(VMArea* area)
110 	{
111 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113 	}
114 };
115 
116 
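/*!	Keeps a chain of caches locked, from a top (consumer) cache down through
	its source caches. Each cache's user data pointer is used to remember its
	consumer, so that Unlock() can release the chain in source -> consumer
	order.
*/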
117 class VMCacheChainLocker {
118 public:
119 	VMCacheChainLocker()
120 		:
121 		fTopCache(NULL),
122 		fBottomCache(NULL)
123 	{
124 	}
125 
126 	VMCacheChainLocker(VMCache* topCache)
127 		:
128 		fTopCache(topCache),
129 		fBottomCache(topCache)
130 	{
131 	}
132 
133 	~VMCacheChainLocker()
134 	{
135 		Unlock();
136 	}
137 
138 	void SetTo(VMCache* topCache)
139 	{
140 		fTopCache = topCache;
141 		fBottomCache = topCache;
142 
143 		if (topCache != NULL)
144 			topCache->SetUserData(NULL);
145 	}
146 
147 	VMCache* LockSourceCache()
148 	{
149 		if (fBottomCache == NULL || fBottomCache->source == NULL)
150 			return NULL;
151 
152 		VMCache* previousCache = fBottomCache;
153 
154 		fBottomCache = fBottomCache->source;
155 		fBottomCache->Lock();
156 		fBottomCache->AcquireRefLocked();
157 		fBottomCache->SetUserData(previousCache);
158 
159 		return fBottomCache;
160 	}
161 
162 	void LockAllSourceCaches()
163 	{
164 		while (LockSourceCache() != NULL) {
165 		}
166 	}
167 
168 	void Unlock(VMCache* exceptCache = NULL)
169 	{
170 		if (fTopCache == NULL)
171 			return;
172 
173 		// Unlock caches in source -> consumer direction. This is important to
174 		// avoid double-locking and a reversal of locking order in case a cache
175 		// is eligible for merging.
176 		VMCache* cache = fBottomCache;
177 		while (cache != NULL) {
178 			VMCache* nextCache = (VMCache*)cache->UserData();
179 			if (cache != exceptCache)
180 				cache->ReleaseRefAndUnlock(cache != fTopCache);
181 
182 			if (cache == fTopCache)
183 				break;
184 
185 			cache = nextCache;
186 		}
187 
188 		fTopCache = NULL;
189 		fBottomCache = NULL;
190 	}
191 
192 	void UnlockKeepRefs(bool keepTopCacheLocked)
193 	{
194 		if (fTopCache == NULL)
195 			return;
196 
197 		VMCache* nextCache = fBottomCache;
198 		VMCache* cache = NULL;
199 
200 		while (keepTopCacheLocked
201 				? nextCache != fTopCache : cache != fTopCache) {
202 			cache = nextCache;
203 			nextCache = (VMCache*)cache->UserData();
204 			cache->Unlock(cache != fTopCache);
205 		}
206 	}
207 
208 	void RelockCaches(bool topCacheLocked)
209 	{
210 		if (fTopCache == NULL)
211 			return;
212 
213 		VMCache* nextCache = fTopCache;
214 		VMCache* cache = NULL;
215 		if (topCacheLocked) {
216 			cache = nextCache;
217 			nextCache = cache->source;
218 		}
219 
220 		while (cache != fBottomCache && nextCache != NULL) {
221 			VMCache* consumer = cache;
222 			cache = nextCache;
223 			nextCache = cache->source;
224 			cache->Lock();
225 			cache->SetUserData(consumer);
226 		}
227 	}
228 
229 private:
230 	VMCache*	fTopCache;
231 	VMCache*	fBottomCache;
232 };
233 
234 } // namespace
235 
236 
237 // The memory reserve that an allocation of a given priority must not touch.
238 static const size_t kMemoryReserveForPriority[] = {
239 	VM_MEMORY_RESERVE_USER,		// user
240 	VM_MEMORY_RESERVE_SYSTEM,	// system
241 	0							// VIP
242 };
243 
244 
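// object cache backing the vm_page_mapping structures allocated in map_page()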
245 ObjectCache* gPageMappingsObjectCache;
246 
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248 
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
253 
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
255 
256 #if DEBUG_CACHE_LIST
257 
258 struct cache_info {
259 	VMCache*	cache;
260 	addr_t		page_count;
261 	addr_t		committed;
262 };
263 
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
266 
267 #endif	// DEBUG_CACHE_LIST
268 
269 
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 	bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 	int protection, int mapping, uint32 flags,
278 	const virtual_address_restrictions* addressRestrictions, bool kernel,
279 	VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
281 
282 
283 //	#pragma mark -
284 
285 
286 #if VM_PAGE_FAULT_TRACING
287 
288 namespace VMPageFaultTracing {
289 
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293 		:
294 		fAddress(address),
295 		fPC(pc),
296 		fWrite(write),
297 		fUser(user)
298 	{
299 		Initialized();
300 	}
301 
302 	virtual void AddDump(TraceOutput& out)
303 	{
304 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306 	}
307 
308 private:
309 	addr_t	fAddress;
310 	addr_t	fPC;
311 	bool	fWrite;
312 	bool	fUser;
313 };
314 
315 
316 // page fault errors
317 enum {
318 	PAGE_FAULT_ERROR_NO_AREA		= 0,
319 	PAGE_FAULT_ERROR_KERNEL_ONLY,
320 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 	PAGE_FAULT_ERROR_READ_PROTECTED,
322 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325 };
326 
327 
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 	PageFaultError(area_id area, status_t error)
331 		:
332 		fArea(area),
333 		fError(error)
334 	{
335 		Initialized();
336 	}
337 
338 	virtual void AddDump(TraceOutput& out)
339 	{
340 		switch (fError) {
341 			case PAGE_FAULT_ERROR_NO_AREA:
342 				out.Print("page fault error: no area");
343 				break;
344 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 				out.Print("page fault error: area: %ld, kernel only", fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 				out.Print("page fault error: area: %ld, write protected",
349 					fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_READ_PROTECTED:
352 				out.Print("page fault error: area: %ld, read protected", fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 				out.Print("page fault error: area: %ld, execute protected",
356 					fArea);
357 				break;
358 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 				out.Print("page fault error: kernel touching bad user memory");
360 				break;
361 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 				out.Print("page fault error: no address space");
363 				break;
364 			default:
365 				out.Print("page fault error: area: %ld, error: %s", fArea,
366 					strerror(fError));
367 				break;
368 		}
369 	}
370 
371 private:
372 	area_id		fArea;
373 	status_t	fError;
374 };
375 
376 
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 			vm_page* page)
381 		:
382 		fArea(area),
383 		fTopCache(topCache),
384 		fCache(cache),
385 		fPage(page)
386 	{
387 		Initialized();
388 	}
389 
390 	virtual void AddDump(TraceOutput& out)
391 	{
392 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 			"page: %p", fArea, fTopCache, fCache, fPage);
394 	}
395 
396 private:
397 	area_id		fArea;
398 	VMCache*	fTopCache;
399 	VMCache*	fCache;
400 	vm_page*	fPage;
401 };
402 
403 }	// namespace VMPageFaultTracing
404 
405 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 #	define TPF(x) ;
408 #endif	// VM_PAGE_FAULT_TRACING
409 
410 
411 //	#pragma mark -
412 
413 
414 /*!	The page's cache must be locked.
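	Wires the page; if it was not mapped before, it is also accounted in
	gMappedPagesCount.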
415 */
416 static inline void
417 increment_page_wired_count(vm_page* page)
418 {
419 	if (!page->IsMapped())
420 		atomic_add(&gMappedPagesCount, 1);
421 	page->IncrementWiredCount();
422 }
423 
424 
425 /*!	The page's cache must be locked.
426 */
427 static inline void
428 decrement_page_wired_count(vm_page* page)
429 {
430 	page->DecrementWiredCount();
431 	if (!page->IsMapped())
432 		atomic_add(&gMappedPagesCount, -1);
433 }
434 
435 
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
438 {
439 	return area->Base()
440 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441 }
442 
443 
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
447 {
448 	VMAreaHash::ReadLock();
449 
450 	VMArea* area = VMAreaHash::LookupLocked(id);
451 	if (area != NULL && area->address_space != addressSpace)
452 		area = NULL;
453 
454 	VMAreaHash::ReadUnlock();
455 
456 	return area;
457 }
458 
459 
460 static status_t
461 allocate_area_page_protections(VMArea* area)
462 {
463 	// In the page protections we store only the three user protections,
464 	// so we use 4 bits per page.
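	// Each byte covers two pages: the low nibble holds the protection of the
	// even-indexed page, the high nibble that of the odd-indexed one.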
465 	size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 	area->page_protections = (uint8*)malloc_etc(bytes,
467 		HEAP_DONT_LOCK_KERNEL_SPACE);
468 	if (area->page_protections == NULL)
469 		return B_NO_MEMORY;
470 
471 	// init the page protections for all pages to that of the area
472 	uint32 areaProtection = area->protection
473 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 	memset(area->page_protections, areaProtection | (areaProtection << 4),
475 		bytes);
476 	return B_OK;
477 }
478 
479 
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482 {
483 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 	uint8& entry = area->page_protections[pageIndex / 2];
486 	if (pageIndex % 2 == 0)
487 		entry = (entry & 0xf0) | protection;
488 	else
489 		entry = (entry & 0x0f) | (protection << 4);
490 }
491 
492 
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
495 {
496 	if (area->page_protections == NULL)
497 		return area->protection;
498 
499 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 	uint32 protection = area->page_protections[pageIndex / 2];
501 	if (pageIndex % 2 == 0)
502 		protection &= 0x0f;
503 	else
504 		protection >>= 4;
505 
506 	// If this is a kernel area we translate the user flags to kernel flags.
507 	if (area->address_space == VMAddressSpace::Kernel()) {
508 		uint32 kernelProtection = 0;
509 		if ((protection & B_READ_AREA) != 0)
510 			kernelProtection |= B_KERNEL_READ_AREA;
511 		if ((protection & B_WRITE_AREA) != 0)
512 			kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 		return kernelProtection;
515 	}
516 
517 	return protection | B_KERNEL_READ_AREA
518 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519 }
520 
521 
522 /*!	The caller must have reserved enough pages that the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
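		// Wired mappings are not tracked with vm_page_mapping objects; the
		// page's wired count (incremented below) accounts for this mapping.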
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue,
575 		// since otherwise the page daemon wouldn't come to keep track of it (in
576 		// idle mode) -- if the page isn't touched, it will be deactivated after
577 		// a full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
586 
587 
588 /*!	The caller must hold the lock of the page's cache when calling this
589 	function.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	The caller must hold the locks of all mapped pages' caches when calling
600 	this function.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 static inline bool
610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
611 {
612 	if (address < area->Base()) {
613 		offset = area->Base() - address;
614 		if (offset >= size)
615 			return false;
616 
617 		address = area->Base();
618 		size -= offset;
619 		offset = 0;
620 		if (size > area->Size())
621 			size = area->Size();
622 
623 		return true;
624 	}
625 
626 	offset = address - area->Base();
627 	if (offset >= area->Size())
628 		return false;
629 
630 	if (size >= area->Size() - offset)
631 		size = area->Size() - offset;
632 
633 	return true;
634 }
635 
636 
637 /*!	Cuts a piece out of an area. If the given cut range covers the complete
638 	area, it is deleted. If it covers the beginning or the end, the area is
639 	resized accordingly. If the range covers some part in the middle of the
640 	area, it is split in two; in this case the second area is returned via
641 	\a _secondArea (the variable is left untouched in the other cases).
642 	The address space must be write locked.
643 	The caller must ensure that no part of the given range is wired.
644 */
645 static status_t
646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
647 	addr_t size, VMArea** _secondArea, bool kernel)
648 {
649 	addr_t offset;
650 	if (!intersect_area(area, address, size, offset))
651 		return B_OK;
652 
653 	// Is the area fully covered?
654 	if (address == area->Base() && size == area->Size()) {
655 		delete_area(addressSpace, area, false);
656 		return B_OK;
657 	}
658 
659 	int priority;
660 	uint32 allocationFlags;
661 	if (addressSpace == VMAddressSpace::Kernel()) {
662 		priority = VM_PRIORITY_SYSTEM;
663 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
664 			| HEAP_DONT_LOCK_KERNEL_SPACE;
665 	} else {
666 		priority = VM_PRIORITY_USER;
667 		allocationFlags = 0;
668 	}
669 
670 	VMCache* cache = vm_area_get_locked_cache(area);
671 	VMCacheChainLocker cacheChainLocker(cache);
672 	cacheChainLocker.LockAllSourceCaches();
673 
674 	// If no one else uses the area's cache and it's an anonymous cache, we can
675 	// resize or split it, too.
676 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
677 		&& cache->consumers.IsEmpty() && cache->type == CACHE_TYPE_RAM;
678 
679 	// Cut the end only?
680 	if (offset > 0 && size == area->Size() - offset) {
681 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
682 			allocationFlags);
683 		if (error != B_OK)
684 			return error;
685 
686 		// unmap pages
687 		unmap_pages(area, address, size);
688 
689 		if (onlyCacheUser) {
690 			// Since VMCache::Resize() can temporarily drop the lock, we must
691 			// unlock all lower caches to prevent locking order inversion.
692 			cacheChainLocker.Unlock(cache);
693 			cache->Resize(cache->virtual_base + offset, priority);
694 			cache->ReleaseRefAndUnlock();
695 		}
696 
697 		return B_OK;
698 	}
699 
700 	// Cut the beginning only?
701 	if (area->Base() == address) {
702 		// resize the area
703 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
704 			allocationFlags);
705 		if (error != B_OK)
706 			return error;
707 
708 		// unmap pages
709 		unmap_pages(area, address, size);
710 
711 		if (onlyCacheUser) {
712 			// Since VMCache::Rebase() can temporarily drop the lock, we must
713 			// unlock all lower caches to prevent locking order inversion.
714 			cacheChainLocker.Unlock(cache);
715 			cache->Rebase(cache->virtual_base + size, priority);
716 			cache->ReleaseRefAndUnlock();
717 		}
718 		area->cache_offset += size;
719 
720 		return B_OK;
721 	}
722 
723 	// The tough part -- cut a piece out of the middle of the area.
724 	// We do that by shrinking the area to the first section and creating a
725 	// new area for the end section.
726 	addr_t firstNewSize = offset;
727 	addr_t secondBase = address + size;
728 	addr_t secondSize = area->Size() - offset - size;
729 
730 	// unmap pages
731 	unmap_pages(area, address, area->Size() - firstNewSize);
732 
733 	// resize the area
734 	addr_t oldSize = area->Size();
735 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
736 		allocationFlags);
737 	if (error != B_OK)
738 		return error;
739 
740 	virtual_address_restrictions addressRestrictions = {};
741 	addressRestrictions.address = (void*)secondBase;
742 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
743 	VMArea* secondArea;
744 
745 	if (onlyCacheUser) {
746 		// Create a new cache for the second area.
747 		VMCache* secondCache;
748 		error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0,
749 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
750 		if (error != B_OK) {
751 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
752 			return error;
753 		}
754 
755 		secondCache->Lock();
756 		secondCache->temporary = cache->temporary;
757 		secondCache->virtual_base = area->cache_offset;
758 		secondCache->virtual_end = area->cache_offset + secondSize;
759 
760 		// Transfer the concerned pages from the first cache.
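		// (adoptOffset is the cache offset that corresponds to secondBase
		// within the original area.)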
761 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
762 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
763 			area->cache_offset);
764 
765 		if (error == B_OK) {
766 			// Since VMCache::Resize() can temporarily drop the lock, we must
767 			// unlock all lower caches to prevent locking order inversion.
768 			cacheChainLocker.Unlock(cache);
769 			cache->Resize(cache->virtual_base + firstNewSize, priority);
770 			// Don't unlock the cache yet because we might have to resize it
771 			// back.
772 
773 			// Map the second area.
774 			error = map_backing_store(addressSpace, secondCache,
775 				area->cache_offset, area->name, secondSize, area->wiring,
776 				area->protection, REGION_NO_PRIVATE_MAP, 0,
777 				&addressRestrictions, kernel, &secondArea, NULL);
778 		}
779 
780 		if (error != B_OK) {
781 			// Restore the original cache.
782 			cache->Resize(cache->virtual_base + oldSize, priority);
783 
784 			// Move the pages back.
785 			status_t readoptStatus = cache->Adopt(secondCache,
786 				area->cache_offset, secondSize, adoptOffset);
787 			if (readoptStatus != B_OK) {
788 				// Some (swap) pages have not been moved back and will be lost
789 				// once the second cache is deleted.
790 				panic("failed to restore cache range: %s",
791 					strerror(readoptStatus));
792 
793 				// TODO: Handle out of memory cases by freeing memory and
794 				// retrying.
795 			}
796 
797 			cache->ReleaseRefAndUnlock();
798 			secondCache->ReleaseRefAndUnlock();
799 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
800 			return error;
801 		}
802 
803 		// Now we can unlock it.
804 		cache->ReleaseRefAndUnlock();
805 		secondCache->Unlock();
806 	} else {
807 		error = map_backing_store(addressSpace, cache, area->cache_offset
808 			+ (secondBase - area->Base()),
809 			area->name, secondSize, area->wiring, area->protection,
810 			REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, kernel, &secondArea,
811 			NULL);
812 		if (error != B_OK) {
813 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
814 			return error;
815 		}
816 		// We need a cache reference for the new area.
817 		cache->AcquireRefLocked();
818 	}
819 
820 	if (_secondArea != NULL)
821 		*_secondArea = secondArea;
822 
823 	return B_OK;
824 }
825 
826 
827 /*!	Deletes or cuts all areas in the given address range.
828 	The address space must be write-locked.
829 	The caller must ensure that no part of the given range is wired.
830 */
831 static status_t
832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
833 	bool kernel)
834 {
835 	size = PAGE_ALIGN(size);
836 
837 	// Check whether the caller is allowed to modify the concerned areas.
838 	if (!kernel) {
839 		for (VMAddressSpace::AreaRangeIterator it
840 				= addressSpace->GetAreaRangeIterator(address, size);
841 			VMArea* area = it.Next();) {
842 
843 			if ((area->protection & B_KERNEL_AREA) != 0) {
844 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
845 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
846 					team_get_current_team_id(), area->id, area->name);
847 				return B_NOT_ALLOWED;
848 			}
849 		}
850 	}
851 
852 	for (VMAddressSpace::AreaRangeIterator it
853 			= addressSpace->GetAreaRangeIterator(address, size);
854 		VMArea* area = it.Next();) {
855 
856 		status_t error = cut_area(addressSpace, area, address, size, NULL,
857 			kernel);
858 		if (error != B_OK)
859 			return error;
860 			// Failing after already messing with areas is ugly, but we
861 			// can't do anything about it.
862 	}
863 
864 	return B_OK;
865 }
866 
867 
868 static status_t
869 discard_area_range(VMArea* area, addr_t address, addr_t size)
870 {
871 	addr_t offset;
872 	if (!intersect_area(area, address, size, offset))
873 		return B_OK;
874 
875 	// If someone else uses the area's cache or it's not an anonymous cache, we
876 	// can't discard.
877 	VMCache* cache = vm_area_get_locked_cache(area);
878 	if (cache->areas != area || area->cache_next != NULL
879 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
880 		return B_OK;
881 	}
882 
883 	VMCacheChainLocker cacheChainLocker(cache);
884 	cacheChainLocker.LockAllSourceCaches();
885 
886 	unmap_pages(area, address, size);
887 
888 	// Since VMCache::Discard() can temporarily drop the lock, we must
889 	// unlock all lower caches to prevent locking order inversion.
890 	cacheChainLocker.Unlock(cache);
891 	cache->Discard(cache->virtual_base + offset, size);
892 	cache->ReleaseRefAndUnlock();
893 
894 	return B_OK;
895 }
896 
897 
898 static status_t
899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
900 	bool kernel)
901 {
902 	for (VMAddressSpace::AreaRangeIterator it
903 		= addressSpace->GetAreaRangeIterator(address, size);
904 			VMArea* area = it.Next();) {
905 		status_t error = discard_area_range(area, address, size);
906 		if (error != B_OK)
907 			return error;
908 	}
909 
910 	return B_OK;
911 }
912 
913 
914 /*! You need to hold the lock of the cache and the write lock of the address
915 	space when calling this function.
916 	Note that in case of error the cache will be temporarily unlocked.
917 	If \a addressSpec is \c B_EXACT_ADDRESS and the
918 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
919 	that no part of the specified address range (base \c *_virtualAddress, size
920 	\a size) is wired.
921 */
922 static status_t
923 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
924 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
925 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
926 	bool kernel, VMArea** _area, void** _virtualAddress)
927 {
928 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
929 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
930 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
931 		addressRestrictions->address, offset, size,
932 		addressRestrictions->address_specification, wiring, protection,
933 		_area, areaName));
934 	cache->AssertLocked();
935 
936 	if (size == 0) {
937 #if KDEBUG
938 		panic("map_backing_store(): called with size=0 for area '%s'!",
939 			areaName);
940 #endif
941 		return B_BAD_VALUE;
942 	}
943 
944 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
945 		| HEAP_DONT_LOCK_KERNEL_SPACE;
946 	int priority;
947 	if (addressSpace != VMAddressSpace::Kernel()) {
948 		priority = VM_PRIORITY_USER;
949 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
950 		priority = VM_PRIORITY_VIP;
951 		allocationFlags |= HEAP_PRIORITY_VIP;
952 	} else
953 		priority = VM_PRIORITY_SYSTEM;
954 
955 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
956 		allocationFlags);
957 	if (area == NULL)
958 		return B_NO_MEMORY;
959 
960 	status_t status;
961 
962 	// if this is a private map, we need to create a new cache
963 	// to handle the private copies of pages as they are written to
964 	VMCache* sourceCache = cache;
965 	if (mapping == REGION_PRIVATE_MAP) {
966 		VMCache* newCache;
967 
968 		// create an anonymous cache
969 		status = VMCacheFactory::CreateAnonymousCache(newCache,
970 			(protection & B_STACK_AREA) != 0
971 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
972 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
973 		if (status != B_OK)
974 			goto err1;
975 
976 		newCache->Lock();
977 		newCache->temporary = 1;
978 		newCache->virtual_base = offset;
979 		newCache->virtual_end = offset + size;
980 
981 		cache->AddConsumer(newCache);
982 
983 		cache = newCache;
984 	}
985 
986 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
987 		status = cache->SetMinimalCommitment(size, priority);
988 		if (status != B_OK)
989 			goto err2;
990 	}
991 
992 	// check to see if this address space has entered DELETE state
993 	if (addressSpace->IsBeingDeleted()) {
994 		// okay, someone is trying to delete this address space now, so we can't
995 		// insert the area; back out
996 		status = B_BAD_TEAM_ID;
997 		goto err2;
998 	}
999 
1000 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1001 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1002 		status = unmap_address_range(addressSpace,
1003 			(addr_t)addressRestrictions->address, size, kernel);
1004 		if (status != B_OK)
1005 			goto err2;
1006 	}
1007 
1008 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1009 		allocationFlags, _virtualAddress);
1010 	if (status == B_NO_MEMORY
1011 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1012 		// TODO: At present, there is no way to notify the low_resource monitor
1013 		// that the kernel address space is fragmented, nor does it check for this
1014 		// automatically. Due to how many locks are held, we cannot wait here
1015 		// for space to be freed up, but it would be good to at least notify
1016 		// that we tried and failed to allocate some amount.
1017 	}
1018 	if (status != B_OK)
1019 		goto err2;
1020 
1021 	// attach the cache to the area
1022 	area->cache = cache;
1023 	area->cache_offset = offset;
1024 
1025 	// point the cache back to the area
1026 	cache->InsertAreaLocked(area);
1027 	if (mapping == REGION_PRIVATE_MAP)
1028 		cache->Unlock();
1029 
1030 	// insert the area in the global area hash table
1031 	VMAreaHash::Insert(area);
1032 
1033 	// grab a ref to the address space (the area holds this)
1034 	addressSpace->Get();
1035 
1036 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1037 //		cache, sourceCache, areaName, area);
1038 
1039 	*_area = area;
1040 	return B_OK;
1041 
1042 err2:
1043 	if (mapping == REGION_PRIVATE_MAP) {
1044 		// We created this cache, so we must delete it again. Note that we
1045 		// need to temporarily unlock the source cache or we'll otherwise
1046 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1047 		sourceCache->Unlock();
1048 		cache->ReleaseRefAndUnlock();
1049 		sourceCache->Lock();
1050 	}
1051 err1:
1052 	addressSpace->DeleteArea(area, allocationFlags);
1053 	return status;
1054 }
1055 
1056 
1057 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1058 	  locker1, locker2).
1059 */
1060 template<typename LockerType1, typename LockerType2>
1061 static inline bool
1062 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1063 {
1064 	area->cache->AssertLocked();
1065 
1066 	VMAreaUnwiredWaiter waiter;
1067 	if (!area->AddWaiterIfWired(&waiter))
1068 		return false;
1069 
1070 	// unlock everything and wait
1071 	if (locker1 != NULL)
1072 		locker1->Unlock();
1073 	if (locker2 != NULL)
1074 		locker2->Unlock();
1075 
1076 	waiter.waitEntry.Wait();
1077 
1078 	return true;
1079 }
1080 
1081 
1082 /*!	Checks whether the given area has any wired ranges intersecting with the
1083 	specified range and waits, if so.
1084 
1085 	When it has to wait, the function calls \c Unlock() on both \a locker1
1086 	and \a locker2, if given.
1087 	The area's top cache must be locked and must be unlocked as a side effect
1088 	of calling \c Unlock() on either \a locker1 or \a locker2.
1089 
1090 	If the function does not have to wait it does not modify or unlock any
1091 	object.
1092 
1093 	\param area The area to be checked.
1094 	\param base The base address of the range to check.
1095 	\param size The size of the address range to check.
1096 	\param locker1 An object to be unlocked before starting to wait (may
1097 		be \c NULL).
1098 	\param locker2 An object to be unlocked before starting to wait (may
1099 		be \c NULL).
1100 	\return \c true, if the function had to wait, \c false otherwise.
1101 */
1102 template<typename LockerType1, typename LockerType2>
1103 static inline bool
1104 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1105 	LockerType1* locker1, LockerType2* locker2)
1106 {
1107 	area->cache->AssertLocked();
1108 
1109 	VMAreaUnwiredWaiter waiter;
1110 	if (!area->AddWaiterIfWired(&waiter, base, size))
1111 		return false;
1112 
1113 	// unlock everything and wait
1114 	if (locker1 != NULL)
1115 		locker1->Unlock();
1116 	if (locker2 != NULL)
1117 		locker2->Unlock();
1118 
1119 	waiter.waitEntry.Wait();
1120 
1121 	return true;
1122 }
1123 
1124 
1125 /*!	Checks whether the given address space has any wired ranges intersecting
1126 	with the specified range and waits, if so.
1127 
1128 	Similar to wait_if_area_range_is_wired(), with the following differences:
1129 	- All areas intersecting with the range are checked (respectively all until
1130 	  one is found that contains a wired range intersecting with the given
1131 	  range).
1132 	- The given address space must at least be read-locked and must be unlocked
1133 	  when \c Unlock() is called on \a locker.
1134 	- None of the areas' caches are allowed to be locked.
1135 */
1136 template<typename LockerType>
1137 static inline bool
1138 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1139 	size_t size, LockerType* locker)
1140 {
1141 	for (VMAddressSpace::AreaRangeIterator it
1142 		= addressSpace->GetAreaRangeIterator(base, size);
1143 			VMArea* area = it.Next();) {
1144 
1145 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1146 
1147 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1148 			return true;
1149 	}
1150 
1151 	return false;
1152 }
1153 
1154 
1155 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1156 	It must be called in a situation where the kernel address space may be
1157 	locked.
1158 */
1159 status_t
1160 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1161 {
1162 	AddressSpaceReadLocker locker;
1163 	VMArea* area;
1164 	status_t status = locker.SetFromArea(id, area);
1165 	if (status != B_OK)
1166 		return status;
1167 
1168 	if (area->page_protections == NULL) {
1169 		status = allocate_area_page_protections(area);
1170 		if (status != B_OK)
1171 			return status;
1172 	}
1173 
1174 	*cookie = (void*)area;
1175 	return B_OK;
1176 }
1177 
1178 
1179 /*!	This is a debug helper function that can only be used with very specific
1180 	use cases.
1181 	Sets protection for the given address range to the protection specified.
1182 	If \a protection is 0 then the involved pages will be marked non-present
1183 	in the translation map to cause a fault on access. The pages aren't
1184 	actually unmapped however so that they can be marked present again with
1185 	additional calls to this function. For this to work the area must be
1186 	fully locked in memory so that the pages aren't otherwise touched.
1187 	This function does not lock the kernel address space and needs to be
1188 	supplied with a \a cookie retrieved from a successful call to
1189 	vm_prepare_kernel_area_debug_protection().
1190 */
1191 status_t
1192 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1193 	uint32 protection)
1194 {
1195 	// check address range
1196 	addr_t address = (addr_t)_address;
1197 	size = PAGE_ALIGN(size);
1198 
1199 	if ((address % B_PAGE_SIZE) != 0
1200 		|| (addr_t)address + size < (addr_t)address
1201 		|| !IS_KERNEL_ADDRESS(address)
1202 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1203 		return B_BAD_VALUE;
1204 	}
1205 
1206 	// Translate the kernel protection to user protection as we only store that.
1207 	if ((protection & B_KERNEL_READ_AREA) != 0)
1208 		protection |= B_READ_AREA;
1209 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1210 		protection |= B_WRITE_AREA;
1211 
1212 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1213 	VMTranslationMap* map = addressSpace->TranslationMap();
1214 	VMArea* area = (VMArea*)cookie;
1215 
1216 	addr_t offset = address - area->Base();
1217 	if (area->Size() - offset < size) {
1218 		panic("protect range not fully within supplied area");
1219 		return B_BAD_VALUE;
1220 	}
1221 
1222 	if (area->page_protections == NULL) {
1223 		panic("area has no page protections");
1224 		return B_BAD_VALUE;
1225 	}
1226 
1227 	// Invalidate the mapping entries so any access to them will fault or
1228 	// restore the mapping entries unchanged so that lookup will succeed again.
1229 	map->Lock();
1230 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1231 	map->Unlock();
1232 
1233 	// And set the proper page protections so that the fault case will actually
1234 	// fail and not simply try to map a new page.
1235 	for (addr_t pageAddress = address; pageAddress < address + size;
1236 			pageAddress += B_PAGE_SIZE) {
1237 		set_area_page_protection(area, pageAddress, protection);
1238 	}
1239 
1240 	return B_OK;
1241 }
1242 
1243 
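/*!	Blocks the given address range in the kernel address space by mapping a
	wired area with no access protection over it, so that any access to the
	range faults.
*/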
1244 status_t
1245 vm_block_address_range(const char* name, void* address, addr_t size)
1246 {
1247 	if (!arch_vm_supports_protection(0))
1248 		return B_NOT_SUPPORTED;
1249 
1250 	AddressSpaceWriteLocker locker;
1251 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1252 	if (status != B_OK)
1253 		return status;
1254 
1255 	VMAddressSpace* addressSpace = locker.AddressSpace();
1256 
1257 	// create an anonymous cache
1258 	VMCache* cache;
1259 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1260 		VM_PRIORITY_SYSTEM);
1261 	if (status != B_OK)
1262 		return status;
1263 
1264 	cache->temporary = 1;
1265 	cache->virtual_end = size;
1266 	cache->Lock();
1267 
1268 	VMArea* area;
1269 	virtual_address_restrictions addressRestrictions = {};
1270 	addressRestrictions.address = address;
1271 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1272 	status = map_backing_store(addressSpace, cache, 0, name, size,
1273 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1274 		true, &area, NULL);
1275 	if (status != B_OK) {
1276 		cache->ReleaseRefAndUnlock();
1277 		return status;
1278 	}
1279 
1280 	cache->Unlock();
1281 	area->cache_type = CACHE_TYPE_RAM;
1282 	return area->id;
1283 }
1284 
1285 
1286 status_t
1287 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1288 {
1289 	AddressSpaceWriteLocker locker(team);
1290 	if (!locker.IsLocked())
1291 		return B_BAD_TEAM_ID;
1292 
1293 	VMAddressSpace* addressSpace = locker.AddressSpace();
1294 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1295 		addressSpace == VMAddressSpace::Kernel()
1296 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1297 }
1298 
1299 
1300 status_t
1301 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1302 	addr_t size, uint32 flags)
1303 {
1304 	if (size == 0)
1305 		return B_BAD_VALUE;
1306 
1307 	AddressSpaceWriteLocker locker(team);
1308 	if (!locker.IsLocked())
1309 		return B_BAD_TEAM_ID;
1310 
1311 	virtual_address_restrictions addressRestrictions = {};
1312 	addressRestrictions.address = *_address;
1313 	addressRestrictions.address_specification = addressSpec;
1314 	VMAddressSpace* addressSpace = locker.AddressSpace();
1315 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1316 		addressSpace == VMAddressSpace::Kernel()
1317 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1318 		_address);
1319 }
1320 
1321 
1322 area_id
1323 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1324 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1325 	const virtual_address_restrictions* virtualAddressRestrictions,
1326 	const physical_address_restrictions* physicalAddressRestrictions,
1327 	bool kernel, void** _address)
1328 {
1329 	VMArea* area;
1330 	VMCache* cache;
1331 	vm_page* page = NULL;
1332 	bool isStack = (protection & B_STACK_AREA) != 0;
1333 	page_num_t guardPages;
1334 	bool canOvercommit = false;
1335 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1336 		? VM_PAGE_ALLOC_CLEAR : 0;
1337 
1338 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1339 		team, name, size));
1340 
1341 	size = PAGE_ALIGN(size);
1342 	guardSize = PAGE_ALIGN(guardSize);
1343 	guardPages = guardSize / B_PAGE_SIZE;
1344 
1345 	if (size == 0 || size < guardSize)
1346 		return B_BAD_VALUE;
1347 	if (!arch_vm_supports_protection(protection))
1348 		return B_NOT_SUPPORTED;
1349 
1350 	if (team == B_CURRENT_TEAM)
1351 		team = VMAddressSpace::CurrentID();
1352 	if (team < 0)
1353 		return B_BAD_TEAM_ID;
1354 
1355 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1356 		canOvercommit = true;
1357 
1358 #ifdef DEBUG_KERNEL_STACKS
1359 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1360 		isStack = true;
1361 #endif
1362 
1363 	// check parameters
1364 	switch (virtualAddressRestrictions->address_specification) {
1365 		case B_ANY_ADDRESS:
1366 		case B_EXACT_ADDRESS:
1367 		case B_BASE_ADDRESS:
1368 		case B_ANY_KERNEL_ADDRESS:
1369 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1370 		case B_RANDOMIZED_ANY_ADDRESS:
1371 		case B_RANDOMIZED_BASE_ADDRESS:
1372 			break;
1373 
1374 		default:
1375 			return B_BAD_VALUE;
1376 	}
1377 
1378 	// If low or high physical address restrictions are given, we force
1379 	// B_CONTIGUOUS wiring, since only then we'll use
1380 	// vm_page_allocate_page_run() which deals with those restrictions.
1381 	if (physicalAddressRestrictions->low_address != 0
1382 		|| physicalAddressRestrictions->high_address != 0) {
1383 		wiring = B_CONTIGUOUS;
1384 	}
1385 
1386 	physical_address_restrictions stackPhysicalRestrictions;
1387 	bool doReserveMemory = false;
1388 	switch (wiring) {
1389 		case B_NO_LOCK:
1390 			break;
1391 		case B_FULL_LOCK:
1392 		case B_LAZY_LOCK:
1393 		case B_CONTIGUOUS:
1394 			doReserveMemory = true;
1395 			break;
1396 		case B_ALREADY_WIRED:
1397 			break;
1398 		case B_LOMEM:
1399 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1400 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1401 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1402 			wiring = B_CONTIGUOUS;
1403 			doReserveMemory = true;
1404 			break;
1405 		case B_32_BIT_FULL_LOCK:
1406 			if (B_HAIKU_PHYSICAL_BITS <= 32
1407 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1408 				wiring = B_FULL_LOCK;
1409 				doReserveMemory = true;
1410 				break;
1411 			}
1412 			// TODO: We don't really support this mode efficiently. Just fall
1413 			// through for now ...
1414 		case B_32_BIT_CONTIGUOUS:
1415 			#if B_HAIKU_PHYSICAL_BITS > 32
1416 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1417 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1418 					stackPhysicalRestrictions.high_address
1419 						= (phys_addr_t)1 << 32;
1420 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1421 				}
1422 			#endif
1423 			wiring = B_CONTIGUOUS;
1424 			doReserveMemory = true;
1425 			break;
1426 		default:
1427 			return B_BAD_VALUE;
1428 	}
1429 
1430 	// Optimization: For a single-page contiguous allocation without low/high
1431 	// memory restriction B_FULL_LOCK wiring suffices.
1432 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1433 		&& physicalAddressRestrictions->low_address == 0
1434 		&& physicalAddressRestrictions->high_address == 0) {
1435 		wiring = B_FULL_LOCK;
1436 	}
1437 
1438 	// For full lock or contiguous areas we're also going to map the pages and
1439 	// thus need to reserve pages for the mapping backend upfront.
1440 	addr_t reservedMapPages = 0;
1441 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1442 		AddressSpaceWriteLocker locker;
1443 		status_t status = locker.SetTo(team);
1444 		if (status != B_OK)
1445 			return status;
1446 
1447 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1448 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1449 	}
1450 
1451 	int priority;
1452 	if (team != VMAddressSpace::KernelID())
1453 		priority = VM_PRIORITY_USER;
1454 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1455 		priority = VM_PRIORITY_VIP;
1456 	else
1457 		priority = VM_PRIORITY_SYSTEM;
1458 
1459 	// Reserve memory before acquiring the address space lock. This reduces the
1460 	// chances of failure, since while holding the write lock to the address
1461 	// space (if it is the kernel address space that is), the low memory handler
1462 	// won't be able to free anything for us.
1463 	addr_t reservedMemory = 0;
1464 	if (doReserveMemory) {
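		// wait up to one second for the memory, unless the caller asked us
		// not to wait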
1465 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1466 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1467 			return B_NO_MEMORY;
1468 		reservedMemory = size;
1469 		// TODO: We don't reserve the memory for the pages for the page
1470 		// directories/tables. We actually need to do so, since we currently
1471 		// don't reclaim them (and probably can't reclaim all of them anyway).
1472 		// Thus there are actually fewer physical pages than there should be, which
1473 		// can get the VM into trouble in low memory situations.
1474 	}
1475 
1476 	AddressSpaceWriteLocker locker;
1477 	VMAddressSpace* addressSpace;
1478 	status_t status;
1479 
1480 	// For full lock areas reserve the pages before locking the address
1481 	// space. E.g. block caches can't release their memory while we hold the
1482 	// address space lock.
1483 	page_num_t reservedPages = reservedMapPages;
1484 	if (wiring == B_FULL_LOCK)
1485 		reservedPages += size / B_PAGE_SIZE;
1486 
1487 	vm_page_reservation reservation;
1488 	if (reservedPages > 0) {
1489 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1490 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1491 					priority)) {
1492 				reservedPages = 0;
1493 				status = B_WOULD_BLOCK;
1494 				goto err0;
1495 			}
1496 		} else
1497 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1498 	}
1499 
1500 	if (wiring == B_CONTIGUOUS) {
1501 		// we try to allocate the page run here upfront as this may easily
1502 		// fail for obvious reasons
1503 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1504 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1505 		if (page == NULL) {
1506 			status = B_NO_MEMORY;
1507 			goto err0;
1508 		}
1509 	}
1510 
1511 	// Lock the address space and, if B_EXACT_ADDRESS and
1512 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1513 	// is not wired.
1514 	do {
1515 		status = locker.SetTo(team);
1516 		if (status != B_OK)
1517 			goto err1;
1518 
1519 		addressSpace = locker.AddressSpace();
1520 	} while (virtualAddressRestrictions->address_specification
1521 			== B_EXACT_ADDRESS
1522 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1523 		&& wait_if_address_range_is_wired(addressSpace,
1524 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1525 
1526 	// create an anonymous cache
1527 	// if it's a stack, make sure that at least two pages are available
1528 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1529 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1530 		wiring == B_NO_LOCK, priority);
1531 	if (status != B_OK)
1532 		goto err1;
1533 
1534 	cache->temporary = 1;
1535 	cache->virtual_end = size;
1536 	cache->committed_size = reservedMemory;
1537 		// TODO: This should be done via a method.
1538 	reservedMemory = 0;
1539 
1540 	cache->Lock();
1541 
1542 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1543 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1544 		kernel, &area, _address);
1545 
1546 	if (status != B_OK) {
1547 		cache->ReleaseRefAndUnlock();
1548 		goto err1;
1549 	}
1550 
1551 	locker.DegradeToReadLock();
1552 
1553 	switch (wiring) {
1554 		case B_NO_LOCK:
1555 		case B_LAZY_LOCK:
1556 			// do nothing - the pages are mapped in as needed
1557 			break;
1558 
1559 		case B_FULL_LOCK:
1560 		{
1561 			// Allocate and map all pages for this area
1562 
1563 			off_t offset = 0;
1564 			for (addr_t address = area->Base();
1565 					address < area->Base() + (area->Size() - 1);
1566 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1567 #ifdef DEBUG_KERNEL_STACKS
1568 #	ifdef STACK_GROWS_DOWNWARDS
1569 				if (isStack && address < area->Base()
1570 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1571 #	else
1572 				if (isStack && address >= area->Base() + area->Size()
1573 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1574 #	endif
1575 					continue;
1576 #endif
1577 				vm_page* page = vm_page_allocate_page(&reservation,
1578 					PAGE_STATE_WIRED | pageAllocFlags);
1579 				cache->InsertPage(page, offset);
1580 				map_page(area, page, address, protection, &reservation);
1581 
1582 				DEBUG_PAGE_ACCESS_END(page);
1583 			}
1584 
1585 			break;
1586 		}
1587 
1588 		case B_ALREADY_WIRED:
1589 		{
1590 			// The pages should already be mapped. This is only really useful
1591 			// during boot time. Find the appropriate vm_page objects and stick
1592 			// them in the cache object.
1593 			VMTranslationMap* map = addressSpace->TranslationMap();
1594 			off_t offset = 0;
1595 
1596 			if (!gKernelStartup)
1597 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1598 
1599 			map->Lock();
1600 
1601 			for (addr_t virtualAddress = area->Base();
1602 					virtualAddress < area->Base() + (area->Size() - 1);
1603 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1604 				phys_addr_t physicalAddress;
1605 				uint32 flags;
1606 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1607 				if (status < B_OK) {
1608 					panic("looking up mapping failed for va 0x%lx\n",
1609 						virtualAddress);
1610 				}
1611 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1612 				if (page == NULL) {
1613 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1614 						"\n", physicalAddress);
1615 				}
1616 
1617 				DEBUG_PAGE_ACCESS_START(page);
1618 
1619 				cache->InsertPage(page, offset);
1620 				increment_page_wired_count(page);
1621 				vm_page_set_state(page, PAGE_STATE_WIRED);
1622 				page->busy = false;
1623 
1624 				DEBUG_PAGE_ACCESS_END(page);
1625 			}
1626 
1627 			map->Unlock();
1628 			break;
1629 		}
1630 
1631 		case B_CONTIGUOUS:
1632 		{
1633 			// We have already allocated our contiguous page run, so we can now
1634 			// just map them in the address space
1635 			VMTranslationMap* map = addressSpace->TranslationMap();
1636 			phys_addr_t physicalAddress
1637 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1638 			addr_t virtualAddress = area->Base();
1639 			off_t offset = 0;
1640 
1641 			map->Lock();
1642 
1643 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1644 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1645 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1646 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1647 				if (page == NULL)
1648 					panic("couldn't lookup physical page just allocated\n");
1649 
1650 				status = map->Map(virtualAddress, physicalAddress, protection,
1651 					area->MemoryType(), &reservation);
1652 				if (status < B_OK)
1653 					panic("couldn't map physical page in page run\n");
1654 
1655 				cache->InsertPage(page, offset);
1656 				increment_page_wired_count(page);
1657 
1658 				DEBUG_PAGE_ACCESS_END(page);
1659 			}
1660 
1661 			map->Unlock();
1662 			break;
1663 		}
1664 
1665 		default:
1666 			break;
1667 	}
1668 
1669 	cache->Unlock();
1670 
1671 	if (reservedPages > 0)
1672 		vm_page_unreserve_pages(&reservation);
1673 
1674 	TRACE(("vm_create_anonymous_area: done\n"));
1675 
1676 	area->cache_type = CACHE_TYPE_RAM;
1677 	return area->id;
1678 
1679 err1:
1680 	if (wiring == B_CONTIGUOUS) {
1681 		// we had allocated the contiguous page run upfront; free its pages again
1682 		phys_addr_t pageNumber = page->physical_page_number;
1683 		int32 i;
1684 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1685 			page = vm_lookup_page(pageNumber);
1686 			if (page == NULL)
1687 				panic("couldn't lookup physical page just allocated\n");
1688 
1689 			vm_page_set_state(page, PAGE_STATE_FREE);
1690 		}
1691 	}
1692 
1693 err0:
1694 	if (reservedPages > 0)
1695 		vm_page_unreserve_pages(&reservation);
1696 	if (reservedMemory > 0)
1697 		vm_unreserve_memory(reservedMemory);
1698 
1699 	return status;
1700 }
1701 
1702 
1703 area_id
1704 vm_map_physical_memory(team_id team, const char* name, void** _address,
1705 	uint32 addressSpec, addr_t size, uint32 protection,
1706 	phys_addr_t physicalAddress, bool alreadyWired)
1707 {
1708 	VMArea* area;
1709 	VMCache* cache;
1710 	addr_t mapOffset;
1711 
1712 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1713 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1714 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1715 		addressSpec, size, protection, physicalAddress));
1716 
1717 	if (!arch_vm_supports_protection(protection))
1718 		return B_NOT_SUPPORTED;
1719 
1720 	AddressSpaceWriteLocker locker(team);
1721 	if (!locker.IsLocked())
1722 		return B_BAD_TEAM_ID;
1723 
1724 	// if the physical address is not page-aligned, map from the preceding
1725 	// page boundary and enlarge the size accordingly
1726 	mapOffset = physicalAddress % B_PAGE_SIZE;
1727 	size += mapOffset;
1728 	physicalAddress -= mapOffset;
1729 
1730 	size = PAGE_ALIGN(size);
1731 
1732 	// create a device cache
1733 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1734 	if (status != B_OK)
1735 		return status;
1736 
1737 	cache->virtual_end = size;
1738 
1739 	cache->Lock();
1740 
1741 	virtual_address_restrictions addressRestrictions = {};
1742 	addressRestrictions.address = *_address;
1743 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1744 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1745 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1746 		true, &area, _address);
1747 
1748 	if (status < B_OK)
1749 		cache->ReleaseRefLocked();
1750 
1751 	cache->Unlock();
1752 
1753 	if (status == B_OK) {
1754 		// set requested memory type -- use uncached, if not given
1755 		uint32 memoryType = addressSpec & B_MTR_MASK;
1756 		if (memoryType == 0)
1757 			memoryType = B_MTR_UC;
1758 
1759 		area->SetMemoryType(memoryType);
1760 
1761 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1762 		if (status != B_OK)
1763 			delete_area(locker.AddressSpace(), area, false);
1764 	}
1765 
1766 	if (status != B_OK)
1767 		return status;
1768 
1769 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1770 
1771 	if (alreadyWired) {
1772 		// The area is already mapped, but possibly not with the right
1773 		// memory type.
1774 		map->Lock();
1775 		map->ProtectArea(area, area->protection);
1776 		map->Unlock();
1777 	} else {
1778 		// Map the area completely.
1779 
1780 		// reserve pages needed for the mapping
1781 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1782 			area->Base() + (size - 1));
1783 		vm_page_reservation reservation;
1784 		vm_page_reserve_pages(&reservation, reservePages,
1785 			team == VMAddressSpace::KernelID()
1786 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1787 
1788 		map->Lock();
1789 
1790 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1791 			map->Map(area->Base() + offset, physicalAddress + offset,
1792 				protection, area->MemoryType(), &reservation);
1793 		}
1794 
1795 		map->Unlock();
1796 
1797 		vm_page_unreserve_pages(&reservation);
1798 	}
1799 
1800 	// modify the pointer returned to be offset back into the new area
1801 	// the same way the incoming physical address was offset
1802 	*_address = (void*)((addr_t)*_address + mapOffset);
1803 
1804 	area->cache_type = CACHE_TYPE_DEVICE;
1805 	return area->id;
1806 }
1807 
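// A minimal usage sketch (kept disabled): how a driver could map a
// hypothetical MMIO range into the kernel address space with
// vm_map_physical_memory(). The helper name, area name, size, and physical
// address below are made-up placeholders, not values from this file.
#if 0
static area_id
example_map_mmio_registers()
{
	void* virtualBase = NULL;
	// no B_MTR_* flag is passed in addressSpec, so the area ends up uncached
	return vm_map_physical_memory(VMAddressSpace::KernelID(),
		"example mmio registers", &virtualBase, B_ANY_KERNEL_ADDRESS,
		4 * B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
		0xfe000000, false);
}
#endif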
1808 
1809 /*!	Don't use!
1810 	TODO: This function was introduced to map physical page vecs to
1811 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It maps the
1812 	pages through a device cache and does not track vm_page::wired_count!
1813 */
1814 area_id
1815 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1816 	uint32 addressSpec, addr_t* _size, uint32 protection,
1817 	struct generic_io_vec* vecs, uint32 vecCount)
1818 {
1819 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1820 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1821 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1822 		addressSpec, _size, protection, vecs, vecCount));
1823 
1824 	if (!arch_vm_supports_protection(protection)
1825 		|| (addressSpec & B_MTR_MASK) != 0) {
1826 		return B_NOT_SUPPORTED;
1827 	}
1828 
1829 	AddressSpaceWriteLocker locker(team);
1830 	if (!locker.IsLocked())
1831 		return B_BAD_TEAM_ID;
1832 
1833 	if (vecCount == 0)
1834 		return B_BAD_VALUE;
1835 
1836 	addr_t size = 0;
1837 	for (uint32 i = 0; i < vecCount; i++) {
1838 		if (vecs[i].base % B_PAGE_SIZE != 0
1839 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1840 			return B_BAD_VALUE;
1841 		}
1842 
1843 		size += vecs[i].length;
1844 	}
1845 
1846 	// create a device cache
1847 	VMCache* cache;
1848 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1849 	if (result != B_OK)
1850 		return result;
1851 
1852 	cache->virtual_end = size;
1853 
1854 	cache->Lock();
1855 
1856 	VMArea* area;
1857 	virtual_address_restrictions addressRestrictions = {};
1858 	addressRestrictions.address = *_address;
1859 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1860 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1861 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1862 		&addressRestrictions, true, &area, _address);
1863 
1864 	if (result != B_OK)
1865 		cache->ReleaseRefLocked();
1866 
1867 	cache->Unlock();
1868 
1869 	if (result != B_OK)
1870 		return result;
1871 
1872 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1873 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1874 		area->Base() + (size - 1));
1875 
1876 	vm_page_reservation reservation;
1877 	vm_page_reserve_pages(&reservation, reservePages,
1878 			team == VMAddressSpace::KernelID()
1879 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1880 	map->Lock();
1881 
1882 	uint32 vecIndex = 0;
1883 	size_t vecOffset = 0;
1884 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1885 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1886 			vecOffset = 0;
1887 			vecIndex++;
1888 		}
1889 
1890 		if (vecIndex >= vecCount)
1891 			break;
1892 
1893 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1894 			protection, area->MemoryType(), &reservation);
1895 
1896 		vecOffset += B_PAGE_SIZE;
1897 	}
1898 
1899 	map->Unlock();
1900 	vm_page_unreserve_pages(&reservation);
1901 
1902 	if (_size != NULL)
1903 		*_size = size;
1904 
1905 	area->cache_type = CACHE_TYPE_DEVICE;
1906 	return area->id;
1907 }
1908 
1909 
1910 area_id
1911 vm_create_null_area(team_id team, const char* name, void** address,
1912 	uint32 addressSpec, addr_t size, uint32 flags)
1913 {
1914 	size = PAGE_ALIGN(size);
1915 
1916 	// Lock the address space and, if B_EXACT_ADDRESS and
1917 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1918 	// is not wired.
1919 	AddressSpaceWriteLocker locker;
1920 	do {
1921 		if (locker.SetTo(team) != B_OK)
1922 			return B_BAD_TEAM_ID;
1923 	} while (addressSpec == B_EXACT_ADDRESS
1924 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1925 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1926 			(addr_t)*address, size, &locker));
1927 
1928 	// create a null cache
1929 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1930 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1931 	VMCache* cache;
1932 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1933 	if (status != B_OK)
1934 		return status;
1935 
1936 	cache->temporary = 1;
1937 	cache->virtual_end = size;
1938 
1939 	cache->Lock();
1940 
1941 	VMArea* area;
1942 	virtual_address_restrictions addressRestrictions = {};
1943 	addressRestrictions.address = *address;
1944 	addressRestrictions.address_specification = addressSpec;
1945 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1946 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1947 		&addressRestrictions, true, &area, address);
1948 
1949 	if (status < B_OK) {
1950 		cache->ReleaseRefAndUnlock();
1951 		return status;
1952 	}
1953 
1954 	cache->Unlock();
1955 
1956 	area->cache_type = CACHE_TYPE_NULL;
1957 	return area->id;
1958 }
1959 
1960 
1961 /*!	Creates the vnode cache for the specified \a vnode.
1962 	The vnode has to be marked busy when calling this function.
1963 */
1964 status_t
1965 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1966 {
1967 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1968 }
1969 
1970 
1971 /*!	\a cache must be locked. The area's address space must be read-locked.
1972 */
1973 static void
1974 pre_map_area_pages(VMArea* area, VMCache* cache,
1975 	vm_page_reservation* reservation)
1976 {
1977 	addr_t baseAddress = area->Base();
1978 	addr_t cacheOffset = area->cache_offset;
1979 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1980 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1981 
1982 	for (VMCachePagesTree::Iterator it
1983 				= cache->pages.GetIterator(firstPage, true, true);
1984 			vm_page* page = it.Next();) {
1985 		if (page->cache_offset >= endPage)
1986 			break;
1987 
1988 		// skip busy and inactive pages
1989 		if (page->busy || page->usage_count == 0)
1990 			continue;
1991 
1992 		DEBUG_PAGE_ACCESS_START(page);
1993 		map_page(area, page,
1994 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1995 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1996 		DEBUG_PAGE_ACCESS_END(page);
1997 	}
1998 }
1999 
2000 
2001 /*!	Will map the file specified by \a fd to an area in memory.
2002 	The file will be mirrored beginning at the specified \a offset. The
2003 	\a offset and \a size arguments have to be page aligned.
2004 */
2005 static area_id
2006 _vm_map_file(team_id team, const char* name, void** _address,
2007 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2008 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2009 {
2010 	// TODO: for binary files, we want to make sure that the mapping reflects
2011 	//	a snapshot of the file at a given point in time, i.e. later changes
2012 	//	should not make it into the mapped copy -- this will need quite some
2013 	//	changes to be done in a nice way
2014 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2015 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2016 
2017 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2018 	size = PAGE_ALIGN(size);
2019 
2020 	if (mapping == REGION_NO_PRIVATE_MAP)
2021 		protection |= B_SHARED_AREA;
2022 	if (addressSpec != B_EXACT_ADDRESS)
2023 		unmapAddressRange = false;
2024 
2025 	if (fd < 0) {
2026 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2027 		virtual_address_restrictions virtualRestrictions = {};
2028 		virtualRestrictions.address = *_address;
2029 		virtualRestrictions.address_specification = addressSpec;
2030 		physical_address_restrictions physicalRestrictions = {};
2031 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2032 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2033 			_address);
2034 	}
2035 
2036 	// get the open flags of the FD
2037 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2038 	if (descriptor == NULL)
2039 		return EBADF;
2040 	int32 openMode = descriptor->open_mode;
2041 	put_fd(descriptor);
2042 
2043 	// The FD must be open for reading in any case. For a shared mapping with
2044 	// write access, the FD must additionally be open for writing.
2045 	if ((openMode & O_ACCMODE) == O_WRONLY
2046 		|| (mapping == REGION_NO_PRIVATE_MAP
2047 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2048 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2049 		return EACCES;
2050 	}
2051 
2052 	// get the vnode for the object, this also grabs a ref to it
2053 	struct vnode* vnode = NULL;
2054 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2055 	if (status < B_OK)
2056 		return status;
2057 	VnodePutter vnodePutter(vnode);
2058 
2059 	// If we're going to pre-map pages, we need to reserve the pages needed by
2060 	// the mapping backend upfront.
2061 	page_num_t reservedPreMapPages = 0;
2062 	vm_page_reservation reservation;
2063 	if ((protection & B_READ_AREA) != 0) {
2064 		AddressSpaceWriteLocker locker;
2065 		status = locker.SetTo(team);
2066 		if (status != B_OK)
2067 			return status;
2068 
2069 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2070 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2071 
2072 		locker.Unlock();
2073 
2074 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2075 			team == VMAddressSpace::KernelID()
2076 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2077 	}
2078 
2079 	struct PageUnreserver {
2080 		PageUnreserver(vm_page_reservation* reservation)
2081 			:
2082 			fReservation(reservation)
2083 		{
2084 		}
2085 
2086 		~PageUnreserver()
2087 		{
2088 			if (fReservation != NULL)
2089 				vm_page_unreserve_pages(fReservation);
2090 		}
2091 
2092 		vm_page_reservation* fReservation;
2093 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2094 
2095 	// Lock the address space and, if the specified address range shall be
2096 	// unmapped, ensure it is not wired.
2097 	AddressSpaceWriteLocker locker;
2098 	do {
2099 		if (locker.SetTo(team) != B_OK)
2100 			return B_BAD_TEAM_ID;
2101 	} while (unmapAddressRange
2102 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2103 			(addr_t)*_address, size, &locker));
2104 
2105 	// TODO: this only works for file systems that use the file cache
2106 	VMCache* cache;
2107 	status = vfs_get_vnode_cache(vnode, &cache, false);
2108 	if (status < B_OK)
2109 		return status;
2110 
2111 	cache->Lock();
2112 
2113 	VMArea* area;
2114 	virtual_address_restrictions addressRestrictions = {};
2115 	addressRestrictions.address = *_address;
2116 	addressRestrictions.address_specification = addressSpec;
2117 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2118 		0, protection, mapping,
2119 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
2120 		&addressRestrictions, kernel, &area, _address);
2121 
2122 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2123 		// map_backing_store() cannot know we no longer need the ref
2124 		cache->ReleaseRefLocked();
2125 	}
2126 
2127 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2128 		pre_map_area_pages(area, cache, &reservation);
2129 
2130 	cache->Unlock();
2131 
2132 	if (status == B_OK) {
2133 		// TODO: this probably deserves a smarter solution, i.e. don't always
2134 		// prefetch stuff, and also, probably don't trigger it at this place.
2135 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2136 			// prefetches at most 10 MB starting from "offset"
2137 	}
2138 
2139 	if (status != B_OK)
2140 		return status;
2141 
2142 	area->cache_type = CACHE_TYPE_VNODE;
2143 	return area->id;
2144 }
2145 
2146 
2147 area_id
2148 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2149 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2150 	int fd, off_t offset)
2151 {
2152 	if (!arch_vm_supports_protection(protection))
2153 		return B_NOT_SUPPORTED;
2154 
2155 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2156 		mapping, unmapAddressRange, fd, offset, true);
2157 }
2158 
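// A minimal usage sketch (kept disabled): mapping a file read-only into the
// kernel address space via vm_map_file(). "fd" and "fileSize" are assumed to
// be a valid kernel file descriptor and the file's size; the helper name is
// made up.
#if 0
static area_id
example_map_file_read_only(int fd, addr_t fileSize)
{
	void* address = NULL;
	return vm_map_file(VMAddressSpace::KernelID(), "example mapped file",
		&address, B_ANY_KERNEL_ADDRESS, fileSize, B_KERNEL_READ_AREA,
		REGION_PRIVATE_MAP, false, fd, 0);
}
#endif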
2159 
2160 VMCache*
2161 vm_area_get_locked_cache(VMArea* area)
2162 {
2163 	rw_lock_read_lock(&sAreaCacheLock);
2164 
2165 	while (true) {
2166 		VMCache* cache = area->cache;
2167 
2168 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2169 			// cache has been deleted
2170 			rw_lock_read_lock(&sAreaCacheLock);
2171 			continue;
2172 		}
2173 
2174 		rw_lock_read_lock(&sAreaCacheLock);
2175 
2176 		if (cache == area->cache) {
2177 			cache->AcquireRefLocked();
2178 			rw_lock_read_unlock(&sAreaCacheLock);
2179 			return cache;
2180 		}
2181 
2182 		// the cache changed in the meantime
2183 		cache->Unlock();
2184 	}
2185 }
2186 
2187 
2188 void
2189 vm_area_put_locked_cache(VMCache* cache)
2190 {
2191 	cache->ReleaseRefAndUnlock();
2192 }
2193 
2194 
2195 area_id
2196 vm_clone_area(team_id team, const char* name, void** address,
2197 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2198 	bool kernel)
2199 {
2200 	VMArea* newArea = NULL;
2201 	VMArea* sourceArea;
2202 
2203 	// Check whether the source area exists and is cloneable. If so, mark it
2204 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2205 	{
2206 		AddressSpaceWriteLocker locker;
2207 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2208 		if (status != B_OK)
2209 			return status;
2210 
2211 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2212 			return B_NOT_ALLOWED;
2213 
2214 		sourceArea->protection |= B_SHARED_AREA;
2215 		protection |= B_SHARED_AREA;
2216 	}
2217 
2218 	// Now lock both address spaces and actually do the cloning.
2219 
2220 	MultiAddressSpaceLocker locker;
2221 	VMAddressSpace* sourceAddressSpace;
2222 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2223 	if (status != B_OK)
2224 		return status;
2225 
2226 	VMAddressSpace* targetAddressSpace;
2227 	status = locker.AddTeam(team, true, &targetAddressSpace);
2228 	if (status != B_OK)
2229 		return status;
2230 
2231 	status = locker.Lock();
2232 	if (status != B_OK)
2233 		return status;
2234 
2235 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2236 	if (sourceArea == NULL)
2237 		return B_BAD_VALUE;
2238 
2239 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2240 		return B_NOT_ALLOWED;
2241 
2242 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2243 
2244 	if (!kernel && sourceAddressSpace != targetAddressSpace
2245 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2246 #if KDEBUG
2247 		Team* team = thread_get_current_thread()->team;
2248 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2249 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2250 #endif
2251 		status = B_NOT_ALLOWED;
2252 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2253 		status = B_NOT_ALLOWED;
2254 	} else {
2255 		virtual_address_restrictions addressRestrictions = {};
2256 		addressRestrictions.address = *address;
2257 		addressRestrictions.address_specification = addressSpec;
2258 		status = map_backing_store(targetAddressSpace, cache,
2259 			sourceArea->cache_offset, name, sourceArea->Size(),
2260 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2261 			kernel, &newArea, address);
2262 	}
2263 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2264 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2265 		// to create a new cache, and has therefore already acquired a reference
2266 		// to the source cache - but otherwise it has no idea that we need
2267 		// one.
2268 		cache->AcquireRefLocked();
2269 	}
2270 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2271 		// we need to map in everything at this point
2272 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2273 			// we don't have actual pages to map but a physical area
2274 			VMTranslationMap* map
2275 				= sourceArea->address_space->TranslationMap();
2276 			map->Lock();
2277 
2278 			phys_addr_t physicalAddress;
2279 			uint32 oldProtection;
2280 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2281 
2282 			map->Unlock();
2283 
2284 			map = targetAddressSpace->TranslationMap();
2285 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2286 				newArea->Base() + (newArea->Size() - 1));
2287 
2288 			vm_page_reservation reservation;
2289 			vm_page_reserve_pages(&reservation, reservePages,
2290 				targetAddressSpace == VMAddressSpace::Kernel()
2291 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2292 			map->Lock();
2293 
2294 			for (addr_t offset = 0; offset < newArea->Size();
2295 					offset += B_PAGE_SIZE) {
2296 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2297 					protection, newArea->MemoryType(), &reservation);
2298 			}
2299 
2300 			map->Unlock();
2301 			vm_page_unreserve_pages(&reservation);
2302 		} else {
2303 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2304 			size_t reservePages = map->MaxPagesNeededToMap(
2305 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2306 			vm_page_reservation reservation;
2307 			vm_page_reserve_pages(&reservation, reservePages,
2308 				targetAddressSpace == VMAddressSpace::Kernel()
2309 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2310 
2311 			// map in all pages from source
2312 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2313 					vm_page* page  = it.Next();) {
2314 				if (!page->busy) {
2315 					DEBUG_PAGE_ACCESS_START(page);
2316 					map_page(newArea, page,
2317 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2318 							- newArea->cache_offset),
2319 						protection, &reservation);
2320 					DEBUG_PAGE_ACCESS_END(page);
2321 				}
2322 			}
2323 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2324 			// ensuring that!
2325 
2326 			vm_page_unreserve_pages(&reservation);
2327 		}
2328 	}
2329 	if (status == B_OK)
2330 		newArea->cache_type = sourceArea->cache_type;
2331 
2332 	vm_area_put_locked_cache(cache);
2333 
2334 	if (status < B_OK)
2335 		return status;
2336 
2337 	return newArea->id;
2338 }
2339 
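// A minimal usage sketch (kept disabled): cloning an existing (cloneable)
// area into another team's address space with vm_clone_area(). "targetTeam"
// and "sourceArea" are placeholders for a valid team ID and area ID; the
// helper name is made up.
#if 0
static area_id
example_clone_area(team_id targetTeam, area_id sourceArea)
{
	void* address = NULL;
	return vm_clone_area(targetTeam, "example clone", &address, B_ANY_ADDRESS,
		B_READ_AREA | B_WRITE_AREA, REGION_NO_PRIVATE_MAP, sourceArea, false);
}
#endif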
2340 
2341 /*!	Deletes the specified area of the given address space.
2342 
2343 	The address space must be write-locked.
2344 	The caller must ensure that the area does not have any wired ranges.
2345 
2346 	\param addressSpace The address space containing the area.
2347 	\param area The area to be deleted.
2348 	\param deletingAddressSpace \c true, if the address space is in the process
2349 		of being deleted.
2350 */
2351 static void
2352 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2353 	bool deletingAddressSpace)
2354 {
2355 	ASSERT(!area->IsWired());
2356 
2357 	VMAreaHash::Remove(area);
2358 
2359 	// At this point the area is removed from the global hash table, but
2360 	// still exists in the area list.
2361 
2362 	// Unmap the virtual address space the area occupied.
2363 	{
2364 		// We need to lock the complete cache chain.
2365 		VMCache* topCache = vm_area_get_locked_cache(area);
2366 		VMCacheChainLocker cacheChainLocker(topCache);
2367 		cacheChainLocker.LockAllSourceCaches();
2368 
2369 		// If the area's top cache is a temporary cache and the area is the only
2370 		// one referencing it (besides us currently holding a second reference),
2371 		// the unmapping code doesn't need to care about preserving the accessed
2372 		// and dirty flags of the top cache page mappings.
2373 		bool ignoreTopCachePageFlags
2374 			= topCache->temporary && topCache->RefCount() == 2;
2375 
2376 		area->address_space->TranslationMap()->UnmapArea(area,
2377 			deletingAddressSpace, ignoreTopCachePageFlags);
2378 	}
2379 
2380 	if (!area->cache->temporary)
2381 		area->cache->WriteModified();
2382 
2383 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2384 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2385 
2386 	arch_vm_unset_memory_type(area);
2387 	addressSpace->RemoveArea(area, allocationFlags);
2388 	addressSpace->Put();
2389 
2390 	area->cache->RemoveArea(area);
2391 	area->cache->ReleaseRef();
2392 
2393 	addressSpace->DeleteArea(area, allocationFlags);
2394 }
2395 
2396 
2397 status_t
2398 vm_delete_area(team_id team, area_id id, bool kernel)
2399 {
2400 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2401 		team, id));
2402 
2403 	// lock the address space and make sure the area isn't wired
2404 	AddressSpaceWriteLocker locker;
2405 	VMArea* area;
2406 	AreaCacheLocker cacheLocker;
2407 
2408 	do {
2409 		status_t status = locker.SetFromArea(team, id, area);
2410 		if (status != B_OK)
2411 			return status;
2412 
2413 		cacheLocker.SetTo(area);
2414 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2415 
2416 	cacheLocker.Unlock();
2417 
2418 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2419 		return B_NOT_ALLOWED;
2420 
2421 	delete_area(locker.AddressSpace(), area, false);
2422 	return B_OK;
2423 }
2424 
2425 
2426 /*!	Creates a new cache on top of the given cache, moves all areas from
2427 	the old cache to the new one, and changes the protection of all affected
2428 	areas' pages to read-only. If requested, wired pages are moved up to the
2429 	new cache and copies are added to the old cache in their place.
2430 	Preconditions:
2431 	- The given cache must be locked.
2432 	- All of the cache's areas' address spaces must be read locked.
2433 	- Either the cache must not have any wired ranges or a page reservation for
2434 	  all wired pages must be provided, so they can be copied.
2435 
2436 	\param lowerCache The cache on top of which a new cache shall be created.
2437 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2438 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2439 		has wired pages. The wired pages are copied in this case.
2440 */
2441 static status_t
2442 vm_copy_on_write_area(VMCache* lowerCache,
2443 	vm_page_reservation* wiredPagesReservation)
2444 {
2445 	VMCache* upperCache;
2446 
2447 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2448 
2449 	// We need to separate the cache from its areas. The cache goes one level
2450 	// deeper and we create a new cache in between.
2451 
2452 	// create an anonymous cache
2453 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2454 		lowerCache->GuardSize() / B_PAGE_SIZE,
2455 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2456 		VM_PRIORITY_USER);
2457 	if (status != B_OK)
2458 		return status;
2459 
2460 	upperCache->Lock();
2461 
2462 	upperCache->temporary = 1;
2463 	upperCache->virtual_base = lowerCache->virtual_base;
2464 	upperCache->virtual_end = lowerCache->virtual_end;
2465 
2466 	// transfer the lower cache areas to the upper cache
2467 	rw_lock_write_lock(&sAreaCacheLock);
2468 	upperCache->TransferAreas(lowerCache);
2469 	rw_lock_write_unlock(&sAreaCacheLock);
2470 
2471 	lowerCache->AddConsumer(upperCache);
2472 
2473 	// We now need to remap all pages from all of the cache's areas read-only,
2474 	// so that a copy will be created on next write access. If there are wired
2475 	// pages, we keep their protection, move them to the upper cache and create
2476 	// copies for the lower cache.
2477 	if (wiredPagesReservation != NULL) {
2478 		// We need to handle wired pages -- iterate through the cache's pages.
2479 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2480 				vm_page* page = it.Next();) {
2481 			if (page->WiredCount() > 0) {
2482 				// allocate a new page and copy the wired one
2483 				vm_page* copiedPage = vm_page_allocate_page(
2484 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2485 
2486 				vm_memcpy_physical_page(
2487 					copiedPage->physical_page_number * B_PAGE_SIZE,
2488 					page->physical_page_number * B_PAGE_SIZE);
2489 
2490 				// move the wired page to the upper cache (note: removing is OK
2491 				// with the SplayTree iterator) and insert the copy
2492 				upperCache->MovePage(page);
2493 				lowerCache->InsertPage(copiedPage,
2494 					page->cache_offset * B_PAGE_SIZE);
2495 
2496 				DEBUG_PAGE_ACCESS_END(copiedPage);
2497 			} else {
2498 				// Change the protection of this page in all areas.
2499 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2500 						tempArea = tempArea->cache_next) {
2501 					// The area must be readable in the same way it was
2502 					// previously writable.
2503 					uint32 protection = B_KERNEL_READ_AREA;
2504 					if ((tempArea->protection & B_READ_AREA) != 0)
2505 						protection |= B_READ_AREA;
2506 
2507 					VMTranslationMap* map
2508 						= tempArea->address_space->TranslationMap();
2509 					map->Lock();
2510 					map->ProtectPage(tempArea,
2511 						virtual_page_address(tempArea, page), protection);
2512 					map->Unlock();
2513 				}
2514 			}
2515 		}
2516 	} else {
2517 		ASSERT(lowerCache->WiredPagesCount() == 0);
2518 
2519 		// just change the protection of all areas
2520 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2521 				tempArea = tempArea->cache_next) {
2522 			// The area must be readable in the same way it was previously
2523 			// writable.
2524 			uint32 protection = B_KERNEL_READ_AREA;
2525 			if ((tempArea->protection & B_READ_AREA) != 0)
2526 				protection |= B_READ_AREA;
2527 
2528 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2529 			map->Lock();
2530 			map->ProtectArea(tempArea, protection);
2531 			map->Unlock();
2532 		}
2533 	}
2534 
2535 	vm_area_put_locked_cache(upperCache);
2536 
2537 	return B_OK;
2538 }
2539 
2540 
2541 area_id
2542 vm_copy_area(team_id team, const char* name, void** _address,
2543 	uint32 addressSpec, area_id sourceID)
2544 {
2545 	// Do the locking: target address space, all address spaces associated with
2546 	// the source cache, and the cache itself.
2547 	MultiAddressSpaceLocker locker;
2548 	VMAddressSpace* targetAddressSpace;
2549 	VMCache* cache;
2550 	VMArea* source;
2551 	AreaCacheLocker cacheLocker;
2552 	status_t status;
2553 	bool sharedArea;
2554 
2555 	page_num_t wiredPages = 0;
2556 	vm_page_reservation wiredPagesReservation;
2557 
2558 	bool restart;
2559 	do {
2560 		restart = false;
2561 
2562 		locker.Unset();
2563 		status = locker.AddTeam(team, true, &targetAddressSpace);
2564 		if (status == B_OK) {
2565 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2566 				&cache);
2567 		}
2568 		if (status != B_OK)
2569 			return status;
2570 
2571 		cacheLocker.SetTo(cache, true);	// already locked
2572 
2573 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2574 
2575 		page_num_t oldWiredPages = wiredPages;
2576 		wiredPages = 0;
2577 
2578 		// If the source area isn't shared, count the number of wired pages in
2579 		// the cache and reserve as many pages.
2580 		if (!sharedArea) {
2581 			wiredPages = cache->WiredPagesCount();
2582 
2583 			if (wiredPages > oldWiredPages) {
2584 				cacheLocker.Unlock();
2585 				locker.Unlock();
2586 
2587 				if (oldWiredPages > 0)
2588 					vm_page_unreserve_pages(&wiredPagesReservation);
2589 
2590 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2591 					VM_PRIORITY_USER);
2592 
2593 				restart = true;
2594 			}
2595 		} else if (oldWiredPages > 0)
2596 			vm_page_unreserve_pages(&wiredPagesReservation);
2597 	} while (restart);
2598 
2599 	// unreserve pages later
2600 	struct PagesUnreserver {
2601 		PagesUnreserver(vm_page_reservation* reservation)
2602 			:
2603 			fReservation(reservation)
2604 		{
2605 		}
2606 
2607 		~PagesUnreserver()
2608 		{
2609 			if (fReservation != NULL)
2610 				vm_page_unreserve_pages(fReservation);
2611 		}
2612 
2613 	private:
2614 		vm_page_reservation*	fReservation;
2615 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2616 
2617 	bool writableCopy
2618 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2619 	uint8* targetPageProtections = NULL;
2620 
2621 	if (source->page_protections != NULL) {
2622 		size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2;
2623 		targetPageProtections = (uint8*)malloc_etc(bytes,
2624 			HEAP_DONT_LOCK_KERNEL_SPACE);
2625 		if (targetPageProtections == NULL)
2626 			return B_NO_MEMORY;
2627 
2628 		memcpy(targetPageProtections, source->page_protections, bytes);
2629 
2630 		if (!writableCopy) {
2631 			for (size_t i = 0; i < bytes; i++) {
2632 				if ((targetPageProtections[i]
2633 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2634 					writableCopy = true;
2635 					break;
2636 				}
2637 			}
2638 		}
2639 	}
2640 
2641 	if (addressSpec == B_CLONE_ADDRESS) {
2642 		addressSpec = B_EXACT_ADDRESS;
2643 		*_address = (void*)source->Base();
2644 	}
2645 
2646 	// First, create a cache on top of the source area, or use the existing
2647 	// one if this is a shared area.
2648 
2649 	VMArea* target;
2650 	virtual_address_restrictions addressRestrictions = {};
2651 	addressRestrictions.address = *_address;
2652 	addressRestrictions.address_specification = addressSpec;
2653 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2654 		name, source->Size(), source->wiring, source->protection,
2655 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2656 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2657 		&addressRestrictions, true, &target, _address);
2658 	if (status < B_OK) {
2659 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2660 		return status;
2661 	}
2662 
2663 	if (targetPageProtections != NULL)
2664 		target->page_protections = targetPageProtections;
2665 
2666 	if (sharedArea) {
2667 		// The new area uses the old area's cache, but map_backing_store()
2668 		// hasn't acquired a ref. So we have to do that now.
2669 		cache->AcquireRefLocked();
2670 	}
2671 
2672 	// If the source area is writable, we need to move it one layer up as well
2673 
2674 	if (!sharedArea) {
2675 		if (writableCopy) {
2676 			// TODO: do something more useful if this fails!
2677 			if (vm_copy_on_write_area(cache,
2678 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2679 				panic("vm_copy_on_write_area() failed!\n");
2680 			}
2681 		}
2682 	}
2683 
2684 	// we return the ID of the newly created area
2685 	return target->id;
2686 }
2687 
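// A minimal usage sketch (kept disabled): creating a private copy of an
// existing area in another team with vm_copy_area(); a non-shared writable
// source ends up copy-on-write. "targetTeam" and "sourceArea" are
// placeholders; the helper name is made up.
#if 0
static area_id
example_copy_area(team_id targetTeam, area_id sourceArea)
{
	void* address = NULL;
	return vm_copy_area(targetTeam, "example copy", &address, B_ANY_ADDRESS,
		sourceArea);
}
#endif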
2688 
2689 status_t
2690 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2691 	bool kernel)
2692 {
2693 	fix_protection(&newProtection);
2694 
2695 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2696 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2697 
2698 	if (!arch_vm_supports_protection(newProtection))
2699 		return B_NOT_SUPPORTED;
2700 
2701 	bool becomesWritable
2702 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2703 
2704 	// lock address spaces and cache
2705 	MultiAddressSpaceLocker locker;
2706 	VMCache* cache;
2707 	VMArea* area;
2708 	status_t status;
2709 	AreaCacheLocker cacheLocker;
2710 	bool isWritable;
2711 
2712 	bool restart;
2713 	do {
2714 		restart = false;
2715 
2716 		locker.Unset();
2717 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2718 		if (status != B_OK)
2719 			return status;
2720 
2721 		cacheLocker.SetTo(cache, true);	// already locked
2722 
2723 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2724 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2725 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2726 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2727 				" (%s)\n", team, newProtection, areaID, area->name);
2728 			return B_NOT_ALLOWED;
2729 		}
2730 
2731 		if (area->protection == newProtection)
2732 			return B_OK;
2733 
2734 		if (team != VMAddressSpace::KernelID()
2735 			&& area->address_space->ID() != team) {
2736 			// unless you're the kernel, you are only allowed to set
2737 			// the protection of your own areas
2738 			return B_NOT_ALLOWED;
2739 		}
2740 
2741 		isWritable
2742 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2743 
2744 		// Make sure the area (or, if we're going to call
2745 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2746 		// wired ranges.
2747 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2748 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2749 					otherArea = otherArea->cache_next) {
2750 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2751 					restart = true;
2752 					break;
2753 				}
2754 			}
2755 		} else {
2756 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2757 				restart = true;
2758 		}
2759 	} while (restart);
2760 
2761 	bool changePageProtection = true;
2762 	bool changeTopCachePagesOnly = false;
2763 
2764 	if (isWritable && !becomesWritable) {
2765 		// writable -> !writable
2766 
2767 		if (cache->source != NULL && cache->temporary) {
2768 			if (cache->CountWritableAreas(area) == 0) {
2769 				// Since this cache is now backed by the pages in its source
2770 				// cache, we can change the cache's commitment to take only
2771 				// those pages into account that really are in this cache.
2772 
2773 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2774 					team == VMAddressSpace::KernelID()
2775 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2776 
2777 				// TODO: we may be able to join with our source cache, if
2778 				// count == 0
2779 			}
2780 		}
2781 
2782 		// If only the writability changes, we can just remap the pages of the
2783 		// top cache, since the pages of lower caches are mapped read-only
2784 		// anyway. That's advantageous only if the number of pages in the cache
2785 		// is significantly smaller than the number of pages in the area,
2786 		// though.
2787 		if (newProtection
2788 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2789 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2790 			changeTopCachePagesOnly = true;
2791 		}
2792 	} else if (!isWritable && becomesWritable) {
2793 		// !writable -> writable
2794 
2795 		if (!cache->consumers.IsEmpty()) {
2796 			// There are consumers -- we have to insert a new cache. Fortunately
2797 			// vm_copy_on_write_area() does everything that's needed.
2798 			changePageProtection = false;
2799 			status = vm_copy_on_write_area(cache, NULL);
2800 		} else {
2801 			// No consumers, so we don't need to insert a new one.
2802 			if (cache->source != NULL && cache->temporary) {
2803 				// the cache's commitment must contain all possible pages
2804 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2805 					team == VMAddressSpace::KernelID()
2806 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2807 			}
2808 
2809 			if (status == B_OK && cache->source != NULL) {
2810 				// There's a source cache, hence we can't just change all pages'
2811 				// protection or we might allow writing into pages belonging to
2812 				// a lower cache.
2813 				changeTopCachePagesOnly = true;
2814 			}
2815 		}
2816 	} else {
2817 		// we don't have anything special to do in all other cases
2818 	}
2819 
2820 	if (status == B_OK) {
2821 		// remap existing pages in this cache
2822 		if (changePageProtection) {
2823 			VMTranslationMap* map = area->address_space->TranslationMap();
2824 			map->Lock();
2825 
2826 			if (changeTopCachePagesOnly) {
2827 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2828 				page_num_t lastPageOffset
2829 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2830 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2831 						vm_page* page = it.Next();) {
2832 					if (page->cache_offset >= firstPageOffset
2833 						&& page->cache_offset <= lastPageOffset) {
2834 						addr_t address = virtual_page_address(area, page);
2835 						map->ProtectPage(area, address, newProtection);
2836 					}
2837 				}
2838 			} else
2839 				map->ProtectArea(area, newProtection);
2840 
2841 			map->Unlock();
2842 		}
2843 
2844 		area->protection = newProtection;
2845 	}
2846 
2847 	return status;
2848 }
2849 
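// A minimal usage sketch (kept disabled): revoking write access from one of
// the calling team's areas with vm_set_area_protection(); fix_protection()
// adds the matching kernel access bits. "team" and "area" are placeholders
// for the calling team's ID and one of its areas; the helper name is made up.
#if 0
static status_t
example_make_area_read_only(team_id team, area_id area)
{
	return vm_set_area_protection(team, area, B_READ_AREA, false);
}
#endif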
2850 
2851 status_t
2852 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2853 {
2854 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2855 	if (addressSpace == NULL)
2856 		return B_BAD_TEAM_ID;
2857 
2858 	VMTranslationMap* map = addressSpace->TranslationMap();
2859 
2860 	map->Lock();
2861 	uint32 dummyFlags;
2862 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2863 	map->Unlock();
2864 
2865 	addressSpace->Put();
2866 	return status;
2867 }
2868 
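// A minimal usage sketch (kept disabled): translating a mapped kernel virtual
// address to its physical address with vm_get_page_mapping(). "buffer" is a
// placeholder for some mapped kernel address; the helper name is made up.
#if 0
static void
example_query_physical_address(void* buffer)
{
	phys_addr_t physicalAddress;
	if (vm_get_page_mapping(VMAddressSpace::KernelID(), (addr_t)buffer,
			&physicalAddress) == B_OK) {
		dprintf("%p -> %#" B_PRIxPHYSADDR "\n", buffer, physicalAddress);
	}
}
#endif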
2869 
2870 /*!	The page's cache must be locked.
2871 */
2872 bool
2873 vm_test_map_modification(vm_page* page)
2874 {
2875 	if (page->modified)
2876 		return true;
2877 
2878 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2879 	vm_page_mapping* mapping;
2880 	while ((mapping = iterator.Next()) != NULL) {
2881 		VMArea* area = mapping->area;
2882 		VMTranslationMap* map = area->address_space->TranslationMap();
2883 
2884 		phys_addr_t physicalAddress;
2885 		uint32 flags;
2886 		map->Lock();
2887 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2888 		map->Unlock();
2889 
2890 		if ((flags & PAGE_MODIFIED) != 0)
2891 			return true;
2892 	}
2893 
2894 	return false;
2895 }
2896 
2897 
2898 /*!	The page's cache must be locked.
2899 */
2900 void
2901 vm_clear_map_flags(vm_page* page, uint32 flags)
2902 {
2903 	if ((flags & PAGE_ACCESSED) != 0)
2904 		page->accessed = false;
2905 	if ((flags & PAGE_MODIFIED) != 0)
2906 		page->modified = false;
2907 
2908 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2909 	vm_page_mapping* mapping;
2910 	while ((mapping = iterator.Next()) != NULL) {
2911 		VMArea* area = mapping->area;
2912 		VMTranslationMap* map = area->address_space->TranslationMap();
2913 
2914 		map->Lock();
2915 		map->ClearFlags(virtual_page_address(area, page), flags);
2916 		map->Unlock();
2917 	}
2918 }
2919 
2920 
2921 /*!	Removes all mappings from a page.
2922 	After you've called this function, the page is unmapped from memory and
2923 	the page's \c accessed and \c modified flags have been updated according
2924 	to the state of the mappings.
2925 	The page's cache must be locked.
2926 */
2927 void
2928 vm_remove_all_page_mappings(vm_page* page)
2929 {
2930 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2931 		VMArea* area = mapping->area;
2932 		VMTranslationMap* map = area->address_space->TranslationMap();
2933 		addr_t address = virtual_page_address(area, page);
2934 		map->UnmapPage(area, address, false);
2935 	}
2936 }
2937 
2938 
2939 int32
2940 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2941 {
2942 	int32 count = 0;
2943 
2944 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2945 	vm_page_mapping* mapping;
2946 	while ((mapping = iterator.Next()) != NULL) {
2947 		VMArea* area = mapping->area;
2948 		VMTranslationMap* map = area->address_space->TranslationMap();
2949 
2950 		bool modified;
2951 		if (map->ClearAccessedAndModified(area,
2952 				virtual_page_address(area, page), false, modified)) {
2953 			count++;
2954 		}
2955 
2956 		page->modified |= modified;
2957 	}
2958 
2959 
2960 	if (page->accessed) {
2961 		count++;
2962 		page->accessed = false;
2963 	}
2964 
2965 	return count;
2966 }
2967 
2968 
2969 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2970 	mappings.
2971 	The function iterates through the page mappings and removes them until
2972 	encountering one that has been accessed. From then on it will continue to
2973 	iterate, but only clear the accessed flag of the mapping. The page's
2974 	\c modified bit will be updated accordingly, the \c accessed bit will be
2975 	cleared.
2976 	\return The number of mapping accessed bits encountered, including the
2977 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2978 		of the page have been removed.
2979 */
2980 int32
2981 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2982 {
2983 	ASSERT(page->WiredCount() == 0);
2984 
2985 	if (page->accessed)
2986 		return vm_clear_page_mapping_accessed_flags(page);
2987 
2988 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2989 		VMArea* area = mapping->area;
2990 		VMTranslationMap* map = area->address_space->TranslationMap();
2991 		addr_t address = virtual_page_address(area, page);
2992 		bool modified = false;
2993 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2994 			page->accessed = true;
2995 			page->modified |= modified;
2996 			return vm_clear_page_mapping_accessed_flags(page);
2997 		}
2998 		page->modified |= modified;
2999 	}
3000 
3001 	return 0;
3002 }
3003 
3004 
3005 static int
3006 display_mem(int argc, char** argv)
3007 {
3008 	bool physical = false;
3009 	addr_t copyAddress;
3010 	int32 displayWidth;
3011 	int32 itemSize;
3012 	int32 num = -1;
3013 	addr_t address;
3014 	int i = 1, j;
3015 
3016 	if (argc > 1 && argv[1][0] == '-') {
3017 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3018 			physical = true;
3019 			i++;
3020 		} else
3021 			i = 99;
3022 	}
3023 
3024 	if (argc < i + 1 || argc > i + 2) {
3025 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3026 			"\tdl - 8 bytes\n"
3027 			"\tdw - 4 bytes\n"
3028 			"\tds - 2 bytes\n"
3029 			"\tdb - 1 byte\n"
3030 			"\tstring - a whole string\n"
3031 			"  -p or --physical only allows memory from a single page to be "
3032 			"displayed.\n");
3033 		return 0;
3034 	}
3035 
3036 	address = parse_expression(argv[i]);
3037 
3038 	if (argc > i + 1)
3039 		num = parse_expression(argv[i + 1]);
3040 
3041 	// build the format string
3042 	if (strcmp(argv[0], "db") == 0) {
3043 		itemSize = 1;
3044 		displayWidth = 16;
3045 	} else if (strcmp(argv[0], "ds") == 0) {
3046 		itemSize = 2;
3047 		displayWidth = 8;
3048 	} else if (strcmp(argv[0], "dw") == 0) {
3049 		itemSize = 4;
3050 		displayWidth = 4;
3051 	} else if (strcmp(argv[0], "dl") == 0) {
3052 		itemSize = 8;
3053 		displayWidth = 2;
3054 	} else if (strcmp(argv[0], "string") == 0) {
3055 		itemSize = 1;
3056 		displayWidth = -1;
3057 	} else {
3058 		kprintf("display_mem called in an invalid way!\n");
3059 		return 0;
3060 	}
3061 
3062 	if (num <= 0)
3063 		num = displayWidth;
3064 
3065 	void* physicalPageHandle = NULL;
3066 
3067 	if (physical) {
3068 		int32 offset = address & (B_PAGE_SIZE - 1);
3069 		if (num * itemSize + offset > B_PAGE_SIZE) {
3070 			num = (B_PAGE_SIZE - offset) / itemSize;
3071 			kprintf("NOTE: number of bytes has been cut to page size\n");
3072 		}
3073 
3074 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3075 
3076 		if (vm_get_physical_page_debug(address, &copyAddress,
3077 				&physicalPageHandle) != B_OK) {
3078 			kprintf("getting the hardware page failed.\n");
3079 			return 0;
3080 		}
3081 
3082 		address += offset;
3083 		copyAddress += offset;
3084 	} else
3085 		copyAddress = address;
3086 
3087 	if (!strcmp(argv[0], "string")) {
3088 		kprintf("%p \"", (char*)copyAddress);
3089 
3090 		// string mode
3091 		for (i = 0; true; i++) {
3092 			char c;
3093 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3094 					!= B_OK
3095 				|| c == '\0') {
3096 				break;
3097 			}
3098 
3099 			if (c == '\n')
3100 				kprintf("\\n");
3101 			else if (c == '\t')
3102 				kprintf("\\t");
3103 			else {
3104 				if (!isprint(c))
3105 					c = '.';
3106 
3107 				kprintf("%c", c);
3108 			}
3109 		}
3110 
3111 		kprintf("\"\n");
3112 	} else {
3113 		// number mode
3114 		for (i = 0; i < num; i++) {
3115 			uint64 value;
3116 
3117 			if ((i % displayWidth) == 0) {
3118 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3119 				if (i != 0)
3120 					kprintf("\n");
3121 
3122 				kprintf("[0x%lx]  ", address + i * itemSize);
3123 
3124 				for (j = 0; j < displayed; j++) {
3125 					char c;
3126 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3127 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3128 						displayed = j;
3129 						break;
3130 					}
3131 					if (!isprint(c))
3132 						c = '.';
3133 
3134 					kprintf("%c", c);
3135 				}
3136 				if (num > displayWidth) {
3137 					// make sure the spacing in the last line is correct
3138 					for (j = displayed; j < displayWidth * itemSize; j++)
3139 						kprintf(" ");
3140 				}
3141 				kprintf("  ");
3142 			}
3143 
3144 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3145 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3146 				kprintf("read fault");
3147 				break;
3148 			}
3149 
3150 			switch (itemSize) {
3151 				case 1:
3152 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3153 					break;
3154 				case 2:
3155 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3156 					break;
3157 				case 4:
3158 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3159 					break;
3160 				case 8:
3161 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3162 					break;
3163 			}
3164 		}
3165 
3166 		kprintf("\n");
3167 	}
3168 
3169 	if (physical) {
3170 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3171 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3172 	}
3173 	return 0;
3174 }
3175 
3176 
3177 static void
3178 dump_cache_tree_recursively(VMCache* cache, int level,
3179 	VMCache* highlightCache)
3180 {
3181 	// print this cache
3182 	for (int i = 0; i < level; i++)
3183 		kprintf("  ");
3184 	if (cache == highlightCache)
3185 		kprintf("%p <--\n", cache);
3186 	else
3187 		kprintf("%p\n", cache);
3188 
3189 	// recursively print its consumers
3190 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3191 			VMCache* consumer = it.Next();) {
3192 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3193 	}
3194 }
3195 
3196 
3197 static int
3198 dump_cache_tree(int argc, char** argv)
3199 {
3200 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3201 		kprintf("usage: %s <address>\n", argv[0]);
3202 		return 0;
3203 	}
3204 
3205 	addr_t address = parse_expression(argv[1]);
3206 	if (address == 0)
3207 		return 0;
3208 
3209 	VMCache* cache = (VMCache*)address;
3210 	VMCache* root = cache;
3211 
3212 	// find the root cache (the transitive source)
3213 	while (root->source != NULL)
3214 		root = root->source;
3215 
3216 	dump_cache_tree_recursively(root, 0, cache);
3217 
3218 	return 0;
3219 }
3220 
3221 
3222 const char*
3223 vm_cache_type_to_string(int32 type)
3224 {
3225 	switch (type) {
3226 		case CACHE_TYPE_RAM:
3227 			return "RAM";
3228 		case CACHE_TYPE_DEVICE:
3229 			return "device";
3230 		case CACHE_TYPE_VNODE:
3231 			return "vnode";
3232 		case CACHE_TYPE_NULL:
3233 			return "null";
3234 
3235 		default:
3236 			return "unknown";
3237 	}
3238 }
3239 
3240 
3241 #if DEBUG_CACHE_LIST
3242 
3243 static void
3244 update_cache_info_recursively(VMCache* cache, cache_info& info)
3245 {
3246 	info.page_count += cache->page_count;
3247 	if (cache->type == CACHE_TYPE_RAM)
3248 		info.committed += cache->committed_size;
3249 
3250 	// recurse
3251 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3252 			VMCache* consumer = it.Next();) {
3253 		update_cache_info_recursively(consumer, info);
3254 	}
3255 }
3256 
3257 
3258 static int
3259 cache_info_compare_page_count(const void* _a, const void* _b)
3260 {
3261 	const cache_info* a = (const cache_info*)_a;
3262 	const cache_info* b = (const cache_info*)_b;
3263 	if (a->page_count == b->page_count)
3264 		return 0;
3265 	return a->page_count < b->page_count ? 1 : -1;
3266 }
3267 
3268 
3269 static int
3270 cache_info_compare_committed(const void* _a, const void* _b)
3271 {
3272 	const cache_info* a = (const cache_info*)_a;
3273 	const cache_info* b = (const cache_info*)_b;
3274 	if (a->committed == b->committed)
3275 		return 0;
3276 	return a->committed < b->committed ? 1 : -1;
3277 }
3278 
3279 
3280 static void
3281 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3282 {
3283 	for (int i = 0; i < level; i++)
3284 		kprintf("  ");
3285 
3286 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3287 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3288 		cache->virtual_base, cache->virtual_end, cache->page_count);
3289 
3290 	if (level == 0)
3291 		kprintf("/%lu", info.page_count);
3292 
3293 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3294 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3295 
3296 		if (level == 0)
3297 			kprintf("/%lu", info.committed);
3298 	}
3299 
3300 	// areas
3301 	if (cache->areas != NULL) {
3302 		VMArea* area = cache->areas;
3303 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3304 			area->name, area->address_space->ID());
3305 
3306 		while (area->cache_next != NULL) {
3307 			area = area->cache_next;
3308 			kprintf(", %" B_PRId32, area->id);
3309 		}
3310 	}
3311 
3312 	kputs("\n");
3313 
3314 	// recurse
3315 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3316 			VMCache* consumer = it.Next();) {
3317 		dump_caches_recursively(consumer, info, level + 1);
3318 	}
3319 }
3320 
3321 
3322 static int
3323 dump_caches(int argc, char** argv)
3324 {
3325 	if (sCacheInfoTable == NULL) {
3326 		kprintf("No cache info table!\n");
3327 		return 0;
3328 	}
3329 
3330 	bool sortByPageCount = true;
3331 
3332 	for (int32 i = 1; i < argc; i++) {
3333 		if (strcmp(argv[i], "-c") == 0) {
3334 			sortByPageCount = false;
3335 		} else {
3336 			print_debugger_command_usage(argv[0]);
3337 			return 0;
3338 		}
3339 	}
3340 
3341 	uint32 totalCount = 0;
3342 	uint32 rootCount = 0;
3343 	off_t totalCommitted = 0;
3344 	page_num_t totalPages = 0;
3345 
3346 	VMCache* cache = gDebugCacheList;
3347 	while (cache) {
3348 		totalCount++;
3349 		if (cache->source == NULL) {
3350 			cache_info stackInfo;
3351 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3352 				? sCacheInfoTable[rootCount] : stackInfo;
3353 			rootCount++;
3354 			info.cache = cache;
3355 			info.page_count = 0;
3356 			info.committed = 0;
3357 			update_cache_info_recursively(cache, info);
3358 			totalCommitted += info.committed;
3359 			totalPages += info.page_count;
3360 		}
3361 
3362 		cache = cache->debug_next;
3363 	}
3364 
3365 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3366 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3367 			sortByPageCount
3368 				? &cache_info_compare_page_count
3369 				: &cache_info_compare_committed);
3370 	}
3371 
3372 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3373 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3374 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3375 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3376 			"page count" : "committed size");
3377 
3378 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3379 		for (uint32 i = 0; i < rootCount; i++) {
3380 			cache_info& info = sCacheInfoTable[i];
3381 			dump_caches_recursively(info.cache, info, 0);
3382 		}
3383 	} else
3384 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3385 
3386 	return 0;
3387 }
3388 
3389 #endif	// DEBUG_CACHE_LIST
3390 
3391 
3392 static int
3393 dump_cache(int argc, char** argv)
3394 {
3395 	VMCache* cache;
3396 	bool showPages = false;
3397 	int i = 1;
3398 
3399 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3400 		kprintf("usage: %s [-ps] <address>\n"
3401 			"  if -p is specified, all pages are shown; if -s is used,\n"
3402 			"  only the cache info is shown.\n", argv[0]);
3403 		return 0;
3404 	}
3405 	while (argv[i][0] == '-') {
3406 		char* arg = argv[i] + 1;
3407 		while (arg[0]) {
3408 			if (arg[0] == 'p')
3409 				showPages = true;
3410 			arg++;
3411 		}
3412 		i++;
3413 	}
3414 	if (argv[i] == NULL) {
3415 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3416 		return 0;
3417 	}
3418 
3419 	addr_t address = parse_expression(argv[i]);
3420 	if (address == 0)
3421 		return 0;
3422 
3423 	cache = (VMCache*)address;
3424 
3425 	cache->Dump(showPages);
3426 
3427 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3428 
3429 	return 0;
3430 }
3431 
3432 
3433 static void
3434 dump_area_struct(VMArea* area, bool mappings)
3435 {
3436 	kprintf("AREA: %p\n", area);
3437 	kprintf("name:\t\t'%s'\n", area->name);
3438 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3439 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3440 	kprintf("base:\t\t0x%lx\n", area->Base());
3441 	kprintf("size:\t\t0x%lx\n", area->Size());
3442 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3443 	kprintf("page_protections:\t%p\n", area->page_protections);
3444 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3445 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3446 	kprintf("cache:\t\t%p\n", area->cache);
3447 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3448 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3449 	kprintf("cache_next:\t%p\n", area->cache_next);
3450 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3451 
3452 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3453 	if (mappings) {
3454 		kprintf("page mappings:\n");
3455 		while (iterator.HasNext()) {
3456 			vm_page_mapping* mapping = iterator.Next();
3457 			kprintf("  %p", mapping->page);
3458 		}
3459 		kprintf("\n");
3460 	} else {
3461 		uint32 count = 0;
3462 		while (iterator.Next() != NULL) {
3463 			count++;
3464 		}
3465 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3466 	}
3467 }
3468 
3469 
3470 static int
3471 dump_area(int argc, char** argv)
3472 {
3473 	bool mappings = false;
3474 	bool found = false;
3475 	int32 index = 1;
3476 	VMArea* area;
3477 	addr_t num;
3478 
3479 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3480 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3481 			"All areas matching either id/address/name are listed. You can\n"
3482 			"restrict the check to a specific item by prefixing the specifier\n"
3483 			"with the id/contains/address/name keywords.\n"
3484 			"-m shows the area's mappings as well.\n");
3485 		return 0;
3486 	}
3487 
3488 	if (!strcmp(argv[1], "-m")) {
3489 		mappings = true;
3490 		index++;
3491 	}
3492 
3493 	int32 mode = 0xf;
3494 	if (!strcmp(argv[index], "id"))
3495 		mode = 1;
3496 	else if (!strcmp(argv[index], "contains"))
3497 		mode = 2;
3498 	else if (!strcmp(argv[index], "name"))
3499 		mode = 4;
3500 	else if (!strcmp(argv[index], "address"))
3501 		mode = 0;
3502 	if (mode != 0xf)
3503 		index++;
3504 
3505 	if (index >= argc) {
3506 		kprintf("No area specifier given.\n");
3507 		return 0;
3508 	}
3509 
3510 	num = parse_expression(argv[index]);
3511 
3512 	if (mode == 0) {
3513 		dump_area_struct((struct VMArea*)num, mappings);
3514 	} else {
3515 		// walk through the area list, looking for the arguments as a name
3516 
3517 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3518 		while ((area = it.Next()) != NULL) {
3519 			if (((mode & 4) != 0
3520 					&& !strcmp(argv[index], area->name))
3521 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3522 					|| (((mode & 2) != 0 && area->Base() <= num
3523 						&& area->Base() + area->Size() > num))))) {
3524 				dump_area_struct(area, mappings);
3525 				found = true;
3526 			}
3527 		}
3528 
3529 		if (!found)
3530 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3531 	}
3532 
3533 	return 0;
3534 }
3535 
3536 
3537 static int
3538 dump_area_list(int argc, char** argv)
3539 {
3540 	VMArea* area;
3541 	const char* name = NULL;
3542 	int32 id = 0;
3543 
3544 	if (argc > 1) {
3545 		id = parse_expression(argv[1]);
3546 		if (id == 0)
3547 			name = argv[1];
3548 	}
3549 
3550 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3551 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3552 		B_PRINTF_POINTER_WIDTH, "size");
3553 
3554 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3555 	while ((area = it.Next()) != NULL) {
3556 		if ((id != 0 && area->address_space->ID() != id)
3557 			|| (name != NULL && strstr(area->name, name) == NULL))
3558 			continue;
3559 
3560 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3561 			area->id, (void*)area->Base(), (void*)area->Size(),
3562 			area->protection, area->wiring, area->name);
3563 	}
3564 	return 0;
3565 }
3566 
3567 
3568 static int
3569 dump_available_memory(int argc, char** argv)
3570 {
3571 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3572 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3573 	return 0;
3574 }
3575 
3576 
3577 static int
3578 dump_mapping_info(int argc, char** argv)
3579 {
3580 	bool reverseLookup = false;
3581 	bool pageLookup = false;
3582 
3583 	int argi = 1;
3584 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3585 		const char* arg = argv[argi];
3586 		if (strcmp(arg, "-r") == 0) {
3587 			reverseLookup = true;
3588 		} else if (strcmp(arg, "-p") == 0) {
3589 			reverseLookup = true;
3590 			pageLookup = true;
3591 		} else {
3592 			print_debugger_command_usage(argv[0]);
3593 			return 0;
3594 		}
3595 	}
3596 
3597 	// We need at least one argument, the address. Optionally a thread ID can be
3598 	// specified.
3599 	if (argi >= argc || argi + 2 < argc) {
3600 		print_debugger_command_usage(argv[0]);
3601 		return 0;
3602 	}
3603 
3604 	uint64 addressValue;
3605 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3606 		return 0;
3607 
3608 	Team* team = NULL;
3609 	if (argi < argc) {
3610 		uint64 threadID;
3611 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3612 			return 0;
3613 
3614 		Thread* thread = Thread::GetDebug(threadID);
3615 		if (thread == NULL) {
3616 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3617 			return 0;
3618 		}
3619 
3620 		team = thread->team;
3621 	}
3622 
3623 	if (reverseLookup) {
3624 		phys_addr_t physicalAddress;
3625 		if (pageLookup) {
3626 			vm_page* page = (vm_page*)(addr_t)addressValue;
3627 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3628 		} else {
3629 			physicalAddress = (phys_addr_t)addressValue;
3630 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3631 		}
3632 
3633 		kprintf("    Team     Virtual Address      Area\n");
3634 		kprintf("--------------------------------------\n");
3635 
3636 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3637 			Callback()
3638 				:
3639 				fAddressSpace(NULL)
3640 			{
3641 			}
3642 
3643 			void SetAddressSpace(VMAddressSpace* addressSpace)
3644 			{
3645 				fAddressSpace = addressSpace;
3646 			}
3647 
3648 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3649 			{
3650 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3651 					virtualAddress);
3652 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3653 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3654 				else
3655 					kprintf("\n");
3656 				return false;
3657 			}
3658 
3659 		private:
3660 			VMAddressSpace*	fAddressSpace;
3661 		} callback;
3662 
3663 		if (team != NULL) {
3664 			// team specified -- get its address space
3665 			VMAddressSpace* addressSpace = team->address_space;
3666 			if (addressSpace == NULL) {
3667 				kprintf("Failed to get address space!\n");
3668 				return 0;
3669 			}
3670 
3671 			callback.SetAddressSpace(addressSpace);
3672 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3673 				physicalAddress, callback);
3674 		} else {
3675 			// no team specified -- iterate through all address spaces
3676 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3677 				addressSpace != NULL;
3678 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3679 				callback.SetAddressSpace(addressSpace);
3680 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3681 					physicalAddress, callback);
3682 			}
3683 		}
3684 	} else {
3685 		// get the address space
3686 		addr_t virtualAddress = (addr_t)addressValue;
3687 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3688 		VMAddressSpace* addressSpace;
3689 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3690 			addressSpace = VMAddressSpace::Kernel();
3691 		} else if (team != NULL) {
3692 			addressSpace = team->address_space;
3693 		} else {
3694 			Thread* thread = debug_get_debugged_thread();
3695 			if (thread == NULL || thread->team == NULL) {
3696 				kprintf("Failed to get team!\n");
3697 				return 0;
3698 			}
3699 
3700 			addressSpace = thread->team->address_space;
3701 		}
3702 
3703 		if (addressSpace == NULL) {
3704 			kprintf("Failed to get address space!\n");
3705 			return 0;
3706 		}
3707 
3708 		// let the translation map implementation do the job
3709 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3710 	}
3711 
3712 	return 0;
3713 }
3714 
3715 
3716 /*!	Deletes all areas and reserved regions in the given address space.
3717 
3718 	The caller must ensure that none of the areas has any wired ranges.
3719 
3720 	\param addressSpace The address space.
3721 	\param deletingAddressSpace \c true, if the address space is in the process
3722 		of being deleted.
3723 */
3724 void
3725 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3726 {
3727 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3728 		addressSpace->ID()));
3729 
3730 	addressSpace->WriteLock();
3731 
3732 	// remove all reserved areas in this address space
3733 	addressSpace->UnreserveAllAddressRanges(0);
3734 
3735 	// delete all the areas in this address space
3736 	while (VMArea* area = addressSpace->FirstArea()) {
3737 		ASSERT(!area->IsWired());
3738 		delete_area(addressSpace, area, deletingAddressSpace);
3739 	}
3740 
3741 	addressSpace->WriteUnlock();
3742 }
3743 
3744 
3745 static area_id
3746 vm_area_for(addr_t address, bool kernel)
3747 {
3748 	team_id team;
3749 	if (IS_USER_ADDRESS(address)) {
3750 		// we try the user team address space, if any
3751 		team = VMAddressSpace::CurrentID();
3752 		if (team < 0)
3753 			return team;
3754 	} else
3755 		team = VMAddressSpace::KernelID();
3756 
3757 	AddressSpaceReadLocker locker(team);
3758 	if (!locker.IsLocked())
3759 		return B_BAD_TEAM_ID;
3760 
3761 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3762 	if (area != NULL) {
3763 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3764 			return B_ERROR;
3765 
3766 		return area->id;
3767 	}
3768 
3769 	return B_ERROR;
3770 }
3771 
3772 
3773 /*!	Frees physical pages that were used during the boot process.
3774 	\a end is inclusive.
3775 */
3776 static void
3777 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3778 {
3779 	// free all physical pages in the specified range
3780 
3781 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3782 		phys_addr_t physicalAddress;
3783 		uint32 flags;
3784 
3785 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3786 			&& (flags & PAGE_PRESENT) != 0) {
3787 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3788 			if (page != NULL && page->State() != PAGE_STATE_FREE
3789 					&& page->State() != PAGE_STATE_CLEAR
3790 					&& page->State() != PAGE_STATE_UNUSED) {
3791 				DEBUG_PAGE_ACCESS_START(page);
3792 				vm_page_set_state(page, PAGE_STATE_FREE);
3793 			}
3794 		}
3795 	}
3796 
3797 	// unmap the memory
3798 	map->Unmap(start, end);
3799 }
3800 
3801 
3802 void
3803 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3804 {
3805 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3806 	addr_t end = start + (size - 1);
3807 	addr_t lastEnd = start;
3808 
3809 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3810 		(void*)start, (void*)end));
3811 
3812 	// The areas are sorted in virtual address space order, so
3813 	// we just have to find the holes between them that fall
3814 	// into the area we should dispose
3815 
3816 	map->Lock();
3817 
3818 	for (VMAddressSpace::AreaIterator it
3819 				= VMAddressSpace::Kernel()->GetAreaIterator();
3820 			VMArea* area = it.Next();) {
3821 		addr_t areaStart = area->Base();
3822 		addr_t areaEnd = areaStart + (area->Size() - 1);
3823 
3824 		if (areaEnd < start)
3825 			continue;
3826 
3827 		if (areaStart > end) {
3828 			// we are done, the area is already beyond what we have to free
3829 			break;
3830 		}
3831 
3832 		if (areaStart > lastEnd) {
3833 			// this is something we can free
3834 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3835 				(void*)areaStart));
3836 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3837 		}
3838 
3839 		if (areaEnd >= end) {
3840 			lastEnd = areaEnd;
3841 				// no +1 to prevent potential overflow
3842 			break;
3843 		}
3844 
3845 		lastEnd = areaEnd + 1;
3846 	}
3847 
3848 	if (lastEnd < end) {
3849 		// we can also get rid of some space at the end of the area
3850 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3851 			(void*)end));
3852 		unmap_and_free_physical_pages(map, lastEnd, end);
3853 	}
3854 
3855 	map->Unlock();
3856 }
3857 
3858 
3859 static void
3860 create_preloaded_image_areas(struct preloaded_image* _image)
3861 {
3862 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3863 	char name[B_OS_NAME_LENGTH];
3864 	void* address;
3865 	int32 length;
3866 
3867 	// use file name to create a good area name
3868 	char* fileName = strrchr(image->name, '/');
3869 	if (fileName == NULL)
3870 		fileName = image->name;
3871 	else
3872 		fileName++;
3873 
3874 	length = strlen(fileName);
3875 	// make sure there is enough space for the suffix
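	// (the name buffer is B_OS_NAME_LENGTH bytes and the "_text"/"_data"
	// suffix needs another 5 characters plus the terminating NUL)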
3876 	if (length > 25)
3877 		length = 25;
3878 
3879 	memcpy(name, fileName, length);
3880 	strcpy(name + length, "_text");
3881 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3882 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3883 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3884 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3885 		// this will later be remapped read-only/executable by the
3886 		// ELF initialization code
3887 
3888 	strcpy(name + length, "_data");
3889 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3890 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3891 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3892 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3893 }
3894 
3895 
3896 /*!	Frees all areas that were previously created for the kernel_args ranges.
3897 	Any boot loader resources contained in those arguments must not be
3898 	accessed anymore past this point.
3899 */
3900 void
3901 vm_free_kernel_args(kernel_args* args)
3902 {
3903 	uint32 i;
3904 
3905 	TRACE(("vm_free_kernel_args()\n"));
3906 
3907 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3908 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3909 		if (area >= B_OK)
3910 			delete_area(area);
3911 	}
3912 }
3913 
3914 
3915 static void
3916 allocate_kernel_args(kernel_args* args)
3917 {
3918 	TRACE(("allocate_kernel_args()\n"));
3919 
3920 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3921 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3922 
3923 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3924 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3925 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3926 	}
3927 }
3928 
3929 
3930 static void
3931 unreserve_boot_loader_ranges(kernel_args* args)
3932 {
3933 	TRACE(("unreserve_boot_loader_ranges()\n"));
3934 
3935 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3936 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3937 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3938 			args->virtual_allocated_range[i].size);
3939 	}
3940 }
3941 
3942 
3943 static void
3944 reserve_boot_loader_ranges(kernel_args* args)
3945 {
3946 	TRACE(("reserve_boot_loader_ranges()\n"));
3947 
3948 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3949 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3950 
3951 		// If the address is not a kernel address, we just skip it. The
3952 		// architecture specific code has to deal with it.
3953 		if (!IS_KERNEL_ADDRESS(address)) {
3954 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3955 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3956 			continue;
3957 		}
3958 
3959 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3960 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3961 		if (status < B_OK)
3962 			panic("could not reserve boot loader ranges\n");
3963 	}
3964 }
3965 
3966 
3967 static addr_t
3968 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3969 {
3970 	size = PAGE_ALIGN(size);
3971 
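	// The search tries, in order: a gap between two already allocated ranges,
	// the space after the last range, and finally the space before the first
	// range. The neighbouring range is extended to cover the new allocation.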
3972 	// find a slot in the virtual allocation addr range
3973 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3974 		// check to see if the space between this one and the last is big enough
3975 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3976 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3977 			+ args->virtual_allocated_range[i - 1].size;
3978 
3979 		addr_t base = alignment > 0
3980 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3981 
3982 		if (base >= KERNEL_BASE && base < rangeStart
3983 				&& rangeStart - base >= size) {
3984 			args->virtual_allocated_range[i - 1].size
3985 				+= base + size - previousRangeEnd;
3986 			return base;
3987 		}
3988 	}
3989 
3990 	// we didn't find a gap between the allocation ranges; this is OK.
3991 	// see if there's a gap after the last one
3992 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3993 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3994 		+ args->virtual_allocated_range[lastEntryIndex].size;
3995 	addr_t base = alignment > 0
3996 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3997 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3998 		args->virtual_allocated_range[lastEntryIndex].size
3999 			+= base + size - lastRangeEnd;
4000 		return base;
4001 	}
4002 
4003 	// see if there's a gap before the first one
4004 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4005 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4006 		base = rangeStart - size;
4007 		if (alignment > 0)
4008 			base = ROUNDDOWN(base, alignment);
4009 
4010 		if (base >= KERNEL_BASE) {
4011 			args->virtual_allocated_range[0].start = base;
4012 			args->virtual_allocated_range[0].size += rangeStart - base;
4013 			return base;
4014 		}
4015 	}
4016 
4017 	return 0;
4018 }
4019 
4020 
4021 static bool
4022 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4023 {
4024 	// TODO: horrible brute-force method of determining if the page can be
4025 	// allocated
4026 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4027 		if (address >= args->physical_memory_range[i].start
4028 			&& address < args->physical_memory_range[i].start
4029 				+ args->physical_memory_range[i].size)
4030 			return true;
4031 	}
4032 	return false;
4033 }
4034 
4035 
4036 page_num_t
4037 vm_allocate_early_physical_page(kernel_args* args)
4038 {
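	// Try to extend one of the already allocated physical ranges by a single
	// page: first upwards at the end of each range, then, failing that,
	// downwards at its start.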
4039 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4040 		phys_addr_t nextPage;
4041 
4042 		nextPage = args->physical_allocated_range[i].start
4043 			+ args->physical_allocated_range[i].size;
4044 		// see if the page right after this allocated paddr run can be allocated
4045 		if (i + 1 < args->num_physical_allocated_ranges
4046 			&& args->physical_allocated_range[i + 1].size != 0) {
4047 			// see if the next page will collide with the next allocated range
4048 			if (nextPage >= args->physical_allocated_range[i+1].start)
4049 				continue;
4050 		}
4051 		// see if the next physical page fits in the memory block
4052 		if (is_page_in_physical_memory_range(args, nextPage)) {
4053 			// we got one!
4054 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4055 			return nextPage / B_PAGE_SIZE;
4056 		}
4057 	}
4058 
4059 	// Expanding upwards didn't work, try going downwards.
4060 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4061 		phys_addr_t nextPage;
4062 
4063 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4064 		// see if the page right before this allocated paddr run can be allocated
4065 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4066 			// see if that page would collide with the previous allocated range
4067 			if (nextPage < args->physical_allocated_range[i-1].start
4068 				+ args->physical_allocated_range[i-1].size)
4069 				continue;
4070 		}
4071 		// see if the next physical page fits in the memory block
4072 		if (is_page_in_physical_memory_range(args, nextPage)) {
4073 			// we got one!
4074 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4075 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4076 			return nextPage / B_PAGE_SIZE;
4077 		}
4078 	}
4079 
4080 	return 0;
4081 		// could not allocate a block
4082 }
4083 
4084 
4085 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4086 	allocate some pages before the VM is completely up.
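	The initial kernel heap set up in vm_init(), for instance, is mapped
	through this path (when a debug heap is used for malloc()).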
4087 */
4088 addr_t
4089 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4090 	uint32 attributes, addr_t alignment)
4091 {
4092 	if (physicalSize > virtualSize)
4093 		physicalSize = virtualSize;
4094 
4095 	// find the vaddr to allocate at
4096 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4097 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4098 	if (virtualBase == 0) {
4099 		panic("vm_allocate_early: could not allocate virtual address\n");
4100 		return 0;
4101 	}
4102 
4103 	// map the pages
4104 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4105 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4106 		if (physicalAddress == 0)
4107 			panic("error allocating early page!\n");
4108 
4109 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4110 
4111 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4112 			physicalAddress * B_PAGE_SIZE, attributes,
4113 			&vm_allocate_early_physical_page);
4114 	}
4115 
4116 	return virtualBase;
4117 }
4118 
4119 
4120 /*!	The main entry point to initialize the VM. */
4121 status_t
4122 vm_init(kernel_args* args)
4123 {
4124 	struct preloaded_image* image;
4125 	void* address;
4126 	status_t err = 0;
4127 	uint32 i;
4128 
4129 	TRACE(("vm_init: entry\n"));
4130 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4131 	err = arch_vm_init(args);
4132 
4133 	// initialize some globals
4134 	vm_page_init_num_pages(args);
4135 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4136 
4137 	slab_init(args);
4138 
4139 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4140 	off_t heapSize = INITIAL_HEAP_SIZE;
4141 	// try to accommodate low memory systems
4142 	while (heapSize > sAvailableMemory / 8)
4143 		heapSize /= 2;
4144 	if (heapSize < 1024 * 1024)
4145 		panic("vm_init: go buy some RAM please.");
4146 
4147 	// map in the new heap and initialize it
4148 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4149 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4150 	TRACE(("heap at 0x%lx\n", heapBase));
4151 	heap_init(heapBase, heapSize);
4152 #endif
4153 
4154 	// initialize the free page list and physical page mapper
4155 	vm_page_init(args);
4156 
4157 	// initialize the cache allocators
4158 	vm_cache_init(args);
4159 
4160 	{
4161 		status_t error = VMAreaHash::Init();
4162 		if (error != B_OK)
4163 			panic("vm_init: error initializing area hash table\n");
4164 	}
4165 
4166 	VMAddressSpace::Init();
4167 	reserve_boot_loader_ranges(args);
4168 
4169 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4170 	heap_init_post_area();
4171 #endif
4172 
4173 	// Do any further initialization that the architecture dependent layers may
4174 	// need now
4175 	arch_vm_translation_map_init_post_area(args);
4176 	arch_vm_init_post_area(args);
4177 	vm_page_init_post_area(args);
4178 	slab_init_post_area();
4179 
4180 	// allocate areas to represent stuff that already exists
4181 
4182 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4183 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4184 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4185 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4186 #endif
4187 
4188 	allocate_kernel_args(args);
4189 
4190 	create_preloaded_image_areas(args->kernel_image);
4191 
4192 	// allocate areas for preloaded images
4193 	for (image = args->preloaded_images; image != NULL; image = image->next)
4194 		create_preloaded_image_areas(image);
4195 
4196 	// allocate kernel stacks
4197 	for (i = 0; i < args->num_cpus; i++) {
4198 		char name[64];
4199 
4200 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4201 		address = (void*)args->cpu_kstack[i].start;
4202 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4203 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4204 	}
4205 
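
	// Block the very last page of the address space, presumably so that
	// address calculations overflowing past the end fault instead of silently
	// wrapping around.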
4206 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4207 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4208 
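	// Block the ranges that the debug heap's fill patterns for uninitialized
	// and freed memory point into, so that dereferencing such a pattern value
	// faults immediately.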
4209 #if PARANOID_KERNEL_MALLOC
4210 	vm_block_address_range("uninitialized heap memory",
4211 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4212 #endif
4213 #if PARANOID_KERNEL_FREE
4214 	vm_block_address_range("freed heap memory",
4215 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4216 #endif
4217 
4218 	// create the object cache for the page mappings
4219 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4220 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4221 		NULL, NULL);
4222 	if (gPageMappingsObjectCache == NULL)
4223 		panic("failed to create page mappings object cache");
4224 
4225 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4226 
4227 #if DEBUG_CACHE_LIST
4228 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4229 		virtual_address_restrictions virtualRestrictions = {};
4230 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4231 		physical_address_restrictions physicalRestrictions = {};
4232 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4233 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4234 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4235 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4236 			&physicalRestrictions, (void**)&sCacheInfoTable);
4237 	}
4238 #endif	// DEBUG_CACHE_LIST
4239 
4240 	// add some debugger commands
4241 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4242 	add_debugger_command("area", &dump_area,
4243 		"Dump info about a particular area");
4244 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4245 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4246 #if DEBUG_CACHE_LIST
4247 	if (sCacheInfoTable != NULL) {
4248 		add_debugger_command_etc("caches", &dump_caches,
4249 			"List all VMCache trees",
4250 			"[ \"-c\" ]\n"
4251 			"All cache trees are listed sorted in decreasing order by number "
4252 				"of\n"
4253 			"used pages or, if \"-c\" is specified, by size of committed "
4254 				"memory.\n",
4255 			0);
4256 	}
4257 #endif
4258 	add_debugger_command("avail", &dump_available_memory,
4259 		"Dump available memory");
4260 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4261 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4262 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4263 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4264 	add_debugger_command("string", &display_mem, "dump strings");
4265 
4266 	add_debugger_command_etc("mapping", &dump_mapping_info,
4267 		"Print address mapping information",
4268 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4269 		"Prints low-level page mapping information for a given address. If\n"
4270 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4271 		"address that is looked up in the translation map of the current\n"
4272 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4273 		"\"-r\" is specified, <address> is a physical address that is\n"
4274 		"searched in the translation map of all teams, respectively the team\n"
4275 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4276 		"<address> is the address of a vm_page structure. The behavior is\n"
4277 		"equivalent to specifying \"-r\" with the physical address of that\n"
4278 		"page.\n",
4279 		0);
4280 
4281 	TRACE(("vm_init: exit\n"));
4282 
4283 	vm_cache_init_post_heap();
4284 
4285 	return err;
4286 }
4287 
4288 
4289 status_t
4290 vm_init_post_sem(kernel_args* args)
4291 {
4292 	// This frees all unused boot loader resources and makes their space available
4293 	// again
4294 	arch_vm_init_end(args);
4295 	unreserve_boot_loader_ranges(args);
4296 
4297 	// Fill in all of the semaphores that were not allocated before. Since
4298 	// we're still single threaded and only the kernel address space exists,
4299 	// it isn't that hard to find all of the ones we need to create.
4300 
4301 	arch_vm_translation_map_init_post_sem(args);
4302 
4303 	slab_init_post_sem();
4304 
4305 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4306 	heap_init_post_sem();
4307 #endif
4308 
4309 	return B_OK;
4310 }
4311 
4312 
4313 status_t
4314 vm_init_post_thread(kernel_args* args)
4315 {
4316 	vm_page_init_post_thread(args);
4317 	slab_init_post_thread();
4318 	return heap_init_post_thread();
4319 }
4320 
4321 
4322 status_t
4323 vm_init_post_modules(kernel_args* args)
4324 {
4325 	return arch_vm_init_post_modules(args);
4326 }
4327 
4328 
4329 void
4330 permit_page_faults(void)
4331 {
4332 	Thread* thread = thread_get_current_thread();
4333 	if (thread != NULL)
4334 		atomic_add(&thread->page_faults_allowed, 1);
4335 }
4336 
4337 
4338 void
4339 forbid_page_faults(void)
4340 {
4341 	Thread* thread = thread_get_current_thread();
4342 	if (thread != NULL)
4343 		atomic_add(&thread->page_faults_allowed, -1);
4344 }
4345 
4346 
4347 status_t
4348 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4349 	bool isUser, addr_t* newIP)
4350 {
4351 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4352 		faultAddress));
4353 
4354 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4355 
4356 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4357 	VMAddressSpace* addressSpace = NULL;
4358 
4359 	status_t status = B_OK;
4360 	*newIP = 0;
4361 	atomic_add((int32*)&sPageFaults, 1);
4362 
4363 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4364 		addressSpace = VMAddressSpace::GetKernel();
4365 	} else if (IS_USER_ADDRESS(pageAddress)) {
4366 		addressSpace = VMAddressSpace::GetCurrent();
4367 		if (addressSpace == NULL) {
4368 			if (!isUser) {
4369 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4370 					"memory!\n");
4371 				status = B_BAD_ADDRESS;
4372 				TPF(PageFaultError(-1,
4373 					VMPageFaultTracing
4374 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4375 			} else {
4376 				// XXX weird state.
4377 				panic("vm_page_fault: non kernel thread accessing user memory "
4378 					"that doesn't exist!\n");
4379 				status = B_BAD_ADDRESS;
4380 			}
4381 		}
4382 	} else {
4383 		// the hit was probably in the 64k DMZ between kernel and user space
4384 		// this keeps a user space thread from passing a buffer that crosses
4385 		// into kernel space
4386 		status = B_BAD_ADDRESS;
4387 		TPF(PageFaultError(-1,
4388 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4389 	}
4390 
4391 	if (status == B_OK) {
4392 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4393 			isUser, NULL);
4394 	}
4395 
4396 	if (status < B_OK) {
4397 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4398 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4399 			strerror(status), address, faultAddress, isWrite, isUser,
4400 			thread_get_current_thread_id());
4401 		if (!isUser) {
4402 			Thread* thread = thread_get_current_thread();
4403 			if (thread != NULL && thread->fault_handler != 0) {
4404 				// this will cause the arch dependent page fault handler to
4405 				// modify the IP on the interrupt frame or whatever to return
4406 				// to this address
4407 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4408 			} else {
4409 				// unhandled page fault in the kernel
4410 				panic("vm_page_fault: unhandled page fault in kernel space at "
4411 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4412 			}
4413 		} else {
4414 			Thread* thread = thread_get_current_thread();
4415 
4416 #ifdef TRACE_FAULTS
4417 			VMArea* area = NULL;
4418 			if (addressSpace != NULL) {
4419 				addressSpace->ReadLock();
4420 				area = addressSpace->LookupArea(faultAddress);
4421 			}
4422 
4423 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4424 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4425 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4426 				thread->team->Name(), thread->team->id,
4427 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4428 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4429 					area->Base() : 0x0));
4430 
4431 			if (addressSpace != NULL)
4432 				addressSpace->ReadUnlock();
4433 #endif
4434 
4435 			// If the thread has a signal handler for SIGSEGV, we simply
4436 			// send it the signal. Otherwise we notify the user debugger
4437 			// first.
4438 			struct sigaction action;
4439 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4440 					&& action.sa_handler != SIG_DFL
4441 					&& action.sa_handler != SIG_IGN)
4442 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4443 					SIGSEGV)) {
4444 				Signal signal(SIGSEGV,
4445 					status == B_PERMISSION_DENIED
4446 						? SEGV_ACCERR : SEGV_MAPERR,
4447 					EFAULT, thread->team->id);
4448 				signal.SetAddress((void*)address);
4449 				send_signal_to_thread(thread, signal, 0);
4450 			}
4451 		}
4452 	}
4453 
4454 	if (addressSpace != NULL)
4455 		addressSpace->Put();
4456 
4457 	return B_HANDLED_INTERRUPT;
4458 }
4459 
4460 
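/*!	Bundles the state needed while resolving a single page fault: the address
	space and cache chain locks, the page reservation, and -- as results -- the
	page that was found or allocated and whether the whole process has to be
	restarted.
*/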
4461 struct PageFaultContext {
4462 	AddressSpaceReadLocker	addressSpaceLocker;
4463 	VMCacheChainLocker		cacheChainLocker;
4464 
4465 	VMTranslationMap*		map;
4466 	VMCache*				topCache;
4467 	off_t					cacheOffset;
4468 	vm_page_reservation		reservation;
4469 	bool					isWrite;
4470 
4471 	// return values
4472 	vm_page*				page;
4473 	bool					restart;
4474 	bool					pageAllocated;
4475 
4476 
4477 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4478 		:
4479 		addressSpaceLocker(addressSpace, true),
4480 		map(addressSpace->TranslationMap()),
4481 		isWrite(isWrite)
4482 	{
4483 	}
4484 
4485 	~PageFaultContext()
4486 	{
4487 		UnlockAll();
4488 		vm_page_unreserve_pages(&reservation);
4489 	}
4490 
4491 	void Prepare(VMCache* topCache, off_t cacheOffset)
4492 	{
4493 		this->topCache = topCache;
4494 		this->cacheOffset = cacheOffset;
4495 		page = NULL;
4496 		restart = false;
4497 		pageAllocated = false;
4498 
4499 		cacheChainLocker.SetTo(topCache);
4500 	}
4501 
4502 	void UnlockAll(VMCache* exceptCache = NULL)
4503 	{
4504 		topCache = NULL;
4505 		addressSpaceLocker.Unlock();
4506 		cacheChainLocker.Unlock(exceptCache);
4507 	}
4508 };
4509 
4510 
4511 /*!	Gets the page that should be mapped into the area.
4512 	Returns an error code other than \c B_OK, if the page couldn't be found or
4513 	paged in. The locking state of the address space and the caches is undefined
4514 	in that case.
4515 	Returns \c B_OK with \c context.restart set to \c true, if the functions
4516 	had to unlock the address space and all caches and is supposed to be called
4517 	again.
4518 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4519 	found. It is returned in \c context.page. The address space will still be
4520 	locked as well as all caches starting from the top cache to at least the
4521 	cache the page lives in.
4522 */
4523 static status_t
4524 fault_get_page(PageFaultContext& context)
4525 {
4526 	VMCache* cache = context.topCache;
4527 	VMCache* lastCache = NULL;
4528 	vm_page* page = NULL;
4529 
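	// Walk the cache chain from the top cache down through its source caches,
	// looking for a page at the faulting cache offset.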
4530 	while (cache != NULL) {
4531 		// We already hold the lock of the cache at this point.
4532 
4533 		lastCache = cache;
4534 
4535 		page = cache->LookupPage(context.cacheOffset);
4536 		if (page != NULL && page->busy) {
4537 			// the page is busy -- wait for it to become unbusy
4538 			context.UnlockAll(cache);
4539 			cache->ReleaseRefLocked();
4540 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4541 
4542 			// restart the whole process
4543 			context.restart = true;
4544 			return B_OK;
4545 		}
4546 
4547 		if (page != NULL)
4548 			break;
4549 
4550 		// The current cache does not contain the page we're looking for.
4551 
4552 		// see if the backing store has it
4553 		if (cache->HasPage(context.cacheOffset)) {
4554 			// insert a fresh page and mark it busy -- we're going to read it in
4555 			page = vm_page_allocate_page(&context.reservation,
4556 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4557 			cache->InsertPage(page, context.cacheOffset);
4558 
4559 			// We need to unlock all caches and the address space while reading
4560 			// the page in. Keep a reference to the cache around.
4561 			cache->AcquireRefLocked();
4562 			context.UnlockAll();
4563 
4564 			// read the page in
4565 			generic_io_vec vec;
4566 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4567 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4568 
4569 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4570 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4571 
4572 			cache->Lock();
4573 
4574 			if (status < B_OK) {
4575 				// on error remove and free the page
4576 				dprintf("reading page from cache %p returned: %s!\n",
4577 					cache, strerror(status));
4578 
4579 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4580 				cache->RemovePage(page);
4581 				vm_page_set_state(page, PAGE_STATE_FREE);
4582 
4583 				cache->ReleaseRefAndUnlock();
4584 				return status;
4585 			}
4586 
4587 			// mark the page unbusy again
4588 			cache->MarkPageUnbusy(page);
4589 
4590 			DEBUG_PAGE_ACCESS_END(page);
4591 
4592 			// Since we needed to unlock everything temporarily, the area
4593 			// situation might have changed. So we need to restart the whole
4594 			// process.
4595 			cache->ReleaseRefAndUnlock();
4596 			context.restart = true;
4597 			return B_OK;
4598 		}
4599 
4600 		cache = context.cacheChainLocker.LockSourceCache();
4601 	}
4602 
4603 	if (page == NULL) {
4604 		// There was no adequate page; determine the cache for a clean one.
4605 		// Read-only pages go into the deepest cache; only the topmost cache
4606 		// may have direct write access.
4607 		cache = context.isWrite ? context.topCache : lastCache;
4608 
4609 		// allocate a clean page
4610 		page = vm_page_allocate_page(&context.reservation,
4611 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4612 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4613 			page->physical_page_number));
4614 
4615 		// insert the new page into our cache
4616 		cache->InsertPage(page, context.cacheOffset);
4617 		context.pageAllocated = true;
4618 	} else if (page->Cache() != context.topCache && context.isWrite) {
4619 		// We have a page that has the data we want, but in the wrong cache
4620 		// object so we need to copy it and stick it into the top cache.
4621 		vm_page* sourcePage = page;
4622 
4623 		// TODO: If memory is low, it might be a good idea to steal the page
4624 		// from our source cache -- if possible, that is.
4625 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4626 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4627 
4628 		// To not needlessly kill concurrency we unlock all caches but the top
4629 		// one while copying the page. Lacking another mechanism to ensure that
4630 		// the source page doesn't disappear, we mark it busy.
4631 		sourcePage->busy = true;
4632 		context.cacheChainLocker.UnlockKeepRefs(true);
4633 
4634 		// copy the page
4635 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4636 			sourcePage->physical_page_number * B_PAGE_SIZE);
4637 
4638 		context.cacheChainLocker.RelockCaches(true);
4639 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4640 
4641 		// insert the new page into our cache
4642 		context.topCache->InsertPage(page, context.cacheOffset);
4643 		context.pageAllocated = true;
4644 	} else
4645 		DEBUG_PAGE_ACCESS_START(page);
4646 
4647 	context.page = page;
4648 	return B_OK;
4649 }
4650 
4651 
4652 /*!	Makes sure the address in the given address space is mapped.
4653 
4654 	\param addressSpace The address space.
4655 	\param originalAddress The address. Doesn't need to be page aligned.
4656 	\param isWrite If \c true the address shall be write-accessible.
4657 	\param isUser If \c true the access is requested by a userland team.
4658 	\param wirePage On success, if non \c NULL, the wired count of the page
4659 		mapped at the given address is incremented and the page is returned
4660 		via this parameter.
4661 	\return \c B_OK on success, another error code otherwise.
4662 */
4663 static status_t
4664 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4665 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4666 {
4667 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4668 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4669 		originalAddress, isWrite, isUser));
4670 
4671 	PageFaultContext context(addressSpace, isWrite);
4672 
4673 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4674 	status_t status = B_OK;
4675 
4676 	addressSpace->IncrementFaultCount();
4677 
4678 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4679 	// the pages upfront makes sure we don't have any cache locked, so that the
4680 	// page daemon/thief can do their job without problems.
4681 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4682 		originalAddress);
4683 	context.addressSpaceLocker.Unlock();
4684 	vm_page_reserve_pages(&context.reservation, reservePages,
4685 		addressSpace == VMAddressSpace::Kernel()
4686 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4687 
4688 	while (true) {
4689 		context.addressSpaceLocker.Lock();
4690 
4691 		// get the area the fault was in
4692 		VMArea* area = addressSpace->LookupArea(address);
4693 		if (area == NULL) {
4694 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4695 				"space\n", originalAddress);
4696 			TPF(PageFaultError(-1,
4697 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4698 			status = B_BAD_ADDRESS;
4699 			break;
4700 		}
4701 
4702 		// check permissions
4703 		uint32 protection = get_area_page_protection(area, address);
4704 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4705 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4706 				area->id, (void*)originalAddress);
4707 			TPF(PageFaultError(area->id,
4708 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4709 			status = B_PERMISSION_DENIED;
4710 			break;
4711 		}
4712 		if (isWrite && (protection
4713 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4714 			dprintf("write access attempted on write-protected area 0x%"
4715 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4716 			TPF(PageFaultError(area->id,
4717 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4718 			status = B_PERMISSION_DENIED;
4719 			break;
4720 		} else if (isExecute && (protection
4721 				& (B_EXECUTE_AREA
4722 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4723 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4724 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4725 			TPF(PageFaultError(area->id,
4726 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4727 			status = B_PERMISSION_DENIED;
4728 			break;
4729 		} else if (!isWrite && !isExecute && (protection
4730 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4731 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4732 				" at %p\n", area->id, (void*)originalAddress);
4733 			TPF(PageFaultError(area->id,
4734 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4735 			status = B_PERMISSION_DENIED;
4736 			break;
4737 		}
4738 
4739 		// We have the area, it was a valid access, so let's try to resolve the
4740 		// page fault now.
4741 		// At first, the top most cache from the area is investigated.
4742 
4743 		context.Prepare(vm_area_get_locked_cache(area),
4744 			address - area->Base() + area->cache_offset);
4745 
4746 		// See if this cache has a fault handler -- this will do all the work
4747 		// for us.
4748 		{
4749 			// Note, since the page fault is resolved with interrupts enabled,
4750 			// the fault handler could be called more than once for the same
4751 			// reason -- the store must take this into account.
4752 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4753 			if (status != B_BAD_HANDLER)
4754 				break;
4755 		}
4756 
4757 		// The top most cache has no fault handler, so let's see if the cache or
4758 		// its sources already have the page we're searching for (we're going
4759 		// from top to bottom).
4760 		status = fault_get_page(context);
4761 		if (status != B_OK) {
4762 			TPF(PageFaultError(area->id, status));
4763 			break;
4764 		}
4765 
4766 		if (context.restart)
4767 			continue;
4768 
4769 		// All went fine, all there is left to do is to map the page into the
4770 		// address space.
4771 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4772 			context.page));
4773 
4774 		// If the page doesn't reside in the area's cache, we need to make sure
4775 		// it's mapped in read-only, so that we cannot overwrite someone else's
4776 		// data (copy-on-write)
4777 		uint32 newProtection = protection;
4778 		if (context.page->Cache() != context.topCache && !isWrite)
4779 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4780 
4781 		bool unmapPage = false;
4782 		bool mapPage = true;
4783 
4784 		// check whether there's already a page mapped at the address
4785 		context.map->Lock();
4786 
4787 		phys_addr_t physicalAddress;
4788 		uint32 flags;
4789 		vm_page* mappedPage = NULL;
4790 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4791 			&& (flags & PAGE_PRESENT) != 0
4792 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4793 				!= NULL) {
4794 			// Yep there's already a page. If it's ours, we can simply adjust
4795 			// its protection. Otherwise we have to unmap it.
4796 			if (mappedPage == context.page) {
4797 				context.map->ProtectPage(area, address, newProtection);
4798 					// Note: We assume that ProtectPage() is atomic (i.e.
4799 					// the page isn't temporarily unmapped), otherwise we'd have
4800 					// to make sure it isn't wired.
4801 				mapPage = false;
4802 			} else
4803 				unmapPage = true;
4804 		}
4805 
4806 		context.map->Unlock();
4807 
4808 		if (unmapPage) {
4809 			// If the page is wired, we can't unmap it. Wait until it is unwired
4810 			// again and restart. Note that the page cannot be wired for
4811 			// writing, since it it isn't in the topmost cache. So we can safely
4812 			// writing, since it isn't in the topmost cache. So we can safely
4813 			// wiring attempts in progress) and in fact have to do that to avoid
4814 			// a deadlock.
4815 			VMAreaUnwiredWaiter waiter;
4816 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4817 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4818 				// unlock everything and wait
4819 				if (context.pageAllocated) {
4820 					// ... but since we allocated a page and inserted it into
4821 					// the top cache, remove and free it first. Otherwise we'd
4822 					// have a page from a lower cache mapped while an upper
4823 					// cache has a page that would shadow it.
4824 					context.topCache->RemovePage(context.page);
4825 					vm_page_free_etc(context.topCache, context.page,
4826 						&context.reservation);
4827 				} else
4828 					DEBUG_PAGE_ACCESS_END(context.page);
4829 
4830 				context.UnlockAll();
4831 				waiter.waitEntry.Wait();
4832 				continue;
4833 			}
4834 
4835 			// Note: The mapped page is a page of a lower cache. We are
4836 			// guaranteed to have that cache locked, our new page is a copy of
4837 			// that page, and the page is not busy. The logic for that guarantee
4838 			// is as follows: Since the page is mapped, it must live in the top
4839 			// cache (ruled out above) or any of its lower caches, and there is
4840 			// (was before the new page was inserted) no other page in any
4841 			// cache between the top cache and the page's cache (otherwise that
4842 			// would be mapped instead). That in turn means that our algorithm
4843 			// must have found it and therefore it cannot be busy either.
4844 			DEBUG_PAGE_ACCESS_START(mappedPage);
4845 			unmap_page(area, address);
4846 			DEBUG_PAGE_ACCESS_END(mappedPage);
4847 		}
4848 
4849 		if (mapPage) {
4850 			if (map_page(area, context.page, address, newProtection,
4851 					&context.reservation) != B_OK) {
4852 				// Mapping can only fail when the page mapping object couldn't
4853 				// be allocated. Save for the missing mapping, everything is
4854 				// fine, though. If this was a regular page fault, we'll simply
4855 				// leave and probably fault again. To make sure we'll have more
4856 				// luck then, we ensure that the minimum object reserve is
4857 				// available.
4858 				DEBUG_PAGE_ACCESS_END(context.page);
4859 
4860 				context.UnlockAll();
4861 
4862 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4863 						!= B_OK) {
4864 					// Apparently the situation is serious. Let's get ourselves
4865 					// killed.
4866 					status = B_NO_MEMORY;
4867 				} else if (wirePage != NULL) {
4868 					// The caller expects us to wire the page. Since
4869 					// object_cache_reserve() succeeded, we should now be able
4870 					// to allocate a mapping structure. Restart.
4871 					continue;
4872 				}
4873 
4874 				break;
4875 			}
4876 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4877 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4878 
4879 		// also wire the page, if requested
4880 		if (wirePage != NULL && status == B_OK) {
4881 			increment_page_wired_count(context.page);
4882 			*wirePage = context.page;
4883 		}
4884 
4885 		DEBUG_PAGE_ACCESS_END(context.page);
4886 
4887 		break;
4888 	}
4889 
4890 	return status;
4891 }
4892 
4893 
4894 status_t
4895 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4896 {
4897 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4898 }
4899 
4900 status_t
4901 vm_put_physical_page(addr_t vaddr, void* handle)
4902 {
4903 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4904 }
4905 
4906 
4907 status_t
4908 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4909 	void** _handle)
4910 {
4911 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4912 }
4913 
4914 status_t
4915 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4916 {
4917 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4918 }
4919 
4920 
4921 status_t
4922 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4923 {
4924 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4925 }
4926 
4927 status_t
4928 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4929 {
4930 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4931 }
4932 
4933 
4934 void
4935 vm_get_info(system_info* info)
4936 {
4937 	swap_get_info(info);
4938 
4939 	MutexLocker locker(sAvailableMemoryLock);
4940 	info->needed_memory = sNeededMemory;
4941 	info->free_memory = sAvailableMemory;
4942 }
4943 
4944 
4945 uint32
4946 vm_num_page_faults(void)
4947 {
4948 	return sPageFaults;
4949 }
4950 
4951 
4952 off_t
4953 vm_available_memory(void)
4954 {
4955 	MutexLocker locker(sAvailableMemoryLock);
4956 	return sAvailableMemory;
4957 }
4958 
4959 
4960 off_t
4961 vm_available_not_needed_memory(void)
4962 {
4963 	MutexLocker locker(sAvailableMemoryLock);
4964 	return sAvailableMemory - sNeededMemory;
4965 }
4966 
4967 
4968 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4969 	debugger.
4970 */
4971 off_t
4972 vm_available_not_needed_memory_debug(void)
4973 {
4974 	return sAvailableMemory - sNeededMemory;
4975 }
4976 
4977 
4978 size_t
4979 vm_kernel_address_space_left(void)
4980 {
4981 	return VMAddressSpace::Kernel()->FreeSpace();
4982 }
4983 
4984 
4985 void
4986 vm_unreserve_memory(size_t amount)
4987 {
4988 	mutex_lock(&sAvailableMemoryLock);
4989 
4990 	sAvailableMemory += amount;
4991 
4992 	mutex_unlock(&sAvailableMemoryLock);
4993 }
4994 
4995 
4996 status_t
4997 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4998 {
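	// The reserve to keep untouched depends on the caller's priority; this
	// way lower priority requests leave headroom for more critical
	// allocations.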
4999 	size_t reserve = kMemoryReserveForPriority[priority];
5000 
5001 	MutexLocker locker(sAvailableMemoryLock);
5002 
5003 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5004 
5005 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5006 		sAvailableMemory -= amount;
5007 		return B_OK;
5008 	}
5009 
5010 	if (timeout <= 0)
5011 		return B_NO_MEMORY;
5012 
5013 	// turn timeout into an absolute timeout
5014 	timeout += system_time();
5015 
5016 	// loop until we've got the memory or the timeout occurs
5017 	do {
5018 		sNeededMemory += amount;
5019 
5020 		// call the low resource manager
5021 		locker.Unlock();
5022 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5023 			B_ABSOLUTE_TIMEOUT, timeout);
5024 		locker.Lock();
5025 
5026 		sNeededMemory -= amount;
5027 
5028 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5029 			sAvailableMemory -= amount;
5030 			return B_OK;
5031 		}
5032 	} while (timeout > system_time());
5033 
5034 	return B_NO_MEMORY;
5035 }
5036 
5037 
5038 status_t
5039 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5040 {
5041 	// NOTE: The caller is responsible for synchronizing calls to this function!
5042 
5043 	AddressSpaceReadLocker locker;
5044 	VMArea* area;
5045 	status_t status = locker.SetFromArea(id, area);
5046 	if (status != B_OK)
5047 		return status;
5048 
5049 	// nothing to do, if the type doesn't change
5050 	uint32 oldType = area->MemoryType();
5051 	if (type == oldType)
5052 		return B_OK;
5053 
5054 	// set the memory type of the area and the mapped pages
5055 	VMTranslationMap* map = area->address_space->TranslationMap();
5056 	map->Lock();
5057 	area->SetMemoryType(type);
5058 	map->ProtectArea(area, area->protection);
5059 	map->Unlock();
5060 
5061 	// set the physical memory type
5062 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5063 	if (error != B_OK) {
5064 		// reset the memory type of the area and the mapped pages
5065 		map->Lock();
5066 		area->SetMemoryType(oldType);
5067 		map->ProtectArea(area, area->protection);
5068 		map->Unlock();
5069 		return error;
5070 	}
5071 
5072 	return B_OK;
5073 
5074 }
5075 
5076 
5077 /*!	This function enforces some protection properties:
5078 	 - kernel areas must be W^X (after kernel startup)
5079 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5080 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5081 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
5082 	   and B_KERNEL_WRITE_AREA.
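	For example, a request for just B_READ_AREA | B_WRITE_AREA comes out as
	B_READ_AREA | B_WRITE_AREA | B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA.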
5083 */
5084 static void
5085 fix_protection(uint32* protection)
5086 {
5087 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5088 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5089 			|| (*protection & B_WRITE_AREA) != 0)
5090 		&& !gKernelStartup)
5091 		panic("kernel areas cannot be both writable and executable!");
5092 
5093 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5094 		if ((*protection & B_USER_PROTECTION) == 0
5095 			|| (*protection & B_WRITE_AREA) != 0)
5096 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5097 		else
5098 			*protection |= B_KERNEL_READ_AREA;
5099 	}
5100 }
5101 
5102 
5103 static void
5104 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5105 {
5106 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5107 	info->area = area->id;
5108 	info->address = (void*)area->Base();
5109 	info->size = area->Size();
5110 	info->protection = area->protection;
5111 	info->lock = area->wiring;
5112 	info->team = area->address_space->ID();
5113 	info->copy_count = 0;
5114 	info->in_count = 0;
5115 	info->out_count = 0;
5116 		// TODO: retrieve real values here!
5117 
5118 	VMCache* cache = vm_area_get_locked_cache(area);
5119 
5120 	// Note, this is a simplification; the cache could be larger than this area
5121 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5122 
5123 	vm_area_put_locked_cache(cache);
5124 }
5125 
5126 
5127 static status_t
5128 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5129 {
5130 	// is newSize a multiple of B_PAGE_SIZE?
5131 	if (newSize & (B_PAGE_SIZE - 1))
5132 		return B_BAD_VALUE;
5133 
5134 	// lock all affected address spaces and the cache
5135 	VMArea* area;
5136 	VMCache* cache;
5137 
5138 	MultiAddressSpaceLocker locker;
5139 	AreaCacheLocker cacheLocker;
5140 
5141 	status_t status;
5142 	size_t oldSize;
5143 	bool anyKernelArea;
5144 	bool restart;
5145 
5146 	do {
5147 		anyKernelArea = false;
5148 		restart = false;
5149 
5150 		locker.Unset();
5151 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5152 		if (status != B_OK)
5153 			return status;
5154 		cacheLocker.SetTo(cache, true);	// already locked
5155 
5156 		// enforce restrictions
5157 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5158 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5159 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5160 				"resize kernel area %" B_PRId32 " (%s)\n",
5161 				team_get_current_team_id(), areaID, area->name);
5162 			return B_NOT_ALLOWED;
5163 		}
5164 		// TODO: Enforce all restrictions (team, etc.)!
5165 
5166 		oldSize = area->Size();
5167 		if (newSize == oldSize)
5168 			return B_OK;
5169 
5170 		if (cache->type != CACHE_TYPE_RAM)
5171 			return B_NOT_ALLOWED;
5172 
5173 		if (oldSize < newSize) {
5174 			// We need to check if all areas of this cache can be resized.
5175 			for (VMArea* current = cache->areas; current != NULL;
5176 					current = current->cache_next) {
5177 				if (!current->address_space->CanResizeArea(current, newSize))
5178 					return B_ERROR;
5179 				anyKernelArea
5180 					|= current->address_space == VMAddressSpace::Kernel();
5181 			}
5182 		} else {
5183 			// We're shrinking the areas, so we must make sure the affected
5184 			// ranges are not wired.
5185 			for (VMArea* current = cache->areas; current != NULL;
5186 					current = current->cache_next) {
5187 				anyKernelArea
5188 					|= current->address_space == VMAddressSpace::Kernel();
5189 
5190 				if (wait_if_area_range_is_wired(current,
5191 						current->Base() + newSize, oldSize - newSize, &locker,
5192 						&cacheLocker)) {
5193 					restart = true;
5194 					break;
5195 				}
5196 			}
5197 		}
5198 	} while (restart);
5199 
5200 	// Okay, looks good so far, so let's do it
5201 
5202 	int priority = kernel && anyKernelArea
5203 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5204 	uint32 allocationFlags = kernel && anyKernelArea
5205 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5206 
5207 	if (oldSize < newSize) {
5208 		// Growing the cache can fail, so we do it first.
5209 		status = cache->Resize(cache->virtual_base + newSize, priority);
5210 		if (status != B_OK)
5211 			return status;
5212 	}
5213 
5214 	for (VMArea* current = cache->areas; current != NULL;
5215 			current = current->cache_next) {
5216 		status = current->address_space->ResizeArea(current, newSize,
5217 			allocationFlags);
5218 		if (status != B_OK)
5219 			break;
5220 
5221 		// We also need to unmap all pages beyond the new size, if the area has
5222 		// shrunk
5223 		if (newSize < oldSize) {
5224 			VMCacheChainLocker cacheChainLocker(cache);
5225 			cacheChainLocker.LockAllSourceCaches();
5226 
5227 			unmap_pages(current, current->Base() + newSize,
5228 				oldSize - newSize);
5229 
5230 			cacheChainLocker.Unlock(cache);
5231 		}
5232 	}
5233 
5234 	if (status == B_OK) {
5235 		// Shrink or grow individual page protections if in use.
5236 		if (area->page_protections != NULL) {
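			// Page protections are stored as one nibble (4 bits) per page, so
			// two pages share a byte; the "+ 1" below rounds up for an odd
			// page count.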
5237 			size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5238 			uint8* newProtections
5239 				= (uint8*)realloc(area->page_protections, bytes);
5240 			if (newProtections == NULL)
5241 				status = B_NO_MEMORY;
5242 			else {
5243 				area->page_protections = newProtections;
5244 
5245 				if (oldSize < newSize) {
5246 					// init the additional page protections to that of the area
5247 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5248 					uint32 areaProtection = area->protection
5249 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5250 					memset(area->page_protections + offset,
5251 						areaProtection | (areaProtection << 4), bytes - offset);
5252 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5253 						uint8& entry = area->page_protections[offset - 1];
5254 						entry = (entry & 0x0f) | (areaProtection << 4);
5255 					}
5256 				}
5257 			}
5258 		}
5259 	}
5260 
5261 	// shrinking the cache can't fail, so we do it now
5262 	if (status == B_OK && newSize < oldSize)
5263 		status = cache->Resize(cache->virtual_base + newSize, priority);
5264 
5265 	if (status != B_OK) {
5266 		// Something failed -- resize the areas back to their original size.
5267 		// This can fail, too, in which case we're seriously screwed.
5268 		for (VMArea* current = cache->areas; current != NULL;
5269 				current = current->cache_next) {
5270 			if (current->address_space->ResizeArea(current, oldSize,
5271 					allocationFlags) != B_OK) {
5272 				panic("vm_resize_area(): Failed to resize and unable to restore "
5273 					"the original state.");
5274 			}
5275 		}
5276 
5277 		cache->Resize(cache->virtual_base + oldSize, priority);
5278 	}
5279 
5280 	// TODO: we must honour the lock restrictions of this area
5281 	return status;
5282 }
5283 
5284 
5285 status_t
5286 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5287 {
5288 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5289 }
5290 
5291 
5292 status_t
5293 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5294 {
5295 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5296 }
5297 
5298 
5299 status_t
5300 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5301 	bool user)
5302 {
5303 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5304 }
5305 
5306 
5307 void
5308 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5309 {
5310 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5311 }
5312 
5313 
5314 /*!	Copies a range of memory directly from/to a page that might not be mapped
5315 	at the moment.
5316 
5317 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5318 	walks through the respective area's cache chain to find the physical page
5319 	and copies from/to it directly.
5320 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5321 	must not cross a page boundary.
5322 
5323 	\param teamID The team ID identifying the address space \a unsafeMemory is
5324 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5325 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5326 		is passed, the address space of the thread returned by
5327 		debug_get_debugged_thread() is used.
5328 	\param unsafeMemory The start of the unsafe memory range to be copied
5329 		from/to.
5330 	\param buffer A safely accessible kernel buffer to be copied from/to.
5331 	\param size The number of bytes to be copied.
5332 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5333 		\a unsafeMemory, the other way around otherwise.
5334 */
5335 status_t
5336 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5337 	size_t size, bool copyToUnsafe)
5338 {
5339 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5340 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5341 		return B_BAD_VALUE;
5342 	}
5343 
5344 	// get the address space for the debugged thread
5345 	VMAddressSpace* addressSpace;
5346 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5347 		addressSpace = VMAddressSpace::Kernel();
5348 	} else if (teamID == B_CURRENT_TEAM) {
5349 		Thread* thread = debug_get_debugged_thread();
5350 		if (thread == NULL || thread->team == NULL)
5351 			return B_BAD_ADDRESS;
5352 
5353 		addressSpace = thread->team->address_space;
5354 	} else
5355 		addressSpace = VMAddressSpace::DebugGet(teamID);
5356 
5357 	if (addressSpace == NULL)
5358 		return B_BAD_ADDRESS;
5359 
5360 	// get the area
5361 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5362 	if (area == NULL)
5363 		return B_BAD_ADDRESS;
5364 
5365 	// search the page
5366 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5367 		+ area->cache_offset;
5368 	VMCache* cache = area->cache;
5369 	vm_page* page = NULL;
5370 	while (cache != NULL) {
5371 		page = cache->DebugLookupPage(cacheOffset);
5372 		if (page != NULL)
5373 			break;
5374 
5375 		// Page not found in this cache -- if it is paged out, we must not try
5376 		// to get it from lower caches.
5377 		if (cache->DebugHasPage(cacheOffset))
5378 			break;
5379 
5380 		cache = cache->source;
5381 	}
5382 
5383 	if (page == NULL)
5384 		return B_UNSUPPORTED;
5385 
5386 	// copy from/to physical memory
5387 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5388 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5389 
5390 	if (copyToUnsafe) {
5391 		if (page->Cache() != area->cache)
5392 			return B_UNSUPPORTED;
5393 
5394 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5395 	}
5396 
5397 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5398 }
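
// Illustrative usage sketch (not taken from this file; the debugged address
// below is hypothetical): copying a few bytes out of a possibly unmapped page
// belonging to the debugged thread's team. The range must not cross a page
// boundary.
//
//		uint8 buffer[16];
//		status_t status = vm_debug_copy_page_memory(B_CURRENT_TEAM,
//			(void*)debuggedAddress, buffer, sizeof(buffer), false);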
5399 
5400 
5401 static inline bool
5402 validate_user_range(const void* addr, size_t size)
5403 {
5404 	addr_t address = (addr_t)addr;
5405 
5406 	// Check for overflows on all addresses.
5407 	if ((address + size) < address)
5408 		return false;
5409 
5410 	// Validate that the address does not cross the kernel/user boundary.
5411 	if (IS_USER_ADDRESS(address))
5412 		return IS_USER_ADDRESS(address + size);
5413 	else
5414 		return !IS_USER_ADDRESS(address + size);
5415 }
5416 
5417 
5418 //	#pragma mark - kernel public API
5419 
5420 
5421 status_t
5422 user_memcpy(void* to, const void* from, size_t size)
5423 {
5424 	if (!validate_user_range(to, size) || !validate_user_range(from, size))
5425 		return B_BAD_ADDRESS;
5426 
5427 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5428 		return B_BAD_ADDRESS;
5429 
5430 	return B_OK;
5431 }
5432 
5433 
5434 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5435 	the string in \a to, NULL-terminating the result.
5436 
5437 	\param to Pointer to the destination C-string.
5438 	\param from Pointer to the source C-string.
5439 	\param size Size in bytes of the string buffer pointed to by \a to.
5440 
5441 	\return strlen(\a from), or an error code if an address was invalid.
5442 */
5443 ssize_t
5444 user_strlcpy(char* to, const char* from, size_t size)
5445 {
5446 	if (to == NULL && size != 0)
5447 		return B_BAD_VALUE;
5448 	if (from == NULL)
5449 		return B_BAD_ADDRESS;
5450 
5451 	// Protect the source address from overflows.
5452 	size_t maxSize = size;
5453 	if ((addr_t)from + maxSize < (addr_t)from)
5454 		maxSize -= (addr_t)from + maxSize;
5455 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5456 		maxSize = USER_TOP - (addr_t)from;
5457 
5458 	if (!validate_user_range(to, maxSize))
5459 		return B_BAD_ADDRESS;
5460 
5461 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5462 	if (result < 0)
5463 		return result;
5464 
5465 	// If we hit the address overflow boundary, fail.
5466 	if ((size_t)result >= maxSize && maxSize < size)
5467 		return B_BAD_ADDRESS;
5468 
5469 	return result;
5470 }
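
// Illustrative usage sketch (assumes a syscall context; userName and the error
// handling are hypothetical): copying a user string and detecting truncation.
//
//		char name[B_OS_NAME_LENGTH];
//		ssize_t length = user_strlcpy(name, userName, sizeof(name));
//		if (length < B_OK)
//			return (status_t)length;	// B_BAD_ADDRESS or B_BAD_VALUE
//		bool truncated = (size_t)length >= sizeof(name);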
5471 
5472 
5473 status_t
5474 user_memset(void* s, char c, size_t count)
5475 {
5476 	if (!validate_user_range(s, count))
5477 		return B_BAD_ADDRESS;
5478 
5479 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5480 		return B_BAD_ADDRESS;
5481 
5482 	return B_OK;
5483 }
5484 
5485 
5486 /*!	Wires a single page at the given address.
5487 
5488 	\param team The team whose address space the address belongs to. Also
5489 		supports \c B_CURRENT_TEAM. If the given address is a kernel address,
5490 		the parameter is ignored.
5491 	\param address The virtual address to wire down. Does not need to be
5492 		page aligned.
5493 	\param writable If \c true the page shall be writable.
5494 	\param info On success the info is filled in; among other things it
5495 		contains the physical address the given virtual one translates to.
5496 	\return \c B_OK when the page could be wired, another error code otherwise.
5497 */
5498 status_t
5499 vm_wire_page(team_id team, addr_t address, bool writable,
5500 	VMPageWiringInfo* info)
5501 {
5502 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5503 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5504 
5505 	// compute the page protection that is required
5506 	bool isUser = IS_USER_ADDRESS(address);
5507 	uint32 requiredProtection = PAGE_PRESENT
5508 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5509 	if (writable)
5510 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5511 
5512 	// get and read lock the address space
5513 	VMAddressSpace* addressSpace = NULL;
5514 	if (isUser) {
5515 		if (team == B_CURRENT_TEAM)
5516 			addressSpace = VMAddressSpace::GetCurrent();
5517 		else
5518 			addressSpace = VMAddressSpace::Get(team);
5519 	} else
5520 		addressSpace = VMAddressSpace::GetKernel();
5521 	if (addressSpace == NULL)
5522 		return B_ERROR;
5523 
5524 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5525 
5526 	VMTranslationMap* map = addressSpace->TranslationMap();
5527 	status_t error = B_OK;
5528 
5529 	// get the area
5530 	VMArea* area = addressSpace->LookupArea(pageAddress);
5531 	if (area == NULL) {
5532 		addressSpace->Put();
5533 		return B_BAD_ADDRESS;
5534 	}
5535 
5536 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5537 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5538 
5539 	// mark the area range wired
5540 	area->Wire(&info->range);
5541 
5542 	// Lock the area's cache chain and the translation map. Needed to look
5543 	// up the page and play with its wired count.
5544 	cacheChainLocker.LockAllSourceCaches();
5545 	map->Lock();
5546 
5547 	phys_addr_t physicalAddress;
5548 	uint32 flags;
5549 	vm_page* page;
5550 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5551 		&& (flags & requiredProtection) == requiredProtection
5552 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5553 			!= NULL) {
5554 		// Already mapped with the correct permissions -- just increment
5555 		// the page's wired count.
5556 		increment_page_wired_count(page);
5557 
5558 		map->Unlock();
5559 		cacheChainLocker.Unlock();
5560 		addressSpaceLocker.Unlock();
5561 	} else {
5562 		// Let vm_soft_fault() map the page for us, if possible. We need
5563 		// to fully unlock to avoid deadlocks. Since we have already
5564 		// wired the area itself, nothing disturbing will happen with it
5565 		// in the meantime.
5566 		map->Unlock();
5567 		cacheChainLocker.Unlock();
5568 		addressSpaceLocker.Unlock();
5569 
5570 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5571 			isUser, &page);
5572 
5573 		if (error != B_OK) {
5574 			// The page could not be mapped -- clean up.
5575 			VMCache* cache = vm_area_get_locked_cache(area);
5576 			area->Unwire(&info->range);
5577 			cache->ReleaseRefAndUnlock();
5578 			addressSpace->Put();
5579 			return error;
5580 		}
5581 	}
5582 
5583 	info->physicalAddress
5584 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5585 			+ address % B_PAGE_SIZE;
5586 	info->page = page;
5587 
5588 	return B_OK;
5589 }
5590 
5591 
5592 /*!	Unwires a single page previously wired via vm_wire_page().
5593 
5594 	\param info The same object passed to vm_wire_page() before.
5595 */
5596 void
5597 vm_unwire_page(VMPageWiringInfo* info)
5598 {
5599 	// lock the address space
5600 	VMArea* area = info->range.area;
5601 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5602 		// takes over our reference
5603 
5604 	// lock the top cache
5605 	VMCache* cache = vm_area_get_locked_cache(area);
5606 	VMCacheChainLocker cacheChainLocker(cache);
5607 
5608 	if (info->page->Cache() != cache) {
5609 		// The page is not in the top cache, so we lock the whole cache chain
5610 		// before touching the page's wired count.
5611 		cacheChainLocker.LockAllSourceCaches();
5612 	}
5613 
5614 	decrement_page_wired_count(info->page);
5615 
5616 	// remove the wired range from the area
5617 	area->Unwire(&info->range);
5618 
5619 	cacheChainLocker.Unlock();
5620 }
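
// Illustrative usage sketch (driver-style caller; userBuffer is hypothetical):
// wiring a single user page to learn its physical address, then balancing the
// call with vm_unwire_page().
//
//		VMPageWiringInfo info;
//		status_t status = vm_wire_page(B_CURRENT_TEAM, (addr_t)userBuffer, true,
//			&info);
//		if (status != B_OK)
//			return status;
//		// ... use info.physicalAddress for the transfer ...
//		vm_unwire_page(&info);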
5621 
5622 
5623 /*!	Wires down the given address range in the specified team's address space.
5624 
5625 	If successful the function
5626 	- acquires a reference to the specified team's address space,
5627 	- adds respective wired ranges to all areas that intersect with the given
5628 	  address range,
5629 	- makes sure all pages in the given address range are mapped with the
5630 	  requested access permissions and increments their wired count.
5631 
5632 	It fails when \a team doesn't specify a valid address space, when any part
5633 	of the specified address range is not covered by areas, when the concerned
5634 	areas don't allow mapping with the requested permissions, or when mapping
5635 	failed for another reason.
5636 
5637 	When successful, the call must be balanced by an unlock_memory_etc() call
5638 	with the exact same parameters.
5639 
5640 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
5641 		is supported.
5642 	\param address The start of the address range to be wired.
5643 	\param numBytes The size of the address range to be wired.
5644 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5645 		requests that the range must be wired writable ("read from device
5646 		into memory").
5647 	\return \c B_OK on success, another error code otherwise.
5648 */
5649 status_t
5650 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5651 {
5652 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5653 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5654 
5655 	// compute the page protection that is required
5656 	bool isUser = IS_USER_ADDRESS(address);
5657 	bool writable = (flags & B_READ_DEVICE) == 0;
5658 	uint32 requiredProtection = PAGE_PRESENT
5659 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5660 	if (writable)
5661 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5662 
5663 	uint32 mallocFlags = isUser
5664 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5665 
5666 	// get and read lock the address space
5667 	VMAddressSpace* addressSpace = NULL;
5668 	if (isUser) {
5669 		if (team == B_CURRENT_TEAM)
5670 			addressSpace = VMAddressSpace::GetCurrent();
5671 		else
5672 			addressSpace = VMAddressSpace::Get(team);
5673 	} else
5674 		addressSpace = VMAddressSpace::GetKernel();
5675 	if (addressSpace == NULL)
5676 		return B_ERROR;
5677 
5678 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5679 		// We get a new address space reference here. The one we got above will
5680 		// be freed by unlock_memory_etc().
5681 
5682 	VMTranslationMap* map = addressSpace->TranslationMap();
5683 	status_t error = B_OK;
5684 
5685 	// iterate through all concerned areas
5686 	addr_t nextAddress = lockBaseAddress;
5687 	while (nextAddress != lockEndAddress) {
5688 		// get the next area
5689 		VMArea* area = addressSpace->LookupArea(nextAddress);
5690 		if (area == NULL) {
5691 			error = B_BAD_ADDRESS;
5692 			break;
5693 		}
5694 
5695 		addr_t areaStart = nextAddress;
5696 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5697 
5698 		// allocate the wired range (do that before locking the cache to avoid
5699 		// deadlocks)
5700 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5701 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5702 		if (range == NULL) {
5703 			error = B_NO_MEMORY;
5704 			break;
5705 		}
5706 
5707 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5708 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5709 
5710 		// mark the area range wired
5711 		area->Wire(range);
5712 
5713 		// Depending on the area cache type and the wiring, we may not need to
5714 		// look at the individual pages.
5715 		if (area->cache_type == CACHE_TYPE_NULL
5716 			|| area->cache_type == CACHE_TYPE_DEVICE
5717 			|| area->wiring == B_FULL_LOCK
5718 			|| area->wiring == B_CONTIGUOUS) {
5719 			nextAddress = areaEnd;
5720 			continue;
5721 		}
5722 
5723 		// Lock the area's cache chain and the translation map. Needed to look
5724 		// up pages and play with their wired count.
5725 		cacheChainLocker.LockAllSourceCaches();
5726 		map->Lock();
5727 
5728 		// iterate through the pages and wire them
5729 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5730 			phys_addr_t physicalAddress;
5731 			uint32 flags;
5732 
5733 			vm_page* page;
5734 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5735 				&& (flags & requiredProtection) == requiredProtection
5736 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5737 					!= NULL) {
5738 				// Already mapped with the correct permissions -- just increment
5739 				// the page's wired count.
5740 				increment_page_wired_count(page);
5741 			} else {
5742 				// Let vm_soft_fault() map the page for us, if possible. We need
5743 				// to fully unlock to avoid deadlocks. Since we have already
5744 				// wired the area itself, nothing disturbing will happen with it
5745 				// in the meantime.
5746 				map->Unlock();
5747 				cacheChainLocker.Unlock();
5748 				addressSpaceLocker.Unlock();
5749 
5750 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5751 					false, isUser, &page);
5752 
5753 				addressSpaceLocker.Lock();
5754 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5755 				cacheChainLocker.LockAllSourceCaches();
5756 				map->Lock();
5757 			}
5758 
5759 			if (error != B_OK)
5760 				break;
5761 		}
5762 
5763 		map->Unlock();
5764 
5765 		if (error == B_OK) {
5766 			cacheChainLocker.Unlock();
5767 		} else {
5768 			// An error occurred, so abort right here. If the current address
5769 			// is the first in this area, unwire the area, since we won't get
5770 			// to it when reverting what we've done so far.
5771 			if (nextAddress == areaStart) {
5772 				area->Unwire(range);
5773 				cacheChainLocker.Unlock();
5774 				range->~VMAreaWiredRange();
5775 				free_etc(range, mallocFlags);
5776 			} else
5777 				cacheChainLocker.Unlock();
5778 
5779 			break;
5780 		}
5781 	}
5782 
5783 	if (error != B_OK) {
5784 		// An error occurred, so unwire all that we've already wired. Note that
5785 		// even if not a single page was wired, unlock_memory_etc() is called
5786 		// to put the address space reference.
5787 		addressSpaceLocker.Unlock();
5788 		unlock_memory_etc(team, (void*)lockBaseAddress,
5789 			nextAddress - lockBaseAddress, flags);
5790 	}
5791 
5792 	return error;
5793 }
5794 
5795 
5796 status_t
5797 lock_memory(void* address, size_t numBytes, uint32 flags)
5798 {
5799 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5800 }
5801 
5802 
5803 /*!	Unwires an address range previously wired with lock_memory_etc().
5804 
5805 	Note that a call to this function must balance a previous lock_memory_etc()
5806 	call with exactly the same parameters.
5807 */
5808 status_t
5809 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5810 {
5811 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5812 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5813 
5814 	// compute the page protection that is required
5815 	bool isUser = IS_USER_ADDRESS(address);
5816 	bool writable = (flags & B_READ_DEVICE) == 0;
5817 	uint32 requiredProtection = PAGE_PRESENT
5818 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5819 	if (writable)
5820 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5821 
5822 	uint32 mallocFlags = isUser
5823 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5824 
5825 	// get and read lock the address space
5826 	VMAddressSpace* addressSpace = NULL;
5827 	if (isUser) {
5828 		if (team == B_CURRENT_TEAM)
5829 			addressSpace = VMAddressSpace::GetCurrent();
5830 		else
5831 			addressSpace = VMAddressSpace::Get(team);
5832 	} else
5833 		addressSpace = VMAddressSpace::GetKernel();
5834 	if (addressSpace == NULL)
5835 		return B_ERROR;
5836 
5837 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5838 		// Take over the address space reference. We don't unlock until we're
5839 		// done.
5840 
5841 	VMTranslationMap* map = addressSpace->TranslationMap();
5842 	status_t error = B_OK;
5843 
5844 	// iterate through all concerned areas
5845 	addr_t nextAddress = lockBaseAddress;
5846 	while (nextAddress != lockEndAddress) {
5847 		// get the next area
5848 		VMArea* area = addressSpace->LookupArea(nextAddress);
5849 		if (area == NULL) {
5850 			error = B_BAD_ADDRESS;
5851 			break;
5852 		}
5853 
5854 		addr_t areaStart = nextAddress;
5855 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5856 
5857 		// Lock the area's top cache. This is a requirement for
5858 		// VMArea::Unwire().
5859 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5860 
5861 		// Depending on the area cache type and the wiring, we may not need to
5862 		// look at the individual pages.
5863 		if (area->cache_type == CACHE_TYPE_NULL
5864 			|| area->cache_type == CACHE_TYPE_DEVICE
5865 			|| area->wiring == B_FULL_LOCK
5866 			|| area->wiring == B_CONTIGUOUS) {
5867 			// unwire the range (to avoid deadlocks we delete the range after
5868 			// unlocking the cache)
5869 			nextAddress = areaEnd;
5870 			VMAreaWiredRange* range = area->Unwire(areaStart,
5871 				areaEnd - areaStart, writable);
5872 			cacheChainLocker.Unlock();
5873 			if (range != NULL) {
5874 				range->~VMAreaWiredRange();
5875 				free_etc(range, mallocFlags);
5876 			}
5877 			continue;
5878 		}
5879 
5880 		// Lock the area's cache chain and the translation map. Needed to look
5881 		// up pages and play with their wired count.
5882 		cacheChainLocker.LockAllSourceCaches();
5883 		map->Lock();
5884 
5885 		// iterate through the pages and unwire them
5886 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5887 			phys_addr_t physicalAddress;
5888 			uint32 flags;
5889 
5890 			vm_page* page;
5891 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5892 				&& (flags & PAGE_PRESENT) != 0
5893 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5894 					!= NULL) {
5895 				// The page is still mapped -- just decrement
5896 				// its wired count.
5897 				decrement_page_wired_count(page);
5898 			} else {
5899 				panic("unlock_memory_etc(): Failed to unwire page: address "
5900 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5901 					nextAddress);
5902 				error = B_BAD_VALUE;
5903 				break;
5904 			}
5905 		}
5906 
5907 		map->Unlock();
5908 
5909 		// All pages are unwired. Remove the area's wired range as well (to
5910 		// avoid deadlocks we delete the range after unlocking the cache).
5911 		VMAreaWiredRange* range = area->Unwire(areaStart,
5912 			areaEnd - areaStart, writable);
5913 
5914 		cacheChainLocker.Unlock();
5915 
5916 		if (range != NULL) {
5917 			range->~VMAreaWiredRange();
5918 			free_etc(range, mallocFlags);
5919 		}
5920 
5921 		if (error != B_OK)
5922 			break;
5923 	}
5924 
5925 	// get rid of the address space reference lock_memory_etc() acquired
5926 	addressSpace->Put();
5927 
5928 	return error;
5929 }
5930 
5931 
5932 status_t
5933 unlock_memory(void* address, size_t numBytes, uint32 flags)
5934 {
5935 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5936 }
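
// Illustrative usage sketch (buffer, length and team are hypothetical): a
// lock_memory_etc() call has to be balanced by an unlock_memory_etc() call
// with exactly the same parameters.
//
//		status_t status = lock_memory_etc(team, buffer, length, 0);
//		if (status != B_OK)
//			return status;
//		// ... access the wired range ...
//		unlock_memory_etc(team, buffer, length, 0);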
5937 
5938 
5939 /*!	Similar to get_memory_map(), but also allows specifying the address space
5940 	for the memory in question and has saner semantics.
5941 	Returns \c B_OK when the complete range could be translated or
5942 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5943 	case the actual number of entries is written to \c *_numEntries. Any other
5944 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5945 	in this case.
5946 */
5947 status_t
5948 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5949 	physical_entry* table, uint32* _numEntries)
5950 {
5951 	uint32 numEntries = *_numEntries;
5952 	*_numEntries = 0;
5953 
5954 	VMAddressSpace* addressSpace;
5955 	addr_t virtualAddress = (addr_t)address;
5956 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5957 	phys_addr_t physicalAddress;
5958 	status_t status = B_OK;
5959 	int32 index = -1;
5960 	addr_t offset = 0;
5961 	bool interrupts = are_interrupts_enabled();
5962 
5963 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5964 		"entries)\n", team, address, numBytes, numEntries));
5965 
5966 	if (numEntries == 0 || numBytes == 0)
5967 		return B_BAD_VALUE;
5968 
5969 	// in which address space is the address to be found?
5970 	if (IS_USER_ADDRESS(virtualAddress)) {
5971 		if (team == B_CURRENT_TEAM)
5972 			addressSpace = VMAddressSpace::GetCurrent();
5973 		else
5974 			addressSpace = VMAddressSpace::Get(team);
5975 	} else
5976 		addressSpace = VMAddressSpace::GetKernel();
5977 
5978 	if (addressSpace == NULL)
5979 		return B_ERROR;
5980 
5981 	VMTranslationMap* map = addressSpace->TranslationMap();
5982 
5983 	if (interrupts)
5984 		map->Lock();
5985 
5986 	while (offset < numBytes) {
5987 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5988 		uint32 flags;
5989 
5990 		if (interrupts) {
5991 			status = map->Query((addr_t)address + offset, &physicalAddress,
5992 				&flags);
5993 		} else {
5994 			status = map->QueryInterrupt((addr_t)address + offset,
5995 				&physicalAddress, &flags);
5996 		}
5997 		if (status < B_OK)
5998 			break;
5999 		if ((flags & PAGE_PRESENT) == 0) {
6000 			panic("get_memory_map() called on unmapped memory!");
6001 			return B_BAD_ADDRESS;
6002 		}
6003 
6004 		if (index < 0 && pageOffset > 0) {
6005 			physicalAddress += pageOffset;
6006 			if (bytes > B_PAGE_SIZE - pageOffset)
6007 				bytes = B_PAGE_SIZE - pageOffset;
6008 		}
6009 
6010 		// need to switch to the next physical_entry?
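		// (i.e. is this page not physically contiguous with the current entry?)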
6011 		if (index < 0 || table[index].address
6012 				!= physicalAddress - table[index].size) {
6013 			if ((uint32)++index + 1 > numEntries) {
6014 				// table too small
6015 				break;
6016 			}
6017 			table[index].address = physicalAddress;
6018 			table[index].size = bytes;
6019 		} else {
6020 			// the page is contiguous with the current entry -- just extend it
6021 			table[index].size += bytes;
6022 		}
6023 
6024 		offset += bytes;
6025 	}
6026 
6027 	if (interrupts)
6028 		map->Unlock();
6029 
6030 	if (status != B_OK)
6031 		return status;
6032 
6033 	if ((uint32)index + 1 > numEntries) {
6034 		*_numEntries = index;
6035 		return B_BUFFER_OVERFLOW;
6036 	}
6037 
6038 	*_numEntries = index + 1;
6039 	return B_OK;
6040 }
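
// Illustrative usage sketch (buffer, length and the table size are
// hypothetical): translating a virtual buffer into its physical runs.
// B_BUFFER_OVERFLOW means the table was too small; *_numEntries still tells
// how many entries were filled in.
//
//		physical_entry table[8];
//		uint32 numEntries = 8;
//		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
//			table, &numEntries);
//		if (status != B_OK && status != B_BUFFER_OVERFLOW)
//			return status;
//		// table[0 .. numEntries - 1] now describe the physical memory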
6041 
6042 
6043 /*!	According to the BeBook, this function should always succeed.
6044 	This is no longer the case.
6045 */
6046 extern "C" int32
6047 __get_memory_map_haiku(const void* address, size_t numBytes,
6048 	physical_entry* table, int32 numEntries)
6049 {
6050 	uint32 entriesRead = numEntries;
6051 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6052 		table, &entriesRead);
6053 	if (error != B_OK)
6054 		return error;
6055 
6056 	// close the entry list
6057 
6058 	// if it's only one entry, we will silently accept the missing ending
6059 	if (numEntries == 1)
6060 		return B_OK;
6061 
6062 	if (entriesRead + 1 > (uint32)numEntries)
6063 		return B_BUFFER_OVERFLOW;
6064 
6065 	table[entriesRead].address = 0;
6066 	table[entriesRead].size = 0;
6067 
6068 	return B_OK;
6069 }
6070 
6071 
6072 area_id
6073 area_for(void* address)
6074 {
6075 	return vm_area_for((addr_t)address, true);
6076 }
6077 
6078 
6079 area_id
6080 find_area(const char* name)
6081 {
6082 	return VMAreaHash::Find(name);
6083 }
6084 
6085 
6086 status_t
6087 _get_area_info(area_id id, area_info* info, size_t size)
6088 {
6089 	if (size != sizeof(area_info) || info == NULL)
6090 		return B_BAD_VALUE;
6091 
6092 	AddressSpaceReadLocker locker;
6093 	VMArea* area;
6094 	status_t status = locker.SetFromArea(id, area);
6095 	if (status != B_OK)
6096 		return status;
6097 
6098 	fill_area_info(area, info, size);
6099 	return B_OK;
6100 }
6101 
6102 
6103 status_t
6104 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6105 {
6106 	addr_t nextBase = *(addr_t*)cookie;
6107 
6108 	// we're already through the list
6109 	if (nextBase == (addr_t)-1)
6110 		return B_ENTRY_NOT_FOUND;
6111 
6112 	if (team == B_CURRENT_TEAM)
6113 		team = team_get_current_team_id();
6114 
6115 	AddressSpaceReadLocker locker(team);
6116 	if (!locker.IsLocked())
6117 		return B_BAD_TEAM_ID;
6118 
6119 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6120 	if (area == NULL) {
6121 		nextBase = (addr_t)-1;
6122 		return B_ENTRY_NOT_FOUND;
6123 	}
6124 
6125 	fill_area_info(area, info, size);
6126 	*cookie = (ssize_t)(area->Base() + 1);
6127 
6128 	return B_OK;
6129 }
6130 
6131 
6132 status_t
6133 set_area_protection(area_id area, uint32 newProtection)
6134 {
6135 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6136 		newProtection, true);
6137 }
6138 
6139 
6140 status_t
6141 resize_area(area_id areaID, size_t newSize)
6142 {
6143 	return vm_resize_area(areaID, newSize, true);
6144 }
6145 
6146 
6147 /*!	Transfers the specified area to a new team. The caller must be the owner
6148 	of the area.
6149 */
6150 area_id
6151 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6152 	bool kernel)
6153 {
6154 	area_info info;
6155 	status_t status = get_area_info(id, &info);
6156 	if (status != B_OK)
6157 		return status;
6158 
6159 	if (info.team != thread_get_current_thread()->team->id)
6160 		return B_PERMISSION_DENIED;
6161 
6162 	// We need to mark the area cloneable so the following operations work.
6163 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6164 	if (status != B_OK)
6165 		return status;
6166 
6167 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6168 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6169 	if (clonedArea < 0)
6170 		return clonedArea;
6171 
6172 	status = vm_delete_area(info.team, id, kernel);
6173 	if (status != B_OK) {
6174 		vm_delete_area(target, clonedArea, kernel);
6175 		return status;
6176 	}
6177 
6178 	// Now we can reset the protection to whatever it was before.
6179 	set_area_protection(clonedArea, info.protection);
6180 
6181 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6182 
6183 	return clonedArea;
6184 }
6185 
6186 
6187 extern "C" area_id
6188 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6189 	size_t numBytes, uint32 addressSpec, uint32 protection,
6190 	void** _virtualAddress)
6191 {
6192 	if (!arch_vm_supports_protection(protection))
6193 		return B_NOT_SUPPORTED;
6194 
6195 	fix_protection(&protection);
6196 
6197 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6198 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6199 		false);
6200 }
6201 
6202 
6203 area_id
6204 clone_area(const char* name, void** _address, uint32 addressSpec,
6205 	uint32 protection, area_id source)
6206 {
6207 	if ((protection & B_KERNEL_PROTECTION) == 0)
6208 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6209 
6210 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6211 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6212 }
6213 
6214 
6215 area_id
6216 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6217 	uint32 protection, uint32 flags, uint32 guardSize,
6218 	const virtual_address_restrictions* virtualAddressRestrictions,
6219 	const physical_address_restrictions* physicalAddressRestrictions,
6220 	void** _address)
6221 {
6222 	fix_protection(&protection);
6223 
6224 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6225 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6226 		true, _address);
6227 }
6228 
6229 
6230 extern "C" area_id
6231 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6232 	size_t size, uint32 lock, uint32 protection)
6233 {
6234 	fix_protection(&protection);
6235 
6236 	virtual_address_restrictions virtualRestrictions = {};
6237 	virtualRestrictions.address = *_address;
6238 	virtualRestrictions.address_specification = addressSpec;
6239 	physical_address_restrictions physicalRestrictions = {};
6240 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6241 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6242 		true, _address);
6243 }
6244 
6245 
6246 status_t
6247 delete_area(area_id area)
6248 {
6249 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6250 }
6251 
6252 
6253 //	#pragma mark - Userland syscalls
6254 
6255 
6256 status_t
6257 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6258 	addr_t size)
6259 {
6260 	// filter out some unavailable values (for userland)
6261 	switch (addressSpec) {
6262 		case B_ANY_KERNEL_ADDRESS:
6263 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6264 			return B_BAD_VALUE;
6265 	}
6266 
6267 	addr_t address;
6268 
6269 	if (!IS_USER_ADDRESS(userAddress)
6270 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6271 		return B_BAD_ADDRESS;
6272 
6273 	status_t status = vm_reserve_address_range(
6274 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6275 		RESERVED_AVOID_BASE);
6276 	if (status != B_OK)
6277 		return status;
6278 
6279 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6280 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6281 			(void*)address, size);
6282 		return B_BAD_ADDRESS;
6283 	}
6284 
6285 	return B_OK;
6286 }
6287 
6288 
6289 status_t
6290 _user_unreserve_address_range(addr_t address, addr_t size)
6291 {
6292 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6293 		(void*)address, size);
6294 }
6295 
6296 
6297 area_id
6298 _user_area_for(void* address)
6299 {
6300 	return vm_area_for((addr_t)address, false);
6301 }
6302 
6303 
6304 area_id
6305 _user_find_area(const char* userName)
6306 {
6307 	char name[B_OS_NAME_LENGTH];
6308 
6309 	if (!IS_USER_ADDRESS(userName)
6310 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6311 		return B_BAD_ADDRESS;
6312 
6313 	return find_area(name);
6314 }
6315 
6316 
6317 status_t
6318 _user_get_area_info(area_id area, area_info* userInfo)
6319 {
6320 	if (!IS_USER_ADDRESS(userInfo))
6321 		return B_BAD_ADDRESS;
6322 
6323 	area_info info;
6324 	status_t status = get_area_info(area, &info);
6325 	if (status < B_OK)
6326 		return status;
6327 
6328 	// TODO: do we want to prevent userland from seeing kernel protections?
6329 	//info.protection &= B_USER_PROTECTION;
6330 
6331 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6332 		return B_BAD_ADDRESS;
6333 
6334 	return status;
6335 }
6336 
6337 
6338 status_t
6339 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6340 {
6341 	ssize_t cookie;
6342 
6343 	if (!IS_USER_ADDRESS(userCookie)
6344 		|| !IS_USER_ADDRESS(userInfo)
6345 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6346 		return B_BAD_ADDRESS;
6347 
6348 	area_info info;
6349 	status_t status = _get_next_area_info(team, &cookie, &info,
6350 		sizeof(area_info));
6351 	if (status != B_OK)
6352 		return status;
6353 
6354 	//info.protection &= B_USER_PROTECTION;
6355 
6356 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6357 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6358 		return B_BAD_ADDRESS;
6359 
6360 	return status;
6361 }
6362 
6363 
6364 status_t
6365 _user_set_area_protection(area_id area, uint32 newProtection)
6366 {
6367 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6368 		return B_BAD_VALUE;
6369 
6370 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6371 		newProtection, false);
6372 }
6373 
6374 
6375 status_t
6376 _user_resize_area(area_id area, size_t newSize)
6377 {
6378 	// TODO: Since we restrict deleting of areas to those owned by the team,
6379 	// we should also do that for resizing (check other functions, too).
6380 	return vm_resize_area(area, newSize, false);
6381 }
6382 
6383 
6384 area_id
6385 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6386 	team_id target)
6387 {
6388 	// filter out some unavailable values (for userland)
6389 	switch (addressSpec) {
6390 		case B_ANY_KERNEL_ADDRESS:
6391 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6392 			return B_BAD_VALUE;
6393 	}
6394 
6395 	void* address;
6396 	if (!IS_USER_ADDRESS(userAddress)
6397 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6398 		return B_BAD_ADDRESS;
6399 
6400 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6401 	if (newArea < B_OK)
6402 		return newArea;
6403 
6404 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6405 		return B_BAD_ADDRESS;
6406 
6407 	return newArea;
6408 }
6409 
6410 
6411 area_id
6412 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6413 	uint32 protection, area_id sourceArea)
6414 {
6415 	char name[B_OS_NAME_LENGTH];
6416 	void* address;
6417 
6418 	// filter out some unavailable values (for userland)
6419 	switch (addressSpec) {
6420 		case B_ANY_KERNEL_ADDRESS:
6421 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6422 			return B_BAD_VALUE;
6423 	}
6424 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6425 		return B_BAD_VALUE;
6426 
6427 	if (!IS_USER_ADDRESS(userName)
6428 		|| !IS_USER_ADDRESS(userAddress)
6429 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6430 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6431 		return B_BAD_ADDRESS;
6432 
6433 	fix_protection(&protection);
6434 
6435 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6436 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6437 		false);
6438 	if (clonedArea < B_OK)
6439 		return clonedArea;
6440 
6441 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6442 		delete_area(clonedArea);
6443 		return B_BAD_ADDRESS;
6444 	}
6445 
6446 	return clonedArea;
6447 }
6448 
6449 
6450 area_id
6451 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6452 	size_t size, uint32 lock, uint32 protection)
6453 {
6454 	char name[B_OS_NAME_LENGTH];
6455 	void* address;
6456 
6457 	// filter out some unavailable values (for userland)
6458 	switch (addressSpec) {
6459 		case B_ANY_KERNEL_ADDRESS:
6460 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6461 			return B_BAD_VALUE;
6462 	}
6463 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6464 		return B_BAD_VALUE;
6465 
6466 	if (!IS_USER_ADDRESS(userName)
6467 		|| !IS_USER_ADDRESS(userAddress)
6468 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6469 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6470 		return B_BAD_ADDRESS;
6471 
6472 	if (addressSpec == B_EXACT_ADDRESS
6473 		&& IS_KERNEL_ADDRESS(address))
6474 		return B_BAD_VALUE;
6475 
6476 	if (addressSpec == B_ANY_ADDRESS)
6477 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6478 	if (addressSpec == B_BASE_ADDRESS)
6479 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6480 
6481 	fix_protection(&protection);
6482 
6483 	virtual_address_restrictions virtualRestrictions = {};
6484 	virtualRestrictions.address = address;
6485 	virtualRestrictions.address_specification = addressSpec;
6486 	physical_address_restrictions physicalRestrictions = {};
6487 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6488 		size, lock, protection, 0, 0, &virtualRestrictions,
6489 		&physicalRestrictions, false, &address);
6490 
6491 	if (area >= B_OK
6492 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6493 		delete_area(area);
6494 		return B_BAD_ADDRESS;
6495 	}
6496 
6497 	return area;
6498 }
6499 
6500 
6501 status_t
6502 _user_delete_area(area_id area)
6503 {
6504 	// Unlike the BeOS implementation, you can now only delete areas
6505 	// that you have created yourself from userland.
6506 	// The documentation to delete_area() explicitly states that this
6507 	// will be restricted in the future, and so it will.
6508 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6509 }
6510 
6511 
6512 // TODO: create a BeOS style call for this!
6513 
6514 area_id
6515 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6516 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6517 	int fd, off_t offset)
6518 {
6519 	char name[B_OS_NAME_LENGTH];
6520 	void* address;
6521 	area_id area;
6522 
6523 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6524 		return B_BAD_VALUE;
6525 
6526 	fix_protection(&protection);
6527 
6528 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6529 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6530 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6531 		return B_BAD_ADDRESS;
6532 
6533 	if (addressSpec == B_EXACT_ADDRESS) {
6534 		if ((addr_t)address + size < (addr_t)address
6535 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6536 			return B_BAD_VALUE;
6537 		}
6538 		if (!IS_USER_ADDRESS(address)
6539 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6540 			return B_BAD_ADDRESS;
6541 		}
6542 	}
6543 
6544 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6545 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6546 		false);
6547 	if (area < B_OK)
6548 		return area;
6549 
6550 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6551 		return B_BAD_ADDRESS;
6552 
6553 	return area;
6554 }
6555 
6556 
6557 status_t
6558 _user_unmap_memory(void* _address, size_t size)
6559 {
6560 	addr_t address = (addr_t)_address;
6561 
6562 	// check params
6563 	if (size == 0 || (addr_t)address + size < (addr_t)address
6564 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6565 		return B_BAD_VALUE;
6566 	}
6567 
6568 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6569 		return B_BAD_ADDRESS;
6570 
6571 	// Write lock the address space and ensure the address range is not wired.
6572 	AddressSpaceWriteLocker locker;
6573 	do {
6574 		status_t status = locker.SetTo(team_get_current_team_id());
6575 		if (status != B_OK)
6576 			return status;
6577 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6578 			size, &locker));
6579 
6580 	// unmap
6581 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6582 }
6583 
6584 
6585 status_t
6586 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6587 {
6588 	// check address range
6589 	addr_t address = (addr_t)_address;
6590 	size = PAGE_ALIGN(size);
6591 
6592 	if ((address % B_PAGE_SIZE) != 0)
6593 		return B_BAD_VALUE;
6594 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6595 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6596 		// weird error code required by POSIX
6597 		return ENOMEM;
6598 	}
6599 
6600 	// extend and check protection
6601 	if ((protection & ~B_USER_PROTECTION) != 0)
6602 		return B_BAD_VALUE;
6603 
6604 	fix_protection(&protection);
6605 
6606 	// We need to write lock the address space, since we're going to play with
6607 	// the areas. Also make sure that none of the areas is wired and that we're
6608 	// actually allowed to change the protection.
6609 	AddressSpaceWriteLocker locker;
6610 
6611 	bool restart;
6612 	do {
6613 		restart = false;
6614 
6615 		status_t status = locker.SetTo(team_get_current_team_id());
6616 		if (status != B_OK)
6617 			return status;
6618 
6619 		// First round: Check whether the whole range is covered by areas and we
6620 		// are allowed to modify them.
6621 		addr_t currentAddress = address;
6622 		size_t sizeLeft = size;
6623 		while (sizeLeft > 0) {
6624 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6625 			if (area == NULL)
6626 				return B_NO_MEMORY;
6627 
6628 			if ((area->protection & B_KERNEL_AREA) != 0)
6629 				return B_NOT_ALLOWED;
6630 
6631 			// TODO: For (shared) mapped files we should check whether the new
6632 			// protections are compatible with the file permissions. We don't
6633 			// have a way to do that yet, though.
6634 
6635 			addr_t offset = currentAddress - area->Base();
6636 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6637 
6638 			AreaCacheLocker cacheLocker(area);
6639 
6640 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6641 					&locker, &cacheLocker)) {
6642 				restart = true;
6643 				break;
6644 			}
6645 
6646 			cacheLocker.Unlock();
6647 
6648 			currentAddress += rangeSize;
6649 			sizeLeft -= rangeSize;
6650 		}
6651 	} while (restart);
6652 
6653 	// Second round: If the protections differ from that of the area, create a
6654 	// page protection array and re-map mapped pages.
6655 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6656 	addr_t currentAddress = address;
6657 	size_t sizeLeft = size;
6658 	while (sizeLeft > 0) {
6659 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6660 		if (area == NULL)
6661 			return B_NO_MEMORY;
6662 
6663 		addr_t offset = currentAddress - area->Base();
6664 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6665 
6666 		currentAddress += rangeSize;
6667 		sizeLeft -= rangeSize;
6668 
6669 		if (area->page_protections == NULL) {
6670 			if (area->protection == protection)
6671 				continue;
6672 
6673 			status_t status = allocate_area_page_protections(area);
6674 			if (status != B_OK)
6675 				return status;
6676 		}
6677 
6678 		// We need to lock the complete cache chain, since we potentially unmap
6679 		// pages of lower caches.
6680 		VMCache* topCache = vm_area_get_locked_cache(area);
6681 		VMCacheChainLocker cacheChainLocker(topCache);
6682 		cacheChainLocker.LockAllSourceCaches();
6683 
6684 		for (addr_t pageAddress = area->Base() + offset;
6685 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6686 			map->Lock();
6687 
6688 			set_area_page_protection(area, pageAddress, protection);
6689 
6690 			phys_addr_t physicalAddress;
6691 			uint32 flags;
6692 
6693 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6694 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6695 				map->Unlock();
6696 				continue;
6697 			}
6698 
6699 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6700 			if (page == NULL) {
6701 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6702 					"\n", area, physicalAddress);
6703 				map->Unlock();
6704 				return B_ERROR;
6705 			}
6706 
6707 			// If the page is not in the topmost cache and write access is
6708 			// requested, we have to unmap it. Otherwise we can re-map it with
6709 			// the new protection.
6710 			bool unmapPage = page->Cache() != topCache
6711 				&& (protection & B_WRITE_AREA) != 0;
6712 
6713 			if (!unmapPage)
6714 				map->ProtectPage(area, pageAddress, protection);
6715 
6716 			map->Unlock();
6717 
6718 			if (unmapPage) {
6719 				DEBUG_PAGE_ACCESS_START(page);
6720 				unmap_page(area, pageAddress);
6721 				DEBUG_PAGE_ACCESS_END(page);
6722 			}
6723 		}
6724 	}
6725 
6726 	return B_OK;
6727 }
6728 
6729 
6730 status_t
6731 _user_sync_memory(void* _address, size_t size, uint32 flags)
6732 {
6733 	addr_t address = (addr_t)_address;
6734 	size = PAGE_ALIGN(size);
6735 
6736 	// check params
6737 	if ((address % B_PAGE_SIZE) != 0)
6738 		return B_BAD_VALUE;
6739 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6740 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6741 		// weird error code required by POSIX
6742 		return ENOMEM;
6743 	}
6744 
6745 	bool writeSync = (flags & MS_SYNC) != 0;
6746 	bool writeAsync = (flags & MS_ASYNC) != 0;
6747 	if (writeSync && writeAsync)
6748 		return B_BAD_VALUE;
6749 
6750 	if (size == 0 || (!writeSync && !writeAsync))
6751 		return B_OK;
6752 
6753 	// iterate through the range and sync all concerned areas
6754 	while (size > 0) {
6755 		// read lock the address space
6756 		AddressSpaceReadLocker locker;
6757 		status_t error = locker.SetTo(team_get_current_team_id());
6758 		if (error != B_OK)
6759 			return error;
6760 
6761 		// get the first area
6762 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6763 		if (area == NULL)
6764 			return B_NO_MEMORY;
6765 
6766 		uint32 offset = address - area->Base();
6767 		size_t rangeSize = min_c(area->Size() - offset, size);
6768 		offset += area->cache_offset;
6769 
6770 		// lock the cache
6771 		AreaCacheLocker cacheLocker(area);
6772 		if (!cacheLocker)
6773 			return B_BAD_VALUE;
6774 		VMCache* cache = area->cache;
6775 
6776 		locker.Unlock();
6777 
6778 		uint32 firstPage = offset >> PAGE_SHIFT;
6779 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6780 
6781 		// write the pages
6782 		if (cache->type == CACHE_TYPE_VNODE) {
6783 			if (writeSync) {
6784 				// synchronous
6785 				error = vm_page_write_modified_page_range(cache, firstPage,
6786 					endPage);
6787 				if (error != B_OK)
6788 					return error;
6789 			} else {
6790 				// asynchronous
6791 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6792 				// TODO: This is probably not quite what is supposed to happen.
6793 				// Especially when a lot has to be written, it might take ages
6794 				// until it really hits the disk.
6795 			}
6796 		}
6797 
6798 		address += rangeSize;
6799 		size -= rangeSize;
6800 	}
6801 
6802 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6803 	// synchronize multiple mappings of the same file. In our VM they never get
6804 	// out of sync, though, so we don't have to do anything.
6805 
6806 	return B_OK;
6807 }
6808 
6809 
6810 status_t
6811 _user_memory_advice(void* _address, size_t size, uint32 advice)
6812 {
6813 	addr_t address = (addr_t)_address;
6814 	if ((address % B_PAGE_SIZE) != 0)
6815 		return B_BAD_VALUE;
6816 
6817 	size = PAGE_ALIGN(size);
6818 	if (address + size < address || !IS_USER_ADDRESS(address)
6819 		|| !IS_USER_ADDRESS(address + size)) {
6820 		// weird error code required by POSIX
6821 		return B_NO_MEMORY;
6822 	}
6823 
6824 	switch (advice) {
6825 		case MADV_NORMAL:
6826 		case MADV_SEQUENTIAL:
6827 		case MADV_RANDOM:
6828 		case MADV_WILLNEED:
6829 		case MADV_DONTNEED:
6830 			// TODO: Implement!
6831 			break;
6832 
6833 		case MADV_FREE:
6834 		{
6835 			AddressSpaceWriteLocker locker;
6836 			do {
6837 				status_t status = locker.SetTo(team_get_current_team_id());
6838 				if (status != B_OK)
6839 					return status;
6840 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
6841 					address, size, &locker));
6842 
6843 			discard_address_range(locker.AddressSpace(), address, size, false);
6844 			break;
6845 		}
6846 
6847 		default:
6848 			return B_BAD_VALUE;
6849 	}
6850 
6851 	return B_OK;
6852 }
6853 
6854 
6855 status_t
6856 _user_get_memory_properties(team_id teamID, const void* address,
6857 	uint32* _protected, uint32* _lock)
6858 {
6859 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6860 		return B_BAD_ADDRESS;
6861 
6862 	AddressSpaceReadLocker locker;
6863 	status_t error = locker.SetTo(teamID);
6864 	if (error != B_OK)
6865 		return error;
6866 
6867 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6868 	if (area == NULL)
6869 		return B_NO_MEMORY;
6870 
6871 
6872 	uint32 protection = area->protection;
6873 	if (area->page_protections != NULL)
6874 		protection = get_area_page_protection(area, (addr_t)address);
6875 
6876 	uint32 wiring = area->wiring;
6877 
6878 	locker.Unlock();
6879 
6880 	error = user_memcpy(_protected, &protection, sizeof(protection));
6881 	if (error != B_OK)
6882 		return error;
6883 
6884 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6885 
6886 	return error;
6887 }
6888 
6889 
6890 // An ordered list of non-overlapping ranges to track mlock/munlock locking.
6891 // mlock/munlock may be called in unbalanced ways (locking a range multiple
6892 // times, unlocking only a part of it, locking several consecutive ranges and
6893 // unlocking them in one go, etc.). However, the low level lock_memory and
6894 // unlock_memory calls require the locks/unlocks to be balanced (you lock a
6895 // fixed range, and then unlock exactly the same range). This list allows us
6896 // to keep track of exactly what was locked, so we can unlock the correct things.
6897 struct LockedPages : DoublyLinkedListLinkImpl<LockedPages> {
6898 	addr_t start;
6899 	addr_t end;
6900 
6901 	status_t LockMemory()
6902 	{
6903 		return lock_memory((void*)start, end - start, 0);
6904 	}
6905 
6906 	status_t UnlockMemory()
6907 	{
6908 		return unlock_memory((void*)start, end - start, 0);
6909 	}
6910 
6911 	status_t Move(addr_t start, addr_t end)
6912 	{
6913 		status_t result = lock_memory((void*)start, end - start, 0);
6914 		if (result != B_OK)
6915 			return result;
6916 
6917 		result = UnlockMemory();
6918 
6919 		if (result != B_OK) {
6920 			// What can we do if the unlock fails?
6921 			panic("Failed to unlock memory: %s", strerror(result));
6922 			return result;
6923 		}
6924 
6925 		this->start = start;
6926 		this->end = end;
6927 
6928 		return B_OK;
6929 	}
6930 };
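
// Illustrative example of the bookkeeping (page-sized units, hypothetical
// addresses): after mlock() of [0, 4) and then of [2, 8), the list holds the
// two balanced lock_memory() ranges [0, 4) and [4, 8) -- the second call only
// locks the part not already covered, so every byte is locked exactly once at
// the lock_memory() level.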
6931 
6932 
6933 status_t
6934 _user_mlock(const void* address, size_t size) {
6935 	// Maybe there's nothing to do, in which case, do nothing
6936 	if (size == 0)
6937 		return B_OK;
6938 
6939 	// Make sure the address is a multiple of B_PAGE_SIZE (POSIX allows us to
6940 	// reject the call otherwise)
6941 	if ((addr_t)address % B_PAGE_SIZE != 0)
6942 		return EINVAL;
6943 
6944 	size = ROUNDUP(size, B_PAGE_SIZE);
6945 
6946 	addr_t endAddress = (addr_t)address + size;
6947 
6948 	// Pre-allocate a linked list element we may need (it's simpler to do it
6949 	// now than to run out of memory in the middle of changing things)
6950 	LockedPages* newRange = new(std::nothrow) LockedPages();
6951 	if (newRange == NULL)
6952 		return ENOMEM;
6953 
6954 	// Get and lock the team
6955 	Team* team = thread_get_current_thread()->team;
6956 	TeamLocker teamLocker(team);
6957 	teamLocker.Lock();
6958 
6959 	status_t error = B_OK;
6960 	LockedPagesList* lockedPages = &team->locked_pages_list;
6961 
6962 	// Locate the first locked range possibly overlapping ours
6963 	LockedPages* currentRange = lockedPages->Head();
6964 	while (currentRange != NULL && currentRange->end <= (addr_t)address)
6965 		currentRange = lockedPages->GetNext(currentRange);
6966 
6967 	if (currentRange == NULL || currentRange->start >= endAddress) {
6968 		// No existing range is overlapping with ours. We can just lock our
6969 		// range and stop here.
6970 		newRange->start = (addr_t)address;
6971 		newRange->end = endAddress;
6972 		error = newRange->LockMemory();
6973 		if (error != B_OK) {
6974 			delete newRange;
6975 			return error;
6976 		}
6977 		lockedPages->InsertBefore(currentRange, newRange);
6978 		return B_OK;
6979 	}
6980 
6981 	// We get here when there is at least one existing overlapping range.
6982 
6983 	if (currentRange->start <= (addr_t)address) {
6984 		if (currentRange->end >= endAddress) {
6985 			// An existing range is already fully covering the pages we need to
6986 			// lock. Nothing to do then.
6987 			delete newRange;
6988 			return B_OK;
6989 		} else {
6990 			// An existing range covers the start of the area we want to lock.
6991 			// Advance our start address to avoid it.
6992 			address = (void*)currentRange->end;
6993 
6994 			// Move on to the next range for the next step
6995 			currentRange = lockedPages->GetNext(currentRange);
6996 		}
6997 	}
6998 
6999 	// First, lock the new range
7000 	newRange->start = (addr_t)address;
7001 	newRange->end = endAddress;
7002 	error = newRange->LockMemory();
7003 	if (error != B_OK) {
7004 		delete newRange;
7005 		return error;
7006 	}
7007 
7008 	// Unlock all ranges fully overlapping with the area we need to lock
7009 	while (currentRange != NULL && currentRange->end < endAddress) {
7010 		// The existing range is fully contained inside the new one we're
7011 		// trying to lock. Delete/unlock it, and replace it with a new one
7012 		// (this limits fragmentation of the range list, and is simpler to
7013 		// manage)
7014 		error = currentRange->UnlockMemory();
7015 		if (error != B_OK) {
7016 			panic("Failed to unlock a memory range: %s", strerror(error));
7017 			newRange->UnlockMemory();
7018 			delete newRange;
7019 			return error;
7020 		}
7021 		LockedPages* temp = currentRange;
7022 		currentRange = lockedPages->GetNext(currentRange);
7023 		lockedPages->Remove(temp);
7024 		delete temp;
7025 	}
7026 
7027 	if (currentRange != NULL) {
7028 		// One last range may cover the end of the area we're trying to lock
7029 
7030 		if (currentRange->start == (addr_t)address) {
7031 			// In case two overlapping ranges (one at the start and the other
7032 			// at the end) already cover the area we're after, there's nothing
7033 			// more to do. So we destroy our new extra allocation
7034 			error = newRange->UnlockMemory();
7035 			delete newRange;
7036 			return error;
7037 		}
7038 
7039 		if (currentRange->start < endAddress) {
7040 			// Make sure the last range is not overlapping, by moving its start
7041 			error = currentRange->Move(endAddress, currentRange->end);
7042 			if (error != B_OK) {
7043 				panic("Failed to move a memory range: %s", strerror(error));
7044 				newRange->UnlockMemory();
7045 				delete newRange;
7046 				return error;
7047 			}
7048 		}
7049 	}
7050 
7051 	// Finally, store the new range in the locked list
7052 	lockedPages->InsertBefore(currentRange, newRange);
7053 	return B_OK;
7054 }
7055 
7056 
7057 status_t
7058 _user_munlock(const void* address, size_t size)
{
7059 	// Nothing to do for a zero-sized request
7060 	if (size == 0)
7061 		return B_OK;
7062 
7063 	// Make sure the address is a multiple of B_PAGE_SIZE (POSIX allows us
7064 	// to reject the call otherwise)
7065 	if ((addr_t)address % B_PAGE_SIZE != 0)
7066 		return EINVAL;
7067 
7068 	// Round size up to the next page
7069 	size = ROUNDUP(size, B_PAGE_SIZE);
7070 
7071 	addr_t endAddress = (addr_t)address + size;
7072 
7073 	// Get and lock the team
7074 	Team* team = thread_get_current_thread()->team;
7075 	TeamLocker teamLocker(team);
7076 	teamLocker.Lock();
7077 	LockedPagesList* lockedPages = &team->locked_pages_list;
7078 
7079 	status_t error = B_OK;
7080 
7081 	// Locate the first locked range possibly overlapping ours
7082 	LockedPages* currentRange = lockedPages->Head();
7083 	while (currentRange != NULL && currentRange->end <= (addr_t)address)
7084 		currentRange = lockedPages->GetNext(currentRange);
7085 
7086 	if (currentRange == NULL || currentRange->start >= endAddress) {
7087 		// No range is intersecting, nothing to unlock
7088 		return B_OK;
7089 	}
7090 
7091 	if (currentRange->start < (addr_t)address) {
7092 		if (currentRange->end > endAddress) {
7093 			// There is a range fully covering the area we want to unlock,
7094 			// and it extends on both sides. We need to split it in two
7095 			LockedPages* newRange = new(std::nothrow) LockedPages();
7096 			if (newRange == NULL)
7097 				return ENOMEM;
7098 
7099 			newRange->start = endAddress;
7100 			newRange->end = currentRange->end;
7101 
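			// Lock the tail piece first, so the pages that are to stay locked
			// remain wired while the original range is shrunk below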
7102 			error = newRange->LockMemory();
7103 			if (error != B_OK) {
7104 				delete newRange;
7105 				return error;
7106 			}
7107 
7108 			error = currentRange->Move(currentRange->start, (addr_t)address);
7109 			if (error != B_OK) {
7110 				delete newRange;
7111 				return error;
7112 			}
7113 
7114 			lockedPages->InsertAfter(currentRange, newRange);
7115 			return B_OK;
7116 		} else {
7117 			// There is a range that overlaps and extends before the one we
7118 			// want to unlock; we need to shrink it
7119 			error = currentRange->Move(currentRange->start, (addr_t)address);
7120 			if (error != B_OK)
7121 				return error;

			// The shrunk range stays locked; skip past it so the loop below
			// does not unlock and remove it.
			currentRange = lockedPages->GetNext(currentRange);
7122 		}
7123 	}
7124 
7125 	while (currentRange != NULL && currentRange->end <= endAddress) {
7126 		// Unlock all fully overlapping ranges
7127 		error = currentRange->UnlockMemory();
7128 		if (error != B_OK)
7129 			return error;
7130 		LockedPages* temp = currentRange;
7131 		currentRange = lockedPages->GetNext(currentRange);
7132 		lockedPages->Remove(temp);
7133 		delete temp;
7134 	}
7135 
7136 	// Finally, shrink the last partially overlapping range, if any
7137 	if (currentRange != NULL && currentRange->start < endAddress) {
7138 		error = currentRange->Move(endAddress, currentRange->end);
7139 		if (error != B_OK)
7140 			return error;
7141 	}
7142 
7143 	return B_OK;
7144 }
7145 
7146 
7147 // #pragma mark -- compatibility
7148 
7149 
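// On 32-bit x86 builds where phys_addr_t is wider than 32 bits (PAE), the
// BeOS-era interfaces below cannot represent all physical addresses, so
// compatibility wrappers are exported under the old symbol versions.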
7150 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7151 
7152 
7153 struct physical_entry_beos {
7154 	uint32	address;
7155 	uint32	size;
7156 };
7157 
7158 
7159 /*!	The physical_entry structure has changed. We need to translate it to the
7160 	old one.
7161 */
7162 extern "C" int32
7163 __get_memory_map_beos(const void* _address, size_t numBytes,
7164 	physical_entry_beos* table, int32 numEntries)
7165 {
7166 	if (numEntries <= 0)
7167 		return B_BAD_VALUE;
7168 
7169 	const uint8* address = (const uint8*)_address;
7170 
7171 	int32 count = 0;
7172 	while (numBytes > 0 && count < numEntries) {
7173 		physical_entry entry;
7174 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
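		// B_BUFFER_OVERFLOW only means that this single entry does not cover
		// all of numBytes yet; anything else is a real error.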
7175 		if (result < 0) {
7176 			if (result != B_BUFFER_OVERFLOW)
7177 				return result;
7178 		}
7179 
7180 		if (entry.address >= (phys_addr_t)1 << 32) {
7181 			panic("get_memory_map(): Address is greater than 4 GB!");
7182 			return B_ERROR;
7183 		}
7184 
7185 		table[count].address = entry.address;
7186 		table[count++].size = entry.size;
7187 
7188 		address += entry.size;
7189 		numBytes -= entry.size;
7190 	}
7191 
7192 	// null-terminate the table, if possible
7193 	if (count < numEntries) {
7194 		table[count].address = 0;
7195 		table[count].size = 0;
7196 	}
7197 
7198 	return B_OK;
7199 }
7200 
7201 
7202 /*!	The type of the \a physicalAddress parameter has changed from void* to
7203 	phys_addr_t.
7204 */
7205 extern "C" area_id
7206 __map_physical_memory_beos(const char* name, void* physicalAddress,
7207 	size_t numBytes, uint32 addressSpec, uint32 protection,
7208 	void** _virtualAddress)
7209 {
7210 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7211 		addressSpec, protection, _virtualAddress);
7212 }
7213 
7214 
7215 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7216 	we meddle with the \a lock parameter to force 32 bit.
7217 */
7218 extern "C" area_id
7219 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7220 	size_t size, uint32 lock, uint32 protection)
7221 {
7222 	switch (lock) {
7223 		case B_NO_LOCK:
7224 			break;
7225 		case B_FULL_LOCK:
7226 		case B_LAZY_LOCK:
7227 			lock = B_32_BIT_FULL_LOCK;
7228 			break;
7229 		case B_CONTIGUOUS:
7230 			lock = B_32_BIT_CONTIGUOUS;
7231 			break;
7232 	}
7233 
7234 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7235 		protection);
7236 }
7237 
7238 
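// The "@" exports are the old, non-default symbol versions used by BeOS-era
// binaries; the "@@" exports are the default versions that new code links
// against.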
7239 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7240 	"BASE");
7241 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7242 	"map_physical_memory@", "BASE");
7243 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7244 	"BASE");
7245 
7246 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7247 	"get_memory_map@@", "1_ALPHA3");
7248 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7249 	"map_physical_memory@@", "1_ALPHA3");
7250 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7251 	"1_ALPHA3");
7252 
7253 
7254 #else
7255 
7256 
7257 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7258 	"get_memory_map@@", "BASE");
7259 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7260 	"map_physical_memory@@", "BASE");
7261 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7262 	"BASE");
7263 
7264 
7265 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7266