xref: /haiku/src/system/kernel/vm/vm.cpp (revision ed24eb5ff12640d052171c6a7feba37fab8a75d1)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/ThreadAutoLock.h>
51 #include <vm/vm_page.h>
52 #include <vm/vm_priv.h>
53 #include <vm/VMAddressSpace.h>
54 #include <vm/VMArea.h>
55 #include <vm/VMCache.h>
56 
57 #include "VMAddressSpaceLocking.h"
58 #include "VMAnonymousCache.h"
59 #include "VMAnonymousNoSwapCache.h"
60 #include "IORequest.h"
61 
62 
63 //#define TRACE_VM
64 //#define TRACE_FAULTS
65 #ifdef TRACE_VM
66 #	define TRACE(x) dprintf x
67 #else
68 #	define TRACE(x) ;
69 #endif
70 #ifdef TRACE_FAULTS
71 #	define FTRACE(x) dprintf x
72 #else
73 #	define FTRACE(x) ;
74 #endif
75 
76 
77 namespace {
78 
79 class AreaCacheLocking {
80 public:
81 	inline bool Lock(VMCache* lockable)
82 	{
83 		return false;
84 	}
85 
86 	inline void Unlock(VMCache* lockable)
87 	{
88 		vm_area_put_locked_cache(lockable);
89 	}
90 };
91 
92 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
93 public:
94 	inline AreaCacheLocker(VMCache* cache = NULL)
95 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
96 	{
97 	}
98 
99 	inline AreaCacheLocker(VMArea* area)
100 		: AutoLocker<VMCache, AreaCacheLocking>()
101 	{
102 		SetTo(area);
103 	}
104 
105 	inline void SetTo(VMCache* cache, bool alreadyLocked)
106 	{
107 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
108 	}
109 
110 	inline void SetTo(VMArea* area)
111 	{
112 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
113 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
114 	}
115 };
116 
117 
118 class VMCacheChainLocker {
119 public:
120 	VMCacheChainLocker()
121 		:
122 		fTopCache(NULL),
123 		fBottomCache(NULL)
124 	{
125 	}
126 
127 	VMCacheChainLocker(VMCache* topCache)
128 		:
129 		fTopCache(topCache),
130 		fBottomCache(topCache)
131 	{
132 	}
133 
134 	~VMCacheChainLocker()
135 	{
136 		Unlock();
137 	}
138 
139 	void SetTo(VMCache* topCache)
140 	{
141 		fTopCache = topCache;
142 		fBottomCache = topCache;
143 
144 		if (topCache != NULL)
145 			topCache->SetUserData(NULL);
146 	}
147 
148 	VMCache* LockSourceCache()
149 	{
150 		if (fBottomCache == NULL || fBottomCache->source == NULL)
151 			return NULL;
152 
153 		VMCache* previousCache = fBottomCache;
154 
155 		fBottomCache = fBottomCache->source;
156 		fBottomCache->Lock();
157 		fBottomCache->AcquireRefLocked();
158 		fBottomCache->SetUserData(previousCache);
159 
160 		return fBottomCache;
161 	}
162 
163 	void LockAllSourceCaches()
164 	{
165 		while (LockSourceCache() != NULL) {
166 		}
167 	}
168 
169 	void Unlock(VMCache* exceptCache = NULL)
170 	{
171 		if (fTopCache == NULL)
172 			return;
173 
174 		// Unlock caches in source -> consumer direction. This is important to
175 		// avoid double-locking and a reversal of locking order in case a cache
176 		// is eligable for merging.
177 		VMCache* cache = fBottomCache;
178 		while (cache != NULL) {
179 			VMCache* nextCache = (VMCache*)cache->UserData();
180 			if (cache != exceptCache)
181 				cache->ReleaseRefAndUnlock(cache != fTopCache);
182 
183 			if (cache == fTopCache)
184 				break;
185 
186 			cache = nextCache;
187 		}
188 
189 		fTopCache = NULL;
190 		fBottomCache = NULL;
191 	}
192 
193 	void UnlockKeepRefs(bool keepTopCacheLocked)
194 	{
195 		if (fTopCache == NULL)
196 			return;
197 
198 		VMCache* nextCache = fBottomCache;
199 		VMCache* cache = NULL;
200 
201 		while (keepTopCacheLocked
202 				? nextCache != fTopCache : cache != fTopCache) {
203 			cache = nextCache;
204 			nextCache = (VMCache*)cache->UserData();
205 			cache->Unlock(cache != fTopCache);
206 		}
207 	}
208 
209 	void RelockCaches(bool topCacheLocked)
210 	{
211 		if (fTopCache == NULL)
212 			return;
213 
214 		VMCache* nextCache = fTopCache;
215 		VMCache* cache = NULL;
216 		if (topCacheLocked) {
217 			cache = nextCache;
218 			nextCache = cache->source;
219 		}
220 
221 		while (cache != fBottomCache && nextCache != NULL) {
222 			VMCache* consumer = cache;
223 			cache = nextCache;
224 			nextCache = cache->source;
225 			cache->Lock();
226 			cache->SetUserData(consumer);
227 		}
228 	}
229 
230 private:
231 	VMCache*	fTopCache;
232 	VMCache*	fBottomCache;
233 };
234 
235 } // namespace
236 
237 
238 // The memory reserve an allocation of the certain priority must not touch.
239 static const size_t kMemoryReserveForPriority[] = {
240 	VM_MEMORY_RESERVE_USER,		// user
241 	VM_MEMORY_RESERVE_SYSTEM,	// system
242 	0							// VIP
243 };
244 
245 
246 ObjectCache* gPageMappingsObjectCache;
247 
248 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
249 
250 static off_t sAvailableMemory;
251 static off_t sNeededMemory;
252 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
253 static uint32 sPageFaults;
254 
255 static VMPhysicalPageMapper* sPhysicalPageMapper;
256 
257 #if DEBUG_CACHE_LIST
258 
259 struct cache_info {
260 	VMCache*	cache;
261 	addr_t		page_count;
262 	addr_t		committed;
263 };
264 
265 static const int kCacheInfoTableCount = 100 * 1024;
266 static cache_info* sCacheInfoTable;
267 
268 #endif	// DEBUG_CACHE_LIST
269 
270 
271 // function declarations
272 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
273 	bool addressSpaceCleanup);
274 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
275 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
276 static status_t map_backing_store(VMAddressSpace* addressSpace,
277 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
278 	int protection, int protectionMax, int mapping, uint32 flags,
279 	const virtual_address_restrictions* addressRestrictions, bool kernel,
280 	VMArea** _area, void** _virtualAddress);
281 static void fix_protection(uint32* protection);
282 
283 
284 //	#pragma mark -
285 
286 
287 #if VM_PAGE_FAULT_TRACING
288 
289 namespace VMPageFaultTracing {
290 
291 class PageFaultStart : public AbstractTraceEntry {
292 public:
293 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
294 		:
295 		fAddress(address),
296 		fPC(pc),
297 		fWrite(write),
298 		fUser(user)
299 	{
300 		Initialized();
301 	}
302 
303 	virtual void AddDump(TraceOutput& out)
304 	{
305 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
306 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
307 	}
308 
309 private:
310 	addr_t	fAddress;
311 	addr_t	fPC;
312 	bool	fWrite;
313 	bool	fUser;
314 };
315 
316 
317 // page fault errors
318 enum {
319 	PAGE_FAULT_ERROR_NO_AREA		= 0,
320 	PAGE_FAULT_ERROR_KERNEL_ONLY,
321 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
322 	PAGE_FAULT_ERROR_READ_PROTECTED,
323 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
324 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
325 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
326 };
327 
328 
329 class PageFaultError : public AbstractTraceEntry {
330 public:
331 	PageFaultError(area_id area, status_t error)
332 		:
333 		fArea(area),
334 		fError(error)
335 	{
336 		Initialized();
337 	}
338 
339 	virtual void AddDump(TraceOutput& out)
340 	{
341 		switch (fError) {
342 			case PAGE_FAULT_ERROR_NO_AREA:
343 				out.Print("page fault error: no area");
344 				break;
345 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
346 				out.Print("page fault error: area: %ld, kernel only", fArea);
347 				break;
348 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
349 				out.Print("page fault error: area: %ld, write protected",
350 					fArea);
351 				break;
352 			case PAGE_FAULT_ERROR_READ_PROTECTED:
353 				out.Print("page fault error: area: %ld, read protected", fArea);
354 				break;
355 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
356 				out.Print("page fault error: area: %ld, execute protected",
357 					fArea);
358 				break;
359 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
360 				out.Print("page fault error: kernel touching bad user memory");
361 				break;
362 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
363 				out.Print("page fault error: no address space");
364 				break;
365 			default:
366 				out.Print("page fault error: area: %ld, error: %s", fArea,
367 					strerror(fError));
368 				break;
369 		}
370 	}
371 
372 private:
373 	area_id		fArea;
374 	status_t	fError;
375 };
376 
377 
378 class PageFaultDone : public AbstractTraceEntry {
379 public:
380 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
381 			vm_page* page)
382 		:
383 		fArea(area),
384 		fTopCache(topCache),
385 		fCache(cache),
386 		fPage(page)
387 	{
388 		Initialized();
389 	}
390 
391 	virtual void AddDump(TraceOutput& out)
392 	{
393 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
394 			"page: %p", fArea, fTopCache, fCache, fPage);
395 	}
396 
397 private:
398 	area_id		fArea;
399 	VMCache*	fTopCache;
400 	VMCache*	fCache;
401 	vm_page*	fPage;
402 };
403 
404 }	// namespace VMPageFaultTracing
405 
406 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
407 #else
408 #	define TPF(x) ;
409 #endif	// VM_PAGE_FAULT_TRACING
410 
411 
412 //	#pragma mark -
413 
414 
415 /*!	The page's cache must be locked.
416 */
417 static inline void
418 increment_page_wired_count(vm_page* page)
419 {
420 	if (!page->IsMapped())
421 		atomic_add(&gMappedPagesCount, 1);
422 	page->IncrementWiredCount();
423 }
424 
425 
426 /*!	The page's cache must be locked.
427 */
428 static inline void
429 decrement_page_wired_count(vm_page* page)
430 {
431 	page->DecrementWiredCount();
432 	if (!page->IsMapped())
433 		atomic_add(&gMappedPagesCount, -1);
434 }
435 
436 
437 static inline addr_t
438 virtual_page_address(VMArea* area, vm_page* page)
439 {
440 	return area->Base()
441 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
442 }
443 
444 
445 //! You need to have the address space locked when calling this function
446 static VMArea*
447 lookup_area(VMAddressSpace* addressSpace, area_id id)
448 {
449 	VMAreas::ReadLock();
450 
451 	VMArea* area = VMAreas::LookupLocked(id);
452 	if (area != NULL && area->address_space != addressSpace)
453 		area = NULL;
454 
455 	VMAreas::ReadUnlock();
456 
457 	return area;
458 }
459 
460 
461 static status_t
462 allocate_area_page_protections(VMArea* area)
463 {
464 	// In the page protections we store only the three user protections,
465 	// so we use 4 bits per page.
466 	size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
467 	area->page_protections = (uint8*)malloc_etc(bytes,
468 		area->address_space == VMAddressSpace::Kernel()
469 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
470 	if (area->page_protections == NULL)
471 		return B_NO_MEMORY;
472 
473 	// init the page protections for all pages to that of the area
474 	uint32 areaProtection = area->protection
475 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
476 	memset(area->page_protections, areaProtection | (areaProtection << 4),
477 		bytes);
478 	return B_OK;
479 }
480 
481 
482 static inline void
483 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
484 {
485 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
486 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
487 	uint8& entry = area->page_protections[pageIndex / 2];
488 	if (pageIndex % 2 == 0)
489 		entry = (entry & 0xf0) | protection;
490 	else
491 		entry = (entry & 0x0f) | (protection << 4);
492 }
493 
494 
495 static inline uint32
496 get_area_page_protection(VMArea* area, addr_t pageAddress)
497 {
498 	if (area->page_protections == NULL)
499 		return area->protection;
500 
501 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
502 	uint32 protection = area->page_protections[pageIndex / 2];
503 	if (pageIndex % 2 == 0)
504 		protection &= 0x0f;
505 	else
506 		protection >>= 4;
507 
508 	uint32 kernelProtection = 0;
509 	if ((protection & B_READ_AREA) != 0)
510 		kernelProtection |= B_KERNEL_READ_AREA;
511 	if ((protection & B_WRITE_AREA) != 0)
512 		kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 	// If this is a kernel area we return only the kernel flags.
515 	if (area->address_space == VMAddressSpace::Kernel())
516 		return kernelProtection;
517 
518 	return protection | kernelProtection;
519 }
520 
521 
522 /*!	The caller must have reserved enough pages the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so we must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active, since
575 		// otherwise the page daemon wouldn't come to keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
586 
587 
588 /*!	If \a preserveModified is \c true, the caller must hold the lock of the
589 	page's cache.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	If \a preserveModified is \c true, the caller must hold the lock of all
600 	mapped pages' caches.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 static inline bool
610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
611 {
612 	if (address < area->Base()) {
613 		offset = area->Base() - address;
614 		if (offset >= size)
615 			return false;
616 
617 		address = area->Base();
618 		size -= offset;
619 		offset = 0;
620 		if (size > area->Size())
621 			size = area->Size();
622 
623 		return true;
624 	}
625 
626 	offset = address - area->Base();
627 	if (offset >= area->Size())
628 		return false;
629 
630 	if (size >= area->Size() - offset)
631 		size = area->Size() - offset;
632 
633 	return true;
634 }
635 
636 
637 /*!	Cuts a piece out of an area. If the given cut range covers the complete
638 	area, it is deleted. If it covers the beginning or the end, the area is
639 	resized accordingly. If the range covers some part in the middle of the
640 	area, it is split in two; in this case the second area is returned via
641 	\a _secondArea (the variable is left untouched in the other cases).
642 	The address space must be write locked.
643 	The caller must ensure that no part of the given range is wired.
644 */
645 static status_t
646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
647 	addr_t size, VMArea** _secondArea, bool kernel)
648 {
649 	addr_t offset;
650 	if (!intersect_area(area, address, size, offset))
651 		return B_OK;
652 
653 	// Is the area fully covered?
654 	if (address == area->Base() && size == area->Size()) {
655 		delete_area(addressSpace, area, false);
656 		return B_OK;
657 	}
658 
659 	int priority;
660 	uint32 allocationFlags;
661 	if (addressSpace == VMAddressSpace::Kernel()) {
662 		priority = VM_PRIORITY_SYSTEM;
663 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
664 			| HEAP_DONT_LOCK_KERNEL_SPACE;
665 	} else {
666 		priority = VM_PRIORITY_USER;
667 		allocationFlags = 0;
668 	}
669 
670 	VMCache* cache = vm_area_get_locked_cache(area);
671 	VMCacheChainLocker cacheChainLocker(cache);
672 	cacheChainLocker.LockAllSourceCaches();
673 
674 	// If no one else uses the area's cache and it's an anonymous cache, we can
675 	// resize or split it, too.
676 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
677 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
678 
679 	// Cut the end only?
680 	if (offset > 0 && size == area->Size() - offset) {
681 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
682 			allocationFlags);
683 		if (error != B_OK)
684 			return error;
685 
686 		// unmap pages
687 		unmap_pages(area, address, size);
688 
689 		if (onlyCacheUser) {
690 			// Since VMCache::Resize() can temporarily drop the lock, we must
691 			// unlock all lower caches to prevent locking order inversion.
692 			cacheChainLocker.Unlock(cache);
693 			cache->Resize(cache->virtual_base + offset, priority);
694 			cache->ReleaseRefAndUnlock();
695 		}
696 
697 		return B_OK;
698 	}
699 
700 	// Cut the beginning only?
701 	if (area->Base() == address) {
702 		// resize the area
703 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
704 			allocationFlags);
705 		if (error != B_OK)
706 			return error;
707 
708 		// unmap pages
709 		unmap_pages(area, address, size);
710 
711 		if (onlyCacheUser) {
712 			// Since VMCache::Rebase() can temporarily drop the lock, we must
713 			// unlock all lower caches to prevent locking order inversion.
714 			cacheChainLocker.Unlock(cache);
715 			cache->Rebase(cache->virtual_base + size, priority);
716 			cache->ReleaseRefAndUnlock();
717 		}
718 		area->cache_offset += size;
719 
720 		return B_OK;
721 	}
722 
723 	// The tough part -- cut a piece out of the middle of the area.
724 	// We do that by shrinking the area to the begin section and creating a
725 	// new area for the end section.
726 	addr_t firstNewSize = offset;
727 	addr_t secondBase = address + size;
728 	addr_t secondSize = area->Size() - offset - size;
729 
730 	// unmap pages
731 	unmap_pages(area, address, area->Size() - firstNewSize);
732 
733 	// resize the area
734 	addr_t oldSize = area->Size();
735 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
736 		allocationFlags);
737 	if (error != B_OK)
738 		return error;
739 
740 	virtual_address_restrictions addressRestrictions = {};
741 	addressRestrictions.address = (void*)secondBase;
742 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
743 	VMArea* secondArea;
744 
745 	if (onlyCacheUser) {
746 		// Create a new cache for the second area.
747 		VMCache* secondCache;
748 		error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0,
749 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
750 		if (error != B_OK) {
751 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
752 			return error;
753 		}
754 
755 		secondCache->Lock();
756 		secondCache->temporary = cache->temporary;
757 		secondCache->virtual_base = area->cache_offset;
758 		secondCache->virtual_end = area->cache_offset + secondSize;
759 
760 		// Transfer the concerned pages from the first cache.
761 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
762 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
763 			area->cache_offset);
764 
765 		if (error == B_OK) {
766 			// Since VMCache::Resize() can temporarily drop the lock, we must
767 			// unlock all lower caches to prevent locking order inversion.
768 			cacheChainLocker.Unlock(cache);
769 			cache->Resize(cache->virtual_base + firstNewSize, priority);
770 			// Don't unlock the cache yet because we might have to resize it
771 			// back.
772 
773 			// Map the second area.
774 			error = map_backing_store(addressSpace, secondCache,
775 				area->cache_offset, area->name, secondSize, area->wiring,
776 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
777 				&addressRestrictions, kernel, &secondArea, NULL);
778 		}
779 
780 		if (error != B_OK) {
781 			// Restore the original cache.
782 			cache->Resize(cache->virtual_base + oldSize, priority);
783 
784 			// Move the pages back.
785 			status_t readoptStatus = cache->Adopt(secondCache,
786 				area->cache_offset, secondSize, adoptOffset);
787 			if (readoptStatus != B_OK) {
788 				// Some (swap) pages have not been moved back and will be lost
789 				// once the second cache is deleted.
790 				panic("failed to restore cache range: %s",
791 					strerror(readoptStatus));
792 
793 				// TODO: Handle out of memory cases by freeing memory and
794 				// retrying.
795 			}
796 
797 			cache->ReleaseRefAndUnlock();
798 			secondCache->ReleaseRefAndUnlock();
799 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
800 			return error;
801 		}
802 
803 		// Now we can unlock it.
804 		cache->ReleaseRefAndUnlock();
805 		secondCache->Unlock();
806 	} else {
807 		error = map_backing_store(addressSpace, cache, area->cache_offset
808 			+ (secondBase - area->Base()),
809 			area->name, secondSize, area->wiring, area->protection,
810 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
811 			&addressRestrictions, kernel, &secondArea, NULL);
812 		if (error != B_OK) {
813 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
814 			return error;
815 		}
816 		// We need a cache reference for the new area.
817 		cache->AcquireRefLocked();
818 	}
819 
820 	if (_secondArea != NULL)
821 		*_secondArea = secondArea;
822 
823 	return B_OK;
824 }
825 
826 
827 /*!	Deletes or cuts all areas in the given address range.
828 	The address space must be write-locked.
829 	The caller must ensure that no part of the given range is wired.
830 */
831 static status_t
832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
833 	bool kernel)
834 {
835 	size = PAGE_ALIGN(size);
836 
837 	// Check, whether the caller is allowed to modify the concerned areas.
838 	if (!kernel) {
839 		for (VMAddressSpace::AreaRangeIterator it
840 				= addressSpace->GetAreaRangeIterator(address, size);
841 			VMArea* area = it.Next();) {
842 
843 			if ((area->protection & B_KERNEL_AREA) != 0) {
844 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
845 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
846 					team_get_current_team_id(), area->id, area->name);
847 				return B_NOT_ALLOWED;
848 			}
849 		}
850 	}
851 
852 	for (VMAddressSpace::AreaRangeIterator it
853 			= addressSpace->GetAreaRangeIterator(address, size);
854 		VMArea* area = it.Next();) {
855 
856 		status_t error = cut_area(addressSpace, area, address, size, NULL,
857 			kernel);
858 		if (error != B_OK)
859 			return error;
860 			// Failing after already messing with areas is ugly, but we
861 			// can't do anything about it.
862 	}
863 
864 	return B_OK;
865 }
866 
867 
868 static status_t
869 discard_area_range(VMArea* area, addr_t address, addr_t size)
870 {
871 	addr_t offset;
872 	if (!intersect_area(area, address, size, offset))
873 		return B_OK;
874 
875 	// If someone else uses the area's cache or it's not an anonymous cache, we
876 	// can't discard.
877 	VMCache* cache = vm_area_get_locked_cache(area);
878 	if (cache->areas != area || area->cache_next != NULL
879 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
880 		return B_OK;
881 	}
882 
883 	VMCacheChainLocker cacheChainLocker(cache);
884 	cacheChainLocker.LockAllSourceCaches();
885 
886 	unmap_pages(area, address, size);
887 
888 	// Since VMCache::Discard() can temporarily drop the lock, we must
889 	// unlock all lower caches to prevent locking order inversion.
890 	cacheChainLocker.Unlock(cache);
891 	cache->Discard(cache->virtual_base + offset, size);
892 	cache->ReleaseRefAndUnlock();
893 
894 	return B_OK;
895 }
896 
897 
898 static status_t
899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
900 	bool kernel)
901 {
902 	for (VMAddressSpace::AreaRangeIterator it
903 		= addressSpace->GetAreaRangeIterator(address, size);
904 			VMArea* area = it.Next();) {
905 		status_t error = discard_area_range(area, address, size);
906 		if (error != B_OK)
907 			return error;
908 	}
909 
910 	return B_OK;
911 }
912 
913 
914 /*! You need to hold the lock of the cache and the write lock of the address
915 	space when calling this function.
916 	Note, that in case of error your cache will be temporarily unlocked.
917 	If \a addressSpec is \c B_EXACT_ADDRESS and the
918 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
919 	that no part of the specified address range (base \c *_virtualAddress, size
920 	\a size) is wired.
921 */
922 static status_t
923 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
924 	const char* areaName, addr_t size, int wiring, int protection,
925 	int protectionMax, int mapping,
926 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
927 	bool kernel, VMArea** _area, void** _virtualAddress)
928 {
929 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
930 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
931 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
932 		addressSpace, cache, addressRestrictions->address, offset, size,
933 		addressRestrictions->address_specification, wiring, protection,
934 		protectionMax, _area, areaName));
935 	cache->AssertLocked();
936 
937 	if (size == 0) {
938 #if KDEBUG
939 		panic("map_backing_store(): called with size=0 for area '%s'!",
940 			areaName);
941 #endif
942 		return B_BAD_VALUE;
943 	}
944 	if (offset < 0)
945 		return B_BAD_VALUE;
946 
947 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
948 		| HEAP_DONT_LOCK_KERNEL_SPACE;
949 	int priority;
950 	if (addressSpace != VMAddressSpace::Kernel()) {
951 		priority = VM_PRIORITY_USER;
952 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
953 		priority = VM_PRIORITY_VIP;
954 		allocationFlags |= HEAP_PRIORITY_VIP;
955 	} else
956 		priority = VM_PRIORITY_SYSTEM;
957 
958 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
959 		allocationFlags);
960 	if (mapping != REGION_PRIVATE_MAP)
961 		area->protection_max = protectionMax & B_USER_PROTECTION;
962 	if (area == NULL)
963 		return B_NO_MEMORY;
964 
965 	status_t status;
966 
967 	// if this is a private map, we need to create a new cache
968 	// to handle the private copies of pages as they are written to
969 	VMCache* sourceCache = cache;
970 	if (mapping == REGION_PRIVATE_MAP) {
971 		VMCache* newCache;
972 
973 		// create an anonymous cache
974 		status = VMCacheFactory::CreateAnonymousCache(newCache,
975 			(protection & B_STACK_AREA) != 0
976 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
977 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
978 		if (status != B_OK)
979 			goto err1;
980 
981 		newCache->Lock();
982 		newCache->temporary = 1;
983 		newCache->virtual_base = offset;
984 		newCache->virtual_end = offset + size;
985 
986 		cache->AddConsumer(newCache);
987 
988 		cache = newCache;
989 	}
990 
991 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
992 		status = cache->SetMinimalCommitment(size, priority);
993 		if (status != B_OK)
994 			goto err2;
995 	}
996 
997 	// check to see if this address space has entered DELETE state
998 	if (addressSpace->IsBeingDeleted()) {
999 		// okay, someone is trying to delete this address space now, so we can't
1000 		// insert the area, so back out
1001 		status = B_BAD_TEAM_ID;
1002 		goto err2;
1003 	}
1004 
1005 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1006 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1007 		status = unmap_address_range(addressSpace,
1008 			(addr_t)addressRestrictions->address, size, kernel);
1009 		if (status != B_OK)
1010 			goto err2;
1011 	}
1012 
1013 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1014 		allocationFlags, _virtualAddress);
1015 	if (status == B_NO_MEMORY
1016 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1017 		// Due to how many locks are held, we cannot wait here for space to be
1018 		// freed up, but we can at least notify the low_resource handler.
1019 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1020 	}
1021 	if (status != B_OK)
1022 		goto err2;
1023 
1024 	// attach the cache to the area
1025 	area->cache = cache;
1026 	area->cache_offset = offset;
1027 
1028 	// point the cache back to the area
1029 	cache->InsertAreaLocked(area);
1030 	if (mapping == REGION_PRIVATE_MAP)
1031 		cache->Unlock();
1032 
1033 	// insert the area in the global areas map
1034 	VMAreas::Insert(area);
1035 
1036 	// grab a ref to the address space (the area holds this)
1037 	addressSpace->Get();
1038 
1039 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1040 //		cache, sourceCache, areaName, area);
1041 
1042 	*_area = area;
1043 	return B_OK;
1044 
1045 err2:
1046 	if (mapping == REGION_PRIVATE_MAP) {
1047 		// We created this cache, so we must delete it again. Note, that we
1048 		// need to temporarily unlock the source cache or we'll otherwise
1049 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1050 		sourceCache->Unlock();
1051 		cache->ReleaseRefAndUnlock();
1052 		sourceCache->Lock();
1053 	}
1054 err1:
1055 	addressSpace->DeleteArea(area, allocationFlags);
1056 	return status;
1057 }
1058 
1059 
1060 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1061 	  locker1, locker2).
1062 */
1063 template<typename LockerType1, typename LockerType2>
1064 static inline bool
1065 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1066 {
1067 	area->cache->AssertLocked();
1068 
1069 	VMAreaUnwiredWaiter waiter;
1070 	if (!area->AddWaiterIfWired(&waiter))
1071 		return false;
1072 
1073 	// unlock everything and wait
1074 	if (locker1 != NULL)
1075 		locker1->Unlock();
1076 	if (locker2 != NULL)
1077 		locker2->Unlock();
1078 
1079 	waiter.waitEntry.Wait();
1080 
1081 	return true;
1082 }
1083 
1084 
1085 /*!	Checks whether the given area has any wired ranges intersecting with the
1086 	specified range and waits, if so.
1087 
1088 	When it has to wait, the function calls \c Unlock() on both \a locker1
1089 	and \a locker2, if given.
1090 	The area's top cache must be locked and must be unlocked as a side effect
1091 	of calling \c Unlock() on either \a locker1 or \a locker2.
1092 
1093 	If the function does not have to wait it does not modify or unlock any
1094 	object.
1095 
1096 	\param area The area to be checked.
1097 	\param base The base address of the range to check.
1098 	\param size The size of the address range to check.
1099 	\param locker1 An object to be unlocked when before starting to wait (may
1100 		be \c NULL).
1101 	\param locker2 An object to be unlocked when before starting to wait (may
1102 		be \c NULL).
1103 	\return \c true, if the function had to wait, \c false otherwise.
1104 */
1105 template<typename LockerType1, typename LockerType2>
1106 static inline bool
1107 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1108 	LockerType1* locker1, LockerType2* locker2)
1109 {
1110 	area->cache->AssertLocked();
1111 
1112 	VMAreaUnwiredWaiter waiter;
1113 	if (!area->AddWaiterIfWired(&waiter, base, size))
1114 		return false;
1115 
1116 	// unlock everything and wait
1117 	if (locker1 != NULL)
1118 		locker1->Unlock();
1119 	if (locker2 != NULL)
1120 		locker2->Unlock();
1121 
1122 	waiter.waitEntry.Wait();
1123 
1124 	return true;
1125 }
1126 
1127 
1128 /*!	Checks whether the given address space has any wired ranges intersecting
1129 	with the specified range and waits, if so.
1130 
1131 	Similar to wait_if_area_range_is_wired(), with the following differences:
1132 	- All areas intersecting with the range are checked (respectively all until
1133 	  one is found that contains a wired range intersecting with the given
1134 	  range).
1135 	- The given address space must at least be read-locked and must be unlocked
1136 	  when \c Unlock() is called on \a locker.
1137 	- None of the areas' caches are allowed to be locked.
1138 */
1139 template<typename LockerType>
1140 static inline bool
1141 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1142 	size_t size, LockerType* locker)
1143 {
1144 	for (VMAddressSpace::AreaRangeIterator it
1145 		= addressSpace->GetAreaRangeIterator(base, size);
1146 			VMArea* area = it.Next();) {
1147 
1148 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1149 
1150 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1151 			return true;
1152 	}
1153 
1154 	return false;
1155 }
1156 
1157 
1158 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1159 	It must be called in a situation where the kernel address space may be
1160 	locked.
1161 */
1162 status_t
1163 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1164 {
1165 	AddressSpaceReadLocker locker;
1166 	VMArea* area;
1167 	status_t status = locker.SetFromArea(id, area);
1168 	if (status != B_OK)
1169 		return status;
1170 
1171 	if (area->page_protections == NULL) {
1172 		status = allocate_area_page_protections(area);
1173 		if (status != B_OK)
1174 			return status;
1175 	}
1176 
1177 	*cookie = (void*)area;
1178 	return B_OK;
1179 }
1180 
1181 
1182 /*!	This is a debug helper function that can only be used with very specific
1183 	use cases.
1184 	Sets protection for the given address range to the protection specified.
1185 	If \a protection is 0 then the involved pages will be marked non-present
1186 	in the translation map to cause a fault on access. The pages aren't
1187 	actually unmapped however so that they can be marked present again with
1188 	additional calls to this function. For this to work the area must be
1189 	fully locked in memory so that the pages aren't otherwise touched.
1190 	This function does not lock the kernel address space and needs to be
1191 	supplied with a \a cookie retrieved from a successful call to
1192 	vm_prepare_kernel_area_debug_protection().
1193 */
1194 status_t
1195 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1196 	uint32 protection)
1197 {
1198 	// check address range
1199 	addr_t address = (addr_t)_address;
1200 	size = PAGE_ALIGN(size);
1201 
1202 	if ((address % B_PAGE_SIZE) != 0
1203 		|| (addr_t)address + size < (addr_t)address
1204 		|| !IS_KERNEL_ADDRESS(address)
1205 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1206 		return B_BAD_VALUE;
1207 	}
1208 
1209 	// Translate the kernel protection to user protection as we only store that.
1210 	if ((protection & B_KERNEL_READ_AREA) != 0)
1211 		protection |= B_READ_AREA;
1212 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1213 		protection |= B_WRITE_AREA;
1214 
1215 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1216 	VMTranslationMap* map = addressSpace->TranslationMap();
1217 	VMArea* area = (VMArea*)cookie;
1218 
1219 	addr_t offset = address - area->Base();
1220 	if (area->Size() - offset < size) {
1221 		panic("protect range not fully within supplied area");
1222 		return B_BAD_VALUE;
1223 	}
1224 
1225 	if (area->page_protections == NULL) {
1226 		panic("area has no page protections");
1227 		return B_BAD_VALUE;
1228 	}
1229 
1230 	// Invalidate the mapping entries so any access to them will fault or
1231 	// restore the mapping entries unchanged so that lookup will success again.
1232 	map->Lock();
1233 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1234 	map->Unlock();
1235 
1236 	// And set the proper page protections so that the fault case will actually
1237 	// fail and not simply try to map a new page.
1238 	for (addr_t pageAddress = address; pageAddress < address + size;
1239 			pageAddress += B_PAGE_SIZE) {
1240 		set_area_page_protection(area, pageAddress, protection);
1241 	}
1242 
1243 	return B_OK;
1244 }
1245 
1246 
1247 status_t
1248 vm_block_address_range(const char* name, void* address, addr_t size)
1249 {
1250 	if (!arch_vm_supports_protection(0))
1251 		return B_NOT_SUPPORTED;
1252 
1253 	AddressSpaceWriteLocker locker;
1254 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1255 	if (status != B_OK)
1256 		return status;
1257 
1258 	VMAddressSpace* addressSpace = locker.AddressSpace();
1259 
1260 	// create an anonymous cache
1261 	VMCache* cache;
1262 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1263 		VM_PRIORITY_SYSTEM);
1264 	if (status != B_OK)
1265 		return status;
1266 
1267 	cache->temporary = 1;
1268 	cache->virtual_end = size;
1269 	cache->Lock();
1270 
1271 	VMArea* area;
1272 	virtual_address_restrictions addressRestrictions = {};
1273 	addressRestrictions.address = address;
1274 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1275 	status = map_backing_store(addressSpace, cache, 0, name, size,
1276 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
1277 		true, &area, NULL);
1278 	if (status != B_OK) {
1279 		cache->ReleaseRefAndUnlock();
1280 		return status;
1281 	}
1282 
1283 	cache->Unlock();
1284 	area->cache_type = CACHE_TYPE_RAM;
1285 	return area->id;
1286 }
1287 
1288 
1289 status_t
1290 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1291 {
1292 	AddressSpaceWriteLocker locker(team);
1293 	if (!locker.IsLocked())
1294 		return B_BAD_TEAM_ID;
1295 
1296 	VMAddressSpace* addressSpace = locker.AddressSpace();
1297 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1298 		addressSpace == VMAddressSpace::Kernel()
1299 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1300 }
1301 
1302 
1303 status_t
1304 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1305 	addr_t size, uint32 flags)
1306 {
1307 	if (size == 0)
1308 		return B_BAD_VALUE;
1309 
1310 	AddressSpaceWriteLocker locker(team);
1311 	if (!locker.IsLocked())
1312 		return B_BAD_TEAM_ID;
1313 
1314 	virtual_address_restrictions addressRestrictions = {};
1315 	addressRestrictions.address = *_address;
1316 	addressRestrictions.address_specification = addressSpec;
1317 	VMAddressSpace* addressSpace = locker.AddressSpace();
1318 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1319 		addressSpace == VMAddressSpace::Kernel()
1320 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1321 		_address);
1322 }
1323 
1324 
1325 area_id
1326 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1327 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1328 	const virtual_address_restrictions* virtualAddressRestrictions,
1329 	const physical_address_restrictions* physicalAddressRestrictions,
1330 	bool kernel, void** _address)
1331 {
1332 	VMArea* area;
1333 	VMCache* cache;
1334 	vm_page* page = NULL;
1335 	bool isStack = (protection & B_STACK_AREA) != 0;
1336 	page_num_t guardPages;
1337 	bool canOvercommit = false;
1338 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1339 		? VM_PAGE_ALLOC_CLEAR : 0;
1340 
1341 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1342 		team, name, size));
1343 
1344 	size = PAGE_ALIGN(size);
1345 	guardSize = PAGE_ALIGN(guardSize);
1346 	guardPages = guardSize / B_PAGE_SIZE;
1347 
1348 	if (size == 0 || size < guardSize)
1349 		return B_BAD_VALUE;
1350 	if (!arch_vm_supports_protection(protection))
1351 		return B_NOT_SUPPORTED;
1352 
1353 	if (team == B_CURRENT_TEAM)
1354 		team = VMAddressSpace::CurrentID();
1355 	if (team < 0)
1356 		return B_BAD_TEAM_ID;
1357 
1358 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1359 		canOvercommit = true;
1360 
1361 #ifdef DEBUG_KERNEL_STACKS
1362 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1363 		isStack = true;
1364 #endif
1365 
1366 	// check parameters
1367 	switch (virtualAddressRestrictions->address_specification) {
1368 		case B_ANY_ADDRESS:
1369 		case B_EXACT_ADDRESS:
1370 		case B_BASE_ADDRESS:
1371 		case B_ANY_KERNEL_ADDRESS:
1372 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1373 		case B_RANDOMIZED_ANY_ADDRESS:
1374 		case B_RANDOMIZED_BASE_ADDRESS:
1375 			break;
1376 
1377 		default:
1378 			return B_BAD_VALUE;
1379 	}
1380 
1381 	// If low or high physical address restrictions are given, we force
1382 	// B_CONTIGUOUS wiring, since only then we'll use
1383 	// vm_page_allocate_page_run() which deals with those restrictions.
1384 	if (physicalAddressRestrictions->low_address != 0
1385 		|| physicalAddressRestrictions->high_address != 0) {
1386 		wiring = B_CONTIGUOUS;
1387 	}
1388 
1389 	physical_address_restrictions stackPhysicalRestrictions;
1390 	bool doReserveMemory = false;
1391 	switch (wiring) {
1392 		case B_NO_LOCK:
1393 			break;
1394 		case B_FULL_LOCK:
1395 		case B_LAZY_LOCK:
1396 		case B_CONTIGUOUS:
1397 			doReserveMemory = true;
1398 			break;
1399 		case B_ALREADY_WIRED:
1400 			break;
1401 		case B_LOMEM:
1402 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1403 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1404 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1405 			wiring = B_CONTIGUOUS;
1406 			doReserveMemory = true;
1407 			break;
1408 		case B_32_BIT_FULL_LOCK:
1409 			if (B_HAIKU_PHYSICAL_BITS <= 32
1410 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1411 				wiring = B_FULL_LOCK;
1412 				doReserveMemory = true;
1413 				break;
1414 			}
1415 			// TODO: We don't really support this mode efficiently. Just fall
1416 			// through for now ...
1417 		case B_32_BIT_CONTIGUOUS:
1418 			#if B_HAIKU_PHYSICAL_BITS > 32
1419 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1420 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1421 					stackPhysicalRestrictions.high_address
1422 						= (phys_addr_t)1 << 32;
1423 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1424 				}
1425 			#endif
1426 			wiring = B_CONTIGUOUS;
1427 			doReserveMemory = true;
1428 			break;
1429 		default:
1430 			return B_BAD_VALUE;
1431 	}
1432 
1433 	// Optimization: For a single-page contiguous allocation without low/high
1434 	// memory restriction B_FULL_LOCK wiring suffices.
1435 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1436 		&& physicalAddressRestrictions->low_address == 0
1437 		&& physicalAddressRestrictions->high_address == 0) {
1438 		wiring = B_FULL_LOCK;
1439 	}
1440 
1441 	// For full lock or contiguous areas we're also going to map the pages and
1442 	// thus need to reserve pages for the mapping backend upfront.
1443 	addr_t reservedMapPages = 0;
1444 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1445 		AddressSpaceWriteLocker locker;
1446 		status_t status = locker.SetTo(team);
1447 		if (status != B_OK)
1448 			return status;
1449 
1450 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1451 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1452 	}
1453 
1454 	int priority;
1455 	if (team != VMAddressSpace::KernelID())
1456 		priority = VM_PRIORITY_USER;
1457 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1458 		priority = VM_PRIORITY_VIP;
1459 	else
1460 		priority = VM_PRIORITY_SYSTEM;
1461 
1462 	// Reserve memory before acquiring the address space lock. This reduces the
1463 	// chances of failure, since while holding the write lock to the address
1464 	// space (if it is the kernel address space that is), the low memory handler
1465 	// won't be able to free anything for us.
1466 	addr_t reservedMemory = 0;
1467 	if (doReserveMemory) {
1468 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1469 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1470 			return B_NO_MEMORY;
1471 		reservedMemory = size;
1472 		// TODO: We don't reserve the memory for the pages for the page
1473 		// directories/tables. We actually need to do since we currently don't
1474 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1475 		// there are actually less physical pages than there should be, which
1476 		// can get the VM into trouble in low memory situations.
1477 	}
1478 
1479 	AddressSpaceWriteLocker locker;
1480 	VMAddressSpace* addressSpace;
1481 	status_t status;
1482 
1483 	// For full lock areas reserve the pages before locking the address
1484 	// space. E.g. block caches can't release their memory while we hold the
1485 	// address space lock.
1486 	page_num_t reservedPages = reservedMapPages;
1487 	if (wiring == B_FULL_LOCK)
1488 		reservedPages += size / B_PAGE_SIZE;
1489 
1490 	vm_page_reservation reservation;
1491 	if (reservedPages > 0) {
1492 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1493 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1494 					priority)) {
1495 				reservedPages = 0;
1496 				status = B_WOULD_BLOCK;
1497 				goto err0;
1498 			}
1499 		} else
1500 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1501 	}
1502 
1503 	if (wiring == B_CONTIGUOUS) {
1504 		// we try to allocate the page run here upfront as this may easily
1505 		// fail for obvious reasons
1506 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1507 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1508 		if (page == NULL) {
1509 			status = B_NO_MEMORY;
1510 			goto err0;
1511 		}
1512 	}
1513 
1514 	// Lock the address space and, if B_EXACT_ADDRESS and
1515 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1516 	// is not wired.
1517 	do {
1518 		status = locker.SetTo(team);
1519 		if (status != B_OK)
1520 			goto err1;
1521 
1522 		addressSpace = locker.AddressSpace();
1523 	} while (virtualAddressRestrictions->address_specification
1524 			== B_EXACT_ADDRESS
1525 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1526 		&& wait_if_address_range_is_wired(addressSpace,
1527 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1528 
1529 	// create an anonymous cache
1530 	// if it's a stack, make sure that two pages are available at least
1531 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1532 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1533 		wiring == B_NO_LOCK, priority);
1534 	if (status != B_OK)
1535 		goto err1;
1536 
1537 	cache->temporary = 1;
1538 	cache->virtual_end = size;
1539 	cache->committed_size = reservedMemory;
1540 		// TODO: This should be done via a method.
1541 	reservedMemory = 0;
1542 
1543 	cache->Lock();
1544 
1545 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1546 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1547 		virtualAddressRestrictions, kernel, &area, _address);
1548 
1549 	if (status != B_OK) {
1550 		cache->ReleaseRefAndUnlock();
1551 		goto err1;
1552 	}
1553 
1554 	locker.DegradeToReadLock();
1555 
1556 	switch (wiring) {
1557 		case B_NO_LOCK:
1558 		case B_LAZY_LOCK:
1559 			// do nothing - the pages are mapped in as needed
1560 			break;
1561 
1562 		case B_FULL_LOCK:
1563 		{
1564 			// Allocate and map all pages for this area
1565 
1566 			off_t offset = 0;
1567 			for (addr_t address = area->Base();
1568 					address < area->Base() + (area->Size() - 1);
1569 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1570 #ifdef DEBUG_KERNEL_STACKS
1571 #	ifdef STACK_GROWS_DOWNWARDS
1572 				if (isStack && address < area->Base()
1573 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1574 #	else
1575 				if (isStack && address >= area->Base() + area->Size()
1576 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1577 #	endif
1578 					continue;
1579 #endif
1580 				vm_page* page = vm_page_allocate_page(&reservation,
1581 					PAGE_STATE_WIRED | pageAllocFlags);
1582 				cache->InsertPage(page, offset);
1583 				map_page(area, page, address, protection, &reservation);
1584 
1585 				DEBUG_PAGE_ACCESS_END(page);
1586 			}
1587 
1588 			break;
1589 		}
1590 
1591 		case B_ALREADY_WIRED:
1592 		{
1593 			// The pages should already be mapped. This is only really useful
1594 			// during boot time. Find the appropriate vm_page objects and stick
1595 			// them in the cache object.
1596 			VMTranslationMap* map = addressSpace->TranslationMap();
1597 			off_t offset = 0;
1598 
1599 			if (!gKernelStartup)
1600 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1601 
1602 			map->Lock();
1603 
1604 			for (addr_t virtualAddress = area->Base();
1605 					virtualAddress < area->Base() + (area->Size() - 1);
1606 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1607 				phys_addr_t physicalAddress;
1608 				uint32 flags;
1609 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1610 				if (status < B_OK) {
1611 					panic("looking up mapping failed for va 0x%lx\n",
1612 						virtualAddress);
1613 				}
1614 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1615 				if (page == NULL) {
1616 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1617 						"\n", physicalAddress);
1618 				}
1619 
1620 				DEBUG_PAGE_ACCESS_START(page);
1621 
1622 				cache->InsertPage(page, offset);
1623 				increment_page_wired_count(page);
1624 				vm_page_set_state(page, PAGE_STATE_WIRED);
1625 				page->busy = false;
1626 
1627 				DEBUG_PAGE_ACCESS_END(page);
1628 			}
1629 
1630 			map->Unlock();
1631 			break;
1632 		}
1633 
1634 		case B_CONTIGUOUS:
1635 		{
1636 			// We have already allocated our continuous pages run, so we can now
1637 			// just map them in the address space
1638 			VMTranslationMap* map = addressSpace->TranslationMap();
1639 			phys_addr_t physicalAddress
1640 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1641 			addr_t virtualAddress = area->Base();
1642 			off_t offset = 0;
1643 
1644 			map->Lock();
1645 
1646 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1647 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1648 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1649 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1650 				if (page == NULL)
1651 					panic("couldn't lookup physical page just allocated\n");
1652 
1653 				status = map->Map(virtualAddress, physicalAddress, protection,
1654 					area->MemoryType(), &reservation);
1655 				if (status < B_OK)
1656 					panic("couldn't map physical page in page run\n");
1657 
1658 				cache->InsertPage(page, offset);
1659 				increment_page_wired_count(page);
1660 
1661 				DEBUG_PAGE_ACCESS_END(page);
1662 			}
1663 
1664 			map->Unlock();
1665 			break;
1666 		}
1667 
1668 		default:
1669 			break;
1670 	}
1671 
1672 	cache->Unlock();
1673 
1674 	if (reservedPages > 0)
1675 		vm_page_unreserve_pages(&reservation);
1676 
1677 	TRACE(("vm_create_anonymous_area: done\n"));
1678 
1679 	area->cache_type = CACHE_TYPE_RAM;
1680 	return area->id;
1681 
1682 err1:
1683 	if (wiring == B_CONTIGUOUS) {
1684 		// we had reserved the area space upfront...
1685 		phys_addr_t pageNumber = page->physical_page_number;
1686 		int32 i;
1687 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1688 			page = vm_lookup_page(pageNumber);
1689 			if (page == NULL)
1690 				panic("couldn't lookup physical page just allocated\n");
1691 
1692 			vm_page_set_state(page, PAGE_STATE_FREE);
1693 		}
1694 	}
1695 
1696 err0:
1697 	if (reservedPages > 0)
1698 		vm_page_unreserve_pages(&reservation);
1699 	if (reservedMemory > 0)
1700 		vm_unreserve_memory(reservedMemory);
1701 
1702 	return status;
1703 }
1704 
1705 
1706 area_id
1707 vm_map_physical_memory(team_id team, const char* name, void** _address,
1708 	uint32 addressSpec, addr_t size, uint32 protection,
1709 	phys_addr_t physicalAddress, bool alreadyWired)
1710 {
1711 	VMArea* area;
1712 	VMCache* cache;
1713 	addr_t mapOffset;
1714 
1715 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1716 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1717 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1718 		addressSpec, size, protection, physicalAddress));
1719 
1720 	if (!arch_vm_supports_protection(protection))
1721 		return B_NOT_SUPPORTED;
1722 
1723 	AddressSpaceWriteLocker locker(team);
1724 	if (!locker.IsLocked())
1725 		return B_BAD_TEAM_ID;
1726 
1727 	// if the physical address is somewhat inside a page,
1728 	// move the actual area down to align on a page boundary
1729 	mapOffset = physicalAddress % B_PAGE_SIZE;
1730 	size += mapOffset;
1731 	physicalAddress -= mapOffset;
1732 
1733 	size = PAGE_ALIGN(size);
1734 
1735 	// create a device cache
1736 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1737 	if (status != B_OK)
1738 		return status;
1739 
1740 	cache->virtual_end = size;
1741 
1742 	cache->Lock();
1743 
1744 	virtual_address_restrictions addressRestrictions = {};
1745 	addressRestrictions.address = *_address;
1746 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1747 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1748 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1749 		true, &area, _address);
1750 
1751 	if (status < B_OK)
1752 		cache->ReleaseRefLocked();
1753 
1754 	cache->Unlock();
1755 
1756 	if (status == B_OK) {
1757 		// set requested memory type -- use uncached, if not given
1758 		uint32 memoryType = addressSpec & B_MTR_MASK;
1759 		if (memoryType == 0)
1760 			memoryType = B_MTR_UC;
1761 
1762 		area->SetMemoryType(memoryType);
1763 
1764 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1765 		if (status != B_OK)
1766 			delete_area(locker.AddressSpace(), area, false);
1767 	}
1768 
1769 	if (status != B_OK)
1770 		return status;
1771 
1772 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1773 
1774 	if (alreadyWired) {
1775 		// The area is already mapped, but possibly not with the right
1776 		// memory type.
1777 		map->Lock();
1778 		map->ProtectArea(area, area->protection);
1779 		map->Unlock();
1780 	} else {
1781 		// Map the area completely.
1782 
1783 		// reserve pages needed for the mapping
1784 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1785 			area->Base() + (size - 1));
1786 		vm_page_reservation reservation;
1787 		vm_page_reserve_pages(&reservation, reservePages,
1788 			team == VMAddressSpace::KernelID()
1789 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1790 
1791 		map->Lock();
1792 
1793 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1794 			map->Map(area->Base() + offset, physicalAddress + offset,
1795 				protection, area->MemoryType(), &reservation);
1796 		}
1797 
1798 		map->Unlock();
1799 
1800 		vm_page_unreserve_pages(&reservation);
1801 	}
1802 
1803 	// modify the pointer returned to be offset back into the new area
1804 	// the same way the physical address in was offset
1805 	*_address = (void*)((addr_t)*_address + mapOffset);
1806 
1807 	area->cache_type = CACHE_TYPE_DEVICE;
1808 	return area->id;
1809 }
1810 
1811 
1812 /*!	Don't use!
1813 	TODO: This function was introduced to map physical page vecs to
1814 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1815 	use a device cache and does not track vm_page::wired_count!
1816 */
1817 area_id
1818 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1819 	uint32 addressSpec, addr_t* _size, uint32 protection,
1820 	struct generic_io_vec* vecs, uint32 vecCount)
1821 {
1822 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1823 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1824 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1825 		addressSpec, _size, protection, vecs, vecCount));
1826 
1827 	if (!arch_vm_supports_protection(protection)
1828 		|| (addressSpec & B_MTR_MASK) != 0) {
1829 		return B_NOT_SUPPORTED;
1830 	}
1831 
1832 	AddressSpaceWriteLocker locker(team);
1833 	if (!locker.IsLocked())
1834 		return B_BAD_TEAM_ID;
1835 
1836 	if (vecCount == 0)
1837 		return B_BAD_VALUE;
1838 
1839 	addr_t size = 0;
1840 	for (uint32 i = 0; i < vecCount; i++) {
1841 		if (vecs[i].base % B_PAGE_SIZE != 0
1842 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1843 			return B_BAD_VALUE;
1844 		}
1845 
1846 		size += vecs[i].length;
1847 	}
1848 
1849 	// create a device cache
1850 	VMCache* cache;
1851 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1852 	if (result != B_OK)
1853 		return result;
1854 
1855 	cache->virtual_end = size;
1856 
1857 	cache->Lock();
1858 
1859 	VMArea* area;
1860 	virtual_address_restrictions addressRestrictions = {};
1861 	addressRestrictions.address = *_address;
1862 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1863 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1864 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
1865 		&addressRestrictions, true, &area, _address);
1866 
1867 	if (result != B_OK)
1868 		cache->ReleaseRefLocked();
1869 
1870 	cache->Unlock();
1871 
1872 	if (result != B_OK)
1873 		return result;
1874 
1875 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1876 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1877 		area->Base() + (size - 1));
1878 
1879 	vm_page_reservation reservation;
1880 	vm_page_reserve_pages(&reservation, reservePages,
1881 			team == VMAddressSpace::KernelID()
1882 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1883 	map->Lock();
1884 
1885 	uint32 vecIndex = 0;
1886 	size_t vecOffset = 0;
1887 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1888 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1889 			vecOffset = 0;
1890 			vecIndex++;
1891 		}
1892 
1893 		if (vecIndex >= vecCount)
1894 			break;
1895 
1896 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1897 			protection, area->MemoryType(), &reservation);
1898 
1899 		vecOffset += B_PAGE_SIZE;
1900 	}
1901 
1902 	map->Unlock();
1903 	vm_page_unreserve_pages(&reservation);
1904 
1905 	if (_size != NULL)
1906 		*_size = size;
1907 
1908 	area->cache_type = CACHE_TYPE_DEVICE;
1909 	return area->id;
1910 }
1911 
1912 
1913 area_id
1914 vm_create_null_area(team_id team, const char* name, void** address,
1915 	uint32 addressSpec, addr_t size, uint32 flags)
1916 {
1917 	size = PAGE_ALIGN(size);
1918 
1919 	// Lock the address space and, if B_EXACT_ADDRESS and
1920 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1921 	// is not wired.
1922 	AddressSpaceWriteLocker locker;
1923 	do {
1924 		if (locker.SetTo(team) != B_OK)
1925 			return B_BAD_TEAM_ID;
1926 	} while (addressSpec == B_EXACT_ADDRESS
1927 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1928 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1929 			(addr_t)*address, size, &locker));
1930 
1931 	// create a null cache
1932 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1933 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1934 	VMCache* cache;
1935 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1936 	if (status != B_OK)
1937 		return status;
1938 
1939 	cache->temporary = 1;
1940 	cache->virtual_end = size;
1941 
1942 	cache->Lock();
1943 
1944 	VMArea* area;
1945 	virtual_address_restrictions addressRestrictions = {};
1946 	addressRestrictions.address = *address;
1947 	addressRestrictions.address_specification = addressSpec;
1948 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1949 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
1950 		REGION_NO_PRIVATE_MAP, flags,
1951 		&addressRestrictions, true, &area, address);
1952 
1953 	if (status < B_OK) {
1954 		cache->ReleaseRefAndUnlock();
1955 		return status;
1956 	}
1957 
1958 	cache->Unlock();
1959 
1960 	area->cache_type = CACHE_TYPE_NULL;
1961 	return area->id;
1962 }
1963 
1964 
1965 /*!	Creates the vnode cache for the specified \a vnode.
1966 	The vnode has to be marked busy when calling this function.
1967 */
1968 status_t
1969 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1970 {
1971 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1972 }
1973 
1974 
1975 /*!	\a cache must be locked. The area's address space must be read-locked.
1976 */
1977 static void
1978 pre_map_area_pages(VMArea* area, VMCache* cache,
1979 	vm_page_reservation* reservation)
1980 {
1981 	addr_t baseAddress = area->Base();
1982 	addr_t cacheOffset = area->cache_offset;
1983 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1984 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1985 
1986 	for (VMCachePagesTree::Iterator it
1987 				= cache->pages.GetIterator(firstPage, true, true);
1988 			vm_page* page = it.Next();) {
1989 		if (page->cache_offset >= endPage)
1990 			break;
1991 
1992 		// skip busy and inactive pages
1993 		if (page->busy || page->usage_count == 0)
1994 			continue;
1995 
1996 		DEBUG_PAGE_ACCESS_START(page);
1997 		map_page(area, page,
1998 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1999 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2000 		DEBUG_PAGE_ACCESS_END(page);
2001 	}
2002 }
2003 
2004 
2005 /*!	Will map the file specified by \a fd to an area in memory.
2006 	The file will be mirrored beginning at the specified \a offset. The
2007 	\a offset and \a size arguments have to be page aligned.
2008 */
2009 static area_id
2010 _vm_map_file(team_id team, const char* name, void** _address,
2011 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2012 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2013 {
2014 	// TODO: for binary files, we want to make sure that they get the
2015 	//	copy of a file at a given time, ie. later changes should not
2016 	//	make it into the mapped copy -- this will need quite some changes
2017 	//	to be done in a nice way
2018 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2019 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2020 
2021 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2022 	size = PAGE_ALIGN(size);
2023 
2024 	if (mapping == REGION_NO_PRIVATE_MAP)
2025 		protection |= B_SHARED_AREA;
2026 	if (addressSpec != B_EXACT_ADDRESS)
2027 		unmapAddressRange = false;
2028 
2029 	if (fd < 0) {
2030 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2031 		virtual_address_restrictions virtualRestrictions = {};
2032 		virtualRestrictions.address = *_address;
2033 		virtualRestrictions.address_specification = addressSpec;
2034 		physical_address_restrictions physicalRestrictions = {};
2035 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2036 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2037 			_address);
2038 	}
2039 
2040 	// get the open flags of the FD
2041 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2042 	if (descriptor == NULL)
2043 		return EBADF;
2044 	int32 openMode = descriptor->open_mode;
2045 	put_fd(descriptor);
2046 
2047 	// The FD must open for reading at any rate. For shared mapping with write
2048 	// access, additionally the FD must be open for writing.
2049 	if ((openMode & O_ACCMODE) == O_WRONLY
2050 		|| (mapping == REGION_NO_PRIVATE_MAP
2051 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2052 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2053 		return EACCES;
2054 	}
2055 
2056 	uint32 protectionMax = 0;
2057 	if (mapping != REGION_PRIVATE_MAP) {
2058 		protectionMax = protection | B_READ_AREA;
2059 		if ((openMode & O_ACCMODE) == O_RDWR)
2060 			protectionMax |= B_WRITE_AREA;
2061 	}
2062 
2063 	// get the vnode for the object, this also grabs a ref to it
2064 	struct vnode* vnode = NULL;
2065 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2066 	if (status < B_OK)
2067 		return status;
2068 	VnodePutter vnodePutter(vnode);
2069 
2070 	// If we're going to pre-map pages, we need to reserve the pages needed by
2071 	// the mapping backend upfront.
2072 	page_num_t reservedPreMapPages = 0;
2073 	vm_page_reservation reservation;
2074 	if ((protection & B_READ_AREA) != 0) {
2075 		AddressSpaceWriteLocker locker;
2076 		status = locker.SetTo(team);
2077 		if (status != B_OK)
2078 			return status;
2079 
2080 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2081 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2082 
2083 		locker.Unlock();
2084 
2085 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2086 			team == VMAddressSpace::KernelID()
2087 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2088 	}
2089 
2090 	struct PageUnreserver {
2091 		PageUnreserver(vm_page_reservation* reservation)
2092 			:
2093 			fReservation(reservation)
2094 		{
2095 		}
2096 
2097 		~PageUnreserver()
2098 		{
2099 			if (fReservation != NULL)
2100 				vm_page_unreserve_pages(fReservation);
2101 		}
2102 
2103 		vm_page_reservation* fReservation;
2104 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2105 
2106 	// Lock the address space and, if the specified address range shall be
2107 	// unmapped, ensure it is not wired.
2108 	AddressSpaceWriteLocker locker;
2109 	do {
2110 		if (locker.SetTo(team) != B_OK)
2111 			return B_BAD_TEAM_ID;
2112 	} while (unmapAddressRange
2113 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2114 			(addr_t)*_address, size, &locker));
2115 
2116 	// TODO: this only works for file systems that use the file cache
2117 	VMCache* cache;
2118 	status = vfs_get_vnode_cache(vnode, &cache, false);
2119 	if (status < B_OK)
2120 		return status;
2121 
2122 	cache->Lock();
2123 
2124 	VMArea* area;
2125 	virtual_address_restrictions addressRestrictions = {};
2126 	addressRestrictions.address = *_address;
2127 	addressRestrictions.address_specification = addressSpec;
2128 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2129 		0, protection, protectionMax, mapping,
2130 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
2131 		&addressRestrictions, kernel, &area, _address);
2132 
2133 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2134 		// map_backing_store() cannot know we no longer need the ref
2135 		cache->ReleaseRefLocked();
2136 	}
2137 
2138 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2139 		pre_map_area_pages(area, cache, &reservation);
2140 
2141 	cache->Unlock();
2142 
2143 	if (status == B_OK) {
2144 		// TODO: this probably deserves a smarter solution, ie. don't always
2145 		// prefetch stuff, and also, probably don't trigger it at this place.
2146 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2147 			// prefetches at max 10 MB starting from "offset"
2148 	}
2149 
2150 	if (status != B_OK)
2151 		return status;
2152 
2153 	area->cache_type = CACHE_TYPE_VNODE;
2154 	return area->id;
2155 }
2156 
2157 
2158 area_id
2159 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2160 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2161 	int fd, off_t offset)
2162 {
2163 	if (!arch_vm_supports_protection(protection))
2164 		return B_NOT_SUPPORTED;
2165 
2166 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2167 		mapping, unmapAddressRange, fd, offset, true);
2168 }
2169 
2170 
2171 VMCache*
2172 vm_area_get_locked_cache(VMArea* area)
2173 {
2174 	rw_lock_read_lock(&sAreaCacheLock);
2175 
2176 	while (true) {
2177 		VMCache* cache = area->cache;
2178 
2179 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2180 			// cache has been deleted
2181 			rw_lock_read_lock(&sAreaCacheLock);
2182 			continue;
2183 		}
2184 
2185 		rw_lock_read_lock(&sAreaCacheLock);
2186 
2187 		if (cache == area->cache) {
2188 			cache->AcquireRefLocked();
2189 			rw_lock_read_unlock(&sAreaCacheLock);
2190 			return cache;
2191 		}
2192 
2193 		// the cache changed in the meantime
2194 		cache->Unlock();
2195 	}
2196 }
2197 
2198 
2199 void
2200 vm_area_put_locked_cache(VMCache* cache)
2201 {
2202 	cache->ReleaseRefAndUnlock();
2203 }
2204 
2205 
2206 area_id
2207 vm_clone_area(team_id team, const char* name, void** address,
2208 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2209 	bool kernel)
2210 {
2211 	VMArea* newArea = NULL;
2212 	VMArea* sourceArea;
2213 
2214 	// Check whether the source area exists and is cloneable. If so, mark it
2215 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2216 	{
2217 		AddressSpaceWriteLocker locker;
2218 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2219 		if (status != B_OK)
2220 			return status;
2221 
2222 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2223 			return B_NOT_ALLOWED;
2224 
2225 		sourceArea->protection |= B_SHARED_AREA;
2226 		protection |= B_SHARED_AREA;
2227 	}
2228 
2229 	// Now lock both address spaces and actually do the cloning.
2230 
2231 	MultiAddressSpaceLocker locker;
2232 	VMAddressSpace* sourceAddressSpace;
2233 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2234 	if (status != B_OK)
2235 		return status;
2236 
2237 	VMAddressSpace* targetAddressSpace;
2238 	status = locker.AddTeam(team, true, &targetAddressSpace);
2239 	if (status != B_OK)
2240 		return status;
2241 
2242 	status = locker.Lock();
2243 	if (status != B_OK)
2244 		return status;
2245 
2246 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2247 	if (sourceArea == NULL)
2248 		return B_BAD_VALUE;
2249 
2250 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2251 		return B_NOT_ALLOWED;
2252 
2253 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2254 
2255 	if (!kernel && sourceAddressSpace != targetAddressSpace
2256 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2257 #if KDEBUG
2258 		Team* team = thread_get_current_thread()->team;
2259 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2260 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2261 #endif
2262 		status = B_NOT_ALLOWED;
2263 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2264 		status = B_NOT_ALLOWED;
2265 	} else {
2266 		virtual_address_restrictions addressRestrictions = {};
2267 		addressRestrictions.address = *address;
2268 		addressRestrictions.address_specification = addressSpec;
2269 		status = map_backing_store(targetAddressSpace, cache,
2270 			sourceArea->cache_offset, name, sourceArea->Size(),
2271 			sourceArea->wiring, protection, sourceArea->protection_max,
2272 			mapping, 0, &addressRestrictions,
2273 			kernel, &newArea, address);
2274 	}
2275 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2276 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2277 		// to create a new cache, and has therefore already acquired a reference
2278 		// to the source cache - but otherwise it has no idea that we need
2279 		// one.
2280 		cache->AcquireRefLocked();
2281 	}
2282 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2283 		// we need to map in everything at this point
2284 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2285 			// we don't have actual pages to map but a physical area
2286 			VMTranslationMap* map
2287 				= sourceArea->address_space->TranslationMap();
2288 			map->Lock();
2289 
2290 			phys_addr_t physicalAddress;
2291 			uint32 oldProtection;
2292 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2293 
2294 			map->Unlock();
2295 
2296 			map = targetAddressSpace->TranslationMap();
2297 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2298 				newArea->Base() + (newArea->Size() - 1));
2299 
2300 			vm_page_reservation reservation;
2301 			vm_page_reserve_pages(&reservation, reservePages,
2302 				targetAddressSpace == VMAddressSpace::Kernel()
2303 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2304 			map->Lock();
2305 
2306 			for (addr_t offset = 0; offset < newArea->Size();
2307 					offset += B_PAGE_SIZE) {
2308 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2309 					protection, newArea->MemoryType(), &reservation);
2310 			}
2311 
2312 			map->Unlock();
2313 			vm_page_unreserve_pages(&reservation);
2314 		} else {
2315 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2316 			size_t reservePages = map->MaxPagesNeededToMap(
2317 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2318 			vm_page_reservation reservation;
2319 			vm_page_reserve_pages(&reservation, reservePages,
2320 				targetAddressSpace == VMAddressSpace::Kernel()
2321 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2322 
2323 			// map in all pages from source
2324 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2325 					vm_page* page  = it.Next();) {
2326 				if (!page->busy) {
2327 					DEBUG_PAGE_ACCESS_START(page);
2328 					map_page(newArea, page,
2329 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2330 							- newArea->cache_offset),
2331 						protection, &reservation);
2332 					DEBUG_PAGE_ACCESS_END(page);
2333 				}
2334 			}
2335 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2336 			// ensuring that!
2337 
2338 			vm_page_unreserve_pages(&reservation);
2339 		}
2340 	}
2341 	if (status == B_OK)
2342 		newArea->cache_type = sourceArea->cache_type;
2343 
2344 	vm_area_put_locked_cache(cache);
2345 
2346 	if (status < B_OK)
2347 		return status;
2348 
2349 	return newArea->id;
2350 }
2351 
2352 
2353 /*!	Deletes the specified area of the given address space.
2354 
2355 	The address space must be write-locked.
2356 	The caller must ensure that the area does not have any wired ranges.
2357 
2358 	\param addressSpace The address space containing the area.
2359 	\param area The area to be deleted.
2360 	\param deletingAddressSpace \c true, if the address space is in the process
2361 		of being deleted.
2362 */
2363 static void
2364 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2365 	bool deletingAddressSpace)
2366 {
2367 	ASSERT(!area->IsWired());
2368 
2369 	VMAreas::Remove(area);
2370 
2371 	// At this point the area is removed from the global hash table, but
2372 	// still exists in the area list.
2373 
2374 	// Unmap the virtual address space the area occupied.
2375 	{
2376 		// We need to lock the complete cache chain.
2377 		VMCache* topCache = vm_area_get_locked_cache(area);
2378 		VMCacheChainLocker cacheChainLocker(topCache);
2379 		cacheChainLocker.LockAllSourceCaches();
2380 
2381 		// If the area's top cache is a temporary cache and the area is the only
2382 		// one referencing it (besides us currently holding a second reference),
2383 		// the unmapping code doesn't need to care about preserving the accessed
2384 		// and dirty flags of the top cache page mappings.
2385 		bool ignoreTopCachePageFlags
2386 			= topCache->temporary && topCache->RefCount() == 2;
2387 
2388 		area->address_space->TranslationMap()->UnmapArea(area,
2389 			deletingAddressSpace, ignoreTopCachePageFlags);
2390 	}
2391 
2392 	if (!area->cache->temporary)
2393 		area->cache->WriteModified();
2394 
2395 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2396 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2397 
2398 	arch_vm_unset_memory_type(area);
2399 	addressSpace->RemoveArea(area, allocationFlags);
2400 	addressSpace->Put();
2401 
2402 	area->cache->RemoveArea(area);
2403 	area->cache->ReleaseRef();
2404 
2405 	addressSpace->DeleteArea(area, allocationFlags);
2406 }
2407 
2408 
2409 status_t
2410 vm_delete_area(team_id team, area_id id, bool kernel)
2411 {
2412 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2413 		team, id));
2414 
2415 	// lock the address space and make sure the area isn't wired
2416 	AddressSpaceWriteLocker locker;
2417 	VMArea* area;
2418 	AreaCacheLocker cacheLocker;
2419 
2420 	do {
2421 		status_t status = locker.SetFromArea(team, id, area);
2422 		if (status != B_OK)
2423 			return status;
2424 
2425 		cacheLocker.SetTo(area);
2426 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2427 
2428 	cacheLocker.Unlock();
2429 
2430 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2431 		return B_NOT_ALLOWED;
2432 
2433 	delete_area(locker.AddressSpace(), area, false);
2434 	return B_OK;
2435 }
2436 
2437 
2438 /*!	Creates a new cache on top of given cache, moves all areas from
2439 	the old cache to the new one, and changes the protection of all affected
2440 	areas' pages to read-only. If requested, wired pages are moved up to the
2441 	new cache and copies are added to the old cache in their place.
2442 	Preconditions:
2443 	- The given cache must be locked.
2444 	- All of the cache's areas' address spaces must be read locked.
2445 	- Either the cache must not have any wired ranges or a page reservation for
2446 	  all wired pages must be provided, so they can be copied.
2447 
2448 	\param lowerCache The cache on top of which a new cache shall be created.
2449 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2450 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2451 		has wired page. The wired pages are copied in this case.
2452 */
2453 static status_t
2454 vm_copy_on_write_area(VMCache* lowerCache,
2455 	vm_page_reservation* wiredPagesReservation)
2456 {
2457 	VMCache* upperCache;
2458 
2459 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2460 
2461 	// We need to separate the cache from its areas. The cache goes one level
2462 	// deeper and we create a new cache inbetween.
2463 
2464 	// create an anonymous cache
2465 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2466 		lowerCache->GuardSize() / B_PAGE_SIZE,
2467 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2468 		VM_PRIORITY_USER);
2469 	if (status != B_OK)
2470 		return status;
2471 
2472 	upperCache->Lock();
2473 
2474 	upperCache->temporary = 1;
2475 	upperCache->virtual_base = lowerCache->virtual_base;
2476 	upperCache->virtual_end = lowerCache->virtual_end;
2477 
2478 	// transfer the lower cache areas to the upper cache
2479 	rw_lock_write_lock(&sAreaCacheLock);
2480 	upperCache->TransferAreas(lowerCache);
2481 	rw_lock_write_unlock(&sAreaCacheLock);
2482 
2483 	lowerCache->AddConsumer(upperCache);
2484 
2485 	// We now need to remap all pages from all of the cache's areas read-only,
2486 	// so that a copy will be created on next write access. If there are wired
2487 	// pages, we keep their protection, move them to the upper cache and create
2488 	// copies for the lower cache.
2489 	if (wiredPagesReservation != NULL) {
2490 		// We need to handle wired pages -- iterate through the cache's pages.
2491 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2492 				vm_page* page = it.Next();) {
2493 			if (page->WiredCount() > 0) {
2494 				// allocate a new page and copy the wired one
2495 				vm_page* copiedPage = vm_page_allocate_page(
2496 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2497 
2498 				vm_memcpy_physical_page(
2499 					copiedPage->physical_page_number * B_PAGE_SIZE,
2500 					page->physical_page_number * B_PAGE_SIZE);
2501 
2502 				// move the wired page to the upper cache (note: removing is OK
2503 				// with the SplayTree iterator) and insert the copy
2504 				upperCache->MovePage(page);
2505 				lowerCache->InsertPage(copiedPage,
2506 					page->cache_offset * B_PAGE_SIZE);
2507 
2508 				DEBUG_PAGE_ACCESS_END(copiedPage);
2509 			} else {
2510 				// Change the protection of this page in all areas.
2511 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2512 						tempArea = tempArea->cache_next) {
2513 					// The area must be readable in the same way it was
2514 					// previously writable.
2515 					addr_t address = virtual_page_address(tempArea, page);
2516 					uint32 protection = 0;
2517 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2518 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2519 						protection |= B_KERNEL_READ_AREA;
2520 					if ((pageProtection & B_READ_AREA) != 0)
2521 						protection |= B_READ_AREA;
2522 
2523 					VMTranslationMap* map
2524 						= tempArea->address_space->TranslationMap();
2525 					map->Lock();
2526 					map->ProtectPage(tempArea, address, protection);
2527 					map->Unlock();
2528 				}
2529 			}
2530 		}
2531 	} else {
2532 		ASSERT(lowerCache->WiredPagesCount() == 0);
2533 
2534 		// just change the protection of all areas
2535 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2536 				tempArea = tempArea->cache_next) {
2537 			if (tempArea->page_protections != NULL) {
2538 				// Change the protection of all pages in this area.
2539 				VMTranslationMap* map = tempArea->address_space->TranslationMap();
2540 				map->Lock();
2541 				for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2542 					vm_page* page = it.Next();) {
2543 					// The area must be readable in the same way it was
2544 					// previously writable.
2545 					addr_t address = virtual_page_address(tempArea, page);
2546 					uint32 protection = 0;
2547 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2548 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2549 						protection |= B_KERNEL_READ_AREA;
2550 					if ((pageProtection & B_READ_AREA) != 0)
2551 						protection |= B_READ_AREA;
2552 
2553 					map->ProtectPage(tempArea, address, protection);
2554 				}
2555 				map->Unlock();
2556 				continue;
2557 			}
2558 			// The area must be readable in the same way it was previously
2559 			// writable.
2560 			uint32 protection = 0;
2561 			if ((tempArea->protection & B_KERNEL_READ_AREA) != 0)
2562 				protection |= B_KERNEL_READ_AREA;
2563 			if ((tempArea->protection & B_READ_AREA) != 0)
2564 				protection |= B_READ_AREA;
2565 
2566 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2567 			map->Lock();
2568 			map->ProtectArea(tempArea, protection);
2569 			map->Unlock();
2570 		}
2571 	}
2572 
2573 	vm_area_put_locked_cache(upperCache);
2574 
2575 	return B_OK;
2576 }
2577 
2578 
2579 area_id
2580 vm_copy_area(team_id team, const char* name, void** _address,
2581 	uint32 addressSpec, area_id sourceID)
2582 {
2583 	// Do the locking: target address space, all address spaces associated with
2584 	// the source cache, and the cache itself.
2585 	MultiAddressSpaceLocker locker;
2586 	VMAddressSpace* targetAddressSpace;
2587 	VMCache* cache;
2588 	VMArea* source;
2589 	AreaCacheLocker cacheLocker;
2590 	status_t status;
2591 	bool sharedArea;
2592 
2593 	page_num_t wiredPages = 0;
2594 	vm_page_reservation wiredPagesReservation;
2595 
2596 	bool restart;
2597 	do {
2598 		restart = false;
2599 
2600 		locker.Unset();
2601 		status = locker.AddTeam(team, true, &targetAddressSpace);
2602 		if (status == B_OK) {
2603 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2604 				&cache);
2605 		}
2606 		if (status != B_OK)
2607 			return status;
2608 
2609 		cacheLocker.SetTo(cache, true);	// already locked
2610 
2611 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2612 
2613 		page_num_t oldWiredPages = wiredPages;
2614 		wiredPages = 0;
2615 
2616 		// If the source area isn't shared, count the number of wired pages in
2617 		// the cache and reserve as many pages.
2618 		if (!sharedArea) {
2619 			wiredPages = cache->WiredPagesCount();
2620 
2621 			if (wiredPages > oldWiredPages) {
2622 				cacheLocker.Unlock();
2623 				locker.Unlock();
2624 
2625 				if (oldWiredPages > 0)
2626 					vm_page_unreserve_pages(&wiredPagesReservation);
2627 
2628 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2629 					VM_PRIORITY_USER);
2630 
2631 				restart = true;
2632 			}
2633 		} else if (oldWiredPages > 0)
2634 			vm_page_unreserve_pages(&wiredPagesReservation);
2635 	} while (restart);
2636 
2637 	// unreserve pages later
2638 	struct PagesUnreserver {
2639 		PagesUnreserver(vm_page_reservation* reservation)
2640 			:
2641 			fReservation(reservation)
2642 		{
2643 		}
2644 
2645 		~PagesUnreserver()
2646 		{
2647 			if (fReservation != NULL)
2648 				vm_page_unreserve_pages(fReservation);
2649 		}
2650 
2651 	private:
2652 		vm_page_reservation*	fReservation;
2653 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2654 
2655 	bool writableCopy
2656 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2657 	uint8* targetPageProtections = NULL;
2658 
2659 	if (source->page_protections != NULL) {
2660 		size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2;
2661 		targetPageProtections = (uint8*)malloc_etc(bytes,
2662 			(source->address_space == VMAddressSpace::Kernel()
2663 					|| targetAddressSpace == VMAddressSpace::Kernel())
2664 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2665 		if (targetPageProtections == NULL)
2666 			return B_NO_MEMORY;
2667 
2668 		memcpy(targetPageProtections, source->page_protections, bytes);
2669 
2670 		if (!writableCopy) {
2671 			for (size_t i = 0; i < bytes; i++) {
2672 				if ((targetPageProtections[i]
2673 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2674 					writableCopy = true;
2675 					break;
2676 				}
2677 			}
2678 		}
2679 	}
2680 
2681 	if (addressSpec == B_CLONE_ADDRESS) {
2682 		addressSpec = B_EXACT_ADDRESS;
2683 		*_address = (void*)source->Base();
2684 	}
2685 
2686 	// First, create a cache on top of the source area, respectively use the
2687 	// existing one, if this is a shared area.
2688 
2689 	VMArea* target;
2690 	virtual_address_restrictions addressRestrictions = {};
2691 	addressRestrictions.address = *_address;
2692 	addressRestrictions.address_specification = addressSpec;
2693 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2694 		name, source->Size(), source->wiring, source->protection,
2695 		source->protection_max,
2696 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2697 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2698 		&addressRestrictions, true, &target, _address);
2699 	if (status < B_OK) {
2700 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2701 		return status;
2702 	}
2703 
2704 	if (targetPageProtections != NULL)
2705 		target->page_protections = targetPageProtections;
2706 
2707 	if (sharedArea) {
2708 		// The new area uses the old area's cache, but map_backing_store()
2709 		// hasn't acquired a ref. So we have to do that now.
2710 		cache->AcquireRefLocked();
2711 	}
2712 
2713 	// If the source area is writable, we need to move it one layer up as well
2714 
2715 	if (!sharedArea) {
2716 		if (writableCopy) {
2717 			// TODO: do something more useful if this fails!
2718 			if (vm_copy_on_write_area(cache,
2719 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2720 				panic("vm_copy_on_write_area() failed!\n");
2721 			}
2722 		}
2723 	}
2724 
2725 	// we return the ID of the newly created area
2726 	return target->id;
2727 }
2728 
2729 
2730 status_t
2731 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2732 	bool kernel)
2733 {
2734 	fix_protection(&newProtection);
2735 
2736 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2737 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2738 
2739 	if (!arch_vm_supports_protection(newProtection))
2740 		return B_NOT_SUPPORTED;
2741 
2742 	bool becomesWritable
2743 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2744 
2745 	// lock address spaces and cache
2746 	MultiAddressSpaceLocker locker;
2747 	VMCache* cache;
2748 	VMArea* area;
2749 	status_t status;
2750 	AreaCacheLocker cacheLocker;
2751 	bool isWritable;
2752 
2753 	bool restart;
2754 	do {
2755 		restart = false;
2756 
2757 		locker.Unset();
2758 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2759 		if (status != B_OK)
2760 			return status;
2761 
2762 		cacheLocker.SetTo(cache, true);	// already locked
2763 
2764 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2765 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2766 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2767 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2768 				" (%s)\n", team, newProtection, areaID, area->name);
2769 			return B_NOT_ALLOWED;
2770 		}
2771 		if (!kernel && area->protection_max != 0
2772 			&& (newProtection & area->protection_max)
2773 				!= (newProtection & B_USER_PROTECTION)) {
2774 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2775 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
2776 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2777 				area->protection_max, areaID, area->name);
2778 			return B_NOT_ALLOWED;
2779 		}
2780 
2781 		if (area->protection == newProtection)
2782 			return B_OK;
2783 
2784 		if (team != VMAddressSpace::KernelID()
2785 			&& area->address_space->ID() != team) {
2786 			// unless you're the kernel, you are only allowed to set
2787 			// the protection of your own areas
2788 			return B_NOT_ALLOWED;
2789 		}
2790 
2791 		isWritable
2792 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2793 
2794 		// Make sure the area (respectively, if we're going to call
2795 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2796 		// wired ranges.
2797 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2798 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2799 					otherArea = otherArea->cache_next) {
2800 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2801 					restart = true;
2802 					break;
2803 				}
2804 			}
2805 		} else {
2806 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2807 				restart = true;
2808 		}
2809 	} while (restart);
2810 
2811 	bool changePageProtection = true;
2812 	bool changeTopCachePagesOnly = false;
2813 
2814 	if (isWritable && !becomesWritable) {
2815 		// writable -> !writable
2816 
2817 		if (cache->source != NULL && cache->temporary) {
2818 			if (cache->CountWritableAreas(area) == 0) {
2819 				// Since this cache now lives from the pages in its source cache,
2820 				// we can change the cache's commitment to take only those pages
2821 				// into account that really are in this cache.
2822 
2823 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2824 					team == VMAddressSpace::KernelID()
2825 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2826 
2827 				// TODO: we may be able to join with our source cache, if
2828 				// count == 0
2829 			}
2830 		}
2831 
2832 		// If only the writability changes, we can just remap the pages of the
2833 		// top cache, since the pages of lower caches are mapped read-only
2834 		// anyway. That's advantageous only, if the number of pages in the cache
2835 		// is significantly smaller than the number of pages in the area,
2836 		// though.
2837 		if (newProtection
2838 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2839 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2840 			changeTopCachePagesOnly = true;
2841 		}
2842 	} else if (!isWritable && becomesWritable) {
2843 		// !writable -> writable
2844 
2845 		if (!cache->consumers.IsEmpty()) {
2846 			// There are consumers -- we have to insert a new cache. Fortunately
2847 			// vm_copy_on_write_area() does everything that's needed.
2848 			changePageProtection = false;
2849 			status = vm_copy_on_write_area(cache, NULL);
2850 		} else {
2851 			// No consumers, so we don't need to insert a new one.
2852 			if (cache->source != NULL && cache->temporary) {
2853 				// the cache's commitment must contain all possible pages
2854 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2855 					team == VMAddressSpace::KernelID()
2856 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2857 			}
2858 
2859 			if (status == B_OK && cache->source != NULL) {
2860 				// There's a source cache, hence we can't just change all pages'
2861 				// protection or we might allow writing into pages belonging to
2862 				// a lower cache.
2863 				changeTopCachePagesOnly = true;
2864 			}
2865 		}
2866 	} else {
2867 		// we don't have anything special to do in all other cases
2868 	}
2869 
2870 	if (status == B_OK) {
2871 		// remap existing pages in this cache
2872 		if (changePageProtection) {
2873 			VMTranslationMap* map = area->address_space->TranslationMap();
2874 			map->Lock();
2875 
2876 			if (changeTopCachePagesOnly) {
2877 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2878 				page_num_t lastPageOffset
2879 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2880 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2881 						vm_page* page = it.Next();) {
2882 					if (page->cache_offset >= firstPageOffset
2883 						&& page->cache_offset <= lastPageOffset) {
2884 						addr_t address = virtual_page_address(area, page);
2885 						map->ProtectPage(area, address, newProtection);
2886 					}
2887 				}
2888 			} else
2889 				map->ProtectArea(area, newProtection);
2890 
2891 			map->Unlock();
2892 		}
2893 
2894 		area->protection = newProtection;
2895 	}
2896 
2897 	return status;
2898 }
2899 
2900 
2901 status_t
2902 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2903 {
2904 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2905 	if (addressSpace == NULL)
2906 		return B_BAD_TEAM_ID;
2907 
2908 	VMTranslationMap* map = addressSpace->TranslationMap();
2909 
2910 	map->Lock();
2911 	uint32 dummyFlags;
2912 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2913 	map->Unlock();
2914 
2915 	addressSpace->Put();
2916 	return status;
2917 }
2918 
2919 
2920 /*!	The page's cache must be locked.
2921 */
2922 bool
2923 vm_test_map_modification(vm_page* page)
2924 {
2925 	if (page->modified)
2926 		return true;
2927 
2928 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2929 	vm_page_mapping* mapping;
2930 	while ((mapping = iterator.Next()) != NULL) {
2931 		VMArea* area = mapping->area;
2932 		VMTranslationMap* map = area->address_space->TranslationMap();
2933 
2934 		phys_addr_t physicalAddress;
2935 		uint32 flags;
2936 		map->Lock();
2937 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2938 		map->Unlock();
2939 
2940 		if ((flags & PAGE_MODIFIED) != 0)
2941 			return true;
2942 	}
2943 
2944 	return false;
2945 }
2946 
2947 
2948 /*!	The page's cache must be locked.
2949 */
2950 void
2951 vm_clear_map_flags(vm_page* page, uint32 flags)
2952 {
2953 	if ((flags & PAGE_ACCESSED) != 0)
2954 		page->accessed = false;
2955 	if ((flags & PAGE_MODIFIED) != 0)
2956 		page->modified = false;
2957 
2958 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2959 	vm_page_mapping* mapping;
2960 	while ((mapping = iterator.Next()) != NULL) {
2961 		VMArea* area = mapping->area;
2962 		VMTranslationMap* map = area->address_space->TranslationMap();
2963 
2964 		map->Lock();
2965 		map->ClearFlags(virtual_page_address(area, page), flags);
2966 		map->Unlock();
2967 	}
2968 }
2969 
2970 
2971 /*!	Removes all mappings from a page.
2972 	After you've called this function, the page is unmapped from memory and
2973 	the page's \c accessed and \c modified flags have been updated according
2974 	to the state of the mappings.
2975 	The page's cache must be locked.
2976 */
2977 void
2978 vm_remove_all_page_mappings(vm_page* page)
2979 {
2980 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2981 		VMArea* area = mapping->area;
2982 		VMTranslationMap* map = area->address_space->TranslationMap();
2983 		addr_t address = virtual_page_address(area, page);
2984 		map->UnmapPage(area, address, false);
2985 	}
2986 }
2987 
2988 
2989 int32
2990 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2991 {
2992 	int32 count = 0;
2993 
2994 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2995 	vm_page_mapping* mapping;
2996 	while ((mapping = iterator.Next()) != NULL) {
2997 		VMArea* area = mapping->area;
2998 		VMTranslationMap* map = area->address_space->TranslationMap();
2999 
3000 		bool modified;
3001 		if (map->ClearAccessedAndModified(area,
3002 				virtual_page_address(area, page), false, modified)) {
3003 			count++;
3004 		}
3005 
3006 		page->modified |= modified;
3007 	}
3008 
3009 
3010 	if (page->accessed) {
3011 		count++;
3012 		page->accessed = false;
3013 	}
3014 
3015 	return count;
3016 }
3017 
3018 
3019 /*!	Removes all mappings of a page and/or clears the accessed bits of the
3020 	mappings.
3021 	The function iterates through the page mappings and removes them until
3022 	encountering one that has been accessed. From then on it will continue to
3023 	iterate, but only clear the accessed flag of the mapping. The page's
3024 	\c modified bit will be updated accordingly, the \c accessed bit will be
3025 	cleared.
3026 	\return The number of mapping accessed bits encountered, including the
3027 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3028 		of the page have been removed.
3029 */
3030 int32
3031 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3032 {
3033 	ASSERT(page->WiredCount() == 0);
3034 
3035 	if (page->accessed)
3036 		return vm_clear_page_mapping_accessed_flags(page);
3037 
3038 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3039 		VMArea* area = mapping->area;
3040 		VMTranslationMap* map = area->address_space->TranslationMap();
3041 		addr_t address = virtual_page_address(area, page);
3042 		bool modified = false;
3043 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3044 			page->accessed = true;
3045 			page->modified |= modified;
3046 			return vm_clear_page_mapping_accessed_flags(page);
3047 		}
3048 		page->modified |= modified;
3049 	}
3050 
3051 	return 0;
3052 }
3053 
3054 
3055 static int
3056 display_mem(int argc, char** argv)
3057 {
3058 	bool physical = false;
3059 	addr_t copyAddress;
3060 	int32 displayWidth;
3061 	int32 itemSize;
3062 	int32 num = -1;
3063 	addr_t address;
3064 	int i = 1, j;
3065 
3066 	if (argc > 1 && argv[1][0] == '-') {
3067 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3068 			physical = true;
3069 			i++;
3070 		} else
3071 			i = 99;
3072 	}
3073 
3074 	if (argc < i + 1 || argc > i + 2) {
3075 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3076 			"\tdl - 8 bytes\n"
3077 			"\tdw - 4 bytes\n"
3078 			"\tds - 2 bytes\n"
3079 			"\tdb - 1 byte\n"
3080 			"\tstring - a whole string\n"
3081 			"  -p or --physical only allows memory from a single page to be "
3082 			"displayed.\n");
3083 		return 0;
3084 	}
3085 
3086 	address = parse_expression(argv[i]);
3087 
3088 	if (argc > i + 1)
3089 		num = parse_expression(argv[i + 1]);
3090 
3091 	// build the format string
3092 	if (strcmp(argv[0], "db") == 0) {
3093 		itemSize = 1;
3094 		displayWidth = 16;
3095 	} else if (strcmp(argv[0], "ds") == 0) {
3096 		itemSize = 2;
3097 		displayWidth = 8;
3098 	} else if (strcmp(argv[0], "dw") == 0) {
3099 		itemSize = 4;
3100 		displayWidth = 4;
3101 	} else if (strcmp(argv[0], "dl") == 0) {
3102 		itemSize = 8;
3103 		displayWidth = 2;
3104 	} else if (strcmp(argv[0], "string") == 0) {
3105 		itemSize = 1;
3106 		displayWidth = -1;
3107 	} else {
3108 		kprintf("display_mem called in an invalid way!\n");
3109 		return 0;
3110 	}
3111 
3112 	if (num <= 0)
3113 		num = displayWidth;
3114 
3115 	void* physicalPageHandle = NULL;
3116 
3117 	if (physical) {
3118 		int32 offset = address & (B_PAGE_SIZE - 1);
3119 		if (num * itemSize + offset > B_PAGE_SIZE) {
3120 			num = (B_PAGE_SIZE - offset) / itemSize;
3121 			kprintf("NOTE: number of bytes has been cut to page size\n");
3122 		}
3123 
3124 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3125 
3126 		if (vm_get_physical_page_debug(address, &copyAddress,
3127 				&physicalPageHandle) != B_OK) {
3128 			kprintf("getting the hardware page failed.");
3129 			return 0;
3130 		}
3131 
3132 		address += offset;
3133 		copyAddress += offset;
3134 	} else
3135 		copyAddress = address;
3136 
3137 	if (!strcmp(argv[0], "string")) {
3138 		kprintf("%p \"", (char*)copyAddress);
3139 
3140 		// string mode
3141 		for (i = 0; true; i++) {
3142 			char c;
3143 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3144 					!= B_OK
3145 				|| c == '\0') {
3146 				break;
3147 			}
3148 
3149 			if (c == '\n')
3150 				kprintf("\\n");
3151 			else if (c == '\t')
3152 				kprintf("\\t");
3153 			else {
3154 				if (!isprint(c))
3155 					c = '.';
3156 
3157 				kprintf("%c", c);
3158 			}
3159 		}
3160 
3161 		kprintf("\"\n");
3162 	} else {
3163 		// number mode
3164 		for (i = 0; i < num; i++) {
3165 			uint64 value;
3166 
3167 			if ((i % displayWidth) == 0) {
3168 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3169 				if (i != 0)
3170 					kprintf("\n");
3171 
3172 				kprintf("[0x%lx]  ", address + i * itemSize);
3173 
3174 				for (j = 0; j < displayed; j++) {
3175 					char c;
3176 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3177 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3178 						displayed = j;
3179 						break;
3180 					}
3181 					if (!isprint(c))
3182 						c = '.';
3183 
3184 					kprintf("%c", c);
3185 				}
3186 				if (num > displayWidth) {
3187 					// make sure the spacing in the last line is correct
3188 					for (j = displayed; j < displayWidth * itemSize; j++)
3189 						kprintf(" ");
3190 				}
3191 				kprintf("  ");
3192 			}
3193 
3194 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3195 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3196 				kprintf("read fault");
3197 				break;
3198 			}
3199 
3200 			switch (itemSize) {
3201 				case 1:
3202 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3203 					break;
3204 				case 2:
3205 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3206 					break;
3207 				case 4:
3208 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3209 					break;
3210 				case 8:
3211 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3212 					break;
3213 			}
3214 		}
3215 
3216 		kprintf("\n");
3217 	}
3218 
3219 	if (physical) {
3220 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3221 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3222 	}
3223 	return 0;
3224 }
3225 
3226 
3227 static void
3228 dump_cache_tree_recursively(VMCache* cache, int level,
3229 	VMCache* highlightCache)
3230 {
3231 	// print this cache
3232 	for (int i = 0; i < level; i++)
3233 		kprintf("  ");
3234 	if (cache == highlightCache)
3235 		kprintf("%p <--\n", cache);
3236 	else
3237 		kprintf("%p\n", cache);
3238 
3239 	// recursively print its consumers
3240 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3241 			VMCache* consumer = it.Next();) {
3242 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3243 	}
3244 }
3245 
3246 
3247 static int
3248 dump_cache_tree(int argc, char** argv)
3249 {
3250 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3251 		kprintf("usage: %s <address>\n", argv[0]);
3252 		return 0;
3253 	}
3254 
3255 	addr_t address = parse_expression(argv[1]);
3256 	if (address == 0)
3257 		return 0;
3258 
3259 	VMCache* cache = (VMCache*)address;
3260 	VMCache* root = cache;
3261 
3262 	// find the root cache (the transitive source)
3263 	while (root->source != NULL)
3264 		root = root->source;
3265 
3266 	dump_cache_tree_recursively(root, 0, cache);
3267 
3268 	return 0;
3269 }
3270 
3271 
3272 const char*
3273 vm_cache_type_to_string(int32 type)
3274 {
3275 	switch (type) {
3276 		case CACHE_TYPE_RAM:
3277 			return "RAM";
3278 		case CACHE_TYPE_DEVICE:
3279 			return "device";
3280 		case CACHE_TYPE_VNODE:
3281 			return "vnode";
3282 		case CACHE_TYPE_NULL:
3283 			return "null";
3284 
3285 		default:
3286 			return "unknown";
3287 	}
3288 }
3289 
3290 
3291 #if DEBUG_CACHE_LIST
3292 
3293 static void
3294 update_cache_info_recursively(VMCache* cache, cache_info& info)
3295 {
3296 	info.page_count += cache->page_count;
3297 	if (cache->type == CACHE_TYPE_RAM)
3298 		info.committed += cache->committed_size;
3299 
3300 	// recurse
3301 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3302 			VMCache* consumer = it.Next();) {
3303 		update_cache_info_recursively(consumer, info);
3304 	}
3305 }
3306 
3307 
3308 static int
3309 cache_info_compare_page_count(const void* _a, const void* _b)
3310 {
3311 	const cache_info* a = (const cache_info*)_a;
3312 	const cache_info* b = (const cache_info*)_b;
3313 	if (a->page_count == b->page_count)
3314 		return 0;
3315 	return a->page_count < b->page_count ? 1 : -1;
3316 }
3317 
3318 
3319 static int
3320 cache_info_compare_committed(const void* _a, const void* _b)
3321 {
3322 	const cache_info* a = (const cache_info*)_a;
3323 	const cache_info* b = (const cache_info*)_b;
3324 	if (a->committed == b->committed)
3325 		return 0;
3326 	return a->committed < b->committed ? 1 : -1;
3327 }
3328 
3329 
3330 static void
3331 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3332 {
3333 	for (int i = 0; i < level; i++)
3334 		kprintf("  ");
3335 
3336 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3337 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3338 		cache->virtual_base, cache->virtual_end, cache->page_count);
3339 
3340 	if (level == 0)
3341 		kprintf("/%lu", info.page_count);
3342 
3343 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3344 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3345 
3346 		if (level == 0)
3347 			kprintf("/%lu", info.committed);
3348 	}
3349 
3350 	// areas
3351 	if (cache->areas != NULL) {
3352 		VMArea* area = cache->areas;
3353 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3354 			area->name, area->address_space->ID());
3355 
3356 		while (area->cache_next != NULL) {
3357 			area = area->cache_next;
3358 			kprintf(", %" B_PRId32, area->id);
3359 		}
3360 	}
3361 
3362 	kputs("\n");
3363 
3364 	// recurse
3365 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3366 			VMCache* consumer = it.Next();) {
3367 		dump_caches_recursively(consumer, info, level + 1);
3368 	}
3369 }
3370 
3371 
3372 static int
3373 dump_caches(int argc, char** argv)
3374 {
3375 	if (sCacheInfoTable == NULL) {
3376 		kprintf("No cache info table!\n");
3377 		return 0;
3378 	}
3379 
3380 	bool sortByPageCount = true;
3381 
3382 	for (int32 i = 1; i < argc; i++) {
3383 		if (strcmp(argv[i], "-c") == 0) {
3384 			sortByPageCount = false;
3385 		} else {
3386 			print_debugger_command_usage(argv[0]);
3387 			return 0;
3388 		}
3389 	}
3390 
3391 	uint32 totalCount = 0;
3392 	uint32 rootCount = 0;
3393 	off_t totalCommitted = 0;
3394 	page_num_t totalPages = 0;
3395 
3396 	VMCache* cache = gDebugCacheList;
3397 	while (cache) {
3398 		totalCount++;
3399 		if (cache->source == NULL) {
3400 			cache_info stackInfo;
3401 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3402 				? sCacheInfoTable[rootCount] : stackInfo;
3403 			rootCount++;
3404 			info.cache = cache;
3405 			info.page_count = 0;
3406 			info.committed = 0;
3407 			update_cache_info_recursively(cache, info);
3408 			totalCommitted += info.committed;
3409 			totalPages += info.page_count;
3410 		}
3411 
3412 		cache = cache->debug_next;
3413 	}
3414 
3415 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3416 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3417 			sortByPageCount
3418 				? &cache_info_compare_page_count
3419 				: &cache_info_compare_committed);
3420 	}
3421 
3422 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3423 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3424 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3425 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3426 			"page count" : "committed size");
3427 
3428 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3429 		for (uint32 i = 0; i < rootCount; i++) {
3430 			cache_info& info = sCacheInfoTable[i];
3431 			dump_caches_recursively(info.cache, info, 0);
3432 		}
3433 	} else
3434 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3435 
3436 	return 0;
3437 }
3438 
3439 #endif	// DEBUG_CACHE_LIST
3440 
3441 
3442 static int
3443 dump_cache(int argc, char** argv)
3444 {
3445 	VMCache* cache;
3446 	bool showPages = false;
3447 	int i = 1;
3448 
3449 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3450 		kprintf("usage: %s [-ps] <address>\n"
3451 			"  if -p is specified, all pages are shown, if -s is used\n"
3452 			"  only the cache info is shown respectively.\n", argv[0]);
3453 		return 0;
3454 	}
3455 	while (argv[i][0] == '-') {
3456 		char* arg = argv[i] + 1;
3457 		while (arg[0]) {
3458 			if (arg[0] == 'p')
3459 				showPages = true;
3460 			arg++;
3461 		}
3462 		i++;
3463 	}
3464 	if (argv[i] == NULL) {
3465 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3466 		return 0;
3467 	}
3468 
3469 	addr_t address = parse_expression(argv[i]);
3470 	if (address == 0)
3471 		return 0;
3472 
3473 	cache = (VMCache*)address;
3474 
3475 	cache->Dump(showPages);
3476 
3477 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3478 
3479 	return 0;
3480 }
3481 
3482 
3483 static void
3484 dump_area_struct(VMArea* area, bool mappings)
3485 {
3486 	kprintf("AREA: %p\n", area);
3487 	kprintf("name:\t\t'%s'\n", area->name);
3488 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3489 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3490 	kprintf("base:\t\t0x%lx\n", area->Base());
3491 	kprintf("size:\t\t0x%lx\n", area->Size());
3492 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3493 	kprintf("page_protection:%p\n", area->page_protections);
3494 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3495 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3496 	kprintf("cache:\t\t%p\n", area->cache);
3497 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3498 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3499 	kprintf("cache_next:\t%p\n", area->cache_next);
3500 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3501 
3502 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3503 	if (mappings) {
3504 		kprintf("page mappings:\n");
3505 		while (iterator.HasNext()) {
3506 			vm_page_mapping* mapping = iterator.Next();
3507 			kprintf("  %p", mapping->page);
3508 		}
3509 		kprintf("\n");
3510 	} else {
3511 		uint32 count = 0;
3512 		while (iterator.Next() != NULL) {
3513 			count++;
3514 		}
3515 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3516 	}
3517 }
3518 
3519 
3520 static int
3521 dump_area(int argc, char** argv)
3522 {
3523 	bool mappings = false;
3524 	bool found = false;
3525 	int32 index = 1;
3526 	VMArea* area;
3527 	addr_t num;
3528 
3529 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3530 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3531 			"All areas matching either id/address/name are listed. You can\n"
3532 			"force to check only a specific item by prefixing the specifier\n"
3533 			"with the id/contains/address/name keywords.\n"
3534 			"-m shows the area's mappings as well.\n");
3535 		return 0;
3536 	}
3537 
3538 	if (!strcmp(argv[1], "-m")) {
3539 		mappings = true;
3540 		index++;
3541 	}
3542 
3543 	int32 mode = 0xf;
3544 	if (!strcmp(argv[index], "id"))
3545 		mode = 1;
3546 	else if (!strcmp(argv[index], "contains"))
3547 		mode = 2;
3548 	else if (!strcmp(argv[index], "name"))
3549 		mode = 4;
3550 	else if (!strcmp(argv[index], "address"))
3551 		mode = 0;
3552 	if (mode != 0xf)
3553 		index++;
3554 
3555 	if (index >= argc) {
3556 		kprintf("No area specifier given.\n");
3557 		return 0;
3558 	}
3559 
3560 	num = parse_expression(argv[index]);
3561 
3562 	if (mode == 0) {
3563 		dump_area_struct((struct VMArea*)num, mappings);
3564 	} else {
3565 		// walk through the area list, looking for the arguments as a name
3566 
3567 		VMAreasTree::Iterator it = VMAreas::GetIterator();
3568 		while ((area = it.Next()) != NULL) {
3569 			if (((mode & 4) != 0
3570 					&& !strcmp(argv[index], area->name))
3571 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3572 					|| (((mode & 2) != 0 && area->Base() <= num
3573 						&& area->Base() + area->Size() > num))))) {
3574 				dump_area_struct(area, mappings);
3575 				found = true;
3576 			}
3577 		}
3578 
3579 		if (!found)
3580 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3581 	}
3582 
3583 	return 0;
3584 }
3585 
3586 
3587 static int
3588 dump_area_list(int argc, char** argv)
3589 {
3590 	VMArea* area;
3591 	const char* name = NULL;
3592 	int32 id = 0;
3593 
3594 	if (argc > 1) {
3595 		id = parse_expression(argv[1]);
3596 		if (id == 0)
3597 			name = argv[1];
3598 	}
3599 
3600 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3601 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3602 		B_PRINTF_POINTER_WIDTH, "size");
3603 
3604 	VMAreasTree::Iterator it = VMAreas::GetIterator();
3605 	while ((area = it.Next()) != NULL) {
3606 		if ((id != 0 && area->address_space->ID() != id)
3607 			|| (name != NULL && strstr(area->name, name) == NULL))
3608 			continue;
3609 
3610 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3611 			area->id, (void*)area->Base(), (void*)area->Size(),
3612 			area->protection, area->wiring, area->name);
3613 	}
3614 	return 0;
3615 }
3616 
3617 
3618 static int
3619 dump_available_memory(int argc, char** argv)
3620 {
3621 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3622 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3623 	return 0;
3624 }
3625 
3626 
3627 static int
3628 dump_mapping_info(int argc, char** argv)
3629 {
3630 	bool reverseLookup = false;
3631 	bool pageLookup = false;
3632 
3633 	int argi = 1;
3634 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3635 		const char* arg = argv[argi];
3636 		if (strcmp(arg, "-r") == 0) {
3637 			reverseLookup = true;
3638 		} else if (strcmp(arg, "-p") == 0) {
3639 			reverseLookup = true;
3640 			pageLookup = true;
3641 		} else {
3642 			print_debugger_command_usage(argv[0]);
3643 			return 0;
3644 		}
3645 	}
3646 
3647 	// We need at least one argument, the address. Optionally a thread ID can be
3648 	// specified.
3649 	if (argi >= argc || argi + 2 < argc) {
3650 		print_debugger_command_usage(argv[0]);
3651 		return 0;
3652 	}
3653 
3654 	uint64 addressValue;
3655 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3656 		return 0;
3657 
3658 	Team* team = NULL;
3659 	if (argi < argc) {
3660 		uint64 threadID;
3661 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3662 			return 0;
3663 
3664 		Thread* thread = Thread::GetDebug(threadID);
3665 		if (thread == NULL) {
3666 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3667 			return 0;
3668 		}
3669 
3670 		team = thread->team;
3671 	}
3672 
3673 	if (reverseLookup) {
3674 		phys_addr_t physicalAddress;
3675 		if (pageLookup) {
3676 			vm_page* page = (vm_page*)(addr_t)addressValue;
3677 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3678 		} else {
3679 			physicalAddress = (phys_addr_t)addressValue;
3680 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3681 		}
3682 
3683 		kprintf("    Team     Virtual Address      Area\n");
3684 		kprintf("--------------------------------------\n");
3685 
3686 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3687 			Callback()
3688 				:
3689 				fAddressSpace(NULL)
3690 			{
3691 			}
3692 
3693 			void SetAddressSpace(VMAddressSpace* addressSpace)
3694 			{
3695 				fAddressSpace = addressSpace;
3696 			}
3697 
3698 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3699 			{
3700 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3701 					virtualAddress);
3702 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3703 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3704 				else
3705 					kprintf("\n");
3706 				return false;
3707 			}
3708 
3709 		private:
3710 			VMAddressSpace*	fAddressSpace;
3711 		} callback;
3712 
3713 		if (team != NULL) {
3714 			// team specified -- get its address space
3715 			VMAddressSpace* addressSpace = team->address_space;
3716 			if (addressSpace == NULL) {
3717 				kprintf("Failed to get address space!\n");
3718 				return 0;
3719 			}
3720 
3721 			callback.SetAddressSpace(addressSpace);
3722 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3723 				physicalAddress, callback);
3724 		} else {
3725 			// no team specified -- iterate through all address spaces
3726 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3727 				addressSpace != NULL;
3728 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3729 				callback.SetAddressSpace(addressSpace);
3730 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3731 					physicalAddress, callback);
3732 			}
3733 		}
3734 	} else {
3735 		// get the address space
3736 		addr_t virtualAddress = (addr_t)addressValue;
3737 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3738 		VMAddressSpace* addressSpace;
3739 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3740 			addressSpace = VMAddressSpace::Kernel();
3741 		} else if (team != NULL) {
3742 			addressSpace = team->address_space;
3743 		} else {
3744 			Thread* thread = debug_get_debugged_thread();
3745 			if (thread == NULL || thread->team == NULL) {
3746 				kprintf("Failed to get team!\n");
3747 				return 0;
3748 			}
3749 
3750 			addressSpace = thread->team->address_space;
3751 		}
3752 
3753 		if (addressSpace == NULL) {
3754 			kprintf("Failed to get address space!\n");
3755 			return 0;
3756 		}
3757 
3758 		// let the translation map implementation do the job
3759 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3760 	}
3761 
3762 	return 0;
3763 }
3764 
3765 
3766 /*!	Deletes all areas and reserved regions in the given address space.
3767 
3768 	The caller must ensure that none of the areas has any wired ranges.
3769 
3770 	\param addressSpace The address space.
3771 	\param deletingAddressSpace \c true, if the address space is in the process
3772 		of being deleted.
3773 */
3774 void
3775 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3776 {
3777 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3778 		addressSpace->ID()));
3779 
3780 	addressSpace->WriteLock();
3781 
3782 	// remove all reserved areas in this address space
3783 	addressSpace->UnreserveAllAddressRanges(0);
3784 
3785 	// delete all the areas in this address space
3786 	while (VMArea* area = addressSpace->FirstArea()) {
3787 		ASSERT(!area->IsWired());
3788 		delete_area(addressSpace, area, deletingAddressSpace);
3789 	}
3790 
3791 	addressSpace->WriteUnlock();
3792 }
3793 
3794 
3795 static area_id
3796 vm_area_for(addr_t address, bool kernel)
3797 {
3798 	team_id team;
3799 	if (IS_USER_ADDRESS(address)) {
3800 		// we try the user team address space, if any
3801 		team = VMAddressSpace::CurrentID();
3802 		if (team < 0)
3803 			return team;
3804 	} else
3805 		team = VMAddressSpace::KernelID();
3806 
3807 	AddressSpaceReadLocker locker(team);
3808 	if (!locker.IsLocked())
3809 		return B_BAD_TEAM_ID;
3810 
3811 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3812 	if (area != NULL) {
3813 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3814 			return B_ERROR;
3815 
3816 		return area->id;
3817 	}
3818 
3819 	return B_ERROR;
3820 }
3821 
3822 
3823 /*!	Frees physical pages that were used during the boot process.
3824 	\a end is inclusive.
3825 */
3826 static void
3827 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3828 {
3829 	// free all physical pages in the specified range
3830 
3831 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3832 		phys_addr_t physicalAddress;
3833 		uint32 flags;
3834 
3835 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3836 			&& (flags & PAGE_PRESENT) != 0) {
3837 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3838 			if (page != NULL && page->State() != PAGE_STATE_FREE
3839 					&& page->State() != PAGE_STATE_CLEAR
3840 					&& page->State() != PAGE_STATE_UNUSED) {
3841 				DEBUG_PAGE_ACCESS_START(page);
3842 				vm_page_set_state(page, PAGE_STATE_FREE);
3843 			}
3844 		}
3845 	}
3846 
3847 	// unmap the memory
3848 	map->Unmap(start, end);
3849 }
3850 
3851 
3852 void
3853 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3854 {
3855 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3856 	addr_t end = start + (size - 1);
3857 	addr_t lastEnd = start;
3858 
3859 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3860 		(void*)start, (void*)end));
3861 
3862 	// The areas are sorted in virtual address space order, so
3863 	// we just have to find the holes between them that fall
3864 	// into the area we should dispose
3865 
3866 	map->Lock();
3867 
3868 	for (VMAddressSpace::AreaIterator it
3869 				= VMAddressSpace::Kernel()->GetAreaIterator();
3870 			VMArea* area = it.Next();) {
3871 		addr_t areaStart = area->Base();
3872 		addr_t areaEnd = areaStart + (area->Size() - 1);
3873 
3874 		if (areaEnd < start)
3875 			continue;
3876 
3877 		if (areaStart > end) {
3878 			// we are done, the area is already beyond of what we have to free
3879 			break;
3880 		}
3881 
3882 		if (areaStart > lastEnd) {
3883 			// this is something we can free
3884 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3885 				(void*)areaStart));
3886 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3887 		}
3888 
3889 		if (areaEnd >= end) {
3890 			lastEnd = areaEnd;
3891 				// no +1 to prevent potential overflow
3892 			break;
3893 		}
3894 
3895 		lastEnd = areaEnd + 1;
3896 	}
3897 
3898 	if (lastEnd < end) {
3899 		// we can also get rid of some space at the end of the area
3900 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3901 			(void*)end));
3902 		unmap_and_free_physical_pages(map, lastEnd, end);
3903 	}
3904 
3905 	map->Unlock();
3906 }
3907 
3908 
3909 static void
3910 create_preloaded_image_areas(struct preloaded_image* _image)
3911 {
3912 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3913 	char name[B_OS_NAME_LENGTH];
3914 	void* address;
3915 	int32 length;
3916 
3917 	// use file name to create a good area name
3918 	char* fileName = strrchr(image->name, '/');
3919 	if (fileName == NULL)
3920 		fileName = image->name;
3921 	else
3922 		fileName++;
3923 
3924 	length = strlen(fileName);
3925 	// make sure there is enough space for the suffix
3926 	if (length > 25)
3927 		length = 25;
3928 
3929 	memcpy(name, fileName, length);
3930 	strcpy(name + length, "_text");
3931 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3932 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3933 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3934 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3935 		// this will later be remapped read-only/executable by the
3936 		// ELF initialization code
3937 
3938 	strcpy(name + length, "_data");
3939 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3940 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3941 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3942 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3943 }
3944 
3945 
3946 /*!	Frees all previously kernel arguments areas from the kernel_args structure.
3947 	Any boot loader resources contained in that arguments must not be accessed
3948 	anymore past this point.
3949 */
3950 void
3951 vm_free_kernel_args(kernel_args* args)
3952 {
3953 	uint32 i;
3954 
3955 	TRACE(("vm_free_kernel_args()\n"));
3956 
3957 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3958 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3959 		if (area >= B_OK)
3960 			delete_area(area);
3961 	}
3962 }
3963 
3964 
3965 static void
3966 allocate_kernel_args(kernel_args* args)
3967 {
3968 	TRACE(("allocate_kernel_args()\n"));
3969 
3970 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3971 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3972 
3973 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3974 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3975 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3976 	}
3977 }
3978 
3979 
3980 static void
3981 unreserve_boot_loader_ranges(kernel_args* args)
3982 {
3983 	TRACE(("unreserve_boot_loader_ranges()\n"));
3984 
3985 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3986 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3987 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3988 			args->virtual_allocated_range[i].size);
3989 	}
3990 }
3991 
3992 
3993 static void
3994 reserve_boot_loader_ranges(kernel_args* args)
3995 {
3996 	TRACE(("reserve_boot_loader_ranges()\n"));
3997 
3998 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3999 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
4000 
4001 		// If the address is no kernel address, we just skip it. The
4002 		// architecture specific code has to deal with it.
4003 		if (!IS_KERNEL_ADDRESS(address)) {
4004 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
4005 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
4006 			continue;
4007 		}
4008 
4009 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
4010 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4011 		if (status < B_OK)
4012 			panic("could not reserve boot loader ranges\n");
4013 	}
4014 }
4015 
4016 
4017 static addr_t
4018 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
4019 {
4020 	size = PAGE_ALIGN(size);
4021 
4022 	// find a slot in the virtual allocation addr range
4023 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
4024 		// check to see if the space between this one and the last is big enough
4025 		addr_t rangeStart = args->virtual_allocated_range[i].start;
4026 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4027 			+ args->virtual_allocated_range[i - 1].size;
4028 
4029 		addr_t base = alignment > 0
4030 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4031 
4032 		if (base >= KERNEL_BASE && base < rangeStart
4033 				&& rangeStart - base >= size) {
4034 			args->virtual_allocated_range[i - 1].size
4035 				+= base + size - previousRangeEnd;
4036 			return base;
4037 		}
4038 	}
4039 
4040 	// we hadn't found one between allocation ranges. this is ok.
4041 	// see if there's a gap after the last one
4042 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4043 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4044 		+ args->virtual_allocated_range[lastEntryIndex].size;
4045 	addr_t base = alignment > 0
4046 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4047 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4048 		args->virtual_allocated_range[lastEntryIndex].size
4049 			+= base + size - lastRangeEnd;
4050 		return base;
4051 	}
4052 
4053 	// see if there's a gap before the first one
4054 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4055 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4056 		base = rangeStart - size;
4057 		if (alignment > 0)
4058 			base = ROUNDDOWN(base, alignment);
4059 
4060 		if (base >= KERNEL_BASE) {
4061 			args->virtual_allocated_range[0].start = base;
4062 			args->virtual_allocated_range[0].size += rangeStart - base;
4063 			return base;
4064 		}
4065 	}
4066 
4067 	return 0;
4068 }
4069 
4070 
4071 static bool
4072 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4073 {
4074 	// TODO: horrible brute-force method of determining if the page can be
4075 	// allocated
4076 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4077 		if (address >= args->physical_memory_range[i].start
4078 			&& address < args->physical_memory_range[i].start
4079 				+ args->physical_memory_range[i].size)
4080 			return true;
4081 	}
4082 	return false;
4083 }
4084 
4085 
4086 page_num_t
4087 vm_allocate_early_physical_page(kernel_args* args)
4088 {
4089 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4090 		phys_addr_t nextPage;
4091 
4092 		nextPage = args->physical_allocated_range[i].start
4093 			+ args->physical_allocated_range[i].size;
4094 		// see if the page after the next allocated paddr run can be allocated
4095 		if (i + 1 < args->num_physical_allocated_ranges
4096 			&& args->physical_allocated_range[i + 1].size != 0) {
4097 			// see if the next page will collide with the next allocated range
4098 			if (nextPage >= args->physical_allocated_range[i+1].start)
4099 				continue;
4100 		}
4101 		// see if the next physical page fits in the memory block
4102 		if (is_page_in_physical_memory_range(args, nextPage)) {
4103 			// we got one!
4104 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4105 			return nextPage / B_PAGE_SIZE;
4106 		}
4107 	}
4108 
4109 	// Expanding upwards didn't work, try going downwards.
4110 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4111 		phys_addr_t nextPage;
4112 
4113 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4114 		// see if the page after the prev allocated paddr run can be allocated
4115 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4116 			// see if the next page will collide with the next allocated range
4117 			if (nextPage < args->physical_allocated_range[i-1].start
4118 				+ args->physical_allocated_range[i-1].size)
4119 				continue;
4120 		}
4121 		// see if the next physical page fits in the memory block
4122 		if (is_page_in_physical_memory_range(args, nextPage)) {
4123 			// we got one!
4124 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4125 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4126 			return nextPage / B_PAGE_SIZE;
4127 		}
4128 	}
4129 
4130 	return 0;
4131 		// could not allocate a block
4132 }
4133 
4134 
4135 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4136 	allocate some pages before the VM is completely up.
4137 */
4138 addr_t
4139 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4140 	uint32 attributes, addr_t alignment)
4141 {
4142 	if (physicalSize > virtualSize)
4143 		physicalSize = virtualSize;
4144 
4145 	// find the vaddr to allocate at
4146 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4147 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4148 	if (virtualBase == 0) {
4149 		panic("vm_allocate_early: could not allocate virtual address\n");
4150 		return 0;
4151 	}
4152 
4153 	// map the pages
4154 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4155 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4156 		if (physicalAddress == 0)
4157 			panic("error allocating early page!\n");
4158 
4159 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4160 
4161 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4162 			physicalAddress * B_PAGE_SIZE, attributes,
4163 			&vm_allocate_early_physical_page);
4164 	}
4165 
4166 	return virtualBase;
4167 }
4168 
4169 
4170 /*!	The main entrance point to initialize the VM. */
4171 status_t
4172 vm_init(kernel_args* args)
4173 {
4174 	struct preloaded_image* image;
4175 	void* address;
4176 	status_t err = 0;
4177 	uint32 i;
4178 
4179 	TRACE(("vm_init: entry\n"));
4180 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4181 	err = arch_vm_init(args);
4182 
4183 	// initialize some globals
4184 	vm_page_init_num_pages(args);
4185 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4186 
4187 	slab_init(args);
4188 
4189 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4190 	off_t heapSize = INITIAL_HEAP_SIZE;
4191 	// try to accomodate low memory systems
4192 	while (heapSize > sAvailableMemory / 8)
4193 		heapSize /= 2;
4194 	if (heapSize < 1024 * 1024)
4195 		panic("vm_init: go buy some RAM please.");
4196 
4197 	// map in the new heap and initialize it
4198 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4199 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4200 	TRACE(("heap at 0x%lx\n", heapBase));
4201 	heap_init(heapBase, heapSize);
4202 #endif
4203 
4204 	// initialize the free page list and physical page mapper
4205 	vm_page_init(args);
4206 
4207 	// initialize the cache allocators
4208 	vm_cache_init(args);
4209 
4210 	{
4211 		status_t error = VMAreas::Init();
4212 		if (error != B_OK)
4213 			panic("vm_init: error initializing areas map\n");
4214 	}
4215 
4216 	VMAddressSpace::Init();
4217 	reserve_boot_loader_ranges(args);
4218 
4219 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4220 	heap_init_post_area();
4221 #endif
4222 
4223 	// Do any further initialization that the architecture dependant layers may
4224 	// need now
4225 	arch_vm_translation_map_init_post_area(args);
4226 	arch_vm_init_post_area(args);
4227 	vm_page_init_post_area(args);
4228 	slab_init_post_area();
4229 
4230 	// allocate areas to represent stuff that already exists
4231 
4232 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4233 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4234 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4235 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4236 #endif
4237 
4238 	allocate_kernel_args(args);
4239 
4240 	create_preloaded_image_areas(args->kernel_image);
4241 
4242 	// allocate areas for preloaded images
4243 	for (image = args->preloaded_images; image != NULL; image = image->next)
4244 		create_preloaded_image_areas(image);
4245 
4246 	// allocate kernel stacks
4247 	for (i = 0; i < args->num_cpus; i++) {
4248 		char name[64];
4249 
4250 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4251 		address = (void*)args->cpu_kstack[i].start;
4252 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4253 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4254 	}
4255 
4256 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4257 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4258 
4259 #if PARANOID_KERNEL_MALLOC
4260 	vm_block_address_range("uninitialized heap memory",
4261 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4262 #endif
4263 #if PARANOID_KERNEL_FREE
4264 	vm_block_address_range("freed heap memory",
4265 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4266 #endif
4267 
4268 	// create the object cache for the page mappings
4269 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4270 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4271 		NULL, NULL);
4272 	if (gPageMappingsObjectCache == NULL)
4273 		panic("failed to create page mappings object cache");
4274 
4275 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4276 
4277 #if DEBUG_CACHE_LIST
4278 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4279 		virtual_address_restrictions virtualRestrictions = {};
4280 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4281 		physical_address_restrictions physicalRestrictions = {};
4282 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4283 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4284 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4285 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4286 			&physicalRestrictions, (void**)&sCacheInfoTable);
4287 	}
4288 #endif	// DEBUG_CACHE_LIST
4289 
4290 	// add some debugger commands
4291 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4292 	add_debugger_command("area", &dump_area,
4293 		"Dump info about a particular area");
4294 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4295 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4296 #if DEBUG_CACHE_LIST
4297 	if (sCacheInfoTable != NULL) {
4298 		add_debugger_command_etc("caches", &dump_caches,
4299 			"List all VMCache trees",
4300 			"[ \"-c\" ]\n"
4301 			"All cache trees are listed sorted in decreasing order by number "
4302 				"of\n"
4303 			"used pages or, if \"-c\" is specified, by size of committed "
4304 				"memory.\n",
4305 			0);
4306 	}
4307 #endif
4308 	add_debugger_command("avail", &dump_available_memory,
4309 		"Dump available memory");
4310 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4311 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4312 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4313 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4314 	add_debugger_command("string", &display_mem, "dump strings");
4315 
4316 	add_debugger_command_etc("mapping", &dump_mapping_info,
4317 		"Print address mapping information",
4318 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4319 		"Prints low-level page mapping information for a given address. If\n"
4320 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4321 		"address that is looked up in the translation map of the current\n"
4322 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4323 		"\"-r\" is specified, <address> is a physical address that is\n"
4324 		"searched in the translation map of all teams, respectively the team\n"
4325 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4326 		"<address> is the address of a vm_page structure. The behavior is\n"
4327 		"equivalent to specifying \"-r\" with the physical address of that\n"
4328 		"page.\n",
4329 		0);
4330 
4331 	TRACE(("vm_init: exit\n"));
4332 
4333 	vm_cache_init_post_heap();
4334 
4335 	return err;
4336 }
4337 
4338 
4339 status_t
4340 vm_init_post_sem(kernel_args* args)
4341 {
4342 	// This frees all unused boot loader resources and makes its space available
4343 	// again
4344 	arch_vm_init_end(args);
4345 	unreserve_boot_loader_ranges(args);
4346 
4347 	// fill in all of the semaphores that were not allocated before
4348 	// since we're still single threaded and only the kernel address space
4349 	// exists, it isn't that hard to find all of the ones we need to create
4350 
4351 	arch_vm_translation_map_init_post_sem(args);
4352 
4353 	slab_init_post_sem();
4354 
4355 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4356 	heap_init_post_sem();
4357 #endif
4358 
4359 	return B_OK;
4360 }
4361 
4362 
4363 status_t
4364 vm_init_post_thread(kernel_args* args)
4365 {
4366 	vm_page_init_post_thread(args);
4367 	slab_init_post_thread();
4368 	return heap_init_post_thread();
4369 }
4370 
4371 
4372 status_t
4373 vm_init_post_modules(kernel_args* args)
4374 {
4375 	return arch_vm_init_post_modules(args);
4376 }
4377 
4378 
4379 void
4380 permit_page_faults(void)
4381 {
4382 	Thread* thread = thread_get_current_thread();
4383 	if (thread != NULL)
4384 		atomic_add(&thread->page_faults_allowed, 1);
4385 }
4386 
4387 
4388 void
4389 forbid_page_faults(void)
4390 {
4391 	Thread* thread = thread_get_current_thread();
4392 	if (thread != NULL)
4393 		atomic_add(&thread->page_faults_allowed, -1);
4394 }
4395 
4396 
4397 status_t
4398 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4399 	bool isUser, addr_t* newIP)
4400 {
4401 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4402 		faultAddress));
4403 
4404 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4405 
4406 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4407 	VMAddressSpace* addressSpace = NULL;
4408 
4409 	status_t status = B_OK;
4410 	*newIP = 0;
4411 	atomic_add((int32*)&sPageFaults, 1);
4412 
4413 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4414 		addressSpace = VMAddressSpace::GetKernel();
4415 	} else if (IS_USER_ADDRESS(pageAddress)) {
4416 		addressSpace = VMAddressSpace::GetCurrent();
4417 		if (addressSpace == NULL) {
4418 			if (!isUser) {
4419 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4420 					"memory!\n");
4421 				status = B_BAD_ADDRESS;
4422 				TPF(PageFaultError(-1,
4423 					VMPageFaultTracing
4424 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4425 			} else {
4426 				// XXX weird state.
4427 				panic("vm_page_fault: non kernel thread accessing user memory "
4428 					"that doesn't exist!\n");
4429 				status = B_BAD_ADDRESS;
4430 			}
4431 		}
4432 	} else {
4433 		// the hit was probably in the 64k DMZ between kernel and user space
4434 		// this keeps a user space thread from passing a buffer that crosses
4435 		// into kernel space
4436 		status = B_BAD_ADDRESS;
4437 		TPF(PageFaultError(-1,
4438 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4439 	}
4440 
4441 	if (status == B_OK) {
4442 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4443 			isUser, NULL);
4444 	}
4445 
4446 	if (status < B_OK) {
4447 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4448 			"0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n",
4449 			strerror(status), address, faultAddress, isWrite, isUser, isExecute,
4450 			thread_get_current_thread_id());
4451 		if (!isUser) {
4452 			Thread* thread = thread_get_current_thread();
4453 			if (thread != NULL && thread->fault_handler != 0) {
4454 				// this will cause the arch dependant page fault handler to
4455 				// modify the IP on the interrupt frame or whatever to return
4456 				// to this address
4457 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4458 			} else {
4459 				// unhandled page fault in the kernel
4460 				panic("vm_page_fault: unhandled page fault in kernel space at "
4461 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4462 			}
4463 		} else {
4464 			Thread* thread = thread_get_current_thread();
4465 
4466 #ifdef TRACE_FAULTS
4467 			VMArea* area = NULL;
4468 			if (addressSpace != NULL) {
4469 				addressSpace->ReadLock();
4470 				area = addressSpace->LookupArea(faultAddress);
4471 			}
4472 
4473 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4474 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4475 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4476 				thread->team->Name(), thread->team->id,
4477 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4478 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4479 					area->Base() : 0x0));
4480 
4481 			if (addressSpace != NULL)
4482 				addressSpace->ReadUnlock();
4483 #endif
4484 
4485 			// If the thread has a signal handler for SIGSEGV, we simply
4486 			// send it the signal. Otherwise we notify the user debugger
4487 			// first.
4488 			struct sigaction action;
4489 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4490 					&& action.sa_handler != SIG_DFL
4491 					&& action.sa_handler != SIG_IGN)
4492 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4493 					SIGSEGV)) {
4494 				Signal signal(SIGSEGV,
4495 					status == B_PERMISSION_DENIED
4496 						? SEGV_ACCERR : SEGV_MAPERR,
4497 					EFAULT, thread->team->id);
4498 				signal.SetAddress((void*)address);
4499 				send_signal_to_thread(thread, signal, 0);
4500 			}
4501 		}
4502 	}
4503 
4504 	if (addressSpace != NULL)
4505 		addressSpace->Put();
4506 
4507 	return B_HANDLED_INTERRUPT;
4508 }
4509 
4510 
4511 struct PageFaultContext {
4512 	AddressSpaceReadLocker	addressSpaceLocker;
4513 	VMCacheChainLocker		cacheChainLocker;
4514 
4515 	VMTranslationMap*		map;
4516 	VMCache*				topCache;
4517 	off_t					cacheOffset;
4518 	vm_page_reservation		reservation;
4519 	bool					isWrite;
4520 
4521 	// return values
4522 	vm_page*				page;
4523 	bool					restart;
4524 	bool					pageAllocated;
4525 
4526 
4527 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4528 		:
4529 		addressSpaceLocker(addressSpace, true),
4530 		map(addressSpace->TranslationMap()),
4531 		isWrite(isWrite)
4532 	{
4533 	}
4534 
4535 	~PageFaultContext()
4536 	{
4537 		UnlockAll();
4538 		vm_page_unreserve_pages(&reservation);
4539 	}
4540 
4541 	void Prepare(VMCache* topCache, off_t cacheOffset)
4542 	{
4543 		this->topCache = topCache;
4544 		this->cacheOffset = cacheOffset;
4545 		page = NULL;
4546 		restart = false;
4547 		pageAllocated = false;
4548 
4549 		cacheChainLocker.SetTo(topCache);
4550 	}
4551 
4552 	void UnlockAll(VMCache* exceptCache = NULL)
4553 	{
4554 		topCache = NULL;
4555 		addressSpaceLocker.Unlock();
4556 		cacheChainLocker.Unlock(exceptCache);
4557 	}
4558 };
4559 
4560 
4561 /*!	Gets the page that should be mapped into the area.
4562 	Returns an error code other than \c B_OK, if the page couldn't be found or
4563 	paged in. The locking state of the address space and the caches is undefined
4564 	in that case.
4565 	Returns \c B_OK with \c context.restart set to \c true, if the functions
4566 	had to unlock the address space and all caches and is supposed to be called
4567 	again.
4568 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4569 	found. It is returned in \c context.page. The address space will still be
4570 	locked as well as all caches starting from the top cache to at least the
4571 	cache the page lives in.
4572 */
4573 static status_t
4574 fault_get_page(PageFaultContext& context)
4575 {
4576 	VMCache* cache = context.topCache;
4577 	VMCache* lastCache = NULL;
4578 	vm_page* page = NULL;
4579 
4580 	while (cache != NULL) {
4581 		// We already hold the lock of the cache at this point.
4582 
4583 		lastCache = cache;
4584 
4585 		page = cache->LookupPage(context.cacheOffset);
4586 		if (page != NULL && page->busy) {
4587 			// page must be busy -- wait for it to become unbusy
4588 			context.UnlockAll(cache);
4589 			cache->ReleaseRefLocked();
4590 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4591 
4592 			// restart the whole process
4593 			context.restart = true;
4594 			return B_OK;
4595 		}
4596 
4597 		if (page != NULL)
4598 			break;
4599 
4600 		// The current cache does not contain the page we're looking for.
4601 
4602 		// see if the backing store has it
4603 		if (cache->HasPage(context.cacheOffset)) {
4604 			// insert a fresh page and mark it busy -- we're going to read it in
4605 			page = vm_page_allocate_page(&context.reservation,
4606 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4607 			cache->InsertPage(page, context.cacheOffset);
4608 
4609 			// We need to unlock all caches and the address space while reading
4610 			// the page in. Keep a reference to the cache around.
4611 			cache->AcquireRefLocked();
4612 			context.UnlockAll();
4613 
4614 			// read the page in
4615 			generic_io_vec vec;
4616 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4617 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4618 
4619 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4620 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4621 
4622 			cache->Lock();
4623 
4624 			if (status < B_OK) {
4625 				// on error remove and free the page
4626 				dprintf("reading page from cache %p returned: %s!\n",
4627 					cache, strerror(status));
4628 
4629 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4630 				cache->RemovePage(page);
4631 				vm_page_set_state(page, PAGE_STATE_FREE);
4632 
4633 				cache->ReleaseRefAndUnlock();
4634 				return status;
4635 			}
4636 
4637 			// mark the page unbusy again
4638 			cache->MarkPageUnbusy(page);
4639 
4640 			DEBUG_PAGE_ACCESS_END(page);
4641 
4642 			// Since we needed to unlock everything temporarily, the area
4643 			// situation might have changed. So we need to restart the whole
4644 			// process.
4645 			cache->ReleaseRefAndUnlock();
4646 			context.restart = true;
4647 			return B_OK;
4648 		}
4649 
4650 		cache = context.cacheChainLocker.LockSourceCache();
4651 	}
4652 
4653 	if (page == NULL) {
4654 		// There was no adequate page, determine the cache for a clean one.
4655 		// Read-only pages come in the deepest cache, only the top most cache
4656 		// may have direct write access.
4657 		cache = context.isWrite ? context.topCache : lastCache;
4658 
4659 		// allocate a clean page
4660 		page = vm_page_allocate_page(&context.reservation,
4661 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4662 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4663 			page->physical_page_number));
4664 
4665 		// insert the new page into our cache
4666 		cache->InsertPage(page, context.cacheOffset);
4667 		context.pageAllocated = true;
4668 	} else if (page->Cache() != context.topCache && context.isWrite) {
4669 		// We have a page that has the data we want, but in the wrong cache
4670 		// object so we need to copy it and stick it into the top cache.
4671 		vm_page* sourcePage = page;
4672 
4673 		// TODO: If memory is low, it might be a good idea to steal the page
4674 		// from our source cache -- if possible, that is.
4675 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4676 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4677 
4678 		// To not needlessly kill concurrency we unlock all caches but the top
4679 		// one while copying the page. Lacking another mechanism to ensure that
4680 		// the source page doesn't disappear, we mark it busy.
4681 		sourcePage->busy = true;
4682 		context.cacheChainLocker.UnlockKeepRefs(true);
4683 
4684 		// copy the page
4685 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4686 			sourcePage->physical_page_number * B_PAGE_SIZE);
4687 
4688 		context.cacheChainLocker.RelockCaches(true);
4689 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4690 
4691 		// insert the new page into our cache
4692 		context.topCache->InsertPage(page, context.cacheOffset);
4693 		context.pageAllocated = true;
4694 	} else
4695 		DEBUG_PAGE_ACCESS_START(page);
4696 
4697 	context.page = page;
4698 	return B_OK;
4699 }
4700 
4701 
4702 /*!	Makes sure the address in the given address space is mapped.
4703 
4704 	\param addressSpace The address space.
4705 	\param originalAddress The address. Doesn't need to be page aligned.
4706 	\param isWrite If \c true the address shall be write-accessible.
4707 	\param isUser If \c true the access is requested by a userland team.
4708 	\param wirePage On success, if non \c NULL, the wired count of the page
4709 		mapped at the given address is incremented and the page is returned
4710 		via this parameter.
4711 	\return \c B_OK on success, another error code otherwise.
4712 */
4713 static status_t
4714 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4715 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4716 {
4717 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4718 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4719 		originalAddress, isWrite, isUser));
4720 
4721 	PageFaultContext context(addressSpace, isWrite);
4722 
4723 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4724 	status_t status = B_OK;
4725 
4726 	addressSpace->IncrementFaultCount();
4727 
4728 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4729 	// the pages upfront makes sure we don't have any cache locked, so that the
4730 	// page daemon/thief can do their job without problems.
4731 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4732 		originalAddress);
4733 	context.addressSpaceLocker.Unlock();
4734 	vm_page_reserve_pages(&context.reservation, reservePages,
4735 		addressSpace == VMAddressSpace::Kernel()
4736 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4737 
4738 	while (true) {
4739 		context.addressSpaceLocker.Lock();
4740 
4741 		// get the area the fault was in
4742 		VMArea* area = addressSpace->LookupArea(address);
4743 		if (area == NULL) {
4744 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4745 				"space\n", originalAddress);
4746 			TPF(PageFaultError(-1,
4747 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4748 			status = B_BAD_ADDRESS;
4749 			break;
4750 		}
4751 
4752 		// check permissions
4753 		uint32 protection = get_area_page_protection(area, address);
4754 		if (isUser && (protection & B_USER_PROTECTION) == 0
4755 				&& (area->protection & B_KERNEL_AREA) != 0) {
4756 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4757 				area->id, (void*)originalAddress);
4758 			TPF(PageFaultError(area->id,
4759 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4760 			status = B_PERMISSION_DENIED;
4761 			break;
4762 		}
4763 		if (isWrite && (protection
4764 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4765 			dprintf("write access attempted on write-protected area 0x%"
4766 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4767 			TPF(PageFaultError(area->id,
4768 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4769 			status = B_PERMISSION_DENIED;
4770 			break;
4771 		} else if (isExecute && (protection
4772 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4773 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4774 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4775 			TPF(PageFaultError(area->id,
4776 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4777 			status = B_PERMISSION_DENIED;
4778 			break;
4779 		} else if (!isWrite && !isExecute && (protection
4780 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4781 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4782 				" at %p\n", area->id, (void*)originalAddress);
4783 			TPF(PageFaultError(area->id,
4784 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4785 			status = B_PERMISSION_DENIED;
4786 			break;
4787 		}
4788 
4789 		// We have the area, it was a valid access, so let's try to resolve the
4790 		// page fault now.
4791 		// At first, the top most cache from the area is investigated.
4792 
4793 		context.Prepare(vm_area_get_locked_cache(area),
4794 			address - area->Base() + area->cache_offset);
4795 
4796 		// See if this cache has a fault handler -- this will do all the work
4797 		// for us.
4798 		{
4799 			// Note, since the page fault is resolved with interrupts enabled,
4800 			// the fault handler could be called more than once for the same
4801 			// reason -- the store must take this into account.
4802 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4803 			if (status != B_BAD_HANDLER)
4804 				break;
4805 		}
4806 
4807 		// The top most cache has no fault handler, so let's see if the cache or
4808 		// its sources already have the page we're searching for (we're going
4809 		// from top to bottom).
4810 		status = fault_get_page(context);
4811 		if (status != B_OK) {
4812 			TPF(PageFaultError(area->id, status));
4813 			break;
4814 		}
4815 
4816 		if (context.restart)
4817 			continue;
4818 
4819 		// All went fine, all there is left to do is to map the page into the
4820 		// address space.
4821 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4822 			context.page));
4823 
4824 		// If the page doesn't reside in the area's cache, we need to make sure
4825 		// it's mapped in read-only, so that we cannot overwrite someone else's
4826 		// data (copy-on-write)
4827 		uint32 newProtection = protection;
4828 		if (context.page->Cache() != context.topCache && !isWrite)
4829 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4830 
4831 		bool unmapPage = false;
4832 		bool mapPage = true;
4833 
4834 		// check whether there's already a page mapped at the address
4835 		context.map->Lock();
4836 
4837 		phys_addr_t physicalAddress;
4838 		uint32 flags;
4839 		vm_page* mappedPage = NULL;
4840 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4841 			&& (flags & PAGE_PRESENT) != 0
4842 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4843 				!= NULL) {
4844 			// Yep there's already a page. If it's ours, we can simply adjust
4845 			// its protection. Otherwise we have to unmap it.
4846 			if (mappedPage == context.page) {
4847 				context.map->ProtectPage(area, address, newProtection);
4848 					// Note: We assume that ProtectPage() is atomic (i.e.
4849 					// the page isn't temporarily unmapped), otherwise we'd have
4850 					// to make sure it isn't wired.
4851 				mapPage = false;
4852 			} else
4853 				unmapPage = true;
4854 		}
4855 
4856 		context.map->Unlock();
4857 
4858 		if (unmapPage) {
4859 			// If the page is wired, we can't unmap it. Wait until it is unwired
4860 			// again and restart. Note that the page cannot be wired for
4861 			// writing, since it it isn't in the topmost cache. So we can safely
4862 			// ignore ranges wired for writing (our own and other concurrent
4863 			// wiring attempts in progress) and in fact have to do that to avoid
4864 			// a deadlock.
4865 			VMAreaUnwiredWaiter waiter;
4866 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4867 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4868 				// unlock everything and wait
4869 				if (context.pageAllocated) {
4870 					// ... but since we allocated a page and inserted it into
4871 					// the top cache, remove and free it first. Otherwise we'd
4872 					// have a page from a lower cache mapped while an upper
4873 					// cache has a page that would shadow it.
4874 					context.topCache->RemovePage(context.page);
4875 					vm_page_free_etc(context.topCache, context.page,
4876 						&context.reservation);
4877 				} else
4878 					DEBUG_PAGE_ACCESS_END(context.page);
4879 
4880 				context.UnlockAll();
4881 				waiter.waitEntry.Wait();
4882 				continue;
4883 			}
4884 
4885 			// Note: The mapped page is a page of a lower cache. We are
4886 			// guaranteed to have that cached locked, our new page is a copy of
4887 			// that page, and the page is not busy. The logic for that guarantee
4888 			// is as follows: Since the page is mapped, it must live in the top
4889 			// cache (ruled out above) or any of its lower caches, and there is
4890 			// (was before the new page was inserted) no other page in any
4891 			// cache between the top cache and the page's cache (otherwise that
4892 			// would be mapped instead). That in turn means that our algorithm
4893 			// must have found it and therefore it cannot be busy either.
4894 			DEBUG_PAGE_ACCESS_START(mappedPage);
4895 			unmap_page(area, address);
4896 			DEBUG_PAGE_ACCESS_END(mappedPage);
4897 		}
4898 
4899 		if (mapPage) {
4900 			if (map_page(area, context.page, address, newProtection,
4901 					&context.reservation) != B_OK) {
4902 				// Mapping can only fail, when the page mapping object couldn't
4903 				// be allocated. Save for the missing mapping everything is
4904 				// fine, though. If this was a regular page fault, we'll simply
4905 				// leave and probably fault again. To make sure we'll have more
4906 				// luck then, we ensure that the minimum object reserve is
4907 				// available.
4908 				DEBUG_PAGE_ACCESS_END(context.page);
4909 
4910 				context.UnlockAll();
4911 
4912 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4913 						!= B_OK) {
4914 					// Apparently the situation is serious. Let's get ourselves
4915 					// killed.
4916 					status = B_NO_MEMORY;
4917 				} else if (wirePage != NULL) {
4918 					// The caller expects us to wire the page. Since
4919 					// object_cache_reserve() succeeded, we should now be able
4920 					// to allocate a mapping structure. Restart.
4921 					continue;
4922 				}
4923 
4924 				break;
4925 			}
4926 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4927 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4928 
4929 		// also wire the page, if requested
4930 		if (wirePage != NULL && status == B_OK) {
4931 			increment_page_wired_count(context.page);
4932 			*wirePage = context.page;
4933 		}
4934 
4935 		DEBUG_PAGE_ACCESS_END(context.page);
4936 
4937 		break;
4938 	}
4939 
4940 	return status;
4941 }
4942 
4943 
4944 status_t
4945 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4946 {
4947 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4948 }
4949 
4950 status_t
4951 vm_put_physical_page(addr_t vaddr, void* handle)
4952 {
4953 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4954 }
4955 
4956 
4957 status_t
4958 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4959 	void** _handle)
4960 {
4961 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4962 }
4963 
4964 status_t
4965 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4966 {
4967 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4968 }
4969 
4970 
4971 status_t
4972 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4973 {
4974 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4975 }
4976 
4977 status_t
4978 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4979 {
4980 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4981 }
4982 
4983 
4984 void
4985 vm_get_info(system_info* info)
4986 {
4987 	swap_get_info(info);
4988 
4989 	MutexLocker locker(sAvailableMemoryLock);
4990 	info->needed_memory = sNeededMemory;
4991 	info->free_memory = sAvailableMemory;
4992 }
4993 
4994 
4995 uint32
4996 vm_num_page_faults(void)
4997 {
4998 	return sPageFaults;
4999 }
5000 
5001 
5002 off_t
5003 vm_available_memory(void)
5004 {
5005 	MutexLocker locker(sAvailableMemoryLock);
5006 	return sAvailableMemory;
5007 }
5008 
5009 
5010 off_t
5011 vm_available_not_needed_memory(void)
5012 {
5013 	MutexLocker locker(sAvailableMemoryLock);
5014 	return sAvailableMemory - sNeededMemory;
5015 }
5016 
5017 
5018 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
5019 	debugger.
5020 */
5021 off_t
5022 vm_available_not_needed_memory_debug(void)
5023 {
5024 	return sAvailableMemory - sNeededMemory;
5025 }
5026 
5027 
5028 size_t
5029 vm_kernel_address_space_left(void)
5030 {
5031 	return VMAddressSpace::Kernel()->FreeSpace();
5032 }
5033 
5034 
5035 void
5036 vm_unreserve_memory(size_t amount)
5037 {
5038 	mutex_lock(&sAvailableMemoryLock);
5039 
5040 	sAvailableMemory += amount;
5041 
5042 	mutex_unlock(&sAvailableMemoryLock);
5043 }
5044 
5045 
5046 status_t
5047 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5048 {
5049 	size_t reserve = kMemoryReserveForPriority[priority];
5050 
5051 	MutexLocker locker(sAvailableMemoryLock);
5052 
5053 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5054 
5055 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5056 		sAvailableMemory -= amount;
5057 		return B_OK;
5058 	}
5059 
5060 	if (timeout <= 0)
5061 		return B_NO_MEMORY;
5062 
5063 	// turn timeout into an absolute timeout
5064 	timeout += system_time();
5065 
5066 	// loop until we've got the memory or the timeout occurs
5067 	do {
5068 		sNeededMemory += amount;
5069 
5070 		// call the low resource manager
5071 		locker.Unlock();
5072 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5073 			B_ABSOLUTE_TIMEOUT, timeout);
5074 		locker.Lock();
5075 
5076 		sNeededMemory -= amount;
5077 
5078 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5079 			sAvailableMemory -= amount;
5080 			return B_OK;
5081 		}
5082 	} while (timeout > system_time());
5083 
5084 	return B_NO_MEMORY;
5085 }
5086 
5087 
5088 status_t
5089 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5090 {
5091 	// NOTE: The caller is responsible for synchronizing calls to this function!
5092 
5093 	AddressSpaceReadLocker locker;
5094 	VMArea* area;
5095 	status_t status = locker.SetFromArea(id, area);
5096 	if (status != B_OK)
5097 		return status;
5098 
5099 	// nothing to do, if the type doesn't change
5100 	uint32 oldType = area->MemoryType();
5101 	if (type == oldType)
5102 		return B_OK;
5103 
5104 	// set the memory type of the area and the mapped pages
5105 	VMTranslationMap* map = area->address_space->TranslationMap();
5106 	map->Lock();
5107 	area->SetMemoryType(type);
5108 	map->ProtectArea(area, area->protection);
5109 	map->Unlock();
5110 
5111 	// set the physical memory type
5112 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5113 	if (error != B_OK) {
5114 		// reset the memory type of the area and the mapped pages
5115 		map->Lock();
5116 		area->SetMemoryType(oldType);
5117 		map->ProtectArea(area, area->protection);
5118 		map->Unlock();
5119 		return error;
5120 	}
5121 
5122 	return B_OK;
5123 
5124 }
5125 
5126 
5127 /*!	This function enforces some protection properties:
5128 	 - kernel areas must be W^X (after kernel startup)
5129 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5130 	 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5131 */
5132 static void
5133 fix_protection(uint32* protection)
5134 {
5135 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5136 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5137 			|| (*protection & B_WRITE_AREA) != 0)
5138 		&& !gKernelStartup)
5139 		panic("kernel areas cannot be both writable and executable!");
5140 
5141 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5142 		if ((*protection & B_WRITE_AREA) != 0)
5143 			*protection |= B_KERNEL_WRITE_AREA;
5144 		if ((*protection & B_READ_AREA) != 0)
5145 			*protection |= B_KERNEL_READ_AREA;
5146 	}
5147 }
5148 
5149 
5150 static void
5151 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5152 {
5153 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5154 	info->area = area->id;
5155 	info->address = (void*)area->Base();
5156 	info->size = area->Size();
5157 	info->protection = area->protection;
5158 	info->lock = area->wiring;
5159 	info->team = area->address_space->ID();
5160 	info->copy_count = 0;
5161 	info->in_count = 0;
5162 	info->out_count = 0;
5163 		// TODO: retrieve real values here!
5164 
5165 	VMCache* cache = vm_area_get_locked_cache(area);
5166 
5167 	// Note, this is a simplification; the cache could be larger than this area
5168 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5169 
5170 	vm_area_put_locked_cache(cache);
5171 }
5172 
5173 
5174 static status_t
5175 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5176 {
5177 	// is newSize a multiple of B_PAGE_SIZE?
5178 	if (newSize & (B_PAGE_SIZE - 1))
5179 		return B_BAD_VALUE;
5180 
5181 	// lock all affected address spaces and the cache
5182 	VMArea* area;
5183 	VMCache* cache;
5184 
5185 	MultiAddressSpaceLocker locker;
5186 	AreaCacheLocker cacheLocker;
5187 
5188 	status_t status;
5189 	size_t oldSize;
5190 	bool anyKernelArea;
5191 	bool restart;
5192 
5193 	do {
5194 		anyKernelArea = false;
5195 		restart = false;
5196 
5197 		locker.Unset();
5198 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5199 		if (status != B_OK)
5200 			return status;
5201 		cacheLocker.SetTo(cache, true);	// already locked
5202 
5203 		// enforce restrictions
5204 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5205 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5206 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5207 				"resize kernel area %" B_PRId32 " (%s)\n",
5208 				team_get_current_team_id(), areaID, area->name);
5209 			return B_NOT_ALLOWED;
5210 		}
5211 		// TODO: Enforce all restrictions (team, etc.)!
5212 
5213 		oldSize = area->Size();
5214 		if (newSize == oldSize)
5215 			return B_OK;
5216 
5217 		if (cache->type != CACHE_TYPE_RAM)
5218 			return B_NOT_ALLOWED;
5219 
5220 		if (oldSize < newSize) {
5221 			// We need to check if all areas of this cache can be resized.
5222 			for (VMArea* current = cache->areas; current != NULL;
5223 					current = current->cache_next) {
5224 				if (!current->address_space->CanResizeArea(current, newSize))
5225 					return B_ERROR;
5226 				anyKernelArea
5227 					|= current->address_space == VMAddressSpace::Kernel();
5228 			}
5229 		} else {
5230 			// We're shrinking the areas, so we must make sure the affected
5231 			// ranges are not wired.
5232 			for (VMArea* current = cache->areas; current != NULL;
5233 					current = current->cache_next) {
5234 				anyKernelArea
5235 					|= current->address_space == VMAddressSpace::Kernel();
5236 
5237 				if (wait_if_area_range_is_wired(current,
5238 						current->Base() + newSize, oldSize - newSize, &locker,
5239 						&cacheLocker)) {
5240 					restart = true;
5241 					break;
5242 				}
5243 			}
5244 		}
5245 	} while (restart);
5246 
5247 	// Okay, looks good so far, so let's do it
5248 
5249 	int priority = kernel && anyKernelArea
5250 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5251 	uint32 allocationFlags = kernel && anyKernelArea
5252 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5253 
5254 	if (oldSize < newSize) {
5255 		// Growing the cache can fail, so we do it first.
5256 		status = cache->Resize(cache->virtual_base + newSize, priority);
5257 		if (status != B_OK)
5258 			return status;
5259 	}
5260 
5261 	for (VMArea* current = cache->areas; current != NULL;
5262 			current = current->cache_next) {
5263 		status = current->address_space->ResizeArea(current, newSize,
5264 			allocationFlags);
5265 		if (status != B_OK)
5266 			break;
5267 
5268 		// We also need to unmap all pages beyond the new size, if the area has
5269 		// shrunk
5270 		if (newSize < oldSize) {
5271 			VMCacheChainLocker cacheChainLocker(cache);
5272 			cacheChainLocker.LockAllSourceCaches();
5273 
5274 			unmap_pages(current, current->Base() + newSize,
5275 				oldSize - newSize);
5276 
5277 			cacheChainLocker.Unlock(cache);
5278 		}
5279 	}
5280 
5281 	if (status == B_OK) {
5282 		// Shrink or grow individual page protections if in use.
5283 		if (area->page_protections != NULL) {
5284 			size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5285 			uint8* newProtections
5286 				= (uint8*)realloc(area->page_protections, bytes);
5287 			if (newProtections == NULL)
5288 				status = B_NO_MEMORY;
5289 			else {
5290 				area->page_protections = newProtections;
5291 
5292 				if (oldSize < newSize) {
5293 					// init the additional page protections to that of the area
5294 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5295 					uint32 areaProtection = area->protection
5296 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5297 					memset(area->page_protections + offset,
5298 						areaProtection | (areaProtection << 4), bytes - offset);
5299 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5300 						uint8& entry = area->page_protections[offset - 1];
5301 						entry = (entry & 0x0f) | (areaProtection << 4);
5302 					}
5303 				}
5304 			}
5305 		}
5306 	}
5307 
5308 	// shrinking the cache can't fail, so we do it now
5309 	if (status == B_OK && newSize < oldSize)
5310 		status = cache->Resize(cache->virtual_base + newSize, priority);
5311 
5312 	if (status != B_OK) {
5313 		// Something failed -- resize the areas back to their original size.
5314 		// This can fail, too, in which case we're seriously screwed.
5315 		for (VMArea* current = cache->areas; current != NULL;
5316 				current = current->cache_next) {
5317 			if (current->address_space->ResizeArea(current, oldSize,
5318 					allocationFlags) != B_OK) {
5319 				panic("vm_resize_area(): Failed and not being able to restore "
5320 					"original state.");
5321 			}
5322 		}
5323 
5324 		cache->Resize(cache->virtual_base + oldSize, priority);
5325 	}
5326 
5327 	// TODO: we must honour the lock restrictions of this area
5328 	return status;
5329 }
5330 
5331 
5332 status_t
5333 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5334 {
5335 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5336 }
5337 
5338 
5339 status_t
5340 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5341 {
5342 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5343 }
5344 
5345 
5346 status_t
5347 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5348 	bool user)
5349 {
5350 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5351 }
5352 
5353 
5354 void
5355 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5356 {
5357 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5358 }
5359 
5360 
5361 /*!	Copies a range of memory directly from/to a page that might not be mapped
5362 	at the moment.
5363 
5364 	For \a unsafeMemory the current mapping (if any is ignored). The function
5365 	walks through the respective area's cache chain to find the physical page
5366 	and copies from/to it directly.
5367 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5368 	must not cross a page boundary.
5369 
5370 	\param teamID The team ID identifying the address space \a unsafeMemory is
5371 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5372 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5373 		is passed, the address space of the thread returned by
5374 		debug_get_debugged_thread() is used.
5375 	\param unsafeMemory The start of the unsafe memory range to be copied
5376 		from/to.
5377 	\param buffer A safely accessible kernel buffer to be copied from/to.
5378 	\param size The number of bytes to be copied.
5379 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5380 		\a unsafeMemory, the other way around otherwise.
5381 */
5382 status_t
5383 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5384 	size_t size, bool copyToUnsafe)
5385 {
5386 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5387 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5388 		return B_BAD_VALUE;
5389 	}
5390 
5391 	// get the address space for the debugged thread
5392 	VMAddressSpace* addressSpace;
5393 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5394 		addressSpace = VMAddressSpace::Kernel();
5395 	} else if (teamID == B_CURRENT_TEAM) {
5396 		Thread* thread = debug_get_debugged_thread();
5397 		if (thread == NULL || thread->team == NULL)
5398 			return B_BAD_ADDRESS;
5399 
5400 		addressSpace = thread->team->address_space;
5401 	} else
5402 		addressSpace = VMAddressSpace::DebugGet(teamID);
5403 
5404 	if (addressSpace == NULL)
5405 		return B_BAD_ADDRESS;
5406 
5407 	// get the area
5408 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5409 	if (area == NULL)
5410 		return B_BAD_ADDRESS;
5411 
5412 	// search the page
5413 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5414 		+ area->cache_offset;
5415 	VMCache* cache = area->cache;
5416 	vm_page* page = NULL;
5417 	while (cache != NULL) {
5418 		page = cache->DebugLookupPage(cacheOffset);
5419 		if (page != NULL)
5420 			break;
5421 
5422 		// Page not found in this cache -- if it is paged out, we must not try
5423 		// to get it from lower caches.
5424 		if (cache->DebugHasPage(cacheOffset))
5425 			break;
5426 
5427 		cache = cache->source;
5428 	}
5429 
5430 	if (page == NULL)
5431 		return B_UNSUPPORTED;
5432 
5433 	// copy from/to physical memory
5434 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5435 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5436 
5437 	if (copyToUnsafe) {
5438 		if (page->Cache() != area->cache)
5439 			return B_UNSUPPORTED;
5440 
5441 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5442 	}
5443 
5444 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5445 }
5446 
5447 
5448 /** Validate that a memory range is either fully in kernel space, or fully in
5449  *  userspace */
5450 static inline bool
5451 validate_memory_range(const void* addr, size_t size)
5452 {
5453 	addr_t address = (addr_t)addr;
5454 
5455 	// Check for overflows on all addresses.
5456 	if ((address + size) < address)
5457 		return false;
5458 
5459 	// Validate that the address range does not cross the kernel/user boundary.
5460 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5461 }
5462 
5463 
5464 //	#pragma mark - kernel public API
5465 
5466 
5467 status_t
5468 user_memcpy(void* to, const void* from, size_t size)
5469 {
5470 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5471 		return B_BAD_ADDRESS;
5472 
5473 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5474 		return B_BAD_ADDRESS;
5475 
5476 	return B_OK;
5477 }
5478 
5479 
5480 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5481 	the string in \a to, NULL-terminating the result.
5482 
5483 	\param to Pointer to the destination C-string.
5484 	\param from Pointer to the source C-string.
5485 	\param size Size in bytes of the string buffer pointed to by \a to.
5486 
5487 	\return strlen(\a from).
5488 */
5489 ssize_t
5490 user_strlcpy(char* to, const char* from, size_t size)
5491 {
5492 	if (to == NULL && size != 0)
5493 		return B_BAD_VALUE;
5494 	if (from == NULL)
5495 		return B_BAD_ADDRESS;
5496 
5497 	// Protect the source address from overflows.
5498 	size_t maxSize = size;
5499 	if ((addr_t)from + maxSize < (addr_t)from)
5500 		maxSize -= (addr_t)from + maxSize;
5501 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5502 		maxSize = USER_TOP - (addr_t)from;
5503 
5504 	if (!validate_memory_range(to, maxSize))
5505 		return B_BAD_ADDRESS;
5506 
5507 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5508 	if (result < 0)
5509 		return result;
5510 
5511 	// If we hit the address overflow boundary, fail.
5512 	if ((size_t)result >= maxSize && maxSize < size)
5513 		return B_BAD_ADDRESS;
5514 
5515 	return result;
5516 }
5517 
5518 
5519 status_t
5520 user_memset(void* s, char c, size_t count)
5521 {
5522 	if (!validate_memory_range(s, count))
5523 		return B_BAD_ADDRESS;
5524 
5525 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5526 		return B_BAD_ADDRESS;
5527 
5528 	return B_OK;
5529 }
5530 
5531 
5532 /*!	Wires a single page at the given address.
5533 
5534 	\param team The team whose address space the address belongs to. Supports
5535 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5536 		parameter is ignored.
5537 	\param address address The virtual address to wire down. Does not need to
5538 		be page aligned.
5539 	\param writable If \c true the page shall be writable.
5540 	\param info On success the info is filled in, among other things
5541 		containing the physical address the given virtual one translates to.
5542 	\return \c B_OK, when the page could be wired, another error code otherwise.
5543 */
5544 status_t
5545 vm_wire_page(team_id team, addr_t address, bool writable,
5546 	VMPageWiringInfo* info)
5547 {
5548 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5549 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5550 
5551 	// compute the page protection that is required
5552 	bool isUser = IS_USER_ADDRESS(address);
5553 	uint32 requiredProtection = PAGE_PRESENT
5554 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5555 	if (writable)
5556 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5557 
5558 	// get and read lock the address space
5559 	VMAddressSpace* addressSpace = NULL;
5560 	if (isUser) {
5561 		if (team == B_CURRENT_TEAM)
5562 			addressSpace = VMAddressSpace::GetCurrent();
5563 		else
5564 			addressSpace = VMAddressSpace::Get(team);
5565 	} else
5566 		addressSpace = VMAddressSpace::GetKernel();
5567 	if (addressSpace == NULL)
5568 		return B_ERROR;
5569 
5570 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5571 
5572 	VMTranslationMap* map = addressSpace->TranslationMap();
5573 	status_t error = B_OK;
5574 
5575 	// get the area
5576 	VMArea* area = addressSpace->LookupArea(pageAddress);
5577 	if (area == NULL) {
5578 		addressSpace->Put();
5579 		return B_BAD_ADDRESS;
5580 	}
5581 
5582 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5583 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5584 
5585 	// mark the area range wired
5586 	area->Wire(&info->range);
5587 
5588 	// Lock the area's cache chain and the translation map. Needed to look
5589 	// up the page and play with its wired count.
5590 	cacheChainLocker.LockAllSourceCaches();
5591 	map->Lock();
5592 
5593 	phys_addr_t physicalAddress;
5594 	uint32 flags;
5595 	vm_page* page;
5596 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5597 		&& (flags & requiredProtection) == requiredProtection
5598 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5599 			!= NULL) {
5600 		// Already mapped with the correct permissions -- just increment
5601 		// the page's wired count.
5602 		increment_page_wired_count(page);
5603 
5604 		map->Unlock();
5605 		cacheChainLocker.Unlock();
5606 		addressSpaceLocker.Unlock();
5607 	} else {
5608 		// Let vm_soft_fault() map the page for us, if possible. We need
5609 		// to fully unlock to avoid deadlocks. Since we have already
5610 		// wired the area itself, nothing disturbing will happen with it
5611 		// in the meantime.
5612 		map->Unlock();
5613 		cacheChainLocker.Unlock();
5614 		addressSpaceLocker.Unlock();
5615 
5616 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5617 			isUser, &page);
5618 
5619 		if (error != B_OK) {
5620 			// The page could not be mapped -- clean up.
5621 			VMCache* cache = vm_area_get_locked_cache(area);
5622 			area->Unwire(&info->range);
5623 			cache->ReleaseRefAndUnlock();
5624 			addressSpace->Put();
5625 			return error;
5626 		}
5627 	}
5628 
5629 	info->physicalAddress
5630 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5631 			+ address % B_PAGE_SIZE;
5632 	info->page = page;
5633 
5634 	return B_OK;
5635 }
5636 
5637 
5638 /*!	Unwires a single page previously wired via vm_wire_page().
5639 
5640 	\param info The same object passed to vm_wire_page() before.
5641 */
5642 void
5643 vm_unwire_page(VMPageWiringInfo* info)
5644 {
5645 	// lock the address space
5646 	VMArea* area = info->range.area;
5647 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5648 		// takes over our reference
5649 
5650 	// lock the top cache
5651 	VMCache* cache = vm_area_get_locked_cache(area);
5652 	VMCacheChainLocker cacheChainLocker(cache);
5653 
5654 	if (info->page->Cache() != cache) {
5655 		// The page is not in the top cache, so we lock the whole cache chain
5656 		// before touching the page's wired count.
5657 		cacheChainLocker.LockAllSourceCaches();
5658 	}
5659 
5660 	decrement_page_wired_count(info->page);
5661 
5662 	// remove the wired range from the range
5663 	area->Unwire(&info->range);
5664 
5665 	cacheChainLocker.Unlock();
5666 }
5667 
5668 
5669 /*!	Wires down the given address range in the specified team's address space.
5670 
5671 	If successful the function
5672 	- acquires a reference to the specified team's address space,
5673 	- adds respective wired ranges to all areas that intersect with the given
5674 	  address range,
5675 	- makes sure all pages in the given address range are mapped with the
5676 	  requested access permissions and increments their wired count.
5677 
5678 	It fails, when \a team doesn't specify a valid address space, when any part
5679 	of the specified address range is not covered by areas, when the concerned
5680 	areas don't allow mapping with the requested permissions, or when mapping
5681 	failed for another reason.
5682 
5683 	When successful the call must be balanced by a unlock_memory_etc() call with
5684 	the exact same parameters.
5685 
5686 	\param team Identifies the address (via team ID). \c B_CURRENT_TEAM is
5687 		supported.
5688 	\param address The start of the address range to be wired.
5689 	\param numBytes The size of the address range to be wired.
5690 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5691 		requests that the range must be wired writable ("read from device
5692 		into memory").
5693 	\return \c B_OK on success, another error code otherwise.
5694 */
5695 status_t
5696 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5697 {
5698 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5699 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5700 
5701 	// compute the page protection that is required
5702 	bool isUser = IS_USER_ADDRESS(address);
5703 	bool writable = (flags & B_READ_DEVICE) == 0;
5704 	uint32 requiredProtection = PAGE_PRESENT
5705 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5706 	if (writable)
5707 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5708 
5709 	uint32 mallocFlags = isUser
5710 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5711 
5712 	// get and read lock the address space
5713 	VMAddressSpace* addressSpace = NULL;
5714 	if (isUser) {
5715 		if (team == B_CURRENT_TEAM)
5716 			addressSpace = VMAddressSpace::GetCurrent();
5717 		else
5718 			addressSpace = VMAddressSpace::Get(team);
5719 	} else
5720 		addressSpace = VMAddressSpace::GetKernel();
5721 	if (addressSpace == NULL)
5722 		return B_ERROR;
5723 
5724 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5725 		// We get a new address space reference here. The one we got above will
5726 		// be freed by unlock_memory_etc().
5727 
5728 	VMTranslationMap* map = addressSpace->TranslationMap();
5729 	status_t error = B_OK;
5730 
5731 	// iterate through all concerned areas
5732 	addr_t nextAddress = lockBaseAddress;
5733 	while (nextAddress != lockEndAddress) {
5734 		// get the next area
5735 		VMArea* area = addressSpace->LookupArea(nextAddress);
5736 		if (area == NULL) {
5737 			error = B_BAD_ADDRESS;
5738 			break;
5739 		}
5740 
5741 		addr_t areaStart = nextAddress;
5742 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5743 
5744 		// allocate the wired range (do that before locking the cache to avoid
5745 		// deadlocks)
5746 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5747 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5748 		if (range == NULL) {
5749 			error = B_NO_MEMORY;
5750 			break;
5751 		}
5752 
5753 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5754 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5755 
5756 		// mark the area range wired
5757 		area->Wire(range);
5758 
5759 		// Depending on the area cache type and the wiring, we may not need to
5760 		// look at the individual pages.
5761 		if (area->cache_type == CACHE_TYPE_NULL
5762 			|| area->cache_type == CACHE_TYPE_DEVICE
5763 			|| area->wiring == B_FULL_LOCK
5764 			|| area->wiring == B_CONTIGUOUS) {
5765 			nextAddress = areaEnd;
5766 			continue;
5767 		}
5768 
5769 		// Lock the area's cache chain and the translation map. Needed to look
5770 		// up pages and play with their wired count.
5771 		cacheChainLocker.LockAllSourceCaches();
5772 		map->Lock();
5773 
5774 		// iterate through the pages and wire them
5775 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5776 			phys_addr_t physicalAddress;
5777 			uint32 flags;
5778 
5779 			vm_page* page;
5780 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5781 				&& (flags & requiredProtection) == requiredProtection
5782 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5783 					!= NULL) {
5784 				// Already mapped with the correct permissions -- just increment
5785 				// the page's wired count.
5786 				increment_page_wired_count(page);
5787 			} else {
5788 				// Let vm_soft_fault() map the page for us, if possible. We need
5789 				// to fully unlock to avoid deadlocks. Since we have already
5790 				// wired the area itself, nothing disturbing will happen with it
5791 				// in the meantime.
5792 				map->Unlock();
5793 				cacheChainLocker.Unlock();
5794 				addressSpaceLocker.Unlock();
5795 
5796 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5797 					false, isUser, &page);
5798 
5799 				addressSpaceLocker.Lock();
5800 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5801 				cacheChainLocker.LockAllSourceCaches();
5802 				map->Lock();
5803 			}
5804 
5805 			if (error != B_OK)
5806 				break;
5807 		}
5808 
5809 		map->Unlock();
5810 
5811 		if (error == B_OK) {
5812 			cacheChainLocker.Unlock();
5813 		} else {
5814 			// An error occurred, so abort right here. If the current address
5815 			// is the first in this area, unwire the area, since we won't get
5816 			// to it when reverting what we've done so far.
5817 			if (nextAddress == areaStart) {
5818 				area->Unwire(range);
5819 				cacheChainLocker.Unlock();
5820 				range->~VMAreaWiredRange();
5821 				free_etc(range, mallocFlags);
5822 			} else
5823 				cacheChainLocker.Unlock();
5824 
5825 			break;
5826 		}
5827 	}
5828 
5829 	if (error != B_OK) {
5830 		// An error occurred, so unwire all that we've already wired. Note that
5831 		// even if not a single page was wired, unlock_memory_etc() is called
5832 		// to put the address space reference.
5833 		addressSpaceLocker.Unlock();
5834 		unlock_memory_etc(team, (void*)lockBaseAddress,
5835 			nextAddress - lockBaseAddress, flags);
5836 	}
5837 
5838 	return error;
5839 }
5840 
5841 
5842 status_t
5843 lock_memory(void* address, size_t numBytes, uint32 flags)
5844 {
5845 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5846 }
5847 
5848 
5849 /*!	Unwires an address range previously wired with lock_memory_etc().
5850 
5851 	Note that a call to this function must balance a previous lock_memory_etc()
5852 	call with exactly the same parameters.
5853 */
5854 status_t
5855 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5856 {
5857 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5858 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5859 
5860 	// compute the page protection that is required
5861 	bool isUser = IS_USER_ADDRESS(address);
5862 	bool writable = (flags & B_READ_DEVICE) == 0;
5863 	uint32 requiredProtection = PAGE_PRESENT
5864 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5865 	if (writable)
5866 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5867 
5868 	uint32 mallocFlags = isUser
5869 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5870 
5871 	// get and read lock the address space
5872 	VMAddressSpace* addressSpace = NULL;
5873 	if (isUser) {
5874 		if (team == B_CURRENT_TEAM)
5875 			addressSpace = VMAddressSpace::GetCurrent();
5876 		else
5877 			addressSpace = VMAddressSpace::Get(team);
5878 	} else
5879 		addressSpace = VMAddressSpace::GetKernel();
5880 	if (addressSpace == NULL)
5881 		return B_ERROR;
5882 
5883 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5884 		// Take over the address space reference. We don't unlock until we're
5885 		// done.
5886 
5887 	VMTranslationMap* map = addressSpace->TranslationMap();
5888 	status_t error = B_OK;
5889 
5890 	// iterate through all concerned areas
5891 	addr_t nextAddress = lockBaseAddress;
5892 	while (nextAddress != lockEndAddress) {
5893 		// get the next area
5894 		VMArea* area = addressSpace->LookupArea(nextAddress);
5895 		if (area == NULL) {
5896 			error = B_BAD_ADDRESS;
5897 			break;
5898 		}
5899 
5900 		addr_t areaStart = nextAddress;
5901 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5902 
5903 		// Lock the area's top cache. This is a requirement for
5904 		// VMArea::Unwire().
5905 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5906 
5907 		// Depending on the area cache type and the wiring, we may not need to
5908 		// look at the individual pages.
5909 		if (area->cache_type == CACHE_TYPE_NULL
5910 			|| area->cache_type == CACHE_TYPE_DEVICE
5911 			|| area->wiring == B_FULL_LOCK
5912 			|| area->wiring == B_CONTIGUOUS) {
5913 			// unwire the range (to avoid deadlocks we delete the range after
5914 			// unlocking the cache)
5915 			nextAddress = areaEnd;
5916 			VMAreaWiredRange* range = area->Unwire(areaStart,
5917 				areaEnd - areaStart, writable);
5918 			cacheChainLocker.Unlock();
5919 			if (range != NULL) {
5920 				range->~VMAreaWiredRange();
5921 				free_etc(range, mallocFlags);
5922 			}
5923 			continue;
5924 		}
5925 
5926 		// Lock the area's cache chain and the translation map. Needed to look
5927 		// up pages and play with their wired count.
5928 		cacheChainLocker.LockAllSourceCaches();
5929 		map->Lock();
5930 
5931 		// iterate through the pages and unwire them
5932 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5933 			phys_addr_t physicalAddress;
5934 			uint32 flags;
5935 
5936 			vm_page* page;
5937 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5938 				&& (flags & PAGE_PRESENT) != 0
5939 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5940 					!= NULL) {
5941 				// Already mapped with the correct permissions -- just increment
5942 				// the page's wired count.
5943 				decrement_page_wired_count(page);
5944 			} else {
5945 				panic("unlock_memory_etc(): Failed to unwire page: address "
5946 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5947 					nextAddress);
5948 				error = B_BAD_VALUE;
5949 				break;
5950 			}
5951 		}
5952 
5953 		map->Unlock();
5954 
5955 		// All pages are unwired. Remove the area's wired range as well (to
5956 		// avoid deadlocks we delete the range after unlocking the cache).
5957 		VMAreaWiredRange* range = area->Unwire(areaStart,
5958 			areaEnd - areaStart, writable);
5959 
5960 		cacheChainLocker.Unlock();
5961 
5962 		if (range != NULL) {
5963 			range->~VMAreaWiredRange();
5964 			free_etc(range, mallocFlags);
5965 		}
5966 
5967 		if (error != B_OK)
5968 			break;
5969 	}
5970 
5971 	// get rid of the address space reference lock_memory_etc() acquired
5972 	addressSpace->Put();
5973 
5974 	return error;
5975 }
5976 
5977 
5978 status_t
5979 unlock_memory(void* address, size_t numBytes, uint32 flags)
5980 {
5981 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5982 }
5983 
5984 
5985 /*!	Similar to get_memory_map(), but also allows to specify the address space
5986 	for the memory in question and has a saner semantics.
5987 	Returns \c B_OK when the complete range could be translated or
5988 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5989 	case the actual number of entries is written to \c *_numEntries. Any other
5990 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5991 	in this case.
5992 */
5993 status_t
5994 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5995 	physical_entry* table, uint32* _numEntries)
5996 {
5997 	uint32 numEntries = *_numEntries;
5998 	*_numEntries = 0;
5999 
6000 	VMAddressSpace* addressSpace;
6001 	addr_t virtualAddress = (addr_t)address;
6002 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
6003 	phys_addr_t physicalAddress;
6004 	status_t status = B_OK;
6005 	int32 index = -1;
6006 	addr_t offset = 0;
6007 	bool interrupts = are_interrupts_enabled();
6008 
6009 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6010 		"entries)\n", team, address, numBytes, numEntries));
6011 
6012 	if (numEntries == 0 || numBytes == 0)
6013 		return B_BAD_VALUE;
6014 
6015 	// in which address space is the address to be found?
6016 	if (IS_USER_ADDRESS(virtualAddress)) {
6017 		if (team == B_CURRENT_TEAM)
6018 			addressSpace = VMAddressSpace::GetCurrent();
6019 		else
6020 			addressSpace = VMAddressSpace::Get(team);
6021 	} else
6022 		addressSpace = VMAddressSpace::GetKernel();
6023 
6024 	if (addressSpace == NULL)
6025 		return B_ERROR;
6026 
6027 	VMTranslationMap* map = addressSpace->TranslationMap();
6028 
6029 	if (interrupts)
6030 		map->Lock();
6031 
6032 	while (offset < numBytes) {
6033 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6034 		uint32 flags;
6035 
6036 		if (interrupts) {
6037 			status = map->Query((addr_t)address + offset, &physicalAddress,
6038 				&flags);
6039 		} else {
6040 			status = map->QueryInterrupt((addr_t)address + offset,
6041 				&physicalAddress, &flags);
6042 		}
6043 		if (status < B_OK)
6044 			break;
6045 		if ((flags & PAGE_PRESENT) == 0) {
6046 			panic("get_memory_map() called on unmapped memory!");
6047 			return B_BAD_ADDRESS;
6048 		}
6049 
6050 		if (index < 0 && pageOffset > 0) {
6051 			physicalAddress += pageOffset;
6052 			if (bytes > B_PAGE_SIZE - pageOffset)
6053 				bytes = B_PAGE_SIZE - pageOffset;
6054 		}
6055 
6056 		// need to switch to the next physical_entry?
6057 		if (index < 0 || table[index].address
6058 				!= physicalAddress - table[index].size) {
6059 			if ((uint32)++index + 1 > numEntries) {
6060 				// table to small
6061 				break;
6062 			}
6063 			table[index].address = physicalAddress;
6064 			table[index].size = bytes;
6065 		} else {
6066 			// page does fit in current entry
6067 			table[index].size += bytes;
6068 		}
6069 
6070 		offset += bytes;
6071 	}
6072 
6073 	if (interrupts)
6074 		map->Unlock();
6075 
6076 	if (status != B_OK)
6077 		return status;
6078 
6079 	if ((uint32)index + 1 > numEntries) {
6080 		*_numEntries = index;
6081 		return B_BUFFER_OVERFLOW;
6082 	}
6083 
6084 	*_numEntries = index + 1;
6085 	return B_OK;
6086 }
6087 
6088 
6089 /*!	According to the BeBook, this function should always succeed.
6090 	This is no longer the case.
6091 */
6092 extern "C" int32
6093 __get_memory_map_haiku(const void* address, size_t numBytes,
6094 	physical_entry* table, int32 numEntries)
6095 {
6096 	uint32 entriesRead = numEntries;
6097 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6098 		table, &entriesRead);
6099 	if (error != B_OK)
6100 		return error;
6101 
6102 	// close the entry list
6103 
6104 	// if it's only one entry, we will silently accept the missing ending
6105 	if (numEntries == 1)
6106 		return B_OK;
6107 
6108 	if (entriesRead + 1 > (uint32)numEntries)
6109 		return B_BUFFER_OVERFLOW;
6110 
6111 	table[entriesRead].address = 0;
6112 	table[entriesRead].size = 0;
6113 
6114 	return B_OK;
6115 }
6116 
6117 
6118 area_id
6119 area_for(void* address)
6120 {
6121 	return vm_area_for((addr_t)address, true);
6122 }
6123 
6124 
6125 area_id
6126 find_area(const char* name)
6127 {
6128 	return VMAreas::Find(name);
6129 }
6130 
6131 
6132 status_t
6133 _get_area_info(area_id id, area_info* info, size_t size)
6134 {
6135 	if (size != sizeof(area_info) || info == NULL)
6136 		return B_BAD_VALUE;
6137 
6138 	AddressSpaceReadLocker locker;
6139 	VMArea* area;
6140 	status_t status = locker.SetFromArea(id, area);
6141 	if (status != B_OK)
6142 		return status;
6143 
6144 	fill_area_info(area, info, size);
6145 	return B_OK;
6146 }
6147 
6148 
6149 status_t
6150 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6151 {
6152 	addr_t nextBase = *(addr_t*)cookie;
6153 
6154 	// we're already through the list
6155 	if (nextBase == (addr_t)-1)
6156 		return B_ENTRY_NOT_FOUND;
6157 
6158 	if (team == B_CURRENT_TEAM)
6159 		team = team_get_current_team_id();
6160 
6161 	AddressSpaceReadLocker locker(team);
6162 	if (!locker.IsLocked())
6163 		return B_BAD_TEAM_ID;
6164 
6165 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6166 	if (area == NULL) {
6167 		nextBase = (addr_t)-1;
6168 		return B_ENTRY_NOT_FOUND;
6169 	}
6170 
6171 	fill_area_info(area, info, size);
6172 	*cookie = (ssize_t)(area->Base() + 1);
6173 
6174 	return B_OK;
6175 }
6176 
6177 
6178 status_t
6179 set_area_protection(area_id area, uint32 newProtection)
6180 {
6181 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6182 		newProtection, true);
6183 }
6184 
6185 
6186 status_t
6187 resize_area(area_id areaID, size_t newSize)
6188 {
6189 	return vm_resize_area(areaID, newSize, true);
6190 }
6191 
6192 
6193 /*!	Transfers the specified area to a new team. The caller must be the owner
6194 	of the area.
6195 */
6196 area_id
6197 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6198 	bool kernel)
6199 {
6200 	area_info info;
6201 	status_t status = get_area_info(id, &info);
6202 	if (status != B_OK)
6203 		return status;
6204 
6205 	if (info.team != thread_get_current_thread()->team->id)
6206 		return B_PERMISSION_DENIED;
6207 
6208 	// We need to mark the area cloneable so the following operations work.
6209 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6210 	if (status != B_OK)
6211 		return status;
6212 
6213 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6214 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6215 	if (clonedArea < 0)
6216 		return clonedArea;
6217 
6218 	status = vm_delete_area(info.team, id, kernel);
6219 	if (status != B_OK) {
6220 		vm_delete_area(target, clonedArea, kernel);
6221 		return status;
6222 	}
6223 
6224 	// Now we can reset the protection to whatever it was before.
6225 	set_area_protection(clonedArea, info.protection);
6226 
6227 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6228 
6229 	return clonedArea;
6230 }
6231 
6232 
6233 extern "C" area_id
6234 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6235 	size_t numBytes, uint32 addressSpec, uint32 protection,
6236 	void** _virtualAddress)
6237 {
6238 	if (!arch_vm_supports_protection(protection))
6239 		return B_NOT_SUPPORTED;
6240 
6241 	fix_protection(&protection);
6242 
6243 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6244 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6245 		false);
6246 }
6247 
6248 
6249 area_id
6250 clone_area(const char* name, void** _address, uint32 addressSpec,
6251 	uint32 protection, area_id source)
6252 {
6253 	if ((protection & B_KERNEL_PROTECTION) == 0)
6254 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6255 
6256 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6257 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6258 }
6259 
6260 
6261 area_id
6262 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6263 	uint32 protection, uint32 flags, uint32 guardSize,
6264 	const virtual_address_restrictions* virtualAddressRestrictions,
6265 	const physical_address_restrictions* physicalAddressRestrictions,
6266 	void** _address)
6267 {
6268 	fix_protection(&protection);
6269 
6270 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6271 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6272 		true, _address);
6273 }
6274 
6275 
6276 extern "C" area_id
6277 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6278 	size_t size, uint32 lock, uint32 protection)
6279 {
6280 	fix_protection(&protection);
6281 
6282 	virtual_address_restrictions virtualRestrictions = {};
6283 	virtualRestrictions.address = *_address;
6284 	virtualRestrictions.address_specification = addressSpec;
6285 	physical_address_restrictions physicalRestrictions = {};
6286 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6287 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6288 		true, _address);
6289 }
6290 
6291 
6292 status_t
6293 delete_area(area_id area)
6294 {
6295 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6296 }
6297 
6298 
6299 //	#pragma mark - Userland syscalls
6300 
6301 
6302 status_t
6303 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6304 	addr_t size)
6305 {
6306 	// filter out some unavailable values (for userland)
6307 	switch (addressSpec) {
6308 		case B_ANY_KERNEL_ADDRESS:
6309 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6310 			return B_BAD_VALUE;
6311 	}
6312 
6313 	addr_t address;
6314 
6315 	if (!IS_USER_ADDRESS(userAddress)
6316 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6317 		return B_BAD_ADDRESS;
6318 
6319 	status_t status = vm_reserve_address_range(
6320 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6321 		RESERVED_AVOID_BASE);
6322 	if (status != B_OK)
6323 		return status;
6324 
6325 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6326 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6327 			(void*)address, size);
6328 		return B_BAD_ADDRESS;
6329 	}
6330 
6331 	return B_OK;
6332 }
6333 
6334 
6335 status_t
6336 _user_unreserve_address_range(addr_t address, addr_t size)
6337 {
6338 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6339 		(void*)address, size);
6340 }
6341 
6342 
6343 area_id
6344 _user_area_for(void* address)
6345 {
6346 	return vm_area_for((addr_t)address, false);
6347 }
6348 
6349 
6350 area_id
6351 _user_find_area(const char* userName)
6352 {
6353 	char name[B_OS_NAME_LENGTH];
6354 
6355 	if (!IS_USER_ADDRESS(userName)
6356 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6357 		return B_BAD_ADDRESS;
6358 
6359 	return find_area(name);
6360 }
6361 
6362 
6363 status_t
6364 _user_get_area_info(area_id area, area_info* userInfo)
6365 {
6366 	if (!IS_USER_ADDRESS(userInfo))
6367 		return B_BAD_ADDRESS;
6368 
6369 	area_info info;
6370 	status_t status = get_area_info(area, &info);
6371 	if (status < B_OK)
6372 		return status;
6373 
6374 	// TODO: do we want to prevent userland from seeing kernel protections?
6375 	//info.protection &= B_USER_PROTECTION;
6376 
6377 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6378 		return B_BAD_ADDRESS;
6379 
6380 	return status;
6381 }
6382 
6383 
6384 status_t
6385 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6386 {
6387 	ssize_t cookie;
6388 
6389 	if (!IS_USER_ADDRESS(userCookie)
6390 		|| !IS_USER_ADDRESS(userInfo)
6391 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6392 		return B_BAD_ADDRESS;
6393 
6394 	area_info info;
6395 	status_t status = _get_next_area_info(team, &cookie, &info,
6396 		sizeof(area_info));
6397 	if (status != B_OK)
6398 		return status;
6399 
6400 	//info.protection &= B_USER_PROTECTION;
6401 
6402 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6403 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6404 		return B_BAD_ADDRESS;
6405 
6406 	return status;
6407 }
6408 
6409 
6410 status_t
6411 _user_set_area_protection(area_id area, uint32 newProtection)
6412 {
6413 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6414 		return B_BAD_VALUE;
6415 
6416 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6417 		newProtection, false);
6418 }
6419 
6420 
6421 status_t
6422 _user_resize_area(area_id area, size_t newSize)
6423 {
6424 	// TODO: Since we restrict deleting of areas to those owned by the team,
6425 	// we should also do that for resizing (check other functions, too).
6426 	return vm_resize_area(area, newSize, false);
6427 }
6428 
6429 
6430 area_id
6431 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6432 	team_id target)
6433 {
6434 	// filter out some unavailable values (for userland)
6435 	switch (addressSpec) {
6436 		case B_ANY_KERNEL_ADDRESS:
6437 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6438 			return B_BAD_VALUE;
6439 	}
6440 
6441 	void* address;
6442 	if (!IS_USER_ADDRESS(userAddress)
6443 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6444 		return B_BAD_ADDRESS;
6445 
6446 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6447 	if (newArea < B_OK)
6448 		return newArea;
6449 
6450 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6451 		return B_BAD_ADDRESS;
6452 
6453 	return newArea;
6454 }
6455 
6456 
6457 area_id
6458 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6459 	uint32 protection, area_id sourceArea)
6460 {
6461 	char name[B_OS_NAME_LENGTH];
6462 	void* address;
6463 
6464 	// filter out some unavailable values (for userland)
6465 	switch (addressSpec) {
6466 		case B_ANY_KERNEL_ADDRESS:
6467 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6468 			return B_BAD_VALUE;
6469 	}
6470 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6471 		return B_BAD_VALUE;
6472 
6473 	if (!IS_USER_ADDRESS(userName)
6474 		|| !IS_USER_ADDRESS(userAddress)
6475 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6476 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6477 		return B_BAD_ADDRESS;
6478 
6479 	fix_protection(&protection);
6480 
6481 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6482 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6483 		false);
6484 	if (clonedArea < B_OK)
6485 		return clonedArea;
6486 
6487 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6488 		delete_area(clonedArea);
6489 		return B_BAD_ADDRESS;
6490 	}
6491 
6492 	return clonedArea;
6493 }
6494 
6495 
6496 area_id
6497 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6498 	size_t size, uint32 lock, uint32 protection)
6499 {
6500 	char name[B_OS_NAME_LENGTH];
6501 	void* address;
6502 
6503 	// filter out some unavailable values (for userland)
6504 	switch (addressSpec) {
6505 		case B_ANY_KERNEL_ADDRESS:
6506 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6507 			return B_BAD_VALUE;
6508 	}
6509 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6510 		return B_BAD_VALUE;
6511 
6512 	if (!IS_USER_ADDRESS(userName)
6513 		|| !IS_USER_ADDRESS(userAddress)
6514 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6515 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6516 		return B_BAD_ADDRESS;
6517 
6518 	if (addressSpec == B_EXACT_ADDRESS
6519 		&& IS_KERNEL_ADDRESS(address))
6520 		return B_BAD_VALUE;
6521 
6522 	if (addressSpec == B_ANY_ADDRESS)
6523 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6524 	if (addressSpec == B_BASE_ADDRESS)
6525 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6526 
6527 	fix_protection(&protection);
6528 
6529 	virtual_address_restrictions virtualRestrictions = {};
6530 	virtualRestrictions.address = address;
6531 	virtualRestrictions.address_specification = addressSpec;
6532 	physical_address_restrictions physicalRestrictions = {};
6533 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6534 		size, lock, protection, 0, 0, &virtualRestrictions,
6535 		&physicalRestrictions, false, &address);
6536 
6537 	if (area >= B_OK
6538 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6539 		delete_area(area);
6540 		return B_BAD_ADDRESS;
6541 	}
6542 
6543 	return area;
6544 }
6545 
6546 
6547 status_t
6548 _user_delete_area(area_id area)
6549 {
6550 	// Unlike the BeOS implementation, you can now only delete areas
6551 	// that you have created yourself from userland.
6552 	// The documentation to delete_area() explicitly states that this
6553 	// will be restricted in the future, and so it will.
6554 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6555 }
6556 
6557 
6558 // TODO: create a BeOS style call for this!
6559 
6560 area_id
6561 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6562 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6563 	int fd, off_t offset)
6564 {
6565 	char name[B_OS_NAME_LENGTH];
6566 	void* address;
6567 	area_id area;
6568 
6569 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6570 		return B_BAD_VALUE;
6571 
6572 	fix_protection(&protection);
6573 
6574 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6575 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6576 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6577 		return B_BAD_ADDRESS;
6578 
6579 	if (addressSpec == B_EXACT_ADDRESS) {
6580 		if ((addr_t)address + size < (addr_t)address
6581 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6582 			return B_BAD_VALUE;
6583 		}
6584 		if (!IS_USER_ADDRESS(address)
6585 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6586 			return B_BAD_ADDRESS;
6587 		}
6588 	}
6589 
6590 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6591 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6592 		false);
6593 	if (area < B_OK)
6594 		return area;
6595 
6596 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6597 		return B_BAD_ADDRESS;
6598 
6599 	return area;
6600 }
6601 
6602 
6603 status_t
6604 _user_unmap_memory(void* _address, size_t size)
6605 {
6606 	addr_t address = (addr_t)_address;
6607 
6608 	// check params
6609 	if (size == 0 || (addr_t)address + size < (addr_t)address
6610 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6611 		return B_BAD_VALUE;
6612 	}
6613 
6614 	if (!IS_USER_ADDRESS(address)
6615 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6616 		return B_BAD_ADDRESS;
6617 	}
6618 
6619 	// Write lock the address space and ensure the address range is not wired.
6620 	AddressSpaceWriteLocker locker;
6621 	do {
6622 		status_t status = locker.SetTo(team_get_current_team_id());
6623 		if (status != B_OK)
6624 			return status;
6625 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6626 			size, &locker));
6627 
6628 	// unmap
6629 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6630 }
6631 
6632 
6633 status_t
6634 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6635 {
6636 	// check address range
6637 	addr_t address = (addr_t)_address;
6638 	size = PAGE_ALIGN(size);
6639 
6640 	if ((address % B_PAGE_SIZE) != 0)
6641 		return B_BAD_VALUE;
6642 	if (!is_user_address_range(_address, size)) {
6643 		// weird error code required by POSIX
6644 		return ENOMEM;
6645 	}
6646 
6647 	// extend and check protection
6648 	if ((protection & ~B_USER_PROTECTION) != 0)
6649 		return B_BAD_VALUE;
6650 
6651 	fix_protection(&protection);
6652 
6653 	// We need to write lock the address space, since we're going to play with
6654 	// the areas. Also make sure that none of the areas is wired and that we're
6655 	// actually allowed to change the protection.
6656 	AddressSpaceWriteLocker locker;
6657 
6658 	bool restart;
6659 	do {
6660 		restart = false;
6661 
6662 		status_t status = locker.SetTo(team_get_current_team_id());
6663 		if (status != B_OK)
6664 			return status;
6665 
6666 		// First round: Check whether the whole range is covered by areas and we
6667 		// are allowed to modify them.
6668 		addr_t currentAddress = address;
6669 		size_t sizeLeft = size;
6670 		while (sizeLeft > 0) {
6671 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6672 			if (area == NULL)
6673 				return B_NO_MEMORY;
6674 
6675 			if ((area->protection & B_KERNEL_AREA) != 0)
6676 				return B_NOT_ALLOWED;
6677 			if (area->protection_max != 0
6678 				&& (protection & area->protection_max) != (protection & B_USER_PROTECTION)) {
6679 				return B_NOT_ALLOWED;
6680 			}
6681 
6682 			addr_t offset = currentAddress - area->Base();
6683 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6684 
6685 			AreaCacheLocker cacheLocker(area);
6686 
6687 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6688 					&locker, &cacheLocker)) {
6689 				restart = true;
6690 				break;
6691 			}
6692 
6693 			cacheLocker.Unlock();
6694 
6695 			currentAddress += rangeSize;
6696 			sizeLeft -= rangeSize;
6697 		}
6698 	} while (restart);
6699 
6700 	// Second round: If the protections differ from that of the area, create a
6701 	// page protection array and re-map mapped pages.
6702 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6703 	addr_t currentAddress = address;
6704 	size_t sizeLeft = size;
6705 	while (sizeLeft > 0) {
6706 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6707 		if (area == NULL)
6708 			return B_NO_MEMORY;
6709 
6710 		addr_t offset = currentAddress - area->Base();
6711 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6712 
6713 		currentAddress += rangeSize;
6714 		sizeLeft -= rangeSize;
6715 
6716 		if (area->page_protections == NULL) {
6717 			if (area->protection == protection)
6718 				continue;
6719 			if (offset == 0 && rangeSize == area->Size()) {
6720 				status_t status = vm_set_area_protection(area->address_space->ID(),
6721 					area->id, protection, false);
6722 				if (status != B_OK)
6723 					return status;
6724 				continue;
6725 			}
6726 
6727 			status_t status = allocate_area_page_protections(area);
6728 			if (status != B_OK)
6729 				return status;
6730 		}
6731 
6732 		// We need to lock the complete cache chain, since we potentially unmap
6733 		// pages of lower caches.
6734 		VMCache* topCache = vm_area_get_locked_cache(area);
6735 		VMCacheChainLocker cacheChainLocker(topCache);
6736 		cacheChainLocker.LockAllSourceCaches();
6737 
6738 		for (addr_t pageAddress = area->Base() + offset;
6739 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6740 			map->Lock();
6741 
6742 			set_area_page_protection(area, pageAddress, protection);
6743 
6744 			phys_addr_t physicalAddress;
6745 			uint32 flags;
6746 
6747 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6748 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6749 				map->Unlock();
6750 				continue;
6751 			}
6752 
6753 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6754 			if (page == NULL) {
6755 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6756 					"\n", area, physicalAddress);
6757 				map->Unlock();
6758 				return B_ERROR;
6759 			}
6760 
6761 			// If the page is not in the topmost cache and write access is
6762 			// requested, we have to unmap it. Otherwise we can re-map it with
6763 			// the new protection.
6764 			bool unmapPage = page->Cache() != topCache
6765 				&& (protection & B_WRITE_AREA) != 0;
6766 
6767 			if (!unmapPage)
6768 				map->ProtectPage(area, pageAddress, protection);
6769 
6770 			map->Unlock();
6771 
6772 			if (unmapPage) {
6773 				DEBUG_PAGE_ACCESS_START(page);
6774 				unmap_page(area, pageAddress);
6775 				DEBUG_PAGE_ACCESS_END(page);
6776 			}
6777 		}
6778 	}
6779 
6780 	return B_OK;
6781 }
6782 
6783 
6784 status_t
6785 _user_sync_memory(void* _address, size_t size, uint32 flags)
6786 {
6787 	addr_t address = (addr_t)_address;
6788 	size = PAGE_ALIGN(size);
6789 
6790 	// check params
6791 	if ((address % B_PAGE_SIZE) != 0)
6792 		return B_BAD_VALUE;
6793 	if (!is_user_address_range(_address, size)) {
6794 		// weird error code required by POSIX
6795 		return ENOMEM;
6796 	}
6797 
6798 	bool writeSync = (flags & MS_SYNC) != 0;
6799 	bool writeAsync = (flags & MS_ASYNC) != 0;
6800 	if (writeSync && writeAsync)
6801 		return B_BAD_VALUE;
6802 
6803 	if (size == 0 || (!writeSync && !writeAsync))
6804 		return B_OK;
6805 
6806 	// iterate through the range and sync all concerned areas
6807 	while (size > 0) {
6808 		// read lock the address space
6809 		AddressSpaceReadLocker locker;
6810 		status_t error = locker.SetTo(team_get_current_team_id());
6811 		if (error != B_OK)
6812 			return error;
6813 
6814 		// get the first area
6815 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6816 		if (area == NULL)
6817 			return B_NO_MEMORY;
6818 
6819 		uint32 offset = address - area->Base();
6820 		size_t rangeSize = min_c(area->Size() - offset, size);
6821 		offset += area->cache_offset;
6822 
6823 		// lock the cache
6824 		AreaCacheLocker cacheLocker(area);
6825 		if (!cacheLocker)
6826 			return B_BAD_VALUE;
6827 		VMCache* cache = area->cache;
6828 
6829 		locker.Unlock();
6830 
6831 		uint32 firstPage = offset >> PAGE_SHIFT;
6832 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6833 
6834 		// write the pages
6835 		if (cache->type == CACHE_TYPE_VNODE) {
6836 			if (writeSync) {
6837 				// synchronous
6838 				error = vm_page_write_modified_page_range(cache, firstPage,
6839 					endPage);
6840 				if (error != B_OK)
6841 					return error;
6842 			} else {
6843 				// asynchronous
6844 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6845 				// TODO: This is probably not quite what is supposed to happen.
6846 				// Especially when a lot has to be written, it might take ages
6847 				// until it really hits the disk.
6848 			}
6849 		}
6850 
6851 		address += rangeSize;
6852 		size -= rangeSize;
6853 	}
6854 
6855 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6856 	// synchronize multiple mappings of the same file. In our VM they never get
6857 	// out of sync, though, so we don't have to do anything.
6858 
6859 	return B_OK;
6860 }
6861 
6862 
6863 status_t
6864 _user_memory_advice(void* _address, size_t size, uint32 advice)
6865 {
6866 	addr_t address = (addr_t)_address;
6867 	if ((address % B_PAGE_SIZE) != 0)
6868 		return B_BAD_VALUE;
6869 
6870 	size = PAGE_ALIGN(size);
6871 	if (!is_user_address_range(_address, size)) {
6872 		// weird error code required by POSIX
6873 		return B_NO_MEMORY;
6874 	}
6875 
6876 	switch (advice) {
6877 		case MADV_NORMAL:
6878 		case MADV_SEQUENTIAL:
6879 		case MADV_RANDOM:
6880 		case MADV_WILLNEED:
6881 		case MADV_DONTNEED:
6882 			// TODO: Implement!
6883 			break;
6884 
6885 		case MADV_FREE:
6886 		{
6887 			AddressSpaceWriteLocker locker;
6888 			do {
6889 				status_t status = locker.SetTo(team_get_current_team_id());
6890 				if (status != B_OK)
6891 					return status;
6892 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
6893 					address, size, &locker));
6894 
6895 			discard_address_range(locker.AddressSpace(), address, size, false);
6896 			break;
6897 		}
6898 
6899 		default:
6900 			return B_BAD_VALUE;
6901 	}
6902 
6903 	return B_OK;
6904 }
6905 
6906 
6907 status_t
6908 _user_get_memory_properties(team_id teamID, const void* address,
6909 	uint32* _protected, uint32* _lock)
6910 {
6911 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6912 		return B_BAD_ADDRESS;
6913 
6914 	AddressSpaceReadLocker locker;
6915 	status_t error = locker.SetTo(teamID);
6916 	if (error != B_OK)
6917 		return error;
6918 
6919 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6920 	if (area == NULL)
6921 		return B_NO_MEMORY;
6922 
6923 	uint32 protection = get_area_page_protection(area, (addr_t)address);
6924 	uint32 wiring = area->wiring;
6925 
6926 	locker.Unlock();
6927 
6928 	error = user_memcpy(_protected, &protection, sizeof(protection));
6929 	if (error != B_OK)
6930 		return error;
6931 
6932 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6933 
6934 	return error;
6935 }
6936 
6937 
6938 static status_t
6939 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
6940 {
6941 #if ENABLE_SWAP_SUPPORT
6942 	// check address range
6943 	addr_t address = (addr_t)_address;
6944 	size = PAGE_ALIGN(size);
6945 
6946 	if ((address % B_PAGE_SIZE) != 0)
6947 		return EINVAL;
6948 	if (!is_user_address_range(_address, size))
6949 		return EINVAL;
6950 
6951 	const addr_t endAddress = address + size;
6952 
6953 	AddressSpaceReadLocker addressSpaceLocker;
6954 	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
6955 	if (error != B_OK)
6956 		return error;
6957 	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
6958 
6959 	// iterate through all concerned areas
6960 	addr_t nextAddress = address;
6961 	while (nextAddress != endAddress) {
6962 		// get the next area
6963 		VMArea* area = addressSpace->LookupArea(nextAddress);
6964 		if (area == NULL) {
6965 			error = B_BAD_ADDRESS;
6966 			break;
6967 		}
6968 
6969 		const addr_t areaStart = nextAddress;
6970 		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
6971 		nextAddress = areaEnd;
6972 
6973 		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6974 		if (error != B_OK) {
6975 			// We don't need to unset or reset things on failure.
6976 			break;
6977 		}
6978 
6979 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6980 		VMAnonymousCache* anonCache = NULL;
6981 		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
6982 			// This memory will aready never be swapped. Nothing to do.
6983 		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
6984 			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
6985 				areaEnd - areaStart, swappable);
6986 		} else {
6987 			// Some other cache type? We cannot affect anything here.
6988 			error = EINVAL;
6989 		}
6990 
6991 		cacheChainLocker.Unlock();
6992 
6993 		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6994 		if (error != B_OK)
6995 			break;
6996 	}
6997 
6998 	return error;
6999 #else
7000 	// No swap support? Nothing to do.
7001 	return B_OK;
7002 #endif
7003 }
7004 
7005 
7006 status_t
7007 _user_mlock(const void* _address, size_t size)
7008 {
7009 	return user_set_memory_swappable(_address, size, false);
7010 }
7011 
7012 
7013 status_t
7014 _user_munlock(const void* _address, size_t size)
7015 {
7016 	// TODO: B_SHARED_AREAs need to be handled a bit differently:
7017 	// if multiple clones of an area had mlock() called on them,
7018 	// munlock() must also be called on all of them to actually unlock.
7019 	// (At present, the first munlock() will unlock all.)
7020 	// TODO: fork() should automatically unlock memory in the child.
7021 	return user_set_memory_swappable(_address, size, true);
7022 }
7023 
7024 
7025 // #pragma mark -- compatibility
7026 
7027 
7028 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7029 
7030 
7031 struct physical_entry_beos {
7032 	uint32	address;
7033 	uint32	size;
7034 };
7035 
7036 
7037 /*!	The physical_entry structure has changed. We need to translate it to the
7038 	old one.
7039 */
7040 extern "C" int32
7041 __get_memory_map_beos(const void* _address, size_t numBytes,
7042 	physical_entry_beos* table, int32 numEntries)
7043 {
7044 	if (numEntries <= 0)
7045 		return B_BAD_VALUE;
7046 
7047 	const uint8* address = (const uint8*)_address;
7048 
7049 	int32 count = 0;
7050 	while (numBytes > 0 && count < numEntries) {
7051 		physical_entry entry;
7052 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
7053 		if (result < 0) {
7054 			if (result != B_BUFFER_OVERFLOW)
7055 				return result;
7056 		}
7057 
7058 		if (entry.address >= (phys_addr_t)1 << 32) {
7059 			panic("get_memory_map(): Address is greater 4 GB!");
7060 			return B_ERROR;
7061 		}
7062 
7063 		table[count].address = entry.address;
7064 		table[count++].size = entry.size;
7065 
7066 		address += entry.size;
7067 		numBytes -= entry.size;
7068 	}
7069 
7070 	// null-terminate the table, if possible
7071 	if (count < numEntries) {
7072 		table[count].address = 0;
7073 		table[count].size = 0;
7074 	}
7075 
7076 	return B_OK;
7077 }
7078 
7079 
7080 /*!	The type of the \a physicalAddress parameter has changed from void* to
7081 	phys_addr_t.
7082 */
7083 extern "C" area_id
7084 __map_physical_memory_beos(const char* name, void* physicalAddress,
7085 	size_t numBytes, uint32 addressSpec, uint32 protection,
7086 	void** _virtualAddress)
7087 {
7088 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7089 		addressSpec, protection, _virtualAddress);
7090 }
7091 
7092 
7093 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7094 	we meddle with the \a lock parameter to force 32 bit.
7095 */
7096 extern "C" area_id
7097 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7098 	size_t size, uint32 lock, uint32 protection)
7099 {
7100 	switch (lock) {
7101 		case B_NO_LOCK:
7102 			break;
7103 		case B_FULL_LOCK:
7104 		case B_LAZY_LOCK:
7105 			lock = B_32_BIT_FULL_LOCK;
7106 			break;
7107 		case B_CONTIGUOUS:
7108 			lock = B_32_BIT_CONTIGUOUS;
7109 			break;
7110 	}
7111 
7112 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7113 		protection);
7114 }
7115 
7116 
7117 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7118 	"BASE");
7119 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7120 	"map_physical_memory@", "BASE");
7121 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7122 	"BASE");
7123 
7124 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7125 	"get_memory_map@@", "1_ALPHA3");
7126 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7127 	"map_physical_memory@@", "1_ALPHA3");
7128 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7129 	"1_ALPHA3");
7130 
7131 
7132 #else
7133 
7134 
7135 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7136 	"get_memory_map@@", "BASE");
7137 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7138 	"map_physical_memory@@", "BASE");
7139 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7140 	"BASE");
7141 
7142 
7143 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7144