xref: /haiku/src/system/kernel/vm/vm.cpp (revision e82f2e19431b5797fd18c8d7bf0f677080894103)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/ThreadAutoLock.h>
51 #include <vm/vm_page.h>
52 #include <vm/vm_priv.h>
53 #include <vm/VMAddressSpace.h>
54 #include <vm/VMArea.h>
55 #include <vm/VMCache.h>
56 
57 #include "VMAddressSpaceLocking.h"
58 #include "VMAnonymousCache.h"
59 #include "VMAnonymousNoSwapCache.h"
60 #include "IORequest.h"
61 
62 
63 //#define TRACE_VM
64 //#define TRACE_FAULTS
65 #ifdef TRACE_VM
66 #	define TRACE(x) dprintf x
67 #else
68 #	define TRACE(x) ;
69 #endif
70 #ifdef TRACE_FAULTS
71 #	define FTRACE(x) dprintf x
72 #else
73 #	define FTRACE(x) ;
74 #endif
75 
76 
77 namespace {
78 
79 class AreaCacheLocking {
80 public:
81 	inline bool Lock(VMCache* lockable)
82 	{
83 		return false;
84 	}
85 
86 	inline void Unlock(VMCache* lockable)
87 	{
88 		vm_area_put_locked_cache(lockable);
89 	}
90 };
91 
92 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
93 public:
94 	inline AreaCacheLocker(VMCache* cache = NULL)
95 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
96 	{
97 	}
98 
99 	inline AreaCacheLocker(VMArea* area)
100 		: AutoLocker<VMCache, AreaCacheLocking>()
101 	{
102 		SetTo(area);
103 	}
104 
105 	inline void SetTo(VMCache* cache, bool alreadyLocked)
106 	{
107 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
108 	}
109 
110 	inline void SetTo(VMArea* area)
111 	{
112 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
113 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
114 	}
115 };
116 
117 
118 class VMCacheChainLocker {
119 public:
120 	VMCacheChainLocker()
121 		:
122 		fTopCache(NULL),
123 		fBottomCache(NULL)
124 	{
125 	}
126 
127 	VMCacheChainLocker(VMCache* topCache)
128 		:
129 		fTopCache(topCache),
130 		fBottomCache(topCache)
131 	{
132 	}
133 
134 	~VMCacheChainLocker()
135 	{
136 		Unlock();
137 	}
138 
139 	void SetTo(VMCache* topCache)
140 	{
141 		fTopCache = topCache;
142 		fBottomCache = topCache;
143 
144 		if (topCache != NULL)
145 			topCache->SetUserData(NULL);
146 	}
147 
148 	VMCache* LockSourceCache()
149 	{
150 		if (fBottomCache == NULL || fBottomCache->source == NULL)
151 			return NULL;
152 
153 		VMCache* previousCache = fBottomCache;
154 
155 		fBottomCache = fBottomCache->source;
156 		fBottomCache->Lock();
157 		fBottomCache->AcquireRefLocked();
158 		fBottomCache->SetUserData(previousCache);
159 
160 		return fBottomCache;
161 	}
162 
163 	void LockAllSourceCaches()
164 	{
165 		while (LockSourceCache() != NULL) {
166 		}
167 	}
168 
169 	void Unlock(VMCache* exceptCache = NULL)
170 	{
171 		if (fTopCache == NULL)
172 			return;
173 
174 		// Unlock caches in source -> consumer direction. This is important to
175 		// avoid double-locking and a reversal of locking order in case a cache
176 		// is eligible for merging.
177 		VMCache* cache = fBottomCache;
178 		while (cache != NULL) {
179 			VMCache* nextCache = (VMCache*)cache->UserData();
180 			if (cache != exceptCache)
181 				cache->ReleaseRefAndUnlock(cache != fTopCache);
182 
183 			if (cache == fTopCache)
184 				break;
185 
186 			cache = nextCache;
187 		}
188 
189 		fTopCache = NULL;
190 		fBottomCache = NULL;
191 	}
192 
193 	void UnlockKeepRefs(bool keepTopCacheLocked)
194 	{
195 		if (fTopCache == NULL)
196 			return;
197 
198 		VMCache* nextCache = fBottomCache;
199 		VMCache* cache = NULL;
200 
201 		while (keepTopCacheLocked
202 				? nextCache != fTopCache : cache != fTopCache) {
203 			cache = nextCache;
204 			nextCache = (VMCache*)cache->UserData();
205 			cache->Unlock(cache != fTopCache);
206 		}
207 	}
208 
209 	void RelockCaches(bool topCacheLocked)
210 	{
211 		if (fTopCache == NULL)
212 			return;
213 
214 		VMCache* nextCache = fTopCache;
215 		VMCache* cache = NULL;
216 		if (topCacheLocked) {
217 			cache = nextCache;
218 			nextCache = cache->source;
219 		}
220 
221 		while (cache != fBottomCache && nextCache != NULL) {
222 			VMCache* consumer = cache;
223 			cache = nextCache;
224 			nextCache = cache->source;
225 			cache->Lock();
226 			cache->SetUserData(consumer);
227 		}
228 	}
229 
230 private:
231 	VMCache*	fTopCache;
232 	VMCache*	fBottomCache;
233 };
234 
235 } // namespace
236 
237 
238 // The memory reserve that an allocation of a given priority must not touch.
239 static const size_t kMemoryReserveForPriority[] = {
240 	VM_MEMORY_RESERVE_USER,		// user
241 	VM_MEMORY_RESERVE_SYSTEM,	// system
242 	0							// VIP
243 };
244 
245 
246 ObjectCache* gPageMappingsObjectCache;
247 
248 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
249 
250 static off_t sAvailableMemory;
251 static off_t sNeededMemory;
252 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
253 static uint32 sPageFaults;
254 
255 static VMPhysicalPageMapper* sPhysicalPageMapper;
256 
257 #if DEBUG_CACHE_LIST
258 
259 struct cache_info {
260 	VMCache*	cache;
261 	addr_t		page_count;
262 	addr_t		committed;
263 };
264 
265 static const int kCacheInfoTableCount = 100 * 1024;
266 static cache_info* sCacheInfoTable;
267 
268 #endif	// DEBUG_CACHE_LIST
269 
270 
271 // function declarations
272 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
273 	bool addressSpaceCleanup);
274 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
275 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
276 static status_t map_backing_store(VMAddressSpace* addressSpace,
277 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
278 	int protection, int protectionMax, int mapping, uint32 flags,
279 	const virtual_address_restrictions* addressRestrictions, bool kernel,
280 	VMArea** _area, void** _virtualAddress);
281 static void fix_protection(uint32* protection);
282 
283 
284 //	#pragma mark -
285 
286 
287 #if VM_PAGE_FAULT_TRACING
288 
289 namespace VMPageFaultTracing {
290 
291 class PageFaultStart : public AbstractTraceEntry {
292 public:
293 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
294 		:
295 		fAddress(address),
296 		fPC(pc),
297 		fWrite(write),
298 		fUser(user)
299 	{
300 		Initialized();
301 	}
302 
303 	virtual void AddDump(TraceOutput& out)
304 	{
305 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
306 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
307 	}
308 
309 private:
310 	addr_t	fAddress;
311 	addr_t	fPC;
312 	bool	fWrite;
313 	bool	fUser;
314 };
315 
316 
317 // page fault errors
318 enum {
319 	PAGE_FAULT_ERROR_NO_AREA		= 0,
320 	PAGE_FAULT_ERROR_KERNEL_ONLY,
321 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
322 	PAGE_FAULT_ERROR_READ_PROTECTED,
323 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
324 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
325 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
326 };
327 
328 
329 class PageFaultError : public AbstractTraceEntry {
330 public:
331 	PageFaultError(area_id area, status_t error)
332 		:
333 		fArea(area),
334 		fError(error)
335 	{
336 		Initialized();
337 	}
338 
339 	virtual void AddDump(TraceOutput& out)
340 	{
341 		switch (fError) {
342 			case PAGE_FAULT_ERROR_NO_AREA:
343 				out.Print("page fault error: no area");
344 				break;
345 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
346 				out.Print("page fault error: area: %ld, kernel only", fArea);
347 				break;
348 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
349 				out.Print("page fault error: area: %ld, write protected",
350 					fArea);
351 				break;
352 			case PAGE_FAULT_ERROR_READ_PROTECTED:
353 				out.Print("page fault error: area: %ld, read protected", fArea);
354 				break;
355 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
356 				out.Print("page fault error: area: %ld, execute protected",
357 					fArea);
358 				break;
359 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
360 				out.Print("page fault error: kernel touching bad user memory");
361 				break;
362 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
363 				out.Print("page fault error: no address space");
364 				break;
365 			default:
366 				out.Print("page fault error: area: %ld, error: %s", fArea,
367 					strerror(fError));
368 				break;
369 		}
370 	}
371 
372 private:
373 	area_id		fArea;
374 	status_t	fError;
375 };
376 
377 
378 class PageFaultDone : public AbstractTraceEntry {
379 public:
380 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
381 			vm_page* page)
382 		:
383 		fArea(area),
384 		fTopCache(topCache),
385 		fCache(cache),
386 		fPage(page)
387 	{
388 		Initialized();
389 	}
390 
391 	virtual void AddDump(TraceOutput& out)
392 	{
393 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
394 			"page: %p", fArea, fTopCache, fCache, fPage);
395 	}
396 
397 private:
398 	area_id		fArea;
399 	VMCache*	fTopCache;
400 	VMCache*	fCache;
401 	vm_page*	fPage;
402 };
403 
404 }	// namespace VMPageFaultTracing
405 
406 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
407 #else
408 #	define TPF(x) ;
409 #endif	// VM_PAGE_FAULT_TRACING
410 
411 
412 //	#pragma mark -
413 
414 
415 /*!	The page's cache must be locked.
416 */
417 static inline void
418 increment_page_wired_count(vm_page* page)
419 {
420 	if (!page->IsMapped())
421 		atomic_add(&gMappedPagesCount, 1);
422 	page->IncrementWiredCount();
423 }
424 
425 
426 /*!	The page's cache must be locked.
427 */
428 static inline void
429 decrement_page_wired_count(vm_page* page)
430 {
431 	page->DecrementWiredCount();
432 	if (!page->IsMapped())
433 		atomic_add(&gMappedPagesCount, -1);
434 }
435 
436 
437 static inline addr_t
438 virtual_page_address(VMArea* area, vm_page* page)
439 {
440 	return area->Base()
441 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
442 }
443 
444 
445 //! You need to have the address space locked when calling this function
446 static VMArea*
447 lookup_area(VMAddressSpace* addressSpace, area_id id)
448 {
449 	VMAreas::ReadLock();
450 
451 	VMArea* area = VMAreas::LookupLocked(id);
452 	if (area != NULL && area->address_space != addressSpace)
453 		area = NULL;
454 
455 	VMAreas::ReadUnlock();
456 
457 	return area;
458 }
459 
460 
461 static status_t
462 allocate_area_page_protections(VMArea* area)
463 {
464 	// In the page protections we store only the three user protections,
465 	// so we use 4 bits per page.
466 	size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
467 	area->page_protections = (uint8*)malloc_etc(bytes,
468 		area->address_space == VMAddressSpace::Kernel()
469 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
470 	if (area->page_protections == NULL)
471 		return B_NO_MEMORY;
472 
473 	// init the page protections for all pages to that of the area
474 	uint32 areaProtection = area->protection
475 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
476 	memset(area->page_protections, areaProtection | (areaProtection << 4),
477 		bytes);
478 	return B_OK;
479 }
480 
481 
482 static inline void
483 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
484 {
485 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
486 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
487 	uint8& entry = area->page_protections[pageIndex / 2];
488 	if (pageIndex % 2 == 0)
489 		entry = (entry & 0xf0) | protection;
490 	else
491 		entry = (entry & 0x0f) | (protection << 4);
492 }
493 
494 
495 static inline uint32
496 get_area_page_protection(VMArea* area, addr_t pageAddress)
497 {
498 	if (area->page_protections == NULL)
499 		return area->protection;
500 
501 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
502 	uint32 protection = area->page_protections[pageIndex / 2];
503 	if (pageIndex % 2 == 0)
504 		protection &= 0x0f;
505 	else
506 		protection >>= 4;
507 
508 	uint32 kernelProtection = 0;
509 	if ((protection & B_READ_AREA) != 0)
510 		kernelProtection |= B_KERNEL_READ_AREA;
511 	if ((protection & B_WRITE_AREA) != 0)
512 		kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 	// If this is a kernel area we return only the kernel flags.
515 	if (area->address_space == VMAddressSpace::Kernel())
516 		return kernelProtection;
517 
518 	return protection | kernelProtection;
519 }
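

/*	Illustrative example of the nibble packing used by the helpers above: with
	4 bits per page, two pages share one byte of page_protections. For page
	index 5 the relevant byte is page_protections[5 / 2], i.e. entry 2; since
	the index is odd, its protection sits in the high nibble:

		uint32 protection = area->page_protections[2] >> 4;	// page 5 (odd)
		// (area->page_protections[2] & 0x0f would be page 4, the even index)
*/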
520 
521 
522 /*!	The caller must have reserved as many pages as the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue,
575 		// since otherwise the page daemon wouldn't come to keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
586 
587 
588 /*!	The caller must hold the lock of the page's cache when calling this
589 	function.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	The caller must hold the locks of all mapped pages' caches when calling
600 	this function.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 static inline bool
610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
611 {
612 	if (address < area->Base()) {
613 		offset = area->Base() - address;
614 		if (offset >= size)
615 			return false;
616 
617 		address = area->Base();
618 		size -= offset;
619 		offset = 0;
620 		if (size > area->Size())
621 			size = area->Size();
622 
623 		return true;
624 	}
625 
626 	offset = address - area->Base();
627 	if (offset >= area->Size())
628 		return false;
629 
630 	if (size >= area->Size() - offset)
631 		size = area->Size() - offset;
632 
633 	return true;
634 }
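

/*	A worked example of the clamping above (hypothetical values): for an area
	covering [0x100000, 0x104000) and a requested range starting at 0xff000
	with size 0x3000, the range is clamped to the area base, leaving
	address = 0x100000, size = 0x2000 and offset = 0 (the offset into the
	area).
*/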
635 
636 
637 /*!	Cuts a piece out of an area. If the given cut range covers the complete
638 	area, it is deleted. If it covers the beginning or the end, the area is
639 	resized accordingly. If the range covers some part in the middle of the
640 	area, it is split in two; in this case the second area is returned via
641 	\a _secondArea (the variable is left untouched in the other cases).
642 	The address space must be write locked.
643 	The caller must ensure that no part of the given range is wired.
644 */
645 static status_t
646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
647 	addr_t size, VMArea** _secondArea, bool kernel)
648 {
649 	addr_t offset;
650 	if (!intersect_area(area, address, size, offset))
651 		return B_OK;
652 
653 	// Is the area fully covered?
654 	if (address == area->Base() && size == area->Size()) {
655 		delete_area(addressSpace, area, false);
656 		return B_OK;
657 	}
658 
659 	int priority;
660 	uint32 allocationFlags;
661 	if (addressSpace == VMAddressSpace::Kernel()) {
662 		priority = VM_PRIORITY_SYSTEM;
663 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
664 			| HEAP_DONT_LOCK_KERNEL_SPACE;
665 	} else {
666 		priority = VM_PRIORITY_USER;
667 		allocationFlags = 0;
668 	}
669 
670 	VMCache* cache = vm_area_get_locked_cache(area);
671 	VMCacheChainLocker cacheChainLocker(cache);
672 	cacheChainLocker.LockAllSourceCaches();
673 
674 	// If no one else uses the area's cache and it's an anonymous cache, we can
675 	// resize or split it, too.
676 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
677 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
678 
679 	// Cut the end only?
680 	if (offset > 0 && size == area->Size() - offset) {
681 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
682 			allocationFlags);
683 		if (error != B_OK)
684 			return error;
685 
686 		// unmap pages
687 		unmap_pages(area, address, size);
688 
689 		if (onlyCacheUser) {
690 			// Since VMCache::Resize() can temporarily drop the lock, we must
691 			// unlock all lower caches to prevent locking order inversion.
692 			cacheChainLocker.Unlock(cache);
693 			cache->Resize(cache->virtual_base + offset, priority);
694 			cache->ReleaseRefAndUnlock();
695 		}
696 
697 		return B_OK;
698 	}
699 
700 	// Cut the beginning only?
701 	if (area->Base() == address) {
702 		// resize the area
703 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
704 			allocationFlags);
705 		if (error != B_OK)
706 			return error;
707 
708 		// unmap pages
709 		unmap_pages(area, address, size);
710 
711 		if (onlyCacheUser) {
712 			// Since VMCache::Rebase() can temporarily drop the lock, we must
713 			// unlock all lower caches to prevent locking order inversion.
714 			cacheChainLocker.Unlock(cache);
715 			cache->Rebase(cache->virtual_base + size, priority);
716 			cache->ReleaseRefAndUnlock();
717 		}
718 		area->cache_offset += size;
719 
720 		return B_OK;
721 	}
722 
723 	// The tough part -- cut a piece out of the middle of the area.
724 	// We do that by shrinking the area to the beginning section and creating a
725 	// new area for the end section.
726 	addr_t firstNewSize = offset;
727 	addr_t secondBase = address + size;
728 	addr_t secondSize = area->Size() - offset - size;
729 
730 	// unmap pages
731 	unmap_pages(area, address, area->Size() - firstNewSize);
732 
733 	// resize the area
734 	addr_t oldSize = area->Size();
735 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
736 		allocationFlags);
737 	if (error != B_OK)
738 		return error;
739 
740 	virtual_address_restrictions addressRestrictions = {};
741 	addressRestrictions.address = (void*)secondBase;
742 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
743 	VMArea* secondArea;
744 
745 	if (onlyCacheUser) {
746 		// Create a new cache for the second area.
747 		VMCache* secondCache;
748 		error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0,
749 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
750 		if (error != B_OK) {
751 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
752 			return error;
753 		}
754 
755 		secondCache->Lock();
756 		secondCache->temporary = cache->temporary;
757 		secondCache->virtual_base = area->cache_offset;
758 		secondCache->virtual_end = area->cache_offset + secondSize;
759 
760 		// Transfer the concerned pages from the first cache.
761 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
762 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
763 			area->cache_offset);
764 
765 		if (error == B_OK) {
766 			// Since VMCache::Resize() can temporarily drop the lock, we must
767 			// unlock all lower caches to prevent locking order inversion.
768 			cacheChainLocker.Unlock(cache);
769 			cache->Resize(cache->virtual_base + firstNewSize, priority);
770 			// Don't unlock the cache yet because we might have to resize it
771 			// back.
772 
773 			// Map the second area.
774 			error = map_backing_store(addressSpace, secondCache,
775 				area->cache_offset, area->name, secondSize, area->wiring,
776 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
777 				&addressRestrictions, kernel, &secondArea, NULL);
778 		}
779 
780 		if (error != B_OK) {
781 			// Restore the original cache.
782 			cache->Resize(cache->virtual_base + oldSize, priority);
783 
784 			// Move the pages back.
785 			status_t readoptStatus = cache->Adopt(secondCache,
786 				area->cache_offset, secondSize, adoptOffset);
787 			if (readoptStatus != B_OK) {
788 				// Some (swap) pages have not been moved back and will be lost
789 				// once the second cache is deleted.
790 				panic("failed to restore cache range: %s",
791 					strerror(readoptStatus));
792 
793 				// TODO: Handle out of memory cases by freeing memory and
794 				// retrying.
795 			}
796 
797 			cache->ReleaseRefAndUnlock();
798 			secondCache->ReleaseRefAndUnlock();
799 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
800 			return error;
801 		}
802 
803 		// Now we can unlock it.
804 		cache->ReleaseRefAndUnlock();
805 		secondCache->Unlock();
806 	} else {
807 		error = map_backing_store(addressSpace, cache, area->cache_offset
808 			+ (secondBase - area->Base()),
809 			area->name, secondSize, area->wiring, area->protection,
810 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
811 			&addressRestrictions, kernel, &secondArea, NULL);
812 		if (error != B_OK) {
813 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
814 			return error;
815 		}
816 		// We need a cache reference for the new area.
817 		cache->AcquireRefLocked();
818 	}
819 
820 	if (_secondArea != NULL)
821 		*_secondArea = secondArea;
822 
823 	return B_OK;
824 }
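

/*	A worked example of the middle-cut case above (hypothetical numbers): for
	an area at base 0x100000 with size 0x40000, cutting [0x110000, 0x120000)
	shrinks the original area to [0x100000, 0x110000) and creates a second
	area covering [0x120000, 0x140000) (secondBase = 0x120000, secondSize
	= 0x20000); if the area was the sole user of an anonymous cache, the pages
	of the second range are adopted into a newly created cache.
*/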
825 
826 
827 /*!	Deletes or cuts all areas in the given address range.
828 	The address space must be write-locked.
829 	The caller must ensure that no part of the given range is wired.
830 */
831 static status_t
832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
833 	bool kernel)
834 {
835 	size = PAGE_ALIGN(size);
836 
837 	// Check whether the caller is allowed to modify the concerned areas.
838 	if (!kernel) {
839 		for (VMAddressSpace::AreaRangeIterator it
840 				= addressSpace->GetAreaRangeIterator(address, size);
841 			VMArea* area = it.Next();) {
842 
843 			if ((area->protection & B_KERNEL_AREA) != 0) {
844 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
845 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
846 					team_get_current_team_id(), area->id, area->name);
847 				return B_NOT_ALLOWED;
848 			}
849 		}
850 	}
851 
852 	for (VMAddressSpace::AreaRangeIterator it
853 			= addressSpace->GetAreaRangeIterator(address, size);
854 		VMArea* area = it.Next();) {
855 
856 		status_t error = cut_area(addressSpace, area, address, size, NULL,
857 			kernel);
858 		if (error != B_OK)
859 			return error;
860 			// Failing after already messing with areas is ugly, but we
861 			// can't do anything about it.
862 	}
863 
864 	return B_OK;
865 }
866 
867 
868 static status_t
869 discard_area_range(VMArea* area, addr_t address, addr_t size)
870 {
871 	addr_t offset;
872 	if (!intersect_area(area, address, size, offset))
873 		return B_OK;
874 
875 	// If someone else uses the area's cache or it's not an anonymous cache, we
876 	// can't discard.
877 	VMCache* cache = vm_area_get_locked_cache(area);
878 	if (cache->areas != area || area->cache_next != NULL
879 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
880 		return B_OK;
881 	}
882 
883 	VMCacheChainLocker cacheChainLocker(cache);
884 	cacheChainLocker.LockAllSourceCaches();
885 
886 	unmap_pages(area, address, size);
887 
888 	// Since VMCache::Discard() can temporarily drop the lock, we must
889 	// unlock all lower caches to prevent locking order inversion.
890 	cacheChainLocker.Unlock(cache);
891 	cache->Discard(cache->virtual_base + offset, size);
892 	cache->ReleaseRefAndUnlock();
893 
894 	return B_OK;
895 }
896 
897 
898 static status_t
899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
900 	bool kernel)
901 {
902 	for (VMAddressSpace::AreaRangeIterator it
903 		= addressSpace->GetAreaRangeIterator(address, size);
904 			VMArea* area = it.Next();) {
905 		status_t error = discard_area_range(area, address, size);
906 		if (error != B_OK)
907 			return error;
908 	}
909 
910 	return B_OK;
911 }
912 
913 
914 /*! You need to hold the lock of the cache and the write lock of the address
915 	space when calling this function.
916 	Note that in case of error the cache will be temporarily unlocked.
917 	If the address specification in \a addressRestrictions is \c B_EXACT_ADDRESS
918 	and the \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must
919 	ensure that no part of the specified address range (base
920 	\c addressRestrictions->address, size \a size) is wired.
921 */
922 static status_t
923 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
924 	const char* areaName, addr_t size, int wiring, int protection,
925 	int protectionMax, int mapping,
926 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
927 	bool kernel, VMArea** _area, void** _virtualAddress)
928 {
929 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
930 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
931 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
932 		addressSpace, cache, addressRestrictions->address, offset, size,
933 		addressRestrictions->address_specification, wiring, protection,
934 		protectionMax, _area, areaName));
935 	cache->AssertLocked();
936 
937 	if (size == 0) {
938 #if KDEBUG
939 		panic("map_backing_store(): called with size=0 for area '%s'!",
940 			areaName);
941 #endif
942 		return B_BAD_VALUE;
943 	}
944 	if (offset < 0)
945 		return B_BAD_VALUE;
946 
947 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
948 		| HEAP_DONT_LOCK_KERNEL_SPACE;
949 	int priority;
950 	if (addressSpace != VMAddressSpace::Kernel()) {
951 		priority = VM_PRIORITY_USER;
952 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
953 		priority = VM_PRIORITY_VIP;
954 		allocationFlags |= HEAP_PRIORITY_VIP;
955 	} else
956 		priority = VM_PRIORITY_SYSTEM;
957 
958 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
959 		allocationFlags);
960 	if (area == NULL)
961 		return B_NO_MEMORY;
962 	if (mapping != REGION_PRIVATE_MAP)
963 		area->protection_max = protectionMax & B_USER_PROTECTION;
964 
965 	status_t status;
966 
967 	// if this is a private map, we need to create a new cache
968 	// to handle the private copies of pages as they are written to
969 	VMCache* sourceCache = cache;
970 	if (mapping == REGION_PRIVATE_MAP) {
971 		VMCache* newCache;
972 
973 		// create an anonymous cache
974 		status = VMCacheFactory::CreateAnonymousCache(newCache,
975 			(protection & B_STACK_AREA) != 0
976 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
977 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
978 		if (status != B_OK)
979 			goto err1;
980 
981 		newCache->Lock();
982 		newCache->temporary = 1;
983 		newCache->virtual_base = offset;
984 		newCache->virtual_end = offset + size;
985 
986 		cache->AddConsumer(newCache);
987 
988 		cache = newCache;
989 	}
990 
991 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
992 		status = cache->SetMinimalCommitment(size, priority);
993 		if (status != B_OK)
994 			goto err2;
995 	}
996 
997 	// check to see if this address space has entered DELETE state
998 	if (addressSpace->IsBeingDeleted()) {
999 		// okay, someone is trying to delete this address space now, so we can't
1000 		// insert the area, so back out
1001 		status = B_BAD_TEAM_ID;
1002 		goto err2;
1003 	}
1004 
1005 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1006 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1007 		status = unmap_address_range(addressSpace,
1008 			(addr_t)addressRestrictions->address, size, kernel);
1009 		if (status != B_OK)
1010 			goto err2;
1011 	}
1012 
1013 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1014 		allocationFlags, _virtualAddress);
1015 	if (status == B_NO_MEMORY
1016 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1017 		// Due to how many locks are held, we cannot wait here for space to be
1018 		// freed up, but we can at least notify the low_resource handler.
1019 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1020 	}
1021 	if (status != B_OK)
1022 		goto err2;
1023 
1024 	// attach the cache to the area
1025 	area->cache = cache;
1026 	area->cache_offset = offset;
1027 
1028 	// point the cache back to the area
1029 	cache->InsertAreaLocked(area);
1030 	if (mapping == REGION_PRIVATE_MAP)
1031 		cache->Unlock();
1032 
1033 	// insert the area in the global areas map
1034 	VMAreas::Insert(area);
1035 
1036 	// grab a ref to the address space (the area holds this)
1037 	addressSpace->Get();
1038 
1039 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1040 //		cache, sourceCache, areaName, area);
1041 
1042 	*_area = area;
1043 	return B_OK;
1044 
1045 err2:
1046 	if (mapping == REGION_PRIVATE_MAP) {
1047 		// We created this cache, so we must delete it again. Note that we
1048 		// need to temporarily unlock the source cache or we'll otherwise
1049 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1050 		sourceCache->Unlock();
1051 		cache->ReleaseRefAndUnlock();
1052 		sourceCache->Lock();
1053 	}
1054 err1:
1055 	addressSpace->DeleteArea(area, allocationFlags);
1056 	return status;
1057 }
1058 
1059 
1060 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1061 	  locker1, locker2).
1062 */
1063 template<typename LockerType1, typename LockerType2>
1064 static inline bool
1065 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1066 {
1067 	area->cache->AssertLocked();
1068 
1069 	VMAreaUnwiredWaiter waiter;
1070 	if (!area->AddWaiterIfWired(&waiter))
1071 		return false;
1072 
1073 	// unlock everything and wait
1074 	if (locker1 != NULL)
1075 		locker1->Unlock();
1076 	if (locker2 != NULL)
1077 		locker2->Unlock();
1078 
1079 	waiter.waitEntry.Wait();
1080 
1081 	return true;
1082 }
1083 
1084 
1085 /*!	Checks whether the given area has any wired ranges intersecting with the
1086 	specified range and waits, if so.
1087 
1088 	When it has to wait, the function calls \c Unlock() on both \a locker1
1089 	and \a locker2, if given.
1090 	The area's top cache must be locked and must be unlocked as a side effect
1091 	of calling \c Unlock() on either \a locker1 or \a locker2.
1092 
1093 	If the function does not have to wait it does not modify or unlock any
1094 	object.
1095 
1096 	\param area The area to be checked.
1097 	\param base The base address of the range to check.
1098 	\param size The size of the address range to check.
1099 	\param locker1 An object to be unlocked before starting to wait (may
1100 		be \c NULL).
1101 	\param locker2 An object to be unlocked before starting to wait (may
1102 		be \c NULL).
1103 	\return \c true, if the function had to wait, \c false otherwise.
1104 */
1105 template<typename LockerType1, typename LockerType2>
1106 static inline bool
1107 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1108 	LockerType1* locker1, LockerType2* locker2)
1109 {
1110 	area->cache->AssertLocked();
1111 
1112 	VMAreaUnwiredWaiter waiter;
1113 	if (!area->AddWaiterIfWired(&waiter, base, size))
1114 		return false;
1115 
1116 	// unlock everything and wait
1117 	if (locker1 != NULL)
1118 		locker1->Unlock();
1119 	if (locker2 != NULL)
1120 		locker2->Unlock();
1121 
1122 	waiter.waitEntry.Wait();
1123 
1124 	return true;
1125 }
1126 
1127 
1128 /*!	Checks whether the given address space has any wired ranges intersecting
1129 	with the specified range and waits, if so.
1130 
1131 	Similar to wait_if_area_range_is_wired(), with the following differences:
1132 	- All areas intersecting with the range are checked (respectively all until
1133 	  one is found that contains a wired range intersecting with the given
1134 	  range).
1135 	- The given address space must at least be read-locked and must be unlocked
1136 	  when \c Unlock() is called on \a locker.
1137 	- None of the areas' caches are allowed to be locked.
1138 */
1139 template<typename LockerType>
1140 static inline bool
1141 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1142 	size_t size, LockerType* locker)
1143 {
1144 	for (VMAddressSpace::AreaRangeIterator it
1145 		= addressSpace->GetAreaRangeIterator(base, size);
1146 			VMArea* area = it.Next();) {
1147 
1148 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1149 
1150 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1151 			return true;
1152 	}
1153 
1154 	return false;
1155 }
1156 
1157 
1158 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1159 	It must be called in a situation where the kernel address space may be
1160 	locked.
1161 */
1162 status_t
1163 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1164 {
1165 	AddressSpaceReadLocker locker;
1166 	VMArea* area;
1167 	status_t status = locker.SetFromArea(id, area);
1168 	if (status != B_OK)
1169 		return status;
1170 
1171 	if (area->page_protections == NULL) {
1172 		status = allocate_area_page_protections(area);
1173 		if (status != B_OK)
1174 			return status;
1175 	}
1176 
1177 	*cookie = (void*)area;
1178 	return B_OK;
1179 }
1180 
1181 
1182 /*!	This is a debug helper function that can only be used with very specific
1183 	use cases.
1184 	Sets protection for the given address range to the protection specified.
1185 	If \a protection is 0 then the involved pages will be marked non-present
1186 	in the translation map to cause a fault on access. The pages aren't
1187 	actually unmapped however so that they can be marked present again with
1188 	additional calls to this function. For this to work the area must be
1189 	fully locked in memory so that the pages aren't otherwise touched.
1190 	This function does not lock the kernel address space and needs to be
1191 	supplied with a \a cookie retrieved from a successful call to
1192 	vm_prepare_kernel_area_debug_protection().
1193 */
1194 status_t
1195 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1196 	uint32 protection)
1197 {
1198 	// check address range
1199 	addr_t address = (addr_t)_address;
1200 	size = PAGE_ALIGN(size);
1201 
1202 	if ((address % B_PAGE_SIZE) != 0
1203 		|| (addr_t)address + size < (addr_t)address
1204 		|| !IS_KERNEL_ADDRESS(address)
1205 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1206 		return B_BAD_VALUE;
1207 	}
1208 
1209 	// Translate the kernel protection to user protection as we only store that.
1210 	if ((protection & B_KERNEL_READ_AREA) != 0)
1211 		protection |= B_READ_AREA;
1212 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1213 		protection |= B_WRITE_AREA;
1214 
1215 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1216 	VMTranslationMap* map = addressSpace->TranslationMap();
1217 	VMArea* area = (VMArea*)cookie;
1218 
1219 	addr_t offset = address - area->Base();
1220 	if (area->Size() - offset < size) {
1221 		panic("protect range not fully within supplied area");
1222 		return B_BAD_VALUE;
1223 	}
1224 
1225 	if (area->page_protections == NULL) {
1226 		panic("area has no page protections");
1227 		return B_BAD_VALUE;
1228 	}
1229 
1230 	// Invalidate the mapping entries so any access to them will fault or
1231 	// restore the mapping entries unchanged so that lookup will succeed again.
1232 	map->Lock();
1233 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1234 	map->Unlock();
1235 
1236 	// And set the proper page protections so that the fault case will actually
1237 	// fail and not simply try to map a new page.
1238 	for (addr_t pageAddress = address; pageAddress < address + size;
1239 			pageAddress += B_PAGE_SIZE) {
1240 		set_area_page_protection(area, pageAddress, protection);
1241 	}
1242 
1243 	return B_OK;
1244 }
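

#if 0
//	A minimal usage sketch of the two debug-protection functions above. The
//	helper below is hypothetical and assumes that `id` names an existing,
//	fully locked kernel area and that `address`/`size` denote a page-aligned
//	range within it.
static status_t
debug_guard_range_example(area_id id, void* address, size_t size)
{
	void* cookie;
	status_t status = vm_prepare_kernel_area_debug_protection(id, &cookie);
	if (status != B_OK)
		return status;

	// Protection 0 marks the pages non-present, so any access faults ...
	status = vm_set_kernel_area_debug_protection(cookie, address, size, 0);
	if (status != B_OK)
		return status;

	// ... and a non-zero protection makes the pages accessible again.
	return vm_set_kernel_area_debug_protection(cookie, address, size,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
}
#endif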
1245 
1246 
1247 status_t
1248 vm_block_address_range(const char* name, void* address, addr_t size)
1249 {
1250 	if (!arch_vm_supports_protection(0))
1251 		return B_NOT_SUPPORTED;
1252 
1253 	AddressSpaceWriteLocker locker;
1254 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1255 	if (status != B_OK)
1256 		return status;
1257 
1258 	VMAddressSpace* addressSpace = locker.AddressSpace();
1259 
1260 	// create an anonymous cache
1261 	VMCache* cache;
1262 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1263 		VM_PRIORITY_SYSTEM);
1264 	if (status != B_OK)
1265 		return status;
1266 
1267 	cache->temporary = 1;
1268 	cache->virtual_end = size;
1269 	cache->Lock();
1270 
1271 	VMArea* area;
1272 	virtual_address_restrictions addressRestrictions = {};
1273 	addressRestrictions.address = address;
1274 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1275 	status = map_backing_store(addressSpace, cache, 0, name, size,
1276 		B_ALREADY_WIRED, 0, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1277 		true, &area, NULL);
1278 	if (status != B_OK) {
1279 		cache->ReleaseRefAndUnlock();
1280 		return status;
1281 	}
1282 
1283 	cache->Unlock();
1284 	area->cache_type = CACHE_TYPE_RAM;
1285 	return area->id;
1286 }
1287 
1288 
1289 status_t
1290 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1291 {
1292 	AddressSpaceWriteLocker locker(team);
1293 	if (!locker.IsLocked())
1294 		return B_BAD_TEAM_ID;
1295 
1296 	VMAddressSpace* addressSpace = locker.AddressSpace();
1297 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1298 		addressSpace == VMAddressSpace::Kernel()
1299 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1300 }
1301 
1302 
1303 status_t
1304 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1305 	addr_t size, uint32 flags)
1306 {
1307 	if (size == 0)
1308 		return B_BAD_VALUE;
1309 
1310 	AddressSpaceWriteLocker locker(team);
1311 	if (!locker.IsLocked())
1312 		return B_BAD_TEAM_ID;
1313 
1314 	virtual_address_restrictions addressRestrictions = {};
1315 	addressRestrictions.address = *_address;
1316 	addressRestrictions.address_specification = addressSpec;
1317 	VMAddressSpace* addressSpace = locker.AddressSpace();
1318 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1319 		addressSpace == VMAddressSpace::Kernel()
1320 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1321 		_address);
1322 }
1323 
1324 
1325 area_id
1326 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1327 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1328 	const virtual_address_restrictions* virtualAddressRestrictions,
1329 	const physical_address_restrictions* physicalAddressRestrictions,
1330 	bool kernel, void** _address)
1331 {
1332 	VMArea* area;
1333 	VMCache* cache;
1334 	vm_page* page = NULL;
1335 	bool isStack = (protection & B_STACK_AREA) != 0;
1336 	page_num_t guardPages;
1337 	bool canOvercommit = false;
1338 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1339 		? VM_PAGE_ALLOC_CLEAR : 0;
1340 
1341 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1342 		team, name, size));
1343 
1344 	size = PAGE_ALIGN(size);
1345 	guardSize = PAGE_ALIGN(guardSize);
1346 	guardPages = guardSize / B_PAGE_SIZE;
1347 
1348 	if (size == 0 || size < guardSize)
1349 		return B_BAD_VALUE;
1350 	if (!arch_vm_supports_protection(protection))
1351 		return B_NOT_SUPPORTED;
1352 
1353 	if (team == B_CURRENT_TEAM)
1354 		team = VMAddressSpace::CurrentID();
1355 	if (team < 0)
1356 		return B_BAD_TEAM_ID;
1357 
1358 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1359 		canOvercommit = true;
1360 
1361 #ifdef DEBUG_KERNEL_STACKS
1362 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1363 		isStack = true;
1364 #endif
1365 
1366 	// check parameters
1367 	switch (virtualAddressRestrictions->address_specification) {
1368 		case B_ANY_ADDRESS:
1369 		case B_EXACT_ADDRESS:
1370 		case B_BASE_ADDRESS:
1371 		case B_ANY_KERNEL_ADDRESS:
1372 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1373 		case B_RANDOMIZED_ANY_ADDRESS:
1374 		case B_RANDOMIZED_BASE_ADDRESS:
1375 			break;
1376 
1377 		default:
1378 			return B_BAD_VALUE;
1379 	}
1380 
1381 	// If low or high physical address restrictions are given, we force
1382 	// B_CONTIGUOUS wiring, since only then will we use
1383 	// vm_page_allocate_page_run() which deals with those restrictions.
1384 	if (physicalAddressRestrictions->low_address != 0
1385 		|| physicalAddressRestrictions->high_address != 0) {
1386 		wiring = B_CONTIGUOUS;
1387 	}
1388 
1389 	physical_address_restrictions stackPhysicalRestrictions;
1390 	bool doReserveMemory = false;
1391 	switch (wiring) {
1392 		case B_NO_LOCK:
1393 			break;
1394 		case B_FULL_LOCK:
1395 		case B_LAZY_LOCK:
1396 		case B_CONTIGUOUS:
1397 			doReserveMemory = true;
1398 			break;
1399 		case B_ALREADY_WIRED:
1400 			break;
1401 		case B_LOMEM:
1402 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1403 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1404 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1405 			wiring = B_CONTIGUOUS;
1406 			doReserveMemory = true;
1407 			break;
1408 		case B_32_BIT_FULL_LOCK:
1409 			if (B_HAIKU_PHYSICAL_BITS <= 32
1410 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1411 				wiring = B_FULL_LOCK;
1412 				doReserveMemory = true;
1413 				break;
1414 			}
1415 			// TODO: We don't really support this mode efficiently. Just fall
1416 			// through for now ...
1417 		case B_32_BIT_CONTIGUOUS:
1418 			#if B_HAIKU_PHYSICAL_BITS > 32
1419 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1420 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1421 					stackPhysicalRestrictions.high_address
1422 						= (phys_addr_t)1 << 32;
1423 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1424 				}
1425 			#endif
1426 			wiring = B_CONTIGUOUS;
1427 			doReserveMemory = true;
1428 			break;
1429 		default:
1430 			return B_BAD_VALUE;
1431 	}
1432 
1433 	// Optimization: For a single-page contiguous allocation without low/high
1434 	// memory restrictions, B_FULL_LOCK wiring suffices.
1435 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1436 		&& physicalAddressRestrictions->low_address == 0
1437 		&& physicalAddressRestrictions->high_address == 0) {
1438 		wiring = B_FULL_LOCK;
1439 	}
1440 
1441 	// For full lock or contiguous areas we're also going to map the pages and
1442 	// thus need to reserve pages for the mapping backend upfront.
1443 	addr_t reservedMapPages = 0;
1444 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1445 		AddressSpaceWriteLocker locker;
1446 		status_t status = locker.SetTo(team);
1447 		if (status != B_OK)
1448 			return status;
1449 
1450 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1451 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1452 	}
1453 
1454 	int priority;
1455 	if (team != VMAddressSpace::KernelID())
1456 		priority = VM_PRIORITY_USER;
1457 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1458 		priority = VM_PRIORITY_VIP;
1459 	else
1460 		priority = VM_PRIORITY_SYSTEM;
1461 
1462 	// Reserve memory before acquiring the address space lock. This reduces the
1463 	// chances of failure, since while holding the write lock to the address
1464 	// space (if it is the kernel address space that is), the low memory handler
1465 	// won't be able to free anything for us.
1466 	addr_t reservedMemory = 0;
1467 	if (doReserveMemory) {
1468 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1469 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1470 			return B_NO_MEMORY;
1471 		reservedMemory = size;
1472 		// TODO: We don't reserve the memory for the pages for the page
1473 		// directories/tables. We actually need to do so, since we currently don't
1474 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1475 		// there are actually fewer physical pages than there should be, which
1476 		// can get the VM into trouble in low memory situations.
1477 	}
1478 
1479 	AddressSpaceWriteLocker locker;
1480 	VMAddressSpace* addressSpace;
1481 	status_t status;
1482 
1483 	// For full lock areas reserve the pages before locking the address
1484 	// space. E.g. block caches can't release their memory while we hold the
1485 	// address space lock.
1486 	page_num_t reservedPages = reservedMapPages;
1487 	if (wiring == B_FULL_LOCK)
1488 		reservedPages += size / B_PAGE_SIZE;
1489 
1490 	vm_page_reservation reservation;
1491 	if (reservedPages > 0) {
1492 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1493 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1494 					priority)) {
1495 				reservedPages = 0;
1496 				status = B_WOULD_BLOCK;
1497 				goto err0;
1498 			}
1499 		} else
1500 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1501 	}
1502 
1503 	if (wiring == B_CONTIGUOUS) {
1504 		// we try to allocate the page run here upfront as this may easily
1505 		// fail for obvious reasons
1506 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1507 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1508 		if (page == NULL) {
1509 			status = B_NO_MEMORY;
1510 			goto err0;
1511 		}
1512 	}
1513 
1514 	// Lock the address space and, if B_EXACT_ADDRESS and
1515 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1516 	// is not wired.
1517 	do {
1518 		status = locker.SetTo(team);
1519 		if (status != B_OK)
1520 			goto err1;
1521 
1522 		addressSpace = locker.AddressSpace();
1523 	} while (virtualAddressRestrictions->address_specification
1524 			== B_EXACT_ADDRESS
1525 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1526 		&& wait_if_address_range_is_wired(addressSpace,
1527 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1528 
1529 	// create an anonymous cache
1530 	// if it's a stack, make sure that at least two pages are available
1531 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1532 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1533 		wiring == B_NO_LOCK, priority);
1534 	if (status != B_OK)
1535 		goto err1;
1536 
1537 	cache->temporary = 1;
1538 	cache->virtual_end = size;
1539 	cache->committed_size = reservedMemory;
1540 		// TODO: This should be done via a method.
1541 	reservedMemory = 0;
1542 
1543 	cache->Lock();
1544 
1545 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1546 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1547 		virtualAddressRestrictions, kernel, &area, _address);
1548 
1549 	if (status != B_OK) {
1550 		cache->ReleaseRefAndUnlock();
1551 		goto err1;
1552 	}
1553 
1554 	locker.DegradeToReadLock();
1555 
1556 	switch (wiring) {
1557 		case B_NO_LOCK:
1558 		case B_LAZY_LOCK:
1559 			// do nothing - the pages are mapped in as needed
1560 			break;
1561 
1562 		case B_FULL_LOCK:
1563 		{
1564 			// Allocate and map all pages for this area
1565 
1566 			off_t offset = 0;
1567 			for (addr_t address = area->Base();
1568 					address < area->Base() + (area->Size() - 1);
1569 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1570 #ifdef DEBUG_KERNEL_STACKS
1571 #	ifdef STACK_GROWS_DOWNWARDS
1572 				if (isStack && address < area->Base()
1573 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1574 #	else
1575 				if (isStack && address >= area->Base() + area->Size()
1576 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1577 #	endif
1578 					continue;
1579 #endif
1580 				vm_page* page = vm_page_allocate_page(&reservation,
1581 					PAGE_STATE_WIRED | pageAllocFlags);
1582 				cache->InsertPage(page, offset);
1583 				map_page(area, page, address, protection, &reservation);
1584 
1585 				DEBUG_PAGE_ACCESS_END(page);
1586 			}
1587 
1588 			break;
1589 		}
1590 
1591 		case B_ALREADY_WIRED:
1592 		{
1593 			// The pages should already be mapped. This is only really useful
1594 			// during boot time. Find the appropriate vm_page objects and stick
1595 			// them in the cache object.
1596 			VMTranslationMap* map = addressSpace->TranslationMap();
1597 			off_t offset = 0;
1598 
1599 			if (!gKernelStartup)
1600 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1601 
1602 			map->Lock();
1603 
1604 			for (addr_t virtualAddress = area->Base();
1605 					virtualAddress < area->Base() + (area->Size() - 1);
1606 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1607 				phys_addr_t physicalAddress;
1608 				uint32 flags;
1609 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1610 				if (status < B_OK) {
1611 					panic("looking up mapping failed for va 0x%lx\n",
1612 						virtualAddress);
1613 				}
1614 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1615 				if (page == NULL) {
1616 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1617 						"\n", physicalAddress);
1618 				}
1619 
1620 				DEBUG_PAGE_ACCESS_START(page);
1621 
1622 				cache->InsertPage(page, offset);
1623 				increment_page_wired_count(page);
1624 				vm_page_set_state(page, PAGE_STATE_WIRED);
1625 				page->busy = false;
1626 
1627 				DEBUG_PAGE_ACCESS_END(page);
1628 			}
1629 
1630 			map->Unlock();
1631 			break;
1632 		}
1633 
1634 		case B_CONTIGUOUS:
1635 		{
1636 			// We have already allocated our contiguous page run, so we can now
1637 			// just map them in the address space
1638 			VMTranslationMap* map = addressSpace->TranslationMap();
1639 			phys_addr_t physicalAddress
1640 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1641 			addr_t virtualAddress = area->Base();
1642 			off_t offset = 0;
1643 
1644 			map->Lock();
1645 
1646 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1647 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1648 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1649 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1650 				if (page == NULL)
1651 					panic("couldn't lookup physical page just allocated\n");
1652 
1653 				status = map->Map(virtualAddress, physicalAddress, protection,
1654 					area->MemoryType(), &reservation);
1655 				if (status < B_OK)
1656 					panic("couldn't map physical page in page run\n");
1657 
1658 				cache->InsertPage(page, offset);
1659 				increment_page_wired_count(page);
1660 
1661 				DEBUG_PAGE_ACCESS_END(page);
1662 			}
1663 
1664 			map->Unlock();
1665 			break;
1666 		}
1667 
1668 		default:
1669 			break;
1670 	}
1671 
1672 	cache->Unlock();
1673 
1674 	if (reservedPages > 0)
1675 		vm_page_unreserve_pages(&reservation);
1676 
1677 	TRACE(("vm_create_anonymous_area: done\n"));
1678 
1679 	area->cache_type = CACHE_TYPE_RAM;
1680 	return area->id;
1681 
1682 err1:
1683 	if (wiring == B_CONTIGUOUS) {
1684 		// we had reserved the area space upfront...
1685 		phys_addr_t pageNumber = page->physical_page_number;
1686 		int32 i;
1687 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1688 			page = vm_lookup_page(pageNumber);
1689 			if (page == NULL)
1690 				panic("couldn't lookup physical page just allocated\n");
1691 
1692 			vm_page_set_state(page, PAGE_STATE_FREE);
1693 		}
1694 	}
1695 
1696 err0:
1697 	if (reservedPages > 0)
1698 		vm_page_unreserve_pages(&reservation);
1699 	if (reservedMemory > 0)
1700 		vm_unreserve_memory(reservedMemory);
1701 
1702 	return status;
1703 }
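

#if 0
//	A minimal calling sketch for vm_create_anonymous_area() (hypothetical
//	helper): creating a fully locked, single-page kernel area anywhere in the
//	kernel address space, with no physical placement restrictions.
static area_id
create_wired_kernel_page_example(void** _address)
{
	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
	physical_address_restrictions physicalRestrictions = {};

	return vm_create_anonymous_area(VMAddressSpace::KernelID(),
		"example area", B_PAGE_SIZE, B_FULL_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
		&physicalRestrictions, true, _address);
}
#endif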
1704 
1705 
1706 area_id
1707 vm_map_physical_memory(team_id team, const char* name, void** _address,
1708 	uint32 addressSpec, addr_t size, uint32 protection,
1709 	phys_addr_t physicalAddress, bool alreadyWired)
1710 {
1711 	VMArea* area;
1712 	VMCache* cache;
1713 	addr_t mapOffset;
1714 
1715 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1716 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1717 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1718 		addressSpec, size, protection, physicalAddress));
1719 
1720 	if (!arch_vm_supports_protection(protection))
1721 		return B_NOT_SUPPORTED;
1722 
1723 	AddressSpaceWriteLocker locker(team);
1724 	if (!locker.IsLocked())
1725 		return B_BAD_TEAM_ID;
1726 
1727 	// if the physical address is not page-aligned,
1728 	// move the actual area down to align on a page boundary
1729 	mapOffset = physicalAddress % B_PAGE_SIZE;
1730 	size += mapOffset;
1731 	physicalAddress -= mapOffset;
1732 
1733 	size = PAGE_ALIGN(size);
1734 
1735 	// create a device cache
1736 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1737 	if (status != B_OK)
1738 		return status;
1739 
1740 	cache->virtual_end = size;
1741 
1742 	cache->Lock();
1743 
1744 	virtual_address_restrictions addressRestrictions = {};
1745 	addressRestrictions.address = *_address;
1746 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1747 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1748 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1749 		true, &area, _address);
1750 
1751 	if (status < B_OK)
1752 		cache->ReleaseRefLocked();
1753 
1754 	cache->Unlock();
1755 
1756 	if (status == B_OK) {
1757 		// set requested memory type -- use uncached, if not given
1758 		uint32 memoryType = addressSpec & B_MTR_MASK;
1759 		if (memoryType == 0)
1760 			memoryType = B_MTR_UC;
1761 
1762 		area->SetMemoryType(memoryType);
1763 
1764 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1765 		if (status != B_OK)
1766 			delete_area(locker.AddressSpace(), area, false);
1767 	}
1768 
1769 	if (status != B_OK)
1770 		return status;
1771 
1772 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1773 
1774 	if (alreadyWired) {
1775 		// The area is already mapped, but possibly not with the right
1776 		// memory type.
1777 		map->Lock();
1778 		map->ProtectArea(area, area->protection);
1779 		map->Unlock();
1780 	} else {
1781 		// Map the area completely.
1782 
1783 		// reserve pages needed for the mapping
1784 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1785 			area->Base() + (size - 1));
1786 		vm_page_reservation reservation;
1787 		vm_page_reserve_pages(&reservation, reservePages,
1788 			team == VMAddressSpace::KernelID()
1789 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1790 
1791 		map->Lock();
1792 
1793 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1794 			map->Map(area->Base() + offset, physicalAddress + offset,
1795 				protection, area->MemoryType(), &reservation);
1796 		}
1797 
1798 		map->Unlock();
1799 
1800 		vm_page_unreserve_pages(&reservation);
1801 	}
1802 
1803 	// modify the pointer returned to be offset back into the new area
1804 	// the same way the physical address in was offset
1805 	*_address = (void*)((addr_t)*_address + mapOffset);
1806 
1807 	area->cache_type = CACHE_TYPE_DEVICE;
1808 	return area->id;
1809 }
1810 
1811 
1812 /*!	Don't use!
1813 	TODO: This function was introduced to map physical page vecs to
1814 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It uses a
1815 	device cache and therefore does not track vm_page::wired_count!
1816 */
1817 area_id
1818 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1819 	uint32 addressSpec, addr_t* _size, uint32 protection,
1820 	struct generic_io_vec* vecs, uint32 vecCount)
1821 {
1822 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1823 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1824 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1825 		addressSpec, _size, protection, vecs, vecCount));
1826 
1827 	if (!arch_vm_supports_protection(protection)
1828 		|| (addressSpec & B_MTR_MASK) != 0) {
1829 		return B_NOT_SUPPORTED;
1830 	}
1831 
1832 	AddressSpaceWriteLocker locker(team);
1833 	if (!locker.IsLocked())
1834 		return B_BAD_TEAM_ID;
1835 
1836 	if (vecCount == 0)
1837 		return B_BAD_VALUE;
1838 
1839 	addr_t size = 0;
1840 	for (uint32 i = 0; i < vecCount; i++) {
1841 		if (vecs[i].base % B_PAGE_SIZE != 0
1842 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1843 			return B_BAD_VALUE;
1844 		}
1845 
1846 		size += vecs[i].length;
1847 	}
1848 
1849 	// create a device cache
1850 	VMCache* cache;
1851 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1852 	if (result != B_OK)
1853 		return result;
1854 
1855 	cache->virtual_end = size;
1856 
1857 	cache->Lock();
1858 
1859 	VMArea* area;
1860 	virtual_address_restrictions addressRestrictions = {};
1861 	addressRestrictions.address = *_address;
1862 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1863 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1864 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
1865 		&addressRestrictions, true, &area, _address);
1866 
1867 	if (result != B_OK)
1868 		cache->ReleaseRefLocked();
1869 
1870 	cache->Unlock();
1871 
1872 	if (result != B_OK)
1873 		return result;
1874 
1875 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1876 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1877 		area->Base() + (size - 1));
1878 
1879 	vm_page_reservation reservation;
1880 	vm_page_reserve_pages(&reservation, reservePages,
1881 			team == VMAddressSpace::KernelID()
1882 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1883 	map->Lock();
1884 
1885 	uint32 vecIndex = 0;
1886 	size_t vecOffset = 0;
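	// Walk the area page by page, advancing to the next vec once the current
	// one has been fully mapped. The index is checked before the length so we
	// never read past the end of the vecs array.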
1887 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1888 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1889 			vecOffset = 0;
1890 			vecIndex++;
1891 		}
1892 
1893 		if (vecIndex >= vecCount)
1894 			break;
1895 
1896 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1897 			protection, area->MemoryType(), &reservation);
1898 
1899 		vecOffset += B_PAGE_SIZE;
1900 	}
1901 
1902 	map->Unlock();
1903 	vm_page_unreserve_pages(&reservation);
1904 
1905 	if (_size != NULL)
1906 		*_size = size;
1907 
1908 	area->cache_type = CACHE_TYPE_DEVICE;
1909 	return area->id;
1910 }
1911 
1912 
1913 area_id
1914 vm_create_null_area(team_id team, const char* name, void** address,
1915 	uint32 addressSpec, addr_t size, uint32 flags)
1916 {
1917 	size = PAGE_ALIGN(size);
1918 
1919 	// Lock the address space and, if B_EXACT_ADDRESS and
1920 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1921 	// is not wired.
1922 	AddressSpaceWriteLocker locker;
1923 	do {
1924 		if (locker.SetTo(team) != B_OK)
1925 			return B_BAD_TEAM_ID;
1926 	} while (addressSpec == B_EXACT_ADDRESS
1927 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1928 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1929 			(addr_t)*address, size, &locker));
1930 
1931 	// create a null cache
1932 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1933 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1934 	VMCache* cache;
1935 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1936 	if (status != B_OK)
1937 		return status;
1938 
1939 	cache->temporary = 1;
1940 	cache->virtual_end = size;
1941 
1942 	cache->Lock();
1943 
1944 	VMArea* area;
1945 	virtual_address_restrictions addressRestrictions = {};
1946 	addressRestrictions.address = *address;
1947 	addressRestrictions.address_specification = addressSpec;
1948 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1949 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
1950 		REGION_NO_PRIVATE_MAP, flags,
1951 		&addressRestrictions, true, &area, address);
1952 
1953 	if (status < B_OK) {
1954 		cache->ReleaseRefAndUnlock();
1955 		return status;
1956 	}
1957 
1958 	cache->Unlock();
1959 
1960 	area->cache_type = CACHE_TYPE_NULL;
1961 	return area->id;
1962 }
1963 
1964 
1965 /*!	Creates the vnode cache for the specified \a vnode.
1966 	The vnode has to be marked busy when calling this function.
1967 */
1968 status_t
1969 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1970 {
1971 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1972 }
1973 
1974 
1975 /*!	\a cache must be locked. The area's address space must be read-locked.
1976 */
1977 static void
1978 pre_map_area_pages(VMArea* area, VMCache* cache,
1979 	vm_page_reservation* reservation)
1980 {
1981 	addr_t baseAddress = area->Base();
1982 	addr_t cacheOffset = area->cache_offset;
1983 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1984 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1985 
1986 	for (VMCachePagesTree::Iterator it
1987 				= cache->pages.GetIterator(firstPage, true, true);
1988 			vm_page* page = it.Next();) {
1989 		if (page->cache_offset >= endPage)
1990 			break;
1991 
1992 		// skip busy and inactive pages
1993 		if (page->busy || page->usage_count == 0)
1994 			continue;
1995 
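		// Map the page read-only; a later write access will still cause a
		// fault and go through the regular fault path (e.g. copy-on-write for
		// private mappings).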
1996 		DEBUG_PAGE_ACCESS_START(page);
1997 		map_page(area, page,
1998 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1999 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2000 		DEBUG_PAGE_ACCESS_END(page);
2001 	}
2002 }
2003 
2004 
2005 /*!	Will map the file specified by \a fd to an area in memory.
2006 	The file will be mirrored beginning at the specified \a offset. The
2007 	\a offset and \a size arguments have to be page aligned.
2008 */
2009 static area_id
2010 _vm_map_file(team_id team, const char* name, void** _address,
2011 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2012 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2013 {
2014 	// TODO: For binary files we want to make sure that they map a snapshot
2015 	//	of the file taken at mapping time, i.e. later changes to the file
2016 	//	should not make it into the mapped copy -- doing this in a nice way
2017 	//	will need quite some changes.
2018 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2019 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2020 
2021 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2022 	size = PAGE_ALIGN(size);
2023 
2024 	if (mapping == REGION_NO_PRIVATE_MAP)
2025 		protection |= B_SHARED_AREA;
2026 	if (addressSpec != B_EXACT_ADDRESS)
2027 		unmapAddressRange = false;
2028 
2029 	if (fd < 0) {
2030 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2031 		virtual_address_restrictions virtualRestrictions = {};
2032 		virtualRestrictions.address = *_address;
2033 		virtualRestrictions.address_specification = addressSpec;
2034 		physical_address_restrictions physicalRestrictions = {};
2035 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2036 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2037 			_address);
2038 	}
2039 
2040 	// get the open flags of the FD
2041 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2042 	if (descriptor == NULL)
2043 		return EBADF;
2044 	int32 openMode = descriptor->open_mode;
2045 	put_fd(descriptor);
2046 
2047 	// The FD must be open for reading in any case. For a shared mapping with
2048 	// write access, the FD must additionally be open for writing.
2049 	if ((openMode & O_ACCMODE) == O_WRONLY
2050 		|| (mapping == REGION_NO_PRIVATE_MAP
2051 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2052 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2053 		return EACCES;
2054 	}
2055 
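	// Determine the maximum protection the mapping may ever be raised to:
	// for shared mappings backed by an FD that is not open read/write,
	// B_WRITE_AREA must never become available via a later
	// vm_set_area_protection() call.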
2056 	uint32 protectionMax = 0;
2057 	if (mapping != REGION_PRIVATE_MAP) {
2058 		if ((openMode & O_ACCMODE) == O_RDWR)
2059 			protectionMax = protection | B_USER_PROTECTION;
2060 		else
2061 			protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA);
2062 	}
2063 
2064 	// get the vnode for the object, this also grabs a ref to it
2065 	struct vnode* vnode = NULL;
2066 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2067 	if (status < B_OK)
2068 		return status;
2069 	VnodePutter vnodePutter(vnode);
2070 
2071 	// If we're going to pre-map pages, we need to reserve the pages needed by
2072 	// the mapping backend upfront.
2073 	page_num_t reservedPreMapPages = 0;
2074 	vm_page_reservation reservation;
2075 	if ((protection & B_READ_AREA) != 0) {
2076 		AddressSpaceWriteLocker locker;
2077 		status = locker.SetTo(team);
2078 		if (status != B_OK)
2079 			return status;
2080 
2081 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2082 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2083 
2084 		locker.Unlock();
2085 
2086 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2087 			team == VMAddressSpace::KernelID()
2088 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2089 	}
2090 
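	// RAII helper that releases the pre-map page reservation again on every
	// return path below.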
2091 	struct PageUnreserver {
2092 		PageUnreserver(vm_page_reservation* reservation)
2093 			:
2094 			fReservation(reservation)
2095 		{
2096 		}
2097 
2098 		~PageUnreserver()
2099 		{
2100 			if (fReservation != NULL)
2101 				vm_page_unreserve_pages(fReservation);
2102 		}
2103 
2104 		vm_page_reservation* fReservation;
2105 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2106 
2107 	// Lock the address space and, if the specified address range shall be
2108 	// unmapped, ensure it is not wired.
2109 	AddressSpaceWriteLocker locker;
2110 	do {
2111 		if (locker.SetTo(team) != B_OK)
2112 			return B_BAD_TEAM_ID;
2113 	} while (unmapAddressRange
2114 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2115 			(addr_t)*_address, size, &locker));
2116 
2117 	// TODO: this only works for file systems that use the file cache
2118 	VMCache* cache;
2119 	status = vfs_get_vnode_cache(vnode, &cache, false);
2120 	if (status < B_OK)
2121 		return status;
2122 
2123 	cache->Lock();
2124 
2125 	VMArea* area;
2126 	virtual_address_restrictions addressRestrictions = {};
2127 	addressRestrictions.address = *_address;
2128 	addressRestrictions.address_specification = addressSpec;
2129 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2130 		0, protection, protectionMax, mapping,
2131 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
2132 		&addressRestrictions, kernel, &area, _address);
2133 
2134 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2135 		// map_backing_store() cannot know we no longer need the ref
2136 		cache->ReleaseRefLocked();
2137 	}
2138 
2139 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2140 		pre_map_area_pages(area, cache, &reservation);
2141 
2142 	cache->Unlock();
2143 
2144 	if (status == B_OK) {
2145 		// TODO: This probably deserves a smarter solution, i.e. don't always
2146 		// prefetch, and probably don't trigger it at this place.
2147 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2148 			// prefetches at max 10 MB starting from "offset"
2149 	}
2150 
2151 	if (status != B_OK)
2152 		return status;
2153 
2154 	area->cache_type = CACHE_TYPE_VNODE;
2155 	return area->id;
2156 }
2157 
2158 
2159 area_id
2160 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2161 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2162 	int fd, off_t offset)
2163 {
2164 	if (!arch_vm_supports_protection(protection))
2165 		return B_NOT_SUPPORTED;
2166 
2167 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2168 		mapping, unmapAddressRange, fd, offset, true);
2169 }
2170 
2171 
2172 VMCache*
2173 vm_area_get_locked_cache(VMArea* area)
2174 {
2175 	rw_lock_read_lock(&sAreaCacheLock);
2176 
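	// The area's cache pointer may change (or the cache may be deleted) while
	// we wait for the cache lock. SwitchFromReadLock() gives up sAreaCacheLock
	// while acquiring the cache lock and returns false if the cache has been
	// deleted in the meantime; afterwards we re-take the read lock and verify
	// that the area still refers to the same cache, retrying otherwise.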
2177 	while (true) {
2178 		VMCache* cache = area->cache;
2179 
2180 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2181 			// cache has been deleted
2182 			rw_lock_read_lock(&sAreaCacheLock);
2183 			continue;
2184 		}
2185 
2186 		rw_lock_read_lock(&sAreaCacheLock);
2187 
2188 		if (cache == area->cache) {
2189 			cache->AcquireRefLocked();
2190 			rw_lock_read_unlock(&sAreaCacheLock);
2191 			return cache;
2192 		}
2193 
2194 		// the cache changed in the meantime
2195 		cache->Unlock();
2196 	}
2197 }
2198 
2199 
2200 void
2201 vm_area_put_locked_cache(VMCache* cache)
2202 {
2203 	cache->ReleaseRefAndUnlock();
2204 }
2205 
2206 
2207 area_id
2208 vm_clone_area(team_id team, const char* name, void** address,
2209 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2210 	bool kernel)
2211 {
2212 	VMArea* newArea = NULL;
2213 	VMArea* sourceArea;
2214 
2215 	// Check whether the source area exists and is cloneable. If so, mark it
2216 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2217 	{
2218 		AddressSpaceWriteLocker locker;
2219 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2220 		if (status != B_OK)
2221 			return status;
2222 
2223 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2224 			return B_NOT_ALLOWED;
2225 
2226 		sourceArea->protection |= B_SHARED_AREA;
2227 		protection |= B_SHARED_AREA;
2228 	}
2229 
2230 	// Now lock both address spaces and actually do the cloning.
2231 
2232 	MultiAddressSpaceLocker locker;
2233 	VMAddressSpace* sourceAddressSpace;
2234 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2235 	if (status != B_OK)
2236 		return status;
2237 
2238 	VMAddressSpace* targetAddressSpace;
2239 	status = locker.AddTeam(team, true, &targetAddressSpace);
2240 	if (status != B_OK)
2241 		return status;
2242 
2243 	status = locker.Lock();
2244 	if (status != B_OK)
2245 		return status;
2246 
2247 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2248 	if (sourceArea == NULL)
2249 		return B_BAD_VALUE;
2250 
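	// Check B_KERNEL_AREA again: the area could have changed while the source
	// address space was unlocked between the two locking steps above.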
2251 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2252 		return B_NOT_ALLOWED;
2253 
2254 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2255 
2256 	if (!kernel && sourceAddressSpace != targetAddressSpace
2257 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2258 #if KDEBUG
2259 		Team* team = thread_get_current_thread()->team;
2260 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2261 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2262 #endif
2263 		status = B_NOT_ALLOWED;
2264 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2265 		status = B_NOT_ALLOWED;
2266 	} else {
2267 		virtual_address_restrictions addressRestrictions = {};
2268 		addressRestrictions.address = *address;
2269 		addressRestrictions.address_specification = addressSpec;
2270 		status = map_backing_store(targetAddressSpace, cache,
2271 			sourceArea->cache_offset, name, sourceArea->Size(),
2272 			sourceArea->wiring, protection, sourceArea->protection_max,
2273 			mapping, 0, &addressRestrictions,
2274 			kernel, &newArea, address);
2275 	}
2276 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2277 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2278 		// to create a new cache, and has therefore already acquired a reference
2279 		// to the source cache - but otherwise it has no idea that we need
2280 		// one.
2281 		cache->AcquireRefLocked();
2282 	}
2283 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2284 		// we need to map in everything at this point
2285 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2286 			// we don't have actual pages to map but a physical area
2287 			VMTranslationMap* map
2288 				= sourceArea->address_space->TranslationMap();
2289 			map->Lock();
2290 
2291 			phys_addr_t physicalAddress;
2292 			uint32 oldProtection;
2293 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2294 
2295 			map->Unlock();
2296 
2297 			map = targetAddressSpace->TranslationMap();
2298 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2299 				newArea->Base() + (newArea->Size() - 1));
2300 
2301 			vm_page_reservation reservation;
2302 			vm_page_reserve_pages(&reservation, reservePages,
2303 				targetAddressSpace == VMAddressSpace::Kernel()
2304 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2305 			map->Lock();
2306 
2307 			for (addr_t offset = 0; offset < newArea->Size();
2308 					offset += B_PAGE_SIZE) {
2309 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2310 					protection, newArea->MemoryType(), &reservation);
2311 			}
2312 
2313 			map->Unlock();
2314 			vm_page_unreserve_pages(&reservation);
2315 		} else {
2316 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2317 			size_t reservePages = map->MaxPagesNeededToMap(
2318 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2319 			vm_page_reservation reservation;
2320 			vm_page_reserve_pages(&reservation, reservePages,
2321 				targetAddressSpace == VMAddressSpace::Kernel()
2322 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2323 
2324 			// map in all pages from source
2325 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2326 					vm_page* page  = it.Next();) {
2327 				if (!page->busy) {
2328 					DEBUG_PAGE_ACCESS_START(page);
2329 					map_page(newArea, page,
2330 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2331 							- newArea->cache_offset),
2332 						protection, &reservation);
2333 					DEBUG_PAGE_ACCESS_END(page);
2334 				}
2335 			}
2336 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2337 			// ensuring that!
2338 
2339 			vm_page_unreserve_pages(&reservation);
2340 		}
2341 	}
2342 	if (status == B_OK)
2343 		newArea->cache_type = sourceArea->cache_type;
2344 
2345 	vm_area_put_locked_cache(cache);
2346 
2347 	if (status < B_OK)
2348 		return status;
2349 
2350 	return newArea->id;
2351 }
2352 
2353 
2354 /*!	Deletes the specified area of the given address space.
2355 
2356 	The address space must be write-locked.
2357 	The caller must ensure that the area does not have any wired ranges.
2358 
2359 	\param addressSpace The address space containing the area.
2360 	\param area The area to be deleted.
2361 	\param deletingAddressSpace \c true, if the address space is in the process
2362 		of being deleted.
2363 */
2364 static void
2365 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2366 	bool deletingAddressSpace)
2367 {
2368 	ASSERT(!area->IsWired());
2369 
2370 	VMAreas::Remove(area);
2371 
2372 	// At this point the area is removed from the global hash table, but
2373 	// still exists in the area list.
2374 
2375 	// Unmap the virtual address space the area occupied.
2376 	{
2377 		// We need to lock the complete cache chain.
2378 		VMCache* topCache = vm_area_get_locked_cache(area);
2379 		VMCacheChainLocker cacheChainLocker(topCache);
2380 		cacheChainLocker.LockAllSourceCaches();
2381 
2382 		// If the area's top cache is a temporary cache and the area is the only
2383 		// one referencing it (besides us currently holding a second reference),
2384 		// the unmapping code doesn't need to care about preserving the accessed
2385 		// and dirty flags of the top cache page mappings.
2386 		bool ignoreTopCachePageFlags
2387 			= topCache->temporary && topCache->RefCount() == 2;
2388 
2389 		area->address_space->TranslationMap()->UnmapArea(area,
2390 			deletingAddressSpace, ignoreTopCachePageFlags);
2391 	}
2392 
2393 	if (!area->cache->temporary)
2394 		area->cache->WriteModified();
2395 
2396 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2397 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2398 
2399 	arch_vm_unset_memory_type(area);
2400 	addressSpace->RemoveArea(area, allocationFlags);
2401 	addressSpace->Put();
2402 
2403 	area->cache->RemoveArea(area);
2404 	area->cache->ReleaseRef();
2405 
2406 	addressSpace->DeleteArea(area, allocationFlags);
2407 }
2408 
2409 
2410 status_t
2411 vm_delete_area(team_id team, area_id id, bool kernel)
2412 {
2413 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2414 		team, id));
2415 
2416 	// lock the address space and make sure the area isn't wired
2417 	AddressSpaceWriteLocker locker;
2418 	VMArea* area;
2419 	AreaCacheLocker cacheLocker;
2420 
2421 	do {
2422 		status_t status = locker.SetFromArea(team, id, area);
2423 		if (status != B_OK)
2424 			return status;
2425 
2426 		cacheLocker.SetTo(area);
2427 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2428 
2429 	cacheLocker.Unlock();
2430 
2431 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2432 		return B_NOT_ALLOWED;
2433 
2434 	delete_area(locker.AddressSpace(), area, false);
2435 	return B_OK;
2436 }
2437 
2438 
2439 /*!	Creates a new cache on top of the given cache, moves all areas from
2440 	the old cache to the new one, and changes the protection of all affected
2441 	areas' pages to read-only. If requested, wired pages are moved up to the
2442 	new cache and copies are added to the old cache in their place.
2443 	Preconditions:
2444 	- The given cache must be locked.
2445 	- All of the cache's areas' address spaces must be read locked.
2446 	- Either the cache must not have any wired ranges or a page reservation for
2447 	  all wired pages must be provided, so they can be copied.
2448 
2449 	\param lowerCache The cache on top of which a new cache shall be created.
2450 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2451 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2452 		has wired pages. The wired pages are copied in this case.
2453 */
2454 static status_t
2455 vm_copy_on_write_area(VMCache* lowerCache,
2456 	vm_page_reservation* wiredPagesReservation)
2457 {
2458 	VMCache* upperCache;
2459 
2460 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2461 
2462 	// We need to separate the cache from its areas. The cache goes one level
2463 	// deeper and we create a new cache in between.
2464 
2465 	// create an anonymous cache
2466 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2467 		lowerCache->GuardSize() / B_PAGE_SIZE,
2468 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2469 		VM_PRIORITY_USER);
2470 	if (status != B_OK)
2471 		return status;
2472 
2473 	upperCache->Lock();
2474 
2475 	upperCache->temporary = 1;
2476 	upperCache->virtual_base = lowerCache->virtual_base;
2477 	upperCache->virtual_end = lowerCache->virtual_end;
2478 
2479 	// transfer the lower cache areas to the upper cache
2480 	rw_lock_write_lock(&sAreaCacheLock);
2481 	upperCache->TransferAreas(lowerCache);
2482 	rw_lock_write_unlock(&sAreaCacheLock);
2483 
2484 	lowerCache->AddConsumer(upperCache);
2485 
2486 	// We now need to remap all pages from all of the cache's areas read-only,
2487 	// so that a copy will be created on next write access. If there are wired
2488 	// pages, we keep their protection, move them to the upper cache and create
2489 	// copies for the lower cache.
2490 	if (wiredPagesReservation != NULL) {
2491 		// We need to handle wired pages -- iterate through the cache's pages.
2492 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2493 				vm_page* page = it.Next();) {
2494 			if (page->WiredCount() > 0) {
2495 				// allocate a new page and copy the wired one
2496 				vm_page* copiedPage = vm_page_allocate_page(
2497 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2498 
2499 				vm_memcpy_physical_page(
2500 					copiedPage->physical_page_number * B_PAGE_SIZE,
2501 					page->physical_page_number * B_PAGE_SIZE);
2502 
2503 				// move the wired page to the upper cache (note: removing is OK
2504 				// with the SplayTree iterator) and insert the copy
2505 				upperCache->MovePage(page);
2506 				lowerCache->InsertPage(copiedPage,
2507 					page->cache_offset * B_PAGE_SIZE);
2508 
2509 				DEBUG_PAGE_ACCESS_END(copiedPage);
2510 			} else {
2511 				// Change the protection of this page in all areas.
2512 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2513 						tempArea = tempArea->cache_next) {
2514 					// The area must be readable in the same way it was
2515 					// previously writable.
2516 					addr_t address = virtual_page_address(tempArea, page);
2517 					uint32 protection = 0;
2518 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2519 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2520 						protection |= B_KERNEL_READ_AREA;
2521 					if ((pageProtection & B_READ_AREA) != 0)
2522 						protection |= B_READ_AREA;
2523 
2524 					VMTranslationMap* map
2525 						= tempArea->address_space->TranslationMap();
2526 					map->Lock();
2527 					map->ProtectPage(tempArea, address, protection);
2528 					map->Unlock();
2529 				}
2530 			}
2531 		}
2532 	} else {
2533 		ASSERT(lowerCache->WiredPagesCount() == 0);
2534 
2535 		// just change the protection of all areas
2536 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2537 				tempArea = tempArea->cache_next) {
2538 			if (tempArea->page_protections != NULL) {
2539 				// Change the protection of all pages in this area.
2540 				VMTranslationMap* map = tempArea->address_space->TranslationMap();
2541 				map->Lock();
2542 				for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2543 					vm_page* page = it.Next();) {
2544 					// The area must be readable in the same way it was
2545 					// previously writable.
2546 					addr_t address = virtual_page_address(tempArea, page);
2547 					uint32 protection = 0;
2548 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2549 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2550 						protection |= B_KERNEL_READ_AREA;
2551 					if ((pageProtection & B_READ_AREA) != 0)
2552 						protection |= B_READ_AREA;
2553 
2554 					map->ProtectPage(tempArea, address, protection);
2555 				}
2556 				map->Unlock();
2557 				continue;
2558 			}
2559 			// The area must be readable in the same way it was previously
2560 			// writable.
2561 			uint32 protection = 0;
2562 			if ((tempArea->protection & B_KERNEL_READ_AREA) != 0)
2563 				protection |= B_KERNEL_READ_AREA;
2564 			if ((tempArea->protection & B_READ_AREA) != 0)
2565 				protection |= B_READ_AREA;
2566 
2567 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2568 			map->Lock();
2569 			map->ProtectArea(tempArea, protection);
2570 			map->Unlock();
2571 		}
2572 	}
2573 
2574 	vm_area_put_locked_cache(upperCache);
2575 
2576 	return B_OK;
2577 }
2578 
2579 
2580 area_id
2581 vm_copy_area(team_id team, const char* name, void** _address,
2582 	uint32 addressSpec, area_id sourceID)
2583 {
2584 	// Do the locking: target address space, all address spaces associated with
2585 	// the source cache, and the cache itself.
2586 	MultiAddressSpaceLocker locker;
2587 	VMAddressSpace* targetAddressSpace;
2588 	VMCache* cache;
2589 	VMArea* source;
2590 	AreaCacheLocker cacheLocker;
2591 	status_t status;
2592 	bool sharedArea;
2593 
2594 	page_num_t wiredPages = 0;
2595 	vm_page_reservation wiredPagesReservation;
2596 
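	// Reserving pages for the wired page copies may block, so it must not be
	// done while holding the locks. Therefore: reserve, re-lock, and re-check
	// whether the number of wired pages has grown in the meantime -- if so,
	// retry.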
2597 	bool restart;
2598 	do {
2599 		restart = false;
2600 
2601 		locker.Unset();
2602 		status = locker.AddTeam(team, true, &targetAddressSpace);
2603 		if (status == B_OK) {
2604 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2605 				&cache);
2606 		}
2607 		if (status != B_OK)
2608 			return status;
2609 
2610 		cacheLocker.SetTo(cache, true);	// already locked
2611 
2612 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2613 
2614 		page_num_t oldWiredPages = wiredPages;
2615 		wiredPages = 0;
2616 
2617 		// If the source area isn't shared, count the number of wired pages in
2618 		// the cache and reserve as many pages.
2619 		if (!sharedArea) {
2620 			wiredPages = cache->WiredPagesCount();
2621 
2622 			if (wiredPages > oldWiredPages) {
2623 				cacheLocker.Unlock();
2624 				locker.Unlock();
2625 
2626 				if (oldWiredPages > 0)
2627 					vm_page_unreserve_pages(&wiredPagesReservation);
2628 
2629 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2630 					VM_PRIORITY_USER);
2631 
2632 				restart = true;
2633 			}
2634 		} else if (oldWiredPages > 0)
2635 			vm_page_unreserve_pages(&wiredPagesReservation);
2636 	} while (restart);
2637 
2638 	// unreserve pages later
2639 	struct PagesUnreserver {
2640 		PagesUnreserver(vm_page_reservation* reservation)
2641 			:
2642 			fReservation(reservation)
2643 		{
2644 		}
2645 
2646 		~PagesUnreserver()
2647 		{
2648 			if (fReservation != NULL)
2649 				vm_page_unreserve_pages(fReservation);
2650 		}
2651 
2652 	private:
2653 		vm_page_reservation*	fReservation;
2654 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2655 
2656 	bool writableCopy
2657 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2658 	uint8* targetPageProtections = NULL;
2659 
2660 	if (source->page_protections != NULL) {
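		// Page protections are stored as one nibble (4 bits) per page, i.e.
		// two pages per byte -- hence the rounded-up halving of the page
		// count.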
2661 		size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2;
2662 		targetPageProtections = (uint8*)malloc_etc(bytes,
2663 			(source->address_space == VMAddressSpace::Kernel()
2664 					|| targetAddressSpace == VMAddressSpace::Kernel())
2665 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2666 		if (targetPageProtections == NULL)
2667 			return B_NO_MEMORY;
2668 
2669 		memcpy(targetPageProtections, source->page_protections, bytes);
2670 
2671 		if (!writableCopy) {
2672 			for (size_t i = 0; i < bytes; i++) {
2673 				if ((targetPageProtections[i]
2674 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2675 					writableCopy = true;
2676 					break;
2677 				}
2678 			}
2679 		}
2680 	}
2681 
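	// B_CLONE_ADDRESS means the copy shall be placed at the same base address
	// as the source area.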
2682 	if (addressSpec == B_CLONE_ADDRESS) {
2683 		addressSpec = B_EXACT_ADDRESS;
2684 		*_address = (void*)source->Base();
2685 	}
2686 
2687 	// First, create a cache on top of the source area, or use the existing
2688 	// one if this is a shared area.
2689 
2690 	VMArea* target;
2691 	virtual_address_restrictions addressRestrictions = {};
2692 	addressRestrictions.address = *_address;
2693 	addressRestrictions.address_specification = addressSpec;
2694 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2695 		name, source->Size(), source->wiring, source->protection,
2696 		source->protection_max,
2697 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2698 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2699 		&addressRestrictions, true, &target, _address);
2700 	if (status < B_OK) {
2701 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2702 		return status;
2703 	}
2704 
2705 	if (targetPageProtections != NULL)
2706 		target->page_protections = targetPageProtections;
2707 
2708 	if (sharedArea) {
2709 		// The new area uses the old area's cache, but map_backing_store()
2710 		// hasn't acquired a ref. So we have to do that now.
2711 		cache->AcquireRefLocked();
2712 	}
2713 
2714 	// If the source area is writable, we need to move it one layer up as well
2715 
2716 	if (!sharedArea) {
2717 		if (writableCopy) {
2718 			// TODO: do something more useful if this fails!
2719 			if (vm_copy_on_write_area(cache,
2720 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2721 				panic("vm_copy_on_write_area() failed!\n");
2722 			}
2723 		}
2724 	}
2725 
2726 	// we return the ID of the newly created area
2727 	return target->id;
2728 }
2729 
2730 
2731 status_t
2732 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2733 	bool kernel)
2734 {
2735 	fix_protection(&newProtection);
2736 
2737 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2738 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2739 
2740 	if (!arch_vm_supports_protection(newProtection))
2741 		return B_NOT_SUPPORTED;
2742 
2743 	bool becomesWritable
2744 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2745 
2746 	// lock address spaces and cache
2747 	MultiAddressSpaceLocker locker;
2748 	VMCache* cache;
2749 	VMArea* area;
2750 	status_t status;
2751 	AreaCacheLocker cacheLocker;
2752 	bool isWritable;
2753 
2754 	bool restart;
2755 	do {
2756 		restart = false;
2757 
2758 		locker.Unset();
2759 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2760 		if (status != B_OK)
2761 			return status;
2762 
2763 		cacheLocker.SetTo(cache, true);	// already locked
2764 
2765 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2766 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2767 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2768 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2769 				" (%s)\n", team, newProtection, areaID, area->name);
2770 			return B_NOT_ALLOWED;
2771 		}
2772 		if (!kernel && area->protection_max != 0
2773 			&& (newProtection & area->protection_max)
2774 				!= (newProtection & B_USER_PROTECTION)) {
2775 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2776 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on "
2777 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2778 				area->protection_max, areaID, area->name);
2779 			return B_NOT_ALLOWED;
2780 		}
2781 
2782 		if (area->protection == newProtection)
2783 			return B_OK;
2784 
2785 		if (team != VMAddressSpace::KernelID()
2786 			&& area->address_space->ID() != team) {
2787 			// unless you're the kernel, you are only allowed to set
2788 			// the protection of your own areas
2789 			return B_NOT_ALLOWED;
2790 		}
2791 
2792 		isWritable
2793 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2794 
2795 		// Make sure the area (or, if we're going to call
2796 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2797 		// wired ranges.
2798 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2799 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2800 					otherArea = otherArea->cache_next) {
2801 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2802 					restart = true;
2803 					break;
2804 				}
2805 			}
2806 		} else {
2807 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2808 				restart = true;
2809 		}
2810 	} while (restart);
2811 
2812 	bool changePageProtection = true;
2813 	bool changeTopCachePagesOnly = false;
2814 
2815 	if (isWritable && !becomesWritable) {
2816 		// writable -> !writable
2817 
2818 		if (cache->source != NULL && cache->temporary) {
2819 			if (cache->CountWritableAreas(area) == 0) {
2820 				// Since this cache is now backed by the pages of its source cache,
2821 				// we can reduce the cache's commitment to cover only the pages
2822 				// that actually are in this cache.
2823 
2824 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2825 					team == VMAddressSpace::KernelID()
2826 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2827 
2828 				// TODO: we may be able to join with our source cache, if
2829 				// count == 0
2830 			}
2831 		}
2832 
2833 		// If only the writability changes, we can just remap the pages of the
2834 		// top cache, since the pages of lower caches are mapped read-only
2835 		// anyway. That is only advantageous if the number of pages in the
2836 		// cache is significantly smaller than the number of pages in the area,
2837 		// though.
2838 		if (newProtection
2839 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2840 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2841 			changeTopCachePagesOnly = true;
2842 		}
2843 	} else if (!isWritable && becomesWritable) {
2844 		// !writable -> writable
2845 
2846 		if (!cache->consumers.IsEmpty()) {
2847 			// There are consumers -- we have to insert a new cache. Fortunately
2848 			// vm_copy_on_write_area() does everything that's needed.
2849 			changePageProtection = false;
2850 			status = vm_copy_on_write_area(cache, NULL);
2851 		} else {
2852 			// No consumers, so we don't need to insert a new one.
2853 			if (cache->source != NULL && cache->temporary) {
2854 				// the cache's commitment must contain all possible pages
2855 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2856 					team == VMAddressSpace::KernelID()
2857 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2858 			}
2859 
2860 			if (status == B_OK && cache->source != NULL) {
2861 				// There's a source cache, hence we can't just change all pages'
2862 				// protection or we might allow writing into pages belonging to
2863 				// a lower cache.
2864 				changeTopCachePagesOnly = true;
2865 			}
2866 		}
2867 	} else {
2868 		// we don't have anything special to do in all other cases
2869 	}
2870 
2871 	if (status == B_OK) {
2872 		// remap existing pages in this cache
2873 		if (changePageProtection) {
2874 			VMTranslationMap* map = area->address_space->TranslationMap();
2875 			map->Lock();
2876 
2877 			if (changeTopCachePagesOnly) {
2878 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2879 				page_num_t lastPageOffset
2880 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2881 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2882 						vm_page* page = it.Next();) {
2883 					if (page->cache_offset >= firstPageOffset
2884 						&& page->cache_offset < lastPageOffset) {
2885 						addr_t address = virtual_page_address(area, page);
2886 						map->ProtectPage(area, address, newProtection);
2887 					}
2888 				}
2889 			} else
2890 				map->ProtectArea(area, newProtection);
2891 
2892 			map->Unlock();
2893 		}
2894 
2895 		area->protection = newProtection;
2896 	}
2897 
2898 	return status;
2899 }
2900 
2901 
2902 status_t
2903 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2904 {
2905 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2906 	if (addressSpace == NULL)
2907 		return B_BAD_TEAM_ID;
2908 
2909 	VMTranslationMap* map = addressSpace->TranslationMap();
2910 
2911 	map->Lock();
2912 	uint32 dummyFlags;
2913 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2914 	map->Unlock();
2915 
2916 	addressSpace->Put();
2917 	return status;
2918 }
2919 
2920 
2921 /*!	The page's cache must be locked.
2922 */
2923 bool
2924 vm_test_map_modification(vm_page* page)
2925 {
2926 	if (page->modified)
2927 		return true;
2928 
2929 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2930 	vm_page_mapping* mapping;
2931 	while ((mapping = iterator.Next()) != NULL) {
2932 		VMArea* area = mapping->area;
2933 		VMTranslationMap* map = area->address_space->TranslationMap();
2934 
2935 		phys_addr_t physicalAddress;
2936 		uint32 flags;
2937 		map->Lock();
2938 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2939 		map->Unlock();
2940 
2941 		if ((flags & PAGE_MODIFIED) != 0)
2942 			return true;
2943 	}
2944 
2945 	return false;
2946 }
2947 
2948 
2949 /*!	The page's cache must be locked.
2950 */
2951 void
2952 vm_clear_map_flags(vm_page* page, uint32 flags)
2953 {
2954 	if ((flags & PAGE_ACCESSED) != 0)
2955 		page->accessed = false;
2956 	if ((flags & PAGE_MODIFIED) != 0)
2957 		page->modified = false;
2958 
2959 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2960 	vm_page_mapping* mapping;
2961 	while ((mapping = iterator.Next()) != NULL) {
2962 		VMArea* area = mapping->area;
2963 		VMTranslationMap* map = area->address_space->TranslationMap();
2964 
2965 		map->Lock();
2966 		map->ClearFlags(virtual_page_address(area, page), flags);
2967 		map->Unlock();
2968 	}
2969 }
2970 
2971 
2972 /*!	Removes all mappings from a page.
2973 	After you've called this function, the page is unmapped from memory and
2974 	the page's \c accessed and \c modified flags have been updated according
2975 	to the state of the mappings.
2976 	The page's cache must be locked.
2977 */
2978 void
2979 vm_remove_all_page_mappings(vm_page* page)
2980 {
2981 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2982 		VMArea* area = mapping->area;
2983 		VMTranslationMap* map = area->address_space->TranslationMap();
2984 		addr_t address = virtual_page_address(area, page);
2985 		map->UnmapPage(area, address, false);
2986 	}
2987 }
2988 
2989 
2990 int32
2991 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2992 {
2993 	int32 count = 0;
2994 
2995 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2996 	vm_page_mapping* mapping;
2997 	while ((mapping = iterator.Next()) != NULL) {
2998 		VMArea* area = mapping->area;
2999 		VMTranslationMap* map = area->address_space->TranslationMap();
3000 
3001 		bool modified;
3002 		if (map->ClearAccessedAndModified(area,
3003 				virtual_page_address(area, page), false, modified)) {
3004 			count++;
3005 		}
3006 
3007 		page->modified |= modified;
3008 	}
3009 
3010 
3011 	if (page->accessed) {
3012 		count++;
3013 		page->accessed = false;
3014 	}
3015 
3016 	return count;
3017 }
3018 
3019 
3020 /*!	Removes all mappings of a page and/or clears the accessed bits of the
3021 	mappings.
3022 	The function iterates through the page mappings and removes them until
3023 	encountering one that has been accessed. From then on it will continue to
3024 	iterate, but only clear the accessed flag of the mapping. The page's
3025 	\c modified bit will be updated accordingly, the \c accessed bit will be
3026 	cleared.
3027 	\return The number of mapping accessed bits encountered, including the
3028 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3029 		of the page have been removed.
3030 */
3031 int32
3032 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3033 {
3034 	ASSERT(page->WiredCount() == 0);
3035 
3036 	if (page->accessed)
3037 		return vm_clear_page_mapping_accessed_flags(page);
3038 
3039 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3040 		VMArea* area = mapping->area;
3041 		VMTranslationMap* map = area->address_space->TranslationMap();
3042 		addr_t address = virtual_page_address(area, page);
3043 		bool modified = false;
3044 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3045 			page->accessed = true;
3046 			page->modified |= modified;
3047 			return vm_clear_page_mapping_accessed_flags(page);
3048 		}
3049 		page->modified |= modified;
3050 	}
3051 
3052 	return 0;
3053 }
3054 
3055 
3056 static int
3057 display_mem(int argc, char** argv)
3058 {
3059 	bool physical = false;
3060 	addr_t copyAddress;
3061 	int32 displayWidth;
3062 	int32 itemSize;
3063 	int32 num = -1;
3064 	addr_t address;
3065 	int i = 1, j;
3066 
3067 	if (argc > 1 && argv[1][0] == '-') {
3068 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3069 			physical = true;
3070 			i++;
3071 		} else
3072 			i = 99;
3073 	}
3074 
3075 	if (argc < i + 1 || argc > i + 2) {
3076 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3077 			"\tdl - 8 bytes\n"
3078 			"\tdw - 4 bytes\n"
3079 			"\tds - 2 bytes\n"
3080 			"\tdb - 1 byte\n"
3081 			"\tstring - a whole string\n"
3082 			"  -p or --physical only allows memory from a single page to be "
3083 			"displayed.\n");
3084 		return 0;
3085 	}
3086 
3087 	address = parse_expression(argv[i]);
3088 
3089 	if (argc > i + 1)
3090 		num = parse_expression(argv[i + 1]);
3091 
3092 	// determine the item size and the number of items per line
3093 	if (strcmp(argv[0], "db") == 0) {
3094 		itemSize = 1;
3095 		displayWidth = 16;
3096 	} else if (strcmp(argv[0], "ds") == 0) {
3097 		itemSize = 2;
3098 		displayWidth = 8;
3099 	} else if (strcmp(argv[0], "dw") == 0) {
3100 		itemSize = 4;
3101 		displayWidth = 4;
3102 	} else if (strcmp(argv[0], "dl") == 0) {
3103 		itemSize = 8;
3104 		displayWidth = 2;
3105 	} else if (strcmp(argv[0], "string") == 0) {
3106 		itemSize = 1;
3107 		displayWidth = -1;
3108 	} else {
3109 		kprintf("display_mem called in an invalid way!\n");
3110 		return 0;
3111 	}
3112 
3113 	if (num <= 0)
3114 		num = displayWidth;
3115 
3116 	void* physicalPageHandle = NULL;
3117 
3118 	if (physical) {
3119 		int32 offset = address & (B_PAGE_SIZE - 1);
3120 		if (num * itemSize + offset > B_PAGE_SIZE) {
3121 			num = (B_PAGE_SIZE - offset) / itemSize;
3122 			kprintf("NOTE: number of bytes has been cut to page size\n");
3123 		}
3124 
3125 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3126 
3127 		if (vm_get_physical_page_debug(address, &copyAddress,
3128 				&physicalPageHandle) != B_OK) {
3129 			kprintf("getting the hardware page failed.\n");
3130 			return 0;
3131 		}
3132 
3133 		address += offset;
3134 		copyAddress += offset;
3135 	} else
3136 		copyAddress = address;
3137 
3138 	if (!strcmp(argv[0], "string")) {
3139 		kprintf("%p \"", (char*)copyAddress);
3140 
3141 		// string mode
3142 		for (i = 0; true; i++) {
3143 			char c;
3144 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3145 					!= B_OK
3146 				|| c == '\0') {
3147 				break;
3148 			}
3149 
3150 			if (c == '\n')
3151 				kprintf("\\n");
3152 			else if (c == '\t')
3153 				kprintf("\\t");
3154 			else {
3155 				if (!isprint(c))
3156 					c = '.';
3157 
3158 				kprintf("%c", c);
3159 			}
3160 		}
3161 
3162 		kprintf("\"\n");
3163 	} else {
3164 		// number mode
3165 		for (i = 0; i < num; i++) {
3166 			uint64 value;
3167 
3168 			if ((i % displayWidth) == 0) {
3169 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3170 				if (i != 0)
3171 					kprintf("\n");
3172 
3173 				kprintf("[0x%lx]  ", address + i * itemSize);
3174 
3175 				for (j = 0; j < displayed; j++) {
3176 					char c;
3177 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3178 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3179 						displayed = j;
3180 						break;
3181 					}
3182 					if (!isprint(c))
3183 						c = '.';
3184 
3185 					kprintf("%c", c);
3186 				}
3187 				if (num > displayWidth) {
3188 					// make sure the spacing in the last line is correct
3189 					for (j = displayed; j < displayWidth * itemSize; j++)
3190 						kprintf(" ");
3191 				}
3192 				kprintf("  ");
3193 			}
3194 
3195 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3196 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3197 				kprintf("read fault");
3198 				break;
3199 			}
3200 
3201 			switch (itemSize) {
3202 				case 1:
3203 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3204 					break;
3205 				case 2:
3206 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3207 					break;
3208 				case 4:
3209 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3210 					break;
3211 				case 8:
3212 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3213 					break;
3214 			}
3215 		}
3216 
3217 		kprintf("\n");
3218 	}
3219 
3220 	if (physical) {
3221 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3222 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3223 	}
3224 	return 0;
3225 }
3226 
3227 
3228 static void
3229 dump_cache_tree_recursively(VMCache* cache, int level,
3230 	VMCache* highlightCache)
3231 {
3232 	// print this cache
3233 	for (int i = 0; i < level; i++)
3234 		kprintf("  ");
3235 	if (cache == highlightCache)
3236 		kprintf("%p <--\n", cache);
3237 	else
3238 		kprintf("%p\n", cache);
3239 
3240 	// recursively print its consumers
3241 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3242 			VMCache* consumer = it.Next();) {
3243 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3244 	}
3245 }
3246 
3247 
3248 static int
3249 dump_cache_tree(int argc, char** argv)
3250 {
3251 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3252 		kprintf("usage: %s <address>\n", argv[0]);
3253 		return 0;
3254 	}
3255 
3256 	addr_t address = parse_expression(argv[1]);
3257 	if (address == 0)
3258 		return 0;
3259 
3260 	VMCache* cache = (VMCache*)address;
3261 	VMCache* root = cache;
3262 
3263 	// find the root cache (the transitive source)
3264 	while (root->source != NULL)
3265 		root = root->source;
3266 
3267 	dump_cache_tree_recursively(root, 0, cache);
3268 
3269 	return 0;
3270 }
3271 
3272 
3273 const char*
3274 vm_cache_type_to_string(int32 type)
3275 {
3276 	switch (type) {
3277 		case CACHE_TYPE_RAM:
3278 			return "RAM";
3279 		case CACHE_TYPE_DEVICE:
3280 			return "device";
3281 		case CACHE_TYPE_VNODE:
3282 			return "vnode";
3283 		case CACHE_TYPE_NULL:
3284 			return "null";
3285 
3286 		default:
3287 			return "unknown";
3288 	}
3289 }
3290 
3291 
3292 #if DEBUG_CACHE_LIST
3293 
3294 static void
3295 update_cache_info_recursively(VMCache* cache, cache_info& info)
3296 {
3297 	info.page_count += cache->page_count;
3298 	if (cache->type == CACHE_TYPE_RAM)
3299 		info.committed += cache->committed_size;
3300 
3301 	// recurse
3302 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3303 			VMCache* consumer = it.Next();) {
3304 		update_cache_info_recursively(consumer, info);
3305 	}
3306 }
3307 
3308 
3309 static int
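// qsort() comparators for the cache info table; both sort in descending
// order, so the largest cache trees come first.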
3310 cache_info_compare_page_count(const void* _a, const void* _b)
3311 {
3312 	const cache_info* a = (const cache_info*)_a;
3313 	const cache_info* b = (const cache_info*)_b;
3314 	if (a->page_count == b->page_count)
3315 		return 0;
3316 	return a->page_count < b->page_count ? 1 : -1;
3317 }
3318 
3319 
3320 static int
3321 cache_info_compare_committed(const void* _a, const void* _b)
3322 {
3323 	const cache_info* a = (const cache_info*)_a;
3324 	const cache_info* b = (const cache_info*)_b;
3325 	if (a->committed == b->committed)
3326 		return 0;
3327 	return a->committed < b->committed ? 1 : -1;
3328 }
3329 
3330 
3331 static void
3332 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3333 {
3334 	for (int i = 0; i < level; i++)
3335 		kprintf("  ");
3336 
3337 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3338 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3339 		cache->virtual_base, cache->virtual_end, cache->page_count);
3340 
3341 	if (level == 0)
3342 		kprintf("/%lu", info.page_count);
3343 
3344 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3345 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3346 
3347 		if (level == 0)
3348 			kprintf("/%lu", info.committed);
3349 	}
3350 
3351 	// areas
3352 	if (cache->areas != NULL) {
3353 		VMArea* area = cache->areas;
3354 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3355 			area->name, area->address_space->ID());
3356 
3357 		while (area->cache_next != NULL) {
3358 			area = area->cache_next;
3359 			kprintf(", %" B_PRId32, area->id);
3360 		}
3361 	}
3362 
3363 	kputs("\n");
3364 
3365 	// recurse
3366 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3367 			VMCache* consumer = it.Next();) {
3368 		dump_caches_recursively(consumer, info, level + 1);
3369 	}
3370 }
3371 
3372 
3373 static int
3374 dump_caches(int argc, char** argv)
3375 {
3376 	if (sCacheInfoTable == NULL) {
3377 		kprintf("No cache info table!\n");
3378 		return 0;
3379 	}
3380 
3381 	bool sortByPageCount = true;
3382 
3383 	for (int32 i = 1; i < argc; i++) {
3384 		if (strcmp(argv[i], "-c") == 0) {
3385 			sortByPageCount = false;
3386 		} else {
3387 			print_debugger_command_usage(argv[0]);
3388 			return 0;
3389 		}
3390 	}
3391 
3392 	uint32 totalCount = 0;
3393 	uint32 rootCount = 0;
3394 	off_t totalCommitted = 0;
3395 	page_num_t totalPages = 0;
3396 
3397 	VMCache* cache = gDebugCacheList;
3398 	while (cache) {
3399 		totalCount++;
3400 		if (cache->source == NULL) {
3401 			cache_info stackInfo;
3402 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3403 				? sCacheInfoTable[rootCount] : stackInfo;
3404 			rootCount++;
3405 			info.cache = cache;
3406 			info.page_count = 0;
3407 			info.committed = 0;
3408 			update_cache_info_recursively(cache, info);
3409 			totalCommitted += info.committed;
3410 			totalPages += info.page_count;
3411 		}
3412 
3413 		cache = cache->debug_next;
3414 	}
3415 
3416 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3417 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3418 			sortByPageCount
3419 				? &cache_info_compare_page_count
3420 				: &cache_info_compare_committed);
3421 	}
3422 
3423 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3424 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3425 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3426 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3427 			"page count" : "committed size");
3428 
3429 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3430 		for (uint32 i = 0; i < rootCount; i++) {
3431 			cache_info& info = sCacheInfoTable[i];
3432 			dump_caches_recursively(info.cache, info, 0);
3433 		}
3434 	} else
3435 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3436 
3437 	return 0;
3438 }
3439 
3440 #endif	// DEBUG_CACHE_LIST
3441 
3442 
3443 static int
3444 dump_cache(int argc, char** argv)
3445 {
3446 	VMCache* cache;
3447 	bool showPages = false;
3448 	int i = 1;
3449 
3450 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3451 		kprintf("usage: %s [-ps] <address>\n"
3452 			"  If -p is specified, all of the cache's pages are listed as well;\n"
3453 			"  with -s only the cache info itself is shown.\n", argv[0]);
3454 		return 0;
3455 	}
3456 	while (argv[i][0] == '-') {
3457 		char* arg = argv[i] + 1;
3458 		while (arg[0]) {
3459 			if (arg[0] == 'p')
3460 				showPages = true;
3461 			arg++;
3462 		}
3463 		i++;
3464 	}
3465 	if (argv[i] == NULL) {
3466 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3467 		return 0;
3468 	}
3469 
3470 	addr_t address = parse_expression(argv[i]);
3471 	if (address == 0)
3472 		return 0;
3473 
3474 	cache = (VMCache*)address;
3475 
3476 	cache->Dump(showPages);
3477 
3478 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3479 
3480 	return 0;
3481 }
3482 
3483 
3484 static void
3485 dump_area_struct(VMArea* area, bool mappings)
3486 {
3487 	kprintf("AREA: %p\n", area);
3488 	kprintf("name:\t\t'%s'\n", area->name);
3489 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3490 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3491 	kprintf("base:\t\t0x%lx\n", area->Base());
3492 	kprintf("size:\t\t0x%lx\n", area->Size());
3493 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3494 	kprintf("page_protection:%p\n", area->page_protections);
3495 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3496 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3497 	kprintf("cache:\t\t%p\n", area->cache);
3498 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3499 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3500 	kprintf("cache_next:\t%p\n", area->cache_next);
3501 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3502 
3503 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3504 	if (mappings) {
3505 		kprintf("page mappings:\n");
3506 		while (iterator.HasNext()) {
3507 			vm_page_mapping* mapping = iterator.Next();
3508 			kprintf("  %p", mapping->page);
3509 		}
3510 		kprintf("\n");
3511 	} else {
3512 		uint32 count = 0;
3513 		while (iterator.Next() != NULL) {
3514 			count++;
3515 		}
3516 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3517 	}
3518 }
3519 
3520 
3521 static int
3522 dump_area(int argc, char** argv)
3523 {
3524 	bool mappings = false;
3525 	bool found = false;
3526 	int32 index = 1;
3527 	VMArea* area;
3528 	addr_t num;
3529 
3530 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3531 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3532 			"All areas matching the given id/address/name are listed. You can\n"
3533 			"restrict the check to a single attribute by prefixing the specifier\n"
3534 			"with one of the id/contains/address/name keywords.\n"
3535 			"-m shows the area's mappings as well.\n");
3536 		return 0;
3537 	}
3538 
3539 	if (!strcmp(argv[1], "-m")) {
3540 		mappings = true;
3541 		index++;
3542 	}
3543 
3544 	int32 mode = 0xf;
3545 	if (!strcmp(argv[index], "id"))
3546 		mode = 1;
3547 	else if (!strcmp(argv[index], "contains"))
3548 		mode = 2;
3549 	else if (!strcmp(argv[index], "name"))
3550 		mode = 4;
3551 	else if (!strcmp(argv[index], "address"))
3552 		mode = 0;
3553 	if (mode != 0xf)
3554 		index++;
3555 
3556 	if (index >= argc) {
3557 		kprintf("No area specifier given.\n");
3558 		return 0;
3559 	}
3560 
3561 	num = parse_expression(argv[index]);
3562 
3563 	if (mode == 0) {
3564 		dump_area_struct((struct VMArea*)num, mappings);
3565 	} else {
3566 		// walk through the area list, looking for the arguments as a name
3567 
3568 		VMAreasTree::Iterator it = VMAreas::GetIterator();
3569 		while ((area = it.Next()) != NULL) {
3570 			if (((mode & 4) != 0
3571 					&& !strcmp(argv[index], area->name))
3572 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3573 					|| (((mode & 2) != 0 && area->Base() <= num
3574 						&& area->Base() + area->Size() > num))))) {
3575 				dump_area_struct(area, mappings);
3576 				found = true;
3577 			}
3578 		}
3579 
3580 		if (!found)
3581 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3582 	}
3583 
3584 	return 0;
3585 }
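
// Example KDL invocations (illustrative; id and address are made up):
//	area -m id 0x1b2c
//	area contains 0x80123456
// Without one of the id/contains/address/name keywords, all attributes are
// matched against the given specifier.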
3586 
3587 
3588 static int
3589 dump_area_list(int argc, char** argv)
3590 {
3591 	VMArea* area;
3592 	const char* name = NULL;
3593 	int32 id = 0;
3594 
3595 	if (argc > 1) {
3596 		id = parse_expression(argv[1]);
3597 		if (id == 0)
3598 			name = argv[1];
3599 	}
3600 
3601 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3602 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3603 		B_PRINTF_POINTER_WIDTH, "size");
3604 
3605 	VMAreasTree::Iterator it = VMAreas::GetIterator();
3606 	while ((area = it.Next()) != NULL) {
3607 		if ((id != 0 && area->address_space->ID() != id)
3608 			|| (name != NULL && strstr(area->name, name) == NULL))
3609 			continue;
3610 
3611 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3612 			area->id, (void*)area->Base(), (void*)area->Size(),
3613 			area->protection, area->wiring, area->name);
3614 	}
3615 	return 0;
3616 }
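
// Example KDL invocations (illustrative; the team ID is made up):
//	areas 0x123
//	areas stack
// A numeric argument filters by owning team ID; any other argument is treated
// as a substring to match against area names.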
3617 
3618 
3619 static int
3620 dump_available_memory(int argc, char** argv)
3621 {
3622 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3623 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3624 	return 0;
3625 }
3626 
3627 
3628 static int
3629 dump_mapping_info(int argc, char** argv)
3630 {
3631 	bool reverseLookup = false;
3632 	bool pageLookup = false;
3633 
3634 	int argi = 1;
3635 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3636 		const char* arg = argv[argi];
3637 		if (strcmp(arg, "-r") == 0) {
3638 			reverseLookup = true;
3639 		} else if (strcmp(arg, "-p") == 0) {
3640 			reverseLookup = true;
3641 			pageLookup = true;
3642 		} else {
3643 			print_debugger_command_usage(argv[0]);
3644 			return 0;
3645 		}
3646 	}
3647 
3648 	// We need at least one argument, the address. Optionally a thread ID can be
3649 	// specified.
3650 	if (argi >= argc || argi + 2 < argc) {
3651 		print_debugger_command_usage(argv[0]);
3652 		return 0;
3653 	}
3654 
3655 	uint64 addressValue;
3656 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3657 		return 0;
3658 
3659 	Team* team = NULL;
3660 	if (argi < argc) {
3661 		uint64 threadID;
3662 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3663 			return 0;
3664 
3665 		Thread* thread = Thread::GetDebug(threadID);
3666 		if (thread == NULL) {
3667 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3668 			return 0;
3669 		}
3670 
3671 		team = thread->team;
3672 	}
3673 
3674 	if (reverseLookup) {
3675 		phys_addr_t physicalAddress;
3676 		if (pageLookup) {
3677 			vm_page* page = (vm_page*)(addr_t)addressValue;
3678 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3679 		} else {
3680 			physicalAddress = (phys_addr_t)addressValue;
3681 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3682 		}
3683 
3684 		kprintf("    Team     Virtual Address      Area\n");
3685 		kprintf("--------------------------------------\n");
3686 
3687 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3688 			Callback()
3689 				:
3690 				fAddressSpace(NULL)
3691 			{
3692 			}
3693 
3694 			void SetAddressSpace(VMAddressSpace* addressSpace)
3695 			{
3696 				fAddressSpace = addressSpace;
3697 			}
3698 
3699 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3700 			{
3701 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3702 					virtualAddress);
3703 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3704 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3705 				else
3706 					kprintf("\n");
3707 				return false;
3708 			}
3709 
3710 		private:
3711 			VMAddressSpace*	fAddressSpace;
3712 		} callback;
3713 
3714 		if (team != NULL) {
3715 			// team specified -- get its address space
3716 			VMAddressSpace* addressSpace = team->address_space;
3717 			if (addressSpace == NULL) {
3718 				kprintf("Failed to get address space!\n");
3719 				return 0;
3720 			}
3721 
3722 			callback.SetAddressSpace(addressSpace);
3723 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3724 				physicalAddress, callback);
3725 		} else {
3726 			// no team specified -- iterate through all address spaces
3727 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3728 				addressSpace != NULL;
3729 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3730 				callback.SetAddressSpace(addressSpace);
3731 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3732 					physicalAddress, callback);
3733 			}
3734 		}
3735 	} else {
3736 		// get the address space
3737 		addr_t virtualAddress = (addr_t)addressValue;
3738 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3739 		VMAddressSpace* addressSpace;
3740 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3741 			addressSpace = VMAddressSpace::Kernel();
3742 		} else if (team != NULL) {
3743 			addressSpace = team->address_space;
3744 		} else {
3745 			Thread* thread = debug_get_debugged_thread();
3746 			if (thread == NULL || thread->team == NULL) {
3747 				kprintf("Failed to get team!\n");
3748 				return 0;
3749 			}
3750 
3751 			addressSpace = thread->team->address_space;
3752 		}
3753 
3754 		if (addressSpace == NULL) {
3755 			kprintf("Failed to get address space!\n");
3756 			return 0;
3757 		}
3758 
3759 		// let the translation map implementation do the job
3760 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3761 	}
3762 
3763 	return 0;
3764 }
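
// Example KDL invocations (illustrative; addresses and thread ID are made up):
//	mapping 0x80001000			virtual address, current/debugged team
//	mapping 0x00201000 42		virtual address in the team of thread 42
//	mapping -r 0x12345000		physical address, searched in all teams
// With "-p" the argument is the address of a vm_page structure instead.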
3765 
3766 
3767 /*!	Deletes all areas and reserved regions in the given address space.
3768 
3769 	The caller must ensure that none of the areas has any wired ranges.
3770 
3771 	\param addressSpace The address space.
3772 	\param deletingAddressSpace \c true, if the address space is in the process
3773 		of being deleted.
3774 */
3775 void
3776 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3777 {
3778 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3779 		addressSpace->ID()));
3780 
3781 	addressSpace->WriteLock();
3782 
3783 	// remove all reserved areas in this address space
3784 	addressSpace->UnreserveAllAddressRanges(0);
3785 
3786 	// delete all the areas in this address space
3787 	while (VMArea* area = addressSpace->FirstArea()) {
3788 		ASSERT(!area->IsWired());
3789 		delete_area(addressSpace, area, deletingAddressSpace);
3790 	}
3791 
3792 	addressSpace->WriteUnlock();
3793 }
3794 
3795 
3796 static area_id
3797 vm_area_for(addr_t address, bool kernel)
3798 {
3799 	team_id team;
3800 	if (IS_USER_ADDRESS(address)) {
3801 		// we try the user team address space, if any
3802 		team = VMAddressSpace::CurrentID();
3803 		if (team < 0)
3804 			return team;
3805 	} else
3806 		team = VMAddressSpace::KernelID();
3807 
3808 	AddressSpaceReadLocker locker(team);
3809 	if (!locker.IsLocked())
3810 		return B_BAD_TEAM_ID;
3811 
3812 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3813 	if (area != NULL) {
3814 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0
3815 				&& (area->protection & B_KERNEL_AREA) != 0)
3816 			return B_ERROR;
3817 
3818 		return area->id;
3819 	}
3820 
3821 	return B_ERROR;
3822 }
3823 
3824 
3825 /*!	Frees physical pages that were used during the boot process.
3826 	\a end is inclusive.
3827 */
3828 static void
3829 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3830 {
3831 	// free all physical pages in the specified range
3832 
3833 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3834 		phys_addr_t physicalAddress;
3835 		uint32 flags;
3836 
3837 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3838 			&& (flags & PAGE_PRESENT) != 0) {
3839 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3840 			if (page != NULL && page->State() != PAGE_STATE_FREE
3841 					&& page->State() != PAGE_STATE_CLEAR
3842 					&& page->State() != PAGE_STATE_UNUSED) {
3843 				DEBUG_PAGE_ACCESS_START(page);
3844 				vm_page_set_state(page, PAGE_STATE_FREE);
3845 			}
3846 		}
3847 	}
3848 
3849 	// unmap the memory
3850 	map->Unmap(start, end);
3851 }
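
// Note that the range end is inclusive: a caller freeing a single page would
// pass end = start + B_PAGE_SIZE - 1, which is the same convention
// vm_free_unused_boot_loader_range() uses below.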
3852 
3853 
3854 void
3855 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3856 {
3857 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3858 	addr_t end = start + (size - 1);
3859 	addr_t lastEnd = start;
3860 
3861 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3862 		(void*)start, (void*)end));
3863 
3864 	// The areas are sorted in virtual address space order, so
3865 	// we just have to find the holes between them that fall
3866 	// into the range we should dispose of
3867 
3868 	map->Lock();
3869 
3870 	for (VMAddressSpace::AreaIterator it
3871 				= VMAddressSpace::Kernel()->GetAreaIterator();
3872 			VMArea* area = it.Next();) {
3873 		addr_t areaStart = area->Base();
3874 		addr_t areaEnd = areaStart + (area->Size() - 1);
3875 
3876 		if (areaEnd < start)
3877 			continue;
3878 
3879 		if (areaStart > end) {
3880 			// we are done, the area is already beyond what we have to free
3881 			break;
3882 		}
3883 
3884 		if (areaStart > lastEnd) {
3885 			// this is something we can free
3886 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3887 				(void*)areaStart));
3888 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3889 		}
3890 
3891 		if (areaEnd >= end) {
3892 			lastEnd = areaEnd;
3893 				// no +1 to prevent potential overflow
3894 			break;
3895 		}
3896 
3897 		lastEnd = areaEnd + 1;
3898 	}
3899 
3900 	if (lastEnd < end) {
3901 		// we can also get rid of some space at the end of the area
3902 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3903 			(void*)end));
3904 		unmap_and_free_physical_pages(map, lastEnd, end);
3905 	}
3906 
3907 	map->Unlock();
3908 }
3909 
3910 
3911 static void
3912 create_preloaded_image_areas(struct preloaded_image* _image)
3913 {
3914 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3915 	char name[B_OS_NAME_LENGTH];
3916 	void* address;
3917 	int32 length;
3918 
3919 	// use file name to create a good area name
3920 	char* fileName = strrchr(image->name, '/');
3921 	if (fileName == NULL)
3922 		fileName = image->name;
3923 	else
3924 		fileName++;
3925 
3926 	length = strlen(fileName);
3927 	// make sure there is enough space for the suffix
3928 	if (length > 25)
3929 		length = 25;
3930 
3931 	memcpy(name, fileName, length);
3932 	strcpy(name + length, "_text");
3933 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3934 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3935 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3936 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3937 		// this will later be remapped read-only/executable by the
3938 		// ELF initialization code
3939 
3940 	strcpy(name + length, "_data");
3941 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3942 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3943 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3944 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3945 }
3946 
3947 
3948 /*!	Frees all previously allocated kernel args areas from the kernel_args
3949 	structure. Any boot loader resources contained in those arguments must not
3950 	be accessed anymore past this point.
3951 */
3952 void
3953 vm_free_kernel_args(kernel_args* args)
3954 {
3955 	uint32 i;
3956 
3957 	TRACE(("vm_free_kernel_args()\n"));
3958 
3959 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3960 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3961 		if (area >= B_OK)
3962 			delete_area(area);
3963 	}
3964 }
3965 
3966 
3967 static void
3968 allocate_kernel_args(kernel_args* args)
3969 {
3970 	TRACE(("allocate_kernel_args()\n"));
3971 
3972 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3973 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3974 
3975 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3976 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3977 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3978 	}
3979 }
3980 
3981 
3982 static void
3983 unreserve_boot_loader_ranges(kernel_args* args)
3984 {
3985 	TRACE(("unreserve_boot_loader_ranges()\n"));
3986 
3987 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3988 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3989 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3990 			args->virtual_allocated_range[i].size);
3991 	}
3992 }
3993 
3994 
3995 static void
3996 reserve_boot_loader_ranges(kernel_args* args)
3997 {
3998 	TRACE(("reserve_boot_loader_ranges()\n"));
3999 
4000 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4001 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
4002 
4003 		// If the address is not a kernel address, we just skip it. The
4004 		// architecture specific code has to deal with it.
4005 		if (!IS_KERNEL_ADDRESS(address)) {
4006 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
4007 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
4008 			continue;
4009 		}
4010 
4011 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
4012 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4013 		if (status < B_OK)
4014 			panic("could not reserve boot loader ranges\n");
4015 	}
4016 }
4017 
4018 
4019 static addr_t
4020 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
4021 {
4022 	size = PAGE_ALIGN(size);
4023 
4024 	// find a slot in the virtual allocation addr range
4025 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
4026 		// check to see if the space between this one and the last is big enough
4027 		addr_t rangeStart = args->virtual_allocated_range[i].start;
4028 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4029 			+ args->virtual_allocated_range[i - 1].size;
4030 
4031 		addr_t base = alignment > 0
4032 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4033 
4034 		if (base >= KERNEL_BASE && base < rangeStart
4035 				&& rangeStart - base >= size) {
4036 			args->virtual_allocated_range[i - 1].size
4037 				+= base + size - previousRangeEnd;
4038 			return base;
4039 		}
4040 	}
4041 
4042 	// we didn't find a gap between the allocation ranges; that's OK.
4043 	// see if there's a gap after the last one
4044 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4045 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4046 		+ args->virtual_allocated_range[lastEntryIndex].size;
4047 	addr_t base = alignment > 0
4048 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4049 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4050 		args->virtual_allocated_range[lastEntryIndex].size
4051 			+= base + size - lastRangeEnd;
4052 		return base;
4053 	}
4054 
4055 	// see if there's a gap before the first one
4056 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4057 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4058 		base = rangeStart - size;
4059 		if (alignment > 0)
4060 			base = ROUNDDOWN(base, alignment);
4061 
4062 		if (base >= KERNEL_BASE) {
4063 			args->virtual_allocated_range[0].start = base;
4064 			args->virtual_allocated_range[0].size += rangeStart - base;
4065 			return base;
4066 		}
4067 	}
4068 
4069 	return 0;
4070 }
4071 
4072 
4073 static bool
4074 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4075 {
4076 	// TODO: horrible brute-force method of determining if the page can be
4077 	// allocated
4078 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4079 		if (address >= args->physical_memory_range[i].start
4080 			&& address < args->physical_memory_range[i].start
4081 				+ args->physical_memory_range[i].size)
4082 			return true;
4083 	}
4084 	return false;
4085 }
4086 
4087 
4088 page_num_t
4089 vm_allocate_early_physical_page(kernel_args* args)
4090 {
4091 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4092 		phys_addr_t nextPage;
4093 
4094 		nextPage = args->physical_allocated_range[i].start
4095 			+ args->physical_allocated_range[i].size;
4096 		// see if the page after this allocated paddr run can be allocated
4097 		if (i + 1 < args->num_physical_allocated_ranges
4098 			&& args->physical_allocated_range[i + 1].size != 0) {
4099 			// see if the next page will collide with the next allocated range
4100 			if (nextPage >= args->physical_allocated_range[i+1].start)
4101 				continue;
4102 		}
4103 		// see if the next physical page fits in the memory block
4104 		if (is_page_in_physical_memory_range(args, nextPage)) {
4105 			// we got one!
4106 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4107 			return nextPage / B_PAGE_SIZE;
4108 		}
4109 	}
4110 
4111 	// Expanding upwards didn't work, try going downwards.
4112 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4113 		phys_addr_t nextPage;
4114 
4115 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4116 		// see if the page before this allocated paddr run can be allocated
4117 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4118 			// see if this page will collide with the previous allocated range
4119 			if (nextPage < args->physical_allocated_range[i-1].start
4120 				+ args->physical_allocated_range[i-1].size)
4121 				continue;
4122 		}
4123 		// see if the next physical page fits in the memory block
4124 		if (is_page_in_physical_memory_range(args, nextPage)) {
4125 			// we got one!
4126 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4127 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4128 			return nextPage / B_PAGE_SIZE;
4129 		}
4130 	}
4131 
4132 	return 0;
4133 		// could not allocate a block
4134 }
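
// Note that the return value is a page number, not an address: callers
// multiply it by B_PAGE_SIZE (as vm_allocate_early() does below), and a
// return value of 0 means that no page could be allocated.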
4135 
4136 
4137 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4138 	allocate some pages before the VM is completely up.
4139 */
4140 addr_t
4141 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4142 	uint32 attributes, addr_t alignment)
4143 {
4144 	if (physicalSize > virtualSize)
4145 		physicalSize = virtualSize;
4146 
4147 	// find the vaddr to allocate at
4148 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4149 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4150 	if (virtualBase == 0) {
4151 		panic("vm_allocate_early: could not allocate virtual address\n");
4152 		return 0;
4153 	}
4154 
4155 	// map the pages
4156 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4157 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4158 		if (physicalAddress == 0)
4159 			panic("error allocating early page!\n");
4160 
4161 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4162 
4163 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4164 			physicalAddress * B_PAGE_SIZE, attributes,
4165 			&vm_allocate_early_physical_page);
4166 	}
4167 
4168 	return virtualBase;
4169 }
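
// Illustrative use, modelled on the heap setup in vm_init() below (the size
// is made up):
//	addr_t base = vm_allocate_early(args, 16 * B_PAGE_SIZE, 16 * B_PAGE_SIZE,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
//	if (base == 0)
//		panic("early allocation failed");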
4170 
4171 
4172 /*!	The main entrance point to initialize the VM. */
4173 status_t
4174 vm_init(kernel_args* args)
4175 {
4176 	struct preloaded_image* image;
4177 	void* address;
4178 	status_t err = 0;
4179 	uint32 i;
4180 
4181 	TRACE(("vm_init: entry\n"));
4182 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4183 	err = arch_vm_init(args);
4184 
4185 	// initialize some globals
4186 	vm_page_init_num_pages(args);
4187 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4188 
4189 	slab_init(args);
4190 
4191 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4192 	off_t heapSize = INITIAL_HEAP_SIZE;
4193 	// try to accommodate low memory systems
4194 	while (heapSize > sAvailableMemory / 8)
4195 		heapSize /= 2;
4196 	if (heapSize < 1024 * 1024)
4197 		panic("vm_init: go buy some RAM please.");
4198 
4199 	// map in the new heap and initialize it
4200 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4201 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4202 	TRACE(("heap at 0x%lx\n", heapBase));
4203 	heap_init(heapBase, heapSize);
4204 #endif
4205 
4206 	// initialize the free page list and physical page mapper
4207 	vm_page_init(args);
4208 
4209 	// initialize the cache allocators
4210 	vm_cache_init(args);
4211 
4212 	{
4213 		status_t error = VMAreas::Init();
4214 		if (error != B_OK)
4215 			panic("vm_init: error initializing areas map\n");
4216 	}
4217 
4218 	VMAddressSpace::Init();
4219 	reserve_boot_loader_ranges(args);
4220 
4221 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4222 	heap_init_post_area();
4223 #endif
4224 
4225 	// Do any further initialization that the architecture dependent layers may
4226 	// need now
4227 	arch_vm_translation_map_init_post_area(args);
4228 	arch_vm_init_post_area(args);
4229 	vm_page_init_post_area(args);
4230 	slab_init_post_area();
4231 
4232 	// allocate areas to represent stuff that already exists
4233 
4234 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4235 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4236 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4237 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4238 #endif
4239 
4240 	allocate_kernel_args(args);
4241 
4242 	create_preloaded_image_areas(args->kernel_image);
4243 
4244 	// allocate areas for preloaded images
4245 	for (image = args->preloaded_images; image != NULL; image = image->next)
4246 		create_preloaded_image_areas(image);
4247 
4248 	// allocate kernel stacks
4249 	for (i = 0; i < args->num_cpus; i++) {
4250 		char name[64];
4251 
4252 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4253 		address = (void*)args->cpu_kstack[i].start;
4254 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4255 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4256 	}
4257 
4258 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4259 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4260 
4261 #if PARANOID_KERNEL_MALLOC
4262 	vm_block_address_range("uninitialized heap memory",
4263 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4264 #endif
4265 #if PARANOID_KERNEL_FREE
4266 	vm_block_address_range("freed heap memory",
4267 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4268 #endif
4269 
4270 	// create the object cache for the page mappings
4271 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4272 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4273 		NULL, NULL);
4274 	if (gPageMappingsObjectCache == NULL)
4275 		panic("failed to create page mappings object cache");
4276 
4277 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4278 
4279 #if DEBUG_CACHE_LIST
4280 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4281 		virtual_address_restrictions virtualRestrictions = {};
4282 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4283 		physical_address_restrictions physicalRestrictions = {};
4284 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4285 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4286 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4287 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4288 			&physicalRestrictions, (void**)&sCacheInfoTable);
4289 	}
4290 #endif	// DEBUG_CACHE_LIST
4291 
4292 	// add some debugger commands
4293 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4294 	add_debugger_command("area", &dump_area,
4295 		"Dump info about a particular area");
4296 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4297 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4298 #if DEBUG_CACHE_LIST
4299 	if (sCacheInfoTable != NULL) {
4300 		add_debugger_command_etc("caches", &dump_caches,
4301 			"List all VMCache trees",
4302 			"[ \"-c\" ]\n"
4303 			"All cache trees are listed sorted in decreasing order by number "
4304 				"of\n"
4305 			"used pages or, if \"-c\" is specified, by size of committed "
4306 				"memory.\n",
4307 			0);
4308 	}
4309 #endif
4310 	add_debugger_command("avail", &dump_available_memory,
4311 		"Dump available memory");
4312 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4313 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4314 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4315 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4316 	add_debugger_command("string", &display_mem, "dump strings");
4317 
4318 	add_debugger_command_etc("mapping", &dump_mapping_info,
4319 		"Print address mapping information",
4320 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4321 		"Prints low-level page mapping information for a given address. If\n"
4322 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4323 		"address that is looked up in the translation map of the current\n"
4324 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4325 		"\"-r\" is specified, <address> is a physical address that is\n"
4326 		"searched in the translation map of all teams, respectively the team\n"
4327 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4328 		"<address> is the address of a vm_page structure. The behavior is\n"
4329 		"equivalent to specifying \"-r\" with the physical address of that\n"
4330 		"page.\n",
4331 		0);
4332 
4333 	TRACE(("vm_init: exit\n"));
4334 
4335 	vm_cache_init_post_heap();
4336 
4337 	return err;
4338 }
4339 
4340 
4341 status_t
4342 vm_init_post_sem(kernel_args* args)
4343 {
4344 	// This frees all unused boot loader resources and makes their space
4345 	// available again
4346 	arch_vm_init_end(args);
4347 	unreserve_boot_loader_ranges(args);
4348 
4349 	// fill in all of the semaphores that were not allocated before
4350 	// since we're still single threaded and only the kernel address space
4351 	// exists, it isn't that hard to find all of the ones we need to create
4352 
4353 	arch_vm_translation_map_init_post_sem(args);
4354 
4355 	slab_init_post_sem();
4356 
4357 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4358 	heap_init_post_sem();
4359 #endif
4360 
4361 	return B_OK;
4362 }
4363 
4364 
4365 status_t
4366 vm_init_post_thread(kernel_args* args)
4367 {
4368 	vm_page_init_post_thread(args);
4369 	slab_init_post_thread();
4370 	return heap_init_post_thread();
4371 }
4372 
4373 
4374 status_t
4375 vm_init_post_modules(kernel_args* args)
4376 {
4377 	return arch_vm_init_post_modules(args);
4378 }
4379 
4380 
4381 void
4382 permit_page_faults(void)
4383 {
4384 	Thread* thread = thread_get_current_thread();
4385 	if (thread != NULL)
4386 		atomic_add(&thread->page_faults_allowed, 1);
4387 }
4388 
4389 
4390 void
4391 forbid_page_faults(void)
4392 {
4393 	Thread* thread = thread_get_current_thread();
4394 	if (thread != NULL)
4395 		atomic_add(&thread->page_faults_allowed, -1);
4396 }
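
// The two functions above adjust a per-thread counter and are intended to be
// used as a bracket around code that may touch unmapped memory, e.g.
// (illustrative sketch only):
//	permit_page_faults();
//	// ... access that may legitimately fault ...
//	forbid_page_faults();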
4397 
4398 
4399 status_t
4400 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4401 	bool isUser, addr_t* newIP)
4402 {
4403 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4404 		faultAddress));
4405 
4406 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4407 
4408 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4409 	VMAddressSpace* addressSpace = NULL;
4410 
4411 	status_t status = B_OK;
4412 	*newIP = 0;
4413 	atomic_add((int32*)&sPageFaults, 1);
4414 
4415 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4416 		addressSpace = VMAddressSpace::GetKernel();
4417 	} else if (IS_USER_ADDRESS(pageAddress)) {
4418 		addressSpace = VMAddressSpace::GetCurrent();
4419 		if (addressSpace == NULL) {
4420 			if (!isUser) {
4421 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4422 					"memory!\n");
4423 				status = B_BAD_ADDRESS;
4424 				TPF(PageFaultError(-1,
4425 					VMPageFaultTracing
4426 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4427 			} else {
4428 				// XXX weird state.
4429 				panic("vm_page_fault: non kernel thread accessing user memory "
4430 					"that doesn't exist!\n");
4431 				status = B_BAD_ADDRESS;
4432 			}
4433 		}
4434 	} else {
4435 		// The hit was probably in the 64k DMZ between kernel and user space;
4436 		// this keeps a user space thread from passing a buffer that crosses
4437 		// into kernel space.
4438 		status = B_BAD_ADDRESS;
4439 		TPF(PageFaultError(-1,
4440 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4441 	}
4442 
4443 	if (status == B_OK) {
4444 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4445 			isUser, NULL);
4446 	}
4447 
4448 	if (status < B_OK) {
4449 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4450 			"0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n",
4451 			strerror(status), address, faultAddress, isWrite, isUser, isExecute,
4452 			thread_get_current_thread_id());
4453 		if (!isUser) {
4454 			Thread* thread = thread_get_current_thread();
4455 			if (thread != NULL && thread->fault_handler != 0) {
4456 				// this will cause the arch dependent page fault handler to
4457 				// modify the IP on the interrupt frame or whatever to return
4458 				// to this address
4459 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4460 			} else {
4461 				// unhandled page fault in the kernel
4462 				panic("vm_page_fault: unhandled page fault in kernel space at "
4463 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4464 			}
4465 		} else {
4466 			Thread* thread = thread_get_current_thread();
4467 
4468 #ifdef TRACE_FAULTS
4469 			VMArea* area = NULL;
4470 			if (addressSpace != NULL) {
4471 				addressSpace->ReadLock();
4472 				area = addressSpace->LookupArea(faultAddress);
4473 			}
4474 
4475 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4476 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4477 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4478 				thread->team->Name(), thread->team->id,
4479 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4480 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4481 					area->Base() : 0x0));
4482 
4483 			if (addressSpace != NULL)
4484 				addressSpace->ReadUnlock();
4485 #endif
4486 
4487 			// If the thread has a signal handler for SIGSEGV, we simply
4488 			// send it the signal. Otherwise we notify the user debugger
4489 			// first.
4490 			struct sigaction action;
4491 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4492 					&& action.sa_handler != SIG_DFL
4493 					&& action.sa_handler != SIG_IGN)
4494 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4495 					SIGSEGV)) {
4496 				Signal signal(SIGSEGV,
4497 					status == B_PERMISSION_DENIED
4498 						? SEGV_ACCERR : SEGV_MAPERR,
4499 					EFAULT, thread->team->id);
4500 				signal.SetAddress((void*)address);
4501 				send_signal_to_thread(thread, signal, 0);
4502 			}
4503 		}
4504 	}
4505 
4506 	if (addressSpace != NULL)
4507 		addressSpace->Put();
4508 
4509 	return B_HANDLED_INTERRUPT;
4510 }
4511 
4512 
4513 struct PageFaultContext {
4514 	AddressSpaceReadLocker	addressSpaceLocker;
4515 	VMCacheChainLocker		cacheChainLocker;
4516 
4517 	VMTranslationMap*		map;
4518 	VMCache*				topCache;
4519 	off_t					cacheOffset;
4520 	vm_page_reservation		reservation;
4521 	bool					isWrite;
4522 
4523 	// return values
4524 	vm_page*				page;
4525 	bool					restart;
4526 	bool					pageAllocated;
4527 
4528 
4529 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4530 		:
4531 		addressSpaceLocker(addressSpace, true),
4532 		map(addressSpace->TranslationMap()),
4533 		isWrite(isWrite)
4534 	{
4535 	}
4536 
4537 	~PageFaultContext()
4538 	{
4539 		UnlockAll();
4540 		vm_page_unreserve_pages(&reservation);
4541 	}
4542 
4543 	void Prepare(VMCache* topCache, off_t cacheOffset)
4544 	{
4545 		this->topCache = topCache;
4546 		this->cacheOffset = cacheOffset;
4547 		page = NULL;
4548 		restart = false;
4549 		pageAllocated = false;
4550 
4551 		cacheChainLocker.SetTo(topCache);
4552 	}
4553 
4554 	void UnlockAll(VMCache* exceptCache = NULL)
4555 	{
4556 		topCache = NULL;
4557 		addressSpaceLocker.Unlock();
4558 		cacheChainLocker.Unlock(exceptCache);
4559 	}
4560 };
4561 
4562 
4563 /*!	Gets the page that should be mapped into the area.
4564 	Returns an error code other than \c B_OK, if the page couldn't be found or
4565 	paged in. The locking state of the address space and the caches is undefined
4566 	in that case.
4567 	Returns \c B_OK with \c context.restart set to \c true, if the function
4568 	had to unlock the address space and all caches and is supposed to be called
4569 	again.
4570 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4571 	found. It is returned in \c context.page. The address space will still be
4572 	locked as well as all caches starting from the top cache to at least the
4573 	cache the page lives in.
4574 */
4575 static status_t
4576 fault_get_page(PageFaultContext& context)
4577 {
4578 	VMCache* cache = context.topCache;
4579 	VMCache* lastCache = NULL;
4580 	vm_page* page = NULL;
4581 
4582 	while (cache != NULL) {
4583 		// We already hold the lock of the cache at this point.
4584 
4585 		lastCache = cache;
4586 
4587 		page = cache->LookupPage(context.cacheOffset);
4588 		if (page != NULL && page->busy) {
4589 			// the page is busy -- wait for it to become unbusy
4590 			context.UnlockAll(cache);
4591 			cache->ReleaseRefLocked();
4592 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4593 
4594 			// restart the whole process
4595 			context.restart = true;
4596 			return B_OK;
4597 		}
4598 
4599 		if (page != NULL)
4600 			break;
4601 
4602 		// The current cache does not contain the page we're looking for.
4603 
4604 		// see if the backing store has it
4605 		if (cache->HasPage(context.cacheOffset)) {
4606 			// insert a fresh page and mark it busy -- we're going to read it in
4607 			page = vm_page_allocate_page(&context.reservation,
4608 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4609 			cache->InsertPage(page, context.cacheOffset);
4610 
4611 			// We need to unlock all caches and the address space while reading
4612 			// the page in. Keep a reference to the cache around.
4613 			cache->AcquireRefLocked();
4614 			context.UnlockAll();
4615 
4616 			// read the page in
4617 			generic_io_vec vec;
4618 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4619 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4620 
4621 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4622 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4623 
4624 			cache->Lock();
4625 
4626 			if (status < B_OK) {
4627 				// on error remove and free the page
4628 				dprintf("reading page from cache %p returned: %s!\n",
4629 					cache, strerror(status));
4630 
4631 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4632 				cache->RemovePage(page);
4633 				vm_page_set_state(page, PAGE_STATE_FREE);
4634 
4635 				cache->ReleaseRefAndUnlock();
4636 				return status;
4637 			}
4638 
4639 			// mark the page unbusy again
4640 			cache->MarkPageUnbusy(page);
4641 
4642 			DEBUG_PAGE_ACCESS_END(page);
4643 
4644 			// Since we needed to unlock everything temporarily, the area
4645 			// situation might have changed. So we need to restart the whole
4646 			// process.
4647 			cache->ReleaseRefAndUnlock();
4648 			context.restart = true;
4649 			return B_OK;
4650 		}
4651 
4652 		cache = context.cacheChainLocker.LockSourceCache();
4653 	}
4654 
4655 	if (page == NULL) {
4656 		// There was no adequate page, determine the cache for a clean one.
4657 		// Read-only pages come in the deepest cache, only the top most cache
4658 		// may have direct write access.
4659 		cache = context.isWrite ? context.topCache : lastCache;
4660 
4661 		// allocate a clean page
4662 		page = vm_page_allocate_page(&context.reservation,
4663 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4664 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4665 			page->physical_page_number));
4666 
4667 		// insert the new page into our cache
4668 		cache->InsertPage(page, context.cacheOffset);
4669 		context.pageAllocated = true;
4670 	} else if (page->Cache() != context.topCache && context.isWrite) {
4671 		// We have a page that has the data we want, but in the wrong cache
4672 		// object so we need to copy it and stick it into the top cache.
4673 		vm_page* sourcePage = page;
4674 
4675 		// TODO: If memory is low, it might be a good idea to steal the page
4676 		// from our source cache -- if possible, that is.
4677 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4678 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4679 
4680 		// To not needlessly kill concurrency we unlock all caches but the top
4681 		// one while copying the page. Lacking another mechanism to ensure that
4682 		// the source page doesn't disappear, we mark it busy.
4683 		sourcePage->busy = true;
4684 		context.cacheChainLocker.UnlockKeepRefs(true);
4685 
4686 		// copy the page
4687 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4688 			sourcePage->physical_page_number * B_PAGE_SIZE);
4689 
4690 		context.cacheChainLocker.RelockCaches(true);
4691 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4692 
4693 		// insert the new page into our cache
4694 		context.topCache->InsertPage(page, context.cacheOffset);
4695 		context.pageAllocated = true;
4696 	} else
4697 		DEBUG_PAGE_ACCESS_START(page);
4698 
4699 	context.page = page;
4700 	return B_OK;
4701 }
4702 
4703 
4704 /*!	Makes sure the address in the given address space is mapped.
4705 
4706 	\param addressSpace The address space.
4707 	\param originalAddress The address. Doesn't need to be page aligned.
4708 	\param isWrite If \c true the address shall be write-accessible.
	\param isExecute If \c true the address shall be executable.
4709 	\param isUser If \c true the access is requested by a userland team.
4710 	\param wirePage On success, if non \c NULL, the wired count of the page
4711 		mapped at the given address is incremented and the page is returned
4712 		via this parameter.
4713 	\return \c B_OK on success, another error code otherwise.
4714 */
4715 static status_t
4716 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4717 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4718 {
4719 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4720 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4721 		originalAddress, isWrite, isUser));
4722 
4723 	PageFaultContext context(addressSpace, isWrite);
4724 
4725 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4726 	status_t status = B_OK;
4727 
4728 	addressSpace->IncrementFaultCount();
4729 
4730 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4731 	// the pages upfront makes sure we don't have any cache locked, so that the
4732 	// page daemon/thief can do their job without problems.
4733 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4734 		originalAddress);
4735 	context.addressSpaceLocker.Unlock();
4736 	vm_page_reserve_pages(&context.reservation, reservePages,
4737 		addressSpace == VMAddressSpace::Kernel()
4738 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4739 
4740 	while (true) {
4741 		context.addressSpaceLocker.Lock();
4742 
4743 		// get the area the fault was in
4744 		VMArea* area = addressSpace->LookupArea(address);
4745 		if (area == NULL) {
4746 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4747 				"space\n", originalAddress);
4748 			TPF(PageFaultError(-1,
4749 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4750 			status = B_BAD_ADDRESS;
4751 			break;
4752 		}
4753 
4754 		// check permissions
4755 		uint32 protection = get_area_page_protection(area, address);
4756 		if (isUser && (protection & B_USER_PROTECTION) == 0
4757 				&& (area->protection & B_KERNEL_AREA) != 0) {
4758 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4759 				area->id, (void*)originalAddress);
4760 			TPF(PageFaultError(area->id,
4761 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4762 			status = B_PERMISSION_DENIED;
4763 			break;
4764 		}
4765 		if (isWrite && (protection
4766 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4767 			dprintf("write access attempted on write-protected area 0x%"
4768 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4769 			TPF(PageFaultError(area->id,
4770 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4771 			status = B_PERMISSION_DENIED;
4772 			break;
4773 		} else if (isExecute && (protection
4774 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4775 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4776 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4777 			TPF(PageFaultError(area->id,
4778 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4779 			status = B_PERMISSION_DENIED;
4780 			break;
4781 		} else if (!isWrite && !isExecute && (protection
4782 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4783 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4784 				" at %p\n", area->id, (void*)originalAddress);
4785 			TPF(PageFaultError(area->id,
4786 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4787 			status = B_PERMISSION_DENIED;
4788 			break;
4789 		}
4790 
4791 		// We have the area, it was a valid access, so let's try to resolve the
4792 		// page fault now.
4793 		// At first, the top most cache from the area is investigated.
4794 
4795 		context.Prepare(vm_area_get_locked_cache(area),
4796 			address - area->Base() + area->cache_offset);
4797 
4798 		// See if this cache has a fault handler -- this will do all the work
4799 		// for us.
4800 		{
4801 			// Note, since the page fault is resolved with interrupts enabled,
4802 			// the fault handler could be called more than once for the same
4803 			// reason -- the store must take this into account.
4804 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4805 			if (status != B_BAD_HANDLER)
4806 				break;
4807 		}
4808 
4809 		// The top most cache has no fault handler, so let's see if the cache or
4810 		// its sources already have the page we're searching for (we're going
4811 		// from top to bottom).
4812 		status = fault_get_page(context);
4813 		if (status != B_OK) {
4814 			TPF(PageFaultError(area->id, status));
4815 			break;
4816 		}
4817 
4818 		if (context.restart)
4819 			continue;
4820 
4821 		// All went fine, all there is left to do is to map the page into the
4822 		// address space.
4823 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4824 			context.page));
4825 
4826 		// If the page doesn't reside in the area's cache, we need to make sure
4827 		// it's mapped in read-only, so that we cannot overwrite someone else's
4828 		// data (copy-on-write)
4829 		uint32 newProtection = protection;
4830 		if (context.page->Cache() != context.topCache && !isWrite)
4831 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4832 
4833 		bool unmapPage = false;
4834 		bool mapPage = true;
4835 
4836 		// check whether there's already a page mapped at the address
4837 		context.map->Lock();
4838 
4839 		phys_addr_t physicalAddress;
4840 		uint32 flags;
4841 		vm_page* mappedPage = NULL;
4842 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4843 			&& (flags & PAGE_PRESENT) != 0
4844 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4845 				!= NULL) {
4846 			// Yep there's already a page. If it's ours, we can simply adjust
4847 			// its protection. Otherwise we have to unmap it.
4848 			if (mappedPage == context.page) {
4849 				context.map->ProtectPage(area, address, newProtection);
4850 					// Note: We assume that ProtectPage() is atomic (i.e.
4851 					// the page isn't temporarily unmapped), otherwise we'd have
4852 					// to make sure it isn't wired.
4853 				mapPage = false;
4854 			} else
4855 				unmapPage = true;
4856 		}
4857 
4858 		context.map->Unlock();
4859 
4860 		if (unmapPage) {
4861 			// If the page is wired, we can't unmap it. Wait until it is unwired
4862 			// again and restart. Note that the page cannot be wired for
4863 			// writing, since it isn't in the topmost cache. So we can safely
4864 			// ignore ranges wired for writing (our own and other concurrent
4865 			// wiring attempts in progress) and in fact have to do that to avoid
4866 			// a deadlock.
4867 			VMAreaUnwiredWaiter waiter;
4868 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4869 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4870 				// unlock everything and wait
4871 				if (context.pageAllocated) {
4872 					// ... but since we allocated a page and inserted it into
4873 					// the top cache, remove and free it first. Otherwise we'd
4874 					// have a page from a lower cache mapped while an upper
4875 					// cache has a page that would shadow it.
4876 					context.topCache->RemovePage(context.page);
4877 					vm_page_free_etc(context.topCache, context.page,
4878 						&context.reservation);
4879 				} else
4880 					DEBUG_PAGE_ACCESS_END(context.page);
4881 
4882 				context.UnlockAll();
4883 				waiter.waitEntry.Wait();
4884 				continue;
4885 			}
4886 
4887 			// Note: The mapped page is a page of a lower cache. We are
4888 			// guaranteed to have that cache locked, our new page is a copy of
4889 			// that page, and the page is not busy. The logic for that guarantee
4890 			// is as follows: Since the page is mapped, it must live in the top
4891 			// cache (ruled out above) or any of its lower caches, and there is
4892 			// (was before the new page was inserted) no other page in any
4893 			// cache between the top cache and the page's cache (otherwise that
4894 			// would be mapped instead). That in turn means that our algorithm
4895 			// must have found it and therefore it cannot be busy either.
4896 			DEBUG_PAGE_ACCESS_START(mappedPage);
4897 			unmap_page(area, address);
4898 			DEBUG_PAGE_ACCESS_END(mappedPage);
4899 		}
4900 
4901 		if (mapPage) {
4902 			if (map_page(area, context.page, address, newProtection,
4903 					&context.reservation) != B_OK) {
4904 				// Mapping can only fail when the page mapping object couldn't
4905 				// be allocated. Save for the missing mapping, everything is
4906 				// fine, though. If this was a regular page fault, we'll simply
4907 				// leave and probably fault again. To make sure we'll have more
4908 				// luck then, we ensure that the minimum object reserve is
4909 				// available.
4910 				DEBUG_PAGE_ACCESS_END(context.page);
4911 
4912 				context.UnlockAll();
4913 
4914 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4915 						!= B_OK) {
4916 					// Apparently the situation is serious. Let's get ourselves
4917 					// killed.
4918 					status = B_NO_MEMORY;
4919 				} else if (wirePage != NULL) {
4920 					// The caller expects us to wire the page. Since
4921 					// object_cache_reserve() succeeded, we should now be able
4922 					// to allocate a mapping structure. Restart.
4923 					continue;
4924 				}
4925 
4926 				break;
4927 			}
4928 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4929 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4930 
4931 		// also wire the page, if requested
4932 		if (wirePage != NULL && status == B_OK) {
4933 			increment_page_wired_count(context.page);
4934 			*wirePage = context.page;
4935 		}
4936 
4937 		DEBUG_PAGE_ACCESS_END(context.page);
4938 
4939 		break;
4940 	}
4941 
4942 	return status;
4943 }
4944 
4945 
4946 status_t
4947 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4948 {
4949 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4950 }
4951 
4952 status_t
4953 vm_put_physical_page(addr_t vaddr, void* handle)
4954 {
4955 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4956 }
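
// The physical page mapper wrappers come in get/put pairs; a minimal sketch
// of typical use (illustrative only):
//	addr_t virtualAddress;
//	void* handle;
//	if (vm_get_physical_page(physicalAddress, &virtualAddress, &handle)
//			== B_OK) {
//		// ... access the page through virtualAddress ...
//		vm_put_physical_page(virtualAddress, handle);
//	}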
4957 
4958 
4959 status_t
4960 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4961 	void** _handle)
4962 {
4963 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4964 }
4965 
4966 status_t
4967 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4968 {
4969 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4970 }
4971 
4972 
4973 status_t
4974 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4975 {
4976 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4977 }
4978 
4979 status_t
4980 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4981 {
4982 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4983 }
4984 
4985 
4986 void
4987 vm_get_info(system_info* info)
4988 {
4989 	swap_get_info(info);
4990 
4991 	MutexLocker locker(sAvailableMemoryLock);
4992 	info->needed_memory = sNeededMemory;
4993 	info->free_memory = sAvailableMemory;
4994 }
4995 
4996 
4997 uint32
4998 vm_num_page_faults(void)
4999 {
5000 	return sPageFaults;
5001 }
5002 
5003 
5004 off_t
5005 vm_available_memory(void)
5006 {
5007 	MutexLocker locker(sAvailableMemoryLock);
5008 	return sAvailableMemory;
5009 }
5010 
5011 
5012 off_t
5013 vm_available_not_needed_memory(void)
5014 {
5015 	MutexLocker locker(sAvailableMemoryLock);
5016 	return sAvailableMemory - sNeededMemory;
5017 }
5018 
5019 
5020 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
5021 	debugger.
5022 */
5023 off_t
5024 vm_available_not_needed_memory_debug(void)
5025 {
5026 	return sAvailableMemory - sNeededMemory;
5027 }
5028 
5029 
5030 size_t
5031 vm_kernel_address_space_left(void)
5032 {
5033 	return VMAddressSpace::Kernel()->FreeSpace();
5034 }
5035 
5036 
5037 void
5038 vm_unreserve_memory(size_t amount)
5039 {
5040 	mutex_lock(&sAvailableMemoryLock);
5041 
5042 	sAvailableMemory += amount;
5043 
5044 	mutex_unlock(&sAvailableMemoryLock);
5045 }
5046 
5047 
5048 status_t
5049 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5050 {
5051 	size_t reserve = kMemoryReserveForPriority[priority];
5052 
5053 	MutexLocker locker(sAvailableMemoryLock);
5054 
5055 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5056 
5057 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5058 		sAvailableMemory -= amount;
5059 		return B_OK;
5060 	}
5061 
5062 	if (timeout <= 0)
5063 		return B_NO_MEMORY;
5064 
5065 	// turn timeout into an absolute timeout
5066 	timeout += system_time();
5067 
5068 	// loop until we've got the memory or the timeout occurs
5069 	do {
5070 		sNeededMemory += amount;
5071 
5072 		// call the low resource manager
5073 		locker.Unlock();
5074 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5075 			B_ABSOLUTE_TIMEOUT, timeout);
5076 		locker.Lock();
5077 
5078 		sNeededMemory -= amount;
5079 
5080 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5081 			sAvailableMemory -= amount;
5082 			return B_OK;
5083 		}
5084 	} while (timeout > system_time());
5085 
5086 	return B_NO_MEMORY;
5087 }
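
// Illustrative call (values made up): reserve one page worth of commitment
// for a userland allocation, waiting at most one second for the low resource
// manager to free memory; a non-positive timeout fails immediately instead:
//	if (vm_try_reserve_memory(B_PAGE_SIZE, VM_PRIORITY_USER, 1000000) != B_OK)
//		return B_NO_MEMORY;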
5088 
5089 
5090 status_t
5091 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5092 {
5093 	// NOTE: The caller is responsible for synchronizing calls to this function!
5094 
5095 	AddressSpaceReadLocker locker;
5096 	VMArea* area;
5097 	status_t status = locker.SetFromArea(id, area);
5098 	if (status != B_OK)
5099 		return status;
5100 
5101 	// nothing to do, if the type doesn't change
5102 	uint32 oldType = area->MemoryType();
5103 	if (type == oldType)
5104 		return B_OK;
5105 
5106 	// set the memory type of the area and the mapped pages
5107 	VMTranslationMap* map = area->address_space->TranslationMap();
5108 	map->Lock();
5109 	area->SetMemoryType(type);
5110 	map->ProtectArea(area, area->protection);
5111 	map->Unlock();
5112 
5113 	// set the physical memory type
5114 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5115 	if (error != B_OK) {
5116 		// reset the memory type of the area and the mapped pages
5117 		map->Lock();
5118 		area->SetMemoryType(oldType);
5119 		map->ProtectArea(area, area->protection);
5120 		map->Unlock();
5121 		return error;
5122 	}
5123 
5124 	return B_OK;
5126 }
5127 
5128 
5129 /*!	This function enforces some protection properties:
5130 	 - kernel areas must be W^X (after kernel startup)
5131 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5132 	 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5133 */
5134 static void
5135 fix_protection(uint32* protection)
5136 {
5137 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5138 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5139 			|| (*protection & B_WRITE_AREA) != 0)
5140 		&& !gKernelStartup)
5141 		panic("kernel areas cannot be both writable and executable!");
5142 
5143 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5144 		if ((*protection & B_WRITE_AREA) != 0)
5145 			*protection |= B_KERNEL_WRITE_AREA;
5146 		if ((*protection & B_READ_AREA) != 0)
5147 			*protection |= B_KERNEL_READ_AREA;
5148 	}
5149 }
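
// For example, a userland request of B_READ_AREA | B_WRITE_AREA leaves
// fix_protection() as B_READ_AREA | B_WRITE_AREA | B_KERNEL_READ_AREA
// | B_KERNEL_WRITE_AREA, while a request that already carries kernel
// protection bits is passed through unchanged.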
5150 
5151 
5152 static void
5153 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5154 {
5155 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5156 	info->area = area->id;
5157 	info->address = (void*)area->Base();
5158 	info->size = area->Size();
5159 	info->protection = area->protection;
5160 	info->lock = area->wiring;
5161 	info->team = area->address_space->ID();
5162 	info->copy_count = 0;
5163 	info->in_count = 0;
5164 	info->out_count = 0;
5165 		// TODO: retrieve real values here!
5166 
5167 	VMCache* cache = vm_area_get_locked_cache(area);
5168 
5169 	// Note, this is a simplification; the cache could be larger than this area
5170 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5171 
5172 	vm_area_put_locked_cache(cache);
5173 }
5174 
5175 
5176 static status_t
5177 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5178 {
5179 	// is newSize a multiple of B_PAGE_SIZE?
5180 	if (newSize & (B_PAGE_SIZE - 1))
5181 		return B_BAD_VALUE;
5182 
5183 	// lock all affected address spaces and the cache
5184 	VMArea* area;
5185 	VMCache* cache;
5186 
5187 	MultiAddressSpaceLocker locker;
5188 	AreaCacheLocker cacheLocker;
5189 
5190 	status_t status;
5191 	size_t oldSize;
5192 	bool anyKernelArea;
5193 	bool restart;
5194 
5195 	do {
5196 		anyKernelArea = false;
5197 		restart = false;
5198 
5199 		locker.Unset();
5200 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5201 		if (status != B_OK)
5202 			return status;
5203 		cacheLocker.SetTo(cache, true);	// already locked
5204 
5205 		// enforce restrictions
5206 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5207 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5208 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5209 				"resize kernel area %" B_PRId32 " (%s)\n",
5210 				team_get_current_team_id(), areaID, area->name);
5211 			return B_NOT_ALLOWED;
5212 		}
5213 		// TODO: Enforce all restrictions (team, etc.)!
5214 
5215 		oldSize = area->Size();
5216 		if (newSize == oldSize)
5217 			return B_OK;
5218 
5219 		if (cache->type != CACHE_TYPE_RAM)
5220 			return B_NOT_ALLOWED;
5221 
5222 		if (oldSize < newSize) {
5223 			// We need to check if all areas of this cache can be resized.
5224 			for (VMArea* current = cache->areas; current != NULL;
5225 					current = current->cache_next) {
5226 				if (!current->address_space->CanResizeArea(current, newSize))
5227 					return B_ERROR;
5228 				anyKernelArea
5229 					|= current->address_space == VMAddressSpace::Kernel();
5230 			}
5231 		} else {
5232 			// We're shrinking the areas, so we must make sure the affected
5233 			// ranges are not wired.
5234 			for (VMArea* current = cache->areas; current != NULL;
5235 					current = current->cache_next) {
5236 				anyKernelArea
5237 					|= current->address_space == VMAddressSpace::Kernel();
5238 
5239 				if (wait_if_area_range_is_wired(current,
5240 						current->Base() + newSize, oldSize - newSize, &locker,
5241 						&cacheLocker)) {
5242 					restart = true;
5243 					break;
5244 				}
5245 			}
5246 		}
5247 	} while (restart);
5248 
5249 	// Okay, looks good so far, so let's do it
5250 
5251 	int priority = kernel && anyKernelArea
5252 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5253 	uint32 allocationFlags = kernel && anyKernelArea
5254 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5255 
5256 	if (oldSize < newSize) {
5257 		// Growing the cache can fail, so we do it first.
5258 		status = cache->Resize(cache->virtual_base + newSize, priority);
5259 		if (status != B_OK)
5260 			return status;
5261 	}
5262 
5263 	for (VMArea* current = cache->areas; current != NULL;
5264 			current = current->cache_next) {
5265 		status = current->address_space->ResizeArea(current, newSize,
5266 			allocationFlags);
5267 		if (status != B_OK)
5268 			break;
5269 
5270 		// We also need to unmap all pages beyond the new size, if the area has
5271 		// shrunk
5272 		if (newSize < oldSize) {
5273 			VMCacheChainLocker cacheChainLocker(cache);
5274 			cacheChainLocker.LockAllSourceCaches();
5275 
5276 			unmap_pages(current, current->Base() + newSize,
5277 				oldSize - newSize);
5278 
5279 			cacheChainLocker.Unlock(cache);
5280 		}
5281 	}
5282 
5283 	if (status == B_OK) {
5284 		// Shrink or grow individual page protections if in use.
5285 		if (area->page_protections != NULL) {
5286 			size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5287 			uint8* newProtections
5288 				= (uint8*)realloc(area->page_protections, bytes);
5289 			if (newProtections == NULL)
5290 				status = B_NO_MEMORY;
5291 			else {
5292 				area->page_protections = newProtections;
5293 
5294 				if (oldSize < newSize) {
5295 					// init the additional page protections to that of the area
5296 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5297 					uint32 areaProtection = area->protection
5298 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5299 					memset(area->page_protections + offset,
5300 						areaProtection | (areaProtection << 4), bytes - offset);
5301 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5302 						uint8& entry = area->page_protections[offset - 1];
5303 						entry = (entry & 0x0f) | (areaProtection << 4);
5304 					}
5305 				}
5306 			}
5307 		}
5308 	}
5309 
5310 	// shrinking the cache can't fail, so we do it now
5311 	if (status == B_OK && newSize < oldSize)
5312 		status = cache->Resize(cache->virtual_base + newSize, priority);
5313 
5314 	if (status != B_OK) {
5315 		// Something failed -- resize the areas back to their original size.
5316 		// This can fail, too, in which case we're seriously screwed.
5317 		for (VMArea* current = cache->areas; current != NULL;
5318 				current = current->cache_next) {
5319 			if (current->address_space->ResizeArea(current, oldSize,
5320 					allocationFlags) != B_OK) {
5321 				panic("vm_resize_area(): Failed and not being able to restore "
5322 					"original state.");
5323 			}
5324 		}
5325 
5326 		cache->Resize(cache->virtual_base + oldSize, priority);
5327 	}
5328 
5329 	// TODO: we must honour the lock restrictions of this area
5330 	return status;
5331 }
5332 
5333 
5334 status_t
5335 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5336 {
5337 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5338 }
5339 
5340 
5341 status_t
5342 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5343 {
5344 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5345 }
5346 
5347 
5348 status_t
5349 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5350 	bool user)
5351 {
5352 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5353 }
5354 
5355 
5356 void
5357 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5358 {
5359 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5360 }
5361 
5362 
5363 /*!	Copies a range of memory directly from/to a page that might not be mapped
5364 	at the moment.
5365 
	For \a unsafeMemory the current mapping (if any) is ignored. The function
5367 	walks through the respective area's cache chain to find the physical page
5368 	and copies from/to it directly.
5369 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5370 	must not cross a page boundary.
5371 
5372 	\param teamID The team ID identifying the address space \a unsafeMemory is
5373 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5374 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5375 		is passed, the address space of the thread returned by
5376 		debug_get_debugged_thread() is used.
5377 	\param unsafeMemory The start of the unsafe memory range to be copied
5378 		from/to.
5379 	\param buffer A safely accessible kernel buffer to be copied from/to.
5380 	\param size The number of bytes to be copied.
5381 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5382 		\a unsafeMemory, the other way around otherwise.
5383 */
5384 status_t
5385 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5386 	size_t size, bool copyToUnsafe)
5387 {
5388 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5389 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5390 		return B_BAD_VALUE;
5391 	}
5392 
5393 	// get the address space for the debugged thread
5394 	VMAddressSpace* addressSpace;
5395 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5396 		addressSpace = VMAddressSpace::Kernel();
5397 	} else if (teamID == B_CURRENT_TEAM) {
5398 		Thread* thread = debug_get_debugged_thread();
5399 		if (thread == NULL || thread->team == NULL)
5400 			return B_BAD_ADDRESS;
5401 
5402 		addressSpace = thread->team->address_space;
5403 	} else
5404 		addressSpace = VMAddressSpace::DebugGet(teamID);
5405 
5406 	if (addressSpace == NULL)
5407 		return B_BAD_ADDRESS;
5408 
5409 	// get the area
5410 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5411 	if (area == NULL)
5412 		return B_BAD_ADDRESS;
5413 
5414 	// search the page
5415 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5416 		+ area->cache_offset;
5417 	VMCache* cache = area->cache;
5418 	vm_page* page = NULL;
5419 	while (cache != NULL) {
5420 		page = cache->DebugLookupPage(cacheOffset);
5421 		if (page != NULL)
5422 			break;
5423 
5424 		// Page not found in this cache -- if it is paged out, we must not try
5425 		// to get it from lower caches.
5426 		if (cache->DebugHasPage(cacheOffset))
5427 			break;
5428 
5429 		cache = cache->source;
5430 	}
5431 
5432 	if (page == NULL)
5433 		return B_UNSUPPORTED;
5434 
5435 	// copy from/to physical memory
5436 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5437 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5438 
5439 	if (copyToUnsafe) {
5440 		if (page->Cache() != area->cache)
5441 			return B_UNSUPPORTED;
5442 
5443 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5444 	}
5445 
5446 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5447 }
5448 
5449 
5450 /** Validate that a memory range is either fully in kernel space, or fully in
5451  *  userspace */
5452 static inline bool
5453 validate_memory_range(const void* addr, size_t size)
5454 {
5455 	addr_t address = (addr_t)addr;
5456 
5457 	// Check for overflows on all addresses.
5458 	if ((address + size) < address)
5459 		return false;
5460 
5461 	// Validate that the address range does not cross the kernel/user boundary.
5462 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5463 }
5464 
5465 
5466 //	#pragma mark - kernel public API
5467 
5468 
5469 status_t
5470 user_memcpy(void* to, const void* from, size_t size)
5471 {
5472 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5473 		return B_BAD_ADDRESS;
5474 
5475 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5476 		return B_BAD_ADDRESS;
5477 
5478 	return B_OK;
5479 }
5480 
5481 
5482 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5483 	the string in \a to, NULL-terminating the result.
5484 
5485 	\param to Pointer to the destination C-string.
5486 	\param from Pointer to the source C-string.
5487 	\param size Size in bytes of the string buffer pointed to by \a to.
5488 
	\return strlen(\a from), or an error code if the copy failed.
5490 */
5491 ssize_t
5492 user_strlcpy(char* to, const char* from, size_t size)
5493 {
5494 	if (to == NULL && size != 0)
5495 		return B_BAD_VALUE;
5496 	if (from == NULL)
5497 		return B_BAD_ADDRESS;
5498 
5499 	// Protect the source address from overflows.
5500 	size_t maxSize = size;
5501 	if ((addr_t)from + maxSize < (addr_t)from)
5502 		maxSize -= (addr_t)from + maxSize;
5503 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5504 		maxSize = USER_TOP - (addr_t)from;
5505 
5506 	if (!validate_memory_range(to, maxSize))
5507 		return B_BAD_ADDRESS;
5508 
5509 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5510 	if (result < 0)
5511 		return result;
5512 
5513 	// If we hit the address overflow boundary, fail.
5514 	if ((size_t)result >= maxSize && maxSize < size)
5515 		return B_BAD_ADDRESS;
5516 
5517 	return result;
5518 }
5519 
5520 
5521 status_t
5522 user_memset(void* s, char c, size_t count)
5523 {
5524 	if (!validate_memory_range(s, count))
5525 		return B_BAD_ADDRESS;
5526 
5527 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5528 		return B_BAD_ADDRESS;
5529 
5530 	return B_OK;
5531 }
5532 
5533 
5534 /*!	Wires a single page at the given address.
5535 
	\param team The team whose address space the address belongs to.
		\c B_CURRENT_TEAM is also supported. If the given address is a kernel
		address, the parameter is ignored.
	\param address The virtual address to wire down. Does not need to be
		page aligned.
	\param writable If \c true, the page shall be writable.
	\param info On success the info is filled in, among other things
		containing the physical address the given virtual one translates to.
	\return \c B_OK if the page could be wired, another error code otherwise.
5545 */
5546 status_t
5547 vm_wire_page(team_id team, addr_t address, bool writable,
5548 	VMPageWiringInfo* info)
5549 {
5550 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5551 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5552 
5553 	// compute the page protection that is required
5554 	bool isUser = IS_USER_ADDRESS(address);
5555 	uint32 requiredProtection = PAGE_PRESENT
5556 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5557 	if (writable)
5558 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5559 
5560 	// get and read lock the address space
5561 	VMAddressSpace* addressSpace = NULL;
5562 	if (isUser) {
5563 		if (team == B_CURRENT_TEAM)
5564 			addressSpace = VMAddressSpace::GetCurrent();
5565 		else
5566 			addressSpace = VMAddressSpace::Get(team);
5567 	} else
5568 		addressSpace = VMAddressSpace::GetKernel();
5569 	if (addressSpace == NULL)
5570 		return B_ERROR;
5571 
5572 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5573 
5574 	VMTranslationMap* map = addressSpace->TranslationMap();
5575 	status_t error = B_OK;
5576 
5577 	// get the area
5578 	VMArea* area = addressSpace->LookupArea(pageAddress);
5579 	if (area == NULL) {
5580 		addressSpace->Put();
5581 		return B_BAD_ADDRESS;
5582 	}
5583 
5584 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5585 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5586 
5587 	// mark the area range wired
5588 	area->Wire(&info->range);
5589 
5590 	// Lock the area's cache chain and the translation map. Needed to look
5591 	// up the page and play with its wired count.
5592 	cacheChainLocker.LockAllSourceCaches();
5593 	map->Lock();
5594 
5595 	phys_addr_t physicalAddress;
5596 	uint32 flags;
5597 	vm_page* page;
5598 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5599 		&& (flags & requiredProtection) == requiredProtection
5600 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5601 			!= NULL) {
5602 		// Already mapped with the correct permissions -- just increment
5603 		// the page's wired count.
5604 		increment_page_wired_count(page);
5605 
5606 		map->Unlock();
5607 		cacheChainLocker.Unlock();
5608 		addressSpaceLocker.Unlock();
5609 	} else {
5610 		// Let vm_soft_fault() map the page for us, if possible. We need
5611 		// to fully unlock to avoid deadlocks. Since we have already
5612 		// wired the area itself, nothing disturbing will happen with it
5613 		// in the meantime.
5614 		map->Unlock();
5615 		cacheChainLocker.Unlock();
5616 		addressSpaceLocker.Unlock();
5617 
5618 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5619 			isUser, &page);
5620 
5621 		if (error != B_OK) {
5622 			// The page could not be mapped -- clean up.
5623 			VMCache* cache = vm_area_get_locked_cache(area);
5624 			area->Unwire(&info->range);
5625 			cache->ReleaseRefAndUnlock();
5626 			addressSpace->Put();
5627 			return error;
5628 		}
5629 	}
5630 
5631 	info->physicalAddress
5632 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5633 			+ address % B_PAGE_SIZE;
5634 	info->page = page;
5635 
5636 	return B_OK;
5637 }
5638 
5639 
5640 /*!	Unwires a single page previously wired via vm_wire_page().
5641 
5642 	\param info The same object passed to vm_wire_page() before.
5643 */
5644 void
5645 vm_unwire_page(VMPageWiringInfo* info)
5646 {
5647 	// lock the address space
5648 	VMArea* area = info->range.area;
5649 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5650 		// takes over our reference
5651 
5652 	// lock the top cache
5653 	VMCache* cache = vm_area_get_locked_cache(area);
5654 	VMCacheChainLocker cacheChainLocker(cache);
5655 
5656 	if (info->page->Cache() != cache) {
5657 		// The page is not in the top cache, so we lock the whole cache chain
5658 		// before touching the page's wired count.
5659 		cacheChainLocker.LockAllSourceCaches();
5660 	}
5661 
5662 	decrement_page_wired_count(info->page);
5663 
	// remove the wired range from the area
5665 	area->Unwire(&info->range);
5666 
5667 	cacheChainLocker.Unlock();
5668 }
5669 
5670 
5671 /*!	Wires down the given address range in the specified team's address space.
5672 
5673 	If successful the function
5674 	- acquires a reference to the specified team's address space,
5675 	- adds respective wired ranges to all areas that intersect with the given
5676 	  address range,
5677 	- makes sure all pages in the given address range are mapped with the
5678 	  requested access permissions and increments their wired count.
5679 
	It fails when \a team doesn't specify a valid address space, when any part
	of the specified address range is not covered by areas, when the concerned
	areas don't allow mapping with the requested permissions, or when mapping
	failed for another reason.

	When successful the call must be balanced by an unlock_memory_etc() call
	with the exact same parameters.

	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
		is supported.
5690 	\param address The start of the address range to be wired.
5691 	\param numBytes The size of the address range to be wired.
5692 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5693 		requests that the range must be wired writable ("read from device
5694 		into memory").
5695 	\return \c B_OK on success, another error code otherwise.
5696 */
5697 status_t
5698 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5699 {
5700 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5701 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5702 
5703 	// compute the page protection that is required
5704 	bool isUser = IS_USER_ADDRESS(address);
5705 	bool writable = (flags & B_READ_DEVICE) == 0;
5706 	uint32 requiredProtection = PAGE_PRESENT
5707 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5708 	if (writable)
5709 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5710 
5711 	uint32 mallocFlags = isUser
5712 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5713 
5714 	// get and read lock the address space
5715 	VMAddressSpace* addressSpace = NULL;
5716 	if (isUser) {
5717 		if (team == B_CURRENT_TEAM)
5718 			addressSpace = VMAddressSpace::GetCurrent();
5719 		else
5720 			addressSpace = VMAddressSpace::Get(team);
5721 	} else
5722 		addressSpace = VMAddressSpace::GetKernel();
5723 	if (addressSpace == NULL)
5724 		return B_ERROR;
5725 
5726 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5727 		// We get a new address space reference here. The one we got above will
5728 		// be freed by unlock_memory_etc().
5729 
5730 	VMTranslationMap* map = addressSpace->TranslationMap();
5731 	status_t error = B_OK;
5732 
5733 	// iterate through all concerned areas
5734 	addr_t nextAddress = lockBaseAddress;
5735 	while (nextAddress != lockEndAddress) {
5736 		// get the next area
5737 		VMArea* area = addressSpace->LookupArea(nextAddress);
5738 		if (area == NULL) {
5739 			error = B_BAD_ADDRESS;
5740 			break;
5741 		}
5742 
5743 		addr_t areaStart = nextAddress;
5744 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5745 
5746 		// allocate the wired range (do that before locking the cache to avoid
5747 		// deadlocks)
5748 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5749 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5750 		if (range == NULL) {
5751 			error = B_NO_MEMORY;
5752 			break;
5753 		}
5754 
5755 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5756 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5757 
5758 		// mark the area range wired
5759 		area->Wire(range);
5760 
5761 		// Depending on the area cache type and the wiring, we may not need to
5762 		// look at the individual pages.
5763 		if (area->cache_type == CACHE_TYPE_NULL
5764 			|| area->cache_type == CACHE_TYPE_DEVICE
5765 			|| area->wiring == B_FULL_LOCK
5766 			|| area->wiring == B_CONTIGUOUS) {
5767 			nextAddress = areaEnd;
5768 			continue;
5769 		}
5770 
5771 		// Lock the area's cache chain and the translation map. Needed to look
5772 		// up pages and play with their wired count.
5773 		cacheChainLocker.LockAllSourceCaches();
5774 		map->Lock();
5775 
5776 		// iterate through the pages and wire them
5777 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5778 			phys_addr_t physicalAddress;
5779 			uint32 flags;
5780 
5781 			vm_page* page;
5782 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5783 				&& (flags & requiredProtection) == requiredProtection
5784 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5785 					!= NULL) {
5786 				// Already mapped with the correct permissions -- just increment
5787 				// the page's wired count.
5788 				increment_page_wired_count(page);
5789 			} else {
5790 				// Let vm_soft_fault() map the page for us, if possible. We need
5791 				// to fully unlock to avoid deadlocks. Since we have already
5792 				// wired the area itself, nothing disturbing will happen with it
5793 				// in the meantime.
5794 				map->Unlock();
5795 				cacheChainLocker.Unlock();
5796 				addressSpaceLocker.Unlock();
5797 
5798 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5799 					false, isUser, &page);
5800 
5801 				addressSpaceLocker.Lock();
5802 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5803 				cacheChainLocker.LockAllSourceCaches();
5804 				map->Lock();
5805 			}
5806 
5807 			if (error != B_OK)
5808 				break;
5809 		}
5810 
5811 		map->Unlock();
5812 
5813 		if (error == B_OK) {
5814 			cacheChainLocker.Unlock();
5815 		} else {
5816 			// An error occurred, so abort right here. If the current address
5817 			// is the first in this area, unwire the area, since we won't get
5818 			// to it when reverting what we've done so far.
5819 			if (nextAddress == areaStart) {
5820 				area->Unwire(range);
5821 				cacheChainLocker.Unlock();
5822 				range->~VMAreaWiredRange();
5823 				free_etc(range, mallocFlags);
5824 			} else
5825 				cacheChainLocker.Unlock();
5826 
5827 			break;
5828 		}
5829 	}
5830 
5831 	if (error != B_OK) {
5832 		// An error occurred, so unwire all that we've already wired. Note that
5833 		// even if not a single page was wired, unlock_memory_etc() is called
5834 		// to put the address space reference.
5835 		addressSpaceLocker.Unlock();
5836 		unlock_memory_etc(team, (void*)lockBaseAddress,
5837 			nextAddress - lockBaseAddress, flags);
5838 	}
5839 
5840 	return error;
5841 }
5842 
5843 
5844 status_t
5845 lock_memory(void* address, size_t numBytes, uint32 flags)
5846 {
5847 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5848 }
5849 
5850 
5851 /*!	Unwires an address range previously wired with lock_memory_etc().
5852 
5853 	Note that a call to this function must balance a previous lock_memory_etc()
5854 	call with exactly the same parameters.
5855 */
5856 status_t
5857 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5858 {
5859 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5860 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5861 
5862 	// compute the page protection that is required
5863 	bool isUser = IS_USER_ADDRESS(address);
5864 	bool writable = (flags & B_READ_DEVICE) == 0;
5865 	uint32 requiredProtection = PAGE_PRESENT
5866 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5867 	if (writable)
5868 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5869 
5870 	uint32 mallocFlags = isUser
5871 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5872 
5873 	// get and read lock the address space
5874 	VMAddressSpace* addressSpace = NULL;
5875 	if (isUser) {
5876 		if (team == B_CURRENT_TEAM)
5877 			addressSpace = VMAddressSpace::GetCurrent();
5878 		else
5879 			addressSpace = VMAddressSpace::Get(team);
5880 	} else
5881 		addressSpace = VMAddressSpace::GetKernel();
5882 	if (addressSpace == NULL)
5883 		return B_ERROR;
5884 
5885 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5886 		// Take over the address space reference. We don't unlock until we're
5887 		// done.
5888 
5889 	VMTranslationMap* map = addressSpace->TranslationMap();
5890 	status_t error = B_OK;
5891 
5892 	// iterate through all concerned areas
5893 	addr_t nextAddress = lockBaseAddress;
5894 	while (nextAddress != lockEndAddress) {
5895 		// get the next area
5896 		VMArea* area = addressSpace->LookupArea(nextAddress);
5897 		if (area == NULL) {
5898 			error = B_BAD_ADDRESS;
5899 			break;
5900 		}
5901 
5902 		addr_t areaStart = nextAddress;
5903 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5904 
5905 		// Lock the area's top cache. This is a requirement for
5906 		// VMArea::Unwire().
5907 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5908 
5909 		// Depending on the area cache type and the wiring, we may not need to
5910 		// look at the individual pages.
5911 		if (area->cache_type == CACHE_TYPE_NULL
5912 			|| area->cache_type == CACHE_TYPE_DEVICE
5913 			|| area->wiring == B_FULL_LOCK
5914 			|| area->wiring == B_CONTIGUOUS) {
5915 			// unwire the range (to avoid deadlocks we delete the range after
5916 			// unlocking the cache)
5917 			nextAddress = areaEnd;
5918 			VMAreaWiredRange* range = area->Unwire(areaStart,
5919 				areaEnd - areaStart, writable);
5920 			cacheChainLocker.Unlock();
5921 			if (range != NULL) {
5922 				range->~VMAreaWiredRange();
5923 				free_etc(range, mallocFlags);
5924 			}
5925 			continue;
5926 		}
5927 
5928 		// Lock the area's cache chain and the translation map. Needed to look
5929 		// up pages and play with their wired count.
5930 		cacheChainLocker.LockAllSourceCaches();
5931 		map->Lock();
5932 
5933 		// iterate through the pages and unwire them
5934 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5935 			phys_addr_t physicalAddress;
5936 			uint32 flags;
5937 
5938 			vm_page* page;
5939 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5940 				&& (flags & PAGE_PRESENT) != 0
5941 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5942 					!= NULL) {
				// The page is still mapped -- just decrement its wired count.
5945 				decrement_page_wired_count(page);
5946 			} else {
5947 				panic("unlock_memory_etc(): Failed to unwire page: address "
5948 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5949 					nextAddress);
5950 				error = B_BAD_VALUE;
5951 				break;
5952 			}
5953 		}
5954 
5955 		map->Unlock();
5956 
5957 		// All pages are unwired. Remove the area's wired range as well (to
5958 		// avoid deadlocks we delete the range after unlocking the cache).
5959 		VMAreaWiredRange* range = area->Unwire(areaStart,
5960 			areaEnd - areaStart, writable);
5961 
5962 		cacheChainLocker.Unlock();
5963 
5964 		if (range != NULL) {
5965 			range->~VMAreaWiredRange();
5966 			free_etc(range, mallocFlags);
5967 		}
5968 
5969 		if (error != B_OK)
5970 			break;
5971 	}
5972 
5973 	// get rid of the address space reference lock_memory_etc() acquired
5974 	addressSpace->Put();
5975 
5976 	return error;
5977 }
5978 
5979 
5980 status_t
5981 unlock_memory(void* address, size_t numBytes, uint32 flags)
5982 {
5983 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5984 }
5985 
5986 
5987 /*!	Similar to get_memory_map(), but also allows to specify the address space
5988 	for the memory in question and has a saner semantics.
5989 	Returns \c B_OK when the complete range could be translated or
5990 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5991 	case the actual number of entries is written to \c *_numEntries. Any other
5992 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5993 	in this case.
5994 */
5995 status_t
5996 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5997 	physical_entry* table, uint32* _numEntries)
5998 {
5999 	uint32 numEntries = *_numEntries;
6000 	*_numEntries = 0;
6001 
6002 	VMAddressSpace* addressSpace;
6003 	addr_t virtualAddress = (addr_t)address;
6004 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
6005 	phys_addr_t physicalAddress;
6006 	status_t status = B_OK;
6007 	int32 index = -1;
6008 	addr_t offset = 0;
6009 	bool interrupts = are_interrupts_enabled();
6010 
6011 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6012 		"entries)\n", team, address, numBytes, numEntries));
6013 
6014 	if (numEntries == 0 || numBytes == 0)
6015 		return B_BAD_VALUE;
6016 
6017 	// in which address space is the address to be found?
6018 	if (IS_USER_ADDRESS(virtualAddress)) {
6019 		if (team == B_CURRENT_TEAM)
6020 			addressSpace = VMAddressSpace::GetCurrent();
6021 		else
6022 			addressSpace = VMAddressSpace::Get(team);
6023 	} else
6024 		addressSpace = VMAddressSpace::GetKernel();
6025 
6026 	if (addressSpace == NULL)
6027 		return B_ERROR;
6028 
6029 	VMTranslationMap* map = addressSpace->TranslationMap();
6030 
6031 	if (interrupts)
6032 		map->Lock();
6033 
6034 	while (offset < numBytes) {
6035 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6036 		uint32 flags;
6037 
6038 		if (interrupts) {
6039 			status = map->Query((addr_t)address + offset, &physicalAddress,
6040 				&flags);
6041 		} else {
6042 			status = map->QueryInterrupt((addr_t)address + offset,
6043 				&physicalAddress, &flags);
6044 		}
6045 		if (status < B_OK)
6046 			break;
6047 		if ((flags & PAGE_PRESENT) == 0) {
6048 			panic("get_memory_map() called on unmapped memory!");
6049 			return B_BAD_ADDRESS;
6050 		}
6051 
6052 		if (index < 0 && pageOffset > 0) {
6053 			physicalAddress += pageOffset;
6054 			if (bytes > B_PAGE_SIZE - pageOffset)
6055 				bytes = B_PAGE_SIZE - pageOffset;
6056 		}
6057 
6058 		// need to switch to the next physical_entry?
6059 		if (index < 0 || table[index].address
6060 				!= physicalAddress - table[index].size) {
6061 			if ((uint32)++index + 1 > numEntries) {
				// table too small
6063 				break;
6064 			}
6065 			table[index].address = physicalAddress;
6066 			table[index].size = bytes;
6067 		} else {
			// the page fits into the current entry
6069 			table[index].size += bytes;
6070 		}
6071 
6072 		offset += bytes;
6073 	}
6074 
6075 	if (interrupts)
6076 		map->Unlock();
6077 
6078 	if (status != B_OK)
6079 		return status;
6080 
6081 	if ((uint32)index + 1 > numEntries) {
6082 		*_numEntries = index;
6083 		return B_BUFFER_OVERFLOW;
6084 	}
6085 
6086 	*_numEntries = index + 1;
6087 	return B_OK;
6088 }
6089 
6090 
6091 /*!	According to the BeBook, this function should always succeed.
6092 	This is no longer the case.
6093 */
6094 extern "C" int32
6095 __get_memory_map_haiku(const void* address, size_t numBytes,
6096 	physical_entry* table, int32 numEntries)
6097 {
6098 	uint32 entriesRead = numEntries;
6099 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6100 		table, &entriesRead);
6101 	if (error != B_OK)
6102 		return error;
6103 
6104 	// close the entry list
6105 
6106 	// if it's only one entry, we will silently accept the missing ending
6107 	if (numEntries == 1)
6108 		return B_OK;
6109 
6110 	if (entriesRead + 1 > (uint32)numEntries)
6111 		return B_BUFFER_OVERFLOW;
6112 
6113 	table[entriesRead].address = 0;
6114 	table[entriesRead].size = 0;
6115 
6116 	return B_OK;
6117 }
6118 
6119 
6120 area_id
6121 area_for(void* address)
6122 {
6123 	return vm_area_for((addr_t)address, true);
6124 }
6125 
6126 
6127 area_id
6128 find_area(const char* name)
6129 {
6130 	return VMAreas::Find(name);
6131 }
6132 
6133 
6134 status_t
6135 _get_area_info(area_id id, area_info* info, size_t size)
6136 {
6137 	if (size != sizeof(area_info) || info == NULL)
6138 		return B_BAD_VALUE;
6139 
6140 	AddressSpaceReadLocker locker;
6141 	VMArea* area;
6142 	status_t status = locker.SetFromArea(id, area);
6143 	if (status != B_OK)
6144 		return status;
6145 
6146 	fill_area_info(area, info, size);
6147 	return B_OK;
6148 }
6149 
6150 
6151 status_t
6152 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6153 {
6154 	addr_t nextBase = *(addr_t*)cookie;
6155 
6156 	// we're already through the list
6157 	if (nextBase == (addr_t)-1)
6158 		return B_ENTRY_NOT_FOUND;
6159 
6160 	if (team == B_CURRENT_TEAM)
6161 		team = team_get_current_team_id();
6162 
6163 	AddressSpaceReadLocker locker(team);
6164 	if (!locker.IsLocked())
6165 		return B_BAD_TEAM_ID;
6166 
6167 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6168 	if (area == NULL) {
6169 		nextBase = (addr_t)-1;
6170 		return B_ENTRY_NOT_FOUND;
6171 	}
6172 
6173 	fill_area_info(area, info, size);
6174 	*cookie = (ssize_t)(area->Base() + 1);
6175 
6176 	return B_OK;
6177 }
6178 
6179 
6180 status_t
6181 set_area_protection(area_id area, uint32 newProtection)
6182 {
6183 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6184 		newProtection, true);
6185 }
6186 
6187 
6188 status_t
6189 resize_area(area_id areaID, size_t newSize)
6190 {
6191 	return vm_resize_area(areaID, newSize, true);
6192 }
6193 
6194 
6195 /*!	Transfers the specified area to a new team. The caller must be the owner
6196 	of the area.
6197 */
6198 area_id
6199 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6200 	bool kernel)
6201 {
6202 	area_info info;
6203 	status_t status = get_area_info(id, &info);
6204 	if (status != B_OK)
6205 		return status;
6206 
6207 	if (info.team != thread_get_current_thread()->team->id)
6208 		return B_PERMISSION_DENIED;
6209 
6210 	// We need to mark the area cloneable so the following operations work.
6211 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6212 	if (status != B_OK)
6213 		return status;
6214 
6215 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6216 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6217 	if (clonedArea < 0)
6218 		return clonedArea;
6219 
6220 	status = vm_delete_area(info.team, id, kernel);
6221 	if (status != B_OK) {
6222 		vm_delete_area(target, clonedArea, kernel);
6223 		return status;
6224 	}
6225 
6226 	// Now we can reset the protection to whatever it was before.
6227 	set_area_protection(clonedArea, info.protection);
6228 
6229 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6230 
6231 	return clonedArea;
6232 }
6233 
6234 
6235 extern "C" area_id
6236 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6237 	size_t numBytes, uint32 addressSpec, uint32 protection,
6238 	void** _virtualAddress)
6239 {
6240 	if (!arch_vm_supports_protection(protection))
6241 		return B_NOT_SUPPORTED;
6242 
6243 	fix_protection(&protection);
6244 
6245 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6246 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6247 		false);
6248 }
6249 
6250 
6251 area_id
6252 clone_area(const char* name, void** _address, uint32 addressSpec,
6253 	uint32 protection, area_id source)
6254 {
6255 	if ((protection & B_KERNEL_PROTECTION) == 0)
6256 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6257 
6258 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6259 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6260 }
6261 
6262 
6263 area_id
6264 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6265 	uint32 protection, uint32 flags, uint32 guardSize,
6266 	const virtual_address_restrictions* virtualAddressRestrictions,
6267 	const physical_address_restrictions* physicalAddressRestrictions,
6268 	void** _address)
6269 {
6270 	fix_protection(&protection);
6271 
6272 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6273 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6274 		true, _address);
6275 }
6276 
6277 
6278 extern "C" area_id
6279 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6280 	size_t size, uint32 lock, uint32 protection)
6281 {
6282 	fix_protection(&protection);
6283 
6284 	virtual_address_restrictions virtualRestrictions = {};
6285 	virtualRestrictions.address = *_address;
6286 	virtualRestrictions.address_specification = addressSpec;
6287 	physical_address_restrictions physicalRestrictions = {};
6288 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6289 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6290 		true, _address);
6291 }
6292 
6293 
6294 status_t
6295 delete_area(area_id area)
6296 {
6297 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6298 }
6299 
6300 
6301 //	#pragma mark - Userland syscalls
6302 
6303 
6304 status_t
6305 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6306 	addr_t size)
6307 {
6308 	// filter out some unavailable values (for userland)
6309 	switch (addressSpec) {
6310 		case B_ANY_KERNEL_ADDRESS:
6311 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6312 			return B_BAD_VALUE;
6313 	}
6314 
6315 	addr_t address;
6316 
6317 	if (!IS_USER_ADDRESS(userAddress)
6318 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6319 		return B_BAD_ADDRESS;
6320 
6321 	status_t status = vm_reserve_address_range(
6322 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6323 		RESERVED_AVOID_BASE);
6324 	if (status != B_OK)
6325 		return status;
6326 
6327 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6328 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6329 			(void*)address, size);
6330 		return B_BAD_ADDRESS;
6331 	}
6332 
6333 	return B_OK;
6334 }
6335 
6336 
6337 status_t
6338 _user_unreserve_address_range(addr_t address, addr_t size)
6339 {
6340 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6341 		(void*)address, size);
6342 }
6343 
6344 
6345 area_id
6346 _user_area_for(void* address)
6347 {
6348 	return vm_area_for((addr_t)address, false);
6349 }
6350 
6351 
6352 area_id
6353 _user_find_area(const char* userName)
6354 {
6355 	char name[B_OS_NAME_LENGTH];
6356 
6357 	if (!IS_USER_ADDRESS(userName)
6358 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6359 		return B_BAD_ADDRESS;
6360 
6361 	return find_area(name);
6362 }
6363 
6364 
6365 status_t
6366 _user_get_area_info(area_id area, area_info* userInfo)
6367 {
6368 	if (!IS_USER_ADDRESS(userInfo))
6369 		return B_BAD_ADDRESS;
6370 
6371 	area_info info;
6372 	status_t status = get_area_info(area, &info);
6373 	if (status < B_OK)
6374 		return status;
6375 
6376 	// TODO: do we want to prevent userland from seeing kernel protections?
6377 	//info.protection &= B_USER_PROTECTION;
6378 
6379 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6380 		return B_BAD_ADDRESS;
6381 
6382 	return status;
6383 }
6384 
6385 
6386 status_t
6387 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6388 {
6389 	ssize_t cookie;
6390 
6391 	if (!IS_USER_ADDRESS(userCookie)
6392 		|| !IS_USER_ADDRESS(userInfo)
6393 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6394 		return B_BAD_ADDRESS;
6395 
6396 	area_info info;
6397 	status_t status = _get_next_area_info(team, &cookie, &info,
6398 		sizeof(area_info));
6399 	if (status != B_OK)
6400 		return status;
6401 
6402 	//info.protection &= B_USER_PROTECTION;
6403 
6404 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6405 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6406 		return B_BAD_ADDRESS;
6407 
6408 	return status;
6409 }
6410 
6411 
6412 status_t
6413 _user_set_area_protection(area_id area, uint32 newProtection)
6414 {
6415 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6416 		return B_BAD_VALUE;
6417 
6418 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6419 		newProtection, false);
6420 }
6421 
6422 
6423 status_t
6424 _user_resize_area(area_id area, size_t newSize)
6425 {
6426 	// TODO: Since we restrict deleting of areas to those owned by the team,
6427 	// we should also do that for resizing (check other functions, too).
6428 	return vm_resize_area(area, newSize, false);
6429 }
6430 
6431 
6432 area_id
6433 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6434 	team_id target)
6435 {
6436 	// filter out some unavailable values (for userland)
6437 	switch (addressSpec) {
6438 		case B_ANY_KERNEL_ADDRESS:
6439 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6440 			return B_BAD_VALUE;
6441 	}
6442 
6443 	void* address;
6444 	if (!IS_USER_ADDRESS(userAddress)
6445 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6446 		return B_BAD_ADDRESS;
6447 
6448 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6449 	if (newArea < B_OK)
6450 		return newArea;
6451 
6452 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6453 		return B_BAD_ADDRESS;
6454 
6455 	return newArea;
6456 }
6457 
6458 
6459 area_id
6460 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6461 	uint32 protection, area_id sourceArea)
6462 {
6463 	char name[B_OS_NAME_LENGTH];
6464 	void* address;
6465 
6466 	// filter out some unavailable values (for userland)
6467 	switch (addressSpec) {
6468 		case B_ANY_KERNEL_ADDRESS:
6469 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6470 			return B_BAD_VALUE;
6471 	}
6472 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6473 		return B_BAD_VALUE;
6474 
6475 	if (!IS_USER_ADDRESS(userName)
6476 		|| !IS_USER_ADDRESS(userAddress)
6477 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6478 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6479 		return B_BAD_ADDRESS;
6480 
6481 	fix_protection(&protection);
6482 
6483 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6484 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6485 		false);
6486 	if (clonedArea < B_OK)
6487 		return clonedArea;
6488 
6489 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6490 		delete_area(clonedArea);
6491 		return B_BAD_ADDRESS;
6492 	}
6493 
6494 	return clonedArea;
6495 }
6496 
6497 
6498 area_id
6499 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6500 	size_t size, uint32 lock, uint32 protection)
6501 {
6502 	char name[B_OS_NAME_LENGTH];
6503 	void* address;
6504 
6505 	// filter out some unavailable values (for userland)
6506 	switch (addressSpec) {
6507 		case B_ANY_KERNEL_ADDRESS:
6508 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6509 			return B_BAD_VALUE;
6510 	}
6511 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6512 		return B_BAD_VALUE;
6513 
6514 	if (!IS_USER_ADDRESS(userName)
6515 		|| !IS_USER_ADDRESS(userAddress)
6516 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6517 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6518 		return B_BAD_ADDRESS;
6519 
6520 	if (addressSpec == B_EXACT_ADDRESS
6521 		&& IS_KERNEL_ADDRESS(address))
6522 		return B_BAD_VALUE;
6523 
6524 	if (addressSpec == B_ANY_ADDRESS)
6525 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6526 	if (addressSpec == B_BASE_ADDRESS)
6527 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6528 
6529 	fix_protection(&protection);
6530 
6531 	virtual_address_restrictions virtualRestrictions = {};
6532 	virtualRestrictions.address = address;
6533 	virtualRestrictions.address_specification = addressSpec;
6534 	physical_address_restrictions physicalRestrictions = {};
6535 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6536 		size, lock, protection, 0, 0, &virtualRestrictions,
6537 		&physicalRestrictions, false, &address);
6538 
6539 	if (area >= B_OK
6540 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6541 		delete_area(area);
6542 		return B_BAD_ADDRESS;
6543 	}
6544 
6545 	return area;
6546 }
6547 
6548 
6549 status_t
6550 _user_delete_area(area_id area)
6551 {
6552 	// Unlike the BeOS implementation, you can now only delete areas
6553 	// that you have created yourself from userland.
	// The documentation for delete_area() explicitly states that this
	// will be restricted in the future, and so it is.
6556 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6557 }
6558 
6559 
6560 // TODO: create a BeOS style call for this!
6561 
6562 area_id
6563 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6564 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6565 	int fd, off_t offset)
6566 {
6567 	char name[B_OS_NAME_LENGTH];
6568 	void* address;
6569 	area_id area;
6570 
6571 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6572 		return B_BAD_VALUE;
6573 
6574 	fix_protection(&protection);
6575 
6576 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6577 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6578 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6579 		return B_BAD_ADDRESS;
6580 
6581 	if (addressSpec == B_EXACT_ADDRESS) {
6582 		if ((addr_t)address + size < (addr_t)address
6583 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6584 			return B_BAD_VALUE;
6585 		}
6586 		if (!IS_USER_ADDRESS(address)
6587 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6588 			return B_BAD_ADDRESS;
6589 		}
6590 	}
6591 
6592 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6593 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6594 		false);
6595 	if (area < B_OK)
6596 		return area;
6597 
6598 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6599 		return B_BAD_ADDRESS;
6600 
6601 	return area;
6602 }
6603 
6604 
6605 status_t
6606 _user_unmap_memory(void* _address, size_t size)
6607 {
6608 	addr_t address = (addr_t)_address;
6609 
6610 	// check params
6611 	if (size == 0 || (addr_t)address + size < (addr_t)address
6612 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6613 		return B_BAD_VALUE;
6614 	}
6615 
6616 	if (!IS_USER_ADDRESS(address)
6617 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6618 		return B_BAD_ADDRESS;
6619 	}
6620 
6621 	// Write lock the address space and ensure the address range is not wired.
6622 	AddressSpaceWriteLocker locker;
6623 	do {
6624 		status_t status = locker.SetTo(team_get_current_team_id());
6625 		if (status != B_OK)
6626 			return status;
6627 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6628 			size, &locker));
6629 
6630 	// unmap
6631 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6632 }
6633 
6634 
6635 status_t
6636 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6637 {
6638 	// check address range
6639 	addr_t address = (addr_t)_address;
6640 	size = PAGE_ALIGN(size);
6641 
6642 	if ((address % B_PAGE_SIZE) != 0)
6643 		return B_BAD_VALUE;
6644 	if (!is_user_address_range(_address, size)) {
6645 		// weird error code required by POSIX
6646 		return ENOMEM;
6647 	}
6648 
6649 	// extend and check protection
6650 	if ((protection & ~B_USER_PROTECTION) != 0)
6651 		return B_BAD_VALUE;
6652 
6653 	fix_protection(&protection);
6654 
6655 	// We need to write lock the address space, since we're going to play with
6656 	// the areas. Also make sure that none of the areas is wired and that we're
6657 	// actually allowed to change the protection.
6658 	AddressSpaceWriteLocker locker;
6659 
6660 	bool restart;
6661 	do {
6662 		restart = false;
6663 
6664 		status_t status = locker.SetTo(team_get_current_team_id());
6665 		if (status != B_OK)
6666 			return status;
6667 
6668 		// First round: Check whether the whole range is covered by areas and we
6669 		// are allowed to modify them.
6670 		addr_t currentAddress = address;
6671 		size_t sizeLeft = size;
6672 		while (sizeLeft > 0) {
6673 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6674 			if (area == NULL)
6675 				return B_NO_MEMORY;
6676 
6677 			if ((area->protection & B_KERNEL_AREA) != 0)
6678 				return B_NOT_ALLOWED;
6679 			if (area->protection_max != 0
6680 				&& (protection & area->protection_max) != (protection & B_USER_PROTECTION)) {
6681 				return B_NOT_ALLOWED;
6682 			}
6683 
6684 			addr_t offset = currentAddress - area->Base();
6685 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6686 
6687 			AreaCacheLocker cacheLocker(area);
6688 
6689 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6690 					&locker, &cacheLocker)) {
6691 				restart = true;
6692 				break;
6693 			}
6694 
6695 			cacheLocker.Unlock();
6696 
6697 			currentAddress += rangeSize;
6698 			sizeLeft -= rangeSize;
6699 		}
6700 	} while (restart);
6701 
6702 	// Second round: If the protections differ from that of the area, create a
6703 	// page protection array and re-map mapped pages.
6704 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6705 	addr_t currentAddress = address;
6706 	size_t sizeLeft = size;
6707 	while (sizeLeft > 0) {
6708 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6709 		if (area == NULL)
6710 			return B_NO_MEMORY;
6711 
6712 		addr_t offset = currentAddress - area->Base();
6713 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6714 
6715 		currentAddress += rangeSize;
6716 		sizeLeft -= rangeSize;
6717 
6718 		if (area->page_protections == NULL) {
6719 			if (area->protection == protection)
6720 				continue;
6721 			if (offset == 0 && rangeSize == area->Size()) {
6722 				status_t status = vm_set_area_protection(area->address_space->ID(),
6723 					area->id, protection, false);
6724 				if (status != B_OK)
6725 					return status;
6726 				continue;
6727 			}
6728 
6729 			status_t status = allocate_area_page_protections(area);
6730 			if (status != B_OK)
6731 				return status;
6732 		}
6733 
6734 		// We need to lock the complete cache chain, since we potentially unmap
6735 		// pages of lower caches.
6736 		VMCache* topCache = vm_area_get_locked_cache(area);
6737 		VMCacheChainLocker cacheChainLocker(topCache);
6738 		cacheChainLocker.LockAllSourceCaches();
6739 
6740 		for (addr_t pageAddress = area->Base() + offset;
6741 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6742 			map->Lock();
6743 
6744 			set_area_page_protection(area, pageAddress, protection);
6745 
6746 			phys_addr_t physicalAddress;
6747 			uint32 flags;
6748 
6749 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6750 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6751 				map->Unlock();
6752 				continue;
6753 			}
6754 
6755 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6756 			if (page == NULL) {
6757 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6758 					"\n", area, physicalAddress);
6759 				map->Unlock();
6760 				return B_ERROR;
6761 			}
6762 
6763 			// If the page is not in the topmost cache and write access is
6764 			// requested, we have to unmap it. Otherwise we can re-map it with
6765 			// the new protection.
6766 			bool unmapPage = page->Cache() != topCache
6767 				&& (protection & B_WRITE_AREA) != 0;
6768 
6769 			if (!unmapPage)
6770 				map->ProtectPage(area, pageAddress, protection);
6771 
6772 			map->Unlock();
6773 
6774 			if (unmapPage) {
6775 				DEBUG_PAGE_ACCESS_START(page);
6776 				unmap_page(area, pageAddress);
6777 				DEBUG_PAGE_ACCESS_END(page);
6778 			}
6779 		}
6780 	}
6781 
6782 	return B_OK;
6783 }
6784 
6785 
6786 status_t
6787 _user_sync_memory(void* _address, size_t size, uint32 flags)
6788 {
6789 	addr_t address = (addr_t)_address;
6790 	size = PAGE_ALIGN(size);
6791 
6792 	// check params
6793 	if ((address % B_PAGE_SIZE) != 0)
6794 		return B_BAD_VALUE;
6795 	if (!is_user_address_range(_address, size)) {
6796 		// weird error code required by POSIX
6797 		return ENOMEM;
6798 	}
6799 
6800 	bool writeSync = (flags & MS_SYNC) != 0;
6801 	bool writeAsync = (flags & MS_ASYNC) != 0;
6802 	if (writeSync && writeAsync)
6803 		return B_BAD_VALUE;
6804 
6805 	if (size == 0 || (!writeSync && !writeAsync))
6806 		return B_OK;
6807 
6808 	// iterate through the range and sync all concerned areas
6809 	while (size > 0) {
6810 		// read lock the address space
6811 		AddressSpaceReadLocker locker;
6812 		status_t error = locker.SetTo(team_get_current_team_id());
6813 		if (error != B_OK)
6814 			return error;
6815 
6816 		// get the first area
6817 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6818 		if (area == NULL)
6819 			return B_NO_MEMORY;
6820 
6821 		uint32 offset = address - area->Base();
6822 		size_t rangeSize = min_c(area->Size() - offset, size);
6823 		offset += area->cache_offset;
6824 
6825 		// lock the cache
6826 		AreaCacheLocker cacheLocker(area);
6827 		if (!cacheLocker)
6828 			return B_BAD_VALUE;
6829 		VMCache* cache = area->cache;
6830 
6831 		locker.Unlock();
6832 
6833 		uint32 firstPage = offset >> PAGE_SHIFT;
6834 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6835 
6836 		// write the pages
6837 		if (cache->type == CACHE_TYPE_VNODE) {
6838 			if (writeSync) {
6839 				// synchronous
6840 				error = vm_page_write_modified_page_range(cache, firstPage,
6841 					endPage);
6842 				if (error != B_OK)
6843 					return error;
6844 			} else {
6845 				// asynchronous
6846 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6847 				// TODO: This is probably not quite what is supposed to happen.
6848 				// Especially when a lot has to be written, it might take ages
6849 				// until it really hits the disk.
6850 			}
6851 		}
6852 
6853 		address += rangeSize;
6854 		size -= rangeSize;
6855 	}
6856 
6857 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6858 	// synchronize multiple mappings of the same file. In our VM they never get
6859 	// out of sync, though, so we don't have to do anything.
6860 
6861 	return B_OK;
6862 }
6863 
6864 
6865 status_t
6866 _user_memory_advice(void* _address, size_t size, uint32 advice)
6867 {
6868 	addr_t address = (addr_t)_address;
6869 	if ((address % B_PAGE_SIZE) != 0)
6870 		return B_BAD_VALUE;
6871 
6872 	size = PAGE_ALIGN(size);
6873 	if (!is_user_address_range(_address, size)) {
6874 		// weird error code required by POSIX
6875 		return B_NO_MEMORY;
6876 	}
6877 
6878 	switch (advice) {
6879 		case MADV_NORMAL:
6880 		case MADV_SEQUENTIAL:
6881 		case MADV_RANDOM:
6882 		case MADV_WILLNEED:
6883 		case MADV_DONTNEED:
6884 			// TODO: Implement!
6885 			break;
6886 
6887 		case MADV_FREE:
6888 		{
6889 			AddressSpaceWriteLocker locker;
6890 			do {
6891 				status_t status = locker.SetTo(team_get_current_team_id());
6892 				if (status != B_OK)
6893 					return status;
6894 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
6895 					address, size, &locker));
6896 
6897 			discard_address_range(locker.AddressSpace(), address, size, false);
6898 			break;
6899 		}
6900 
6901 		default:
6902 			return B_BAD_VALUE;
6903 	}
6904 
6905 	return B_OK;
6906 }
6907 
6908 
6909 status_t
6910 _user_get_memory_properties(team_id teamID, const void* address,
6911 	uint32* _protected, uint32* _lock)
6912 {
6913 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6914 		return B_BAD_ADDRESS;
6915 
6916 	AddressSpaceReadLocker locker;
6917 	status_t error = locker.SetTo(teamID);
6918 	if (error != B_OK)
6919 		return error;
6920 
6921 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6922 	if (area == NULL)
6923 		return B_NO_MEMORY;
6924 
6925 	uint32 protection = get_area_page_protection(area, (addr_t)address);
6926 	uint32 wiring = area->wiring;
6927 
6928 	locker.Unlock();
6929 
6930 	error = user_memcpy(_protected, &protection, sizeof(protection));
6931 	if (error != B_OK)
6932 		return error;
6933 
6934 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6935 
6936 	return error;
6937 }
6938 
6939 
6940 static status_t
6941 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
6942 {
6943 #if ENABLE_SWAP_SUPPORT
6944 	// check address range
6945 	addr_t address = (addr_t)_address;
6946 	size = PAGE_ALIGN(size);
6947 
6948 	if ((address % B_PAGE_SIZE) != 0)
6949 		return EINVAL;
6950 	if (!is_user_address_range(_address, size))
6951 		return EINVAL;
6952 
6953 	const addr_t endAddress = address + size;
6954 
6955 	AddressSpaceReadLocker addressSpaceLocker;
6956 	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
6957 	if (error != B_OK)
6958 		return error;
6959 	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
6960 
6961 	// iterate through all concerned areas
6962 	addr_t nextAddress = address;
6963 	while (nextAddress != endAddress) {
6964 		// get the next area
6965 		VMArea* area = addressSpace->LookupArea(nextAddress);
6966 		if (area == NULL) {
6967 			error = B_BAD_ADDRESS;
6968 			break;
6969 		}
6970 
6971 		const addr_t areaStart = nextAddress;
6972 		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
6973 		nextAddress = areaEnd;
6974 
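		// Temporarily wire the sub-range (which also forces its pages into
		// memory) while the swap policy of its cache is changed; the wiring
		// is dropped again via unlock_memory_etc() below.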
6975 		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6976 		if (error != B_OK) {
6977 			// We don't need to unset or reset things on failure.
6978 			break;
6979 		}
6980 
6981 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6982 		VMAnonymousCache* anonCache = NULL;
6983 		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
6984 			// This memory will never be swapped anyway. Nothing to do.
6985 		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
6986 			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
6987 				areaEnd - areaStart, swappable);
6988 		} else {
6989 			// Some other cache type? We cannot affect anything here.
6990 			error = EINVAL;
6991 		}
6992 
6993 		cacheChainLocker.Unlock();
6994 
6995 		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6996 		if (error != B_OK)
6997 			break;
6998 	}
6999 
7000 	return error;
7001 #else
7002 	// No swap support? Nothing to do.
7003 	return B_OK;
7004 #endif
7005 }
7006 
7007 
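/*!	Backs mlock(). Rather than wiring the range outright, this marks its pages
	as ineligible for swapping, so they should stay resident once faulted in.
*/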
7008 status_t
7009 _user_mlock(const void* _address, size_t size)
7010 {
7011 	return user_set_memory_swappable(_address, size, false);
7012 }
7013 
7014 
7015 status_t
7016 _user_munlock(const void* _address, size_t size)
7017 {
7018 	// TODO: B_SHARED_AREAs need to be handled a bit differently:
7019 	// if multiple clones of an area had mlock() called on them,
7020 	// munlock() must also be called on all of them to actually unlock.
7021 	// (At present, the first munlock() will unlock all.)
7022 	// TODO: fork() should automatically unlock memory in the child.
7023 	return user_set_memory_swappable(_address, size, true);
7024 }
7025 
7026 
7027 // #pragma mark -- compatibility
7028 
7029 
7030 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
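// These wrappers are only needed on 32-bit x86 builds that can address more
// than 32 bits of physical memory (PAE), where the BeOS-era ABI with its
// 32-bit physical addresses cannot represent every address.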
7031 
7032 
7033 struct physical_entry_beos {
7034 	uint32	address;
7035 	uint32	size;
7036 };
7037 
7038 
7039 /*!	The physical_entry structure has changed. We need to translate it to the
7040 	old one.
7041 */
7042 extern "C" int32
7043 __get_memory_map_beos(const void* _address, size_t numBytes,
7044 	physical_entry_beos* table, int32 numEntries)
7045 {
7046 	if (numEntries <= 0)
7047 		return B_BAD_VALUE;
7048 
7049 	const uint8* address = (const uint8*)_address;
7050 
7051 	int32 count = 0;
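	// Query one physical range at a time and translate it into the old
	// 32-bit entry layout.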
7052 	while (numBytes > 0 && count < numEntries) {
7053 		physical_entry entry;
7054 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
7055 		if (result < 0) {
7056 			if (result != B_BUFFER_OVERFLOW)
7057 				return result;
7058 		}
7059 
7060 		if (entry.address >= (phys_addr_t)1 << 32) {
7061 			panic("get_memory_map(): Address is greater than 4 GB!");
7062 			return B_ERROR;
7063 		}
7064 
7065 		table[count].address = entry.address;
7066 		table[count++].size = entry.size;
7067 
7068 		address += entry.size;
7069 		numBytes -= entry.size;
7070 	}
7071 
7072 	// null-terminate the table, if possible
7073 	if (count < numEntries) {
7074 		table[count].address = 0;
7075 		table[count].size = 0;
7076 	}
7077 
7078 	return B_OK;
7079 }
7080 
7081 
7082 /*!	The type of the \a physicalAddress parameter has changed from void* to
7083 	phys_addr_t.
7084 */
7085 extern "C" area_id
7086 __map_physical_memory_beos(const char* name, void* physicalAddress,
7087 	size_t numBytes, uint32 addressSpec, uint32 protection,
7088 	void** _virtualAddress)
7089 {
7090 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7091 		addressSpec, protection, _virtualAddress);
7092 }
7093 
7094 
7095 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7096 	we meddle with the \a lock parameter to force 32-bit allocations.
7097 */
7098 extern "C" area_id
7099 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7100 	size_t size, uint32 lock, uint32 protection)
7101 {
7102 	switch (lock) {
7103 		case B_NO_LOCK:
7104 			break;
7105 		case B_FULL_LOCK:
7106 		case B_LAZY_LOCK:
7107 			lock = B_32_BIT_FULL_LOCK;
7108 			break;
7109 		case B_CONTIGUOUS:
7110 			lock = B_32_BIT_CONTIGUOUS;
7111 			break;
7112 	}
7113 
7114 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7115 		protection);
7116 }
7117 
7118 
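// The legacy entry points are bound to the old "BASE" symbol version, while
// the Haiku variants are exported as the default version ("@@") under
// "1_ALPHA3", so binaries built against the BeOS-era ABI keep resolving to
// the compatibility wrappers.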
7119 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7120 	"BASE");
7121 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7122 	"map_physical_memory@", "BASE");
7123 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7124 	"BASE");
7125 
7126 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7127 	"get_memory_map@@", "1_ALPHA3");
7128 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7129 	"map_physical_memory@@", "1_ALPHA3");
7130 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7131 	"1_ALPHA3");
7132 
7133 
7134 #else
7135 
7136 
7137 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7138 	"get_memory_map@@", "BASE");
7139 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7140 	"map_physical_memory@@", "BASE");
7141 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7142 	"BASE");
7143 
7144 
7145 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7146