xref: /haiku/src/system/kernel/vm/vm.cpp (revision 17889a8c70dbb3d59c1412f6431968753c767bab)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/ThreadAutoLock.h>
51 #include <vm/vm_page.h>
52 #include <vm/vm_priv.h>
53 #include <vm/VMAddressSpace.h>
54 #include <vm/VMArea.h>
55 #include <vm/VMCache.h>
56 
57 #include "VMAddressSpaceLocking.h"
58 #include "VMAnonymousCache.h"
59 #include "VMAnonymousNoSwapCache.h"
60 #include "IORequest.h"
61 
62 
63 //#define TRACE_VM
64 //#define TRACE_FAULTS
65 #ifdef TRACE_VM
66 #	define TRACE(x) dprintf x
67 #else
68 #	define TRACE(x) ;
69 #endif
70 #ifdef TRACE_FAULTS
71 #	define FTRACE(x) dprintf x
72 #else
73 #	define FTRACE(x) ;
74 #endif
75 
76 
77 namespace {
78 
79 class AreaCacheLocking {
80 public:
81 	inline bool Lock(VMCache* lockable)
82 	{
83 		return false;
84 	}
85 
86 	inline void Unlock(VMCache* lockable)
87 	{
88 		vm_area_put_locked_cache(lockable);
89 	}
90 };
91 
92 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
93 public:
94 	inline AreaCacheLocker(VMCache* cache = NULL)
95 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
96 	{
97 	}
98 
99 	inline AreaCacheLocker(VMArea* area)
100 		: AutoLocker<VMCache, AreaCacheLocking>()
101 	{
102 		SetTo(area);
103 	}
104 
105 	inline void SetTo(VMCache* cache, bool alreadyLocked)
106 	{
107 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
108 	}
109 
110 	inline void SetTo(VMArea* area)
111 	{
112 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
113 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
114 	}
115 };
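
// Illustrative usage sketch (hypothetical helper, not in the original
// source): AreaCacheLocker fetches an area's cache locked and referenced via
// vm_area_get_locked_cache() and releases it again through
// vm_area_put_locked_cache() when it goes out of scope, as in
// wait_if_address_range_is_wired() further below. `area` is assumed to be a
// valid VMArea whose address space is at least read-locked.
#if 0
static void
example_with_locked_area_cache(VMArea* area)
{
	AreaCacheLocker cacheLocker(area);
		// area->cache is now locked and referenced

	// ... inspect or modify the cache here ...

	// The destructor calls vm_area_put_locked_cache(), dropping the
	// reference and the lock again.
}
#endif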
116 
117 
118 class VMCacheChainLocker {
119 public:
120 	VMCacheChainLocker()
121 		:
122 		fTopCache(NULL),
123 		fBottomCache(NULL)
124 	{
125 	}
126 
127 	VMCacheChainLocker(VMCache* topCache)
128 		:
129 		fTopCache(topCache),
130 		fBottomCache(topCache)
131 	{
132 	}
133 
134 	~VMCacheChainLocker()
135 	{
136 		Unlock();
137 	}
138 
139 	void SetTo(VMCache* topCache)
140 	{
141 		fTopCache = topCache;
142 		fBottomCache = topCache;
143 
144 		if (topCache != NULL)
145 			topCache->SetUserData(NULL);
146 	}
147 
148 	VMCache* LockSourceCache()
149 	{
150 		if (fBottomCache == NULL || fBottomCache->source == NULL)
151 			return NULL;
152 
153 		VMCache* previousCache = fBottomCache;
154 
155 		fBottomCache = fBottomCache->source;
156 		fBottomCache->Lock();
157 		fBottomCache->AcquireRefLocked();
158 		fBottomCache->SetUserData(previousCache);
159 
160 		return fBottomCache;
161 	}
162 
163 	void LockAllSourceCaches()
164 	{
165 		while (LockSourceCache() != NULL) {
166 		}
167 	}
168 
169 	void Unlock(VMCache* exceptCache = NULL)
170 	{
171 		if (fTopCache == NULL)
172 			return;
173 
174 		// Unlock caches in source -> consumer direction. This is important to
175 		// avoid double-locking and a reversal of locking order in case a cache
176 		// is eligible for merging.
177 		VMCache* cache = fBottomCache;
178 		while (cache != NULL) {
179 			VMCache* nextCache = (VMCache*)cache->UserData();
180 			if (cache != exceptCache)
181 				cache->ReleaseRefAndUnlock(cache != fTopCache);
182 
183 			if (cache == fTopCache)
184 				break;
185 
186 			cache = nextCache;
187 		}
188 
189 		fTopCache = NULL;
190 		fBottomCache = NULL;
191 	}
192 
193 	void UnlockKeepRefs(bool keepTopCacheLocked)
194 	{
195 		if (fTopCache == NULL)
196 			return;
197 
198 		VMCache* nextCache = fBottomCache;
199 		VMCache* cache = NULL;
200 
201 		while (keepTopCacheLocked
202 				? nextCache != fTopCache : cache != fTopCache) {
203 			cache = nextCache;
204 			nextCache = (VMCache*)cache->UserData();
205 			cache->Unlock(cache != fTopCache);
206 		}
207 	}
208 
209 	void RelockCaches(bool topCacheLocked)
210 	{
211 		if (fTopCache == NULL)
212 			return;
213 
214 		VMCache* nextCache = fTopCache;
215 		VMCache* cache = NULL;
216 		if (topCacheLocked) {
217 			cache = nextCache;
218 			nextCache = cache->source;
219 		}
220 
221 		while (cache != fBottomCache && nextCache != NULL) {
222 			VMCache* consumer = cache;
223 			cache = nextCache;
224 			nextCache = cache->source;
225 			cache->Lock();
226 			cache->SetUserData(consumer);
227 		}
228 	}
229 
230 private:
231 	VMCache*	fTopCache;
232 	VMCache*	fBottomCache;
233 };
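
// Illustrative usage sketch (hypothetical helper, not in the original
// source): the typical VMCacheChainLocker pattern, as used by cut_area() and
// discard_area_range() below -- lock an area's top cache, lock the whole
// source chain, and unlock everything except the top cache, which the caller
// still needs and releases separately.
#if 0
static void
example_lock_cache_chain(VMArea* area)
{
	VMCache* cache = vm_area_get_locked_cache(area);
	VMCacheChainLocker cacheChainLocker(cache);
	cacheChainLocker.LockAllSourceCaches();

	// ... work on the cache chain, top (consumer) to bottom (source) ...

	cacheChainLocker.Unlock(cache);
		// unlocks all caches but the top one
	cache->ReleaseRefAndUnlock();
}
#endif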
234 
235 } // namespace
236 
237 
238 // The memory reserve that an allocation of a certain priority must not touch.
239 static const size_t kMemoryReserveForPriority[] = {
240 	VM_MEMORY_RESERVE_USER,		// user
241 	VM_MEMORY_RESERVE_SYSTEM,	// system
242 	0							// VIP
243 };
244 
245 
246 ObjectCache* gPageMappingsObjectCache;
247 
248 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
249 
250 static off_t sAvailableMemory;
251 static off_t sNeededMemory;
252 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
253 static uint32 sPageFaults;
254 
255 static VMPhysicalPageMapper* sPhysicalPageMapper;
256 
257 #if DEBUG_CACHE_LIST
258 
259 struct cache_info {
260 	VMCache*	cache;
261 	addr_t		page_count;
262 	addr_t		committed;
263 };
264 
265 static const int kCacheInfoTableCount = 100 * 1024;
266 static cache_info* sCacheInfoTable;
267 
268 #endif	// DEBUG_CACHE_LIST
269 
270 
271 // function declarations
272 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
273 	bool addressSpaceCleanup);
274 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
275 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
276 static status_t map_backing_store(VMAddressSpace* addressSpace,
277 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
278 	int protection, int protectionMax, int mapping, uint32 flags,
279 	const virtual_address_restrictions* addressRestrictions, bool kernel,
280 	VMArea** _area, void** _virtualAddress);
281 static void fix_protection(uint32* protection);
282 
283 
284 //	#pragma mark -
285 
286 
287 #if VM_PAGE_FAULT_TRACING
288 
289 namespace VMPageFaultTracing {
290 
291 class PageFaultStart : public AbstractTraceEntry {
292 public:
293 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
294 		:
295 		fAddress(address),
296 		fPC(pc),
297 		fWrite(write),
298 		fUser(user)
299 	{
300 		Initialized();
301 	}
302 
303 	virtual void AddDump(TraceOutput& out)
304 	{
305 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
306 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
307 	}
308 
309 private:
310 	addr_t	fAddress;
311 	addr_t	fPC;
312 	bool	fWrite;
313 	bool	fUser;
314 };
315 
316 
317 // page fault errors
318 enum {
319 	PAGE_FAULT_ERROR_NO_AREA		= 0,
320 	PAGE_FAULT_ERROR_KERNEL_ONLY,
321 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
322 	PAGE_FAULT_ERROR_READ_PROTECTED,
323 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
324 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
325 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
326 };
327 
328 
329 class PageFaultError : public AbstractTraceEntry {
330 public:
331 	PageFaultError(area_id area, status_t error)
332 		:
333 		fArea(area),
334 		fError(error)
335 	{
336 		Initialized();
337 	}
338 
339 	virtual void AddDump(TraceOutput& out)
340 	{
341 		switch (fError) {
342 			case PAGE_FAULT_ERROR_NO_AREA:
343 				out.Print("page fault error: no area");
344 				break;
345 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
346 				out.Print("page fault error: area: %ld, kernel only", fArea);
347 				break;
348 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
349 				out.Print("page fault error: area: %ld, write protected",
350 					fArea);
351 				break;
352 			case PAGE_FAULT_ERROR_READ_PROTECTED:
353 				out.Print("page fault error: area: %ld, read protected", fArea);
354 				break;
355 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
356 				out.Print("page fault error: area: %ld, execute protected",
357 					fArea);
358 				break;
359 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
360 				out.Print("page fault error: kernel touching bad user memory");
361 				break;
362 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
363 				out.Print("page fault error: no address space");
364 				break;
365 			default:
366 				out.Print("page fault error: area: %ld, error: %s", fArea,
367 					strerror(fError));
368 				break;
369 		}
370 	}
371 
372 private:
373 	area_id		fArea;
374 	status_t	fError;
375 };
376 
377 
378 class PageFaultDone : public AbstractTraceEntry {
379 public:
380 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
381 			vm_page* page)
382 		:
383 		fArea(area),
384 		fTopCache(topCache),
385 		fCache(cache),
386 		fPage(page)
387 	{
388 		Initialized();
389 	}
390 
391 	virtual void AddDump(TraceOutput& out)
392 	{
393 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
394 			"page: %p", fArea, fTopCache, fCache, fPage);
395 	}
396 
397 private:
398 	area_id		fArea;
399 	VMCache*	fTopCache;
400 	VMCache*	fCache;
401 	vm_page*	fPage;
402 };
403 
404 }	// namespace VMPageFaultTracing
405 
406 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
407 #else
408 #	define TPF(x) ;
409 #endif	// VM_PAGE_FAULT_TRACING
410 
411 
412 //	#pragma mark -
413 
414 
415 /*!	The page's cache must be locked.
416 */
417 static inline void
418 increment_page_wired_count(vm_page* page)
419 {
420 	if (!page->IsMapped())
421 		atomic_add(&gMappedPagesCount, 1);
422 	page->IncrementWiredCount();
423 }
424 
425 
426 /*!	The page's cache must be locked.
427 */
428 static inline void
429 decrement_page_wired_count(vm_page* page)
430 {
431 	page->DecrementWiredCount();
432 	if (!page->IsMapped())
433 		atomic_add(&gMappedPagesCount, -1);
434 }
435 
436 
437 static inline addr_t
438 virtual_page_address(VMArea* area, vm_page* page)
439 {
440 	return area->Base()
441 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
442 }
443 
444 
445 //! You need to have the address space locked when calling this function
446 static VMArea*
447 lookup_area(VMAddressSpace* addressSpace, area_id id)
448 {
449 	VMAreaHash::ReadLock();
450 
451 	VMArea* area = VMAreaHash::LookupLocked(id);
452 	if (area != NULL && area->address_space != addressSpace)
453 		area = NULL;
454 
455 	VMAreaHash::ReadUnlock();
456 
457 	return area;
458 }
459 
460 
461 static status_t
462 allocate_area_page_protections(VMArea* area)
463 {
464 	// In the page protections we store only the three user protections,
465 	// so we use 4 bits per page.
466 	size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
467 	area->page_protections = (uint8*)malloc_etc(bytes,
468 		area->address_space == VMAddressSpace::Kernel()
469 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
470 	if (area->page_protections == NULL)
471 		return B_NO_MEMORY;
472 
473 	// init the page protections for all pages to that of the area
474 	uint32 areaProtection = area->protection
475 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
476 	memset(area->page_protections, areaProtection | (areaProtection << 4),
477 		bytes);
478 	return B_OK;
479 }
480 
481 
482 static inline void
483 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
484 {
485 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
486 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
487 	uint8& entry = area->page_protections[pageIndex / 2];
488 	if (pageIndex % 2 == 0)
489 		entry = (entry & 0xf0) | protection;
490 	else
491 		entry = (entry & 0x0f) | (protection << 4);
492 }
493 
494 
495 static inline uint32
496 get_area_page_protection(VMArea* area, addr_t pageAddress)
497 {
498 	if (area->page_protections == NULL)
499 		return area->protection;
500 
501 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
502 	uint32 protection = area->page_protections[pageIndex / 2];
503 	if (pageIndex % 2 == 0)
504 		protection &= 0x0f;
505 	else
506 		protection >>= 4;
507 
508 	uint32 kernelProtection = 0;
509 	if ((protection & B_READ_AREA) != 0)
510 		kernelProtection |= B_KERNEL_READ_AREA;
511 	if ((protection & B_WRITE_AREA) != 0)
512 		kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 	// If this is a kernel area we return only the kernel flags.
515 	if (area->address_space == VMAddressSpace::Kernel())
516 		return kernelProtection;
517 
518 	return protection | kernelProtection;
519 }
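
// Worked example (hypothetical helper, not in the original source): the
// per-page protections are packed two pages per byte -- the even page of a
// pair in the low nibble, the odd page in the high nibble. Assuming a user
// area whose page_protections array was set up by
// allocate_area_page_protections():
#if 0
static void
example_page_protections(VMArea* area)
{
	// page 0: read/write -> low nibble of page_protections[0]
	set_area_page_protection(area, area->Base(),
		B_READ_AREA | B_WRITE_AREA);

	// page 1: read-only -> high nibble of page_protections[0]
	set_area_page_protection(area, area->Base() + B_PAGE_SIZE, B_READ_AREA);

	// get_area_page_protection() adds the matching kernel bits back, so for
	// a userland area page 0 reads back as
	// B_READ_AREA | B_WRITE_AREA | B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA.
	uint32 protection = get_area_page_protection(area, area->Base());
	(void)protection;
}
#endif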
520 
521 
522 /*!	The caller must have reserved as many pages as the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue,
575 		// since otherwise the page daemon wouldn't keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
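
// Illustrative sketch (hypothetical helper, not in the original source): the
// typical map_page() call sequence, mirroring the B_FULL_LOCK path of
// vm_create_anonymous_area() below. The caller is assumed to hold the cache
// lock and to have reserved enough pages in `reservation` for both the page
// itself and the translation map.
#if 0
static void
example_map_one_page(VMArea* area, VMCache* cache, addr_t address,
	off_t offset, uint32 protection, vm_page_reservation* reservation)
{
	vm_page* page = vm_page_allocate_page(reservation,
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
	cache->InsertPage(page, offset);
	map_page(area, page, address, protection, reservation);

	DEBUG_PAGE_ACCESS_END(page);
}
#endif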
586 
587 
588 /*!	The page is unmapped with \c preserveModified set to \c true, so the
589 	caller must hold the lock of the page's cache.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	The pages are unmapped with \c preserveModified set to \c true, so the
600 	caller must hold the locks of all mapped pages' caches.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 static inline bool
610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
611 {
612 	if (address < area->Base()) {
613 		offset = area->Base() - address;
614 		if (offset >= size)
615 			return false;
616 
617 		address = area->Base();
618 		size -= offset;
619 		offset = 0;
620 		if (size > area->Size())
621 			size = area->Size();
622 
623 		return true;
624 	}
625 
626 	offset = address - area->Base();
627 	if (offset >= area->Size())
628 		return false;
629 
630 	if (size >= area->Size() - offset)
631 		size = area->Size() - offset;
632 
633 	return true;
634 }
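
// Worked example (editorial, not in the original source): for an area with
// Base() == 0x100000 and Size() == 0x4000, a request with address == 0xff000
// and size == 0x3000 starts 0x1000 before the area. intersect_area() first
// sets offset to 0x1000 (< size), then clips: address becomes 0x100000, size
// shrinks to 0x2000, and offset is reset to 0. The resulting range
// [0x100000, 0x102000) lies inside the area, so the function returns true.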
635 
636 
637 /*!	Cuts a piece out of an area. If the given cut range covers the complete
638 	area, it is deleted. If it covers the beginning or the end, the area is
639 	resized accordingly. If the range covers some part in the middle of the
640 	area, it is split in two; in this case the second area is returned via
641 	\a _secondArea (the variable is left untouched in the other cases).
642 	The address space must be write locked.
643 	The caller must ensure that no part of the given range is wired.
644 */
645 static status_t
646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
647 	addr_t size, VMArea** _secondArea, bool kernel)
648 {
649 	addr_t offset;
650 	if (!intersect_area(area, address, size, offset))
651 		return B_OK;
652 
653 	// Is the area fully covered?
654 	if (address == area->Base() && size == area->Size()) {
655 		delete_area(addressSpace, area, false);
656 		return B_OK;
657 	}
658 
659 	int priority;
660 	uint32 allocationFlags;
661 	if (addressSpace == VMAddressSpace::Kernel()) {
662 		priority = VM_PRIORITY_SYSTEM;
663 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
664 			| HEAP_DONT_LOCK_KERNEL_SPACE;
665 	} else {
666 		priority = VM_PRIORITY_USER;
667 		allocationFlags = 0;
668 	}
669 
670 	VMCache* cache = vm_area_get_locked_cache(area);
671 	VMCacheChainLocker cacheChainLocker(cache);
672 	cacheChainLocker.LockAllSourceCaches();
673 
674 	// If no one else uses the area's cache and it's an anonymous cache, we can
675 	// resize or split it, too.
676 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
677 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
678 
679 	// Cut the end only?
680 	if (offset > 0 && size == area->Size() - offset) {
681 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
682 			allocationFlags);
683 		if (error != B_OK)
684 			return error;
685 
686 		// unmap pages
687 		unmap_pages(area, address, size);
688 
689 		if (onlyCacheUser) {
690 			// Since VMCache::Resize() can temporarily drop the lock, we must
691 			// unlock all lower caches to prevent locking order inversion.
692 			cacheChainLocker.Unlock(cache);
693 			cache->Resize(cache->virtual_base + offset, priority);
694 			cache->ReleaseRefAndUnlock();
695 		}
696 
697 		return B_OK;
698 	}
699 
700 	// Cut the beginning only?
701 	if (area->Base() == address) {
702 		// resize the area
703 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
704 			allocationFlags);
705 		if (error != B_OK)
706 			return error;
707 
708 		// unmap pages
709 		unmap_pages(area, address, size);
710 
711 		if (onlyCacheUser) {
712 			// Since VMCache::Rebase() can temporarily drop the lock, we must
713 			// unlock all lower caches to prevent locking order inversion.
714 			cacheChainLocker.Unlock(cache);
715 			cache->Rebase(cache->virtual_base + size, priority);
716 			cache->ReleaseRefAndUnlock();
717 		}
718 		area->cache_offset += size;
719 
720 		return B_OK;
721 	}
722 
723 	// The tough part -- cut a piece out of the middle of the area.
724 	// We do that by shrinking the area to the beginning section and creating a
725 	// new area for the end section.
726 	addr_t firstNewSize = offset;
727 	addr_t secondBase = address + size;
728 	addr_t secondSize = area->Size() - offset - size;
729 
730 	// unmap pages
731 	unmap_pages(area, address, area->Size() - firstNewSize);
732 
733 	// resize the area
734 	addr_t oldSize = area->Size();
735 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
736 		allocationFlags);
737 	if (error != B_OK)
738 		return error;
739 
740 	virtual_address_restrictions addressRestrictions = {};
741 	addressRestrictions.address = (void*)secondBase;
742 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
743 	VMArea* secondArea;
744 
745 	if (onlyCacheUser) {
746 		// Create a new cache for the second area.
747 		VMCache* secondCache;
748 		error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0,
749 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
750 		if (error != B_OK) {
751 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
752 			return error;
753 		}
754 
755 		secondCache->Lock();
756 		secondCache->temporary = cache->temporary;
757 		secondCache->virtual_base = area->cache_offset;
758 		secondCache->virtual_end = area->cache_offset + secondSize;
759 
760 		// Transfer the concerned pages from the first cache.
761 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
762 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
763 			area->cache_offset);
764 
765 		if (error == B_OK) {
766 			// Since VMCache::Resize() can temporarily drop the lock, we must
767 			// unlock all lower caches to prevent locking order inversion.
768 			cacheChainLocker.Unlock(cache);
769 			cache->Resize(cache->virtual_base + firstNewSize, priority);
770 			// Don't unlock the cache yet because we might have to resize it
771 			// back.
772 
773 			// Map the second area.
774 			error = map_backing_store(addressSpace, secondCache,
775 				area->cache_offset, area->name, secondSize, area->wiring,
776 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
777 				&addressRestrictions, kernel, &secondArea, NULL);
778 		}
779 
780 		if (error != B_OK) {
781 			// Restore the original cache.
782 			cache->Resize(cache->virtual_base + oldSize, priority);
783 
784 			// Move the pages back.
785 			status_t readoptStatus = cache->Adopt(secondCache,
786 				area->cache_offset, secondSize, adoptOffset);
787 			if (readoptStatus != B_OK) {
788 				// Some (swap) pages have not been moved back and will be lost
789 				// once the second cache is deleted.
790 				panic("failed to restore cache range: %s",
791 					strerror(readoptStatus));
792 
793 				// TODO: Handle out of memory cases by freeing memory and
794 				// retrying.
795 			}
796 
797 			cache->ReleaseRefAndUnlock();
798 			secondCache->ReleaseRefAndUnlock();
799 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
800 			return error;
801 		}
802 
803 		// Now we can unlock it.
804 		cache->ReleaseRefAndUnlock();
805 		secondCache->Unlock();
806 	} else {
807 		error = map_backing_store(addressSpace, cache, area->cache_offset
808 			+ (secondBase - area->Base()),
809 			area->name, secondSize, area->wiring, area->protection,
810 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
811 			&addressRestrictions, kernel, &secondArea, NULL);
812 		if (error != B_OK) {
813 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
814 			return error;
815 		}
816 		// We need a cache reference for the new area.
817 		cache->AcquireRefLocked();
818 	}
819 
820 	if (_secondArea != NULL)
821 		*_secondArea = secondArea;
822 
823 	return B_OK;
824 }
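
// Worked example (editorial, not in the original source): cutting the middle
// out of an area spanning [0x100000, 0x110000) that is the sole user of its
// anonymous cache, with address == 0x104000 and size == 0x4000, shrinks the
// original area to [0x100000, 0x104000), moves the pages beyond the cut into
// a newly created anonymous cache, and maps that cache as a second area at
// [0x108000, 0x110000), which is returned via _secondArea.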
825 
826 
827 /*!	Deletes or cuts all areas in the given address range.
828 	The address space must be write-locked.
829 	The caller must ensure that no part of the given range is wired.
830 */
831 static status_t
832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
833 	bool kernel)
834 {
835 	size = PAGE_ALIGN(size);
836 
837 	// Check whether the caller is allowed to modify the concerned areas.
838 	if (!kernel) {
839 		for (VMAddressSpace::AreaRangeIterator it
840 				= addressSpace->GetAreaRangeIterator(address, size);
841 			VMArea* area = it.Next();) {
842 
843 			if ((area->protection & B_KERNEL_AREA) != 0) {
844 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
845 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
846 					team_get_current_team_id(), area->id, area->name);
847 				return B_NOT_ALLOWED;
848 			}
849 		}
850 	}
851 
852 	for (VMAddressSpace::AreaRangeIterator it
853 			= addressSpace->GetAreaRangeIterator(address, size);
854 		VMArea* area = it.Next();) {
855 
856 		status_t error = cut_area(addressSpace, area, address, size, NULL,
857 			kernel);
858 		if (error != B_OK)
859 			return error;
860 			// Failing after already messing with areas is ugly, but we
861 			// can't do anything about it.
862 	}
863 
864 	return B_OK;
865 }
866 
867 
868 static status_t
869 discard_area_range(VMArea* area, addr_t address, addr_t size)
870 {
871 	addr_t offset;
872 	if (!intersect_area(area, address, size, offset))
873 		return B_OK;
874 
875 	// If someone else uses the area's cache or it's not an anonymous cache, we
876 	// can't discard.
877 	VMCache* cache = vm_area_get_locked_cache(area);
878 	if (cache->areas != area || area->cache_next != NULL
879 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
880 		return B_OK;
881 	}
882 
883 	VMCacheChainLocker cacheChainLocker(cache);
884 	cacheChainLocker.LockAllSourceCaches();
885 
886 	unmap_pages(area, address, size);
887 
888 	// Since VMCache::Discard() can temporarily drop the lock, we must
889 	// unlock all lower caches to prevent locking order inversion.
890 	cacheChainLocker.Unlock(cache);
891 	cache->Discard(cache->virtual_base + offset, size);
892 	cache->ReleaseRefAndUnlock();
893 
894 	return B_OK;
895 }
896 
897 
898 static status_t
899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
900 	bool kernel)
901 {
902 	for (VMAddressSpace::AreaRangeIterator it
903 		= addressSpace->GetAreaRangeIterator(address, size);
904 			VMArea* area = it.Next();) {
905 		status_t error = discard_area_range(area, address, size);
906 		if (error != B_OK)
907 			return error;
908 	}
909 
910 	return B_OK;
911 }
912 
913 
914 /*! You need to hold the lock of the cache and the write lock of the address
915 	space when calling this function.
916 	Note that in case of error the cache will be temporarily unlocked.
917 	If \a addressSpec is \c B_EXACT_ADDRESS and the
918 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
919 	that no part of the specified address range (base \c *_virtualAddress, size
920 	\a size) is wired.
921 */
922 static status_t
923 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
924 	const char* areaName, addr_t size, int wiring, int protection,
925 	int protectionMax, int mapping,
926 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
927 	bool kernel, VMArea** _area, void** _virtualAddress)
928 {
929 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
930 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
931 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
932 		addressSpace, cache, addressRestrictions->address, offset, size,
933 		addressRestrictions->address_specification, wiring, protection,
934 		protectionMax, _area, areaName));
935 	cache->AssertLocked();
936 
937 	if (size == 0) {
938 #if KDEBUG
939 		panic("map_backing_store(): called with size=0 for area '%s'!",
940 			areaName);
941 #endif
942 		return B_BAD_VALUE;
943 	}
944 
945 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
946 		| HEAP_DONT_LOCK_KERNEL_SPACE;
947 	int priority;
948 	if (addressSpace != VMAddressSpace::Kernel()) {
949 		priority = VM_PRIORITY_USER;
950 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
951 		priority = VM_PRIORITY_VIP;
952 		allocationFlags |= HEAP_PRIORITY_VIP;
953 	} else
954 		priority = VM_PRIORITY_SYSTEM;
955 
956 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
957 		allocationFlags);
958 	if (area == NULL)
959 		return B_NO_MEMORY;
960 	if (mapping != REGION_PRIVATE_MAP)
961 		area->protection_max = protectionMax & B_USER_PROTECTION;
962 
963 	status_t status;
964 
965 	// if this is a private map, we need to create a new cache
966 	// to handle the private copies of pages as they are written to
967 	VMCache* sourceCache = cache;
968 	if (mapping == REGION_PRIVATE_MAP) {
969 		VMCache* newCache;
970 
971 		// create an anonymous cache
972 		status = VMCacheFactory::CreateAnonymousCache(newCache,
973 			(protection & B_STACK_AREA) != 0
974 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
975 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
976 		if (status != B_OK)
977 			goto err1;
978 
979 		newCache->Lock();
980 		newCache->temporary = 1;
981 		newCache->virtual_base = offset;
982 		newCache->virtual_end = offset + size;
983 
984 		cache->AddConsumer(newCache);
985 
986 		cache = newCache;
987 	}
988 
989 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
990 		status = cache->SetMinimalCommitment(size, priority);
991 		if (status != B_OK)
992 			goto err2;
993 	}
994 
995 	// check to see if this address space has entered DELETE state
996 	if (addressSpace->IsBeingDeleted()) {
997 		// okay, someone is trying to delete this address space now, so we can't
998 		// insert the area, so back out
999 		status = B_BAD_TEAM_ID;
1000 		goto err2;
1001 	}
1002 
1003 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1004 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1005 		status = unmap_address_range(addressSpace,
1006 			(addr_t)addressRestrictions->address, size, kernel);
1007 		if (status != B_OK)
1008 			goto err2;
1009 	}
1010 
1011 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1012 		allocationFlags, _virtualAddress);
1013 	if (status == B_NO_MEMORY
1014 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1015 		// Due to how many locks are held, we cannot wait here for space to be
1016 		// freed up, but we can at least notify the low_resource handler.
1017 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1018 	}
1019 	if (status != B_OK)
1020 		goto err2;
1021 
1022 	// attach the cache to the area
1023 	area->cache = cache;
1024 	area->cache_offset = offset;
1025 
1026 	// point the cache back to the area
1027 	cache->InsertAreaLocked(area);
1028 	if (mapping == REGION_PRIVATE_MAP)
1029 		cache->Unlock();
1030 
1031 	// insert the area in the global area hash table
1032 	VMAreaHash::Insert(area);
1033 
1034 	// grab a ref to the address space (the area holds this)
1035 	addressSpace->Get();
1036 
1037 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1038 //		cache, sourceCache, areaName, area);
1039 
1040 	*_area = area;
1041 	return B_OK;
1042 
1043 err2:
1044 	if (mapping == REGION_PRIVATE_MAP) {
1045 	// We created this cache, so we must delete it again. Note that we
1046 		// need to temporarily unlock the source cache or we'll otherwise
1047 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1048 		sourceCache->Unlock();
1049 		cache->ReleaseRefAndUnlock();
1050 		sourceCache->Lock();
1051 	}
1052 err1:
1053 	addressSpace->DeleteArea(area, allocationFlags);
1054 	return status;
1055 }
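
// Illustrative sketch (hypothetical helper, not in the original source): a
// minimal map_backing_store() call, modeled on vm_block_address_range()
// below -- create an anonymous cache, lock it, and map it at an exact kernel
// address. Error handling is reduced to the essentials.
#if 0
static status_t
example_map_anonymous_cache(VMAddressSpace* addressSpace, void* address,
	addr_t size, const char* name)
{
	VMCache* cache;
	status_t status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0,
		false, VM_PRIORITY_SYSTEM);
	if (status != B_OK)
		return status;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->Lock();

	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = address;
	addressRestrictions.address_specification = B_EXACT_ADDRESS;

	VMArea* area;
	status = map_backing_store(addressSpace, cache, 0, name, size,
		B_ALREADY_WIRED, 0, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
		true, &area, NULL);
	if (status != B_OK) {
		// map_backing_store() did not consume our cache reference
		cache->ReleaseRefAndUnlock();
		return status;
	}

	cache->Unlock();
	area->cache_type = CACHE_TYPE_RAM;
	return B_OK;
}
#endif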
1056 
1057 
1058 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1059 	  locker1, locker2).
1060 */
1061 template<typename LockerType1, typename LockerType2>
1062 static inline bool
1063 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1064 {
1065 	area->cache->AssertLocked();
1066 
1067 	VMAreaUnwiredWaiter waiter;
1068 	if (!area->AddWaiterIfWired(&waiter))
1069 		return false;
1070 
1071 	// unlock everything and wait
1072 	if (locker1 != NULL)
1073 		locker1->Unlock();
1074 	if (locker2 != NULL)
1075 		locker2->Unlock();
1076 
1077 	waiter.waitEntry.Wait();
1078 
1079 	return true;
1080 }
1081 
1082 
1083 /*!	Checks whether the given area has any wired ranges intersecting with the
1084 	specified range and waits, if so.
1085 
1086 	When it has to wait, the function calls \c Unlock() on both \a locker1
1087 	and \a locker2, if given.
1088 	The area's top cache must be locked and must be unlocked as a side effect
1089 	of calling \c Unlock() on either \a locker1 or \a locker2.
1090 
1091 	If the function does not have to wait it does not modify or unlock any
1092 	object.
1093 
1094 	\param area The area to be checked.
1095 	\param base The base address of the range to check.
1096 	\param size The size of the address range to check.
1097 	\param locker1 An object to be unlocked before starting to wait (may
1098 		be \c NULL).
1099 	\param locker2 An object to be unlocked before starting to wait (may
1100 		be \c NULL).
1101 	\return \c true, if the function had to wait, \c false otherwise.
1102 */
1103 template<typename LockerType1, typename LockerType2>
1104 static inline bool
1105 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1106 	LockerType1* locker1, LockerType2* locker2)
1107 {
1108 	area->cache->AssertLocked();
1109 
1110 	VMAreaUnwiredWaiter waiter;
1111 	if (!area->AddWaiterIfWired(&waiter, base, size))
1112 		return false;
1113 
1114 	// unlock everything and wait
1115 	if (locker1 != NULL)
1116 		locker1->Unlock();
1117 	if (locker2 != NULL)
1118 		locker2->Unlock();
1119 
1120 	waiter.waitEntry.Wait();
1121 
1122 	return true;
1123 }
1124 
1125 
1126 /*!	Checks whether the given address space has any wired ranges intersecting
1127 	with the specified range and waits, if so.
1128 
1129 	Similar to wait_if_area_range_is_wired(), with the following differences:
1130 	- All areas intersecting with the range are checked (respectively all until
1131 	  one is found that contains a wired range intersecting with the given
1132 	  range).
1133 	- The given address space must at least be read-locked and must be unlocked
1134 	  when \c Unlock() is called on \a locker.
1135 	- None of the areas' caches are allowed to be locked.
1136 */
1137 template<typename LockerType>
1138 static inline bool
1139 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1140 	size_t size, LockerType* locker)
1141 {
1142 	for (VMAddressSpace::AreaRangeIterator it
1143 		= addressSpace->GetAreaRangeIterator(base, size);
1144 			VMArea* area = it.Next();) {
1145 
1146 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1147 
1148 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1149 			return true;
1150 	}
1151 
1152 	return false;
1153 }
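
// Illustrative sketch (hypothetical helper, not in the original source): the
// waiting helpers above drop all locks before blocking, so callers use them
// in a retry loop and re-lock afterwards, as vm_create_anonymous_area() does
// below.
#if 0
static status_t
example_lock_unwired_range(team_id team, addr_t base, size_t size,
	AddressSpaceWriteLocker& locker)
{
	VMAddressSpace* addressSpace;
	do {
		status_t status = locker.SetTo(team);
		if (status != B_OK)
			return status;

		addressSpace = locker.AddressSpace();
	} while (wait_if_address_range_is_wired(addressSpace, base, size,
		&locker));

	// the address space is now write-locked and the range is not wired
	return B_OK;
}
#endif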
1154 
1155 
1156 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1157 	It must be called in a situation where the kernel address space may be
1158 	locked.
1159 */
1160 status_t
1161 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1162 {
1163 	AddressSpaceReadLocker locker;
1164 	VMArea* area;
1165 	status_t status = locker.SetFromArea(id, area);
1166 	if (status != B_OK)
1167 		return status;
1168 
1169 	if (area->page_protections == NULL) {
1170 		status = allocate_area_page_protections(area);
1171 		if (status != B_OK)
1172 			return status;
1173 	}
1174 
1175 	*cookie = (void*)area;
1176 	return B_OK;
1177 }
1178 
1179 
1180 /*!	This is a debug helper function that can only be used in very specific
1181 	use cases.
1182 	Sets protection for the given address range to the protection specified.
1183 	If \a protection is 0 then the involved pages will be marked non-present
1184 	in the translation map to cause a fault on access. The pages aren't
1185 	actually unmapped however so that they can be marked present again with
1186 	additional calls to this function. For this to work the area must be
1187 	fully locked in memory so that the pages aren't otherwise touched.
1188 	This function does not lock the kernel address space and needs to be
1189 	supplied with a \a cookie retrieved from a successful call to
1190 	vm_prepare_kernel_area_debug_protection().
1191 */
1192 status_t
1193 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1194 	uint32 protection)
1195 {
1196 	// check address range
1197 	addr_t address = (addr_t)_address;
1198 	size = PAGE_ALIGN(size);
1199 
1200 	if ((address % B_PAGE_SIZE) != 0
1201 		|| (addr_t)address + size < (addr_t)address
1202 		|| !IS_KERNEL_ADDRESS(address)
1203 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1204 		return B_BAD_VALUE;
1205 	}
1206 
1207 	// Translate the kernel protection to user protection as we only store that.
1208 	if ((protection & B_KERNEL_READ_AREA) != 0)
1209 		protection |= B_READ_AREA;
1210 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1211 		protection |= B_WRITE_AREA;
1212 
1213 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1214 	VMTranslationMap* map = addressSpace->TranslationMap();
1215 	VMArea* area = (VMArea*)cookie;
1216 
1217 	addr_t offset = address - area->Base();
1218 	if (area->Size() - offset < size) {
1219 		panic("protect range not fully within supplied area");
1220 		return B_BAD_VALUE;
1221 	}
1222 
1223 	if (area->page_protections == NULL) {
1224 		panic("area has no page protections");
1225 		return B_BAD_VALUE;
1226 	}
1227 
1228 	// Invalidate the mapping entries so any access to them will fault or
1229 	// restore the mapping entries unchanged so that lookups will succeed again.
1230 	map->Lock();
1231 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1232 	map->Unlock();
1233 
1234 	// And set the proper page protections so that the fault case will actually
1235 	// fail and not simply try to map a new page.
1236 	for (addr_t pageAddress = address; pageAddress < address + size;
1237 			pageAddress += B_PAGE_SIZE) {
1238 		set_area_page_protection(area, pageAddress, protection);
1239 	}
1240 
1241 	return B_OK;
1242 }
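
// Illustrative sketch (hypothetical helper, not in the original source):
// typical use of the debug protection pair -- fetch the cookie once, in a
// context where the kernel address space may be locked, then toggle the
// protection of page-aligned subranges of the (fully wired) area.
#if 0
static status_t
example_debug_protect(area_id area, void* address, size_t size)
{
	void* cookie;
	status_t status = vm_prepare_kernel_area_debug_protection(area, &cookie);
	if (status != B_OK)
		return status;

	// make the range fault on any access ...
	status = vm_set_kernel_area_debug_protection(cookie, address, size, 0);
	if (status != B_OK)
		return status;

	// ... and later make it accessible again
	return vm_set_kernel_area_debug_protection(cookie, address, size,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
}
#endif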
1243 
1244 
1245 status_t
1246 vm_block_address_range(const char* name, void* address, addr_t size)
1247 {
1248 	if (!arch_vm_supports_protection(0))
1249 		return B_NOT_SUPPORTED;
1250 
1251 	AddressSpaceWriteLocker locker;
1252 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1253 	if (status != B_OK)
1254 		return status;
1255 
1256 	VMAddressSpace* addressSpace = locker.AddressSpace();
1257 
1258 	// create an anonymous cache
1259 	VMCache* cache;
1260 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1261 		VM_PRIORITY_SYSTEM);
1262 	if (status != B_OK)
1263 		return status;
1264 
1265 	cache->temporary = 1;
1266 	cache->virtual_end = size;
1267 	cache->Lock();
1268 
1269 	VMArea* area;
1270 	virtual_address_restrictions addressRestrictions = {};
1271 	addressRestrictions.address = address;
1272 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1273 	status = map_backing_store(addressSpace, cache, 0, name, size,
1274 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
1275 		true, &area, NULL);
1276 	if (status != B_OK) {
1277 		cache->ReleaseRefAndUnlock();
1278 		return status;
1279 	}
1280 
1281 	cache->Unlock();
1282 	area->cache_type = CACHE_TYPE_RAM;
1283 	return area->id;
1284 }
1285 
1286 
1287 status_t
1288 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1289 {
1290 	AddressSpaceWriteLocker locker(team);
1291 	if (!locker.IsLocked())
1292 		return B_BAD_TEAM_ID;
1293 
1294 	VMAddressSpace* addressSpace = locker.AddressSpace();
1295 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1296 		addressSpace == VMAddressSpace::Kernel()
1297 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1298 }
1299 
1300 
1301 status_t
1302 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1303 	addr_t size, uint32 flags)
1304 {
1305 	if (size == 0)
1306 		return B_BAD_VALUE;
1307 
1308 	AddressSpaceWriteLocker locker(team);
1309 	if (!locker.IsLocked())
1310 		return B_BAD_TEAM_ID;
1311 
1312 	virtual_address_restrictions addressRestrictions = {};
1313 	addressRestrictions.address = *_address;
1314 	addressRestrictions.address_specification = addressSpec;
1315 	VMAddressSpace* addressSpace = locker.AddressSpace();
1316 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1317 		addressSpace == VMAddressSpace::Kernel()
1318 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1319 		_address);
1320 }
1321 
1322 
1323 area_id
1324 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1325 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1326 	const virtual_address_restrictions* virtualAddressRestrictions,
1327 	const physical_address_restrictions* physicalAddressRestrictions,
1328 	bool kernel, void** _address)
1329 {
1330 	VMArea* area;
1331 	VMCache* cache;
1332 	vm_page* page = NULL;
1333 	bool isStack = (protection & B_STACK_AREA) != 0;
1334 	page_num_t guardPages;
1335 	bool canOvercommit = false;
1336 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1337 		? VM_PAGE_ALLOC_CLEAR : 0;
1338 
1339 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1340 		team, name, size));
1341 
1342 	size = PAGE_ALIGN(size);
1343 	guardSize = PAGE_ALIGN(guardSize);
1344 	guardPages = guardSize / B_PAGE_SIZE;
1345 
1346 	if (size == 0 || size < guardSize)
1347 		return B_BAD_VALUE;
1348 	if (!arch_vm_supports_protection(protection))
1349 		return B_NOT_SUPPORTED;
1350 
1351 	if (team == B_CURRENT_TEAM)
1352 		team = VMAddressSpace::CurrentID();
1353 	if (team < 0)
1354 		return B_BAD_TEAM_ID;
1355 
1356 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1357 		canOvercommit = true;
1358 
1359 #ifdef DEBUG_KERNEL_STACKS
1360 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1361 		isStack = true;
1362 #endif
1363 
1364 	// check parameters
1365 	switch (virtualAddressRestrictions->address_specification) {
1366 		case B_ANY_ADDRESS:
1367 		case B_EXACT_ADDRESS:
1368 		case B_BASE_ADDRESS:
1369 		case B_ANY_KERNEL_ADDRESS:
1370 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1371 		case B_RANDOMIZED_ANY_ADDRESS:
1372 		case B_RANDOMIZED_BASE_ADDRESS:
1373 			break;
1374 
1375 		default:
1376 			return B_BAD_VALUE;
1377 	}
1378 
1379 	// If low or high physical address restrictions are given, we force
1380 	// B_CONTIGUOUS wiring, since only then we'll use
1381 	// vm_page_allocate_page_run() which deals with those restrictions.
1382 	if (physicalAddressRestrictions->low_address != 0
1383 		|| physicalAddressRestrictions->high_address != 0) {
1384 		wiring = B_CONTIGUOUS;
1385 	}
1386 
1387 	physical_address_restrictions stackPhysicalRestrictions;
1388 	bool doReserveMemory = false;
1389 	switch (wiring) {
1390 		case B_NO_LOCK:
1391 			break;
1392 		case B_FULL_LOCK:
1393 		case B_LAZY_LOCK:
1394 		case B_CONTIGUOUS:
1395 			doReserveMemory = true;
1396 			break;
1397 		case B_ALREADY_WIRED:
1398 			break;
1399 		case B_LOMEM:
1400 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1401 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1402 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1403 			wiring = B_CONTIGUOUS;
1404 			doReserveMemory = true;
1405 			break;
1406 		case B_32_BIT_FULL_LOCK:
1407 			if (B_HAIKU_PHYSICAL_BITS <= 32
1408 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1409 				wiring = B_FULL_LOCK;
1410 				doReserveMemory = true;
1411 				break;
1412 			}
1413 			// TODO: We don't really support this mode efficiently. Just fall
1414 			// through for now ...
1415 		case B_32_BIT_CONTIGUOUS:
1416 			#if B_HAIKU_PHYSICAL_BITS > 32
1417 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1418 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1419 					stackPhysicalRestrictions.high_address
1420 						= (phys_addr_t)1 << 32;
1421 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1422 				}
1423 			#endif
1424 			wiring = B_CONTIGUOUS;
1425 			doReserveMemory = true;
1426 			break;
1427 		default:
1428 			return B_BAD_VALUE;
1429 	}
1430 
1431 	// Optimization: For a single-page contiguous allocation without low/high
1432 	// memory restrictions, B_FULL_LOCK wiring suffices.
1433 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1434 		&& physicalAddressRestrictions->low_address == 0
1435 		&& physicalAddressRestrictions->high_address == 0) {
1436 		wiring = B_FULL_LOCK;
1437 	}
1438 
1439 	// For full lock or contiguous areas we're also going to map the pages and
1440 	// thus need to reserve pages for the mapping backend upfront.
1441 	addr_t reservedMapPages = 0;
1442 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1443 		AddressSpaceWriteLocker locker;
1444 		status_t status = locker.SetTo(team);
1445 		if (status != B_OK)
1446 			return status;
1447 
1448 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1449 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1450 	}
1451 
1452 	int priority;
1453 	if (team != VMAddressSpace::KernelID())
1454 		priority = VM_PRIORITY_USER;
1455 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1456 		priority = VM_PRIORITY_VIP;
1457 	else
1458 		priority = VM_PRIORITY_SYSTEM;
1459 
1460 	// Reserve memory before acquiring the address space lock. This reduces the
1461 	// chances of failure, since while holding the write lock to the address
1462 	// space (if it is the kernel address space that is), the low memory handler
1463 	// won't be able to free anything for us.
1464 	addr_t reservedMemory = 0;
1465 	if (doReserveMemory) {
1466 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1467 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1468 			return B_NO_MEMORY;
1469 		reservedMemory = size;
1470 		// TODO: We don't reserve the memory for the pages for the page
1471 		// directories/tables. We actually need to do so, since we currently
1472 		// don't reclaim them (and probably can't reclaim all of them anyway).
1473 		// Thus there are actually fewer physical pages available than there
1474 		// should be, which can get the VM into trouble in low memory situations.
1475 	}
1476 
1477 	AddressSpaceWriteLocker locker;
1478 	VMAddressSpace* addressSpace;
1479 	status_t status;
1480 
1481 	// For full lock areas reserve the pages before locking the address
1482 	// space. E.g. block caches can't release their memory while we hold the
1483 	// address space lock.
1484 	page_num_t reservedPages = reservedMapPages;
1485 	if (wiring == B_FULL_LOCK)
1486 		reservedPages += size / B_PAGE_SIZE;
1487 
1488 	vm_page_reservation reservation;
1489 	if (reservedPages > 0) {
1490 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1491 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1492 					priority)) {
1493 				reservedPages = 0;
1494 				status = B_WOULD_BLOCK;
1495 				goto err0;
1496 			}
1497 		} else
1498 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1499 	}
1500 
1501 	if (wiring == B_CONTIGUOUS) {
1502 		// we try to allocate the page run here upfront as this may easily
1503 		// fail for obvious reasons
1504 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1505 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1506 		if (page == NULL) {
1507 			status = B_NO_MEMORY;
1508 			goto err0;
1509 		}
1510 	}
1511 
1512 	// Lock the address space and, if B_EXACT_ADDRESS and
1513 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1514 	// is not wired.
1515 	do {
1516 		status = locker.SetTo(team);
1517 		if (status != B_OK)
1518 			goto err1;
1519 
1520 		addressSpace = locker.AddressSpace();
1521 	} while (virtualAddressRestrictions->address_specification
1522 			== B_EXACT_ADDRESS
1523 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1524 		&& wait_if_address_range_is_wired(addressSpace,
1525 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1526 
1527 	// create an anonymous cache
1528 	// if it's a stack, make sure that two pages are available at least
1529 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1530 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1531 		wiring == B_NO_LOCK, priority);
1532 	if (status != B_OK)
1533 		goto err1;
1534 
1535 	cache->temporary = 1;
1536 	cache->virtual_end = size;
1537 	cache->committed_size = reservedMemory;
1538 		// TODO: This should be done via a method.
1539 	reservedMemory = 0;
1540 
1541 	cache->Lock();
1542 
1543 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1544 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1545 		virtualAddressRestrictions, kernel, &area, _address);
1546 
1547 	if (status != B_OK) {
1548 		cache->ReleaseRefAndUnlock();
1549 		goto err1;
1550 	}
1551 
1552 	locker.DegradeToReadLock();
1553 
1554 	switch (wiring) {
1555 		case B_NO_LOCK:
1556 		case B_LAZY_LOCK:
1557 			// do nothing - the pages are mapped in as needed
1558 			break;
1559 
1560 		case B_FULL_LOCK:
1561 		{
1562 			// Allocate and map all pages for this area
1563 
1564 			off_t offset = 0;
1565 			for (addr_t address = area->Base();
1566 					address < area->Base() + (area->Size() - 1);
1567 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1568 #ifdef DEBUG_KERNEL_STACKS
1569 #	ifdef STACK_GROWS_DOWNWARDS
1570 				if (isStack && address < area->Base()
1571 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1572 #	else
1573 				if (isStack && address >= area->Base() + area->Size()
1574 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1575 #	endif
1576 					continue;
1577 #endif
1578 				vm_page* page = vm_page_allocate_page(&reservation,
1579 					PAGE_STATE_WIRED | pageAllocFlags);
1580 				cache->InsertPage(page, offset);
1581 				map_page(area, page, address, protection, &reservation);
1582 
1583 				DEBUG_PAGE_ACCESS_END(page);
1584 			}
1585 
1586 			break;
1587 		}
1588 
1589 		case B_ALREADY_WIRED:
1590 		{
1591 			// The pages should already be mapped. This is only really useful
1592 			// during boot time. Find the appropriate vm_page objects and stick
1593 			// them in the cache object.
1594 			VMTranslationMap* map = addressSpace->TranslationMap();
1595 			off_t offset = 0;
1596 
1597 			if (!gKernelStartup)
1598 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1599 
1600 			map->Lock();
1601 
1602 			for (addr_t virtualAddress = area->Base();
1603 					virtualAddress < area->Base() + (area->Size() - 1);
1604 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1605 				phys_addr_t physicalAddress;
1606 				uint32 flags;
1607 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1608 				if (status < B_OK) {
1609 					panic("looking up mapping failed for va 0x%lx\n",
1610 						virtualAddress);
1611 				}
1612 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1613 				if (page == NULL) {
1614 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1615 						"\n", physicalAddress);
1616 				}
1617 
1618 				DEBUG_PAGE_ACCESS_START(page);
1619 
1620 				cache->InsertPage(page, offset);
1621 				increment_page_wired_count(page);
1622 				vm_page_set_state(page, PAGE_STATE_WIRED);
1623 				page->busy = false;
1624 
1625 				DEBUG_PAGE_ACCESS_END(page);
1626 			}
1627 
1628 			map->Unlock();
1629 			break;
1630 		}
1631 
1632 		case B_CONTIGUOUS:
1633 		{
1634 			// We have already allocated our contiguous page run, so we can now
1635 			// just map them in the address space
1636 			VMTranslationMap* map = addressSpace->TranslationMap();
1637 			phys_addr_t physicalAddress
1638 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1639 			addr_t virtualAddress = area->Base();
1640 			off_t offset = 0;
1641 
1642 			map->Lock();
1643 
1644 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1645 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1646 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1647 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1648 				if (page == NULL)
1649 					panic("couldn't lookup physical page just allocated\n");
1650 
1651 				status = map->Map(virtualAddress, physicalAddress, protection,
1652 					area->MemoryType(), &reservation);
1653 				if (status < B_OK)
1654 					panic("couldn't map physical page in page run\n");
1655 
1656 				cache->InsertPage(page, offset);
1657 				increment_page_wired_count(page);
1658 
1659 				DEBUG_PAGE_ACCESS_END(page);
1660 			}
1661 
1662 			map->Unlock();
1663 			break;
1664 		}
1665 
1666 		default:
1667 			break;
1668 	}
1669 
1670 	cache->Unlock();
1671 
1672 	if (reservedPages > 0)
1673 		vm_page_unreserve_pages(&reservation);
1674 
1675 	TRACE(("vm_create_anonymous_area: done\n"));
1676 
1677 	area->cache_type = CACHE_TYPE_RAM;
1678 	return area->id;
1679 
1680 err1:
1681 	if (wiring == B_CONTIGUOUS) {
1682 		// we had reserved the area space upfront...
1683 		phys_addr_t pageNumber = page->physical_page_number;
1684 		int32 i;
1685 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1686 			page = vm_lookup_page(pageNumber);
1687 			if (page == NULL)
1688 				panic("couldn't lookup physical page just allocated\n");
1689 
1690 			vm_page_set_state(page, PAGE_STATE_FREE);
1691 		}
1692 	}
1693 
1694 err0:
1695 	if (reservedPages > 0)
1696 		vm_page_unreserve_pages(&reservation);
1697 	if (reservedMemory > 0)
1698 		vm_unreserve_memory(reservedMemory);
1699 
1700 	return status;
1701 }
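
// Illustrative sketch (hypothetical helper, not in the original source):
// creating a fully locked anonymous kernel area. Both restriction structures
// are zero-initialized, i.e. no particular virtual or physical placement is
// requested beyond B_ANY_KERNEL_ADDRESS.
#if 0
static area_id
example_create_locked_kernel_buffer(const char* name, size_t size,
	void** _address)
{
	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
	physical_address_restrictions physicalRestrictions = {};

	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
		&virtualRestrictions, &physicalRestrictions, true, _address);
}
#endif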
1702 
1703 
1704 area_id
1705 vm_map_physical_memory(team_id team, const char* name, void** _address,
1706 	uint32 addressSpec, addr_t size, uint32 protection,
1707 	phys_addr_t physicalAddress, bool alreadyWired)
1708 {
1709 	VMArea* area;
1710 	VMCache* cache;
1711 	addr_t mapOffset;
1712 
1713 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1714 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1715 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1716 		addressSpec, size, protection, physicalAddress));
1717 
1718 	if (!arch_vm_supports_protection(protection))
1719 		return B_NOT_SUPPORTED;
1720 
1721 	AddressSpaceWriteLocker locker(team);
1722 	if (!locker.IsLocked())
1723 		return B_BAD_TEAM_ID;
1724 
1725 	// if the physical address is not page-aligned,
1726 	// move the actual area down to align it on a page boundary
1727 	mapOffset = physicalAddress % B_PAGE_SIZE;
1728 	size += mapOffset;
1729 	physicalAddress -= mapOffset;
1730 
1731 	size = PAGE_ALIGN(size);
1732 
1733 	// create a device cache
1734 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1735 	if (status != B_OK)
1736 		return status;
1737 
1738 	cache->virtual_end = size;
1739 
1740 	cache->Lock();
1741 
1742 	virtual_address_restrictions addressRestrictions = {};
1743 	addressRestrictions.address = *_address;
1744 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1745 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1746 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1747 		true, &area, _address);
1748 
1749 	if (status < B_OK)
1750 		cache->ReleaseRefLocked();
1751 
1752 	cache->Unlock();
1753 
1754 	if (status == B_OK) {
1755 		// set requested memory type -- use uncached, if not given
1756 		uint32 memoryType = addressSpec & B_MTR_MASK;
1757 		if (memoryType == 0)
1758 			memoryType = B_MTR_UC;
1759 
1760 		area->SetMemoryType(memoryType);
1761 
1762 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1763 		if (status != B_OK)
1764 			delete_area(locker.AddressSpace(), area, false);
1765 	}
1766 
1767 	if (status != B_OK)
1768 		return status;
1769 
1770 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1771 
1772 	if (alreadyWired) {
1773 		// The area is already mapped, but possibly not with the right
1774 		// memory type.
1775 		map->Lock();
1776 		map->ProtectArea(area, area->protection);
1777 		map->Unlock();
1778 	} else {
1779 		// Map the area completely.
1780 
1781 		// reserve pages needed for the mapping
1782 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1783 			area->Base() + (size - 1));
1784 		vm_page_reservation reservation;
1785 		vm_page_reserve_pages(&reservation, reservePages,
1786 			team == VMAddressSpace::KernelID()
1787 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1788 
1789 		map->Lock();
1790 
1791 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1792 			map->Map(area->Base() + offset, physicalAddress + offset,
1793 				protection, area->MemoryType(), &reservation);
1794 		}
1795 
1796 		map->Unlock();
1797 
1798 		vm_page_unreserve_pages(&reservation);
1799 	}
1800 
1801 	// modify the returned pointer so that it is offset into the new area
1802 	// the same way the physical address passed in was offset
1803 	*_address = (void*)((addr_t)*_address + mapOffset);
1804 
1805 	area->cache_type = CACHE_TYPE_DEVICE;
1806 	return area->id;
1807 }
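

// Illustrative usage (not part of the original source): a minimal sketch of
// how kernel code might expose a device's MMIO register block through
// vm_map_physical_memory(). The names map_example_registers(),
// kExampleRegisterBase and kExampleRegisterSize are hypothetical
// placeholders, not existing kernel symbols.
#if 0
static const phys_addr_t kExampleRegisterBase = 0xfe000000;
static const addr_t kExampleRegisterSize = B_PAGE_SIZE;

static area_id
map_example_registers(volatile uint32** _registers)
{
	void* virtualBase = NULL;
	area_id area = vm_map_physical_memory(VMAddressSpace::KernelID(),
		"example device registers", &virtualBase, B_ANY_KERNEL_ADDRESS,
		kExampleRegisterSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
		kExampleRegisterBase, false);
	if (area < 0)
		return area;

	*_registers = (volatile uint32*)virtualBase;
	return area;
}
#endif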
1808 
1809 
1810 /*!	Don't use!
1811 	TODO: This function was introduced to map physical page vecs to
1812 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1813 	use a device cache and does not track vm_page::wired_count!
1814 */
1815 area_id
1816 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1817 	uint32 addressSpec, addr_t* _size, uint32 protection,
1818 	struct generic_io_vec* vecs, uint32 vecCount)
1819 {
1820 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1821 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1822 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1823 		addressSpec, _size, protection, vecs, vecCount));
1824 
1825 	if (!arch_vm_supports_protection(protection)
1826 		|| (addressSpec & B_MTR_MASK) != 0) {
1827 		return B_NOT_SUPPORTED;
1828 	}
1829 
1830 	AddressSpaceWriteLocker locker(team);
1831 	if (!locker.IsLocked())
1832 		return B_BAD_TEAM_ID;
1833 
1834 	if (vecCount == 0)
1835 		return B_BAD_VALUE;
1836 
1837 	addr_t size = 0;
1838 	for (uint32 i = 0; i < vecCount; i++) {
1839 		if (vecs[i].base % B_PAGE_SIZE != 0
1840 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1841 			return B_BAD_VALUE;
1842 		}
1843 
1844 		size += vecs[i].length;
1845 	}
1846 
1847 	// create a device cache
1848 	VMCache* cache;
1849 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1850 	if (result != B_OK)
1851 		return result;
1852 
1853 	cache->virtual_end = size;
1854 
1855 	cache->Lock();
1856 
1857 	VMArea* area;
1858 	virtual_address_restrictions addressRestrictions = {};
1859 	addressRestrictions.address = *_address;
1860 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1861 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1862 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
1863 		&addressRestrictions, true, &area, _address);
1864 
1865 	if (result != B_OK)
1866 		cache->ReleaseRefLocked();
1867 
1868 	cache->Unlock();
1869 
1870 	if (result != B_OK)
1871 		return result;
1872 
1873 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1874 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1875 		area->Base() + (size - 1));
1876 
1877 	vm_page_reservation reservation;
1878 	vm_page_reserve_pages(&reservation, reservePages,
1879 			team == VMAddressSpace::KernelID()
1880 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1881 	map->Lock();
1882 
1883 	uint32 vecIndex = 0;
1884 	size_t vecOffset = 0;
1885 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1886 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1887 			vecOffset = 0;
1888 			vecIndex++;
1889 		}
1890 
1891 		if (vecIndex >= vecCount)
1892 			break;
1893 
1894 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1895 			protection, area->MemoryType(), &reservation);
1896 
1897 		vecOffset += B_PAGE_SIZE;
1898 	}
1899 
1900 	map->Unlock();
1901 	vm_page_unreserve_pages(&reservation);
1902 
1903 	if (_size != NULL)
1904 		*_size = size;
1905 
1906 	area->cache_type = CACHE_TYPE_DEVICE;
1907 	return area->id;
1908 }
1909 
1910 
1911 area_id
1912 vm_create_null_area(team_id team, const char* name, void** address,
1913 	uint32 addressSpec, addr_t size, uint32 flags)
1914 {
1915 	size = PAGE_ALIGN(size);
1916 
1917 	// Lock the address space and, if B_EXACT_ADDRESS and
1918 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1919 	// is not wired.
1920 	AddressSpaceWriteLocker locker;
1921 	do {
1922 		if (locker.SetTo(team) != B_OK)
1923 			return B_BAD_TEAM_ID;
1924 	} while (addressSpec == B_EXACT_ADDRESS
1925 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1926 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1927 			(addr_t)*address, size, &locker));
1928 
1929 	// create a null cache
1930 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1931 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1932 	VMCache* cache;
1933 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1934 	if (status != B_OK)
1935 		return status;
1936 
1937 	cache->temporary = 1;
1938 	cache->virtual_end = size;
1939 
1940 	cache->Lock();
1941 
1942 	VMArea* area;
1943 	virtual_address_restrictions addressRestrictions = {};
1944 	addressRestrictions.address = *address;
1945 	addressRestrictions.address_specification = addressSpec;
1946 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1947 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
1948 		REGION_NO_PRIVATE_MAP, flags,
1949 		&addressRestrictions, true, &area, address);
1950 
1951 	if (status < B_OK) {
1952 		cache->ReleaseRefAndUnlock();
1953 		return status;
1954 	}
1955 
1956 	cache->Unlock();
1957 
1958 	area->cache_type = CACHE_TYPE_NULL;
1959 	return area->id;
1960 }
1961 
1962 
1963 /*!	Creates the vnode cache for the specified \a vnode.
1964 	The vnode has to be marked busy when calling this function.
1965 */
1966 status_t
1967 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1968 {
1969 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1970 }
1971 
1972 
1973 /*!	\a cache must be locked. The area's address space must be read-locked.
1974 */
1975 static void
1976 pre_map_area_pages(VMArea* area, VMCache* cache,
1977 	vm_page_reservation* reservation)
1978 {
1979 	addr_t baseAddress = area->Base();
1980 	addr_t cacheOffset = area->cache_offset;
1981 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1982 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1983 
1984 	for (VMCachePagesTree::Iterator it
1985 				= cache->pages.GetIterator(firstPage, true, true);
1986 			vm_page* page = it.Next();) {
1987 		if (page->cache_offset >= endPage)
1988 			break;
1989 
1990 		// skip busy and inactive pages
1991 		if (page->busy || page->usage_count == 0)
1992 			continue;
1993 
1994 		DEBUG_PAGE_ACCESS_START(page);
1995 		map_page(area, page,
1996 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1997 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1998 		DEBUG_PAGE_ACCESS_END(page);
1999 	}
2000 }
2001 
2002 
2003 /*!	Will map the file specified by \a fd to an area in memory.
2004 	The file will be mirrored beginning at the specified \a offset. The
2005 	\a offset and \a size arguments have to be page aligned.
2006 */
2007 static area_id
2008 _vm_map_file(team_id team, const char* name, void** _address,
2009 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2010 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2011 {
2012 	// TODO: for binary files, we want to make sure that they get the
2013 	//	copy of a file at a given time, ie. later changes should not
2014 	//	make it into the mapped copy -- this will need quite some changes
2015 	//	to be done in a nice way
2016 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2017 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2018 
2019 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2020 	size = PAGE_ALIGN(size);
2021 
2022 	if (mapping == REGION_NO_PRIVATE_MAP)
2023 		protection |= B_SHARED_AREA;
2024 	if (addressSpec != B_EXACT_ADDRESS)
2025 		unmapAddressRange = false;
2026 
2027 	if (fd < 0) {
2028 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2029 		virtual_address_restrictions virtualRestrictions = {};
2030 		virtualRestrictions.address = *_address;
2031 		virtualRestrictions.address_specification = addressSpec;
2032 		physical_address_restrictions physicalRestrictions = {};
2033 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2034 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2035 			_address);
2036 	}
2037 
2038 	// get the open flags of the FD
2039 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2040 	if (descriptor == NULL)
2041 		return EBADF;
2042 	int32 openMode = descriptor->open_mode;
2043 	put_fd(descriptor);
2044 
2045 	// The FD must be open for reading at any rate. For a shared mapping with
2046 	// write access, the FD additionally must be open for writing.
2047 	if ((openMode & O_ACCMODE) == O_WRONLY
2048 		|| (mapping == REGION_NO_PRIVATE_MAP
2049 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2050 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2051 		return EACCES;
2052 	}
2053 
2054 	uint32 protectionMax = 0;
2055 	if (mapping != REGION_PRIVATE_MAP) {
2056 		protectionMax = protection | B_READ_AREA;
2057 		if ((openMode & O_ACCMODE) == O_RDWR)
2058 			protectionMax |= B_WRITE_AREA;
2059 	}
2060 
2061 	// get the vnode for the object, this also grabs a ref to it
2062 	struct vnode* vnode = NULL;
2063 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2064 	if (status < B_OK)
2065 		return status;
2066 	VnodePutter vnodePutter(vnode);
2067 
2068 	// If we're going to pre-map pages, we need to reserve the pages needed by
2069 	// the mapping backend upfront.
2070 	page_num_t reservedPreMapPages = 0;
2071 	vm_page_reservation reservation;
2072 	if ((protection & B_READ_AREA) != 0) {
2073 		AddressSpaceWriteLocker locker;
2074 		status = locker.SetTo(team);
2075 		if (status != B_OK)
2076 			return status;
2077 
2078 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2079 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2080 
2081 		locker.Unlock();
2082 
2083 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2084 			team == VMAddressSpace::KernelID()
2085 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2086 	}
2087 
2088 	struct PageUnreserver {
2089 		PageUnreserver(vm_page_reservation* reservation)
2090 			:
2091 			fReservation(reservation)
2092 		{
2093 		}
2094 
2095 		~PageUnreserver()
2096 		{
2097 			if (fReservation != NULL)
2098 				vm_page_unreserve_pages(fReservation);
2099 		}
2100 
2101 		vm_page_reservation* fReservation;
2102 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2103 
2104 	// Lock the address space and, if the specified address range shall be
2105 	// unmapped, ensure it is not wired.
2106 	AddressSpaceWriteLocker locker;
2107 	do {
2108 		if (locker.SetTo(team) != B_OK)
2109 			return B_BAD_TEAM_ID;
2110 	} while (unmapAddressRange
2111 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2112 			(addr_t)*_address, size, &locker));
2113 
2114 	// TODO: this only works for file systems that use the file cache
2115 	VMCache* cache;
2116 	status = vfs_get_vnode_cache(vnode, &cache, false);
2117 	if (status < B_OK)
2118 		return status;
2119 
2120 	cache->Lock();
2121 
2122 	VMArea* area;
2123 	virtual_address_restrictions addressRestrictions = {};
2124 	addressRestrictions.address = *_address;
2125 	addressRestrictions.address_specification = addressSpec;
2126 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2127 		0, protection, protectionMax, mapping,
2128 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
2129 		&addressRestrictions, kernel, &area, _address);
2130 
2131 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2132 		// map_backing_store() cannot know we no longer need the ref
2133 		cache->ReleaseRefLocked();
2134 	}
2135 
2136 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2137 		pre_map_area_pages(area, cache, &reservation);
2138 
2139 	cache->Unlock();
2140 
2141 	if (status == B_OK) {
2142 		// TODO: this probably deserves a smarter solution, ie. don't always
2143 		// prefetch stuff, and also, probably don't trigger it at this place.
2144 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2145 			// prefetches at max 10 MB starting from "offset"
2146 	}
2147 
2148 	if (status != B_OK)
2149 		return status;
2150 
2151 	area->cache_type = CACHE_TYPE_VNODE;
2152 	return area->id;
2153 }
2154 
2155 
2156 area_id
2157 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2158 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2159 	int fd, off_t offset)
2160 {
2161 	if (!arch_vm_supports_protection(protection))
2162 		return B_NOT_SUPPORTED;
2163 
2164 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2165 		mapping, unmapAddressRange, fd, offset, true);
2166 }
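

// Illustrative usage (not part of the original source): a minimal sketch of
// mapping the first megabyte of an already opened file read-only into the
// kernel address space via vm_map_file(). map_file_read_only() is a
// hypothetical helper; "fd" is assumed to be a valid kernel file descriptor
// obtained elsewhere.
#if 0
static area_id
map_file_read_only(int fd, void** _mappedAddress)
{
	*_mappedAddress = NULL;
	return vm_map_file(VMAddressSpace::KernelID(), "example file mapping",
		_mappedAddress, B_ANY_KERNEL_ADDRESS, 1 * 1024 * 1024,
		B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, false, fd, 0);
}
#endif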
2167 
2168 
2169 VMCache*
2170 vm_area_get_locked_cache(VMArea* area)
2171 {
2172 	rw_lock_read_lock(&sAreaCacheLock);
2173 
2174 	while (true) {
2175 		VMCache* cache = area->cache;
2176 
2177 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2178 			// cache has been deleted
2179 			rw_lock_read_lock(&sAreaCacheLock);
2180 			continue;
2181 		}
2182 
2183 		rw_lock_read_lock(&sAreaCacheLock);
2184 
2185 		if (cache == area->cache) {
2186 			cache->AcquireRefLocked();
2187 			rw_lock_read_unlock(&sAreaCacheLock);
2188 			return cache;
2189 		}
2190 
2191 		// the cache changed in the meantime
2192 		cache->Unlock();
2193 	}
2194 }
2195 
2196 
2197 void
2198 vm_area_put_locked_cache(VMCache* cache)
2199 {
2200 	cache->ReleaseRefAndUnlock();
2201 }
2202 
2203 
2204 area_id
2205 vm_clone_area(team_id team, const char* name, void** address,
2206 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2207 	bool kernel)
2208 {
2209 	VMArea* newArea = NULL;
2210 	VMArea* sourceArea;
2211 
2212 	// Check whether the source area exists and is cloneable. If so, mark it
2213 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2214 	{
2215 		AddressSpaceWriteLocker locker;
2216 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2217 		if (status != B_OK)
2218 			return status;
2219 
2220 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2221 			return B_NOT_ALLOWED;
2222 
2223 		sourceArea->protection |= B_SHARED_AREA;
2224 		protection |= B_SHARED_AREA;
2225 	}
2226 
2227 	// Now lock both address spaces and actually do the cloning.
2228 
2229 	MultiAddressSpaceLocker locker;
2230 	VMAddressSpace* sourceAddressSpace;
2231 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2232 	if (status != B_OK)
2233 		return status;
2234 
2235 	VMAddressSpace* targetAddressSpace;
2236 	status = locker.AddTeam(team, true, &targetAddressSpace);
2237 	if (status != B_OK)
2238 		return status;
2239 
2240 	status = locker.Lock();
2241 	if (status != B_OK)
2242 		return status;
2243 
2244 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2245 	if (sourceArea == NULL)
2246 		return B_BAD_VALUE;
2247 
2248 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2249 		return B_NOT_ALLOWED;
2250 
2251 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2252 
2253 	if (!kernel && sourceAddressSpace != targetAddressSpace
2254 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2255 #if KDEBUG
2256 		Team* team = thread_get_current_thread()->team;
2257 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2258 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2259 #endif
2260 		status = B_NOT_ALLOWED;
2261 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2262 		status = B_NOT_ALLOWED;
2263 	} else {
2264 		virtual_address_restrictions addressRestrictions = {};
2265 		addressRestrictions.address = *address;
2266 		addressRestrictions.address_specification = addressSpec;
2267 		status = map_backing_store(targetAddressSpace, cache,
2268 			sourceArea->cache_offset, name, sourceArea->Size(),
2269 			sourceArea->wiring, protection, sourceArea->protection_max,
2270 			mapping, 0, &addressRestrictions,
2271 			kernel, &newArea, address);
2272 	}
2273 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2274 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2275 		// to create a new cache, and has therefore already acquired a reference
2276 		// to the source cache - but otherwise it has no idea that we need
2277 		// one.
2278 		cache->AcquireRefLocked();
2279 	}
2280 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2281 		// we need to map in everything at this point
2282 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2283 			// we don't have actual pages to map but a physical area
2284 			VMTranslationMap* map
2285 				= sourceArea->address_space->TranslationMap();
2286 			map->Lock();
2287 
2288 			phys_addr_t physicalAddress;
2289 			uint32 oldProtection;
2290 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2291 
2292 			map->Unlock();
2293 
2294 			map = targetAddressSpace->TranslationMap();
2295 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2296 				newArea->Base() + (newArea->Size() - 1));
2297 
2298 			vm_page_reservation reservation;
2299 			vm_page_reserve_pages(&reservation, reservePages,
2300 				targetAddressSpace == VMAddressSpace::Kernel()
2301 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2302 			map->Lock();
2303 
2304 			for (addr_t offset = 0; offset < newArea->Size();
2305 					offset += B_PAGE_SIZE) {
2306 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2307 					protection, newArea->MemoryType(), &reservation);
2308 			}
2309 
2310 			map->Unlock();
2311 			vm_page_unreserve_pages(&reservation);
2312 		} else {
2313 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2314 			size_t reservePages = map->MaxPagesNeededToMap(
2315 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2316 			vm_page_reservation reservation;
2317 			vm_page_reserve_pages(&reservation, reservePages,
2318 				targetAddressSpace == VMAddressSpace::Kernel()
2319 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2320 
2321 			// map in all pages from source
2322 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2323 					vm_page* page  = it.Next();) {
2324 				if (!page->busy) {
2325 					DEBUG_PAGE_ACCESS_START(page);
2326 					map_page(newArea, page,
2327 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2328 							- newArea->cache_offset),
2329 						protection, &reservation);
2330 					DEBUG_PAGE_ACCESS_END(page);
2331 				}
2332 			}
2333 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2334 			// ensuring that!
2335 
2336 			vm_page_unreserve_pages(&reservation);
2337 		}
2338 	}
2339 	if (status == B_OK)
2340 		newArea->cache_type = sourceArea->cache_type;
2341 
2342 	vm_area_put_locked_cache(cache);
2343 
2344 	if (status < B_OK)
2345 		return status;
2346 
2347 	return newArea->id;
2348 }
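

// Illustrative usage (not part of the original source): a minimal sketch of
// cloning an existing area into the kernel address space with shared
// mappings, e.g. to access a buffer that another team created.
// clone_shared_buffer() is a hypothetical helper; "sourceArea" is assumed to
// be a valid, cloneable area ID.
#if 0
static area_id
clone_shared_buffer(area_id sourceArea, void** _address)
{
	*_address = NULL;
	return vm_clone_area(VMAddressSpace::KernelID(), "cloned buffer",
		_address, B_ANY_KERNEL_ADDRESS,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, REGION_NO_PRIVATE_MAP,
		sourceArea, true);
}
#endif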
2349 
2350 
2351 /*!	Deletes the specified area of the given address space.
2352 
2353 	The address space must be write-locked.
2354 	The caller must ensure that the area does not have any wired ranges.
2355 
2356 	\param addressSpace The address space containing the area.
2357 	\param area The area to be deleted.
2358 	\param deletingAddressSpace \c true, if the address space is in the process
2359 		of being deleted.
2360 */
2361 static void
2362 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2363 	bool deletingAddressSpace)
2364 {
2365 	ASSERT(!area->IsWired());
2366 
2367 	VMAreaHash::Remove(area);
2368 
2369 	// At this point the area is removed from the global hash table, but
2370 	// still exists in the area list.
2371 
2372 	// Unmap the virtual address space the area occupied.
2373 	{
2374 		// We need to lock the complete cache chain.
2375 		VMCache* topCache = vm_area_get_locked_cache(area);
2376 		VMCacheChainLocker cacheChainLocker(topCache);
2377 		cacheChainLocker.LockAllSourceCaches();
2378 
2379 		// If the area's top cache is a temporary cache and the area is the only
2380 		// one referencing it (besides us currently holding a second reference),
2381 		// the unmapping code doesn't need to care about preserving the accessed
2382 		// and dirty flags of the top cache page mappings.
2383 		bool ignoreTopCachePageFlags
2384 			= topCache->temporary && topCache->RefCount() == 2;
2385 
2386 		area->address_space->TranslationMap()->UnmapArea(area,
2387 			deletingAddressSpace, ignoreTopCachePageFlags);
2388 	}
2389 
2390 	if (!area->cache->temporary)
2391 		area->cache->WriteModified();
2392 
2393 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2394 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2395 
2396 	arch_vm_unset_memory_type(area);
2397 	addressSpace->RemoveArea(area, allocationFlags);
2398 	addressSpace->Put();
2399 
2400 	area->cache->RemoveArea(area);
2401 	area->cache->ReleaseRef();
2402 
2403 	addressSpace->DeleteArea(area, allocationFlags);
2404 }
2405 
2406 
2407 status_t
2408 vm_delete_area(team_id team, area_id id, bool kernel)
2409 {
2410 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2411 		team, id));
2412 
2413 	// lock the address space and make sure the area isn't wired
2414 	AddressSpaceWriteLocker locker;
2415 	VMArea* area;
2416 	AreaCacheLocker cacheLocker;
2417 
2418 	do {
2419 		status_t status = locker.SetFromArea(team, id, area);
2420 		if (status != B_OK)
2421 			return status;
2422 
2423 		cacheLocker.SetTo(area);
2424 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2425 
2426 	cacheLocker.Unlock();
2427 
2428 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2429 		return B_NOT_ALLOWED;
2430 
2431 	delete_area(locker.AddressSpace(), area, false);
2432 	return B_OK;
2433 }
2434 
2435 
2436 /*!	Creates a new cache on top of the given cache, moves all areas from
2437 	the old cache to the new one, and changes the protection of all affected
2438 	areas' pages to read-only. If requested, wired pages are moved up to the
2439 	new cache and copies are added to the old cache in their place.
2440 	Preconditions:
2441 	- The given cache must be locked.
2442 	- All of the cache's areas' address spaces must be read locked.
2443 	- Either the cache must not have any wired ranges or a page reservation for
2444 	  all wired pages must be provided, so they can be copied.
2445 
2446 	\param lowerCache The cache on top of which a new cache shall be created.
2447 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2448 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2449 		has wired pages. The wired pages are copied in this case.
2450 */
2451 static status_t
2452 vm_copy_on_write_area(VMCache* lowerCache,
2453 	vm_page_reservation* wiredPagesReservation)
2454 {
2455 	VMCache* upperCache;
2456 
2457 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2458 
2459 	// We need to separate the cache from its areas. The cache goes one level
2460 	// deeper and we create a new cache in between.
2461 
2462 	// create an anonymous cache
2463 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2464 		lowerCache->GuardSize() / B_PAGE_SIZE,
2465 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2466 		VM_PRIORITY_USER);
2467 	if (status != B_OK)
2468 		return status;
2469 
2470 	upperCache->Lock();
2471 
2472 	upperCache->temporary = 1;
2473 	upperCache->virtual_base = lowerCache->virtual_base;
2474 	upperCache->virtual_end = lowerCache->virtual_end;
2475 
2476 	// transfer the lower cache areas to the upper cache
2477 	rw_lock_write_lock(&sAreaCacheLock);
2478 	upperCache->TransferAreas(lowerCache);
2479 	rw_lock_write_unlock(&sAreaCacheLock);
2480 
2481 	lowerCache->AddConsumer(upperCache);
2482 
2483 	// We now need to remap all pages from all of the cache's areas read-only,
2484 	// so that a copy will be created on next write access. If there are wired
2485 	// pages, we keep their protection, move them to the upper cache and create
2486 	// copies for the lower cache.
2487 	if (wiredPagesReservation != NULL) {
2488 		// We need to handle wired pages -- iterate through the cache's pages.
2489 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2490 				vm_page* page = it.Next();) {
2491 			if (page->WiredCount() > 0) {
2492 				// allocate a new page and copy the wired one
2493 				vm_page* copiedPage = vm_page_allocate_page(
2494 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2495 
2496 				vm_memcpy_physical_page(
2497 					copiedPage->physical_page_number * B_PAGE_SIZE,
2498 					page->physical_page_number * B_PAGE_SIZE);
2499 
2500 				// move the wired page to the upper cache (note: removing is OK
2501 				// with the SplayTree iterator) and insert the copy
2502 				upperCache->MovePage(page);
2503 				lowerCache->InsertPage(copiedPage,
2504 					page->cache_offset * B_PAGE_SIZE);
2505 
2506 				DEBUG_PAGE_ACCESS_END(copiedPage);
2507 			} else {
2508 				// Change the protection of this page in all areas.
2509 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2510 						tempArea = tempArea->cache_next) {
2511 					// The area must be readable in the same way it was
2512 					// previously writable.
2513 					addr_t address = virtual_page_address(tempArea, page);
2514 					uint32 protection = 0;
2515 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2516 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2517 						protection |= B_KERNEL_READ_AREA;
2518 					if ((pageProtection & B_READ_AREA) != 0)
2519 						protection |= B_READ_AREA;
2520 
2521 					VMTranslationMap* map
2522 						= tempArea->address_space->TranslationMap();
2523 					map->Lock();
2524 					map->ProtectPage(tempArea, address, protection);
2525 					map->Unlock();
2526 				}
2527 			}
2528 		}
2529 	} else {
2530 		ASSERT(lowerCache->WiredPagesCount() == 0);
2531 
2532 		// just change the protection of all areas
2533 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2534 				tempArea = tempArea->cache_next) {
2535 			if (tempArea->page_protections != NULL) {
2536 				// Change the protection of all pages in this area.
2537 				VMTranslationMap* map = tempArea->address_space->TranslationMap();
2538 				map->Lock();
2539 				for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2540 					vm_page* page = it.Next();) {
2541 					// The area must be readable in the same way it was
2542 					// previously writable.
2543 					addr_t address = virtual_page_address(tempArea, page);
2544 					uint32 protection = 0;
2545 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2546 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2547 						protection |= B_KERNEL_READ_AREA;
2548 					if ((pageProtection & B_READ_AREA) != 0)
2549 						protection |= B_READ_AREA;
2550 
2551 					map->ProtectPage(tempArea, address, protection);
2552 				}
2553 				map->Unlock();
2554 				continue;
2555 			}
2556 			// The area must be readable in the same way it was previously
2557 			// writable.
2558 			uint32 protection = 0;
2559 			if ((tempArea->protection & B_KERNEL_READ_AREA) != 0)
2560 				protection |= B_KERNEL_READ_AREA;
2561 			if ((tempArea->protection & B_READ_AREA) != 0)
2562 				protection |= B_READ_AREA;
2563 
2564 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2565 			map->Lock();
2566 			map->ProtectArea(tempArea, protection);
2567 			map->Unlock();
2568 		}
2569 	}
2570 
2571 	vm_area_put_locked_cache(upperCache);
2572 
2573 	return B_OK;
2574 }
2575 
2576 
2577 area_id
2578 vm_copy_area(team_id team, const char* name, void** _address,
2579 	uint32 addressSpec, area_id sourceID)
2580 {
2581 	// Do the locking: target address space, all address spaces associated with
2582 	// the source cache, and the cache itself.
2583 	MultiAddressSpaceLocker locker;
2584 	VMAddressSpace* targetAddressSpace;
2585 	VMCache* cache;
2586 	VMArea* source;
2587 	AreaCacheLocker cacheLocker;
2588 	status_t status;
2589 	bool sharedArea;
2590 
2591 	page_num_t wiredPages = 0;
2592 	vm_page_reservation wiredPagesReservation;
2593 
2594 	bool restart;
2595 	do {
2596 		restart = false;
2597 
2598 		locker.Unset();
2599 		status = locker.AddTeam(team, true, &targetAddressSpace);
2600 		if (status == B_OK) {
2601 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2602 				&cache);
2603 		}
2604 		if (status != B_OK)
2605 			return status;
2606 
2607 		cacheLocker.SetTo(cache, true);	// already locked
2608 
2609 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2610 
2611 		page_num_t oldWiredPages = wiredPages;
2612 		wiredPages = 0;
2613 
2614 		// If the source area isn't shared, count the number of wired pages in
2615 		// the cache and reserve as many pages.
2616 		if (!sharedArea) {
2617 			wiredPages = cache->WiredPagesCount();
2618 
2619 			if (wiredPages > oldWiredPages) {
2620 				cacheLocker.Unlock();
2621 				locker.Unlock();
2622 
2623 				if (oldWiredPages > 0)
2624 					vm_page_unreserve_pages(&wiredPagesReservation);
2625 
2626 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2627 					VM_PRIORITY_USER);
2628 
2629 				restart = true;
2630 			}
2631 		} else if (oldWiredPages > 0)
2632 			vm_page_unreserve_pages(&wiredPagesReservation);
2633 	} while (restart);
2634 
2635 	// unreserve pages later
2636 	struct PagesUnreserver {
2637 		PagesUnreserver(vm_page_reservation* reservation)
2638 			:
2639 			fReservation(reservation)
2640 		{
2641 		}
2642 
2643 		~PagesUnreserver()
2644 		{
2645 			if (fReservation != NULL)
2646 				vm_page_unreserve_pages(fReservation);
2647 		}
2648 
2649 	private:
2650 		vm_page_reservation*	fReservation;
2651 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2652 
2653 	bool writableCopy
2654 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2655 	uint8* targetPageProtections = NULL;
2656 
2657 	if (source->page_protections != NULL) {
2658 		size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2;
2659 		targetPageProtections = (uint8*)malloc_etc(bytes,
2660 			(source->address_space == VMAddressSpace::Kernel()
2661 					|| targetAddressSpace == VMAddressSpace::Kernel())
2662 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2663 		if (targetPageProtections == NULL)
2664 			return B_NO_MEMORY;
2665 
2666 		memcpy(targetPageProtections, source->page_protections, bytes);
2667 
2668 		if (!writableCopy) {
2669 			for (size_t i = 0; i < bytes; i++) {
2670 				if ((targetPageProtections[i]
2671 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2672 					writableCopy = true;
2673 					break;
2674 				}
2675 			}
2676 		}
2677 	}
2678 
2679 	if (addressSpec == B_CLONE_ADDRESS) {
2680 		addressSpec = B_EXACT_ADDRESS;
2681 		*_address = (void*)source->Base();
2682 	}
2683 
2684 	// First, create a cache on top of the source area, or use the existing
2685 	// one if this is a shared area.
2686 
2687 	VMArea* target;
2688 	virtual_address_restrictions addressRestrictions = {};
2689 	addressRestrictions.address = *_address;
2690 	addressRestrictions.address_specification = addressSpec;
2691 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2692 		name, source->Size(), source->wiring, source->protection,
2693 		source->protection_max,
2694 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2695 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2696 		&addressRestrictions, true, &target, _address);
2697 	if (status < B_OK) {
2698 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2699 		return status;
2700 	}
2701 
2702 	if (targetPageProtections != NULL)
2703 		target->page_protections = targetPageProtections;
2704 
2705 	if (sharedArea) {
2706 		// The new area uses the old area's cache, but map_backing_store()
2707 		// hasn't acquired a ref. So we have to do that now.
2708 		cache->AcquireRefLocked();
2709 	}
2710 
2711 	// If the source area is writable, we need to move it one layer up as well
2712 
2713 	if (!sharedArea) {
2714 		if (writableCopy) {
2715 			// TODO: do something more useful if this fails!
2716 			if (vm_copy_on_write_area(cache,
2717 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2718 				panic("vm_copy_on_write_area() failed!\n");
2719 			}
2720 		}
2721 	}
2722 
2723 	// we return the ID of the newly created area
2724 	return target->id;
2725 }
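

// Illustrative usage (not part of the original source): a minimal sketch of
// creating a copy of an existing area in the kernel address space with
// vm_copy_area(); for a non-shared, writable source this sets up
// copy-on-write as implemented above. copy_area_to_kernel() is a
// hypothetical helper; "sourceArea" is assumed to be a valid area ID.
#if 0
static area_id
copy_area_to_kernel(area_id sourceArea, void** _address)
{
	*_address = NULL;
	return vm_copy_area(VMAddressSpace::KernelID(), "area copy", _address,
		B_ANY_KERNEL_ADDRESS, sourceArea);
}
#endif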
2726 
2727 
2728 status_t
2729 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2730 	bool kernel)
2731 {
2732 	fix_protection(&newProtection);
2733 
2734 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2735 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2736 
2737 	if (!arch_vm_supports_protection(newProtection))
2738 		return B_NOT_SUPPORTED;
2739 
2740 	bool becomesWritable
2741 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2742 
2743 	// lock address spaces and cache
2744 	MultiAddressSpaceLocker locker;
2745 	VMCache* cache;
2746 	VMArea* area;
2747 	status_t status;
2748 	AreaCacheLocker cacheLocker;
2749 	bool isWritable;
2750 
2751 	bool restart;
2752 	do {
2753 		restart = false;
2754 
2755 		locker.Unset();
2756 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2757 		if (status != B_OK)
2758 			return status;
2759 
2760 		cacheLocker.SetTo(cache, true);	// already locked
2761 
2762 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2763 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2764 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2765 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2766 				" (%s)\n", team, newProtection, areaID, area->name);
2767 			return B_NOT_ALLOWED;
2768 		}
2769 		if (!kernel && area->protection_max != 0
2770 			&& (newProtection & area->protection_max)
2771 				!= (newProtection & B_USER_PROTECTION)) {
2772 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2773 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
2774 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2775 				area->protection_max, areaID, area->name);
2776 			return B_NOT_ALLOWED;
2777 		}
2778 
2779 		if (area->protection == newProtection)
2780 			return B_OK;
2781 
2782 		if (team != VMAddressSpace::KernelID()
2783 			&& area->address_space->ID() != team) {
2784 			// unless you're the kernel, you are only allowed to set
2785 			// the protection of your own areas
2786 			return B_NOT_ALLOWED;
2787 		}
2788 
2789 		isWritable
2790 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2791 
2792 		// Make sure the area (or, if we're going to call
2793 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2794 		// wired ranges.
2795 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2796 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2797 					otherArea = otherArea->cache_next) {
2798 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2799 					restart = true;
2800 					break;
2801 				}
2802 			}
2803 		} else {
2804 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2805 				restart = true;
2806 		}
2807 	} while (restart);
2808 
2809 	bool changePageProtection = true;
2810 	bool changeTopCachePagesOnly = false;
2811 
2812 	if (isWritable && !becomesWritable) {
2813 		// writable -> !writable
2814 
2815 		if (cache->source != NULL && cache->temporary) {
2816 			if (cache->CountWritableAreas(area) == 0) {
2817 				// Since this cache is now backed by the pages of its source cache,
2818 				// we can change the cache's commitment to account only for those
2819 				// pages that really are in this cache.
2820 
2821 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2822 					team == VMAddressSpace::KernelID()
2823 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2824 
2825 				// TODO: we may be able to join with our source cache, if
2826 				// count == 0
2827 			}
2828 		}
2829 
2830 		// If only the writability changes, we can just remap the pages of the
2831 		// top cache, since the pages of lower caches are mapped read-only
2832 		// anyway. That's only advantageous, though, if the number of pages in
2833 		// the cache is significantly smaller than the number of pages in the
2834 		// area.
2835 		if (newProtection
2836 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2837 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2838 			changeTopCachePagesOnly = true;
2839 		}
2840 	} else if (!isWritable && becomesWritable) {
2841 		// !writable -> writable
2842 
2843 		if (!cache->consumers.IsEmpty()) {
2844 			// There are consumers -- we have to insert a new cache. Fortunately
2845 			// vm_copy_on_write_area() does everything that's needed.
2846 			changePageProtection = false;
2847 			status = vm_copy_on_write_area(cache, NULL);
2848 		} else {
2849 			// No consumers, so we don't need to insert a new one.
2850 			if (cache->source != NULL && cache->temporary) {
2851 				// the cache's commitment must contain all possible pages
2852 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2853 					team == VMAddressSpace::KernelID()
2854 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2855 			}
2856 
2857 			if (status == B_OK && cache->source != NULL) {
2858 				// There's a source cache, hence we can't just change all pages'
2859 				// protection or we might allow writing into pages belonging to
2860 				// a lower cache.
2861 				changeTopCachePagesOnly = true;
2862 			}
2863 		}
2864 	} else {
2865 		// we don't have anything special to do in all other cases
2866 	}
2867 
2868 	if (status == B_OK) {
2869 		// remap existing pages in this cache
2870 		if (changePageProtection) {
2871 			VMTranslationMap* map = area->address_space->TranslationMap();
2872 			map->Lock();
2873 
2874 			if (changeTopCachePagesOnly) {
2875 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2876 				page_num_t lastPageOffset
2877 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2878 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2879 						vm_page* page = it.Next();) {
2880 					if (page->cache_offset >= firstPageOffset
2881 						&& page->cache_offset <= lastPageOffset) {
2882 						addr_t address = virtual_page_address(area, page);
2883 						map->ProtectPage(area, address, newProtection);
2884 					}
2885 				}
2886 			} else
2887 				map->ProtectArea(area, newProtection);
2888 
2889 			map->Unlock();
2890 		}
2891 
2892 		area->protection = newProtection;
2893 	}
2894 
2895 	return status;
2896 }
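

// Illustrative usage (not part of the original source): a minimal sketch of
// revoking write access from a kernel area via vm_set_area_protection().
// make_area_read_only() is a hypothetical helper.
#if 0
static status_t
make_area_read_only(area_id area)
{
	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
		B_KERNEL_READ_AREA, true);
}
#endif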
2897 
2898 
2899 status_t
2900 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2901 {
2902 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2903 	if (addressSpace == NULL)
2904 		return B_BAD_TEAM_ID;
2905 
2906 	VMTranslationMap* map = addressSpace->TranslationMap();
2907 
2908 	map->Lock();
2909 	uint32 dummyFlags;
2910 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2911 	map->Unlock();
2912 
2913 	addressSpace->Put();
2914 	return status;
2915 }
2916 
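

// Illustrative usage (not part of the original source): a minimal sketch of
// translating a mapped kernel virtual address into its physical address with
// vm_get_page_mapping(). dump_physical_address() is a hypothetical helper;
// the query fails if the address is currently not mapped.
#if 0
static status_t
dump_physical_address(addr_t virtualAddress)
{
	phys_addr_t physicalAddress;
	status_t status = vm_get_page_mapping(VMAddressSpace::KernelID(),
		virtualAddress, &physicalAddress);
	if (status != B_OK)
		return status;

	dprintf("%#" B_PRIxADDR " -> %#" B_PRIxPHYSADDR "\n", virtualAddress,
		physicalAddress);
	return B_OK;
}
#endif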
2917 
2918 /*!	The page's cache must be locked.
2919 */
2920 bool
2921 vm_test_map_modification(vm_page* page)
2922 {
2923 	if (page->modified)
2924 		return true;
2925 
2926 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2927 	vm_page_mapping* mapping;
2928 	while ((mapping = iterator.Next()) != NULL) {
2929 		VMArea* area = mapping->area;
2930 		VMTranslationMap* map = area->address_space->TranslationMap();
2931 
2932 		phys_addr_t physicalAddress;
2933 		uint32 flags;
2934 		map->Lock();
2935 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2936 		map->Unlock();
2937 
2938 		if ((flags & PAGE_MODIFIED) != 0)
2939 			return true;
2940 	}
2941 
2942 	return false;
2943 }
2944 
2945 
2946 /*!	The page's cache must be locked.
2947 */
2948 void
2949 vm_clear_map_flags(vm_page* page, uint32 flags)
2950 {
2951 	if ((flags & PAGE_ACCESSED) != 0)
2952 		page->accessed = false;
2953 	if ((flags & PAGE_MODIFIED) != 0)
2954 		page->modified = false;
2955 
2956 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2957 	vm_page_mapping* mapping;
2958 	while ((mapping = iterator.Next()) != NULL) {
2959 		VMArea* area = mapping->area;
2960 		VMTranslationMap* map = area->address_space->TranslationMap();
2961 
2962 		map->Lock();
2963 		map->ClearFlags(virtual_page_address(area, page), flags);
2964 		map->Unlock();
2965 	}
2966 }
2967 
2968 
2969 /*!	Removes all mappings from a page.
2970 	After you've called this function, the page is unmapped from memory and
2971 	the page's \c accessed and \c modified flags have been updated according
2972 	to the state of the mappings.
2973 	The page's cache must be locked.
2974 */
2975 void
2976 vm_remove_all_page_mappings(vm_page* page)
2977 {
2978 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2979 		VMArea* area = mapping->area;
2980 		VMTranslationMap* map = area->address_space->TranslationMap();
2981 		addr_t address = virtual_page_address(area, page);
2982 		map->UnmapPage(area, address, false);
2983 	}
2984 }
2985 
2986 
2987 int32
2988 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2989 {
2990 	int32 count = 0;
2991 
2992 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2993 	vm_page_mapping* mapping;
2994 	while ((mapping = iterator.Next()) != NULL) {
2995 		VMArea* area = mapping->area;
2996 		VMTranslationMap* map = area->address_space->TranslationMap();
2997 
2998 		bool modified;
2999 		if (map->ClearAccessedAndModified(area,
3000 				virtual_page_address(area, page), false, modified)) {
3001 			count++;
3002 		}
3003 
3004 		page->modified |= modified;
3005 	}
3006 
3007 
3008 	if (page->accessed) {
3009 		count++;
3010 		page->accessed = false;
3011 	}
3012 
3013 	return count;
3014 }
3015 
3016 
3017 /*!	Removes all mappings of a page and/or clears the accessed bits of the
3018 	mappings.
3019 	The function iterates through the page mappings and removes them until
3020 	encountering one that has been accessed. From then on it will continue to
3021 	iterate, but only clear the accessed flag of the mapping. The page's
3022 	\c modified bit will be updated accordingly, and the \c accessed bit will
3023 	be cleared.
3024 	\return The number of mapping accessed bits encountered, including the
3025 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3026 		of the page have been removed.
3027 */
3028 int32
3029 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3030 {
3031 	ASSERT(page->WiredCount() == 0);
3032 
3033 	if (page->accessed)
3034 		return vm_clear_page_mapping_accessed_flags(page);
3035 
3036 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3037 		VMArea* area = mapping->area;
3038 		VMTranslationMap* map = area->address_space->TranslationMap();
3039 		addr_t address = virtual_page_address(area, page);
3040 		bool modified = false;
3041 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3042 			page->accessed = true;
3043 			page->modified |= modified;
3044 			return vm_clear_page_mapping_accessed_flags(page);
3045 		}
3046 		page->modified |= modified;
3047 	}
3048 
3049 	return 0;
3050 }
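

// Illustrative usage (not part of the original source): a simplified sketch,
// in the spirit of a page scanner, of using the function above to decide
// whether an unwired, inactive page may be retired. try_retire_page() is a
// hypothetical helper; the page's cache must be locked by the caller.
#if 0
static bool
try_retire_page(vm_page* page)
{
	int32 accessedCount = vm_remove_all_page_mappings_if_unaccessed(page);
	if (accessedCount > 0) {
		// the page was accessed recently -- keep it mapped for now
		return false;
	}

	// all mappings have been removed; the caller may now write back or free
	// the page
	return true;
}
#endif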
3051 
3052 
3053 static int
3054 display_mem(int argc, char** argv)
3055 {
3056 	bool physical = false;
3057 	addr_t copyAddress;
3058 	int32 displayWidth;
3059 	int32 itemSize;
3060 	int32 num = -1;
3061 	addr_t address;
3062 	int i = 1, j;
3063 
3064 	if (argc > 1 && argv[1][0] == '-') {
3065 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3066 			physical = true;
3067 			i++;
3068 		} else
3069 			i = 99;
3070 	}
3071 
3072 	if (argc < i + 1 || argc > i + 2) {
3073 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3074 			"\tdl - 8 bytes\n"
3075 			"\tdw - 4 bytes\n"
3076 			"\tds - 2 bytes\n"
3077 			"\tdb - 1 byte\n"
3078 			"\tstring - a whole string\n"
3079 			"  -p or --physical only allows memory from a single page to be "
3080 			"displayed.\n");
3081 		return 0;
3082 	}
3083 
3084 	address = parse_expression(argv[i]);
3085 
3086 	if (argc > i + 1)
3087 		num = parse_expression(argv[i + 1]);
3088 
3089 	// build the format string
3090 	if (strcmp(argv[0], "db") == 0) {
3091 		itemSize = 1;
3092 		displayWidth = 16;
3093 	} else if (strcmp(argv[0], "ds") == 0) {
3094 		itemSize = 2;
3095 		displayWidth = 8;
3096 	} else if (strcmp(argv[0], "dw") == 0) {
3097 		itemSize = 4;
3098 		displayWidth = 4;
3099 	} else if (strcmp(argv[0], "dl") == 0) {
3100 		itemSize = 8;
3101 		displayWidth = 2;
3102 	} else if (strcmp(argv[0], "string") == 0) {
3103 		itemSize = 1;
3104 		displayWidth = -1;
3105 	} else {
3106 		kprintf("display_mem called in an invalid way!\n");
3107 		return 0;
3108 	}
3109 
3110 	if (num <= 0)
3111 		num = displayWidth;
3112 
3113 	void* physicalPageHandle = NULL;
3114 
3115 	if (physical) {
3116 		int32 offset = address & (B_PAGE_SIZE - 1);
3117 		if (num * itemSize + offset > B_PAGE_SIZE) {
3118 			num = (B_PAGE_SIZE - offset) / itemSize;
3119 			kprintf("NOTE: number of bytes has been cut to page size\n");
3120 		}
3121 
3122 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3123 
3124 		if (vm_get_physical_page_debug(address, &copyAddress,
3125 				&physicalPageHandle) != B_OK) {
3126 			kprintf("getting the hardware page failed.\n");
3127 			return 0;
3128 		}
3129 
3130 		address += offset;
3131 		copyAddress += offset;
3132 	} else
3133 		copyAddress = address;
3134 
3135 	if (!strcmp(argv[0], "string")) {
3136 		kprintf("%p \"", (char*)copyAddress);
3137 
3138 		// string mode
3139 		for (i = 0; true; i++) {
3140 			char c;
3141 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3142 					!= B_OK
3143 				|| c == '\0') {
3144 				break;
3145 			}
3146 
3147 			if (c == '\n')
3148 				kprintf("\\n");
3149 			else if (c == '\t')
3150 				kprintf("\\t");
3151 			else {
3152 				if (!isprint(c))
3153 					c = '.';
3154 
3155 				kprintf("%c", c);
3156 			}
3157 		}
3158 
3159 		kprintf("\"\n");
3160 	} else {
3161 		// number mode
3162 		for (i = 0; i < num; i++) {
3163 			uint64 value;
3164 
3165 			if ((i % displayWidth) == 0) {
3166 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3167 				if (i != 0)
3168 					kprintf("\n");
3169 
3170 				kprintf("[0x%lx]  ", address + i * itemSize);
3171 
3172 				for (j = 0; j < displayed; j++) {
3173 					char c;
3174 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3175 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3176 						displayed = j;
3177 						break;
3178 					}
3179 					if (!isprint(c))
3180 						c = '.';
3181 
3182 					kprintf("%c", c);
3183 				}
3184 				if (num > displayWidth) {
3185 					// make sure the spacing in the last line is correct
3186 					for (j = displayed; j < displayWidth * itemSize; j++)
3187 						kprintf(" ");
3188 				}
3189 				kprintf("  ");
3190 			}
3191 
3192 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3193 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3194 				kprintf("read fault");
3195 				break;
3196 			}
3197 
3198 			switch (itemSize) {
3199 				case 1:
3200 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3201 					break;
3202 				case 2:
3203 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3204 					break;
3205 				case 4:
3206 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3207 					break;
3208 				case 8:
3209 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3210 					break;
3211 			}
3212 		}
3213 
3214 		kprintf("\n");
3215 	}
3216 
3217 	if (physical) {
3218 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3219 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3220 	}
3221 	return 0;
3222 }
3223 
3224 
3225 static void
3226 dump_cache_tree_recursively(VMCache* cache, int level,
3227 	VMCache* highlightCache)
3228 {
3229 	// print this cache
3230 	for (int i = 0; i < level; i++)
3231 		kprintf("  ");
3232 	if (cache == highlightCache)
3233 		kprintf("%p <--\n", cache);
3234 	else
3235 		kprintf("%p\n", cache);
3236 
3237 	// recursively print its consumers
3238 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3239 			VMCache* consumer = it.Next();) {
3240 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3241 	}
3242 }
3243 
3244 
3245 static int
3246 dump_cache_tree(int argc, char** argv)
3247 {
3248 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3249 		kprintf("usage: %s <address>\n", argv[0]);
3250 		return 0;
3251 	}
3252 
3253 	addr_t address = parse_expression(argv[1]);
3254 	if (address == 0)
3255 		return 0;
3256 
3257 	VMCache* cache = (VMCache*)address;
3258 	VMCache* root = cache;
3259 
3260 	// find the root cache (the transitive source)
3261 	while (root->source != NULL)
3262 		root = root->source;
3263 
3264 	dump_cache_tree_recursively(root, 0, cache);
3265 
3266 	return 0;
3267 }
3268 
3269 
3270 const char*
3271 vm_cache_type_to_string(int32 type)
3272 {
3273 	switch (type) {
3274 		case CACHE_TYPE_RAM:
3275 			return "RAM";
3276 		case CACHE_TYPE_DEVICE:
3277 			return "device";
3278 		case CACHE_TYPE_VNODE:
3279 			return "vnode";
3280 		case CACHE_TYPE_NULL:
3281 			return "null";
3282 
3283 		default:
3284 			return "unknown";
3285 	}
3286 }
3287 
3288 
3289 #if DEBUG_CACHE_LIST
3290 
3291 static void
3292 update_cache_info_recursively(VMCache* cache, cache_info& info)
3293 {
3294 	info.page_count += cache->page_count;
3295 	if (cache->type == CACHE_TYPE_RAM)
3296 		info.committed += cache->committed_size;
3297 
3298 	// recurse
3299 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3300 			VMCache* consumer = it.Next();) {
3301 		update_cache_info_recursively(consumer, info);
3302 	}
3303 }
3304 
3305 
3306 static int
3307 cache_info_compare_page_count(const void* _a, const void* _b)
3308 {
3309 	const cache_info* a = (const cache_info*)_a;
3310 	const cache_info* b = (const cache_info*)_b;
3311 	if (a->page_count == b->page_count)
3312 		return 0;
3313 	return a->page_count < b->page_count ? 1 : -1;
3314 }
3315 
3316 
3317 static int
3318 cache_info_compare_committed(const void* _a, const void* _b)
3319 {
3320 	const cache_info* a = (const cache_info*)_a;
3321 	const cache_info* b = (const cache_info*)_b;
3322 	if (a->committed == b->committed)
3323 		return 0;
3324 	return a->committed < b->committed ? 1 : -1;
3325 }
3326 
3327 
3328 static void
3329 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3330 {
3331 	for (int i = 0; i < level; i++)
3332 		kprintf("  ");
3333 
3334 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3335 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3336 		cache->virtual_base, cache->virtual_end, cache->page_count);
3337 
3338 	if (level == 0)
3339 		kprintf("/%lu", info.page_count);
3340 
3341 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3342 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3343 
3344 		if (level == 0)
3345 			kprintf("/%lu", info.committed);
3346 	}
3347 
3348 	// areas
3349 	if (cache->areas != NULL) {
3350 		VMArea* area = cache->areas;
3351 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3352 			area->name, area->address_space->ID());
3353 
3354 		while (area->cache_next != NULL) {
3355 			area = area->cache_next;
3356 			kprintf(", %" B_PRId32, area->id);
3357 		}
3358 	}
3359 
3360 	kputs("\n");
3361 
3362 	// recurse
3363 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3364 			VMCache* consumer = it.Next();) {
3365 		dump_caches_recursively(consumer, info, level + 1);
3366 	}
3367 }
3368 
3369 
3370 static int
3371 dump_caches(int argc, char** argv)
3372 {
3373 	if (sCacheInfoTable == NULL) {
3374 		kprintf("No cache info table!\n");
3375 		return 0;
3376 	}
3377 
3378 	bool sortByPageCount = true;
3379 
3380 	for (int32 i = 1; i < argc; i++) {
3381 		if (strcmp(argv[i], "-c") == 0) {
3382 			sortByPageCount = false;
3383 		} else {
3384 			print_debugger_command_usage(argv[0]);
3385 			return 0;
3386 		}
3387 	}
3388 
3389 	uint32 totalCount = 0;
3390 	uint32 rootCount = 0;
3391 	off_t totalCommitted = 0;
3392 	page_num_t totalPages = 0;
3393 
3394 	VMCache* cache = gDebugCacheList;
3395 	while (cache) {
3396 		totalCount++;
3397 		if (cache->source == NULL) {
3398 			cache_info stackInfo;
3399 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3400 				? sCacheInfoTable[rootCount] : stackInfo;
3401 			rootCount++;
3402 			info.cache = cache;
3403 			info.page_count = 0;
3404 			info.committed = 0;
3405 			update_cache_info_recursively(cache, info);
3406 			totalCommitted += info.committed;
3407 			totalPages += info.page_count;
3408 		}
3409 
3410 		cache = cache->debug_next;
3411 	}
3412 
3413 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3414 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3415 			sortByPageCount
3416 				? &cache_info_compare_page_count
3417 				: &cache_info_compare_committed);
3418 	}
3419 
3420 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3421 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3422 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3423 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3424 			"page count" : "committed size");
3425 
3426 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3427 		for (uint32 i = 0; i < rootCount; i++) {
3428 			cache_info& info = sCacheInfoTable[i];
3429 			dump_caches_recursively(info.cache, info, 0);
3430 		}
3431 	} else
3432 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3433 
3434 	return 0;
3435 }
3436 
3437 #endif	// DEBUG_CACHE_LIST
3438 
3439 
3440 static int
3441 dump_cache(int argc, char** argv)
3442 {
3443 	VMCache* cache;
3444 	bool showPages = false;
3445 	int i = 1;
3446 
3447 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3448 		kprintf("usage: %s [-ps] <address>\n"
3449 			"  if -p is specified, all pages are shown; if -s is used,\n"
3450 			"  only the cache info is shown.\n", argv[0]);
3451 		return 0;
3452 	}
3453 	while (argv[i][0] == '-') {
3454 		char* arg = argv[i] + 1;
3455 		while (arg[0]) {
3456 			if (arg[0] == 'p')
3457 				showPages = true;
3458 			arg++;
3459 		}
3460 		i++;
3461 	}
3462 	if (argv[i] == NULL) {
3463 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3464 		return 0;
3465 	}
3466 
3467 	addr_t address = parse_expression(argv[i]);
3468 	if (address == 0)
3469 		return 0;
3470 
3471 	cache = (VMCache*)address;
3472 
3473 	cache->Dump(showPages);
3474 
3475 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3476 
3477 	return 0;
3478 }
3479 
3480 
3481 static void
3482 dump_area_struct(VMArea* area, bool mappings)
3483 {
3484 	kprintf("AREA: %p\n", area);
3485 	kprintf("name:\t\t'%s'\n", area->name);
3486 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3487 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3488 	kprintf("base:\t\t0x%lx\n", area->Base());
3489 	kprintf("size:\t\t0x%lx\n", area->Size());
3490 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3491 	kprintf("page_protection:%p\n", area->page_protections);
3492 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3493 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3494 	kprintf("cache:\t\t%p\n", area->cache);
3495 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3496 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3497 	kprintf("cache_next:\t%p\n", area->cache_next);
3498 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3499 
3500 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3501 	if (mappings) {
3502 		kprintf("page mappings:\n");
3503 		while (iterator.HasNext()) {
3504 			vm_page_mapping* mapping = iterator.Next();
3505 			kprintf("  %p", mapping->page);
3506 		}
3507 		kprintf("\n");
3508 	} else {
3509 		uint32 count = 0;
3510 		while (iterator.Next() != NULL) {
3511 			count++;
3512 		}
3513 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3514 	}
3515 }
3516 
3517 
3518 static int
3519 dump_area(int argc, char** argv)
3520 {
3521 	bool mappings = false;
3522 	bool found = false;
3523 	int32 index = 1;
3524 	VMArea* area;
3525 	addr_t num;
3526 
3527 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3528 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3529 			"All areas matching either id/address/name are listed. You can\n"
3530 			"restrict the check to a specific item by prefixing the specifier\n"
3531 			"with one of the id/contains/address/name keywords.\n"
3532 			"-m shows the area's mappings as well.\n");
3533 		return 0;
3534 	}
3535 
3536 	if (!strcmp(argv[1], "-m")) {
3537 		mappings = true;
3538 		index++;
3539 	}
3540 
3541 	int32 mode = 0xf;
3542 	if (!strcmp(argv[index], "id"))
3543 		mode = 1;
3544 	else if (!strcmp(argv[index], "contains"))
3545 		mode = 2;
3546 	else if (!strcmp(argv[index], "name"))
3547 		mode = 4;
3548 	else if (!strcmp(argv[index], "address"))
3549 		mode = 0;
3550 	if (mode != 0xf)
3551 		index++;
3552 
3553 	if (index >= argc) {
3554 		kprintf("No area specifier given.\n");
3555 		return 0;
3556 	}
3557 
3558 	num = parse_expression(argv[index]);
3559 
3560 	if (mode == 0) {
3561 		dump_area_struct((struct VMArea*)num, mappings);
3562 	} else {
3563 		// walk through the area list, matching the given id/address/name
3564 
3565 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3566 		while ((area = it.Next()) != NULL) {
3567 			if (((mode & 4) != 0
3568 					&& !strcmp(argv[index], area->name))
3569 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3570 					|| (((mode & 2) != 0 && area->Base() <= num
3571 						&& area->Base() + area->Size() > num))))) {
3572 				dump_area_struct(area, mappings);
3573 				found = true;
3574 			}
3575 		}
3576 
3577 		if (!found)
3578 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3579 	}
3580 
3581 	return 0;
3582 }
3583 
3584 
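/*!	Debugger command: lists all areas, optionally filtered by team ID or by a
	substring of the area name.
*/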
3585 static int
3586 dump_area_list(int argc, char** argv)
3587 {
3588 	VMArea* area;
3589 	const char* name = NULL;
3590 	int32 id = 0;
3591 
3592 	if (argc > 1) {
3593 		id = parse_expression(argv[1]);
3594 		if (id == 0)
3595 			name = argv[1];
3596 	}
3597 
3598 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3599 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3600 		B_PRINTF_POINTER_WIDTH, "size");
3601 
3602 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3603 	while ((area = it.Next()) != NULL) {
3604 		if ((id != 0 && area->address_space->ID() != id)
3605 			|| (name != NULL && strstr(area->name, name) == NULL))
3606 			continue;
3607 
3608 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3609 			area->id, (void*)area->Base(), (void*)area->Size(),
3610 			area->protection, area->wiring, area->name);
3611 	}
3612 	return 0;
3613 }
3614 
3615 
3616 static int
3617 dump_available_memory(int argc, char** argv)
3618 {
3619 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3620 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3621 	return 0;
3622 }
3623 
3624 
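/*!	Debugger command: prints low-level mapping information for a virtual
	address, or, with "-r"/"-p", performs a reverse lookup of a physical
	address or vm_page in the teams' translation maps.
*/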
3625 static int
3626 dump_mapping_info(int argc, char** argv)
3627 {
3628 	bool reverseLookup = false;
3629 	bool pageLookup = false;
3630 
3631 	int argi = 1;
3632 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3633 		const char* arg = argv[argi];
3634 		if (strcmp(arg, "-r") == 0) {
3635 			reverseLookup = true;
3636 		} else if (strcmp(arg, "-p") == 0) {
3637 			reverseLookup = true;
3638 			pageLookup = true;
3639 		} else {
3640 			print_debugger_command_usage(argv[0]);
3641 			return 0;
3642 		}
3643 	}
3644 
3645 	// We need at least one argument, the address. Optionally a thread ID can be
3646 	// specified.
3647 	if (argi >= argc || argi + 2 < argc) {
3648 		print_debugger_command_usage(argv[0]);
3649 		return 0;
3650 	}
3651 
3652 	uint64 addressValue;
3653 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3654 		return 0;
3655 
3656 	Team* team = NULL;
3657 	if (argi < argc) {
3658 		uint64 threadID;
3659 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3660 			return 0;
3661 
3662 		Thread* thread = Thread::GetDebug(threadID);
3663 		if (thread == NULL) {
3664 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3665 			return 0;
3666 		}
3667 
3668 		team = thread->team;
3669 	}
3670 
3671 	if (reverseLookup) {
3672 		phys_addr_t physicalAddress;
3673 		if (pageLookup) {
3674 			vm_page* page = (vm_page*)(addr_t)addressValue;
3675 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3676 		} else {
3677 			physicalAddress = (phys_addr_t)addressValue;
3678 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3679 		}
3680 
3681 		kprintf("    Team     Virtual Address      Area\n");
3682 		kprintf("--------------------------------------\n");
3683 
3684 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3685 			Callback()
3686 				:
3687 				fAddressSpace(NULL)
3688 			{
3689 			}
3690 
3691 			void SetAddressSpace(VMAddressSpace* addressSpace)
3692 			{
3693 				fAddressSpace = addressSpace;
3694 			}
3695 
3696 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3697 			{
3698 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3699 					virtualAddress);
3700 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3701 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3702 				else
3703 					kprintf("\n");
3704 				return false;
3705 			}
3706 
3707 		private:
3708 			VMAddressSpace*	fAddressSpace;
3709 		} callback;
3710 
3711 		if (team != NULL) {
3712 			// team specified -- get its address space
3713 			VMAddressSpace* addressSpace = team->address_space;
3714 			if (addressSpace == NULL) {
3715 				kprintf("Failed to get address space!\n");
3716 				return 0;
3717 			}
3718 
3719 			callback.SetAddressSpace(addressSpace);
3720 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3721 				physicalAddress, callback);
3722 		} else {
3723 			// no team specified -- iterate through all address spaces
3724 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3725 				addressSpace != NULL;
3726 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3727 				callback.SetAddressSpace(addressSpace);
3728 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3729 					physicalAddress, callback);
3730 			}
3731 		}
3732 	} else {
3733 		// get the address space
3734 		addr_t virtualAddress = (addr_t)addressValue;
3735 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3736 		VMAddressSpace* addressSpace;
3737 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3738 			addressSpace = VMAddressSpace::Kernel();
3739 		} else if (team != NULL) {
3740 			addressSpace = team->address_space;
3741 		} else {
3742 			Thread* thread = debug_get_debugged_thread();
3743 			if (thread == NULL || thread->team == NULL) {
3744 				kprintf("Failed to get team!\n");
3745 				return 0;
3746 			}
3747 
3748 			addressSpace = thread->team->address_space;
3749 		}
3750 
3751 		if (addressSpace == NULL) {
3752 			kprintf("Failed to get address space!\n");
3753 			return 0;
3754 		}
3755 
3756 		// let the translation map implementation do the job
3757 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3758 	}
3759 
3760 	return 0;
3761 }
3762 
3763 
3764 /*!	Deletes all areas and reserved regions in the given address space.
3765 
3766 	The caller must ensure that none of the areas has any wired ranges.
3767 
3768 	\param addressSpace The address space.
3769 	\param deletingAddressSpace \c true, if the address space is in the process
3770 		of being deleted.
3771 */
3772 void
3773 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3774 {
3775 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3776 		addressSpace->ID()));
3777 
3778 	addressSpace->WriteLock();
3779 
3780 	// remove all reserved areas in this address space
3781 	addressSpace->UnreserveAllAddressRanges(0);
3782 
3783 	// delete all the areas in this address space
3784 	while (VMArea* area = addressSpace->FirstArea()) {
3785 		ASSERT(!area->IsWired());
3786 		delete_area(addressSpace, area, deletingAddressSpace);
3787 	}
3788 
3789 	addressSpace->WriteUnlock();
3790 }
3791 
3792 
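/*!	Returns the ID of the area covering the given address in the current
	team's (for user addresses) or the kernel's address space. For non-kernel
	callers the area must be readable or writable by userland.
*/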
3793 static area_id
3794 vm_area_for(addr_t address, bool kernel)
3795 {
3796 	team_id team;
3797 	if (IS_USER_ADDRESS(address)) {
3798 		// we try the user team address space, if any
3799 		team = VMAddressSpace::CurrentID();
3800 		if (team < 0)
3801 			return team;
3802 	} else
3803 		team = VMAddressSpace::KernelID();
3804 
3805 	AddressSpaceReadLocker locker(team);
3806 	if (!locker.IsLocked())
3807 		return B_BAD_TEAM_ID;
3808 
3809 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3810 	if (area != NULL) {
3811 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3812 			return B_ERROR;
3813 
3814 		return area->id;
3815 	}
3816 
3817 	return B_ERROR;
3818 }
3819 
3820 
3821 /*!	Frees physical pages that were used during the boot process.
3822 	\a end is inclusive.
3823 */
3824 static void
3825 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3826 {
3827 	// free all physical pages in the specified range
3828 
3829 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3830 		phys_addr_t physicalAddress;
3831 		uint32 flags;
3832 
3833 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3834 			&& (flags & PAGE_PRESENT) != 0) {
3835 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3836 			if (page != NULL && page->State() != PAGE_STATE_FREE
3837 					&& page->State() != PAGE_STATE_CLEAR
3838 					&& page->State() != PAGE_STATE_UNUSED) {
3839 				DEBUG_PAGE_ACCESS_START(page);
3840 				vm_page_set_state(page, PAGE_STATE_FREE);
3841 			}
3842 		}
3843 	}
3844 
3845 	// unmap the memory
3846 	map->Unmap(start, end);
3847 }
3848 
3849 
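/*!	Unmaps and frees all physical pages in the given kernel virtual range
	that are not covered by any kernel area anymore.
*/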
3850 void
3851 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3852 {
3853 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3854 	addr_t end = start + (size - 1);
3855 	addr_t lastEnd = start;
3856 
3857 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3858 		(void*)start, (void*)end));
3859 
3860 	// The areas are sorted in virtual address space order, so
3861 	// we just have to find the holes between them that fall
3862 	// into the area we should dispose
3863 
3864 	map->Lock();
3865 
3866 	for (VMAddressSpace::AreaIterator it
3867 				= VMAddressSpace::Kernel()->GetAreaIterator();
3868 			VMArea* area = it.Next();) {
3869 		addr_t areaStart = area->Base();
3870 		addr_t areaEnd = areaStart + (area->Size() - 1);
3871 
3872 		if (areaEnd < start)
3873 			continue;
3874 
3875 		if (areaStart > end) {
3876 			// we are done, the area is already beyond what we have to free
3877 			break;
3878 		}
3879 
3880 		if (areaStart > lastEnd) {
3881 			// this is something we can free
3882 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3883 				(void*)areaStart));
3884 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3885 		}
3886 
3887 		if (areaEnd >= end) {
3888 			lastEnd = areaEnd;
3889 				// no +1 to prevent potential overflow
3890 			break;
3891 		}
3892 
3893 		lastEnd = areaEnd + 1;
3894 	}
3895 
3896 	if (lastEnd < end) {
3897 		// we can also get rid of some space at the end of the area
3898 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3899 			(void*)end));
3900 		unmap_and_free_physical_pages(map, lastEnd, end);
3901 	}
3902 
3903 	map->Unlock();
3904 }
3905 
3906 
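/*!	Creates "<image>_text" and "<image>_data" areas for the already mapped
	text and data segments of the given preloaded (ELF) image.
*/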
3907 static void
3908 create_preloaded_image_areas(struct preloaded_image* _image)
3909 {
3910 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3911 	char name[B_OS_NAME_LENGTH];
3912 	void* address;
3913 	int32 length;
3914 
3915 	// use file name to create a good area name
3916 	char* fileName = strrchr(image->name, '/');
3917 	if (fileName == NULL)
3918 		fileName = image->name;
3919 	else
3920 		fileName++;
3921 
3922 	length = strlen(fileName);
3923 	// make sure there is enough space for the suffix
3924 	if (length > 25)
3925 		length = 25;
3926 
3927 	memcpy(name, fileName, length);
3928 	strcpy(name + length, "_text");
3929 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3930 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3931 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3932 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3933 		// this will later be remapped read-only/executable by the
3934 		// ELF initialization code
3935 
3936 	strcpy(name + length, "_data");
3937 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3938 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3939 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3940 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3941 }
3942 
3943 
3944 /*!	Frees all areas previously created for the kernel arguments.
3945 	Any boot loader resources contained in those arguments must not be
3946 	accessed anymore past this point.
3947 */
3948 void
3949 vm_free_kernel_args(kernel_args* args)
3950 {
3951 	uint32 i;
3952 
3953 	TRACE(("vm_free_kernel_args()\n"));
3954 
3955 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3956 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3957 		if (area >= B_OK)
3958 			delete_area(area);
3959 	}
3960 }
3961 
3962 
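/*!	Creates areas covering the ranges occupied by the kernel_args data, so
	that this memory is properly accounted for and won't be reused.
*/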
3963 static void
3964 allocate_kernel_args(kernel_args* args)
3965 {
3966 	TRACE(("allocate_kernel_args()\n"));
3967 
3968 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3969 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3970 
3971 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3972 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3973 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3974 	}
3975 }
3976 
3977 
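/*!	Releases the kernel address range reservations made by
	reserve_boot_loader_ranges().
*/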
3978 static void
3979 unreserve_boot_loader_ranges(kernel_args* args)
3980 {
3981 	TRACE(("unreserve_boot_loader_ranges()\n"));
3982 
3983 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3984 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3985 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3986 			args->virtual_allocated_range[i].size);
3987 	}
3988 }
3989 
3990 
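/*!	Reserves the kernel address ranges the boot loader has already allocated,
	so nothing else gets mapped there before the kernel has taken them over.
*/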
3991 static void
3992 reserve_boot_loader_ranges(kernel_args* args)
3993 {
3994 	TRACE(("reserve_boot_loader_ranges()\n"));
3995 
3996 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3997 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3998 
3999 		// If the address is not a kernel address, we just skip it. The
4000 		// architecture specific code has to deal with it.
4001 		if (!IS_KERNEL_ADDRESS(address)) {
4002 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
4003 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
4004 			continue;
4005 		}
4006 
4007 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
4008 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4009 		if (status < B_OK)
4010 			panic("could not reserve boot loader ranges\n");
4011 	}
4012 }
4013 
4014 
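/*!	Finds a free virtual address range of the given size (and alignment)
	between, after, or before the kernel_args' virtual allocated ranges and
	extends the adjacent range bookkeeping accordingly. Returns 0 on failure.
*/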
4015 static addr_t
4016 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
4017 {
4018 	size = PAGE_ALIGN(size);
4019 
4020 	// find a slot in the virtual allocation addr range
4021 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
4022 		// check to see if the space between this one and the last is big enough
4023 		addr_t rangeStart = args->virtual_allocated_range[i].start;
4024 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4025 			+ args->virtual_allocated_range[i - 1].size;
4026 
4027 		addr_t base = alignment > 0
4028 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4029 
4030 		if (base >= KERNEL_BASE && base < rangeStart
4031 				&& rangeStart - base >= size) {
4032 			args->virtual_allocated_range[i - 1].size
4033 				+= base + size - previousRangeEnd;
4034 			return base;
4035 		}
4036 	}
4037 
4038 	// we didn't find a gap between the allocated ranges. this is ok.
4039 	// see if there's a gap after the last one
4040 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4041 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4042 		+ args->virtual_allocated_range[lastEntryIndex].size;
4043 	addr_t base = alignment > 0
4044 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4045 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4046 		args->virtual_allocated_range[lastEntryIndex].size
4047 			+= base + size - lastRangeEnd;
4048 		return base;
4049 	}
4050 
4051 	// see if there's a gap before the first one
4052 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4053 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4054 		base = rangeStart - size;
4055 		if (alignment > 0)
4056 			base = ROUNDDOWN(base, alignment);
4057 
4058 		if (base >= KERNEL_BASE) {
4059 			args->virtual_allocated_range[0].start = base;
4060 			args->virtual_allocated_range[0].size += rangeStart - base;
4061 			return base;
4062 		}
4063 	}
4064 
4065 	return 0;
4066 }
4067 
4068 
4069 static bool
4070 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4071 {
4072 	// TODO: horrible brute-force method of determining if the page can be
4073 	// allocated
4074 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4075 		if (address >= args->physical_memory_range[i].start
4076 			&& address < args->physical_memory_range[i].start
4077 				+ args->physical_memory_range[i].size)
4078 			return true;
4079 	}
4080 	return false;
4081 }
4082 
4083 
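/*!	Allocates one physical page for early (pre-VM) use by growing one of the
	kernel_args' physical allocated ranges upwards or downwards into adjacent
	physical memory. Returns the page number, or 0 if no page is available.
*/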
4084 page_num_t
4085 vm_allocate_early_physical_page(kernel_args* args)
4086 {
4087 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4088 		phys_addr_t nextPage;
4089 
4090 		nextPage = args->physical_allocated_range[i].start
4091 			+ args->physical_allocated_range[i].size;
4092 		// see if the page right after this allocated paddr run can be allocated
4093 		if (i + 1 < args->num_physical_allocated_ranges
4094 			&& args->physical_allocated_range[i + 1].size != 0) {
4095 			// see if the next page will collide with the next allocated range
4096 			if (nextPage >= args->physical_allocated_range[i+1].start)
4097 				continue;
4098 		}
4099 		// see if the next physical page fits in the memory block
4100 		if (is_page_in_physical_memory_range(args, nextPage)) {
4101 			// we got one!
4102 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4103 			return nextPage / B_PAGE_SIZE;
4104 		}
4105 	}
4106 
4107 	// Expanding upwards didn't work, try going downwards.
4108 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4109 		phys_addr_t nextPage;
4110 
4111 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4112 		// see if the page right before this allocated paddr run can be allocated
4113 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4114 			// see if this page will collide with the previous allocated range
4115 			if (nextPage < args->physical_allocated_range[i-1].start
4116 				+ args->physical_allocated_range[i-1].size)
4117 				continue;
4118 		}
4119 		// see if the next physical page fits in the memory block
4120 		if (is_page_in_physical_memory_range(args, nextPage)) {
4121 			// we got one!
4122 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4123 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4124 			return nextPage / B_PAGE_SIZE;
4125 		}
4126 	}
4127 
4128 	return 0;
4129 		// could not allocate a block
4130 }
4131 
4132 
4133 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4134 	allocate some pages before the VM is completely up.
4135 */
4136 addr_t
4137 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4138 	uint32 attributes, addr_t alignment)
4139 {
4140 	if (physicalSize > virtualSize)
4141 		physicalSize = virtualSize;
4142 
4143 	// find the vaddr to allocate at
4144 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4145 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4146 	if (virtualBase == 0) {
4147 		panic("vm_allocate_early: could not allocate virtual address\n");
4148 		return 0;
4149 	}
4150 
4151 	// map the pages
4152 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4153 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4154 		if (physicalAddress == 0)
4155 			panic("error allocating early page!\n");
4156 
4157 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4158 
4159 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4160 			physicalAddress * B_PAGE_SIZE, attributes,
4161 			&vm_allocate_early_physical_page);
4162 	}
4163 
4164 	return virtualBase;
4165 }
4166 
4167 
4168 /*!	The main entry point for initializing the VM. */
4169 status_t
4170 vm_init(kernel_args* args)
4171 {
4172 	struct preloaded_image* image;
4173 	void* address;
4174 	status_t err = 0;
4175 	uint32 i;
4176 
4177 	TRACE(("vm_init: entry\n"));
4178 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4179 	err = arch_vm_init(args);
4180 
4181 	// initialize some globals
4182 	vm_page_init_num_pages(args);
4183 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4184 
4185 	slab_init(args);
4186 
4187 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4188 	off_t heapSize = INITIAL_HEAP_SIZE;
4189 	// try to accommodate low memory systems
4190 	while (heapSize > sAvailableMemory / 8)
4191 		heapSize /= 2;
4192 	if (heapSize < 1024 * 1024)
4193 		panic("vm_init: go buy some RAM please.");
4194 
4195 	// map in the new heap and initialize it
4196 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4197 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4198 	TRACE(("heap at 0x%lx\n", heapBase));
4199 	heap_init(heapBase, heapSize);
4200 #endif
4201 
4202 	// initialize the free page list and physical page mapper
4203 	vm_page_init(args);
4204 
4205 	// initialize the cache allocators
4206 	vm_cache_init(args);
4207 
4208 	{
4209 		status_t error = VMAreaHash::Init();
4210 		if (error != B_OK)
4211 			panic("vm_init: error initializing area hash table\n");
4212 	}
4213 
4214 	VMAddressSpace::Init();
4215 	reserve_boot_loader_ranges(args);
4216 
4217 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4218 	heap_init_post_area();
4219 #endif
4220 
4221 	// Do any further initialization that the architecture dependent layers may
4222 	// need now
4223 	arch_vm_translation_map_init_post_area(args);
4224 	arch_vm_init_post_area(args);
4225 	vm_page_init_post_area(args);
4226 	slab_init_post_area();
4227 
4228 	// allocate areas to represent stuff that already exists
4229 
4230 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4231 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4232 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4233 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4234 #endif
4235 
4236 	allocate_kernel_args(args);
4237 
4238 	create_preloaded_image_areas(args->kernel_image);
4239 
4240 	// allocate areas for preloaded images
4241 	for (image = args->preloaded_images; image != NULL; image = image->next)
4242 		create_preloaded_image_areas(image);
4243 
4244 	// allocate kernel stacks
4245 	for (i = 0; i < args->num_cpus; i++) {
4246 		char name[64];
4247 
4248 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4249 		address = (void*)args->cpu_kstack[i].start;
4250 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4251 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4252 	}
4253 
4254 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4255 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4256 
4257 #if PARANOID_KERNEL_MALLOC
4258 	vm_block_address_range("uninitialized heap memory",
4259 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4260 #endif
4261 #if PARANOID_KERNEL_FREE
4262 	vm_block_address_range("freed heap memory",
4263 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4264 #endif
4265 
4266 	// create the object cache for the page mappings
4267 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4268 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4269 		NULL, NULL);
4270 	if (gPageMappingsObjectCache == NULL)
4271 		panic("failed to create page mappings object cache");
4272 
4273 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4274 
4275 #if DEBUG_CACHE_LIST
4276 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4277 		virtual_address_restrictions virtualRestrictions = {};
4278 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4279 		physical_address_restrictions physicalRestrictions = {};
4280 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4281 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4282 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4283 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4284 			&physicalRestrictions, (void**)&sCacheInfoTable);
4285 	}
4286 #endif	// DEBUG_CACHE_LIST
4287 
4288 	// add some debugger commands
4289 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4290 	add_debugger_command("area", &dump_area,
4291 		"Dump info about a particular area");
4292 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4293 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4294 #if DEBUG_CACHE_LIST
4295 	if (sCacheInfoTable != NULL) {
4296 		add_debugger_command_etc("caches", &dump_caches,
4297 			"List all VMCache trees",
4298 			"[ \"-c\" ]\n"
4299 			"All cache trees are listed sorted in decreasing order by number "
4300 				"of\n"
4301 			"used pages or, if \"-c\" is specified, by size of committed "
4302 				"memory.\n",
4303 			0);
4304 	}
4305 #endif
4306 	add_debugger_command("avail", &dump_available_memory,
4307 		"Dump available memory");
4308 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4309 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4310 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4311 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4312 	add_debugger_command("string", &display_mem, "dump strings");
4313 
4314 	add_debugger_command_etc("mapping", &dump_mapping_info,
4315 		"Print address mapping information",
4316 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4317 		"Prints low-level page mapping information for a given address. If\n"
4318 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4319 		"address that is looked up in the translation map of the current\n"
4320 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4321 		"\"-r\" is specified, <address> is a physical address that is\n"
4322 		"searched in the translation map of all teams, respectively the team\n"
4323 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4324 		"<address> is the address of a vm_page structure. The behavior is\n"
4325 		"equivalent to specifying \"-r\" with the physical address of that\n"
4326 		"page.\n",
4327 		0);
4328 
4329 	TRACE(("vm_init: exit\n"));
4330 
4331 	vm_cache_init_post_heap();
4332 
4333 	return err;
4334 }
4335 
4336 
4337 status_t
4338 vm_init_post_sem(kernel_args* args)
4339 {
4340 	// This frees all unused boot loader resources and makes their space
4341 	// available again
4342 	arch_vm_init_end(args);
4343 	unreserve_boot_loader_ranges(args);
4344 
4345 	// fill in all of the semaphores that were not allocated before
4346 	// since we're still single threaded and only the kernel address space
4347 	// exists, it isn't that hard to find all of the ones we need to create
4348 
4349 	arch_vm_translation_map_init_post_sem(args);
4350 
4351 	slab_init_post_sem();
4352 
4353 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4354 	heap_init_post_sem();
4355 #endif
4356 
4357 	return B_OK;
4358 }
4359 
4360 
4361 status_t
4362 vm_init_post_thread(kernel_args* args)
4363 {
4364 	vm_page_init_post_thread(args);
4365 	slab_init_post_thread();
4366 	return heap_init_post_thread();
4367 }
4368 
4369 
4370 status_t
4371 vm_init_post_modules(kernel_args* args)
4372 {
4373 	return arch_vm_init_post_modules(args);
4374 }
4375 
4376 
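/*!	Increments the current thread's page_faults_allowed counter; balanced by
	forbid_page_faults().
*/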
4377 void
4378 permit_page_faults(void)
4379 {
4380 	Thread* thread = thread_get_current_thread();
4381 	if (thread != NULL)
4382 		atomic_add(&thread->page_faults_allowed, 1);
4383 }
4384 
4385 
4386 void
4387 forbid_page_faults(void)
4388 {
4389 	Thread* thread = thread_get_current_thread();
4390 	if (thread != NULL)
4391 		atomic_add(&thread->page_faults_allowed, -1);
4392 }
4393 
4394 
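/*!	Entry point for resolving a page fault (called from the architecture
	specific fault handling code). Determines the affected address space, lets
	vm_soft_fault() do the actual work, and on failure either redirects a
	kernel fault to the thread's fault handler, panics, or sends a SIGSEGV to
	the faulting userland thread. Always returns \c B_HANDLED_INTERRUPT.
*/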
4395 status_t
4396 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4397 	bool isUser, addr_t* newIP)
4398 {
4399 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4400 		faultAddress));
4401 
4402 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4403 
4404 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4405 	VMAddressSpace* addressSpace = NULL;
4406 
4407 	status_t status = B_OK;
4408 	*newIP = 0;
4409 	atomic_add((int32*)&sPageFaults, 1);
4410 
4411 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4412 		addressSpace = VMAddressSpace::GetKernel();
4413 	} else if (IS_USER_ADDRESS(pageAddress)) {
4414 		addressSpace = VMAddressSpace::GetCurrent();
4415 		if (addressSpace == NULL) {
4416 			if (!isUser) {
4417 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4418 					"memory!\n");
4419 				status = B_BAD_ADDRESS;
4420 				TPF(PageFaultError(-1,
4421 					VMPageFaultTracing
4422 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4423 			} else {
4424 				// XXX weird state.
4425 				panic("vm_page_fault: non kernel thread accessing user memory "
4426 					"that doesn't exist!\n");
4427 				status = B_BAD_ADDRESS;
4428 			}
4429 		}
4430 	} else {
4431 		// the hit was probably in the 64k DMZ between kernel and user space
4432 		// this keeps a user space thread from passing a buffer that crosses
4433 		// into kernel space
4434 		status = B_BAD_ADDRESS;
4435 		TPF(PageFaultError(-1,
4436 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4437 	}
4438 
4439 	if (status == B_OK) {
4440 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4441 			isUser, NULL);
4442 	}
4443 
4444 	if (status < B_OK) {
4445 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4446 			"0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n",
4447 			strerror(status), address, faultAddress, isWrite, isUser, isExecute,
4448 			thread_get_current_thread_id());
4449 		if (!isUser) {
4450 			Thread* thread = thread_get_current_thread();
4451 			if (thread != NULL && thread->fault_handler != 0) {
4452 				// this will cause the arch dependent page fault handler to
4453 				// modify the IP on the interrupt frame or whatever to return
4454 				// to this address
4455 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4456 			} else {
4457 				// unhandled page fault in the kernel
4458 				panic("vm_page_fault: unhandled page fault in kernel space at "
4459 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4460 			}
4461 		} else {
4462 			Thread* thread = thread_get_current_thread();
4463 
4464 #ifdef TRACE_FAULTS
4465 			VMArea* area = NULL;
4466 			if (addressSpace != NULL) {
4467 				addressSpace->ReadLock();
4468 				area = addressSpace->LookupArea(faultAddress);
4469 			}
4470 
4471 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4472 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4473 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4474 				thread->team->Name(), thread->team->id,
4475 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4476 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4477 					area->Base() : 0x0));
4478 
4479 			if (addressSpace != NULL)
4480 				addressSpace->ReadUnlock();
4481 #endif
4482 
4483 			// If the thread has a signal handler for SIGSEGV, we simply
4484 			// send it the signal. Otherwise we notify the user debugger
4485 			// first.
4486 			struct sigaction action;
4487 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4488 					&& action.sa_handler != SIG_DFL
4489 					&& action.sa_handler != SIG_IGN)
4490 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4491 					SIGSEGV)) {
4492 				Signal signal(SIGSEGV,
4493 					status == B_PERMISSION_DENIED
4494 						? SEGV_ACCERR : SEGV_MAPERR,
4495 					EFAULT, thread->team->id);
4496 				signal.SetAddress((void*)address);
4497 				send_signal_to_thread(thread, signal, 0);
4498 			}
4499 		}
4500 	}
4501 
4502 	if (addressSpace != NULL)
4503 		addressSpace->Put();
4504 
4505 	return B_HANDLED_INTERRUPT;
4506 }
4507 
4508 
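/*!	Bundles the state needed while resolving a soft fault: the address space
	and cache chain lockers, the translation map, the top cache and fault
	offset, and the page reservation used for any allocations.
*/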
4509 struct PageFaultContext {
4510 	AddressSpaceReadLocker	addressSpaceLocker;
4511 	VMCacheChainLocker		cacheChainLocker;
4512 
4513 	VMTranslationMap*		map;
4514 	VMCache*				topCache;
4515 	off_t					cacheOffset;
4516 	vm_page_reservation		reservation;
4517 	bool					isWrite;
4518 
4519 	// return values
4520 	vm_page*				page;
4521 	bool					restart;
4522 	bool					pageAllocated;
4523 
4524 
4525 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4526 		:
4527 		addressSpaceLocker(addressSpace, true),
4528 		map(addressSpace->TranslationMap()),
4529 		isWrite(isWrite)
4530 	{
4531 	}
4532 
4533 	~PageFaultContext()
4534 	{
4535 		UnlockAll();
4536 		vm_page_unreserve_pages(&reservation);
4537 	}
4538 
4539 	void Prepare(VMCache* topCache, off_t cacheOffset)
4540 	{
4541 		this->topCache = topCache;
4542 		this->cacheOffset = cacheOffset;
4543 		page = NULL;
4544 		restart = false;
4545 		pageAllocated = false;
4546 
4547 		cacheChainLocker.SetTo(topCache);
4548 	}
4549 
4550 	void UnlockAll(VMCache* exceptCache = NULL)
4551 	{
4552 		topCache = NULL;
4553 		addressSpaceLocker.Unlock();
4554 		cacheChainLocker.Unlock(exceptCache);
4555 	}
4556 };
4557 
4558 
4559 /*!	Gets the page that should be mapped into the area.
4560 	Returns an error code other than \c B_OK, if the page couldn't be found or
4561 	paged in. The locking state of the address space and the caches is undefined
4562 	in that case.
4563 	Returns \c B_OK with \c context.restart set to \c true, if the functions
4564 	had to unlock the address space and all caches and is supposed to be called
4565 	again.
4566 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4567 	found. It is returned in \c context.page. The address space will still be
4568 	locked as well as all caches starting from the top cache to at least the
4569 	cache the page lives in.
4570 */
4571 static status_t
4572 fault_get_page(PageFaultContext& context)
4573 {
4574 	VMCache* cache = context.topCache;
4575 	VMCache* lastCache = NULL;
4576 	vm_page* page = NULL;
4577 
4578 	while (cache != NULL) {
4579 		// We already hold the lock of the cache at this point.
4580 
4581 		lastCache = cache;
4582 
4583 		page = cache->LookupPage(context.cacheOffset);
4584 		if (page != NULL && page->busy) {
4585 			// page must be busy -- wait for it to become unbusy
4586 			context.UnlockAll(cache);
4587 			cache->ReleaseRefLocked();
4588 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4589 
4590 			// restart the whole process
4591 			context.restart = true;
4592 			return B_OK;
4593 		}
4594 
4595 		if (page != NULL)
4596 			break;
4597 
4598 		// The current cache does not contain the page we're looking for.
4599 
4600 		// see if the backing store has it
4601 		if (cache->HasPage(context.cacheOffset)) {
4602 			// insert a fresh page and mark it busy -- we're going to read it in
4603 			page = vm_page_allocate_page(&context.reservation,
4604 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4605 			cache->InsertPage(page, context.cacheOffset);
4606 
4607 			// We need to unlock all caches and the address space while reading
4608 			// the page in. Keep a reference to the cache around.
4609 			cache->AcquireRefLocked();
4610 			context.UnlockAll();
4611 
4612 			// read the page in
4613 			generic_io_vec vec;
4614 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4615 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4616 
4617 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4618 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4619 
4620 			cache->Lock();
4621 
4622 			if (status < B_OK) {
4623 				// on error remove and free the page
4624 				dprintf("reading page from cache %p returned: %s!\n",
4625 					cache, strerror(status));
4626 
4627 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4628 				cache->RemovePage(page);
4629 				vm_page_set_state(page, PAGE_STATE_FREE);
4630 
4631 				cache->ReleaseRefAndUnlock();
4632 				return status;
4633 			}
4634 
4635 			// mark the page unbusy again
4636 			cache->MarkPageUnbusy(page);
4637 
4638 			DEBUG_PAGE_ACCESS_END(page);
4639 
4640 			// Since we needed to unlock everything temporarily, the area
4641 			// situation might have changed. So we need to restart the whole
4642 			// process.
4643 			cache->ReleaseRefAndUnlock();
4644 			context.restart = true;
4645 			return B_OK;
4646 		}
4647 
4648 		cache = context.cacheChainLocker.LockSourceCache();
4649 	}
4650 
4651 	if (page == NULL) {
4652 		// There was no adequate page, determine the cache for a clean one.
4653 		// Read-only pages come in the deepest cache, only the top most cache
4654 		// may have direct write access.
4655 		cache = context.isWrite ? context.topCache : lastCache;
4656 
4657 		// allocate a clean page
4658 		page = vm_page_allocate_page(&context.reservation,
4659 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4660 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4661 			page->physical_page_number));
4662 
4663 		// insert the new page into our cache
4664 		cache->InsertPage(page, context.cacheOffset);
4665 		context.pageAllocated = true;
4666 	} else if (page->Cache() != context.topCache && context.isWrite) {
4667 		// We have a page that has the data we want, but in the wrong cache
4668 		// object so we need to copy it and stick it into the top cache.
4669 		vm_page* sourcePage = page;
4670 
4671 		// TODO: If memory is low, it might be a good idea to steal the page
4672 		// from our source cache -- if possible, that is.
4673 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4674 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4675 
4676 		// To not needlessly kill concurrency we unlock all caches but the top
4677 		// one while copying the page. Lacking another mechanism to ensure that
4678 		// the source page doesn't disappear, we mark it busy.
4679 		sourcePage->busy = true;
4680 		context.cacheChainLocker.UnlockKeepRefs(true);
4681 
4682 		// copy the page
4683 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4684 			sourcePage->physical_page_number * B_PAGE_SIZE);
4685 
4686 		context.cacheChainLocker.RelockCaches(true);
4687 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4688 
4689 		// insert the new page into our cache
4690 		context.topCache->InsertPage(page, context.cacheOffset);
4691 		context.pageAllocated = true;
4692 	} else
4693 		DEBUG_PAGE_ACCESS_START(page);
4694 
4695 	context.page = page;
4696 	return B_OK;
4697 }
4698 
4699 
4700 /*!	Makes sure the address in the given address space is mapped.
4701 
4702 	\param addressSpace The address space.
4703 	\param originalAddress The address. Doesn't need to be page aligned.
4704 	\param isWrite If \c true the address shall be write-accessible.
4705 	\param isUser If \c true the access is requested by a userland team.
4706 	\param wirePage On success, if non \c NULL, the wired count of the page
4707 		mapped at the given address is incremented and the page is returned
4708 		via this parameter.
4709 	\return \c B_OK on success, another error code otherwise.
4710 */
4711 static status_t
4712 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4713 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4714 {
4715 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4716 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4717 		originalAddress, isWrite, isUser));
4718 
4719 	PageFaultContext context(addressSpace, isWrite);
4720 
4721 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4722 	status_t status = B_OK;
4723 
4724 	addressSpace->IncrementFaultCount();
4725 
4726 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4727 	// the pages upfront makes sure we don't have any cache locked, so that the
4728 	// page daemon/thief can do their job without problems.
4729 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4730 		originalAddress);
4731 	context.addressSpaceLocker.Unlock();
4732 	vm_page_reserve_pages(&context.reservation, reservePages,
4733 		addressSpace == VMAddressSpace::Kernel()
4734 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4735 
4736 	while (true) {
4737 		context.addressSpaceLocker.Lock();
4738 
4739 		// get the area the fault was in
4740 		VMArea* area = addressSpace->LookupArea(address);
4741 		if (area == NULL) {
4742 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4743 				"space\n", originalAddress);
4744 			TPF(PageFaultError(-1,
4745 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4746 			status = B_BAD_ADDRESS;
4747 			break;
4748 		}
4749 
4750 		// check permissions
4751 		uint32 protection = get_area_page_protection(area, address);
4752 		if (isUser && (protection & B_USER_PROTECTION) == 0
4753 				&& (area->protection & B_KERNEL_AREA) != 0) {
4754 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4755 				area->id, (void*)originalAddress);
4756 			TPF(PageFaultError(area->id,
4757 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4758 			status = B_PERMISSION_DENIED;
4759 			break;
4760 		}
4761 		if (isWrite && (protection
4762 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4763 			dprintf("write access attempted on write-protected area 0x%"
4764 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4765 			TPF(PageFaultError(area->id,
4766 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4767 			status = B_PERMISSION_DENIED;
4768 			break;
4769 		} else if (isExecute && (protection
4770 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4771 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4772 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4773 			TPF(PageFaultError(area->id,
4774 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4775 			status = B_PERMISSION_DENIED;
4776 			break;
4777 		} else if (!isWrite && !isExecute && (protection
4778 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4779 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4780 				" at %p\n", area->id, (void*)originalAddress);
4781 			TPF(PageFaultError(area->id,
4782 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4783 			status = B_PERMISSION_DENIED;
4784 			break;
4785 		}
4786 
4787 		// We have the area, it was a valid access, so let's try to resolve the
4788 		// page fault now.
4789 		// At first, the top most cache from the area is investigated.
4790 
4791 		context.Prepare(vm_area_get_locked_cache(area),
4792 			address - area->Base() + area->cache_offset);
4793 
4794 		// See if this cache has a fault handler -- this will do all the work
4795 		// for us.
4796 		{
4797 			// Note, since the page fault is resolved with interrupts enabled,
4798 			// the fault handler could be called more than once for the same
4799 			// reason -- the store must take this into account.
4800 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4801 			if (status != B_BAD_HANDLER)
4802 				break;
4803 		}
4804 
4805 		// The top most cache has no fault handler, so let's see if the cache or
4806 		// its sources already have the page we're searching for (we're going
4807 		// from top to bottom).
4808 		status = fault_get_page(context);
4809 		if (status != B_OK) {
4810 			TPF(PageFaultError(area->id, status));
4811 			break;
4812 		}
4813 
4814 		if (context.restart)
4815 			continue;
4816 
4817 		// All went fine, all there is left to do is to map the page into the
4818 		// address space.
4819 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4820 			context.page));
4821 
4822 		// If the page doesn't reside in the area's cache, we need to make sure
4823 		// it's mapped in read-only, so that we cannot overwrite someone else's
4824 		// data (copy-on-write)
4825 		uint32 newProtection = protection;
4826 		if (context.page->Cache() != context.topCache && !isWrite)
4827 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4828 
4829 		bool unmapPage = false;
4830 		bool mapPage = true;
4831 
4832 		// check whether there's already a page mapped at the address
4833 		context.map->Lock();
4834 
4835 		phys_addr_t physicalAddress;
4836 		uint32 flags;
4837 		vm_page* mappedPage = NULL;
4838 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4839 			&& (flags & PAGE_PRESENT) != 0
4840 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4841 				!= NULL) {
4842 			// Yep there's already a page. If it's ours, we can simply adjust
4843 			// its protection. Otherwise we have to unmap it.
4844 			if (mappedPage == context.page) {
4845 				context.map->ProtectPage(area, address, newProtection);
4846 					// Note: We assume that ProtectPage() is atomic (i.e.
4847 					// the page isn't temporarily unmapped), otherwise we'd have
4848 					// to make sure it isn't wired.
4849 				mapPage = false;
4850 			} else
4851 				unmapPage = true;
4852 		}
4853 
4854 		context.map->Unlock();
4855 
4856 		if (unmapPage) {
4857 			// If the page is wired, we can't unmap it. Wait until it is unwired
4858 			// again and restart. Note that the page cannot be wired for
4859 			// writing, since it isn't in the topmost cache. So we can safely
4860 			// ignore ranges wired for writing (our own and other concurrent
4861 			// wiring attempts in progress) and in fact have to do that to avoid
4862 			// a deadlock.
4863 			VMAreaUnwiredWaiter waiter;
4864 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4865 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4866 				// unlock everything and wait
4867 				if (context.pageAllocated) {
4868 					// ... but since we allocated a page and inserted it into
4869 					// the top cache, remove and free it first. Otherwise we'd
4870 					// have a page from a lower cache mapped while an upper
4871 					// cache has a page that would shadow it.
4872 					context.topCache->RemovePage(context.page);
4873 					vm_page_free_etc(context.topCache, context.page,
4874 						&context.reservation);
4875 				} else
4876 					DEBUG_PAGE_ACCESS_END(context.page);
4877 
4878 				context.UnlockAll();
4879 				waiter.waitEntry.Wait();
4880 				continue;
4881 			}
4882 
4883 			// Note: The mapped page is a page of a lower cache. We are
4884 			// guaranteed to have that cache locked, our new page is a copy of
4885 			// that page, and the page is not busy. The logic for that guarantee
4886 			// is as follows: Since the page is mapped, it must live in the top
4887 			// cache (ruled out above) or any of its lower caches, and there is
4888 			// (was before the new page was inserted) no other page in any
4889 			// cache between the top cache and the page's cache (otherwise that
4890 			// would be mapped instead). That in turn means that our algorithm
4891 			// must have found it and therefore it cannot be busy either.
4892 			DEBUG_PAGE_ACCESS_START(mappedPage);
4893 			unmap_page(area, address);
4894 			DEBUG_PAGE_ACCESS_END(mappedPage);
4895 		}
4896 
4897 		if (mapPage) {
4898 			if (map_page(area, context.page, address, newProtection,
4899 					&context.reservation) != B_OK) {
4900 				// Mapping can only fail, when the page mapping object couldn't
4901 				// be allocated. Save for the missing mapping everything is
4902 				// fine, though. If this was a regular page fault, we'll simply
4903 				// leave and probably fault again. To make sure we'll have more
4904 				// luck then, we ensure that the minimum object reserve is
4905 				// available.
4906 				DEBUG_PAGE_ACCESS_END(context.page);
4907 
4908 				context.UnlockAll();
4909 
4910 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4911 						!= B_OK) {
4912 					// Apparently the situation is serious. Let's get ourselves
4913 					// killed.
4914 					status = B_NO_MEMORY;
4915 				} else if (wirePage != NULL) {
4916 					// The caller expects us to wire the page. Since
4917 					// object_cache_reserve() succeeded, we should now be able
4918 					// to allocate a mapping structure. Restart.
4919 					continue;
4920 				}
4921 
4922 				break;
4923 			}
4924 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4925 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4926 
4927 		// also wire the page, if requested
4928 		if (wirePage != NULL && status == B_OK) {
4929 			increment_page_wired_count(context.page);
4930 			*wirePage = context.page;
4931 		}
4932 
4933 		DEBUG_PAGE_ACCESS_END(context.page);
4934 
4935 		break;
4936 	}
4937 
4938 	return status;
4939 }
4940 
4941 
4942 status_t
4943 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4944 {
4945 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4946 }
4947 
4948 status_t
4949 vm_put_physical_page(addr_t vaddr, void* handle)
4950 {
4951 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4952 }
4953 
4954 
4955 status_t
4956 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4957 	void** _handle)
4958 {
4959 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4960 }
4961 
4962 status_t
4963 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4964 {
4965 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4966 }
4967 
4968 
4969 status_t
4970 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4971 {
4972 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4973 }
4974 
4975 status_t
4976 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4977 {
4978 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4979 }
4980 
4981 
4982 void
4983 vm_get_info(system_info* info)
4984 {
4985 	swap_get_info(info);
4986 
4987 	MutexLocker locker(sAvailableMemoryLock);
4988 	info->needed_memory = sNeededMemory;
4989 	info->free_memory = sAvailableMemory;
4990 }
4991 
4992 
4993 uint32
4994 vm_num_page_faults(void)
4995 {
4996 	return sPageFaults;
4997 }
4998 
4999 
5000 off_t
5001 vm_available_memory(void)
5002 {
5003 	MutexLocker locker(sAvailableMemoryLock);
5004 	return sAvailableMemory;
5005 }
5006 
5007 
5008 off_t
5009 vm_available_not_needed_memory(void)
5010 {
5011 	MutexLocker locker(sAvailableMemoryLock);
5012 	return sAvailableMemory - sNeededMemory;
5013 }
5014 
5015 
5016 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
5017 	debugger.
5018 */
5019 off_t
5020 vm_available_not_needed_memory_debug(void)
5021 {
5022 	return sAvailableMemory - sNeededMemory;
5023 }
5024 
5025 
5026 size_t
5027 vm_kernel_address_space_left(void)
5028 {
5029 	return VMAddressSpace::Kernel()->FreeSpace();
5030 }
5031 
5032 
5033 void
5034 vm_unreserve_memory(size_t amount)
5035 {
5036 	mutex_lock(&sAvailableMemoryLock);
5037 
5038 	sAvailableMemory += amount;
5039 
5040 	mutex_unlock(&sAvailableMemoryLock);
5041 }
5042 
5043 
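/*!	Tries to reserve \a amount bytes of memory, leaving the reserve for the
	given priority untouched. If the memory isn't available immediately, the
	low resource manager is notified and the function waits up to \a timeout
	for memory to become available.
*/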
5044 status_t
5045 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5046 {
5047 	size_t reserve = kMemoryReserveForPriority[priority];
5048 
5049 	MutexLocker locker(sAvailableMemoryLock);
5050 
5051 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5052 
5053 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5054 		sAvailableMemory -= amount;
5055 		return B_OK;
5056 	}
5057 
5058 	if (timeout <= 0)
5059 		return B_NO_MEMORY;
5060 
5061 	// turn timeout into an absolute timeout
5062 	timeout += system_time();
5063 
5064 	// loop until we've got the memory or the timeout occurs
5065 	do {
5066 		sNeededMemory += amount;
5067 
5068 		// call the low resource manager
5069 		locker.Unlock();
5070 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5071 			B_ABSOLUTE_TIMEOUT, timeout);
5072 		locker.Lock();
5073 
5074 		sNeededMemory -= amount;
5075 
5076 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5077 			sAvailableMemory -= amount;
5078 			return B_OK;
5079 		}
5080 	} while (timeout > system_time());
5081 
5082 	return B_NO_MEMORY;
5083 }
5084 
5085 
5086 status_t
5087 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5088 {
5089 	// NOTE: The caller is responsible for synchronizing calls to this function!
5090 
5091 	AddressSpaceReadLocker locker;
5092 	VMArea* area;
5093 	status_t status = locker.SetFromArea(id, area);
5094 	if (status != B_OK)
5095 		return status;
5096 
5097 	// nothing to do, if the type doesn't change
5098 	uint32 oldType = area->MemoryType();
5099 	if (type == oldType)
5100 		return B_OK;
5101 
5102 	// set the memory type of the area and the mapped pages
5103 	VMTranslationMap* map = area->address_space->TranslationMap();
5104 	map->Lock();
5105 	area->SetMemoryType(type);
5106 	map->ProtectArea(area, area->protection);
5107 	map->Unlock();
5108 
5109 	// set the physical memory type
5110 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5111 	if (error != B_OK) {
5112 		// reset the memory type of the area and the mapped pages
5113 		map->Lock();
5114 		area->SetMemoryType(oldType);
5115 		map->ProtectArea(area, area->protection);
5116 		map->Unlock();
5117 		return error;
5118 	}
5119 
5120 	return B_OK;
5122 }
5123 
5124 
5125 /*!	This function enforces some protection properties:
5126 	 - kernel areas must be W^X (after kernel startup)
5127 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5128 	 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5129 */
5130 static void
5131 fix_protection(uint32* protection)
5132 {
5133 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5134 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5135 			|| (*protection & B_WRITE_AREA) != 0)
5136 		&& !gKernelStartup)
5137 		panic("kernel areas cannot be both writable and executable!");
5138 
5139 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5140 		if ((*protection & B_WRITE_AREA) != 0)
5141 			*protection |= B_KERNEL_WRITE_AREA;
5142 		if ((*protection & B_READ_AREA) != 0)
5143 			*protection |= B_KERNEL_READ_AREA;
5144 	}
5145 }
5146 
5147 
5148 static void
5149 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5150 {
5151 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5152 	info->area = area->id;
5153 	info->address = (void*)area->Base();
5154 	info->size = area->Size();
5155 	info->protection = area->protection;
5156 	info->lock = area->wiring;
5157 	info->team = area->address_space->ID();
5158 	info->copy_count = 0;
5159 	info->in_count = 0;
5160 	info->out_count = 0;
5161 		// TODO: retrieve real values here!
5162 
5163 	VMCache* cache = vm_area_get_locked_cache(area);
5164 
5165 	// Note, this is a simplification; the cache could be larger than this area
5166 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5167 
5168 	vm_area_put_locked_cache(cache);
5169 }
5170 
5171 
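/*!	Resizes the RAM area with the given ID to \a newSize (which must be page
	aligned). All areas attached to the area's cache are resized with it;
	growing fails if any of them cannot grow, shrinking waits until the ranges
	being cut off are no longer wired.
*/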
5172 static status_t
5173 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5174 {
5175 	// is newSize a multiple of B_PAGE_SIZE?
5176 	if (newSize & (B_PAGE_SIZE - 1))
5177 		return B_BAD_VALUE;
5178 
5179 	// lock all affected address spaces and the cache
5180 	VMArea* area;
5181 	VMCache* cache;
5182 
5183 	MultiAddressSpaceLocker locker;
5184 	AreaCacheLocker cacheLocker;
5185 
5186 	status_t status;
5187 	size_t oldSize;
5188 	bool anyKernelArea;
5189 	bool restart;
5190 
5191 	do {
5192 		anyKernelArea = false;
5193 		restart = false;
5194 
5195 		locker.Unset();
5196 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5197 		if (status != B_OK)
5198 			return status;
5199 		cacheLocker.SetTo(cache, true);	// already locked
5200 
5201 		// enforce restrictions
5202 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5203 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5204 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5205 				"resize kernel area %" B_PRId32 " (%s)\n",
5206 				team_get_current_team_id(), areaID, area->name);
5207 			return B_NOT_ALLOWED;
5208 		}
5209 		// TODO: Enforce all restrictions (team, etc.)!
5210 
5211 		oldSize = area->Size();
5212 		if (newSize == oldSize)
5213 			return B_OK;
5214 
5215 		if (cache->type != CACHE_TYPE_RAM)
5216 			return B_NOT_ALLOWED;
5217 
5218 		if (oldSize < newSize) {
5219 			// We need to check if all areas of this cache can be resized.
5220 			for (VMArea* current = cache->areas; current != NULL;
5221 					current = current->cache_next) {
5222 				if (!current->address_space->CanResizeArea(current, newSize))
5223 					return B_ERROR;
5224 				anyKernelArea
5225 					|= current->address_space == VMAddressSpace::Kernel();
5226 			}
5227 		} else {
5228 			// We're shrinking the areas, so we must make sure the affected
5229 			// ranges are not wired.
5230 			for (VMArea* current = cache->areas; current != NULL;
5231 					current = current->cache_next) {
5232 				anyKernelArea
5233 					|= current->address_space == VMAddressSpace::Kernel();
5234 
5235 				if (wait_if_area_range_is_wired(current,
5236 						current->Base() + newSize, oldSize - newSize, &locker,
5237 						&cacheLocker)) {
5238 					restart = true;
5239 					break;
5240 				}
5241 			}
5242 		}
5243 	} while (restart);
5244 
5245 	// Okay, looks good so far, so let's do it
5246 
5247 	int priority = kernel && anyKernelArea
5248 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5249 	uint32 allocationFlags = kernel && anyKernelArea
5250 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5251 
5252 	if (oldSize < newSize) {
5253 		// Growing the cache can fail, so we do it first.
5254 		status = cache->Resize(cache->virtual_base + newSize, priority);
5255 		if (status != B_OK)
5256 			return status;
5257 	}
5258 
5259 	for (VMArea* current = cache->areas; current != NULL;
5260 			current = current->cache_next) {
5261 		status = current->address_space->ResizeArea(current, newSize,
5262 			allocationFlags);
5263 		if (status != B_OK)
5264 			break;
5265 
5266 		// We also need to unmap all pages beyond the new size, if the area has
5267 		// shrunk
5268 		if (newSize < oldSize) {
5269 			VMCacheChainLocker cacheChainLocker(cache);
5270 			cacheChainLocker.LockAllSourceCaches();
5271 
5272 			unmap_pages(current, current->Base() + newSize,
5273 				oldSize - newSize);
5274 
5275 			cacheChainLocker.Unlock(cache);
5276 		}
5277 	}
5278 
5279 	if (status == B_OK) {
5280 		// Shrink or grow individual page protections if in use.
5281 		if (area->page_protections != NULL) {
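			// Note: page protections are stored as 4 bits per page (two
			// pages per byte), hence the rounded-up size computed below.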
5282 			size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5283 			uint8* newProtections
5284 				= (uint8*)realloc(area->page_protections, bytes);
5285 			if (newProtections == NULL)
5286 				status = B_NO_MEMORY;
5287 			else {
5288 				area->page_protections = newProtections;
5289 
5290 				if (oldSize < newSize) {
5291 					// init the additional page protections to that of the area
5292 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5293 					uint32 areaProtection = area->protection
5294 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5295 					memset(area->page_protections + offset,
5296 						areaProtection | (areaProtection << 4), bytes - offset);
5297 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5298 						uint8& entry = area->page_protections[offset - 1];
5299 						entry = (entry & 0x0f) | (areaProtection << 4);
5300 					}
5301 				}
5302 			}
5303 		}
5304 	}
5305 
5306 	// shrinking the cache can't fail, so we do it now
5307 	if (status == B_OK && newSize < oldSize)
5308 		status = cache->Resize(cache->virtual_base + newSize, priority);
5309 
5310 	if (status != B_OK) {
5311 		// Something failed -- resize the areas back to their original size.
5312 		// This can fail, too, in which case we're seriously screwed.
5313 		for (VMArea* current = cache->areas; current != NULL;
5314 				current = current->cache_next) {
5315 			if (current->address_space->ResizeArea(current, oldSize,
5316 					allocationFlags) != B_OK) {
5317 				panic("vm_resize_area(): Failed and not being able to restore "
5318 					"original state.");
5319 			}
5320 		}
5321 
5322 		cache->Resize(cache->virtual_base + oldSize, priority);
5323 	}
5324 
5325 	// TODO: we must honour the lock restrictions of this area
5326 	return status;
5327 }
5328 
5329 
5330 status_t
5331 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5332 {
5333 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5334 }
5335 
5336 
5337 status_t
5338 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5339 {
5340 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5341 }
5342 
5343 
5344 status_t
5345 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5346 	bool user)
5347 {
5348 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5349 }
5350 
5351 
5352 void
5353 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5354 {
5355 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5356 }
5357 
5358 
5359 /*!	Copies a range of memory directly from/to a page that might not be mapped
5360 	at the moment.
5361 
5362 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5363 	walks through the respective area's cache chain to find the physical page
5364 	and copies from/to it directly.
5365 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5366 	must not cross a page boundary.
5367 
5368 	\param teamID The team ID identifying the address space \a unsafeMemory is
5369 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5370 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5371 		is passed, the address space of the thread returned by
5372 		debug_get_debugged_thread() is used.
5373 	\param unsafeMemory The start of the unsafe memory range to be copied
5374 		from/to.
5375 	\param buffer A safely accessible kernel buffer to be copied from/to.
5376 	\param size The number of bytes to be copied.
5377 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5378 		\a unsafeMemory, the other way around otherwise.
5379 */
5380 status_t
5381 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5382 	size_t size, bool copyToUnsafe)
5383 {
5384 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5385 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5386 		return B_BAD_VALUE;
5387 	}
5388 
5389 	// get the address space for the debugged thread
5390 	VMAddressSpace* addressSpace;
5391 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5392 		addressSpace = VMAddressSpace::Kernel();
5393 	} else if (teamID == B_CURRENT_TEAM) {
5394 		Thread* thread = debug_get_debugged_thread();
5395 		if (thread == NULL || thread->team == NULL)
5396 			return B_BAD_ADDRESS;
5397 
5398 		addressSpace = thread->team->address_space;
5399 	} else
5400 		addressSpace = VMAddressSpace::DebugGet(teamID);
5401 
5402 	if (addressSpace == NULL)
5403 		return B_BAD_ADDRESS;
5404 
5405 	// get the area
5406 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5407 	if (area == NULL)
5408 		return B_BAD_ADDRESS;
5409 
5410 	// search the page
5411 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5412 		+ area->cache_offset;
5413 	VMCache* cache = area->cache;
5414 	vm_page* page = NULL;
5415 	while (cache != NULL) {
5416 		page = cache->DebugLookupPage(cacheOffset);
5417 		if (page != NULL)
5418 			break;
5419 
5420 		// Page not found in this cache -- if it is paged out, we must not try
5421 		// to get it from lower caches.
5422 		if (cache->DebugHasPage(cacheOffset))
5423 			break;
5424 
5425 		cache = cache->source;
5426 	}
5427 
5428 	if (page == NULL)
5429 		return B_UNSUPPORTED;
5430 
5431 	// copy from/to physical memory
5432 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5433 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5434 
5435 	if (copyToUnsafe) {
5436 		if (page->Cache() != area->cache)
5437 			return B_UNSUPPORTED;
5438 
5439 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5440 	}
5441 
5442 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5443 }
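// A minimal usage sketch, e.g. from a kernel debugger command. The variable
// names are hypothetical; note that the copied range must not cross a page
// boundary, so a single aligned word is a safe example:
//
//	uint32 word;
//	if (vm_debug_copy_page_memory(B_CURRENT_TEAM, userAddress, &word,
//			sizeof(word), false) == B_OK)
//		kprintf("value at %p: %#" B_PRIx32 "\n", userAddress, word);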
5444 
5445 
5446 /** Validate that a memory range is either fully in kernel space, or fully in
5447  *  userspace */
5448 static inline bool
5449 validate_memory_range(const void* addr, size_t size)
5450 {
5451 	addr_t address = (addr_t)addr;
5452 
5453 	// Check for overflows on all addresses.
5454 	if ((address + size) < address)
5455 		return false;
5456 
5457 	// Validate that the address range does not cross the kernel/user boundary.
5458 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5459 }
5460 
5461 
5462 //	#pragma mark - kernel public API
5463 
5464 
5465 status_t
5466 user_memcpy(void* to, const void* from, size_t size)
5467 {
5468 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5469 		return B_BAD_ADDRESS;
5470 
5471 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5472 		return B_BAD_ADDRESS;
5473 
5474 	return B_OK;
5475 }
5476 
5477 
5478 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5479 	the string in \a to, NULL-terminating the result.
5480 
5481 	\param to Pointer to the destination C-string.
5482 	\param from Pointer to the source C-string.
5483 	\param size Size in bytes of the string buffer pointed to by \a to.
5484 
5485 	\return strlen(\a from), or a negative error code if an error occurred.
5486 */
5487 ssize_t
5488 user_strlcpy(char* to, const char* from, size_t size)
5489 {
5490 	if (to == NULL && size != 0)
5491 		return B_BAD_VALUE;
5492 	if (from == NULL)
5493 		return B_BAD_ADDRESS;
5494 
5495 	// Protect the source address from overflows.
5496 	size_t maxSize = size;
5497 	if ((addr_t)from + maxSize < (addr_t)from)
5498 		maxSize -= (addr_t)from + maxSize;
5499 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5500 		maxSize = USER_TOP - (addr_t)from;
5501 
5502 	if (!validate_memory_range(to, maxSize))
5503 		return B_BAD_ADDRESS;
5504 
5505 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5506 	if (result < 0)
5507 		return result;
5508 
5509 	// If we hit the address overflow boundary, fail.
5510 	if ((size_t)result >= maxSize && maxSize < size)
5511 		return B_BAD_ADDRESS;
5512 
5513 	return result;
5514 }
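// A minimal usage sketch, following the pattern used by the syscalls further
// below (userName is a hypothetical userland pointer). A return value >= the
// buffer size indicates the source string was truncated:
//
//	char name[B_OS_NAME_LENGTH];
//	if (!IS_USER_ADDRESS(userName)
//		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
//		return B_BAD_ADDRESS;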
5515 
5516 
5517 status_t
5518 user_memset(void* s, char c, size_t count)
5519 {
5520 	if (!validate_memory_range(s, count))
5521 		return B_BAD_ADDRESS;
5522 
5523 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5524 		return B_BAD_ADDRESS;
5525 
5526 	return B_OK;
5527 }
5528 
5529 
5530 /*!	Wires a single page at the given address.
5531 
5532 	\param team The team whose address space the address belongs to. Supports
5533 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5534 		parameter is ignored.
5535 	\param address The virtual address to wire down. Does not need to
5536 		be page aligned.
5537 	\param writable If \c true the page shall be writable.
5538 	\param info On success the info is filled in, among other things
5539 		containing the physical address the given virtual one translates to.
5540 	\return \c B_OK if the page could be wired, another error code otherwise.
5541 */
5542 status_t
5543 vm_wire_page(team_id team, addr_t address, bool writable,
5544 	VMPageWiringInfo* info)
5545 {
5546 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5547 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5548 
5549 	// compute the page protection that is required
5550 	bool isUser = IS_USER_ADDRESS(address);
5551 	uint32 requiredProtection = PAGE_PRESENT
5552 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5553 	if (writable)
5554 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5555 
5556 	// get and read lock the address space
5557 	VMAddressSpace* addressSpace = NULL;
5558 	if (isUser) {
5559 		if (team == B_CURRENT_TEAM)
5560 			addressSpace = VMAddressSpace::GetCurrent();
5561 		else
5562 			addressSpace = VMAddressSpace::Get(team);
5563 	} else
5564 		addressSpace = VMAddressSpace::GetKernel();
5565 	if (addressSpace == NULL)
5566 		return B_ERROR;
5567 
5568 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5569 
5570 	VMTranslationMap* map = addressSpace->TranslationMap();
5571 	status_t error = B_OK;
5572 
5573 	// get the area
5574 	VMArea* area = addressSpace->LookupArea(pageAddress);
5575 	if (area == NULL) {
5576 		addressSpace->Put();
5577 		return B_BAD_ADDRESS;
5578 	}
5579 
5580 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5581 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5582 
5583 	// mark the area range wired
5584 	area->Wire(&info->range);
5585 
5586 	// Lock the area's cache chain and the translation map. Needed to look
5587 	// up the page and play with its wired count.
5588 	cacheChainLocker.LockAllSourceCaches();
5589 	map->Lock();
5590 
5591 	phys_addr_t physicalAddress;
5592 	uint32 flags;
5593 	vm_page* page;
5594 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5595 		&& (flags & requiredProtection) == requiredProtection
5596 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5597 			!= NULL) {
5598 		// Already mapped with the correct permissions -- just increment
5599 		// the page's wired count.
5600 		increment_page_wired_count(page);
5601 
5602 		map->Unlock();
5603 		cacheChainLocker.Unlock();
5604 		addressSpaceLocker.Unlock();
5605 	} else {
5606 		// Let vm_soft_fault() map the page for us, if possible. We need
5607 		// to fully unlock to avoid deadlocks. Since we have already
5608 		// wired the area itself, nothing disturbing will happen with it
5609 		// in the meantime.
5610 		map->Unlock();
5611 		cacheChainLocker.Unlock();
5612 		addressSpaceLocker.Unlock();
5613 
5614 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5615 			isUser, &page);
5616 
5617 		if (error != B_OK) {
5618 			// The page could not be mapped -- clean up.
5619 			VMCache* cache = vm_area_get_locked_cache(area);
5620 			area->Unwire(&info->range);
5621 			cache->ReleaseRefAndUnlock();
5622 			addressSpace->Put();
5623 			return error;
5624 		}
5625 	}
5626 
5627 	info->physicalAddress
5628 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5629 			+ address % B_PAGE_SIZE;
5630 	info->page = page;
5631 
5632 	return B_OK;
5633 }
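// A minimal usage sketch, assuming a hypothetical caller that briefly needs
// the physical address behind a single user page:
//
//	VMPageWiringInfo info;
//	status_t error = vm_wire_page(team, (addr_t)userBuffer, true, &info);
//	if (error != B_OK)
//		return error;
//	// ... use info.physicalAddress, e.g. to program a transfer ...
//	vm_unwire_page(&info);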
5634 
5635 
5636 /*!	Unwires a single page previously wired via vm_wire_page().
5637 
5638 	\param info The same object passed to vm_wire_page() before.
5639 */
5640 void
5641 vm_unwire_page(VMPageWiringInfo* info)
5642 {
5643 	// lock the address space
5644 	VMArea* area = info->range.area;
5645 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5646 		// takes over our reference
5647 
5648 	// lock the top cache
5649 	VMCache* cache = vm_area_get_locked_cache(area);
5650 	VMCacheChainLocker cacheChainLocker(cache);
5651 
5652 	if (info->page->Cache() != cache) {
5653 		// The page is not in the top cache, so we lock the whole cache chain
5654 		// before touching the page's wired count.
5655 		cacheChainLocker.LockAllSourceCaches();
5656 	}
5657 
5658 	decrement_page_wired_count(info->page);
5659 
5660 	// remove the wired range from the area
5661 	area->Unwire(&info->range);
5662 
5663 	cacheChainLocker.Unlock();
5664 }
5665 
5666 
5667 /*!	Wires down the given address range in the specified team's address space.
5668 
5669 	If successful the function
5670 	- acquires a reference to the specified team's address space,
5671 	- adds respective wired ranges to all areas that intersect with the given
5672 	  address range,
5673 	- makes sure all pages in the given address range are mapped with the
5674 	  requested access permissions and increments their wired count.
5675 
5676 	It fails when \a team doesn't specify a valid address space, when any part
5677 	of the specified address range is not covered by areas, when the concerned
5678 	areas don't allow mapping with the requested permissions, or when mapping
5679 	failed for another reason.
5680 
5681 	When successful the call must be balanced by an unlock_memory_etc() call with
5682 	the exact same parameters.
5683 
5684 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5685 		supported.
5686 	\param address The start of the address range to be wired.
5687 	\param numBytes The size of the address range to be wired.
5688 	\param flags Flags. Currently only \c B_READ_DEVICE is defined; when set,
5689 		the range does not need to be wired writable. Without it the range is
5690 		wired writable ("read from device into memory").
5691 	\return \c B_OK on success, another error code otherwise.
5692 */
5693 status_t
5694 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5695 {
5696 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5697 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5698 
5699 	// compute the page protection that is required
5700 	bool isUser = IS_USER_ADDRESS(address);
5701 	bool writable = (flags & B_READ_DEVICE) == 0;
5702 	uint32 requiredProtection = PAGE_PRESENT
5703 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5704 	if (writable)
5705 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5706 
5707 	uint32 mallocFlags = isUser
5708 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5709 
5710 	// get and read lock the address space
5711 	VMAddressSpace* addressSpace = NULL;
5712 	if (isUser) {
5713 		if (team == B_CURRENT_TEAM)
5714 			addressSpace = VMAddressSpace::GetCurrent();
5715 		else
5716 			addressSpace = VMAddressSpace::Get(team);
5717 	} else
5718 		addressSpace = VMAddressSpace::GetKernel();
5719 	if (addressSpace == NULL)
5720 		return B_ERROR;
5721 
5722 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5723 		// We get a new address space reference here. The one we got above will
5724 		// be freed by unlock_memory_etc().
5725 
5726 	VMTranslationMap* map = addressSpace->TranslationMap();
5727 	status_t error = B_OK;
5728 
5729 	// iterate through all concerned areas
5730 	addr_t nextAddress = lockBaseAddress;
5731 	while (nextAddress != lockEndAddress) {
5732 		// get the next area
5733 		VMArea* area = addressSpace->LookupArea(nextAddress);
5734 		if (area == NULL) {
5735 			error = B_BAD_ADDRESS;
5736 			break;
5737 		}
5738 
5739 		addr_t areaStart = nextAddress;
5740 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5741 
5742 		// allocate the wired range (do that before locking the cache to avoid
5743 		// deadlocks)
5744 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5745 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5746 		if (range == NULL) {
5747 			error = B_NO_MEMORY;
5748 			break;
5749 		}
5750 
5751 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5752 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5753 
5754 		// mark the area range wired
5755 		area->Wire(range);
5756 
5757 		// Depending on the area cache type and the wiring, we may not need to
5758 		// look at the individual pages.
5759 		if (area->cache_type == CACHE_TYPE_NULL
5760 			|| area->cache_type == CACHE_TYPE_DEVICE
5761 			|| area->wiring == B_FULL_LOCK
5762 			|| area->wiring == B_CONTIGUOUS) {
5763 			nextAddress = areaEnd;
5764 			continue;
5765 		}
5766 
5767 		// Lock the area's cache chain and the translation map. Needed to look
5768 		// up pages and play with their wired count.
5769 		cacheChainLocker.LockAllSourceCaches();
5770 		map->Lock();
5771 
5772 		// iterate through the pages and wire them
5773 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5774 			phys_addr_t physicalAddress;
5775 			uint32 flags;
5776 
5777 			vm_page* page;
5778 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5779 				&& (flags & requiredProtection) == requiredProtection
5780 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5781 					!= NULL) {
5782 				// Already mapped with the correct permissions -- just increment
5783 				// the page's wired count.
5784 				increment_page_wired_count(page);
5785 			} else {
5786 				// Let vm_soft_fault() map the page for us, if possible. We need
5787 				// to fully unlock to avoid deadlocks. Since we have already
5788 				// wired the area itself, nothing disturbing will happen with it
5789 				// in the meantime.
5790 				map->Unlock();
5791 				cacheChainLocker.Unlock();
5792 				addressSpaceLocker.Unlock();
5793 
5794 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5795 					false, isUser, &page);
5796 
5797 				addressSpaceLocker.Lock();
5798 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5799 				cacheChainLocker.LockAllSourceCaches();
5800 				map->Lock();
5801 			}
5802 
5803 			if (error != B_OK)
5804 				break;
5805 		}
5806 
5807 		map->Unlock();
5808 
5809 		if (error == B_OK) {
5810 			cacheChainLocker.Unlock();
5811 		} else {
5812 			// An error occurred, so abort right here. If the current address
5813 			// is the first in this area, unwire the area, since we won't get
5814 			// to it when reverting what we've done so far.
5815 			if (nextAddress == areaStart) {
5816 				area->Unwire(range);
5817 				cacheChainLocker.Unlock();
5818 				range->~VMAreaWiredRange();
5819 				free_etc(range, mallocFlags);
5820 			} else
5821 				cacheChainLocker.Unlock();
5822 
5823 			break;
5824 		}
5825 	}
5826 
5827 	if (error != B_OK) {
5828 		// An error occurred, so unwire all that we've already wired. Note that
5829 		// even if not a single page was wired, unlock_memory_etc() is called
5830 		// to put the address space reference.
5831 		addressSpaceLocker.Unlock();
5832 		unlock_memory_etc(team, (void*)lockBaseAddress,
5833 			nextAddress - lockBaseAddress, flags);
5834 	}
5835 
5836 	return error;
5837 }
5838 
5839 
5840 status_t
5841 lock_memory(void* address, size_t numBytes, uint32 flags)
5842 {
5843 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5844 }
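// A minimal usage sketch for wiring a (hypothetical) buffer before an I/O
// operation. With flags == 0 the range is wired writable; the unlock call
// must use exactly the same parameters as the lock call:
//
//	status_t error = lock_memory_etc(team, buffer, length, 0);
//	if (error != B_OK)
//		return error;
//	// ... perform the I/O on the wired range ...
//	unlock_memory_etc(team, buffer, length, 0);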
5845 
5846 
5847 /*!	Unwires an address range previously wired with lock_memory_etc().
5848 
5849 	Note that a call to this function must balance a previous lock_memory_etc()
5850 	call with exactly the same parameters.
5851 */
5852 status_t
5853 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5854 {
5855 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5856 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5857 
5858 	// compute the page protection that is required
5859 	bool isUser = IS_USER_ADDRESS(address);
5860 	bool writable = (flags & B_READ_DEVICE) == 0;
5861 	uint32 requiredProtection = PAGE_PRESENT
5862 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5863 	if (writable)
5864 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5865 
5866 	uint32 mallocFlags = isUser
5867 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5868 
5869 	// get and read lock the address space
5870 	VMAddressSpace* addressSpace = NULL;
5871 	if (isUser) {
5872 		if (team == B_CURRENT_TEAM)
5873 			addressSpace = VMAddressSpace::GetCurrent();
5874 		else
5875 			addressSpace = VMAddressSpace::Get(team);
5876 	} else
5877 		addressSpace = VMAddressSpace::GetKernel();
5878 	if (addressSpace == NULL)
5879 		return B_ERROR;
5880 
5881 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5882 		// Take over the address space reference. We don't unlock until we're
5883 		// done.
5884 
5885 	VMTranslationMap* map = addressSpace->TranslationMap();
5886 	status_t error = B_OK;
5887 
5888 	// iterate through all concerned areas
5889 	addr_t nextAddress = lockBaseAddress;
5890 	while (nextAddress != lockEndAddress) {
5891 		// get the next area
5892 		VMArea* area = addressSpace->LookupArea(nextAddress);
5893 		if (area == NULL) {
5894 			error = B_BAD_ADDRESS;
5895 			break;
5896 		}
5897 
5898 		addr_t areaStart = nextAddress;
5899 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5900 
5901 		// Lock the area's top cache. This is a requirement for
5902 		// VMArea::Unwire().
5903 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5904 
5905 		// Depending on the area cache type and the wiring, we may not need to
5906 		// look at the individual pages.
5907 		if (area->cache_type == CACHE_TYPE_NULL
5908 			|| area->cache_type == CACHE_TYPE_DEVICE
5909 			|| area->wiring == B_FULL_LOCK
5910 			|| area->wiring == B_CONTIGUOUS) {
5911 			// unwire the range (to avoid deadlocks we delete the range after
5912 			// unlocking the cache)
5913 			nextAddress = areaEnd;
5914 			VMAreaWiredRange* range = area->Unwire(areaStart,
5915 				areaEnd - areaStart, writable);
5916 			cacheChainLocker.Unlock();
5917 			if (range != NULL) {
5918 				range->~VMAreaWiredRange();
5919 				free_etc(range, mallocFlags);
5920 			}
5921 			continue;
5922 		}
5923 
5924 		// Lock the area's cache chain and the translation map. Needed to look
5925 		// up pages and play with their wired count.
5926 		cacheChainLocker.LockAllSourceCaches();
5927 		map->Lock();
5928 
5929 		// iterate through the pages and unwire them
5930 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5931 			phys_addr_t physicalAddress;
5932 			uint32 flags;
5933 
5934 			vm_page* page;
5935 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5936 				&& (flags & PAGE_PRESENT) != 0
5937 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5938 					!= NULL) {
5939 				// The page is still mapped -- just decrement its wired
5940 				// count.
5941 				decrement_page_wired_count(page);
5942 			} else {
5943 				panic("unlock_memory_etc(): Failed to unwire page: address "
5944 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5945 					nextAddress);
5946 				error = B_BAD_VALUE;
5947 				break;
5948 			}
5949 		}
5950 
5951 		map->Unlock();
5952 
5953 		// All pages are unwired. Remove the area's wired range as well (to
5954 		// avoid deadlocks we delete the range after unlocking the cache).
5955 		VMAreaWiredRange* range = area->Unwire(areaStart,
5956 			areaEnd - areaStart, writable);
5957 
5958 		cacheChainLocker.Unlock();
5959 
5960 		if (range != NULL) {
5961 			range->~VMAreaWiredRange();
5962 			free_etc(range, mallocFlags);
5963 		}
5964 
5965 		if (error != B_OK)
5966 			break;
5967 	}
5968 
5969 	// get rid of the address space reference lock_memory_etc() acquired
5970 	addressSpace->Put();
5971 
5972 	return error;
5973 }
5974 
5975 
5976 status_t
5977 unlock_memory(void* address, size_t numBytes, uint32 flags)
5978 {
5979 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5980 }
5981 
5982 
5983 /*!	Similar to get_memory_map(), but also allows to specify the address space
5984 	for the memory in question and has a saner semantics.
5985 	Returns \c B_OK when the complete range could be translated or
5986 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5987 	case the actual number of entries is written to \c *_numEntries. Any other
5988 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5989 	in this case.
5990 */
5991 status_t
5992 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5993 	physical_entry* table, uint32* _numEntries)
5994 {
5995 	uint32 numEntries = *_numEntries;
5996 	*_numEntries = 0;
5997 
5998 	VMAddressSpace* addressSpace;
5999 	addr_t virtualAddress = (addr_t)address;
6000 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
6001 	phys_addr_t physicalAddress;
6002 	status_t status = B_OK;
6003 	int32 index = -1;
6004 	addr_t offset = 0;
6005 	bool interrupts = are_interrupts_enabled();
6006 
6007 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6008 		"entries)\n", team, address, numBytes, numEntries));
6009 
6010 	if (numEntries == 0 || numBytes == 0)
6011 		return B_BAD_VALUE;
6012 
6013 	// in which address space is the address to be found?
6014 	if (IS_USER_ADDRESS(virtualAddress)) {
6015 		if (team == B_CURRENT_TEAM)
6016 			addressSpace = VMAddressSpace::GetCurrent();
6017 		else
6018 			addressSpace = VMAddressSpace::Get(team);
6019 	} else
6020 		addressSpace = VMAddressSpace::GetKernel();
6021 
6022 	if (addressSpace == NULL)
6023 		return B_ERROR;
6024 
6025 	VMTranslationMap* map = addressSpace->TranslationMap();
6026 
6027 	if (interrupts)
6028 		map->Lock();
6029 
6030 	while (offset < numBytes) {
6031 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6032 		uint32 flags;
6033 
6034 		if (interrupts) {
6035 			status = map->Query((addr_t)address + offset, &physicalAddress,
6036 				&flags);
6037 		} else {
6038 			status = map->QueryInterrupt((addr_t)address + offset,
6039 				&physicalAddress, &flags);
6040 		}
6041 		if (status < B_OK)
6042 			break;
6043 		if ((flags & PAGE_PRESENT) == 0) {
6044 			panic("get_memory_map() called on unmapped memory!");
6045 			return B_BAD_ADDRESS;
6046 		}
6047 
6048 		if (index < 0 && pageOffset > 0) {
6049 			physicalAddress += pageOffset;
6050 			if (bytes > B_PAGE_SIZE - pageOffset)
6051 				bytes = B_PAGE_SIZE - pageOffset;
6052 		}
6053 
6054 		// need to switch to the next physical_entry?
6055 		if (index < 0 || table[index].address
6056 				!= physicalAddress - table[index].size) {
6057 			if ((uint32)++index + 1 > numEntries) {
6058 				// table too small
6059 				break;
6060 			}
6061 			table[index].address = physicalAddress;
6062 			table[index].size = bytes;
6063 		} else {
6064 			// page does fit in current entry
6065 			table[index].size += bytes;
6066 		}
6067 
6068 		offset += bytes;
6069 	}
6070 
6071 	if (interrupts)
6072 		map->Unlock();
6073 
6074 	if (status != B_OK)
6075 		return status;
6076 
6077 	if ((uint32)index + 1 > numEntries) {
6078 		*_numEntries = index;
6079 		return B_BUFFER_OVERFLOW;
6080 	}
6081 
6082 	*_numEntries = index + 1;
6083 	return B_OK;
6084 }
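// A minimal usage sketch, translating a (hypothetical) buffer into a small
// scatter/gather table:
//
//	physical_entry entries[8];
//	uint32 numEntries = 8;
//	status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
//		entries, &numEntries);
//	// On B_OK the whole range was translated; on B_BUFFER_OVERFLOW only the
//	// first numEntries runs are valid. Each entry holds a physical address
//	// and the size of the physically contiguous run.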
6085 
6086 
6087 /*!	According to the BeBook, this function should always succeed.
6088 	This is no longer the case.
6089 */
6090 extern "C" int32
6091 __get_memory_map_haiku(const void* address, size_t numBytes,
6092 	physical_entry* table, int32 numEntries)
6093 {
6094 	uint32 entriesRead = numEntries;
6095 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6096 		table, &entriesRead);
6097 	if (error != B_OK)
6098 		return error;
6099 
6100 	// close the entry list
6101 
6102 	// if it's only one entry, we will silently accept the missing ending
6103 	if (numEntries == 1)
6104 		return B_OK;
6105 
6106 	if (entriesRead + 1 > (uint32)numEntries)
6107 		return B_BUFFER_OVERFLOW;
6108 
6109 	table[entriesRead].address = 0;
6110 	table[entriesRead].size = 0;
6111 
6112 	return B_OK;
6113 }
6114 
6115 
6116 area_id
6117 area_for(void* address)
6118 {
6119 	return vm_area_for((addr_t)address, true);
6120 }
6121 
6122 
6123 area_id
6124 find_area(const char* name)
6125 {
6126 	return VMAreaHash::Find(name);
6127 }
6128 
6129 
6130 status_t
6131 _get_area_info(area_id id, area_info* info, size_t size)
6132 {
6133 	if (size != sizeof(area_info) || info == NULL)
6134 		return B_BAD_VALUE;
6135 
6136 	AddressSpaceReadLocker locker;
6137 	VMArea* area;
6138 	status_t status = locker.SetFromArea(id, area);
6139 	if (status != B_OK)
6140 		return status;
6141 
6142 	fill_area_info(area, info, size);
6143 	return B_OK;
6144 }
6145 
6146 
6147 status_t
6148 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6149 {
6150 	addr_t nextBase = *(addr_t*)cookie;
6151 
6152 	// we're already through the list
6153 	if (nextBase == (addr_t)-1)
6154 		return B_ENTRY_NOT_FOUND;
6155 
6156 	if (team == B_CURRENT_TEAM)
6157 		team = team_get_current_team_id();
6158 
6159 	AddressSpaceReadLocker locker(team);
6160 	if (!locker.IsLocked())
6161 		return B_BAD_TEAM_ID;
6162 
6163 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6164 	if (area == NULL) {
6165 		nextBase = (addr_t)-1;
6166 		return B_ENTRY_NOT_FOUND;
6167 	}
6168 
6169 	fill_area_info(area, info, size);
6170 	*cookie = (ssize_t)(area->Base() + 1);
6171 
6172 	return B_OK;
6173 }
6174 
6175 
6176 status_t
6177 set_area_protection(area_id area, uint32 newProtection)
6178 {
6179 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6180 		newProtection, true);
6181 }
6182 
6183 
6184 status_t
6185 resize_area(area_id areaID, size_t newSize)
6186 {
6187 	return vm_resize_area(areaID, newSize, true);
6188 }
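// A minimal usage sketch; the new size must be a multiple of B_PAGE_SIZE, so
// callers typically round up (area and neededBytes are hypothetical):
//
//	status_t error = resize_area(area, ROUNDUP(neededBytes, B_PAGE_SIZE));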
6189 
6190 
6191 /*!	Transfers the specified area to a new team. The caller must be the owner
6192 	of the area.
6193 */
6194 area_id
6195 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6196 	bool kernel)
6197 {
6198 	area_info info;
6199 	status_t status = get_area_info(id, &info);
6200 	if (status != B_OK)
6201 		return status;
6202 
6203 	if (info.team != thread_get_current_thread()->team->id)
6204 		return B_PERMISSION_DENIED;
6205 
6206 	// We need to mark the area cloneable so the following operations work.
6207 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6208 	if (status != B_OK)
6209 		return status;
6210 
6211 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6212 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6213 	if (clonedArea < 0)
6214 		return clonedArea;
6215 
6216 	status = vm_delete_area(info.team, id, kernel);
6217 	if (status != B_OK) {
6218 		vm_delete_area(target, clonedArea, kernel);
6219 		return status;
6220 	}
6221 
6222 	// Now we can reset the protection to whatever it was before.
6223 	set_area_protection(clonedArea, info.protection);
6224 
6225 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6226 
6227 	return clonedArea;
6228 }
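// A minimal usage sketch (id and targetTeam are hypothetical); the caller
// must own the area being transferred:
//
//	void* address = NULL;
//	area_id newArea = transfer_area(id, &address, B_ANY_ADDRESS, targetTeam,
//		true);
//	if (newArea < 0)
//		return newArea;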
6229 
6230 
6231 extern "C" area_id
6232 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6233 	size_t numBytes, uint32 addressSpec, uint32 protection,
6234 	void** _virtualAddress)
6235 {
6236 	if (!arch_vm_supports_protection(protection))
6237 		return B_NOT_SUPPORTED;
6238 
6239 	fix_protection(&protection);
6240 
6241 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6242 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6243 		false);
6244 }
6245 
6246 
6247 area_id
6248 clone_area(const char* name, void** _address, uint32 addressSpec,
6249 	uint32 protection, area_id source)
6250 {
6251 	if ((protection & B_KERNEL_PROTECTION) == 0)
6252 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6253 
6254 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6255 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6256 }
6257 
6258 
6259 area_id
6260 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6261 	uint32 protection, uint32 flags, uint32 guardSize,
6262 	const virtual_address_restrictions* virtualAddressRestrictions,
6263 	const physical_address_restrictions* physicalAddressRestrictions,
6264 	void** _address)
6265 {
6266 	fix_protection(&protection);
6267 
6268 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6269 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6270 		true, _address);
6271 }
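// A minimal usage sketch: create a fully locked kernel buffer with no special
// physical placement (the name and size are hypothetical):
//
//	void* address;
//	virtual_address_restrictions virtualRestrictions = {};
//	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
//	physical_address_restrictions physicalRestrictions = {};
//	area_id area = create_area_etc(B_SYSTEM_TEAM, "example buffer",
//		B_PAGE_SIZE * 4, B_FULL_LOCK,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
//		&physicalRestrictions, &address);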
6272 
6273 
6274 extern "C" area_id
6275 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6276 	size_t size, uint32 lock, uint32 protection)
6277 {
6278 	fix_protection(&protection);
6279 
6280 	virtual_address_restrictions virtualRestrictions = {};
6281 	virtualRestrictions.address = *_address;
6282 	virtualRestrictions.address_specification = addressSpec;
6283 	physical_address_restrictions physicalRestrictions = {};
6284 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6285 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6286 		true, _address);
6287 }
6288 
6289 
6290 status_t
6291 delete_area(area_id area)
6292 {
6293 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6294 }
6295 
6296 
6297 //	#pragma mark - Userland syscalls
6298 
6299 
6300 status_t
6301 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6302 	addr_t size)
6303 {
6304 	// filter out some unavailable values (for userland)
6305 	switch (addressSpec) {
6306 		case B_ANY_KERNEL_ADDRESS:
6307 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6308 			return B_BAD_VALUE;
6309 	}
6310 
6311 	addr_t address;
6312 
6313 	if (!IS_USER_ADDRESS(userAddress)
6314 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6315 		return B_BAD_ADDRESS;
6316 
6317 	status_t status = vm_reserve_address_range(
6318 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6319 		RESERVED_AVOID_BASE);
6320 	if (status != B_OK)
6321 		return status;
6322 
6323 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6324 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6325 			(void*)address, size);
6326 		return B_BAD_ADDRESS;
6327 	}
6328 
6329 	return B_OK;
6330 }
6331 
6332 
6333 status_t
6334 _user_unreserve_address_range(addr_t address, addr_t size)
6335 {
6336 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6337 		(void*)address, size);
6338 }
6339 
6340 
6341 area_id
6342 _user_area_for(void* address)
6343 {
6344 	return vm_area_for((addr_t)address, false);
6345 }
6346 
6347 
6348 area_id
6349 _user_find_area(const char* userName)
6350 {
6351 	char name[B_OS_NAME_LENGTH];
6352 
6353 	if (!IS_USER_ADDRESS(userName)
6354 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6355 		return B_BAD_ADDRESS;
6356 
6357 	return find_area(name);
6358 }
6359 
6360 
6361 status_t
6362 _user_get_area_info(area_id area, area_info* userInfo)
6363 {
6364 	if (!IS_USER_ADDRESS(userInfo))
6365 		return B_BAD_ADDRESS;
6366 
6367 	area_info info;
6368 	status_t status = get_area_info(area, &info);
6369 	if (status < B_OK)
6370 		return status;
6371 
6372 	// TODO: do we want to prevent userland from seeing kernel protections?
6373 	//info.protection &= B_USER_PROTECTION;
6374 
6375 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6376 		return B_BAD_ADDRESS;
6377 
6378 	return status;
6379 }
6380 
6381 
6382 status_t
6383 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6384 {
6385 	ssize_t cookie;
6386 
6387 	if (!IS_USER_ADDRESS(userCookie)
6388 		|| !IS_USER_ADDRESS(userInfo)
6389 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6390 		return B_BAD_ADDRESS;
6391 
6392 	area_info info;
6393 	status_t status = _get_next_area_info(team, &cookie, &info,
6394 		sizeof(area_info));
6395 	if (status != B_OK)
6396 		return status;
6397 
6398 	//info.protection &= B_USER_PROTECTION;
6399 
6400 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6401 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6402 		return B_BAD_ADDRESS;
6403 
6404 	return status;
6405 }
6406 
6407 
6408 status_t
6409 _user_set_area_protection(area_id area, uint32 newProtection)
6410 {
6411 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6412 		return B_BAD_VALUE;
6413 
6414 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6415 		newProtection, false);
6416 }
6417 
6418 
6419 status_t
6420 _user_resize_area(area_id area, size_t newSize)
6421 {
6422 	// TODO: Since we restrict deleting of areas to those owned by the team,
6423 	// we should also do that for resizing (check other functions, too).
6424 	return vm_resize_area(area, newSize, false);
6425 }
6426 
6427 
6428 area_id
6429 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6430 	team_id target)
6431 {
6432 	// filter out some unavailable values (for userland)
6433 	switch (addressSpec) {
6434 		case B_ANY_KERNEL_ADDRESS:
6435 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6436 			return B_BAD_VALUE;
6437 	}
6438 
6439 	void* address;
6440 	if (!IS_USER_ADDRESS(userAddress)
6441 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6442 		return B_BAD_ADDRESS;
6443 
6444 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6445 	if (newArea < B_OK)
6446 		return newArea;
6447 
6448 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6449 		return B_BAD_ADDRESS;
6450 
6451 	return newArea;
6452 }
6453 
6454 
6455 area_id
6456 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6457 	uint32 protection, area_id sourceArea)
6458 {
6459 	char name[B_OS_NAME_LENGTH];
6460 	void* address;
6461 
6462 	// filter out some unavailable values (for userland)
6463 	switch (addressSpec) {
6464 		case B_ANY_KERNEL_ADDRESS:
6465 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6466 			return B_BAD_VALUE;
6467 	}
6468 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6469 		return B_BAD_VALUE;
6470 
6471 	if (!IS_USER_ADDRESS(userName)
6472 		|| !IS_USER_ADDRESS(userAddress)
6473 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6474 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6475 		return B_BAD_ADDRESS;
6476 
6477 	fix_protection(&protection);
6478 
6479 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6480 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6481 		false);
6482 	if (clonedArea < B_OK)
6483 		return clonedArea;
6484 
6485 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6486 		delete_area(clonedArea);
6487 		return B_BAD_ADDRESS;
6488 	}
6489 
6490 	return clonedArea;
6491 }
6492 
6493 
6494 area_id
6495 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6496 	size_t size, uint32 lock, uint32 protection)
6497 {
6498 	char name[B_OS_NAME_LENGTH];
6499 	void* address;
6500 
6501 	// filter out some unavailable values (for userland)
6502 	switch (addressSpec) {
6503 		case B_ANY_KERNEL_ADDRESS:
6504 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6505 			return B_BAD_VALUE;
6506 	}
6507 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6508 		return B_BAD_VALUE;
6509 
6510 	if (!IS_USER_ADDRESS(userName)
6511 		|| !IS_USER_ADDRESS(userAddress)
6512 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6513 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6514 		return B_BAD_ADDRESS;
6515 
6516 	if (addressSpec == B_EXACT_ADDRESS
6517 		&& IS_KERNEL_ADDRESS(address))
6518 		return B_BAD_VALUE;
6519 
6520 	if (addressSpec == B_ANY_ADDRESS)
6521 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6522 	if (addressSpec == B_BASE_ADDRESS)
6523 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6524 
6525 	fix_protection(&protection);
6526 
6527 	virtual_address_restrictions virtualRestrictions = {};
6528 	virtualRestrictions.address = address;
6529 	virtualRestrictions.address_specification = addressSpec;
6530 	physical_address_restrictions physicalRestrictions = {};
6531 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6532 		size, lock, protection, 0, 0, &virtualRestrictions,
6533 		&physicalRestrictions, false, &address);
6534 
6535 	if (area >= B_OK
6536 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6537 		delete_area(area);
6538 		return B_BAD_ADDRESS;
6539 	}
6540 
6541 	return area;
6542 }
6543 
6544 
6545 status_t
6546 _user_delete_area(area_id area)
6547 {
6548 	// Unlike the BeOS implementation, you can now only delete areas
6549 	// that you have created yourself from userland.
6550 	// The documentation to delete_area() explicitly states that this
6551 	// will be restricted in the future, and so it will.
6552 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6553 }
6554 
6555 
6556 // TODO: create a BeOS style call for this!
6557 
6558 area_id
6559 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6560 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6561 	int fd, off_t offset)
6562 {
6563 	char name[B_OS_NAME_LENGTH];
6564 	void* address;
6565 	area_id area;
6566 
6567 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6568 		return B_BAD_VALUE;
6569 
6570 	fix_protection(&protection);
6571 
6572 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6573 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6574 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6575 		return B_BAD_ADDRESS;
6576 
6577 	if (addressSpec == B_EXACT_ADDRESS) {
6578 		if ((addr_t)address + size < (addr_t)address
6579 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6580 			return B_BAD_VALUE;
6581 		}
6582 		if (!IS_USER_ADDRESS(address)
6583 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6584 			return B_BAD_ADDRESS;
6585 		}
6586 	}
6587 
6588 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6589 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6590 		false);
6591 	if (area < B_OK)
6592 		return area;
6593 
6594 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6595 		return B_BAD_ADDRESS;
6596 
6597 	return area;
6598 }
6599 
6600 
6601 status_t
6602 _user_unmap_memory(void* _address, size_t size)
6603 {
6604 	addr_t address = (addr_t)_address;
6605 
6606 	// check params
6607 	if (size == 0 || (addr_t)address + size < (addr_t)address
6608 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6609 		return B_BAD_VALUE;
6610 	}
6611 
6612 	if (!IS_USER_ADDRESS(address)
6613 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6614 		return B_BAD_ADDRESS;
6615 	}
6616 
6617 	// Write lock the address space and ensure the address range is not wired.
6618 	AddressSpaceWriteLocker locker;
6619 	do {
6620 		status_t status = locker.SetTo(team_get_current_team_id());
6621 		if (status != B_OK)
6622 			return status;
6623 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6624 			size, &locker));
6625 
6626 	// unmap
6627 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6628 }
6629 
6630 
6631 status_t
6632 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6633 {
6634 	// check address range
6635 	addr_t address = (addr_t)_address;
6636 	size = PAGE_ALIGN(size);
6637 
6638 	if ((address % B_PAGE_SIZE) != 0)
6639 		return B_BAD_VALUE;
6640 	if (!is_user_address_range(_address, size)) {
6641 		// weird error code required by POSIX
6642 		return ENOMEM;
6643 	}
6644 
6645 	// extend and check protection
6646 	if ((protection & ~B_USER_PROTECTION) != 0)
6647 		return B_BAD_VALUE;
6648 
6649 	fix_protection(&protection);
6650 
6651 	// We need to write lock the address space, since we're going to play with
6652 	// the areas. Also make sure that none of the areas is wired and that we're
6653 	// actually allowed to change the protection.
6654 	AddressSpaceWriteLocker locker;
6655 
6656 	bool restart;
6657 	do {
6658 		restart = false;
6659 
6660 		status_t status = locker.SetTo(team_get_current_team_id());
6661 		if (status != B_OK)
6662 			return status;
6663 
6664 		// First round: Check whether the whole range is covered by areas and we
6665 		// are allowed to modify them.
6666 		addr_t currentAddress = address;
6667 		size_t sizeLeft = size;
6668 		while (sizeLeft > 0) {
6669 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6670 			if (area == NULL)
6671 				return B_NO_MEMORY;
6672 
6673 			if ((area->protection & B_KERNEL_AREA) != 0)
6674 				return B_NOT_ALLOWED;
6675 			if (area->protection_max != 0
6676 				&& (protection & area->protection_max) != (protection & B_USER_PROTECTION)) {
6677 				return B_NOT_ALLOWED;
6678 			}
6679 
6680 			addr_t offset = currentAddress - area->Base();
6681 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6682 
6683 			AreaCacheLocker cacheLocker(area);
6684 
6685 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6686 					&locker, &cacheLocker)) {
6687 				restart = true;
6688 				break;
6689 			}
6690 
6691 			cacheLocker.Unlock();
6692 
6693 			currentAddress += rangeSize;
6694 			sizeLeft -= rangeSize;
6695 		}
6696 	} while (restart);
6697 
6698 	// Second round: If the protections differ from that of the area, create a
6699 	// page protection array and re-map mapped pages.
6700 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6701 	addr_t currentAddress = address;
6702 	size_t sizeLeft = size;
6703 	while (sizeLeft > 0) {
6704 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6705 		if (area == NULL)
6706 			return B_NO_MEMORY;
6707 
6708 		addr_t offset = currentAddress - area->Base();
6709 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6710 
6711 		currentAddress += rangeSize;
6712 		sizeLeft -= rangeSize;
6713 
6714 		if (area->page_protections == NULL) {
6715 			if (area->protection == protection)
6716 				continue;
6717 			if (offset == 0 && rangeSize == area->Size()) {
6718 				status_t status = vm_set_area_protection(area->address_space->ID(),
6719 					area->id, protection, false);
6720 				if (status != B_OK)
6721 					return status;
6722 				continue;
6723 			}
6724 
6725 			status_t status = allocate_area_page_protections(area);
6726 			if (status != B_OK)
6727 				return status;
6728 		}
6729 
6730 		// We need to lock the complete cache chain, since we potentially unmap
6731 		// pages of lower caches.
6732 		VMCache* topCache = vm_area_get_locked_cache(area);
6733 		VMCacheChainLocker cacheChainLocker(topCache);
6734 		cacheChainLocker.LockAllSourceCaches();
6735 
6736 		for (addr_t pageAddress = area->Base() + offset;
6737 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6738 			map->Lock();
6739 
6740 			set_area_page_protection(area, pageAddress, protection);
6741 
6742 			phys_addr_t physicalAddress;
6743 			uint32 flags;
6744 
6745 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6746 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6747 				map->Unlock();
6748 				continue;
6749 			}
6750 
6751 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6752 			if (page == NULL) {
6753 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6754 					"\n", area, physicalAddress);
6755 				map->Unlock();
6756 				return B_ERROR;
6757 			}
6758 
6759 			// If the page is not in the topmost cache and write access is
6760 			// requested, we have to unmap it. Otherwise we can re-map it with
6761 			// the new protection.
6762 			bool unmapPage = page->Cache() != topCache
6763 				&& (protection & B_WRITE_AREA) != 0;
6764 
6765 			if (!unmapPage)
6766 				map->ProtectPage(area, pageAddress, protection);
6767 
6768 			map->Unlock();
6769 
6770 			if (unmapPage) {
6771 				DEBUG_PAGE_ACCESS_START(page);
6772 				unmap_page(area, pageAddress);
6773 				DEBUG_PAGE_ACCESS_END(page);
6774 			}
6775 		}
6776 	}
6777 
6778 	return B_OK;
6779 }
6780 
6781 
6782 status_t
6783 _user_sync_memory(void* _address, size_t size, uint32 flags)
6784 {
6785 	addr_t address = (addr_t)_address;
6786 	size = PAGE_ALIGN(size);
6787 
6788 	// check params
6789 	if ((address % B_PAGE_SIZE) != 0)
6790 		return B_BAD_VALUE;
6791 	if (!is_user_address_range(_address, size)) {
6792 		// weird error code required by POSIX
6793 		return ENOMEM;
6794 	}
6795 
6796 	bool writeSync = (flags & MS_SYNC) != 0;
6797 	bool writeAsync = (flags & MS_ASYNC) != 0;
6798 	if (writeSync && writeAsync)
6799 		return B_BAD_VALUE;
6800 
6801 	if (size == 0 || (!writeSync && !writeAsync))
6802 		return B_OK;
6803 
6804 	// iterate through the range and sync all concerned areas
6805 	while (size > 0) {
6806 		// read lock the address space
6807 		AddressSpaceReadLocker locker;
6808 		status_t error = locker.SetTo(team_get_current_team_id());
6809 		if (error != B_OK)
6810 			return error;
6811 
6812 		// get the first area
6813 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6814 		if (area == NULL)
6815 			return B_NO_MEMORY;
6816 
6817 		uint32 offset = address - area->Base();
6818 		size_t rangeSize = min_c(area->Size() - offset, size);
6819 		offset += area->cache_offset;
6820 
6821 		// lock the cache
6822 		AreaCacheLocker cacheLocker(area);
6823 		if (!cacheLocker)
6824 			return B_BAD_VALUE;
6825 		VMCache* cache = area->cache;
6826 
6827 		locker.Unlock();
6828 
6829 		uint32 firstPage = offset >> PAGE_SHIFT;
6830 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6831 
6832 		// write the pages
6833 		if (cache->type == CACHE_TYPE_VNODE) {
6834 			if (writeSync) {
6835 				// synchronous
6836 				error = vm_page_write_modified_page_range(cache, firstPage,
6837 					endPage);
6838 				if (error != B_OK)
6839 					return error;
6840 			} else {
6841 				// asynchronous
6842 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6843 				// TODO: This is probably not quite what is supposed to happen.
6844 				// Especially when a lot has to be written, it might take ages
6845 				// until it really hits the disk.
6846 			}
6847 		}
6848 
6849 		address += rangeSize;
6850 		size -= rangeSize;
6851 	}
6852 
6853 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6854 	// synchronize multiple mappings of the same file. In our VM they never get
6855 	// out of sync, though, so we don't have to do anything.
6856 
6857 	return B_OK;
6858 }
6859 
6860 
6861 status_t
6862 _user_memory_advice(void* _address, size_t size, uint32 advice)
6863 {
6864 	addr_t address = (addr_t)_address;
6865 	if ((address % B_PAGE_SIZE) != 0)
6866 		return B_BAD_VALUE;
6867 
6868 	size = PAGE_ALIGN(size);
6869 	if (!is_user_address_range(_address, size)) {
6870 		// weird error code required by POSIX
6871 		return B_NO_MEMORY;
6872 	}
6873 
6874 	switch (advice) {
6875 		case MADV_NORMAL:
6876 		case MADV_SEQUENTIAL:
6877 		case MADV_RANDOM:
6878 		case MADV_WILLNEED:
6879 		case MADV_DONTNEED:
6880 			// TODO: Implement!
6881 			break;
6882 
6883 		case MADV_FREE:
6884 		{
6885 			AddressSpaceWriteLocker locker;
6886 			do {
6887 				status_t status = locker.SetTo(team_get_current_team_id());
6888 				if (status != B_OK)
6889 					return status;
6890 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
6891 					address, size, &locker));
6892 
6893 			discard_address_range(locker.AddressSpace(), address, size, false);
6894 			break;
6895 		}
6896 
6897 		default:
6898 			return B_BAD_VALUE;
6899 	}
6900 
6901 	return B_OK;
6902 }
6903 
6904 
6905 status_t
6906 _user_get_memory_properties(team_id teamID, const void* address,
6907 	uint32* _protected, uint32* _lock)
6908 {
6909 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6910 		return B_BAD_ADDRESS;
6911 
6912 	AddressSpaceReadLocker locker;
6913 	status_t error = locker.SetTo(teamID);
6914 	if (error != B_OK)
6915 		return error;
6916 
6917 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6918 	if (area == NULL)
6919 		return B_NO_MEMORY;
6920 
6921 	uint32 protection = get_area_page_protection(area, (addr_t)address);
6922 	uint32 wiring = area->wiring;
6923 
6924 	locker.Unlock();
6925 
6926 	error = user_memcpy(_protected, &protection, sizeof(protection));
6927 	if (error != B_OK)
6928 		return error;
6929 
6930 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6931 
6932 	return error;
6933 }
6934 
6935 
6936 static status_t
6937 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
6938 {
6939 #if ENABLE_SWAP_SUPPORT
6940 	// check address range
6941 	addr_t address = (addr_t)_address;
6942 	size = PAGE_ALIGN(size);
6943 
6944 	if ((address % B_PAGE_SIZE) != 0)
6945 		return EINVAL;
6946 	if (!is_user_address_range(_address, size))
6947 		return EINVAL;
6948 
6949 	const addr_t endAddress = address + size;
6950 
6951 	AddressSpaceReadLocker addressSpaceLocker;
6952 	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
6953 	if (error != B_OK)
6954 		return error;
6955 	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
6956 
6957 	// iterate through all concerned areas
6958 	addr_t nextAddress = address;
6959 	while (nextAddress != endAddress) {
6960 		// get the next area
6961 		VMArea* area = addressSpace->LookupArea(nextAddress);
6962 		if (area == NULL) {
6963 			error = B_BAD_ADDRESS;
6964 			break;
6965 		}
6966 
6967 		const addr_t areaStart = nextAddress;
6968 		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
6969 		nextAddress = areaEnd;
6970 
6971 		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6972 		if (error != B_OK) {
6973 			// We don't need to unset or reset things on failure.
6974 			break;
6975 		}
6976 
6977 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6978 		VMAnonymousCache* anonCache = NULL;
6979 		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
6980 			// This memory can never be swapped anyway. Nothing to do.
6981 		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
6982 			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
6983 				areaEnd - areaStart, swappable);
6984 		} else {
6985 			// Some other cache type? We cannot affect anything here.
6986 			error = EINVAL;
6987 		}
6988 
6989 		cacheChainLocker.Unlock();
6990 
6991 		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6992 		if (error != B_OK)
6993 			break;
6994 	}
6995 
6996 	return error;
6997 #else
6998 	// No swap support? Nothing to do.
6999 	return B_OK;
7000 #endif
7001 }
7002 
7003 
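/*!	Note that this does not permanently wire the pages:
	user_set_memory_swappable() only marks the range as not swappable.
*/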
7004 status_t
7005 _user_mlock(const void* _address, size_t size)
7006 {
7007 	return user_set_memory_swappable(_address, size, false);
7008 }
7009 
7010 
7011 status_t
7012 _user_munlock(const void* _address, size_t size)
7013 {
7014 	// TODO: B_SHARED_AREAs need to be handled a bit differently:
7015 	// if multiple clones of an area had mlock() called on them,
7016 	// munlock() must also be called on all of them to actually unlock.
7017 	// (At present, the first munlock() will unlock all.)
7018 	// TODO: fork() should automatically unlock memory in the child.
7019 	return user_set_memory_swappable(_address, size, true);
7020 }
7021 
7022 
7023 // #pragma mark -- compatibility
7024 
7025 
7026 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7027 
7028 
7029 struct physical_entry_beos {
7030 	uint32	address;
7031 	uint32	size;
7032 };
7033 
7034 
7035 /*!	The physical_entry structure has changed. We need to translate it to the
7036 	old one.
7037 */
7038 extern "C" int32
7039 __get_memory_map_beos(const void* _address, size_t numBytes,
7040 	physical_entry_beos* table, int32 numEntries)
7041 {
7042 	if (numEntries <= 0)
7043 		return B_BAD_VALUE;
7044 
7045 	const uint8* address = (const uint8*)_address;
7046 
7047 	int32 count = 0;
7048 	while (numBytes > 0 && count < numEntries) {
7049 		physical_entry entry;
7050 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
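		// B_BUFFER_OVERFLOW only means the range doesn't fit into a single
		// entry; the entry we passed in has still been filled, so we can keep
		// translating the range chunk by chunk.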
7051 		if (result < 0) {
7052 			if (result != B_BUFFER_OVERFLOW)
7053 				return result;
7054 		}
7055 
7056 		if (entry.address >= (phys_addr_t)1 << 32) {
7057 			panic("get_memory_map(): Address is greater than 4 GB!");
7058 			return B_ERROR;
7059 		}
7060 
7061 		table[count].address = entry.address;
7062 		table[count++].size = entry.size;
7063 
7064 		address += entry.size;
7065 		numBytes -= entry.size;
7066 	}
7067 
7068 	// null-terminate the table, if possible
7069 	if (count < numEntries) {
7070 		table[count].address = 0;
7071 		table[count].size = 0;
7072 	}
7073 
7074 	return B_OK;
7075 }
7076 
7077 
7078 /*!	The type of the \a physicalAddress parameter has changed from void* to
7079 	phys_addr_t.
7080 */
7081 extern "C" area_id
7082 __map_physical_memory_beos(const char* name, void* physicalAddress,
7083 	size_t numBytes, uint32 addressSpec, uint32 protection,
7084 	void** _virtualAddress)
7085 {
7086 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7087 		addressSpec, protection, _virtualAddress);
7088 }
7089 
7090 
7091 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7092 	we meddle with the \a lock parameter to force physical memory below 4 GB.
7093 */
7094 extern "C" area_id
7095 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7096 	size_t size, uint32 lock, uint32 protection)
7097 {
7098 	switch (lock) {
7099 		case B_NO_LOCK:
7100 			break;
7101 		case B_FULL_LOCK:
7102 		case B_LAZY_LOCK:
7103 			lock = B_32_BIT_FULL_LOCK;
7104 			break;
7105 		case B_CONTIGUOUS:
7106 			lock = B_32_BIT_CONTIGUOUS;
7107 			break;
7108 	}
7109 
7110 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7111 		protection);
7112 }
7113 
7114 
7115 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7116 	"BASE");
7117 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7118 	"map_physical_memory@", "BASE");
7119 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7120 	"BASE");
7121 
7122 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7123 	"get_memory_map@@", "1_ALPHA3");
7124 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7125 	"map_physical_memory@@", "1_ALPHA3");
7126 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7127 	"1_ALPHA3");
7128 
7129 
7130 #else
7131 
7132 
7133 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7134 	"get_memory_map@@", "BASE");
7135 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7136 	"map_physical_memory@@", "BASE");
7137 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7138 	"BASE");
7139 
7140 
7141 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7142