xref: /haiku/src/system/kernel/vm/vm.cpp (revision 6f80a9801fedbe7355c4360bd204ba746ec3ec2d)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/ThreadAutoLock.h>
51 #include <vm/vm_page.h>
52 #include <vm/vm_priv.h>
53 #include <vm/VMAddressSpace.h>
54 #include <vm/VMArea.h>
55 #include <vm/VMCache.h>
56 
57 #include "VMAddressSpaceLocking.h"
58 #include "VMAnonymousCache.h"
59 #include "VMAnonymousNoSwapCache.h"
60 #include "IORequest.h"
61 
62 
63 //#define TRACE_VM
64 //#define TRACE_FAULTS
65 #ifdef TRACE_VM
66 #	define TRACE(x) dprintf x
67 #else
68 #	define TRACE(x) ;
69 #endif
70 #ifdef TRACE_FAULTS
71 #	define FTRACE(x) dprintf x
72 #else
73 #	define FTRACE(x) ;
74 #endif
75 
76 
77 namespace {
78 
79 class AreaCacheLocking {
80 public:
81 	inline bool Lock(VMCache* lockable)
82 	{
83 		return false;
84 	}
85 
86 	inline void Unlock(VMCache* lockable)
87 	{
88 		vm_area_put_locked_cache(lockable);
89 	}
90 };
91 
92 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
93 public:
94 	inline AreaCacheLocker(VMCache* cache = NULL)
95 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
96 	{
97 	}
98 
99 	inline AreaCacheLocker(VMArea* area)
100 		: AutoLocker<VMCache, AreaCacheLocking>()
101 	{
102 		SetTo(area);
103 	}
104 
105 	inline void SetTo(VMCache* cache, bool alreadyLocked)
106 	{
107 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
108 	}
109 
110 	inline void SetTo(VMArea* area)
111 	{
112 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
113 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
114 	}
115 };
116 
117 
118 class VMCacheChainLocker {
119 public:
120 	VMCacheChainLocker()
121 		:
122 		fTopCache(NULL),
123 		fBottomCache(NULL)
124 	{
125 	}
126 
127 	VMCacheChainLocker(VMCache* topCache)
128 		:
129 		fTopCache(topCache),
130 		fBottomCache(topCache)
131 	{
132 	}
133 
134 	~VMCacheChainLocker()
135 	{
136 		Unlock();
137 	}
138 
139 	void SetTo(VMCache* topCache)
140 	{
141 		fTopCache = topCache;
142 		fBottomCache = topCache;
143 
144 		if (topCache != NULL)
145 			topCache->SetUserData(NULL);
146 	}
147 
148 	VMCache* LockSourceCache()
149 	{
150 		if (fBottomCache == NULL || fBottomCache->source == NULL)
151 			return NULL;
152 
153 		VMCache* previousCache = fBottomCache;
154 
155 		fBottomCache = fBottomCache->source;
156 		fBottomCache->Lock();
157 		fBottomCache->AcquireRefLocked();
158 		fBottomCache->SetUserData(previousCache);
159 
160 		return fBottomCache;
161 	}
162 
163 	void LockAllSourceCaches()
164 	{
165 		while (LockSourceCache() != NULL) {
166 		}
167 	}
168 
169 	void Unlock(VMCache* exceptCache = NULL)
170 	{
171 		if (fTopCache == NULL)
172 			return;
173 
174 		// Unlock caches in source -> consumer direction. This is important to
175 		// avoid double-locking and a reversal of locking order in case a cache
176 		// is eligable for merging.
177 		VMCache* cache = fBottomCache;
178 		while (cache != NULL) {
179 			VMCache* nextCache = (VMCache*)cache->UserData();
180 			if (cache != exceptCache)
181 				cache->ReleaseRefAndUnlock(cache != fTopCache);
182 
183 			if (cache == fTopCache)
184 				break;
185 
186 			cache = nextCache;
187 		}
188 
189 		fTopCache = NULL;
190 		fBottomCache = NULL;
191 	}
192 
193 	void UnlockKeepRefs(bool keepTopCacheLocked)
194 	{
195 		if (fTopCache == NULL)
196 			return;
197 
198 		VMCache* nextCache = fBottomCache;
199 		VMCache* cache = NULL;
200 
201 		while (keepTopCacheLocked
202 				? nextCache != fTopCache : cache != fTopCache) {
203 			cache = nextCache;
204 			nextCache = (VMCache*)cache->UserData();
205 			cache->Unlock(cache != fTopCache);
206 		}
207 	}
208 
209 	void RelockCaches(bool topCacheLocked)
210 	{
211 		if (fTopCache == NULL)
212 			return;
213 
214 		VMCache* nextCache = fTopCache;
215 		VMCache* cache = NULL;
216 		if (topCacheLocked) {
217 			cache = nextCache;
218 			nextCache = cache->source;
219 		}
220 
221 		while (cache != fBottomCache && nextCache != NULL) {
222 			VMCache* consumer = cache;
223 			cache = nextCache;
224 			nextCache = cache->source;
225 			cache->Lock();
226 			cache->SetUserData(consumer);
227 		}
228 	}
229 
230 private:
231 	VMCache*	fTopCache;
232 	VMCache*	fBottomCache;
233 };
234 
235 } // namespace
236 
237 
238 // The memory reserve an allocation of the certain priority must not touch.
239 static const size_t kMemoryReserveForPriority[] = {
240 	VM_MEMORY_RESERVE_USER,		// user
241 	VM_MEMORY_RESERVE_SYSTEM,	// system
242 	0							// VIP
243 };
244 
245 
246 ObjectCache* gPageMappingsObjectCache;
247 
248 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
249 
250 static off_t sAvailableMemory;
251 static off_t sNeededMemory;
252 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
253 static uint32 sPageFaults;
254 
255 static VMPhysicalPageMapper* sPhysicalPageMapper;
256 
257 #if DEBUG_CACHE_LIST
258 
259 struct cache_info {
260 	VMCache*	cache;
261 	addr_t		page_count;
262 	addr_t		committed;
263 };
264 
265 static const int kCacheInfoTableCount = 100 * 1024;
266 static cache_info* sCacheInfoTable;
267 
268 #endif	// DEBUG_CACHE_LIST
269 
270 
271 // function declarations
272 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
273 	bool addressSpaceCleanup);
274 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
275 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
276 static status_t map_backing_store(VMAddressSpace* addressSpace,
277 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
278 	int protection, int protectionMax, int mapping, uint32 flags,
279 	const virtual_address_restrictions* addressRestrictions, bool kernel,
280 	VMArea** _area, void** _virtualAddress);
281 static void fix_protection(uint32* protection);
282 
283 
284 //	#pragma mark -
285 
286 
287 #if VM_PAGE_FAULT_TRACING
288 
289 namespace VMPageFaultTracing {
290 
291 class PageFaultStart : public AbstractTraceEntry {
292 public:
293 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
294 		:
295 		fAddress(address),
296 		fPC(pc),
297 		fWrite(write),
298 		fUser(user)
299 	{
300 		Initialized();
301 	}
302 
303 	virtual void AddDump(TraceOutput& out)
304 	{
305 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
306 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
307 	}
308 
309 private:
310 	addr_t	fAddress;
311 	addr_t	fPC;
312 	bool	fWrite;
313 	bool	fUser;
314 };
315 
316 
317 // page fault errors
318 enum {
319 	PAGE_FAULT_ERROR_NO_AREA		= 0,
320 	PAGE_FAULT_ERROR_KERNEL_ONLY,
321 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
322 	PAGE_FAULT_ERROR_READ_PROTECTED,
323 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
324 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
325 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
326 };
327 
328 
329 class PageFaultError : public AbstractTraceEntry {
330 public:
331 	PageFaultError(area_id area, status_t error)
332 		:
333 		fArea(area),
334 		fError(error)
335 	{
336 		Initialized();
337 	}
338 
339 	virtual void AddDump(TraceOutput& out)
340 	{
341 		switch (fError) {
342 			case PAGE_FAULT_ERROR_NO_AREA:
343 				out.Print("page fault error: no area");
344 				break;
345 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
346 				out.Print("page fault error: area: %ld, kernel only", fArea);
347 				break;
348 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
349 				out.Print("page fault error: area: %ld, write protected",
350 					fArea);
351 				break;
352 			case PAGE_FAULT_ERROR_READ_PROTECTED:
353 				out.Print("page fault error: area: %ld, read protected", fArea);
354 				break;
355 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
356 				out.Print("page fault error: area: %ld, execute protected",
357 					fArea);
358 				break;
359 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
360 				out.Print("page fault error: kernel touching bad user memory");
361 				break;
362 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
363 				out.Print("page fault error: no address space");
364 				break;
365 			default:
366 				out.Print("page fault error: area: %ld, error: %s", fArea,
367 					strerror(fError));
368 				break;
369 		}
370 	}
371 
372 private:
373 	area_id		fArea;
374 	status_t	fError;
375 };
376 
377 
378 class PageFaultDone : public AbstractTraceEntry {
379 public:
380 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
381 			vm_page* page)
382 		:
383 		fArea(area),
384 		fTopCache(topCache),
385 		fCache(cache),
386 		fPage(page)
387 	{
388 		Initialized();
389 	}
390 
391 	virtual void AddDump(TraceOutput& out)
392 	{
393 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
394 			"page: %p", fArea, fTopCache, fCache, fPage);
395 	}
396 
397 private:
398 	area_id		fArea;
399 	VMCache*	fTopCache;
400 	VMCache*	fCache;
401 	vm_page*	fPage;
402 };
403 
404 }	// namespace VMPageFaultTracing
405 
406 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
407 #else
408 #	define TPF(x) ;
409 #endif	// VM_PAGE_FAULT_TRACING
410 
411 
412 //	#pragma mark -
413 
414 
415 /*!	The page's cache must be locked.
416 */
417 static inline void
418 increment_page_wired_count(vm_page* page)
419 {
420 	if (!page->IsMapped())
421 		atomic_add(&gMappedPagesCount, 1);
422 	page->IncrementWiredCount();
423 }
424 
425 
426 /*!	The page's cache must be locked.
427 */
428 static inline void
429 decrement_page_wired_count(vm_page* page)
430 {
431 	page->DecrementWiredCount();
432 	if (!page->IsMapped())
433 		atomic_add(&gMappedPagesCount, -1);
434 }
435 
436 
437 static inline addr_t
438 virtual_page_address(VMArea* area, vm_page* page)
439 {
440 	return area->Base()
441 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
442 }
443 
444 
445 //! You need to have the address space locked when calling this function
446 static VMArea*
447 lookup_area(VMAddressSpace* addressSpace, area_id id)
448 {
449 	VMAreaHash::ReadLock();
450 
451 	VMArea* area = VMAreaHash::LookupLocked(id);
452 	if (area != NULL && area->address_space != addressSpace)
453 		area = NULL;
454 
455 	VMAreaHash::ReadUnlock();
456 
457 	return area;
458 }
459 
460 
461 static status_t
462 allocate_area_page_protections(VMArea* area)
463 {
464 	// In the page protections we store only the three user protections,
465 	// so we use 4 bits per page.
466 	size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
467 	area->page_protections = (uint8*)malloc_etc(bytes,
468 		area->address_space == VMAddressSpace::Kernel()
469 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
470 	if (area->page_protections == NULL)
471 		return B_NO_MEMORY;
472 
473 	// init the page protections for all pages to that of the area
474 	uint32 areaProtection = area->protection
475 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
476 	memset(area->page_protections, areaProtection | (areaProtection << 4),
477 		bytes);
478 	return B_OK;
479 }
480 
481 
482 static inline void
483 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
484 {
485 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
486 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
487 	uint8& entry = area->page_protections[pageIndex / 2];
488 	if (pageIndex % 2 == 0)
489 		entry = (entry & 0xf0) | protection;
490 	else
491 		entry = (entry & 0x0f) | (protection << 4);
492 }
493 
494 
495 static inline uint32
496 get_area_page_protection(VMArea* area, addr_t pageAddress)
497 {
498 	if (area->page_protections == NULL)
499 		return area->protection;
500 
501 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
502 	uint32 protection = area->page_protections[pageIndex / 2];
503 	if (pageIndex % 2 == 0)
504 		protection &= 0x0f;
505 	else
506 		protection >>= 4;
507 
508 	uint32 kernelProtection = 0;
509 	if ((protection & B_READ_AREA) != 0)
510 		kernelProtection |= B_KERNEL_READ_AREA;
511 	if ((protection & B_WRITE_AREA) != 0)
512 		kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 	// If this is a kernel area we return only the kernel flags.
515 	if (area->address_space == VMAddressSpace::Kernel())
516 		return kernelProtection;
517 
518 	return protection | kernelProtection;
519 }
520 
521 
522 /*!	The caller must have reserved enough pages the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so we must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active, since
575 		// otherwise the page daemon wouldn't come to keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
586 
587 
588 /*!	If \a preserveModified is \c true, the caller must hold the lock of the
589 	page's cache.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	If \a preserveModified is \c true, the caller must hold the lock of all
600 	mapped pages' caches.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 static inline bool
610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
611 {
612 	if (address < area->Base()) {
613 		offset = area->Base() - address;
614 		if (offset >= size)
615 			return false;
616 
617 		address = area->Base();
618 		size -= offset;
619 		offset = 0;
620 		if (size > area->Size())
621 			size = area->Size();
622 
623 		return true;
624 	}
625 
626 	offset = address - area->Base();
627 	if (offset >= area->Size())
628 		return false;
629 
630 	if (size >= area->Size() - offset)
631 		size = area->Size() - offset;
632 
633 	return true;
634 }
635 
636 
637 /*!	Cuts a piece out of an area. If the given cut range covers the complete
638 	area, it is deleted. If it covers the beginning or the end, the area is
639 	resized accordingly. If the range covers some part in the middle of the
640 	area, it is split in two; in this case the second area is returned via
641 	\a _secondArea (the variable is left untouched in the other cases).
642 	The address space must be write locked.
643 	The caller must ensure that no part of the given range is wired.
644 */
645 static status_t
646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
647 	addr_t size, VMArea** _secondArea, bool kernel)
648 {
649 	addr_t offset;
650 	if (!intersect_area(area, address, size, offset))
651 		return B_OK;
652 
653 	// Is the area fully covered?
654 	if (address == area->Base() && size == area->Size()) {
655 		delete_area(addressSpace, area, false);
656 		return B_OK;
657 	}
658 
659 	int priority;
660 	uint32 allocationFlags;
661 	if (addressSpace == VMAddressSpace::Kernel()) {
662 		priority = VM_PRIORITY_SYSTEM;
663 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
664 			| HEAP_DONT_LOCK_KERNEL_SPACE;
665 	} else {
666 		priority = VM_PRIORITY_USER;
667 		allocationFlags = 0;
668 	}
669 
670 	VMCache* cache = vm_area_get_locked_cache(area);
671 	VMCacheChainLocker cacheChainLocker(cache);
672 	cacheChainLocker.LockAllSourceCaches();
673 
674 	// If no one else uses the area's cache and it's an anonymous cache, we can
675 	// resize or split it, too.
676 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
677 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
678 
679 	// Cut the end only?
680 	if (offset > 0 && size == area->Size() - offset) {
681 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
682 			allocationFlags);
683 		if (error != B_OK)
684 			return error;
685 
686 		// unmap pages
687 		unmap_pages(area, address, size);
688 
689 		if (onlyCacheUser) {
690 			// Since VMCache::Resize() can temporarily drop the lock, we must
691 			// unlock all lower caches to prevent locking order inversion.
692 			cacheChainLocker.Unlock(cache);
693 			cache->Resize(cache->virtual_base + offset, priority);
694 			cache->ReleaseRefAndUnlock();
695 		}
696 
697 		return B_OK;
698 	}
699 
700 	// Cut the beginning only?
701 	if (area->Base() == address) {
702 		// resize the area
703 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
704 			allocationFlags);
705 		if (error != B_OK)
706 			return error;
707 
708 		// unmap pages
709 		unmap_pages(area, address, size);
710 
711 		if (onlyCacheUser) {
712 			// Since VMCache::Rebase() can temporarily drop the lock, we must
713 			// unlock all lower caches to prevent locking order inversion.
714 			cacheChainLocker.Unlock(cache);
715 			cache->Rebase(cache->virtual_base + size, priority);
716 			cache->ReleaseRefAndUnlock();
717 		}
718 		area->cache_offset += size;
719 
720 		return B_OK;
721 	}
722 
723 	// The tough part -- cut a piece out of the middle of the area.
724 	// We do that by shrinking the area to the begin section and creating a
725 	// new area for the end section.
726 	addr_t firstNewSize = offset;
727 	addr_t secondBase = address + size;
728 	addr_t secondSize = area->Size() - offset - size;
729 
730 	// unmap pages
731 	unmap_pages(area, address, area->Size() - firstNewSize);
732 
733 	// resize the area
734 	addr_t oldSize = area->Size();
735 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
736 		allocationFlags);
737 	if (error != B_OK)
738 		return error;
739 
740 	virtual_address_restrictions addressRestrictions = {};
741 	addressRestrictions.address = (void*)secondBase;
742 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
743 	VMArea* secondArea;
744 
745 	if (onlyCacheUser) {
746 		// Create a new cache for the second area.
747 		VMCache* secondCache;
748 		error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0,
749 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
750 		if (error != B_OK) {
751 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
752 			return error;
753 		}
754 
755 		secondCache->Lock();
756 		secondCache->temporary = cache->temporary;
757 		secondCache->virtual_base = area->cache_offset;
758 		secondCache->virtual_end = area->cache_offset + secondSize;
759 
760 		// Transfer the concerned pages from the first cache.
761 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
762 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
763 			area->cache_offset);
764 
765 		if (error == B_OK) {
766 			// Since VMCache::Resize() can temporarily drop the lock, we must
767 			// unlock all lower caches to prevent locking order inversion.
768 			cacheChainLocker.Unlock(cache);
769 			cache->Resize(cache->virtual_base + firstNewSize, priority);
770 			// Don't unlock the cache yet because we might have to resize it
771 			// back.
772 
773 			// Map the second area.
774 			error = map_backing_store(addressSpace, secondCache,
775 				area->cache_offset, area->name, secondSize, area->wiring,
776 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
777 				&addressRestrictions, kernel, &secondArea, NULL);
778 		}
779 
780 		if (error != B_OK) {
781 			// Restore the original cache.
782 			cache->Resize(cache->virtual_base + oldSize, priority);
783 
784 			// Move the pages back.
785 			status_t readoptStatus = cache->Adopt(secondCache,
786 				area->cache_offset, secondSize, adoptOffset);
787 			if (readoptStatus != B_OK) {
788 				// Some (swap) pages have not been moved back and will be lost
789 				// once the second cache is deleted.
790 				panic("failed to restore cache range: %s",
791 					strerror(readoptStatus));
792 
793 				// TODO: Handle out of memory cases by freeing memory and
794 				// retrying.
795 			}
796 
797 			cache->ReleaseRefAndUnlock();
798 			secondCache->ReleaseRefAndUnlock();
799 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
800 			return error;
801 		}
802 
803 		// Now we can unlock it.
804 		cache->ReleaseRefAndUnlock();
805 		secondCache->Unlock();
806 	} else {
807 		error = map_backing_store(addressSpace, cache, area->cache_offset
808 			+ (secondBase - area->Base()),
809 			area->name, secondSize, area->wiring, area->protection,
810 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
811 			&addressRestrictions, kernel, &secondArea, NULL);
812 		if (error != B_OK) {
813 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
814 			return error;
815 		}
816 		// We need a cache reference for the new area.
817 		cache->AcquireRefLocked();
818 	}
819 
820 	if (_secondArea != NULL)
821 		*_secondArea = secondArea;
822 
823 	return B_OK;
824 }
825 
826 
827 /*!	Deletes or cuts all areas in the given address range.
828 	The address space must be write-locked.
829 	The caller must ensure that no part of the given range is wired.
830 */
831 static status_t
832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
833 	bool kernel)
834 {
835 	size = PAGE_ALIGN(size);
836 
837 	// Check, whether the caller is allowed to modify the concerned areas.
838 	if (!kernel) {
839 		for (VMAddressSpace::AreaRangeIterator it
840 				= addressSpace->GetAreaRangeIterator(address, size);
841 			VMArea* area = it.Next();) {
842 
843 			if ((area->protection & B_KERNEL_AREA) != 0) {
844 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
845 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
846 					team_get_current_team_id(), area->id, area->name);
847 				return B_NOT_ALLOWED;
848 			}
849 		}
850 	}
851 
852 	for (VMAddressSpace::AreaRangeIterator it
853 			= addressSpace->GetAreaRangeIterator(address, size);
854 		VMArea* area = it.Next();) {
855 
856 		status_t error = cut_area(addressSpace, area, address, size, NULL,
857 			kernel);
858 		if (error != B_OK)
859 			return error;
860 			// Failing after already messing with areas is ugly, but we
861 			// can't do anything about it.
862 	}
863 
864 	return B_OK;
865 }
866 
867 
868 static status_t
869 discard_area_range(VMArea* area, addr_t address, addr_t size)
870 {
871 	addr_t offset;
872 	if (!intersect_area(area, address, size, offset))
873 		return B_OK;
874 
875 	// If someone else uses the area's cache or it's not an anonymous cache, we
876 	// can't discard.
877 	VMCache* cache = vm_area_get_locked_cache(area);
878 	if (cache->areas != area || area->cache_next != NULL
879 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
880 		return B_OK;
881 	}
882 
883 	VMCacheChainLocker cacheChainLocker(cache);
884 	cacheChainLocker.LockAllSourceCaches();
885 
886 	unmap_pages(area, address, size);
887 
888 	// Since VMCache::Discard() can temporarily drop the lock, we must
889 	// unlock all lower caches to prevent locking order inversion.
890 	cacheChainLocker.Unlock(cache);
891 	cache->Discard(cache->virtual_base + offset, size);
892 	cache->ReleaseRefAndUnlock();
893 
894 	return B_OK;
895 }
896 
897 
898 static status_t
899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
900 	bool kernel)
901 {
902 	for (VMAddressSpace::AreaRangeIterator it
903 		= addressSpace->GetAreaRangeIterator(address, size);
904 			VMArea* area = it.Next();) {
905 		status_t error = discard_area_range(area, address, size);
906 		if (error != B_OK)
907 			return error;
908 	}
909 
910 	return B_OK;
911 }
912 
913 
914 /*! You need to hold the lock of the cache and the write lock of the address
915 	space when calling this function.
916 	Note, that in case of error your cache will be temporarily unlocked.
917 	If \a addressSpec is \c B_EXACT_ADDRESS and the
918 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
919 	that no part of the specified address range (base \c *_virtualAddress, size
920 	\a size) is wired.
921 */
922 static status_t
923 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
924 	const char* areaName, addr_t size, int wiring, int protection,
925 	int protectionMax, int mapping,
926 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
927 	bool kernel, VMArea** _area, void** _virtualAddress)
928 {
929 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
930 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
931 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
932 		addressSpace, cache, addressRestrictions->address, offset, size,
933 		addressRestrictions->address_specification, wiring, protection,
934 		protectionMax, _area, areaName));
935 	cache->AssertLocked();
936 
937 	if (size == 0) {
938 #if KDEBUG
939 		panic("map_backing_store(): called with size=0 for area '%s'!",
940 			areaName);
941 #endif
942 		return B_BAD_VALUE;
943 	}
944 
945 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
946 		| HEAP_DONT_LOCK_KERNEL_SPACE;
947 	int priority;
948 	if (addressSpace != VMAddressSpace::Kernel()) {
949 		priority = VM_PRIORITY_USER;
950 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
951 		priority = VM_PRIORITY_VIP;
952 		allocationFlags |= HEAP_PRIORITY_VIP;
953 	} else
954 		priority = VM_PRIORITY_SYSTEM;
955 
956 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
957 		allocationFlags);
958 	if (mapping != REGION_PRIVATE_MAP)
959 		area->protection_max = protectionMax & B_USER_PROTECTION;
960 	if (area == NULL)
961 		return B_NO_MEMORY;
962 
963 	status_t status;
964 
965 	// if this is a private map, we need to create a new cache
966 	// to handle the private copies of pages as they are written to
967 	VMCache* sourceCache = cache;
968 	if (mapping == REGION_PRIVATE_MAP) {
969 		VMCache* newCache;
970 
971 		// create an anonymous cache
972 		status = VMCacheFactory::CreateAnonymousCache(newCache,
973 			(protection & B_STACK_AREA) != 0
974 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
975 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
976 		if (status != B_OK)
977 			goto err1;
978 
979 		newCache->Lock();
980 		newCache->temporary = 1;
981 		newCache->virtual_base = offset;
982 		newCache->virtual_end = offset + size;
983 
984 		cache->AddConsumer(newCache);
985 
986 		cache = newCache;
987 	}
988 
989 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
990 		status = cache->SetMinimalCommitment(size, priority);
991 		if (status != B_OK)
992 			goto err2;
993 	}
994 
995 	// check to see if this address space has entered DELETE state
996 	if (addressSpace->IsBeingDeleted()) {
997 		// okay, someone is trying to delete this address space now, so we can't
998 		// insert the area, so back out
999 		status = B_BAD_TEAM_ID;
1000 		goto err2;
1001 	}
1002 
1003 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1004 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1005 		status = unmap_address_range(addressSpace,
1006 			(addr_t)addressRestrictions->address, size, kernel);
1007 		if (status != B_OK)
1008 			goto err2;
1009 	}
1010 
1011 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1012 		allocationFlags, _virtualAddress);
1013 	if (status == B_NO_MEMORY
1014 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1015 		// Due to how many locks are held, we cannot wait here for space to be
1016 		// freed up, but we can at least notify the low_resource handler.
1017 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1018 	}
1019 	if (status != B_OK)
1020 		goto err2;
1021 
1022 	// attach the cache to the area
1023 	area->cache = cache;
1024 	area->cache_offset = offset;
1025 
1026 	// point the cache back to the area
1027 	cache->InsertAreaLocked(area);
1028 	if (mapping == REGION_PRIVATE_MAP)
1029 		cache->Unlock();
1030 
1031 	// insert the area in the global area hash table
1032 	VMAreaHash::Insert(area);
1033 
1034 	// grab a ref to the address space (the area holds this)
1035 	addressSpace->Get();
1036 
1037 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1038 //		cache, sourceCache, areaName, area);
1039 
1040 	*_area = area;
1041 	return B_OK;
1042 
1043 err2:
1044 	if (mapping == REGION_PRIVATE_MAP) {
1045 		// We created this cache, so we must delete it again. Note, that we
1046 		// need to temporarily unlock the source cache or we'll otherwise
1047 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1048 		sourceCache->Unlock();
1049 		cache->ReleaseRefAndUnlock();
1050 		sourceCache->Lock();
1051 	}
1052 err1:
1053 	addressSpace->DeleteArea(area, allocationFlags);
1054 	return status;
1055 }
1056 
1057 
1058 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1059 	  locker1, locker2).
1060 */
1061 template<typename LockerType1, typename LockerType2>
1062 static inline bool
1063 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1064 {
1065 	area->cache->AssertLocked();
1066 
1067 	VMAreaUnwiredWaiter waiter;
1068 	if (!area->AddWaiterIfWired(&waiter))
1069 		return false;
1070 
1071 	// unlock everything and wait
1072 	if (locker1 != NULL)
1073 		locker1->Unlock();
1074 	if (locker2 != NULL)
1075 		locker2->Unlock();
1076 
1077 	waiter.waitEntry.Wait();
1078 
1079 	return true;
1080 }
1081 
1082 
1083 /*!	Checks whether the given area has any wired ranges intersecting with the
1084 	specified range and waits, if so.
1085 
1086 	When it has to wait, the function calls \c Unlock() on both \a locker1
1087 	and \a locker2, if given.
1088 	The area's top cache must be locked and must be unlocked as a side effect
1089 	of calling \c Unlock() on either \a locker1 or \a locker2.
1090 
1091 	If the function does not have to wait it does not modify or unlock any
1092 	object.
1093 
1094 	\param area The area to be checked.
1095 	\param base The base address of the range to check.
1096 	\param size The size of the address range to check.
1097 	\param locker1 An object to be unlocked when before starting to wait (may
1098 		be \c NULL).
1099 	\param locker2 An object to be unlocked when before starting to wait (may
1100 		be \c NULL).
1101 	\return \c true, if the function had to wait, \c false otherwise.
1102 */
1103 template<typename LockerType1, typename LockerType2>
1104 static inline bool
1105 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1106 	LockerType1* locker1, LockerType2* locker2)
1107 {
1108 	area->cache->AssertLocked();
1109 
1110 	VMAreaUnwiredWaiter waiter;
1111 	if (!area->AddWaiterIfWired(&waiter, base, size))
1112 		return false;
1113 
1114 	// unlock everything and wait
1115 	if (locker1 != NULL)
1116 		locker1->Unlock();
1117 	if (locker2 != NULL)
1118 		locker2->Unlock();
1119 
1120 	waiter.waitEntry.Wait();
1121 
1122 	return true;
1123 }
1124 
1125 
1126 /*!	Checks whether the given address space has any wired ranges intersecting
1127 	with the specified range and waits, if so.
1128 
1129 	Similar to wait_if_area_range_is_wired(), with the following differences:
1130 	- All areas intersecting with the range are checked (respectively all until
1131 	  one is found that contains a wired range intersecting with the given
1132 	  range).
1133 	- The given address space must at least be read-locked and must be unlocked
1134 	  when \c Unlock() is called on \a locker.
1135 	- None of the areas' caches are allowed to be locked.
1136 */
1137 template<typename LockerType>
1138 static inline bool
1139 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1140 	size_t size, LockerType* locker)
1141 {
1142 	for (VMAddressSpace::AreaRangeIterator it
1143 		= addressSpace->GetAreaRangeIterator(base, size);
1144 			VMArea* area = it.Next();) {
1145 
1146 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1147 
1148 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1149 			return true;
1150 	}
1151 
1152 	return false;
1153 }
1154 
1155 
1156 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1157 	It must be called in a situation where the kernel address space may be
1158 	locked.
1159 */
1160 status_t
1161 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1162 {
1163 	AddressSpaceReadLocker locker;
1164 	VMArea* area;
1165 	status_t status = locker.SetFromArea(id, area);
1166 	if (status != B_OK)
1167 		return status;
1168 
1169 	if (area->page_protections == NULL) {
1170 		status = allocate_area_page_protections(area);
1171 		if (status != B_OK)
1172 			return status;
1173 	}
1174 
1175 	*cookie = (void*)area;
1176 	return B_OK;
1177 }
1178 
1179 
1180 /*!	This is a debug helper function that can only be used with very specific
1181 	use cases.
1182 	Sets protection for the given address range to the protection specified.
1183 	If \a protection is 0 then the involved pages will be marked non-present
1184 	in the translation map to cause a fault on access. The pages aren't
1185 	actually unmapped however so that they can be marked present again with
1186 	additional calls to this function. For this to work the area must be
1187 	fully locked in memory so that the pages aren't otherwise touched.
1188 	This function does not lock the kernel address space and needs to be
1189 	supplied with a \a cookie retrieved from a successful call to
1190 	vm_prepare_kernel_area_debug_protection().
1191 */
1192 status_t
1193 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1194 	uint32 protection)
1195 {
1196 	// check address range
1197 	addr_t address = (addr_t)_address;
1198 	size = PAGE_ALIGN(size);
1199 
1200 	if ((address % B_PAGE_SIZE) != 0
1201 		|| (addr_t)address + size < (addr_t)address
1202 		|| !IS_KERNEL_ADDRESS(address)
1203 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1204 		return B_BAD_VALUE;
1205 	}
1206 
1207 	// Translate the kernel protection to user protection as we only store that.
1208 	if ((protection & B_KERNEL_READ_AREA) != 0)
1209 		protection |= B_READ_AREA;
1210 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1211 		protection |= B_WRITE_AREA;
1212 
1213 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1214 	VMTranslationMap* map = addressSpace->TranslationMap();
1215 	VMArea* area = (VMArea*)cookie;
1216 
1217 	addr_t offset = address - area->Base();
1218 	if (area->Size() - offset < size) {
1219 		panic("protect range not fully within supplied area");
1220 		return B_BAD_VALUE;
1221 	}
1222 
1223 	if (area->page_protections == NULL) {
1224 		panic("area has no page protections");
1225 		return B_BAD_VALUE;
1226 	}
1227 
1228 	// Invalidate the mapping entries so any access to them will fault or
1229 	// restore the mapping entries unchanged so that lookup will success again.
1230 	map->Lock();
1231 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1232 	map->Unlock();
1233 
1234 	// And set the proper page protections so that the fault case will actually
1235 	// fail and not simply try to map a new page.
1236 	for (addr_t pageAddress = address; pageAddress < address + size;
1237 			pageAddress += B_PAGE_SIZE) {
1238 		set_area_page_protection(area, pageAddress, protection);
1239 	}
1240 
1241 	return B_OK;
1242 }
1243 
1244 
1245 status_t
1246 vm_block_address_range(const char* name, void* address, addr_t size)
1247 {
1248 	if (!arch_vm_supports_protection(0))
1249 		return B_NOT_SUPPORTED;
1250 
1251 	AddressSpaceWriteLocker locker;
1252 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1253 	if (status != B_OK)
1254 		return status;
1255 
1256 	VMAddressSpace* addressSpace = locker.AddressSpace();
1257 
1258 	// create an anonymous cache
1259 	VMCache* cache;
1260 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1261 		VM_PRIORITY_SYSTEM);
1262 	if (status != B_OK)
1263 		return status;
1264 
1265 	cache->temporary = 1;
1266 	cache->virtual_end = size;
1267 	cache->Lock();
1268 
1269 	VMArea* area;
1270 	virtual_address_restrictions addressRestrictions = {};
1271 	addressRestrictions.address = address;
1272 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1273 	status = map_backing_store(addressSpace, cache, 0, name, size,
1274 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
1275 		true, &area, NULL);
1276 	if (status != B_OK) {
1277 		cache->ReleaseRefAndUnlock();
1278 		return status;
1279 	}
1280 
1281 	cache->Unlock();
1282 	area->cache_type = CACHE_TYPE_RAM;
1283 	return area->id;
1284 }
1285 
1286 
1287 status_t
1288 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1289 {
1290 	AddressSpaceWriteLocker locker(team);
1291 	if (!locker.IsLocked())
1292 		return B_BAD_TEAM_ID;
1293 
1294 	VMAddressSpace* addressSpace = locker.AddressSpace();
1295 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1296 		addressSpace == VMAddressSpace::Kernel()
1297 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1298 }
1299 
1300 
1301 status_t
1302 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1303 	addr_t size, uint32 flags)
1304 {
1305 	if (size == 0)
1306 		return B_BAD_VALUE;
1307 
1308 	AddressSpaceWriteLocker locker(team);
1309 	if (!locker.IsLocked())
1310 		return B_BAD_TEAM_ID;
1311 
1312 	virtual_address_restrictions addressRestrictions = {};
1313 	addressRestrictions.address = *_address;
1314 	addressRestrictions.address_specification = addressSpec;
1315 	VMAddressSpace* addressSpace = locker.AddressSpace();
1316 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1317 		addressSpace == VMAddressSpace::Kernel()
1318 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1319 		_address);
1320 }
1321 
1322 
1323 area_id
1324 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1325 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1326 	const virtual_address_restrictions* virtualAddressRestrictions,
1327 	const physical_address_restrictions* physicalAddressRestrictions,
1328 	bool kernel, void** _address)
1329 {
1330 	VMArea* area;
1331 	VMCache* cache;
1332 	vm_page* page = NULL;
1333 	bool isStack = (protection & B_STACK_AREA) != 0;
1334 	page_num_t guardPages;
1335 	bool canOvercommit = false;
1336 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1337 		? VM_PAGE_ALLOC_CLEAR : 0;
1338 
1339 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1340 		team, name, size));
1341 
1342 	size = PAGE_ALIGN(size);
1343 	guardSize = PAGE_ALIGN(guardSize);
1344 	guardPages = guardSize / B_PAGE_SIZE;
1345 
1346 	if (size == 0 || size < guardSize)
1347 		return B_BAD_VALUE;
1348 	if (!arch_vm_supports_protection(protection))
1349 		return B_NOT_SUPPORTED;
1350 
1351 	if (team == B_CURRENT_TEAM)
1352 		team = VMAddressSpace::CurrentID();
1353 	if (team < 0)
1354 		return B_BAD_TEAM_ID;
1355 
1356 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1357 		canOvercommit = true;
1358 
1359 #ifdef DEBUG_KERNEL_STACKS
1360 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1361 		isStack = true;
1362 #endif
1363 
1364 	// check parameters
1365 	switch (virtualAddressRestrictions->address_specification) {
1366 		case B_ANY_ADDRESS:
1367 		case B_EXACT_ADDRESS:
1368 		case B_BASE_ADDRESS:
1369 		case B_ANY_KERNEL_ADDRESS:
1370 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1371 		case B_RANDOMIZED_ANY_ADDRESS:
1372 		case B_RANDOMIZED_BASE_ADDRESS:
1373 			break;
1374 
1375 		default:
1376 			return B_BAD_VALUE;
1377 	}
1378 
1379 	// If low or high physical address restrictions are given, we force
1380 	// B_CONTIGUOUS wiring, since only then we'll use
1381 	// vm_page_allocate_page_run() which deals with those restrictions.
1382 	if (physicalAddressRestrictions->low_address != 0
1383 		|| physicalAddressRestrictions->high_address != 0) {
1384 		wiring = B_CONTIGUOUS;
1385 	}
1386 
1387 	physical_address_restrictions stackPhysicalRestrictions;
1388 	bool doReserveMemory = false;
1389 	switch (wiring) {
1390 		case B_NO_LOCK:
1391 			break;
1392 		case B_FULL_LOCK:
1393 		case B_LAZY_LOCK:
1394 		case B_CONTIGUOUS:
1395 			doReserveMemory = true;
1396 			break;
1397 		case B_ALREADY_WIRED:
1398 			break;
1399 		case B_LOMEM:
1400 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1401 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1402 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1403 			wiring = B_CONTIGUOUS;
1404 			doReserveMemory = true;
1405 			break;
1406 		case B_32_BIT_FULL_LOCK:
1407 			if (B_HAIKU_PHYSICAL_BITS <= 32
1408 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1409 				wiring = B_FULL_LOCK;
1410 				doReserveMemory = true;
1411 				break;
1412 			}
1413 			// TODO: We don't really support this mode efficiently. Just fall
1414 			// through for now ...
1415 		case B_32_BIT_CONTIGUOUS:
1416 			#if B_HAIKU_PHYSICAL_BITS > 32
1417 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1418 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1419 					stackPhysicalRestrictions.high_address
1420 						= (phys_addr_t)1 << 32;
1421 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1422 				}
1423 			#endif
1424 			wiring = B_CONTIGUOUS;
1425 			doReserveMemory = true;
1426 			break;
1427 		default:
1428 			return B_BAD_VALUE;
1429 	}
1430 
1431 	// Optimization: For a single-page contiguous allocation without low/high
1432 	// memory restriction B_FULL_LOCK wiring suffices.
1433 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1434 		&& physicalAddressRestrictions->low_address == 0
1435 		&& physicalAddressRestrictions->high_address == 0) {
1436 		wiring = B_FULL_LOCK;
1437 	}
1438 
1439 	// For full lock or contiguous areas we're also going to map the pages and
1440 	// thus need to reserve pages for the mapping backend upfront.
1441 	addr_t reservedMapPages = 0;
1442 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1443 		AddressSpaceWriteLocker locker;
1444 		status_t status = locker.SetTo(team);
1445 		if (status != B_OK)
1446 			return status;
1447 
1448 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1449 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1450 	}
1451 
1452 	int priority;
1453 	if (team != VMAddressSpace::KernelID())
1454 		priority = VM_PRIORITY_USER;
1455 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1456 		priority = VM_PRIORITY_VIP;
1457 	else
1458 		priority = VM_PRIORITY_SYSTEM;
1459 
1460 	// Reserve memory before acquiring the address space lock. This reduces the
1461 	// chances of failure, since while holding the write lock to the address
1462 	// space (if it is the kernel address space that is), the low memory handler
1463 	// won't be able to free anything for us.
1464 	addr_t reservedMemory = 0;
1465 	if (doReserveMemory) {
1466 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1467 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1468 			return B_NO_MEMORY;
1469 		reservedMemory = size;
1470 		// TODO: We don't reserve the memory for the pages for the page
1471 		// directories/tables. We actually need to do since we currently don't
1472 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1473 		// there are actually less physical pages than there should be, which
1474 		// can get the VM into trouble in low memory situations.
1475 	}
1476 
1477 	AddressSpaceWriteLocker locker;
1478 	VMAddressSpace* addressSpace;
1479 	status_t status;
1480 
1481 	// For full lock areas reserve the pages before locking the address
1482 	// space. E.g. block caches can't release their memory while we hold the
1483 	// address space lock.
1484 	page_num_t reservedPages = reservedMapPages;
1485 	if (wiring == B_FULL_LOCK)
1486 		reservedPages += size / B_PAGE_SIZE;
1487 
1488 	vm_page_reservation reservation;
1489 	if (reservedPages > 0) {
1490 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1491 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1492 					priority)) {
1493 				reservedPages = 0;
1494 				status = B_WOULD_BLOCK;
1495 				goto err0;
1496 			}
1497 		} else
1498 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1499 	}
1500 
1501 	if (wiring == B_CONTIGUOUS) {
1502 		// we try to allocate the page run here upfront as this may easily
1503 		// fail for obvious reasons
1504 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1505 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1506 		if (page == NULL) {
1507 			status = B_NO_MEMORY;
1508 			goto err0;
1509 		}
1510 	}
1511 
1512 	// Lock the address space and, if B_EXACT_ADDRESS and
1513 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1514 	// is not wired.
1515 	do {
1516 		status = locker.SetTo(team);
1517 		if (status != B_OK)
1518 			goto err1;
1519 
1520 		addressSpace = locker.AddressSpace();
1521 	} while (virtualAddressRestrictions->address_specification
1522 			== B_EXACT_ADDRESS
1523 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1524 		&& wait_if_address_range_is_wired(addressSpace,
1525 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1526 
1527 	// create an anonymous cache
1528 	// if it's a stack, make sure that two pages are available at least
1529 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1530 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1531 		wiring == B_NO_LOCK, priority);
1532 	if (status != B_OK)
1533 		goto err1;
1534 
1535 	cache->temporary = 1;
1536 	cache->virtual_end = size;
1537 	cache->committed_size = reservedMemory;
1538 		// TODO: This should be done via a method.
1539 	reservedMemory = 0;
1540 
1541 	cache->Lock();
1542 
1543 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1544 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1545 		virtualAddressRestrictions, kernel, &area, _address);
1546 
1547 	if (status != B_OK) {
1548 		cache->ReleaseRefAndUnlock();
1549 		goto err1;
1550 	}
1551 
1552 	locker.DegradeToReadLock();
1553 
1554 	switch (wiring) {
1555 		case B_NO_LOCK:
1556 		case B_LAZY_LOCK:
1557 			// do nothing - the pages are mapped in as needed
1558 			break;
1559 
1560 		case B_FULL_LOCK:
1561 		{
1562 			// Allocate and map all pages for this area
1563 
1564 			off_t offset = 0;
1565 			for (addr_t address = area->Base();
1566 					address < area->Base() + (area->Size() - 1);
1567 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1568 #ifdef DEBUG_KERNEL_STACKS
1569 #	ifdef STACK_GROWS_DOWNWARDS
1570 				if (isStack && address < area->Base()
1571 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1572 #	else
1573 				if (isStack && address >= area->Base() + area->Size()
1574 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1575 #	endif
1576 					continue;
1577 #endif
1578 				vm_page* page = vm_page_allocate_page(&reservation,
1579 					PAGE_STATE_WIRED | pageAllocFlags);
1580 				cache->InsertPage(page, offset);
1581 				map_page(area, page, address, protection, &reservation);
1582 
1583 				DEBUG_PAGE_ACCESS_END(page);
1584 			}
1585 
1586 			break;
1587 		}
1588 
1589 		case B_ALREADY_WIRED:
1590 		{
1591 			// The pages should already be mapped. This is only really useful
1592 			// during boot time. Find the appropriate vm_page objects and stick
1593 			// them in the cache object.
1594 			VMTranslationMap* map = addressSpace->TranslationMap();
1595 			off_t offset = 0;
1596 
1597 			if (!gKernelStartup)
1598 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1599 
1600 			map->Lock();
1601 
1602 			for (addr_t virtualAddress = area->Base();
1603 					virtualAddress < area->Base() + (area->Size() - 1);
1604 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1605 				phys_addr_t physicalAddress;
1606 				uint32 flags;
1607 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1608 				if (status < B_OK) {
1609 					panic("looking up mapping failed for va 0x%lx\n",
1610 						virtualAddress);
1611 				}
1612 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1613 				if (page == NULL) {
1614 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1615 						"\n", physicalAddress);
1616 				}
1617 
1618 				DEBUG_PAGE_ACCESS_START(page);
1619 
1620 				cache->InsertPage(page, offset);
1621 				increment_page_wired_count(page);
1622 				vm_page_set_state(page, PAGE_STATE_WIRED);
1623 				page->busy = false;
1624 
1625 				DEBUG_PAGE_ACCESS_END(page);
1626 			}
1627 
1628 			map->Unlock();
1629 			break;
1630 		}
1631 
1632 		case B_CONTIGUOUS:
1633 		{
1634 			// We have already allocated our continuous pages run, so we can now
1635 			// just map them in the address space
1636 			VMTranslationMap* map = addressSpace->TranslationMap();
1637 			phys_addr_t physicalAddress
1638 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1639 			addr_t virtualAddress = area->Base();
1640 			off_t offset = 0;
1641 
1642 			map->Lock();
1643 
1644 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1645 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1646 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1647 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1648 				if (page == NULL)
1649 					panic("couldn't lookup physical page just allocated\n");
1650 
1651 				status = map->Map(virtualAddress, physicalAddress, protection,
1652 					area->MemoryType(), &reservation);
1653 				if (status < B_OK)
1654 					panic("couldn't map physical page in page run\n");
1655 
1656 				cache->InsertPage(page, offset);
1657 				increment_page_wired_count(page);
1658 
1659 				DEBUG_PAGE_ACCESS_END(page);
1660 			}
1661 
1662 			map->Unlock();
1663 			break;
1664 		}
1665 
1666 		default:
1667 			break;
1668 	}
1669 
1670 	cache->Unlock();
1671 
1672 	if (reservedPages > 0)
1673 		vm_page_unreserve_pages(&reservation);
1674 
1675 	TRACE(("vm_create_anonymous_area: done\n"));
1676 
1677 	area->cache_type = CACHE_TYPE_RAM;
1678 	return area->id;
1679 
1680 err1:
1681 	if (wiring == B_CONTIGUOUS) {
1682 		// we had reserved the area space upfront...
1683 		phys_addr_t pageNumber = page->physical_page_number;
1684 		int32 i;
1685 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1686 			page = vm_lookup_page(pageNumber);
1687 			if (page == NULL)
1688 				panic("couldn't lookup physical page just allocated\n");
1689 
1690 			vm_page_set_state(page, PAGE_STATE_FREE);
1691 		}
1692 	}
1693 
1694 err0:
1695 	if (reservedPages > 0)
1696 		vm_page_unreserve_pages(&reservation);
1697 	if (reservedMemory > 0)
1698 		vm_unreserve_memory(reservedMemory);
1699 
1700 	return status;
1701 }
1702 
1703 
1704 area_id
1705 vm_map_physical_memory(team_id team, const char* name, void** _address,
1706 	uint32 addressSpec, addr_t size, uint32 protection,
1707 	phys_addr_t physicalAddress, bool alreadyWired)
1708 {
1709 	VMArea* area;
1710 	VMCache* cache;
1711 	addr_t mapOffset;
1712 
1713 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1714 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1715 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1716 		addressSpec, size, protection, physicalAddress));
1717 
1718 	if (!arch_vm_supports_protection(protection))
1719 		return B_NOT_SUPPORTED;
1720 
1721 	AddressSpaceWriteLocker locker(team);
1722 	if (!locker.IsLocked())
1723 		return B_BAD_TEAM_ID;
1724 
1725 	// if the physical address is somewhat inside a page,
1726 	// move the actual area down to align on a page boundary
1727 	mapOffset = physicalAddress % B_PAGE_SIZE;
1728 	size += mapOffset;
1729 	physicalAddress -= mapOffset;
1730 
1731 	size = PAGE_ALIGN(size);
1732 
1733 	// create a device cache
1734 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1735 	if (status != B_OK)
1736 		return status;
1737 
1738 	cache->virtual_end = size;
1739 
1740 	cache->Lock();
1741 
1742 	virtual_address_restrictions addressRestrictions = {};
1743 	addressRestrictions.address = *_address;
1744 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1745 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1746 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1747 		true, &area, _address);
1748 
1749 	if (status < B_OK)
1750 		cache->ReleaseRefLocked();
1751 
1752 	cache->Unlock();
1753 
1754 	if (status == B_OK) {
1755 		// set requested memory type -- use uncached, if not given
1756 		uint32 memoryType = addressSpec & B_MTR_MASK;
1757 		if (memoryType == 0)
1758 			memoryType = B_MTR_UC;
1759 
1760 		area->SetMemoryType(memoryType);
1761 
1762 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1763 		if (status != B_OK)
1764 			delete_area(locker.AddressSpace(), area, false);
1765 	}
1766 
1767 	if (status != B_OK)
1768 		return status;
1769 
1770 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1771 
1772 	if (alreadyWired) {
1773 		// The area is already mapped, but possibly not with the right
1774 		// memory type.
1775 		map->Lock();
1776 		map->ProtectArea(area, area->protection);
1777 		map->Unlock();
1778 	} else {
1779 		// Map the area completely.
1780 
1781 		// reserve pages needed for the mapping
1782 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1783 			area->Base() + (size - 1));
1784 		vm_page_reservation reservation;
1785 		vm_page_reserve_pages(&reservation, reservePages,
1786 			team == VMAddressSpace::KernelID()
1787 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1788 
1789 		map->Lock();
1790 
1791 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1792 			map->Map(area->Base() + offset, physicalAddress + offset,
1793 				protection, area->MemoryType(), &reservation);
1794 		}
1795 
1796 		map->Unlock();
1797 
1798 		vm_page_unreserve_pages(&reservation);
1799 	}
1800 
1801 	// modify the pointer returned to be offset back into the new area
1802 	// the same way the physical address in was offset
1803 	*_address = (void*)((addr_t)*_address + mapOffset);
1804 
1805 	area->cache_type = CACHE_TYPE_DEVICE;
1806 	return area->id;
1807 }
1808 
1809 
1810 /*!	Don't use!
1811 	TODO: This function was introduced to map physical page vecs to
1812 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1813 	use a device cache and does not track vm_page::wired_count!
1814 */
1815 area_id
1816 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1817 	uint32 addressSpec, addr_t* _size, uint32 protection,
1818 	struct generic_io_vec* vecs, uint32 vecCount)
1819 {
1820 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1821 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1822 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1823 		addressSpec, _size, protection, vecs, vecCount));
1824 
1825 	if (!arch_vm_supports_protection(protection)
1826 		|| (addressSpec & B_MTR_MASK) != 0) {
1827 		return B_NOT_SUPPORTED;
1828 	}
1829 
1830 	AddressSpaceWriteLocker locker(team);
1831 	if (!locker.IsLocked())
1832 		return B_BAD_TEAM_ID;
1833 
1834 	if (vecCount == 0)
1835 		return B_BAD_VALUE;
1836 
1837 	addr_t size = 0;
1838 	for (uint32 i = 0; i < vecCount; i++) {
1839 		if (vecs[i].base % B_PAGE_SIZE != 0
1840 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1841 			return B_BAD_VALUE;
1842 		}
1843 
1844 		size += vecs[i].length;
1845 	}
1846 
1847 	// create a device cache
1848 	VMCache* cache;
1849 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1850 	if (result != B_OK)
1851 		return result;
1852 
1853 	cache->virtual_end = size;
1854 
1855 	cache->Lock();
1856 
1857 	VMArea* area;
1858 	virtual_address_restrictions addressRestrictions = {};
1859 	addressRestrictions.address = *_address;
1860 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1861 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1862 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
1863 		&addressRestrictions, true, &area, _address);
1864 
1865 	if (result != B_OK)
1866 		cache->ReleaseRefLocked();
1867 
1868 	cache->Unlock();
1869 
1870 	if (result != B_OK)
1871 		return result;
1872 
1873 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1874 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1875 		area->Base() + (size - 1));
1876 
1877 	vm_page_reservation reservation;
1878 	vm_page_reserve_pages(&reservation, reservePages,
1879 			team == VMAddressSpace::KernelID()
1880 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1881 	map->Lock();
1882 
1883 	uint32 vecIndex = 0;
1884 	size_t vecOffset = 0;
1885 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1886 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1887 			vecOffset = 0;
1888 			vecIndex++;
1889 		}
1890 
1891 		if (vecIndex >= vecCount)
1892 			break;
1893 
1894 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1895 			protection, area->MemoryType(), &reservation);
1896 
1897 		vecOffset += B_PAGE_SIZE;
1898 	}
1899 
1900 	map->Unlock();
1901 	vm_page_unreserve_pages(&reservation);
1902 
1903 	if (_size != NULL)
1904 		*_size = size;
1905 
1906 	area->cache_type = CACHE_TYPE_DEVICE;
1907 	return area->id;
1908 }
1909 
1910 
1911 area_id
1912 vm_create_null_area(team_id team, const char* name, void** address,
1913 	uint32 addressSpec, addr_t size, uint32 flags)
1914 {
1915 	size = PAGE_ALIGN(size);
1916 
1917 	// Lock the address space and, if B_EXACT_ADDRESS and
1918 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1919 	// is not wired.
1920 	AddressSpaceWriteLocker locker;
1921 	do {
1922 		if (locker.SetTo(team) != B_OK)
1923 			return B_BAD_TEAM_ID;
1924 	} while (addressSpec == B_EXACT_ADDRESS
1925 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1926 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1927 			(addr_t)*address, size, &locker));
1928 
1929 	// create a null cache
1930 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1931 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1932 	VMCache* cache;
1933 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1934 	if (status != B_OK)
1935 		return status;
1936 
1937 	cache->temporary = 1;
1938 	cache->virtual_end = size;
1939 
1940 	cache->Lock();
1941 
1942 	VMArea* area;
1943 	virtual_address_restrictions addressRestrictions = {};
1944 	addressRestrictions.address = *address;
1945 	addressRestrictions.address_specification = addressSpec;
1946 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1947 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
1948 		REGION_NO_PRIVATE_MAP, flags,
1949 		&addressRestrictions, true, &area, address);
1950 
1951 	if (status < B_OK) {
1952 		cache->ReleaseRefAndUnlock();
1953 		return status;
1954 	}
1955 
1956 	cache->Unlock();
1957 
1958 	area->cache_type = CACHE_TYPE_NULL;
1959 	return area->id;
1960 }
1961 
1962 
1963 /*!	Creates the vnode cache for the specified \a vnode.
1964 	The vnode has to be marked busy when calling this function.
1965 */
1966 status_t
1967 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1968 {
1969 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1970 }
1971 
1972 
1973 /*!	\a cache must be locked. The area's address space must be read-locked.
1974 */
1975 static void
1976 pre_map_area_pages(VMArea* area, VMCache* cache,
1977 	vm_page_reservation* reservation)
1978 {
1979 	addr_t baseAddress = area->Base();
1980 	addr_t cacheOffset = area->cache_offset;
1981 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1982 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1983 
1984 	for (VMCachePagesTree::Iterator it
1985 				= cache->pages.GetIterator(firstPage, true, true);
1986 			vm_page* page = it.Next();) {
1987 		if (page->cache_offset >= endPage)
1988 			break;
1989 
1990 		// skip busy and inactive pages
1991 		if (page->busy || page->usage_count == 0)
1992 			continue;
1993 
1994 		DEBUG_PAGE_ACCESS_START(page);
1995 		map_page(area, page,
1996 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1997 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1998 		DEBUG_PAGE_ACCESS_END(page);
1999 	}
2000 }
2001 
2002 
2003 /*!	Will map the file specified by \a fd to an area in memory.
2004 	The file will be mirrored beginning at the specified \a offset. The
2005 	\a offset and \a size arguments have to be page aligned.
2006 */
2007 static area_id
2008 _vm_map_file(team_id team, const char* name, void** _address,
2009 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2010 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2011 {
2012 	// TODO: for binary files, we want to make sure that they get the
2013 	//	copy of a file at a given time, ie. later changes should not
2014 	//	make it into the mapped copy -- this will need quite some changes
2015 	//	to be done in a nice way
2016 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2017 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2018 
2019 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2020 	size = PAGE_ALIGN(size);
2021 
2022 	if (mapping == REGION_NO_PRIVATE_MAP)
2023 		protection |= B_SHARED_AREA;
2024 	if (addressSpec != B_EXACT_ADDRESS)
2025 		unmapAddressRange = false;
2026 
2027 	if (fd < 0) {
2028 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2029 		virtual_address_restrictions virtualRestrictions = {};
2030 		virtualRestrictions.address = *_address;
2031 		virtualRestrictions.address_specification = addressSpec;
2032 		physical_address_restrictions physicalRestrictions = {};
2033 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2034 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2035 			_address);
2036 	}
2037 
2038 	// get the open flags of the FD
2039 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2040 	if (descriptor == NULL)
2041 		return EBADF;
2042 	int32 openMode = descriptor->open_mode;
2043 	put_fd(descriptor);
2044 
2045 	// The FD must open for reading at any rate. For shared mapping with write
2046 	// access, additionally the FD must be open for writing.
2047 	if ((openMode & O_ACCMODE) == O_WRONLY
2048 		|| (mapping == REGION_NO_PRIVATE_MAP
2049 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2050 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2051 		return EACCES;
2052 	}
2053 
2054 	uint32 protectionMax = 0;
2055 	if (mapping != REGION_PRIVATE_MAP) {
2056 		protectionMax = protection | B_READ_AREA;
2057 		if ((openMode & O_ACCMODE) == O_RDWR)
2058 			protectionMax |= B_WRITE_AREA;
2059 	}
2060 
2061 	// get the vnode for the object, this also grabs a ref to it
2062 	struct vnode* vnode = NULL;
2063 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2064 	if (status < B_OK)
2065 		return status;
2066 	VnodePutter vnodePutter(vnode);
2067 
2068 	// If we're going to pre-map pages, we need to reserve the pages needed by
2069 	// the mapping backend upfront.
2070 	page_num_t reservedPreMapPages = 0;
2071 	vm_page_reservation reservation;
2072 	if ((protection & B_READ_AREA) != 0) {
2073 		AddressSpaceWriteLocker locker;
2074 		status = locker.SetTo(team);
2075 		if (status != B_OK)
2076 			return status;
2077 
2078 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2079 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2080 
2081 		locker.Unlock();
2082 
2083 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2084 			team == VMAddressSpace::KernelID()
2085 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2086 	}
2087 
2088 	struct PageUnreserver {
2089 		PageUnreserver(vm_page_reservation* reservation)
2090 			:
2091 			fReservation(reservation)
2092 		{
2093 		}
2094 
2095 		~PageUnreserver()
2096 		{
2097 			if (fReservation != NULL)
2098 				vm_page_unreserve_pages(fReservation);
2099 		}
2100 
2101 		vm_page_reservation* fReservation;
2102 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2103 
2104 	// Lock the address space and, if the specified address range shall be
2105 	// unmapped, ensure it is not wired.
2106 	AddressSpaceWriteLocker locker;
2107 	do {
2108 		if (locker.SetTo(team) != B_OK)
2109 			return B_BAD_TEAM_ID;
2110 	} while (unmapAddressRange
2111 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2112 			(addr_t)*_address, size, &locker));
2113 
2114 	// TODO: this only works for file systems that use the file cache
2115 	VMCache* cache;
2116 	status = vfs_get_vnode_cache(vnode, &cache, false);
2117 	if (status < B_OK)
2118 		return status;
2119 
2120 	cache->Lock();
2121 
2122 	VMArea* area;
2123 	virtual_address_restrictions addressRestrictions = {};
2124 	addressRestrictions.address = *_address;
2125 	addressRestrictions.address_specification = addressSpec;
2126 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2127 		0, protection, protectionMax, mapping,
2128 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
2129 		&addressRestrictions, kernel, &area, _address);
2130 
2131 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2132 		// map_backing_store() cannot know we no longer need the ref
2133 		cache->ReleaseRefLocked();
2134 	}
2135 
2136 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2137 		pre_map_area_pages(area, cache, &reservation);
2138 
2139 	cache->Unlock();
2140 
2141 	if (status == B_OK) {
2142 		// TODO: this probably deserves a smarter solution, ie. don't always
2143 		// prefetch stuff, and also, probably don't trigger it at this place.
2144 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2145 			// prefetches at max 10 MB starting from "offset"
2146 	}
2147 
2148 	if (status != B_OK)
2149 		return status;
2150 
2151 	area->cache_type = CACHE_TYPE_VNODE;
2152 	return area->id;
2153 }
2154 
2155 
2156 area_id
2157 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2158 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2159 	int fd, off_t offset)
2160 {
2161 	if (!arch_vm_supports_protection(protection))
2162 		return B_NOT_SUPPORTED;
2163 
2164 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2165 		mapping, unmapAddressRange, fd, offset, true);
2166 }
2167 
2168 
2169 VMCache*
2170 vm_area_get_locked_cache(VMArea* area)
2171 {
2172 	rw_lock_read_lock(&sAreaCacheLock);
2173 
2174 	while (true) {
2175 		VMCache* cache = area->cache;
2176 
2177 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2178 			// cache has been deleted
2179 			rw_lock_read_lock(&sAreaCacheLock);
2180 			continue;
2181 		}
2182 
2183 		rw_lock_read_lock(&sAreaCacheLock);
2184 
2185 		if (cache == area->cache) {
2186 			cache->AcquireRefLocked();
2187 			rw_lock_read_unlock(&sAreaCacheLock);
2188 			return cache;
2189 		}
2190 
2191 		// the cache changed in the meantime
2192 		cache->Unlock();
2193 	}
2194 }
2195 
2196 
2197 void
2198 vm_area_put_locked_cache(VMCache* cache)
2199 {
2200 	cache->ReleaseRefAndUnlock();
2201 }
2202 
2203 
2204 area_id
2205 vm_clone_area(team_id team, const char* name, void** address,
2206 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2207 	bool kernel)
2208 {
2209 	VMArea* newArea = NULL;
2210 	VMArea* sourceArea;
2211 
2212 	// Check whether the source area exists and is cloneable. If so, mark it
2213 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2214 	{
2215 		AddressSpaceWriteLocker locker;
2216 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2217 		if (status != B_OK)
2218 			return status;
2219 
2220 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2221 			return B_NOT_ALLOWED;
2222 
2223 		sourceArea->protection |= B_SHARED_AREA;
2224 		protection |= B_SHARED_AREA;
2225 	}
2226 
2227 	// Now lock both address spaces and actually do the cloning.
2228 
2229 	MultiAddressSpaceLocker locker;
2230 	VMAddressSpace* sourceAddressSpace;
2231 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2232 	if (status != B_OK)
2233 		return status;
2234 
2235 	VMAddressSpace* targetAddressSpace;
2236 	status = locker.AddTeam(team, true, &targetAddressSpace);
2237 	if (status != B_OK)
2238 		return status;
2239 
2240 	status = locker.Lock();
2241 	if (status != B_OK)
2242 		return status;
2243 
2244 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2245 	if (sourceArea == NULL)
2246 		return B_BAD_VALUE;
2247 
2248 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2249 		return B_NOT_ALLOWED;
2250 
2251 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2252 
2253 	if (!kernel && sourceAddressSpace != targetAddressSpace
2254 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2255 #if KDEBUG
2256 		Team* team = thread_get_current_thread()->team;
2257 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2258 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2259 #endif
2260 		status = B_NOT_ALLOWED;
2261 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2262 		status = B_NOT_ALLOWED;
2263 	} else {
2264 		virtual_address_restrictions addressRestrictions = {};
2265 		addressRestrictions.address = *address;
2266 		addressRestrictions.address_specification = addressSpec;
2267 		status = map_backing_store(targetAddressSpace, cache,
2268 			sourceArea->cache_offset, name, sourceArea->Size(),
2269 			sourceArea->wiring, protection, sourceArea->protection_max,
2270 			mapping, 0, &addressRestrictions,
2271 			kernel, &newArea, address);
2272 	}
2273 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2274 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2275 		// to create a new cache, and has therefore already acquired a reference
2276 		// to the source cache - but otherwise it has no idea that we need
2277 		// one.
2278 		cache->AcquireRefLocked();
2279 	}
2280 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2281 		// we need to map in everything at this point
2282 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2283 			// we don't have actual pages to map but a physical area
2284 			VMTranslationMap* map
2285 				= sourceArea->address_space->TranslationMap();
2286 			map->Lock();
2287 
2288 			phys_addr_t physicalAddress;
2289 			uint32 oldProtection;
2290 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2291 
2292 			map->Unlock();
2293 
2294 			map = targetAddressSpace->TranslationMap();
2295 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2296 				newArea->Base() + (newArea->Size() - 1));
2297 
2298 			vm_page_reservation reservation;
2299 			vm_page_reserve_pages(&reservation, reservePages,
2300 				targetAddressSpace == VMAddressSpace::Kernel()
2301 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2302 			map->Lock();
2303 
2304 			for (addr_t offset = 0; offset < newArea->Size();
2305 					offset += B_PAGE_SIZE) {
2306 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2307 					protection, newArea->MemoryType(), &reservation);
2308 			}
2309 
2310 			map->Unlock();
2311 			vm_page_unreserve_pages(&reservation);
2312 		} else {
2313 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2314 			size_t reservePages = map->MaxPagesNeededToMap(
2315 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2316 			vm_page_reservation reservation;
2317 			vm_page_reserve_pages(&reservation, reservePages,
2318 				targetAddressSpace == VMAddressSpace::Kernel()
2319 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2320 
2321 			// map in all pages from source
2322 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2323 					vm_page* page  = it.Next();) {
2324 				if (!page->busy) {
2325 					DEBUG_PAGE_ACCESS_START(page);
2326 					map_page(newArea, page,
2327 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2328 							- newArea->cache_offset),
2329 						protection, &reservation);
2330 					DEBUG_PAGE_ACCESS_END(page);
2331 				}
2332 			}
2333 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2334 			// ensuring that!
2335 
2336 			vm_page_unreserve_pages(&reservation);
2337 		}
2338 	}
2339 	if (status == B_OK)
2340 		newArea->cache_type = sourceArea->cache_type;
2341 
2342 	vm_area_put_locked_cache(cache);
2343 
2344 	if (status < B_OK)
2345 		return status;
2346 
2347 	return newArea->id;
2348 }
2349 
2350 
2351 /*!	Deletes the specified area of the given address space.
2352 
2353 	The address space must be write-locked.
2354 	The caller must ensure that the area does not have any wired ranges.
2355 
2356 	\param addressSpace The address space containing the area.
2357 	\param area The area to be deleted.
2358 	\param deletingAddressSpace \c true, if the address space is in the process
2359 		of being deleted.
2360 */
2361 static void
2362 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2363 	bool deletingAddressSpace)
2364 {
2365 	ASSERT(!area->IsWired());
2366 
2367 	VMAreaHash::Remove(area);
2368 
2369 	// At this point the area is removed from the global hash table, but
2370 	// still exists in the area list.
2371 
2372 	// Unmap the virtual address space the area occupied.
2373 	{
2374 		// We need to lock the complete cache chain.
2375 		VMCache* topCache = vm_area_get_locked_cache(area);
2376 		VMCacheChainLocker cacheChainLocker(topCache);
2377 		cacheChainLocker.LockAllSourceCaches();
2378 
2379 		// If the area's top cache is a temporary cache and the area is the only
2380 		// one referencing it (besides us currently holding a second reference),
2381 		// the unmapping code doesn't need to care about preserving the accessed
2382 		// and dirty flags of the top cache page mappings.
2383 		bool ignoreTopCachePageFlags
2384 			= topCache->temporary && topCache->RefCount() == 2;
2385 
2386 		area->address_space->TranslationMap()->UnmapArea(area,
2387 			deletingAddressSpace, ignoreTopCachePageFlags);
2388 	}
2389 
2390 	if (!area->cache->temporary)
2391 		area->cache->WriteModified();
2392 
2393 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2394 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2395 
2396 	arch_vm_unset_memory_type(area);
2397 	addressSpace->RemoveArea(area, allocationFlags);
2398 	addressSpace->Put();
2399 
2400 	area->cache->RemoveArea(area);
2401 	area->cache->ReleaseRef();
2402 
2403 	addressSpace->DeleteArea(area, allocationFlags);
2404 }
2405 
2406 
2407 status_t
2408 vm_delete_area(team_id team, area_id id, bool kernel)
2409 {
2410 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2411 		team, id));
2412 
2413 	// lock the address space and make sure the area isn't wired
2414 	AddressSpaceWriteLocker locker;
2415 	VMArea* area;
2416 	AreaCacheLocker cacheLocker;
2417 
2418 	do {
2419 		status_t status = locker.SetFromArea(team, id, area);
2420 		if (status != B_OK)
2421 			return status;
2422 
2423 		cacheLocker.SetTo(area);
2424 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2425 
2426 	cacheLocker.Unlock();
2427 
2428 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2429 		return B_NOT_ALLOWED;
2430 
2431 	delete_area(locker.AddressSpace(), area, false);
2432 	return B_OK;
2433 }
2434 
2435 
2436 /*!	Creates a new cache on top of given cache, moves all areas from
2437 	the old cache to the new one, and changes the protection of all affected
2438 	areas' pages to read-only. If requested, wired pages are moved up to the
2439 	new cache and copies are added to the old cache in their place.
2440 	Preconditions:
2441 	- The given cache must be locked.
2442 	- All of the cache's areas' address spaces must be read locked.
2443 	- Either the cache must not have any wired ranges or a page reservation for
2444 	  all wired pages must be provided, so they can be copied.
2445 
2446 	\param lowerCache The cache on top of which a new cache shall be created.
2447 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2448 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2449 		has wired page. The wired pages are copied in this case.
2450 */
2451 static status_t
2452 vm_copy_on_write_area(VMCache* lowerCache,
2453 	vm_page_reservation* wiredPagesReservation)
2454 {
2455 	VMCache* upperCache;
2456 
2457 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2458 
2459 	// We need to separate the cache from its areas. The cache goes one level
2460 	// deeper and we create a new cache inbetween.
2461 
2462 	// create an anonymous cache
2463 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2464 		lowerCache->GuardSize() / B_PAGE_SIZE,
2465 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2466 		VM_PRIORITY_USER);
2467 	if (status != B_OK)
2468 		return status;
2469 
2470 	upperCache->Lock();
2471 
2472 	upperCache->temporary = 1;
2473 	upperCache->virtual_base = lowerCache->virtual_base;
2474 	upperCache->virtual_end = lowerCache->virtual_end;
2475 
2476 	// transfer the lower cache areas to the upper cache
2477 	rw_lock_write_lock(&sAreaCacheLock);
2478 	upperCache->TransferAreas(lowerCache);
2479 	rw_lock_write_unlock(&sAreaCacheLock);
2480 
2481 	lowerCache->AddConsumer(upperCache);
2482 
2483 	// We now need to remap all pages from all of the cache's areas read-only,
2484 	// so that a copy will be created on next write access. If there are wired
2485 	// pages, we keep their protection, move them to the upper cache and create
2486 	// copies for the lower cache.
2487 	if (wiredPagesReservation != NULL) {
2488 		// We need to handle wired pages -- iterate through the cache's pages.
2489 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2490 				vm_page* page = it.Next();) {
2491 			if (page->WiredCount() > 0) {
2492 				// allocate a new page and copy the wired one
2493 				vm_page* copiedPage = vm_page_allocate_page(
2494 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2495 
2496 				vm_memcpy_physical_page(
2497 					copiedPage->physical_page_number * B_PAGE_SIZE,
2498 					page->physical_page_number * B_PAGE_SIZE);
2499 
2500 				// move the wired page to the upper cache (note: removing is OK
2501 				// with the SplayTree iterator) and insert the copy
2502 				upperCache->MovePage(page);
2503 				lowerCache->InsertPage(copiedPage,
2504 					page->cache_offset * B_PAGE_SIZE);
2505 
2506 				DEBUG_PAGE_ACCESS_END(copiedPage);
2507 			} else {
2508 				// Change the protection of this page in all areas.
2509 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2510 						tempArea = tempArea->cache_next) {
2511 					// The area must be readable in the same way it was
2512 					// previously writable.
2513 					uint32 protection = B_KERNEL_READ_AREA;
2514 					if ((tempArea->protection & B_READ_AREA) != 0)
2515 						protection |= B_READ_AREA;
2516 
2517 					VMTranslationMap* map
2518 						= tempArea->address_space->TranslationMap();
2519 					map->Lock();
2520 					map->ProtectPage(tempArea,
2521 						virtual_page_address(tempArea, page), protection);
2522 					map->Unlock();
2523 				}
2524 			}
2525 		}
2526 	} else {
2527 		ASSERT(lowerCache->WiredPagesCount() == 0);
2528 
2529 		// just change the protection of all areas
2530 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2531 				tempArea = tempArea->cache_next) {
2532 			// The area must be readable in the same way it was previously
2533 			// writable.
2534 			uint32 protection = B_KERNEL_READ_AREA;
2535 			if ((tempArea->protection & B_READ_AREA) != 0)
2536 				protection |= B_READ_AREA;
2537 
2538 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2539 			map->Lock();
2540 			map->ProtectArea(tempArea, protection);
2541 			map->Unlock();
2542 		}
2543 	}
2544 
2545 	vm_area_put_locked_cache(upperCache);
2546 
2547 	return B_OK;
2548 }
2549 
2550 
2551 area_id
2552 vm_copy_area(team_id team, const char* name, void** _address,
2553 	uint32 addressSpec, area_id sourceID)
2554 {
2555 	// Do the locking: target address space, all address spaces associated with
2556 	// the source cache, and the cache itself.
2557 	MultiAddressSpaceLocker locker;
2558 	VMAddressSpace* targetAddressSpace;
2559 	VMCache* cache;
2560 	VMArea* source;
2561 	AreaCacheLocker cacheLocker;
2562 	status_t status;
2563 	bool sharedArea;
2564 
2565 	page_num_t wiredPages = 0;
2566 	vm_page_reservation wiredPagesReservation;
2567 
2568 	bool restart;
2569 	do {
2570 		restart = false;
2571 
2572 		locker.Unset();
2573 		status = locker.AddTeam(team, true, &targetAddressSpace);
2574 		if (status == B_OK) {
2575 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2576 				&cache);
2577 		}
2578 		if (status != B_OK)
2579 			return status;
2580 
2581 		cacheLocker.SetTo(cache, true);	// already locked
2582 
2583 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2584 
2585 		page_num_t oldWiredPages = wiredPages;
2586 		wiredPages = 0;
2587 
2588 		// If the source area isn't shared, count the number of wired pages in
2589 		// the cache and reserve as many pages.
2590 		if (!sharedArea) {
2591 			wiredPages = cache->WiredPagesCount();
2592 
2593 			if (wiredPages > oldWiredPages) {
2594 				cacheLocker.Unlock();
2595 				locker.Unlock();
2596 
2597 				if (oldWiredPages > 0)
2598 					vm_page_unreserve_pages(&wiredPagesReservation);
2599 
2600 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2601 					VM_PRIORITY_USER);
2602 
2603 				restart = true;
2604 			}
2605 		} else if (oldWiredPages > 0)
2606 			vm_page_unreserve_pages(&wiredPagesReservation);
2607 	} while (restart);
2608 
2609 	// unreserve pages later
2610 	struct PagesUnreserver {
2611 		PagesUnreserver(vm_page_reservation* reservation)
2612 			:
2613 			fReservation(reservation)
2614 		{
2615 		}
2616 
2617 		~PagesUnreserver()
2618 		{
2619 			if (fReservation != NULL)
2620 				vm_page_unreserve_pages(fReservation);
2621 		}
2622 
2623 	private:
2624 		vm_page_reservation*	fReservation;
2625 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2626 
2627 	bool writableCopy
2628 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2629 	uint8* targetPageProtections = NULL;
2630 
2631 	if (source->page_protections != NULL) {
2632 		size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2;
2633 		targetPageProtections = (uint8*)malloc_etc(bytes,
2634 			(source->address_space == VMAddressSpace::Kernel()
2635 					|| targetAddressSpace == VMAddressSpace::Kernel())
2636 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2637 		if (targetPageProtections == NULL)
2638 			return B_NO_MEMORY;
2639 
2640 		memcpy(targetPageProtections, source->page_protections, bytes);
2641 
2642 		if (!writableCopy) {
2643 			for (size_t i = 0; i < bytes; i++) {
2644 				if ((targetPageProtections[i]
2645 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2646 					writableCopy = true;
2647 					break;
2648 				}
2649 			}
2650 		}
2651 	}
2652 
2653 	if (addressSpec == B_CLONE_ADDRESS) {
2654 		addressSpec = B_EXACT_ADDRESS;
2655 		*_address = (void*)source->Base();
2656 	}
2657 
2658 	// First, create a cache on top of the source area, respectively use the
2659 	// existing one, if this is a shared area.
2660 
2661 	VMArea* target;
2662 	virtual_address_restrictions addressRestrictions = {};
2663 	addressRestrictions.address = *_address;
2664 	addressRestrictions.address_specification = addressSpec;
2665 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2666 		name, source->Size(), source->wiring, source->protection,
2667 		source->protection_max,
2668 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2669 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2670 		&addressRestrictions, true, &target, _address);
2671 	if (status < B_OK) {
2672 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2673 		return status;
2674 	}
2675 
2676 	if (targetPageProtections != NULL)
2677 		target->page_protections = targetPageProtections;
2678 
2679 	if (sharedArea) {
2680 		// The new area uses the old area's cache, but map_backing_store()
2681 		// hasn't acquired a ref. So we have to do that now.
2682 		cache->AcquireRefLocked();
2683 	}
2684 
2685 	// If the source area is writable, we need to move it one layer up as well
2686 
2687 	if (!sharedArea) {
2688 		if (writableCopy) {
2689 			// TODO: do something more useful if this fails!
2690 			if (vm_copy_on_write_area(cache,
2691 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2692 				panic("vm_copy_on_write_area() failed!\n");
2693 			}
2694 		}
2695 	}
2696 
2697 	// we return the ID of the newly created area
2698 	return target->id;
2699 }
2700 
2701 
2702 status_t
2703 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2704 	bool kernel)
2705 {
2706 	fix_protection(&newProtection);
2707 
2708 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2709 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2710 
2711 	if (!arch_vm_supports_protection(newProtection))
2712 		return B_NOT_SUPPORTED;
2713 
2714 	bool becomesWritable
2715 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2716 
2717 	// lock address spaces and cache
2718 	MultiAddressSpaceLocker locker;
2719 	VMCache* cache;
2720 	VMArea* area;
2721 	status_t status;
2722 	AreaCacheLocker cacheLocker;
2723 	bool isWritable;
2724 
2725 	bool restart;
2726 	do {
2727 		restart = false;
2728 
2729 		locker.Unset();
2730 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2731 		if (status != B_OK)
2732 			return status;
2733 
2734 		cacheLocker.SetTo(cache, true);	// already locked
2735 
2736 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2737 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2738 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2739 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2740 				" (%s)\n", team, newProtection, areaID, area->name);
2741 			return B_NOT_ALLOWED;
2742 		}
2743 		if (!kernel && area->protection_max != 0
2744 			&& (newProtection & area->protection_max)
2745 				!= (newProtection & B_USER_PROTECTION)) {
2746 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2747 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
2748 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2749 				area->protection_max, areaID, area->name);
2750 			return B_NOT_ALLOWED;
2751 		}
2752 
2753 		if (area->protection == newProtection)
2754 			return B_OK;
2755 
2756 		if (team != VMAddressSpace::KernelID()
2757 			&& area->address_space->ID() != team) {
2758 			// unless you're the kernel, you are only allowed to set
2759 			// the protection of your own areas
2760 			return B_NOT_ALLOWED;
2761 		}
2762 
2763 		isWritable
2764 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2765 
2766 		// Make sure the area (respectively, if we're going to call
2767 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2768 		// wired ranges.
2769 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2770 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2771 					otherArea = otherArea->cache_next) {
2772 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2773 					restart = true;
2774 					break;
2775 				}
2776 			}
2777 		} else {
2778 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2779 				restart = true;
2780 		}
2781 	} while (restart);
2782 
2783 	bool changePageProtection = true;
2784 	bool changeTopCachePagesOnly = false;
2785 
2786 	if (isWritable && !becomesWritable) {
2787 		// writable -> !writable
2788 
2789 		if (cache->source != NULL && cache->temporary) {
2790 			if (cache->CountWritableAreas(area) == 0) {
2791 				// Since this cache now lives from the pages in its source cache,
2792 				// we can change the cache's commitment to take only those pages
2793 				// into account that really are in this cache.
2794 
2795 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2796 					team == VMAddressSpace::KernelID()
2797 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2798 
2799 				// TODO: we may be able to join with our source cache, if
2800 				// count == 0
2801 			}
2802 		}
2803 
2804 		// If only the writability changes, we can just remap the pages of the
2805 		// top cache, since the pages of lower caches are mapped read-only
2806 		// anyway. That's advantageous only, if the number of pages in the cache
2807 		// is significantly smaller than the number of pages in the area,
2808 		// though.
2809 		if (newProtection
2810 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2811 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2812 			changeTopCachePagesOnly = true;
2813 		}
2814 	} else if (!isWritable && becomesWritable) {
2815 		// !writable -> writable
2816 
2817 		if (!cache->consumers.IsEmpty()) {
2818 			// There are consumers -- we have to insert a new cache. Fortunately
2819 			// vm_copy_on_write_area() does everything that's needed.
2820 			changePageProtection = false;
2821 			status = vm_copy_on_write_area(cache, NULL);
2822 		} else {
2823 			// No consumers, so we don't need to insert a new one.
2824 			if (cache->source != NULL && cache->temporary) {
2825 				// the cache's commitment must contain all possible pages
2826 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2827 					team == VMAddressSpace::KernelID()
2828 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2829 			}
2830 
2831 			if (status == B_OK && cache->source != NULL) {
2832 				// There's a source cache, hence we can't just change all pages'
2833 				// protection or we might allow writing into pages belonging to
2834 				// a lower cache.
2835 				changeTopCachePagesOnly = true;
2836 			}
2837 		}
2838 	} else {
2839 		// we don't have anything special to do in all other cases
2840 	}
2841 
2842 	if (status == B_OK) {
2843 		// remap existing pages in this cache
2844 		if (changePageProtection) {
2845 			VMTranslationMap* map = area->address_space->TranslationMap();
2846 			map->Lock();
2847 
2848 			if (changeTopCachePagesOnly) {
2849 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2850 				page_num_t lastPageOffset
2851 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2852 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2853 						vm_page* page = it.Next();) {
2854 					if (page->cache_offset >= firstPageOffset
2855 						&& page->cache_offset <= lastPageOffset) {
2856 						addr_t address = virtual_page_address(area, page);
2857 						map->ProtectPage(area, address, newProtection);
2858 					}
2859 				}
2860 			} else
2861 				map->ProtectArea(area, newProtection);
2862 
2863 			map->Unlock();
2864 		}
2865 
2866 		area->protection = newProtection;
2867 	}
2868 
2869 	return status;
2870 }
2871 
2872 
2873 status_t
2874 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2875 {
2876 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2877 	if (addressSpace == NULL)
2878 		return B_BAD_TEAM_ID;
2879 
2880 	VMTranslationMap* map = addressSpace->TranslationMap();
2881 
2882 	map->Lock();
2883 	uint32 dummyFlags;
2884 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2885 	map->Unlock();
2886 
2887 	addressSpace->Put();
2888 	return status;
2889 }
2890 
2891 
2892 /*!	The page's cache must be locked.
2893 */
2894 bool
2895 vm_test_map_modification(vm_page* page)
2896 {
2897 	if (page->modified)
2898 		return true;
2899 
2900 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2901 	vm_page_mapping* mapping;
2902 	while ((mapping = iterator.Next()) != NULL) {
2903 		VMArea* area = mapping->area;
2904 		VMTranslationMap* map = area->address_space->TranslationMap();
2905 
2906 		phys_addr_t physicalAddress;
2907 		uint32 flags;
2908 		map->Lock();
2909 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2910 		map->Unlock();
2911 
2912 		if ((flags & PAGE_MODIFIED) != 0)
2913 			return true;
2914 	}
2915 
2916 	return false;
2917 }
2918 
2919 
2920 /*!	The page's cache must be locked.
2921 */
2922 void
2923 vm_clear_map_flags(vm_page* page, uint32 flags)
2924 {
2925 	if ((flags & PAGE_ACCESSED) != 0)
2926 		page->accessed = false;
2927 	if ((flags & PAGE_MODIFIED) != 0)
2928 		page->modified = false;
2929 
2930 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2931 	vm_page_mapping* mapping;
2932 	while ((mapping = iterator.Next()) != NULL) {
2933 		VMArea* area = mapping->area;
2934 		VMTranslationMap* map = area->address_space->TranslationMap();
2935 
2936 		map->Lock();
2937 		map->ClearFlags(virtual_page_address(area, page), flags);
2938 		map->Unlock();
2939 	}
2940 }
2941 
2942 
2943 /*!	Removes all mappings from a page.
2944 	After you've called this function, the page is unmapped from memory and
2945 	the page's \c accessed and \c modified flags have been updated according
2946 	to the state of the mappings.
2947 	The page's cache must be locked.
2948 */
2949 void
2950 vm_remove_all_page_mappings(vm_page* page)
2951 {
2952 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2953 		VMArea* area = mapping->area;
2954 		VMTranslationMap* map = area->address_space->TranslationMap();
2955 		addr_t address = virtual_page_address(area, page);
2956 		map->UnmapPage(area, address, false);
2957 	}
2958 }
2959 
2960 
2961 int32
2962 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2963 {
2964 	int32 count = 0;
2965 
2966 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2967 	vm_page_mapping* mapping;
2968 	while ((mapping = iterator.Next()) != NULL) {
2969 		VMArea* area = mapping->area;
2970 		VMTranslationMap* map = area->address_space->TranslationMap();
2971 
2972 		bool modified;
2973 		if (map->ClearAccessedAndModified(area,
2974 				virtual_page_address(area, page), false, modified)) {
2975 			count++;
2976 		}
2977 
2978 		page->modified |= modified;
2979 	}
2980 
2981 
2982 	if (page->accessed) {
2983 		count++;
2984 		page->accessed = false;
2985 	}
2986 
2987 	return count;
2988 }
2989 
2990 
2991 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2992 	mappings.
2993 	The function iterates through the page mappings and removes them until
2994 	encountering one that has been accessed. From then on it will continue to
2995 	iterate, but only clear the accessed flag of the mapping. The page's
2996 	\c modified bit will be updated accordingly, the \c accessed bit will be
2997 	cleared.
2998 	\return The number of mapping accessed bits encountered, including the
2999 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3000 		of the page have been removed.
3001 */
3002 int32
3003 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3004 {
3005 	ASSERT(page->WiredCount() == 0);
3006 
3007 	if (page->accessed)
3008 		return vm_clear_page_mapping_accessed_flags(page);
3009 
3010 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3011 		VMArea* area = mapping->area;
3012 		VMTranslationMap* map = area->address_space->TranslationMap();
3013 		addr_t address = virtual_page_address(area, page);
3014 		bool modified = false;
3015 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3016 			page->accessed = true;
3017 			page->modified |= modified;
3018 			return vm_clear_page_mapping_accessed_flags(page);
3019 		}
3020 		page->modified |= modified;
3021 	}
3022 
3023 	return 0;
3024 }
3025 
3026 
3027 static int
3028 display_mem(int argc, char** argv)
3029 {
3030 	bool physical = false;
3031 	addr_t copyAddress;
3032 	int32 displayWidth;
3033 	int32 itemSize;
3034 	int32 num = -1;
3035 	addr_t address;
3036 	int i = 1, j;
3037 
3038 	if (argc > 1 && argv[1][0] == '-') {
3039 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3040 			physical = true;
3041 			i++;
3042 		} else
3043 			i = 99;
3044 	}
3045 
3046 	if (argc < i + 1 || argc > i + 2) {
3047 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3048 			"\tdl - 8 bytes\n"
3049 			"\tdw - 4 bytes\n"
3050 			"\tds - 2 bytes\n"
3051 			"\tdb - 1 byte\n"
3052 			"\tstring - a whole string\n"
3053 			"  -p or --physical only allows memory from a single page to be "
3054 			"displayed.\n");
3055 		return 0;
3056 	}
3057 
3058 	address = parse_expression(argv[i]);
3059 
3060 	if (argc > i + 1)
3061 		num = parse_expression(argv[i + 1]);
3062 
3063 	// build the format string
3064 	if (strcmp(argv[0], "db") == 0) {
3065 		itemSize = 1;
3066 		displayWidth = 16;
3067 	} else if (strcmp(argv[0], "ds") == 0) {
3068 		itemSize = 2;
3069 		displayWidth = 8;
3070 	} else if (strcmp(argv[0], "dw") == 0) {
3071 		itemSize = 4;
3072 		displayWidth = 4;
3073 	} else if (strcmp(argv[0], "dl") == 0) {
3074 		itemSize = 8;
3075 		displayWidth = 2;
3076 	} else if (strcmp(argv[0], "string") == 0) {
3077 		itemSize = 1;
3078 		displayWidth = -1;
3079 	} else {
3080 		kprintf("display_mem called in an invalid way!\n");
3081 		return 0;
3082 	}
3083 
3084 	if (num <= 0)
3085 		num = displayWidth;
3086 
3087 	void* physicalPageHandle = NULL;
3088 
3089 	if (physical) {
3090 		int32 offset = address & (B_PAGE_SIZE - 1);
3091 		if (num * itemSize + offset > B_PAGE_SIZE) {
3092 			num = (B_PAGE_SIZE - offset) / itemSize;
3093 			kprintf("NOTE: number of bytes has been cut to page size\n");
3094 		}
3095 
3096 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3097 
3098 		if (vm_get_physical_page_debug(address, &copyAddress,
3099 				&physicalPageHandle) != B_OK) {
3100 			kprintf("getting the hardware page failed.");
3101 			return 0;
3102 		}
3103 
3104 		address += offset;
3105 		copyAddress += offset;
3106 	} else
3107 		copyAddress = address;
3108 
3109 	if (!strcmp(argv[0], "string")) {
3110 		kprintf("%p \"", (char*)copyAddress);
3111 
3112 		// string mode
3113 		for (i = 0; true; i++) {
3114 			char c;
3115 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3116 					!= B_OK
3117 				|| c == '\0') {
3118 				break;
3119 			}
3120 
3121 			if (c == '\n')
3122 				kprintf("\\n");
3123 			else if (c == '\t')
3124 				kprintf("\\t");
3125 			else {
3126 				if (!isprint(c))
3127 					c = '.';
3128 
3129 				kprintf("%c", c);
3130 			}
3131 		}
3132 
3133 		kprintf("\"\n");
3134 	} else {
3135 		// number mode
3136 		for (i = 0; i < num; i++) {
3137 			uint64 value;
3138 
3139 			if ((i % displayWidth) == 0) {
3140 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3141 				if (i != 0)
3142 					kprintf("\n");
3143 
3144 				kprintf("[0x%lx]  ", address + i * itemSize);
3145 
3146 				for (j = 0; j < displayed; j++) {
3147 					char c;
3148 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3149 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3150 						displayed = j;
3151 						break;
3152 					}
3153 					if (!isprint(c))
3154 						c = '.';
3155 
3156 					kprintf("%c", c);
3157 				}
3158 				if (num > displayWidth) {
3159 					// make sure the spacing in the last line is correct
3160 					for (j = displayed; j < displayWidth * itemSize; j++)
3161 						kprintf(" ");
3162 				}
3163 				kprintf("  ");
3164 			}
3165 
3166 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3167 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3168 				kprintf("read fault");
3169 				break;
3170 			}
3171 
3172 			switch (itemSize) {
3173 				case 1:
3174 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3175 					break;
3176 				case 2:
3177 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3178 					break;
3179 				case 4:
3180 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3181 					break;
3182 				case 8:
3183 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3184 					break;
3185 			}
3186 		}
3187 
3188 		kprintf("\n");
3189 	}
3190 
3191 	if (physical) {
3192 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3193 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3194 	}
3195 	return 0;
3196 }
3197 
3198 
3199 static void
3200 dump_cache_tree_recursively(VMCache* cache, int level,
3201 	VMCache* highlightCache)
3202 {
3203 	// print this cache
3204 	for (int i = 0; i < level; i++)
3205 		kprintf("  ");
3206 	if (cache == highlightCache)
3207 		kprintf("%p <--\n", cache);
3208 	else
3209 		kprintf("%p\n", cache);
3210 
3211 	// recursively print its consumers
3212 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3213 			VMCache* consumer = it.Next();) {
3214 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3215 	}
3216 }
3217 
3218 
3219 static int
3220 dump_cache_tree(int argc, char** argv)
3221 {
3222 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3223 		kprintf("usage: %s <address>\n", argv[0]);
3224 		return 0;
3225 	}
3226 
3227 	addr_t address = parse_expression(argv[1]);
3228 	if (address == 0)
3229 		return 0;
3230 
3231 	VMCache* cache = (VMCache*)address;
3232 	VMCache* root = cache;
3233 
3234 	// find the root cache (the transitive source)
3235 	while (root->source != NULL)
3236 		root = root->source;
3237 
3238 	dump_cache_tree_recursively(root, 0, cache);
3239 
3240 	return 0;
3241 }
3242 
3243 
3244 const char*
3245 vm_cache_type_to_string(int32 type)
3246 {
3247 	switch (type) {
3248 		case CACHE_TYPE_RAM:
3249 			return "RAM";
3250 		case CACHE_TYPE_DEVICE:
3251 			return "device";
3252 		case CACHE_TYPE_VNODE:
3253 			return "vnode";
3254 		case CACHE_TYPE_NULL:
3255 			return "null";
3256 
3257 		default:
3258 			return "unknown";
3259 	}
3260 }
3261 
3262 
3263 #if DEBUG_CACHE_LIST
3264 
3265 static void
3266 update_cache_info_recursively(VMCache* cache, cache_info& info)
3267 {
3268 	info.page_count += cache->page_count;
3269 	if (cache->type == CACHE_TYPE_RAM)
3270 		info.committed += cache->committed_size;
3271 
3272 	// recurse
3273 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3274 			VMCache* consumer = it.Next();) {
3275 		update_cache_info_recursively(consumer, info);
3276 	}
3277 }
3278 
3279 
3280 static int
3281 cache_info_compare_page_count(const void* _a, const void* _b)
3282 {
3283 	const cache_info* a = (const cache_info*)_a;
3284 	const cache_info* b = (const cache_info*)_b;
3285 	if (a->page_count == b->page_count)
3286 		return 0;
3287 	return a->page_count < b->page_count ? 1 : -1;
3288 }
3289 
3290 
3291 static int
3292 cache_info_compare_committed(const void* _a, const void* _b)
3293 {
3294 	const cache_info* a = (const cache_info*)_a;
3295 	const cache_info* b = (const cache_info*)_b;
3296 	if (a->committed == b->committed)
3297 		return 0;
3298 	return a->committed < b->committed ? 1 : -1;
3299 }
3300 
3301 
3302 static void
3303 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3304 {
3305 	for (int i = 0; i < level; i++)
3306 		kprintf("  ");
3307 
3308 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3309 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3310 		cache->virtual_base, cache->virtual_end, cache->page_count);
3311 
3312 	if (level == 0)
3313 		kprintf("/%lu", info.page_count);
3314 
3315 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3316 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3317 
3318 		if (level == 0)
3319 			kprintf("/%lu", info.committed);
3320 	}
3321 
3322 	// areas
3323 	if (cache->areas != NULL) {
3324 		VMArea* area = cache->areas;
3325 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3326 			area->name, area->address_space->ID());
3327 
3328 		while (area->cache_next != NULL) {
3329 			area = area->cache_next;
3330 			kprintf(", %" B_PRId32, area->id);
3331 		}
3332 	}
3333 
3334 	kputs("\n");
3335 
3336 	// recurse
3337 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3338 			VMCache* consumer = it.Next();) {
3339 		dump_caches_recursively(consumer, info, level + 1);
3340 	}
3341 }
3342 
3343 
3344 static int
3345 dump_caches(int argc, char** argv)
3346 {
3347 	if (sCacheInfoTable == NULL) {
3348 		kprintf("No cache info table!\n");
3349 		return 0;
3350 	}
3351 
3352 	bool sortByPageCount = true;
3353 
3354 	for (int32 i = 1; i < argc; i++) {
3355 		if (strcmp(argv[i], "-c") == 0) {
3356 			sortByPageCount = false;
3357 		} else {
3358 			print_debugger_command_usage(argv[0]);
3359 			return 0;
3360 		}
3361 	}
3362 
3363 	uint32 totalCount = 0;
3364 	uint32 rootCount = 0;
3365 	off_t totalCommitted = 0;
3366 	page_num_t totalPages = 0;
3367 
3368 	VMCache* cache = gDebugCacheList;
3369 	while (cache) {
3370 		totalCount++;
3371 		if (cache->source == NULL) {
3372 			cache_info stackInfo;
3373 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3374 				? sCacheInfoTable[rootCount] : stackInfo;
3375 			rootCount++;
3376 			info.cache = cache;
3377 			info.page_count = 0;
3378 			info.committed = 0;
3379 			update_cache_info_recursively(cache, info);
3380 			totalCommitted += info.committed;
3381 			totalPages += info.page_count;
3382 		}
3383 
3384 		cache = cache->debug_next;
3385 	}
3386 
3387 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3388 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3389 			sortByPageCount
3390 				? &cache_info_compare_page_count
3391 				: &cache_info_compare_committed);
3392 	}
3393 
3394 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3395 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3396 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3397 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3398 			"page count" : "committed size");
3399 
3400 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3401 		for (uint32 i = 0; i < rootCount; i++) {
3402 			cache_info& info = sCacheInfoTable[i];
3403 			dump_caches_recursively(info.cache, info, 0);
3404 		}
3405 	} else
3406 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3407 
3408 	return 0;
3409 }
3410 
3411 #endif	// DEBUG_CACHE_LIST
3412 
3413 
3414 static int
3415 dump_cache(int argc, char** argv)
3416 {
3417 	VMCache* cache;
3418 	bool showPages = false;
3419 	int i = 1;
3420 
3421 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3422 		kprintf("usage: %s [-ps] <address>\n"
3423 			"  if -p is specified, all pages are shown, if -s is used\n"
3424 			"  only the cache info is shown respectively.\n", argv[0]);
3425 		return 0;
3426 	}
3427 	while (argv[i][0] == '-') {
3428 		char* arg = argv[i] + 1;
3429 		while (arg[0]) {
3430 			if (arg[0] == 'p')
3431 				showPages = true;
3432 			arg++;
3433 		}
3434 		i++;
3435 	}
3436 	if (argv[i] == NULL) {
3437 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3438 		return 0;
3439 	}
3440 
3441 	addr_t address = parse_expression(argv[i]);
3442 	if (address == 0)
3443 		return 0;
3444 
3445 	cache = (VMCache*)address;
3446 
3447 	cache->Dump(showPages);
3448 
3449 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3450 
3451 	return 0;
3452 }
3453 
3454 
3455 static void
3456 dump_area_struct(VMArea* area, bool mappings)
3457 {
3458 	kprintf("AREA: %p\n", area);
3459 	kprintf("name:\t\t'%s'\n", area->name);
3460 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3461 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3462 	kprintf("base:\t\t0x%lx\n", area->Base());
3463 	kprintf("size:\t\t0x%lx\n", area->Size());
3464 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3465 	kprintf("page_protection:%p\n", area->page_protections);
3466 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3467 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3468 	kprintf("cache:\t\t%p\n", area->cache);
3469 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3470 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3471 	kprintf("cache_next:\t%p\n", area->cache_next);
3472 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3473 
3474 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3475 	if (mappings) {
3476 		kprintf("page mappings:\n");
3477 		while (iterator.HasNext()) {
3478 			vm_page_mapping* mapping = iterator.Next();
3479 			kprintf("  %p", mapping->page);
3480 		}
3481 		kprintf("\n");
3482 	} else {
3483 		uint32 count = 0;
3484 		while (iterator.Next() != NULL) {
3485 			count++;
3486 		}
3487 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3488 	}
3489 }
3490 
3491 
3492 static int
3493 dump_area(int argc, char** argv)
3494 {
3495 	bool mappings = false;
3496 	bool found = false;
3497 	int32 index = 1;
3498 	VMArea* area;
3499 	addr_t num;
3500 
3501 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3502 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3503 			"All areas matching either id/address/name are listed. You can\n"
3504 			"force to check only a specific item by prefixing the specifier\n"
3505 			"with the id/contains/address/name keywords.\n"
3506 			"-m shows the area's mappings as well.\n");
3507 		return 0;
3508 	}
3509 
3510 	if (!strcmp(argv[1], "-m")) {
3511 		mappings = true;
3512 		index++;
3513 	}
3514 
3515 	int32 mode = 0xf;
3516 	if (!strcmp(argv[index], "id"))
3517 		mode = 1;
3518 	else if (!strcmp(argv[index], "contains"))
3519 		mode = 2;
3520 	else if (!strcmp(argv[index], "name"))
3521 		mode = 4;
3522 	else if (!strcmp(argv[index], "address"))
3523 		mode = 0;
3524 	if (mode != 0xf)
3525 		index++;
3526 
3527 	if (index >= argc) {
3528 		kprintf("No area specifier given.\n");
3529 		return 0;
3530 	}
3531 
3532 	num = parse_expression(argv[index]);
3533 
3534 	if (mode == 0) {
3535 		dump_area_struct((struct VMArea*)num, mappings);
3536 	} else {
3537 		// walk through the area list, looking for the arguments as a name
3538 
3539 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3540 		while ((area = it.Next()) != NULL) {
3541 			if (((mode & 4) != 0
3542 					&& !strcmp(argv[index], area->name))
3543 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3544 					|| (((mode & 2) != 0 && area->Base() <= num
3545 						&& area->Base() + area->Size() > num))))) {
3546 				dump_area_struct(area, mappings);
3547 				found = true;
3548 			}
3549 		}
3550 
3551 		if (!found)
3552 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3553 	}
3554 
3555 	return 0;
3556 }
3557 
3558 
3559 static int
3560 dump_area_list(int argc, char** argv)
3561 {
3562 	VMArea* area;
3563 	const char* name = NULL;
3564 	int32 id = 0;
3565 
3566 	if (argc > 1) {
3567 		id = parse_expression(argv[1]);
3568 		if (id == 0)
3569 			name = argv[1];
3570 	}
3571 
3572 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3573 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3574 		B_PRINTF_POINTER_WIDTH, "size");
3575 
3576 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3577 	while ((area = it.Next()) != NULL) {
3578 		if ((id != 0 && area->address_space->ID() != id)
3579 			|| (name != NULL && strstr(area->name, name) == NULL))
3580 			continue;
3581 
3582 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3583 			area->id, (void*)area->Base(), (void*)area->Size(),
3584 			area->protection, area->wiring, area->name);
3585 	}
3586 	return 0;
3587 }
3588 
3589 
3590 static int
3591 dump_available_memory(int argc, char** argv)
3592 {
3593 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3594 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3595 	return 0;
3596 }
3597 
3598 
3599 static int
3600 dump_mapping_info(int argc, char** argv)
3601 {
3602 	bool reverseLookup = false;
3603 	bool pageLookup = false;
3604 
3605 	int argi = 1;
3606 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3607 		const char* arg = argv[argi];
3608 		if (strcmp(arg, "-r") == 0) {
3609 			reverseLookup = true;
3610 		} else if (strcmp(arg, "-p") == 0) {
3611 			reverseLookup = true;
3612 			pageLookup = true;
3613 		} else {
3614 			print_debugger_command_usage(argv[0]);
3615 			return 0;
3616 		}
3617 	}
3618 
3619 	// We need at least one argument, the address. Optionally a thread ID can be
3620 	// specified.
3621 	if (argi >= argc || argi + 2 < argc) {
3622 		print_debugger_command_usage(argv[0]);
3623 		return 0;
3624 	}
3625 
3626 	uint64 addressValue;
3627 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3628 		return 0;
3629 
3630 	Team* team = NULL;
3631 	if (argi < argc) {
3632 		uint64 threadID;
3633 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3634 			return 0;
3635 
3636 		Thread* thread = Thread::GetDebug(threadID);
3637 		if (thread == NULL) {
3638 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3639 			return 0;
3640 		}
3641 
3642 		team = thread->team;
3643 	}
3644 
3645 	if (reverseLookup) {
3646 		phys_addr_t physicalAddress;
3647 		if (pageLookup) {
3648 			vm_page* page = (vm_page*)(addr_t)addressValue;
3649 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3650 		} else {
3651 			physicalAddress = (phys_addr_t)addressValue;
3652 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3653 		}
3654 
3655 		kprintf("    Team     Virtual Address      Area\n");
3656 		kprintf("--------------------------------------\n");
3657 
3658 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3659 			Callback()
3660 				:
3661 				fAddressSpace(NULL)
3662 			{
3663 			}
3664 
3665 			void SetAddressSpace(VMAddressSpace* addressSpace)
3666 			{
3667 				fAddressSpace = addressSpace;
3668 			}
3669 
3670 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3671 			{
3672 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3673 					virtualAddress);
3674 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3675 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3676 				else
3677 					kprintf("\n");
3678 				return false;
3679 			}
3680 
3681 		private:
3682 			VMAddressSpace*	fAddressSpace;
3683 		} callback;
3684 
3685 		if (team != NULL) {
3686 			// team specified -- get its address space
3687 			VMAddressSpace* addressSpace = team->address_space;
3688 			if (addressSpace == NULL) {
3689 				kprintf("Failed to get address space!\n");
3690 				return 0;
3691 			}
3692 
3693 			callback.SetAddressSpace(addressSpace);
3694 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3695 				physicalAddress, callback);
3696 		} else {
3697 			// no team specified -- iterate through all address spaces
3698 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3699 				addressSpace != NULL;
3700 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3701 				callback.SetAddressSpace(addressSpace);
3702 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3703 					physicalAddress, callback);
3704 			}
3705 		}
3706 	} else {
3707 		// get the address space
3708 		addr_t virtualAddress = (addr_t)addressValue;
3709 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3710 		VMAddressSpace* addressSpace;
3711 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3712 			addressSpace = VMAddressSpace::Kernel();
3713 		} else if (team != NULL) {
3714 			addressSpace = team->address_space;
3715 		} else {
3716 			Thread* thread = debug_get_debugged_thread();
3717 			if (thread == NULL || thread->team == NULL) {
3718 				kprintf("Failed to get team!\n");
3719 				return 0;
3720 			}
3721 
3722 			addressSpace = thread->team->address_space;
3723 		}
3724 
3725 		if (addressSpace == NULL) {
3726 			kprintf("Failed to get address space!\n");
3727 			return 0;
3728 		}
3729 
3730 		// let the translation map implementation do the job
3731 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3732 	}
3733 
3734 	return 0;
3735 }
3736 
3737 
3738 /*!	Deletes all areas and reserved regions in the given address space.
3739 
3740 	The caller must ensure that none of the areas has any wired ranges.
3741 
3742 	\param addressSpace The address space.
3743 	\param deletingAddressSpace \c true, if the address space is in the process
3744 		of being deleted.
3745 */
3746 void
3747 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3748 {
3749 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3750 		addressSpace->ID()));
3751 
3752 	addressSpace->WriteLock();
3753 
3754 	// remove all reserved areas in this address space
3755 	addressSpace->UnreserveAllAddressRanges(0);
3756 
3757 	// delete all the areas in this address space
3758 	while (VMArea* area = addressSpace->FirstArea()) {
3759 		ASSERT(!area->IsWired());
3760 		delete_area(addressSpace, area, deletingAddressSpace);
3761 	}
3762 
3763 	addressSpace->WriteUnlock();
3764 }
3765 
3766 
3767 static area_id
3768 vm_area_for(addr_t address, bool kernel)
3769 {
3770 	team_id team;
3771 	if (IS_USER_ADDRESS(address)) {
3772 		// we try the user team address space, if any
3773 		team = VMAddressSpace::CurrentID();
3774 		if (team < 0)
3775 			return team;
3776 	} else
3777 		team = VMAddressSpace::KernelID();
3778 
3779 	AddressSpaceReadLocker locker(team);
3780 	if (!locker.IsLocked())
3781 		return B_BAD_TEAM_ID;
3782 
3783 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3784 	if (area != NULL) {
3785 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3786 			return B_ERROR;
3787 
3788 		return area->id;
3789 	}
3790 
3791 	return B_ERROR;
3792 }
3793 
3794 
3795 /*!	Frees physical pages that were used during the boot process.
3796 	\a end is inclusive.
3797 */
3798 static void
3799 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3800 {
3801 	// free all physical pages in the specified range
3802 
3803 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3804 		phys_addr_t physicalAddress;
3805 		uint32 flags;
3806 
3807 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3808 			&& (flags & PAGE_PRESENT) != 0) {
3809 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3810 			if (page != NULL && page->State() != PAGE_STATE_FREE
3811 					&& page->State() != PAGE_STATE_CLEAR
3812 					&& page->State() != PAGE_STATE_UNUSED) {
3813 				DEBUG_PAGE_ACCESS_START(page);
3814 				vm_page_set_state(page, PAGE_STATE_FREE);
3815 			}
3816 		}
3817 	}
3818 
3819 	// unmap the memory
3820 	map->Unmap(start, end);
3821 }
3822 
3823 
3824 void
3825 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3826 {
3827 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3828 	addr_t end = start + (size - 1);
3829 	addr_t lastEnd = start;
3830 
3831 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3832 		(void*)start, (void*)end));
3833 
3834 	// The areas are sorted in virtual address space order, so
3835 	// we just have to find the holes between them that fall
3836 	// into the area we should dispose
3837 
3838 	map->Lock();
3839 
3840 	for (VMAddressSpace::AreaIterator it
3841 				= VMAddressSpace::Kernel()->GetAreaIterator();
3842 			VMArea* area = it.Next();) {
3843 		addr_t areaStart = area->Base();
3844 		addr_t areaEnd = areaStart + (area->Size() - 1);
3845 
3846 		if (areaEnd < start)
3847 			continue;
3848 
3849 		if (areaStart > end) {
3850 			// we are done, the area is already beyond of what we have to free
3851 			break;
3852 		}
3853 
3854 		if (areaStart > lastEnd) {
3855 			// this is something we can free
3856 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3857 				(void*)areaStart));
3858 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3859 		}
3860 
3861 		if (areaEnd >= end) {
3862 			lastEnd = areaEnd;
3863 				// no +1 to prevent potential overflow
3864 			break;
3865 		}
3866 
3867 		lastEnd = areaEnd + 1;
3868 	}
3869 
3870 	if (lastEnd < end) {
3871 		// we can also get rid of some space at the end of the area
3872 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3873 			(void*)end));
3874 		unmap_and_free_physical_pages(map, lastEnd, end);
3875 	}
3876 
3877 	map->Unlock();
3878 }
3879 
3880 
3881 static void
3882 create_preloaded_image_areas(struct preloaded_image* _image)
3883 {
3884 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3885 	char name[B_OS_NAME_LENGTH];
3886 	void* address;
3887 	int32 length;
3888 
3889 	// use file name to create a good area name
3890 	char* fileName = strrchr(image->name, '/');
3891 	if (fileName == NULL)
3892 		fileName = image->name;
3893 	else
3894 		fileName++;
3895 
3896 	length = strlen(fileName);
3897 	// make sure there is enough space for the suffix
3898 	if (length > 25)
3899 		length = 25;
3900 
3901 	memcpy(name, fileName, length);
3902 	strcpy(name + length, "_text");
3903 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3904 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3905 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3906 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3907 		// this will later be remapped read-only/executable by the
3908 		// ELF initialization code
3909 
3910 	strcpy(name + length, "_data");
3911 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3912 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3913 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3914 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3915 }
3916 
3917 
3918 /*!	Frees all previously kernel arguments areas from the kernel_args structure.
3919 	Any boot loader resources contained in that arguments must not be accessed
3920 	anymore past this point.
3921 */
3922 void
3923 vm_free_kernel_args(kernel_args* args)
3924 {
3925 	uint32 i;
3926 
3927 	TRACE(("vm_free_kernel_args()\n"));
3928 
3929 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3930 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3931 		if (area >= B_OK)
3932 			delete_area(area);
3933 	}
3934 }
3935 
3936 
3937 static void
3938 allocate_kernel_args(kernel_args* args)
3939 {
3940 	TRACE(("allocate_kernel_args()\n"));
3941 
3942 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3943 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3944 
3945 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3946 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3947 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3948 	}
3949 }
3950 
3951 
3952 static void
3953 unreserve_boot_loader_ranges(kernel_args* args)
3954 {
3955 	TRACE(("unreserve_boot_loader_ranges()\n"));
3956 
3957 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3958 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3959 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3960 			args->virtual_allocated_range[i].size);
3961 	}
3962 }
3963 
3964 
3965 static void
3966 reserve_boot_loader_ranges(kernel_args* args)
3967 {
3968 	TRACE(("reserve_boot_loader_ranges()\n"));
3969 
3970 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3971 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3972 
3973 		// If the address is no kernel address, we just skip it. The
3974 		// architecture specific code has to deal with it.
3975 		if (!IS_KERNEL_ADDRESS(address)) {
3976 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3977 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3978 			continue;
3979 		}
3980 
3981 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3982 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3983 		if (status < B_OK)
3984 			panic("could not reserve boot loader ranges\n");
3985 	}
3986 }
3987 
3988 
3989 static addr_t
3990 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3991 {
3992 	size = PAGE_ALIGN(size);
3993 
3994 	// find a slot in the virtual allocation addr range
3995 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3996 		// check to see if the space between this one and the last is big enough
3997 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3998 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3999 			+ args->virtual_allocated_range[i - 1].size;
4000 
4001 		addr_t base = alignment > 0
4002 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4003 
4004 		if (base >= KERNEL_BASE && base < rangeStart
4005 				&& rangeStart - base >= size) {
4006 			args->virtual_allocated_range[i - 1].size
4007 				+= base + size - previousRangeEnd;
4008 			return base;
4009 		}
4010 	}
4011 
4012 	// we hadn't found one between allocation ranges. this is ok.
4013 	// see if there's a gap after the last one
4014 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4015 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4016 		+ args->virtual_allocated_range[lastEntryIndex].size;
4017 	addr_t base = alignment > 0
4018 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4019 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4020 		args->virtual_allocated_range[lastEntryIndex].size
4021 			+= base + size - lastRangeEnd;
4022 		return base;
4023 	}
4024 
4025 	// see if there's a gap before the first one
4026 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4027 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4028 		base = rangeStart - size;
4029 		if (alignment > 0)
4030 			base = ROUNDDOWN(base, alignment);
4031 
4032 		if (base >= KERNEL_BASE) {
4033 			args->virtual_allocated_range[0].start = base;
4034 			args->virtual_allocated_range[0].size += rangeStart - base;
4035 			return base;
4036 		}
4037 	}
4038 
4039 	return 0;
4040 }
4041 
4042 
4043 static bool
4044 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4045 {
4046 	// TODO: horrible brute-force method of determining if the page can be
4047 	// allocated
4048 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4049 		if (address >= args->physical_memory_range[i].start
4050 			&& address < args->physical_memory_range[i].start
4051 				+ args->physical_memory_range[i].size)
4052 			return true;
4053 	}
4054 	return false;
4055 }
4056 
4057 
4058 page_num_t
4059 vm_allocate_early_physical_page(kernel_args* args)
4060 {
4061 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4062 		phys_addr_t nextPage;
4063 
4064 		nextPage = args->physical_allocated_range[i].start
4065 			+ args->physical_allocated_range[i].size;
4066 		// see if the page after the next allocated paddr run can be allocated
4067 		if (i + 1 < args->num_physical_allocated_ranges
4068 			&& args->physical_allocated_range[i + 1].size != 0) {
4069 			// see if the next page will collide with the next allocated range
4070 			if (nextPage >= args->physical_allocated_range[i+1].start)
4071 				continue;
4072 		}
4073 		// see if the next physical page fits in the memory block
4074 		if (is_page_in_physical_memory_range(args, nextPage)) {
4075 			// we got one!
4076 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4077 			return nextPage / B_PAGE_SIZE;
4078 		}
4079 	}
4080 
4081 	// Expanding upwards didn't work, try going downwards.
4082 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4083 		phys_addr_t nextPage;
4084 
4085 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4086 		// see if the page after the prev allocated paddr run can be allocated
4087 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4088 			// see if the next page will collide with the next allocated range
4089 			if (nextPage < args->physical_allocated_range[i-1].start
4090 				+ args->physical_allocated_range[i-1].size)
4091 				continue;
4092 		}
4093 		// see if the next physical page fits in the memory block
4094 		if (is_page_in_physical_memory_range(args, nextPage)) {
4095 			// we got one!
4096 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4097 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4098 			return nextPage / B_PAGE_SIZE;
4099 		}
4100 	}
4101 
4102 	return 0;
4103 		// could not allocate a block
4104 }
4105 
4106 
4107 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4108 	allocate some pages before the VM is completely up.
4109 */
4110 addr_t
4111 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4112 	uint32 attributes, addr_t alignment)
4113 {
4114 	if (physicalSize > virtualSize)
4115 		physicalSize = virtualSize;
4116 
4117 	// find the vaddr to allocate at
4118 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4119 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4120 	if (virtualBase == 0) {
4121 		panic("vm_allocate_early: could not allocate virtual address\n");
4122 		return 0;
4123 	}
4124 
4125 	// map the pages
4126 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4127 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4128 		if (physicalAddress == 0)
4129 			panic("error allocating early page!\n");
4130 
4131 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4132 
4133 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4134 			physicalAddress * B_PAGE_SIZE, attributes,
4135 			&vm_allocate_early_physical_page);
4136 	}
4137 
4138 	return virtualBase;
4139 }
4140 
4141 
4142 /*!	The main entrance point to initialize the VM. */
4143 status_t
4144 vm_init(kernel_args* args)
4145 {
4146 	struct preloaded_image* image;
4147 	void* address;
4148 	status_t err = 0;
4149 	uint32 i;
4150 
4151 	TRACE(("vm_init: entry\n"));
4152 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4153 	err = arch_vm_init(args);
4154 
4155 	// initialize some globals
4156 	vm_page_init_num_pages(args);
4157 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4158 
4159 	slab_init(args);
4160 
4161 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4162 	off_t heapSize = INITIAL_HEAP_SIZE;
4163 	// try to accomodate low memory systems
4164 	while (heapSize > sAvailableMemory / 8)
4165 		heapSize /= 2;
4166 	if (heapSize < 1024 * 1024)
4167 		panic("vm_init: go buy some RAM please.");
4168 
4169 	// map in the new heap and initialize it
4170 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4171 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4172 	TRACE(("heap at 0x%lx\n", heapBase));
4173 	heap_init(heapBase, heapSize);
4174 #endif
4175 
4176 	// initialize the free page list and physical page mapper
4177 	vm_page_init(args);
4178 
4179 	// initialize the cache allocators
4180 	vm_cache_init(args);
4181 
4182 	{
4183 		status_t error = VMAreaHash::Init();
4184 		if (error != B_OK)
4185 			panic("vm_init: error initializing area hash table\n");
4186 	}
4187 
4188 	VMAddressSpace::Init();
4189 	reserve_boot_loader_ranges(args);
4190 
4191 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4192 	heap_init_post_area();
4193 #endif
4194 
4195 	// Do any further initialization that the architecture dependant layers may
4196 	// need now
4197 	arch_vm_translation_map_init_post_area(args);
4198 	arch_vm_init_post_area(args);
4199 	vm_page_init_post_area(args);
4200 	slab_init_post_area();
4201 
4202 	// allocate areas to represent stuff that already exists
4203 
4204 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4205 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4206 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4207 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4208 #endif
4209 
4210 	allocate_kernel_args(args);
4211 
4212 	create_preloaded_image_areas(args->kernel_image);
4213 
4214 	// allocate areas for preloaded images
4215 	for (image = args->preloaded_images; image != NULL; image = image->next)
4216 		create_preloaded_image_areas(image);
4217 
4218 	// allocate kernel stacks
4219 	for (i = 0; i < args->num_cpus; i++) {
4220 		char name[64];
4221 
4222 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4223 		address = (void*)args->cpu_kstack[i].start;
4224 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4225 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4226 	}
4227 
4228 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4229 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4230 
4231 #if PARANOID_KERNEL_MALLOC
4232 	vm_block_address_range("uninitialized heap memory",
4233 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4234 #endif
4235 #if PARANOID_KERNEL_FREE
4236 	vm_block_address_range("freed heap memory",
4237 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4238 #endif
4239 
4240 	// create the object cache for the page mappings
4241 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4242 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4243 		NULL, NULL);
4244 	if (gPageMappingsObjectCache == NULL)
4245 		panic("failed to create page mappings object cache");
4246 
4247 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4248 
4249 #if DEBUG_CACHE_LIST
4250 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4251 		virtual_address_restrictions virtualRestrictions = {};
4252 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4253 		physical_address_restrictions physicalRestrictions = {};
4254 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4255 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4256 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4257 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4258 			&physicalRestrictions, (void**)&sCacheInfoTable);
4259 	}
4260 #endif	// DEBUG_CACHE_LIST
4261 
4262 	// add some debugger commands
4263 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4264 	add_debugger_command("area", &dump_area,
4265 		"Dump info about a particular area");
4266 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4267 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4268 #if DEBUG_CACHE_LIST
4269 	if (sCacheInfoTable != NULL) {
4270 		add_debugger_command_etc("caches", &dump_caches,
4271 			"List all VMCache trees",
4272 			"[ \"-c\" ]\n"
4273 			"All cache trees are listed sorted in decreasing order by number "
4274 				"of\n"
4275 			"used pages or, if \"-c\" is specified, by size of committed "
4276 				"memory.\n",
4277 			0);
4278 	}
4279 #endif
4280 	add_debugger_command("avail", &dump_available_memory,
4281 		"Dump available memory");
4282 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4283 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4284 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4285 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4286 	add_debugger_command("string", &display_mem, "dump strings");
4287 
4288 	add_debugger_command_etc("mapping", &dump_mapping_info,
4289 		"Print address mapping information",
4290 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4291 		"Prints low-level page mapping information for a given address. If\n"
4292 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4293 		"address that is looked up in the translation map of the current\n"
4294 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4295 		"\"-r\" is specified, <address> is a physical address that is\n"
4296 		"searched in the translation map of all teams, respectively the team\n"
4297 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4298 		"<address> is the address of a vm_page structure. The behavior is\n"
4299 		"equivalent to specifying \"-r\" with the physical address of that\n"
4300 		"page.\n",
4301 		0);
4302 
4303 	TRACE(("vm_init: exit\n"));
4304 
4305 	vm_cache_init_post_heap();
4306 
4307 	return err;
4308 }
4309 
4310 
4311 status_t
4312 vm_init_post_sem(kernel_args* args)
4313 {
4314 	// This frees all unused boot loader resources and makes its space available
4315 	// again
4316 	arch_vm_init_end(args);
4317 	unreserve_boot_loader_ranges(args);
4318 
4319 	// fill in all of the semaphores that were not allocated before
4320 	// since we're still single threaded and only the kernel address space
4321 	// exists, it isn't that hard to find all of the ones we need to create
4322 
4323 	arch_vm_translation_map_init_post_sem(args);
4324 
4325 	slab_init_post_sem();
4326 
4327 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4328 	heap_init_post_sem();
4329 #endif
4330 
4331 	return B_OK;
4332 }
4333 
4334 
4335 status_t
4336 vm_init_post_thread(kernel_args* args)
4337 {
4338 	vm_page_init_post_thread(args);
4339 	slab_init_post_thread();
4340 	return heap_init_post_thread();
4341 }
4342 
4343 
4344 status_t
4345 vm_init_post_modules(kernel_args* args)
4346 {
4347 	return arch_vm_init_post_modules(args);
4348 }
4349 
4350 
4351 void
4352 permit_page_faults(void)
4353 {
4354 	Thread* thread = thread_get_current_thread();
4355 	if (thread != NULL)
4356 		atomic_add(&thread->page_faults_allowed, 1);
4357 }
4358 
4359 
4360 void
4361 forbid_page_faults(void)
4362 {
4363 	Thread* thread = thread_get_current_thread();
4364 	if (thread != NULL)
4365 		atomic_add(&thread->page_faults_allowed, -1);
4366 }
4367 
4368 
4369 status_t
4370 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4371 	bool isUser, addr_t* newIP)
4372 {
4373 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4374 		faultAddress));
4375 
4376 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4377 
4378 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4379 	VMAddressSpace* addressSpace = NULL;
4380 
4381 	status_t status = B_OK;
4382 	*newIP = 0;
4383 	atomic_add((int32*)&sPageFaults, 1);
4384 
4385 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4386 		addressSpace = VMAddressSpace::GetKernel();
4387 	} else if (IS_USER_ADDRESS(pageAddress)) {
4388 		addressSpace = VMAddressSpace::GetCurrent();
4389 		if (addressSpace == NULL) {
4390 			if (!isUser) {
4391 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4392 					"memory!\n");
4393 				status = B_BAD_ADDRESS;
4394 				TPF(PageFaultError(-1,
4395 					VMPageFaultTracing
4396 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4397 			} else {
4398 				// XXX weird state.
4399 				panic("vm_page_fault: non kernel thread accessing user memory "
4400 					"that doesn't exist!\n");
4401 				status = B_BAD_ADDRESS;
4402 			}
4403 		}
4404 	} else {
4405 		// the hit was probably in the 64k DMZ between kernel and user space
4406 		// this keeps a user space thread from passing a buffer that crosses
4407 		// into kernel space
4408 		status = B_BAD_ADDRESS;
4409 		TPF(PageFaultError(-1,
4410 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4411 	}
4412 
4413 	if (status == B_OK) {
4414 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4415 			isUser, NULL);
4416 	}
4417 
4418 	if (status < B_OK) {
4419 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4420 			"0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n",
4421 			strerror(status), address, faultAddress, isWrite, isUser, isExecute,
4422 			thread_get_current_thread_id());
4423 		if (!isUser) {
4424 			Thread* thread = thread_get_current_thread();
4425 			if (thread != NULL && thread->fault_handler != 0) {
4426 				// this will cause the arch dependant page fault handler to
4427 				// modify the IP on the interrupt frame or whatever to return
4428 				// to this address
4429 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4430 			} else {
4431 				// unhandled page fault in the kernel
4432 				panic("vm_page_fault: unhandled page fault in kernel space at "
4433 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4434 			}
4435 		} else {
4436 			Thread* thread = thread_get_current_thread();
4437 
4438 #ifdef TRACE_FAULTS
4439 			VMArea* area = NULL;
4440 			if (addressSpace != NULL) {
4441 				addressSpace->ReadLock();
4442 				area = addressSpace->LookupArea(faultAddress);
4443 			}
4444 
4445 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4446 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4447 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4448 				thread->team->Name(), thread->team->id,
4449 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4450 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4451 					area->Base() : 0x0));
4452 
4453 			if (addressSpace != NULL)
4454 				addressSpace->ReadUnlock();
4455 #endif
4456 
4457 			// If the thread has a signal handler for SIGSEGV, we simply
4458 			// send it the signal. Otherwise we notify the user debugger
4459 			// first.
4460 			struct sigaction action;
4461 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4462 					&& action.sa_handler != SIG_DFL
4463 					&& action.sa_handler != SIG_IGN)
4464 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4465 					SIGSEGV)) {
4466 				Signal signal(SIGSEGV,
4467 					status == B_PERMISSION_DENIED
4468 						? SEGV_ACCERR : SEGV_MAPERR,
4469 					EFAULT, thread->team->id);
4470 				signal.SetAddress((void*)address);
4471 				send_signal_to_thread(thread, signal, 0);
4472 			}
4473 		}
4474 	}
4475 
4476 	if (addressSpace != NULL)
4477 		addressSpace->Put();
4478 
4479 	return B_HANDLED_INTERRUPT;
4480 }
4481 
4482 
4483 struct PageFaultContext {
4484 	AddressSpaceReadLocker	addressSpaceLocker;
4485 	VMCacheChainLocker		cacheChainLocker;
4486 
4487 	VMTranslationMap*		map;
4488 	VMCache*				topCache;
4489 	off_t					cacheOffset;
4490 	vm_page_reservation		reservation;
4491 	bool					isWrite;
4492 
4493 	// return values
4494 	vm_page*				page;
4495 	bool					restart;
4496 	bool					pageAllocated;
4497 
4498 
4499 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4500 		:
4501 		addressSpaceLocker(addressSpace, true),
4502 		map(addressSpace->TranslationMap()),
4503 		isWrite(isWrite)
4504 	{
4505 	}
4506 
4507 	~PageFaultContext()
4508 	{
4509 		UnlockAll();
4510 		vm_page_unreserve_pages(&reservation);
4511 	}
4512 
4513 	void Prepare(VMCache* topCache, off_t cacheOffset)
4514 	{
4515 		this->topCache = topCache;
4516 		this->cacheOffset = cacheOffset;
4517 		page = NULL;
4518 		restart = false;
4519 		pageAllocated = false;
4520 
4521 		cacheChainLocker.SetTo(topCache);
4522 	}
4523 
4524 	void UnlockAll(VMCache* exceptCache = NULL)
4525 	{
4526 		topCache = NULL;
4527 		addressSpaceLocker.Unlock();
4528 		cacheChainLocker.Unlock(exceptCache);
4529 	}
4530 };
4531 
4532 
4533 /*!	Gets the page that should be mapped into the area.
4534 	Returns an error code other than \c B_OK, if the page couldn't be found or
4535 	paged in. The locking state of the address space and the caches is undefined
4536 	in that case.
4537 	Returns \c B_OK with \c context.restart set to \c true, if the functions
4538 	had to unlock the address space and all caches and is supposed to be called
4539 	again.
4540 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4541 	found. It is returned in \c context.page. The address space will still be
4542 	locked as well as all caches starting from the top cache to at least the
4543 	cache the page lives in.
4544 */
4545 static status_t
4546 fault_get_page(PageFaultContext& context)
4547 {
4548 	VMCache* cache = context.topCache;
4549 	VMCache* lastCache = NULL;
4550 	vm_page* page = NULL;
4551 
4552 	while (cache != NULL) {
4553 		// We already hold the lock of the cache at this point.
4554 
4555 		lastCache = cache;
4556 
4557 		page = cache->LookupPage(context.cacheOffset);
4558 		if (page != NULL && page->busy) {
4559 			// page must be busy -- wait for it to become unbusy
4560 			context.UnlockAll(cache);
4561 			cache->ReleaseRefLocked();
4562 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4563 
4564 			// restart the whole process
4565 			context.restart = true;
4566 			return B_OK;
4567 		}
4568 
4569 		if (page != NULL)
4570 			break;
4571 
4572 		// The current cache does not contain the page we're looking for.
4573 
4574 		// see if the backing store has it
4575 		if (cache->HasPage(context.cacheOffset)) {
4576 			// insert a fresh page and mark it busy -- we're going to read it in
4577 			page = vm_page_allocate_page(&context.reservation,
4578 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4579 			cache->InsertPage(page, context.cacheOffset);
4580 
4581 			// We need to unlock all caches and the address space while reading
4582 			// the page in. Keep a reference to the cache around.
4583 			cache->AcquireRefLocked();
4584 			context.UnlockAll();
4585 
4586 			// read the page in
4587 			generic_io_vec vec;
4588 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4589 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4590 
4591 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4592 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4593 
4594 			cache->Lock();
4595 
4596 			if (status < B_OK) {
4597 				// on error remove and free the page
4598 				dprintf("reading page from cache %p returned: %s!\n",
4599 					cache, strerror(status));
4600 
4601 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4602 				cache->RemovePage(page);
4603 				vm_page_set_state(page, PAGE_STATE_FREE);
4604 
4605 				cache->ReleaseRefAndUnlock();
4606 				return status;
4607 			}
4608 
4609 			// mark the page unbusy again
4610 			cache->MarkPageUnbusy(page);
4611 
4612 			DEBUG_PAGE_ACCESS_END(page);
4613 
4614 			// Since we needed to unlock everything temporarily, the area
4615 			// situation might have changed. So we need to restart the whole
4616 			// process.
4617 			cache->ReleaseRefAndUnlock();
4618 			context.restart = true;
4619 			return B_OK;
4620 		}
4621 
4622 		cache = context.cacheChainLocker.LockSourceCache();
4623 	}
4624 
4625 	if (page == NULL) {
4626 		// There was no adequate page, determine the cache for a clean one.
4627 		// Read-only pages come in the deepest cache, only the top most cache
4628 		// may have direct write access.
4629 		cache = context.isWrite ? context.topCache : lastCache;
4630 
4631 		// allocate a clean page
4632 		page = vm_page_allocate_page(&context.reservation,
4633 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4634 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4635 			page->physical_page_number));
4636 
4637 		// insert the new page into our cache
4638 		cache->InsertPage(page, context.cacheOffset);
4639 		context.pageAllocated = true;
4640 	} else if (page->Cache() != context.topCache && context.isWrite) {
4641 		// We have a page that has the data we want, but in the wrong cache
4642 		// object so we need to copy it and stick it into the top cache.
4643 		vm_page* sourcePage = page;
4644 
4645 		// TODO: If memory is low, it might be a good idea to steal the page
4646 		// from our source cache -- if possible, that is.
4647 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4648 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4649 
4650 		// To not needlessly kill concurrency we unlock all caches but the top
4651 		// one while copying the page. Lacking another mechanism to ensure that
4652 		// the source page doesn't disappear, we mark it busy.
4653 		sourcePage->busy = true;
4654 		context.cacheChainLocker.UnlockKeepRefs(true);
4655 
4656 		// copy the page
4657 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4658 			sourcePage->physical_page_number * B_PAGE_SIZE);
4659 
4660 		context.cacheChainLocker.RelockCaches(true);
4661 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4662 
4663 		// insert the new page into our cache
4664 		context.topCache->InsertPage(page, context.cacheOffset);
4665 		context.pageAllocated = true;
4666 	} else
4667 		DEBUG_PAGE_ACCESS_START(page);
4668 
4669 	context.page = page;
4670 	return B_OK;
4671 }
4672 
4673 
4674 /*!	Makes sure the address in the given address space is mapped.
4675 
4676 	\param addressSpace The address space.
4677 	\param originalAddress The address. Doesn't need to be page aligned.
4678 	\param isWrite If \c true the address shall be write-accessible.
4679 	\param isUser If \c true the access is requested by a userland team.
4680 	\param wirePage On success, if non \c NULL, the wired count of the page
4681 		mapped at the given address is incremented and the page is returned
4682 		via this parameter.
4683 	\return \c B_OK on success, another error code otherwise.
4684 */
4685 static status_t
4686 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4687 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4688 {
4689 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4690 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4691 		originalAddress, isWrite, isUser));
4692 
4693 	PageFaultContext context(addressSpace, isWrite);
4694 
4695 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4696 	status_t status = B_OK;
4697 
4698 	addressSpace->IncrementFaultCount();
4699 
4700 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4701 	// the pages upfront makes sure we don't have any cache locked, so that the
4702 	// page daemon/thief can do their job without problems.
4703 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4704 		originalAddress);
4705 	context.addressSpaceLocker.Unlock();
4706 	vm_page_reserve_pages(&context.reservation, reservePages,
4707 		addressSpace == VMAddressSpace::Kernel()
4708 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4709 
4710 	while (true) {
4711 		context.addressSpaceLocker.Lock();
4712 
4713 		// get the area the fault was in
4714 		VMArea* area = addressSpace->LookupArea(address);
4715 		if (area == NULL) {
4716 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4717 				"space\n", originalAddress);
4718 			TPF(PageFaultError(-1,
4719 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4720 			status = B_BAD_ADDRESS;
4721 			break;
4722 		}
4723 
4724 		// check permissions
4725 		uint32 protection = get_area_page_protection(area, address);
4726 		if (isUser && (protection & B_USER_PROTECTION) == 0
4727 				&& (area->protection & B_KERNEL_AREA) != 0) {
4728 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4729 				area->id, (void*)originalAddress);
4730 			TPF(PageFaultError(area->id,
4731 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4732 			status = B_PERMISSION_DENIED;
4733 			break;
4734 		}
4735 		if (isWrite && (protection
4736 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4737 			dprintf("write access attempted on write-protected area 0x%"
4738 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4739 			TPF(PageFaultError(area->id,
4740 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4741 			status = B_PERMISSION_DENIED;
4742 			break;
4743 		} else if (isExecute && (protection
4744 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4745 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4746 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4747 			TPF(PageFaultError(area->id,
4748 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4749 			status = B_PERMISSION_DENIED;
4750 			break;
4751 		} else if (!isWrite && !isExecute && (protection
4752 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4753 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4754 				" at %p\n", area->id, (void*)originalAddress);
4755 			TPF(PageFaultError(area->id,
4756 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4757 			status = B_PERMISSION_DENIED;
4758 			break;
4759 		}
4760 
4761 		// We have the area, it was a valid access, so let's try to resolve the
4762 		// page fault now.
4763 		// At first, the top most cache from the area is investigated.
4764 
4765 		context.Prepare(vm_area_get_locked_cache(area),
4766 			address - area->Base() + area->cache_offset);
4767 
4768 		// See if this cache has a fault handler -- this will do all the work
4769 		// for us.
4770 		{
4771 			// Note, since the page fault is resolved with interrupts enabled,
4772 			// the fault handler could be called more than once for the same
4773 			// reason -- the store must take this into account.
4774 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4775 			if (status != B_BAD_HANDLER)
4776 				break;
4777 		}
4778 
4779 		// The top most cache has no fault handler, so let's see if the cache or
4780 		// its sources already have the page we're searching for (we're going
4781 		// from top to bottom).
4782 		status = fault_get_page(context);
4783 		if (status != B_OK) {
4784 			TPF(PageFaultError(area->id, status));
4785 			break;
4786 		}
4787 
4788 		if (context.restart)
4789 			continue;
4790 
4791 		// All went fine, all there is left to do is to map the page into the
4792 		// address space.
4793 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4794 			context.page));
4795 
4796 		// If the page doesn't reside in the area's cache, we need to make sure
4797 		// it's mapped in read-only, so that we cannot overwrite someone else's
4798 		// data (copy-on-write)
4799 		uint32 newProtection = protection;
4800 		if (context.page->Cache() != context.topCache && !isWrite)
4801 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4802 
4803 		bool unmapPage = false;
4804 		bool mapPage = true;
4805 
4806 		// check whether there's already a page mapped at the address
4807 		context.map->Lock();
4808 
4809 		phys_addr_t physicalAddress;
4810 		uint32 flags;
4811 		vm_page* mappedPage = NULL;
4812 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4813 			&& (flags & PAGE_PRESENT) != 0
4814 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4815 				!= NULL) {
4816 			// Yep there's already a page. If it's ours, we can simply adjust
4817 			// its protection. Otherwise we have to unmap it.
4818 			if (mappedPage == context.page) {
4819 				context.map->ProtectPage(area, address, newProtection);
4820 					// Note: We assume that ProtectPage() is atomic (i.e.
4821 					// the page isn't temporarily unmapped), otherwise we'd have
4822 					// to make sure it isn't wired.
4823 				mapPage = false;
4824 			} else
4825 				unmapPage = true;
4826 		}
4827 
4828 		context.map->Unlock();
4829 
4830 		if (unmapPage) {
4831 			// If the page is wired, we can't unmap it. Wait until it is unwired
4832 			// again and restart. Note that the page cannot be wired for
4833 			// writing, since it it isn't in the topmost cache. So we can safely
4834 			// ignore ranges wired for writing (our own and other concurrent
4835 			// wiring attempts in progress) and in fact have to do that to avoid
4836 			// a deadlock.
4837 			VMAreaUnwiredWaiter waiter;
4838 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4839 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4840 				// unlock everything and wait
4841 				if (context.pageAllocated) {
4842 					// ... but since we allocated a page and inserted it into
4843 					// the top cache, remove and free it first. Otherwise we'd
4844 					// have a page from a lower cache mapped while an upper
4845 					// cache has a page that would shadow it.
4846 					context.topCache->RemovePage(context.page);
4847 					vm_page_free_etc(context.topCache, context.page,
4848 						&context.reservation);
4849 				} else
4850 					DEBUG_PAGE_ACCESS_END(context.page);
4851 
4852 				context.UnlockAll();
4853 				waiter.waitEntry.Wait();
4854 				continue;
4855 			}
4856 
4857 			// Note: The mapped page is a page of a lower cache. We are
4858 			// guaranteed to have that cached locked, our new page is a copy of
4859 			// that page, and the page is not busy. The logic for that guarantee
4860 			// is as follows: Since the page is mapped, it must live in the top
4861 			// cache (ruled out above) or any of its lower caches, and there is
4862 			// (was before the new page was inserted) no other page in any
4863 			// cache between the top cache and the page's cache (otherwise that
4864 			// would be mapped instead). That in turn means that our algorithm
4865 			// must have found it and therefore it cannot be busy either.
4866 			DEBUG_PAGE_ACCESS_START(mappedPage);
4867 			unmap_page(area, address);
4868 			DEBUG_PAGE_ACCESS_END(mappedPage);
4869 		}
4870 
4871 		if (mapPage) {
4872 			if (map_page(area, context.page, address, newProtection,
4873 					&context.reservation) != B_OK) {
4874 				// Mapping can only fail, when the page mapping object couldn't
4875 				// be allocated. Save for the missing mapping everything is
4876 				// fine, though. If this was a regular page fault, we'll simply
4877 				// leave and probably fault again. To make sure we'll have more
4878 				// luck then, we ensure that the minimum object reserve is
4879 				// available.
4880 				DEBUG_PAGE_ACCESS_END(context.page);
4881 
4882 				context.UnlockAll();
4883 
4884 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4885 						!= B_OK) {
4886 					// Apparently the situation is serious. Let's get ourselves
4887 					// killed.
4888 					status = B_NO_MEMORY;
4889 				} else if (wirePage != NULL) {
4890 					// The caller expects us to wire the page. Since
4891 					// object_cache_reserve() succeeded, we should now be able
4892 					// to allocate a mapping structure. Restart.
4893 					continue;
4894 				}
4895 
4896 				break;
4897 			}
4898 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4899 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4900 
4901 		// also wire the page, if requested
4902 		if (wirePage != NULL && status == B_OK) {
4903 			increment_page_wired_count(context.page);
4904 			*wirePage = context.page;
4905 		}
4906 
4907 		DEBUG_PAGE_ACCESS_END(context.page);
4908 
4909 		break;
4910 	}
4911 
4912 	return status;
4913 }
4914 
4915 
4916 status_t
4917 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4918 {
4919 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4920 }
4921 
4922 status_t
4923 vm_put_physical_page(addr_t vaddr, void* handle)
4924 {
4925 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4926 }
4927 
4928 
4929 status_t
4930 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4931 	void** _handle)
4932 {
4933 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4934 }
4935 
4936 status_t
4937 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4938 {
4939 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4940 }
4941 
4942 
4943 status_t
4944 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4945 {
4946 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4947 }
4948 
4949 status_t
4950 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4951 {
4952 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4953 }
4954 
4955 
4956 void
4957 vm_get_info(system_info* info)
4958 {
4959 	swap_get_info(info);
4960 
4961 	MutexLocker locker(sAvailableMemoryLock);
4962 	info->needed_memory = sNeededMemory;
4963 	info->free_memory = sAvailableMemory;
4964 }
4965 
4966 
4967 uint32
4968 vm_num_page_faults(void)
4969 {
4970 	return sPageFaults;
4971 }
4972 
4973 
4974 off_t
4975 vm_available_memory(void)
4976 {
4977 	MutexLocker locker(sAvailableMemoryLock);
4978 	return sAvailableMemory;
4979 }
4980 
4981 
4982 off_t
4983 vm_available_not_needed_memory(void)
4984 {
4985 	MutexLocker locker(sAvailableMemoryLock);
4986 	return sAvailableMemory - sNeededMemory;
4987 }
4988 
4989 
4990 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4991 	debugger.
4992 */
4993 off_t
4994 vm_available_not_needed_memory_debug(void)
4995 {
4996 	return sAvailableMemory - sNeededMemory;
4997 }
4998 
4999 
5000 size_t
5001 vm_kernel_address_space_left(void)
5002 {
5003 	return VMAddressSpace::Kernel()->FreeSpace();
5004 }
5005 
5006 
5007 void
5008 vm_unreserve_memory(size_t amount)
5009 {
5010 	mutex_lock(&sAvailableMemoryLock);
5011 
5012 	sAvailableMemory += amount;
5013 
5014 	mutex_unlock(&sAvailableMemoryLock);
5015 }
5016 
5017 
5018 status_t
5019 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5020 {
5021 	size_t reserve = kMemoryReserveForPriority[priority];
5022 
5023 	MutexLocker locker(sAvailableMemoryLock);
5024 
5025 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5026 
5027 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5028 		sAvailableMemory -= amount;
5029 		return B_OK;
5030 	}
5031 
5032 	if (timeout <= 0)
5033 		return B_NO_MEMORY;
5034 
5035 	// turn timeout into an absolute timeout
5036 	timeout += system_time();
5037 
5038 	// loop until we've got the memory or the timeout occurs
5039 	do {
5040 		sNeededMemory += amount;
5041 
5042 		// call the low resource manager
5043 		locker.Unlock();
5044 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5045 			B_ABSOLUTE_TIMEOUT, timeout);
5046 		locker.Lock();
5047 
5048 		sNeededMemory -= amount;
5049 
5050 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5051 			sAvailableMemory -= amount;
5052 			return B_OK;
5053 		}
5054 	} while (timeout > system_time());
5055 
5056 	return B_NO_MEMORY;
5057 }
5058 
5059 
5060 status_t
5061 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5062 {
5063 	// NOTE: The caller is responsible for synchronizing calls to this function!
5064 
5065 	AddressSpaceReadLocker locker;
5066 	VMArea* area;
5067 	status_t status = locker.SetFromArea(id, area);
5068 	if (status != B_OK)
5069 		return status;
5070 
5071 	// nothing to do, if the type doesn't change
5072 	uint32 oldType = area->MemoryType();
5073 	if (type == oldType)
5074 		return B_OK;
5075 
5076 	// set the memory type of the area and the mapped pages
5077 	VMTranslationMap* map = area->address_space->TranslationMap();
5078 	map->Lock();
5079 	area->SetMemoryType(type);
5080 	map->ProtectArea(area, area->protection);
5081 	map->Unlock();
5082 
5083 	// set the physical memory type
5084 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5085 	if (error != B_OK) {
5086 		// reset the memory type of the area and the mapped pages
5087 		map->Lock();
5088 		area->SetMemoryType(oldType);
5089 		map->ProtectArea(area, area->protection);
5090 		map->Unlock();
5091 		return error;
5092 	}
5093 
5094 	return B_OK;
5095 
5096 }
5097 
5098 
5099 /*!	This function enforces some protection properties:
5100 	 - kernel areas must be W^X (after kernel startup)
5101 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5102 	 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5103 */
5104 static void
5105 fix_protection(uint32* protection)
5106 {
5107 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5108 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5109 			|| (*protection & B_WRITE_AREA) != 0)
5110 		&& !gKernelStartup)
5111 		panic("kernel areas cannot be both writable and executable!");
5112 
5113 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5114 		if ((*protection & B_WRITE_AREA) != 0)
5115 			*protection |= B_KERNEL_WRITE_AREA;
5116 		if ((*protection & B_READ_AREA) != 0)
5117 			*protection |= B_KERNEL_READ_AREA;
5118 	}
5119 }
5120 
5121 
5122 static void
5123 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5124 {
5125 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5126 	info->area = area->id;
5127 	info->address = (void*)area->Base();
5128 	info->size = area->Size();
5129 	info->protection = area->protection;
5130 	info->lock = area->wiring;
5131 	info->team = area->address_space->ID();
5132 	info->copy_count = 0;
5133 	info->in_count = 0;
5134 	info->out_count = 0;
5135 		// TODO: retrieve real values here!
5136 
5137 	VMCache* cache = vm_area_get_locked_cache(area);
5138 
5139 	// Note, this is a simplification; the cache could be larger than this area
5140 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5141 
5142 	vm_area_put_locked_cache(cache);
5143 }
5144 
5145 
5146 static status_t
5147 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5148 {
5149 	// is newSize a multiple of B_PAGE_SIZE?
5150 	if (newSize & (B_PAGE_SIZE - 1))
5151 		return B_BAD_VALUE;
5152 
5153 	// lock all affected address spaces and the cache
5154 	VMArea* area;
5155 	VMCache* cache;
5156 
5157 	MultiAddressSpaceLocker locker;
5158 	AreaCacheLocker cacheLocker;
5159 
5160 	status_t status;
5161 	size_t oldSize;
5162 	bool anyKernelArea;
5163 	bool restart;
5164 
5165 	do {
5166 		anyKernelArea = false;
5167 		restart = false;
5168 
5169 		locker.Unset();
5170 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5171 		if (status != B_OK)
5172 			return status;
5173 		cacheLocker.SetTo(cache, true);	// already locked
5174 
5175 		// enforce restrictions
5176 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5177 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5178 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5179 				"resize kernel area %" B_PRId32 " (%s)\n",
5180 				team_get_current_team_id(), areaID, area->name);
5181 			return B_NOT_ALLOWED;
5182 		}
5183 		// TODO: Enforce all restrictions (team, etc.)!
5184 
5185 		oldSize = area->Size();
5186 		if (newSize == oldSize)
5187 			return B_OK;
5188 
5189 		if (cache->type != CACHE_TYPE_RAM)
5190 			return B_NOT_ALLOWED;
5191 
5192 		if (oldSize < newSize) {
5193 			// We need to check if all areas of this cache can be resized.
5194 			for (VMArea* current = cache->areas; current != NULL;
5195 					current = current->cache_next) {
5196 				if (!current->address_space->CanResizeArea(current, newSize))
5197 					return B_ERROR;
5198 				anyKernelArea
5199 					|= current->address_space == VMAddressSpace::Kernel();
5200 			}
5201 		} else {
5202 			// We're shrinking the areas, so we must make sure the affected
5203 			// ranges are not wired.
5204 			for (VMArea* current = cache->areas; current != NULL;
5205 					current = current->cache_next) {
5206 				anyKernelArea
5207 					|= current->address_space == VMAddressSpace::Kernel();
5208 
5209 				if (wait_if_area_range_is_wired(current,
5210 						current->Base() + newSize, oldSize - newSize, &locker,
5211 						&cacheLocker)) {
5212 					restart = true;
5213 					break;
5214 				}
5215 			}
5216 		}
5217 	} while (restart);
5218 
5219 	// Okay, looks good so far, so let's do it
5220 
5221 	int priority = kernel && anyKernelArea
5222 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5223 	uint32 allocationFlags = kernel && anyKernelArea
5224 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5225 
5226 	if (oldSize < newSize) {
5227 		// Growing the cache can fail, so we do it first.
5228 		status = cache->Resize(cache->virtual_base + newSize, priority);
5229 		if (status != B_OK)
5230 			return status;
5231 	}
5232 
5233 	for (VMArea* current = cache->areas; current != NULL;
5234 			current = current->cache_next) {
5235 		status = current->address_space->ResizeArea(current, newSize,
5236 			allocationFlags);
5237 		if (status != B_OK)
5238 			break;
5239 
5240 		// We also need to unmap all pages beyond the new size, if the area has
5241 		// shrunk
5242 		if (newSize < oldSize) {
5243 			VMCacheChainLocker cacheChainLocker(cache);
5244 			cacheChainLocker.LockAllSourceCaches();
5245 
5246 			unmap_pages(current, current->Base() + newSize,
5247 				oldSize - newSize);
5248 
5249 			cacheChainLocker.Unlock(cache);
5250 		}
5251 	}
5252 
5253 	if (status == B_OK) {
5254 		// Shrink or grow individual page protections if in use.
5255 		if (area->page_protections != NULL) {
5256 			size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5257 			uint8* newProtections
5258 				= (uint8*)realloc(area->page_protections, bytes);
5259 			if (newProtections == NULL)
5260 				status = B_NO_MEMORY;
5261 			else {
5262 				area->page_protections = newProtections;
5263 
5264 				if (oldSize < newSize) {
5265 					// init the additional page protections to that of the area
5266 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5267 					uint32 areaProtection = area->protection
5268 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5269 					memset(area->page_protections + offset,
5270 						areaProtection | (areaProtection << 4), bytes - offset);
5271 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5272 						uint8& entry = area->page_protections[offset - 1];
5273 						entry = (entry & 0x0f) | (areaProtection << 4);
5274 					}
5275 				}
5276 			}
5277 		}
5278 	}
5279 
5280 	// shrinking the cache can't fail, so we do it now
5281 	if (status == B_OK && newSize < oldSize)
5282 		status = cache->Resize(cache->virtual_base + newSize, priority);
5283 
5284 	if (status != B_OK) {
5285 		// Something failed -- resize the areas back to their original size.
5286 		// This can fail, too, in which case we're seriously screwed.
5287 		for (VMArea* current = cache->areas; current != NULL;
5288 				current = current->cache_next) {
5289 			if (current->address_space->ResizeArea(current, oldSize,
5290 					allocationFlags) != B_OK) {
5291 				panic("vm_resize_area(): Failed and not being able to restore "
5292 					"original state.");
5293 			}
5294 		}
5295 
5296 		cache->Resize(cache->virtual_base + oldSize, priority);
5297 	}
5298 
5299 	// TODO: we must honour the lock restrictions of this area
5300 	return status;
5301 }
5302 
5303 
5304 status_t
5305 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5306 {
5307 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5308 }
5309 
5310 
5311 status_t
5312 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5313 {
5314 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5315 }
5316 
5317 
5318 status_t
5319 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5320 	bool user)
5321 {
5322 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5323 }
5324 
5325 
5326 void
5327 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5328 {
5329 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5330 }
5331 
5332 
5333 /*!	Copies a range of memory directly from/to a page that might not be mapped
5334 	at the moment.
5335 
5336 	For \a unsafeMemory the current mapping (if any is ignored). The function
5337 	walks through the respective area's cache chain to find the physical page
5338 	and copies from/to it directly.
5339 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5340 	must not cross a page boundary.
5341 
5342 	\param teamID The team ID identifying the address space \a unsafeMemory is
5343 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5344 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5345 		is passed, the address space of the thread returned by
5346 		debug_get_debugged_thread() is used.
5347 	\param unsafeMemory The start of the unsafe memory range to be copied
5348 		from/to.
5349 	\param buffer A safely accessible kernel buffer to be copied from/to.
5350 	\param size The number of bytes to be copied.
5351 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5352 		\a unsafeMemory, the other way around otherwise.
5353 */
5354 status_t
5355 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5356 	size_t size, bool copyToUnsafe)
5357 {
5358 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5359 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5360 		return B_BAD_VALUE;
5361 	}
5362 
5363 	// get the address space for the debugged thread
5364 	VMAddressSpace* addressSpace;
5365 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5366 		addressSpace = VMAddressSpace::Kernel();
5367 	} else if (teamID == B_CURRENT_TEAM) {
5368 		Thread* thread = debug_get_debugged_thread();
5369 		if (thread == NULL || thread->team == NULL)
5370 			return B_BAD_ADDRESS;
5371 
5372 		addressSpace = thread->team->address_space;
5373 	} else
5374 		addressSpace = VMAddressSpace::DebugGet(teamID);
5375 
5376 	if (addressSpace == NULL)
5377 		return B_BAD_ADDRESS;
5378 
5379 	// get the area
5380 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5381 	if (area == NULL)
5382 		return B_BAD_ADDRESS;
5383 
5384 	// search the page
5385 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5386 		+ area->cache_offset;
5387 	VMCache* cache = area->cache;
5388 	vm_page* page = NULL;
5389 	while (cache != NULL) {
5390 		page = cache->DebugLookupPage(cacheOffset);
5391 		if (page != NULL)
5392 			break;
5393 
5394 		// Page not found in this cache -- if it is paged out, we must not try
5395 		// to get it from lower caches.
5396 		if (cache->DebugHasPage(cacheOffset))
5397 			break;
5398 
5399 		cache = cache->source;
5400 	}
5401 
5402 	if (page == NULL)
5403 		return B_UNSUPPORTED;
5404 
5405 	// copy from/to physical memory
5406 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5407 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5408 
5409 	if (copyToUnsafe) {
5410 		if (page->Cache() != area->cache)
5411 			return B_UNSUPPORTED;
5412 
5413 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5414 	}
5415 
5416 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5417 }
5418 
5419 
5420 /** Validate that a memory range is either fully in kernel space, or fully in
5421  *  userspace */
5422 static inline bool
5423 validate_memory_range(const void* addr, size_t size)
5424 {
5425 	addr_t address = (addr_t)addr;
5426 
5427 	// Check for overflows on all addresses.
5428 	if ((address + size) < address)
5429 		return false;
5430 
5431 	// Validate that the address range does not cross the kernel/user boundary.
5432 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5433 }
5434 
5435 
5436 /** Validate that a memory range is fully in userspace. */
5437 static inline bool
5438 validate_user_memory_range(const void* addr, size_t size)
5439 {
5440 	addr_t address = (addr_t)addr;
5441 
5442 	// Check for overflows on all addresses.
5443 	if ((address + size) < address)
5444 		return false;
5445 
5446 	// Validate that both the start and end address are in userspace
5447 	return IS_USER_ADDRESS(address) && IS_USER_ADDRESS(address + size - 1);
5448 }
5449 
5450 
5451 //	#pragma mark - kernel public API
5452 
5453 
5454 status_t
5455 user_memcpy(void* to, const void* from, size_t size)
5456 {
5457 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5458 		return B_BAD_ADDRESS;
5459 
5460 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5461 		return B_BAD_ADDRESS;
5462 
5463 	return B_OK;
5464 }
5465 
5466 
5467 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5468 	the string in \a to, NULL-terminating the result.
5469 
5470 	\param to Pointer to the destination C-string.
5471 	\param from Pointer to the source C-string.
5472 	\param size Size in bytes of the string buffer pointed to by \a to.
5473 
5474 	\return strlen(\a from).
5475 */
5476 ssize_t
5477 user_strlcpy(char* to, const char* from, size_t size)
5478 {
5479 	if (to == NULL && size != 0)
5480 		return B_BAD_VALUE;
5481 	if (from == NULL)
5482 		return B_BAD_ADDRESS;
5483 
5484 	// Protect the source address from overflows.
5485 	size_t maxSize = size;
5486 	if ((addr_t)from + maxSize < (addr_t)from)
5487 		maxSize -= (addr_t)from + maxSize;
5488 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5489 		maxSize = USER_TOP - (addr_t)from;
5490 
5491 	if (!validate_memory_range(to, maxSize))
5492 		return B_BAD_ADDRESS;
5493 
5494 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5495 	if (result < 0)
5496 		return result;
5497 
5498 	// If we hit the address overflow boundary, fail.
5499 	if ((size_t)result >= maxSize && maxSize < size)
5500 		return B_BAD_ADDRESS;
5501 
5502 	return result;
5503 }
5504 
5505 
5506 status_t
5507 user_memset(void* s, char c, size_t count)
5508 {
5509 	if (!validate_memory_range(s, count))
5510 		return B_BAD_ADDRESS;
5511 
5512 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5513 		return B_BAD_ADDRESS;
5514 
5515 	return B_OK;
5516 }
5517 
5518 
5519 /*!	Wires a single page at the given address.
5520 
5521 	\param team The team whose address space the address belongs to. Supports
5522 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5523 		parameter is ignored.
5524 	\param address address The virtual address to wire down. Does not need to
5525 		be page aligned.
5526 	\param writable If \c true the page shall be writable.
5527 	\param info On success the info is filled in, among other things
5528 		containing the physical address the given virtual one translates to.
5529 	\return \c B_OK, when the page could be wired, another error code otherwise.
5530 */
5531 status_t
5532 vm_wire_page(team_id team, addr_t address, bool writable,
5533 	VMPageWiringInfo* info)
5534 {
5535 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5536 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5537 
5538 	// compute the page protection that is required
5539 	bool isUser = IS_USER_ADDRESS(address);
5540 	uint32 requiredProtection = PAGE_PRESENT
5541 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5542 	if (writable)
5543 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5544 
5545 	// get and read lock the address space
5546 	VMAddressSpace* addressSpace = NULL;
5547 	if (isUser) {
5548 		if (team == B_CURRENT_TEAM)
5549 			addressSpace = VMAddressSpace::GetCurrent();
5550 		else
5551 			addressSpace = VMAddressSpace::Get(team);
5552 	} else
5553 		addressSpace = VMAddressSpace::GetKernel();
5554 	if (addressSpace == NULL)
5555 		return B_ERROR;
5556 
5557 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5558 
5559 	VMTranslationMap* map = addressSpace->TranslationMap();
5560 	status_t error = B_OK;
5561 
5562 	// get the area
5563 	VMArea* area = addressSpace->LookupArea(pageAddress);
5564 	if (area == NULL) {
5565 		addressSpace->Put();
5566 		return B_BAD_ADDRESS;
5567 	}
5568 
5569 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5570 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5571 
5572 	// mark the area range wired
5573 	area->Wire(&info->range);
5574 
5575 	// Lock the area's cache chain and the translation map. Needed to look
5576 	// up the page and play with its wired count.
5577 	cacheChainLocker.LockAllSourceCaches();
5578 	map->Lock();
5579 
5580 	phys_addr_t physicalAddress;
5581 	uint32 flags;
5582 	vm_page* page;
5583 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5584 		&& (flags & requiredProtection) == requiredProtection
5585 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5586 			!= NULL) {
5587 		// Already mapped with the correct permissions -- just increment
5588 		// the page's wired count.
5589 		increment_page_wired_count(page);
5590 
5591 		map->Unlock();
5592 		cacheChainLocker.Unlock();
5593 		addressSpaceLocker.Unlock();
5594 	} else {
5595 		// Let vm_soft_fault() map the page for us, if possible. We need
5596 		// to fully unlock to avoid deadlocks. Since we have already
5597 		// wired the area itself, nothing disturbing will happen with it
5598 		// in the meantime.
5599 		map->Unlock();
5600 		cacheChainLocker.Unlock();
5601 		addressSpaceLocker.Unlock();
5602 
5603 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5604 			isUser, &page);
5605 
5606 		if (error != B_OK) {
5607 			// The page could not be mapped -- clean up.
5608 			VMCache* cache = vm_area_get_locked_cache(area);
5609 			area->Unwire(&info->range);
5610 			cache->ReleaseRefAndUnlock();
5611 			addressSpace->Put();
5612 			return error;
5613 		}
5614 	}
5615 
5616 	info->physicalAddress
5617 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5618 			+ address % B_PAGE_SIZE;
5619 	info->page = page;
5620 
5621 	return B_OK;
5622 }
5623 
5624 
5625 /*!	Unwires a single page previously wired via vm_wire_page().
5626 
5627 	\param info The same object passed to vm_wire_page() before.
5628 */
5629 void
5630 vm_unwire_page(VMPageWiringInfo* info)
5631 {
5632 	// lock the address space
5633 	VMArea* area = info->range.area;
5634 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5635 		// takes over our reference
5636 
5637 	// lock the top cache
5638 	VMCache* cache = vm_area_get_locked_cache(area);
5639 	VMCacheChainLocker cacheChainLocker(cache);
5640 
5641 	if (info->page->Cache() != cache) {
5642 		// The page is not in the top cache, so we lock the whole cache chain
5643 		// before touching the page's wired count.
5644 		cacheChainLocker.LockAllSourceCaches();
5645 	}
5646 
5647 	decrement_page_wired_count(info->page);
5648 
5649 	// remove the wired range from the range
5650 	area->Unwire(&info->range);
5651 
5652 	cacheChainLocker.Unlock();
5653 }
5654 
5655 
5656 /*!	Wires down the given address range in the specified team's address space.
5657 
5658 	If successful the function
5659 	- acquires a reference to the specified team's address space,
5660 	- adds respective wired ranges to all areas that intersect with the given
5661 	  address range,
5662 	- makes sure all pages in the given address range are mapped with the
5663 	  requested access permissions and increments their wired count.
5664 
5665 	It fails, when \a team doesn't specify a valid address space, when any part
5666 	of the specified address range is not covered by areas, when the concerned
5667 	areas don't allow mapping with the requested permissions, or when mapping
5668 	failed for another reason.
5669 
5670 	When successful the call must be balanced by a unlock_memory_etc() call with
5671 	the exact same parameters.
5672 
5673 	\param team Identifies the address (via team ID). \c B_CURRENT_TEAM is
5674 		supported.
5675 	\param address The start of the address range to be wired.
5676 	\param numBytes The size of the address range to be wired.
5677 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5678 		requests that the range must be wired writable ("read from device
5679 		into memory").
5680 	\return \c B_OK on success, another error code otherwise.
5681 */
5682 status_t
5683 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5684 {
5685 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5686 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5687 
5688 	// compute the page protection that is required
5689 	bool isUser = IS_USER_ADDRESS(address);
5690 	bool writable = (flags & B_READ_DEVICE) == 0;
5691 	uint32 requiredProtection = PAGE_PRESENT
5692 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5693 	if (writable)
5694 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5695 
5696 	uint32 mallocFlags = isUser
5697 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5698 
5699 	// get and read lock the address space
5700 	VMAddressSpace* addressSpace = NULL;
5701 	if (isUser) {
5702 		if (team == B_CURRENT_TEAM)
5703 			addressSpace = VMAddressSpace::GetCurrent();
5704 		else
5705 			addressSpace = VMAddressSpace::Get(team);
5706 	} else
5707 		addressSpace = VMAddressSpace::GetKernel();
5708 	if (addressSpace == NULL)
5709 		return B_ERROR;
5710 
5711 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5712 		// We get a new address space reference here. The one we got above will
5713 		// be freed by unlock_memory_etc().
5714 
5715 	VMTranslationMap* map = addressSpace->TranslationMap();
5716 	status_t error = B_OK;
5717 
5718 	// iterate through all concerned areas
5719 	addr_t nextAddress = lockBaseAddress;
5720 	while (nextAddress != lockEndAddress) {
5721 		// get the next area
5722 		VMArea* area = addressSpace->LookupArea(nextAddress);
5723 		if (area == NULL) {
5724 			error = B_BAD_ADDRESS;
5725 			break;
5726 		}
5727 
5728 		addr_t areaStart = nextAddress;
5729 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5730 
5731 		// allocate the wired range (do that before locking the cache to avoid
5732 		// deadlocks)
5733 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5734 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5735 		if (range == NULL) {
5736 			error = B_NO_MEMORY;
5737 			break;
5738 		}
5739 
5740 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5741 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5742 
5743 		// mark the area range wired
5744 		area->Wire(range);
5745 
5746 		// Depending on the area cache type and the wiring, we may not need to
5747 		// look at the individual pages.
5748 		if (area->cache_type == CACHE_TYPE_NULL
5749 			|| area->cache_type == CACHE_TYPE_DEVICE
5750 			|| area->wiring == B_FULL_LOCK
5751 			|| area->wiring == B_CONTIGUOUS) {
5752 			nextAddress = areaEnd;
5753 			continue;
5754 		}
5755 
5756 		// Lock the area's cache chain and the translation map. Needed to look
5757 		// up pages and play with their wired count.
5758 		cacheChainLocker.LockAllSourceCaches();
5759 		map->Lock();
5760 
5761 		// iterate through the pages and wire them
5762 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5763 			phys_addr_t physicalAddress;
5764 			uint32 flags;
5765 
5766 			vm_page* page;
5767 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5768 				&& (flags & requiredProtection) == requiredProtection
5769 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5770 					!= NULL) {
5771 				// Already mapped with the correct permissions -- just increment
5772 				// the page's wired count.
5773 				increment_page_wired_count(page);
5774 			} else {
5775 				// Let vm_soft_fault() map the page for us, if possible. We need
5776 				// to fully unlock to avoid deadlocks. Since we have already
5777 				// wired the area itself, nothing disturbing will happen with it
5778 				// in the meantime.
5779 				map->Unlock();
5780 				cacheChainLocker.Unlock();
5781 				addressSpaceLocker.Unlock();
5782 
5783 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5784 					false, isUser, &page);
5785 
5786 				addressSpaceLocker.Lock();
5787 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5788 				cacheChainLocker.LockAllSourceCaches();
5789 				map->Lock();
5790 			}
5791 
5792 			if (error != B_OK)
5793 				break;
5794 		}
5795 
5796 		map->Unlock();
5797 
5798 		if (error == B_OK) {
5799 			cacheChainLocker.Unlock();
5800 		} else {
5801 			// An error occurred, so abort right here. If the current address
5802 			// is the first in this area, unwire the area, since we won't get
5803 			// to it when reverting what we've done so far.
5804 			if (nextAddress == areaStart) {
5805 				area->Unwire(range);
5806 				cacheChainLocker.Unlock();
5807 				range->~VMAreaWiredRange();
5808 				free_etc(range, mallocFlags);
5809 			} else
5810 				cacheChainLocker.Unlock();
5811 
5812 			break;
5813 		}
5814 	}
5815 
5816 	if (error != B_OK) {
5817 		// An error occurred, so unwire all that we've already wired. Note that
5818 		// even if not a single page was wired, unlock_memory_etc() is called
5819 		// to put the address space reference.
5820 		addressSpaceLocker.Unlock();
5821 		unlock_memory_etc(team, (void*)lockBaseAddress,
5822 			nextAddress - lockBaseAddress, flags);
5823 	}
5824 
5825 	return error;
5826 }
5827 
5828 
5829 status_t
5830 lock_memory(void* address, size_t numBytes, uint32 flags)
5831 {
5832 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5833 }
5834 
5835 
5836 /*!	Unwires an address range previously wired with lock_memory_etc().
5837 
5838 	Note that a call to this function must balance a previous lock_memory_etc()
5839 	call with exactly the same parameters.
5840 */
5841 status_t
5842 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5843 {
5844 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5845 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5846 
5847 	// compute the page protection that is required
5848 	bool isUser = IS_USER_ADDRESS(address);
5849 	bool writable = (flags & B_READ_DEVICE) == 0;
5850 	uint32 requiredProtection = PAGE_PRESENT
5851 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5852 	if (writable)
5853 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5854 
5855 	uint32 mallocFlags = isUser
5856 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5857 
5858 	// get and read lock the address space
5859 	VMAddressSpace* addressSpace = NULL;
5860 	if (isUser) {
5861 		if (team == B_CURRENT_TEAM)
5862 			addressSpace = VMAddressSpace::GetCurrent();
5863 		else
5864 			addressSpace = VMAddressSpace::Get(team);
5865 	} else
5866 		addressSpace = VMAddressSpace::GetKernel();
5867 	if (addressSpace == NULL)
5868 		return B_ERROR;
5869 
5870 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5871 		// Take over the address space reference. We don't unlock until we're
5872 		// done.
5873 
5874 	VMTranslationMap* map = addressSpace->TranslationMap();
5875 	status_t error = B_OK;
5876 
5877 	// iterate through all concerned areas
5878 	addr_t nextAddress = lockBaseAddress;
5879 	while (nextAddress != lockEndAddress) {
5880 		// get the next area
5881 		VMArea* area = addressSpace->LookupArea(nextAddress);
5882 		if (area == NULL) {
5883 			error = B_BAD_ADDRESS;
5884 			break;
5885 		}
5886 
5887 		addr_t areaStart = nextAddress;
5888 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5889 
5890 		// Lock the area's top cache. This is a requirement for
5891 		// VMArea::Unwire().
5892 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5893 
5894 		// Depending on the area cache type and the wiring, we may not need to
5895 		// look at the individual pages.
5896 		if (area->cache_type == CACHE_TYPE_NULL
5897 			|| area->cache_type == CACHE_TYPE_DEVICE
5898 			|| area->wiring == B_FULL_LOCK
5899 			|| area->wiring == B_CONTIGUOUS) {
5900 			// unwire the range (to avoid deadlocks we delete the range after
5901 			// unlocking the cache)
5902 			nextAddress = areaEnd;
5903 			VMAreaWiredRange* range = area->Unwire(areaStart,
5904 				areaEnd - areaStart, writable);
5905 			cacheChainLocker.Unlock();
5906 			if (range != NULL) {
5907 				range->~VMAreaWiredRange();
5908 				free_etc(range, mallocFlags);
5909 			}
5910 			continue;
5911 		}
5912 
5913 		// Lock the area's cache chain and the translation map. Needed to look
5914 		// up pages and play with their wired count.
5915 		cacheChainLocker.LockAllSourceCaches();
5916 		map->Lock();
5917 
5918 		// iterate through the pages and unwire them
5919 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5920 			phys_addr_t physicalAddress;
5921 			uint32 flags;
5922 
5923 			vm_page* page;
5924 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5925 				&& (flags & PAGE_PRESENT) != 0
5926 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5927 					!= NULL) {
5928 				// Already mapped with the correct permissions -- just increment
5929 				// the page's wired count.
5930 				decrement_page_wired_count(page);
5931 			} else {
5932 				panic("unlock_memory_etc(): Failed to unwire page: address "
5933 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5934 					nextAddress);
5935 				error = B_BAD_VALUE;
5936 				break;
5937 			}
5938 		}
5939 
5940 		map->Unlock();
5941 
5942 		// All pages are unwired. Remove the area's wired range as well (to
5943 		// avoid deadlocks we delete the range after unlocking the cache).
5944 		VMAreaWiredRange* range = area->Unwire(areaStart,
5945 			areaEnd - areaStart, writable);
5946 
5947 		cacheChainLocker.Unlock();
5948 
5949 		if (range != NULL) {
5950 			range->~VMAreaWiredRange();
5951 			free_etc(range, mallocFlags);
5952 		}
5953 
5954 		if (error != B_OK)
5955 			break;
5956 	}
5957 
5958 	// get rid of the address space reference lock_memory_etc() acquired
5959 	addressSpace->Put();
5960 
5961 	return error;
5962 }
5963 
5964 
5965 status_t
5966 unlock_memory(void* address, size_t numBytes, uint32 flags)
5967 {
5968 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5969 }
5970 
5971 
5972 /*!	Similar to get_memory_map(), but also allows to specify the address space
5973 	for the memory in question and has a saner semantics.
5974 	Returns \c B_OK when the complete range could be translated or
5975 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5976 	case the actual number of entries is written to \c *_numEntries. Any other
5977 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5978 	in this case.
5979 */
5980 status_t
5981 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5982 	physical_entry* table, uint32* _numEntries)
5983 {
5984 	uint32 numEntries = *_numEntries;
5985 	*_numEntries = 0;
5986 
5987 	VMAddressSpace* addressSpace;
5988 	addr_t virtualAddress = (addr_t)address;
5989 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5990 	phys_addr_t physicalAddress;
5991 	status_t status = B_OK;
5992 	int32 index = -1;
5993 	addr_t offset = 0;
5994 	bool interrupts = are_interrupts_enabled();
5995 
5996 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5997 		"entries)\n", team, address, numBytes, numEntries));
5998 
5999 	if (numEntries == 0 || numBytes == 0)
6000 		return B_BAD_VALUE;
6001 
6002 	// in which address space is the address to be found?
6003 	if (IS_USER_ADDRESS(virtualAddress)) {
6004 		if (team == B_CURRENT_TEAM)
6005 			addressSpace = VMAddressSpace::GetCurrent();
6006 		else
6007 			addressSpace = VMAddressSpace::Get(team);
6008 	} else
6009 		addressSpace = VMAddressSpace::GetKernel();
6010 
6011 	if (addressSpace == NULL)
6012 		return B_ERROR;
6013 
6014 	VMTranslationMap* map = addressSpace->TranslationMap();
6015 
6016 	if (interrupts)
6017 		map->Lock();
6018 
6019 	while (offset < numBytes) {
6020 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6021 		uint32 flags;
6022 
6023 		if (interrupts) {
6024 			status = map->Query((addr_t)address + offset, &physicalAddress,
6025 				&flags);
6026 		} else {
6027 			status = map->QueryInterrupt((addr_t)address + offset,
6028 				&physicalAddress, &flags);
6029 		}
6030 		if (status < B_OK)
6031 			break;
6032 		if ((flags & PAGE_PRESENT) == 0) {
6033 			panic("get_memory_map() called on unmapped memory!");
6034 			return B_BAD_ADDRESS;
6035 		}
6036 
6037 		if (index < 0 && pageOffset > 0) {
6038 			physicalAddress += pageOffset;
6039 			if (bytes > B_PAGE_SIZE - pageOffset)
6040 				bytes = B_PAGE_SIZE - pageOffset;
6041 		}
6042 
6043 		// need to switch to the next physical_entry?
6044 		if (index < 0 || table[index].address
6045 				!= physicalAddress - table[index].size) {
6046 			if ((uint32)++index + 1 > numEntries) {
6047 				// table to small
6048 				break;
6049 			}
6050 			table[index].address = physicalAddress;
6051 			table[index].size = bytes;
6052 		} else {
6053 			// page does fit in current entry
6054 			table[index].size += bytes;
6055 		}
6056 
6057 		offset += bytes;
6058 	}
6059 
6060 	if (interrupts)
6061 		map->Unlock();
6062 
6063 	if (status != B_OK)
6064 		return status;
6065 
6066 	if ((uint32)index + 1 > numEntries) {
6067 		*_numEntries = index;
6068 		return B_BUFFER_OVERFLOW;
6069 	}
6070 
6071 	*_numEntries = index + 1;
6072 	return B_OK;
6073 }
6074 
6075 
6076 /*!	According to the BeBook, this function should always succeed.
6077 	This is no longer the case.
6078 */
6079 extern "C" int32
6080 __get_memory_map_haiku(const void* address, size_t numBytes,
6081 	physical_entry* table, int32 numEntries)
6082 {
6083 	uint32 entriesRead = numEntries;
6084 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6085 		table, &entriesRead);
6086 	if (error != B_OK)
6087 		return error;
6088 
6089 	// close the entry list
6090 
6091 	// if it's only one entry, we will silently accept the missing ending
6092 	if (numEntries == 1)
6093 		return B_OK;
6094 
6095 	if (entriesRead + 1 > (uint32)numEntries)
6096 		return B_BUFFER_OVERFLOW;
6097 
6098 	table[entriesRead].address = 0;
6099 	table[entriesRead].size = 0;
6100 
6101 	return B_OK;
6102 }
6103 
6104 
6105 area_id
6106 area_for(void* address)
6107 {
6108 	return vm_area_for((addr_t)address, true);
6109 }
6110 
6111 
6112 area_id
6113 find_area(const char* name)
6114 {
6115 	return VMAreaHash::Find(name);
6116 }
6117 
6118 
6119 status_t
6120 _get_area_info(area_id id, area_info* info, size_t size)
6121 {
6122 	if (size != sizeof(area_info) || info == NULL)
6123 		return B_BAD_VALUE;
6124 
6125 	AddressSpaceReadLocker locker;
6126 	VMArea* area;
6127 	status_t status = locker.SetFromArea(id, area);
6128 	if (status != B_OK)
6129 		return status;
6130 
6131 	fill_area_info(area, info, size);
6132 	return B_OK;
6133 }
6134 
6135 
6136 status_t
6137 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6138 {
6139 	addr_t nextBase = *(addr_t*)cookie;
6140 
6141 	// we're already through the list
6142 	if (nextBase == (addr_t)-1)
6143 		return B_ENTRY_NOT_FOUND;
6144 
6145 	if (team == B_CURRENT_TEAM)
6146 		team = team_get_current_team_id();
6147 
6148 	AddressSpaceReadLocker locker(team);
6149 	if (!locker.IsLocked())
6150 		return B_BAD_TEAM_ID;
6151 
6152 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6153 	if (area == NULL) {
6154 		nextBase = (addr_t)-1;
6155 		return B_ENTRY_NOT_FOUND;
6156 	}
6157 
6158 	fill_area_info(area, info, size);
6159 	*cookie = (ssize_t)(area->Base() + 1);
6160 
6161 	return B_OK;
6162 }
6163 
6164 
6165 status_t
6166 set_area_protection(area_id area, uint32 newProtection)
6167 {
6168 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6169 		newProtection, true);
6170 }
6171 
6172 
6173 status_t
6174 resize_area(area_id areaID, size_t newSize)
6175 {
6176 	return vm_resize_area(areaID, newSize, true);
6177 }
6178 
6179 
6180 /*!	Transfers the specified area to a new team. The caller must be the owner
6181 	of the area.
6182 */
6183 area_id
6184 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6185 	bool kernel)
6186 {
6187 	area_info info;
6188 	status_t status = get_area_info(id, &info);
6189 	if (status != B_OK)
6190 		return status;
6191 
6192 	if (info.team != thread_get_current_thread()->team->id)
6193 		return B_PERMISSION_DENIED;
6194 
6195 	// We need to mark the area cloneable so the following operations work.
6196 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6197 	if (status != B_OK)
6198 		return status;
6199 
6200 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6201 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6202 	if (clonedArea < 0)
6203 		return clonedArea;
6204 
6205 	status = vm_delete_area(info.team, id, kernel);
6206 	if (status != B_OK) {
6207 		vm_delete_area(target, clonedArea, kernel);
6208 		return status;
6209 	}
6210 
6211 	// Now we can reset the protection to whatever it was before.
6212 	set_area_protection(clonedArea, info.protection);
6213 
6214 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6215 
6216 	return clonedArea;
6217 }
6218 
6219 
6220 extern "C" area_id
6221 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6222 	size_t numBytes, uint32 addressSpec, uint32 protection,
6223 	void** _virtualAddress)
6224 {
6225 	if (!arch_vm_supports_protection(protection))
6226 		return B_NOT_SUPPORTED;
6227 
6228 	fix_protection(&protection);
6229 
6230 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6231 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6232 		false);
6233 }
6234 
6235 
6236 area_id
6237 clone_area(const char* name, void** _address, uint32 addressSpec,
6238 	uint32 protection, area_id source)
6239 {
6240 	if ((protection & B_KERNEL_PROTECTION) == 0)
6241 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6242 
6243 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6244 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6245 }
6246 
6247 
6248 area_id
6249 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6250 	uint32 protection, uint32 flags, uint32 guardSize,
6251 	const virtual_address_restrictions* virtualAddressRestrictions,
6252 	const physical_address_restrictions* physicalAddressRestrictions,
6253 	void** _address)
6254 {
6255 	fix_protection(&protection);
6256 
6257 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6258 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6259 		true, _address);
6260 }
6261 
6262 
6263 extern "C" area_id
6264 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6265 	size_t size, uint32 lock, uint32 protection)
6266 {
6267 	fix_protection(&protection);
6268 
6269 	virtual_address_restrictions virtualRestrictions = {};
6270 	virtualRestrictions.address = *_address;
6271 	virtualRestrictions.address_specification = addressSpec;
6272 	physical_address_restrictions physicalRestrictions = {};
6273 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6274 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6275 		true, _address);
6276 }
6277 
6278 
6279 status_t
6280 delete_area(area_id area)
6281 {
6282 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6283 }
6284 
6285 
6286 //	#pragma mark - Userland syscalls
6287 
6288 
6289 status_t
6290 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6291 	addr_t size)
6292 {
6293 	// filter out some unavailable values (for userland)
6294 	switch (addressSpec) {
6295 		case B_ANY_KERNEL_ADDRESS:
6296 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6297 			return B_BAD_VALUE;
6298 	}
6299 
6300 	addr_t address;
6301 
6302 	if (!IS_USER_ADDRESS(userAddress)
6303 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6304 		return B_BAD_ADDRESS;
6305 
6306 	status_t status = vm_reserve_address_range(
6307 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6308 		RESERVED_AVOID_BASE);
6309 	if (status != B_OK)
6310 		return status;
6311 
6312 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6313 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6314 			(void*)address, size);
6315 		return B_BAD_ADDRESS;
6316 	}
6317 
6318 	return B_OK;
6319 }
6320 
6321 
6322 status_t
6323 _user_unreserve_address_range(addr_t address, addr_t size)
6324 {
6325 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6326 		(void*)address, size);
6327 }
6328 
6329 
6330 area_id
6331 _user_area_for(void* address)
6332 {
6333 	return vm_area_for((addr_t)address, false);
6334 }
6335 
6336 
6337 area_id
6338 _user_find_area(const char* userName)
6339 {
6340 	char name[B_OS_NAME_LENGTH];
6341 
6342 	if (!IS_USER_ADDRESS(userName)
6343 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6344 		return B_BAD_ADDRESS;
6345 
6346 	return find_area(name);
6347 }
6348 
6349 
6350 status_t
6351 _user_get_area_info(area_id area, area_info* userInfo)
6352 {
6353 	if (!IS_USER_ADDRESS(userInfo))
6354 		return B_BAD_ADDRESS;
6355 
6356 	area_info info;
6357 	status_t status = get_area_info(area, &info);
6358 	if (status < B_OK)
6359 		return status;
6360 
6361 	// TODO: do we want to prevent userland from seeing kernel protections?
6362 	//info.protection &= B_USER_PROTECTION;
6363 
6364 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6365 		return B_BAD_ADDRESS;
6366 
6367 	return status;
6368 }
6369 
6370 
6371 status_t
6372 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6373 {
6374 	ssize_t cookie;
6375 
6376 	if (!IS_USER_ADDRESS(userCookie)
6377 		|| !IS_USER_ADDRESS(userInfo)
6378 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6379 		return B_BAD_ADDRESS;
6380 
6381 	area_info info;
6382 	status_t status = _get_next_area_info(team, &cookie, &info,
6383 		sizeof(area_info));
6384 	if (status != B_OK)
6385 		return status;
6386 
6387 	//info.protection &= B_USER_PROTECTION;
6388 
6389 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6390 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6391 		return B_BAD_ADDRESS;
6392 
6393 	return status;
6394 }
6395 
6396 
6397 status_t
6398 _user_set_area_protection(area_id area, uint32 newProtection)
6399 {
6400 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6401 		return B_BAD_VALUE;
6402 
6403 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6404 		newProtection, false);
6405 }
6406 
6407 
6408 status_t
6409 _user_resize_area(area_id area, size_t newSize)
6410 {
6411 	// TODO: Since we restrict deleting of areas to those owned by the team,
6412 	// we should also do that for resizing (check other functions, too).
6413 	return vm_resize_area(area, newSize, false);
6414 }
6415 
6416 
6417 area_id
6418 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6419 	team_id target)
6420 {
6421 	// filter out some unavailable values (for userland)
6422 	switch (addressSpec) {
6423 		case B_ANY_KERNEL_ADDRESS:
6424 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6425 			return B_BAD_VALUE;
6426 	}
6427 
6428 	void* address;
6429 	if (!IS_USER_ADDRESS(userAddress)
6430 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6431 		return B_BAD_ADDRESS;
6432 
6433 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6434 	if (newArea < B_OK)
6435 		return newArea;
6436 
6437 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6438 		return B_BAD_ADDRESS;
6439 
6440 	return newArea;
6441 }
6442 
6443 
6444 area_id
6445 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6446 	uint32 protection, area_id sourceArea)
6447 {
6448 	char name[B_OS_NAME_LENGTH];
6449 	void* address;
6450 
6451 	// filter out some unavailable values (for userland)
6452 	switch (addressSpec) {
6453 		case B_ANY_KERNEL_ADDRESS:
6454 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6455 			return B_BAD_VALUE;
6456 	}
6457 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6458 		return B_BAD_VALUE;
6459 
6460 	if (!IS_USER_ADDRESS(userName)
6461 		|| !IS_USER_ADDRESS(userAddress)
6462 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6463 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6464 		return B_BAD_ADDRESS;
6465 
6466 	fix_protection(&protection);
6467 
6468 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6469 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6470 		false);
6471 	if (clonedArea < B_OK)
6472 		return clonedArea;
6473 
6474 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6475 		delete_area(clonedArea);
6476 		return B_BAD_ADDRESS;
6477 	}
6478 
6479 	return clonedArea;
6480 }
6481 
6482 
6483 area_id
6484 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6485 	size_t size, uint32 lock, uint32 protection)
6486 {
6487 	char name[B_OS_NAME_LENGTH];
6488 	void* address;
6489 
6490 	// filter out some unavailable values (for userland)
6491 	switch (addressSpec) {
6492 		case B_ANY_KERNEL_ADDRESS:
6493 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6494 			return B_BAD_VALUE;
6495 	}
6496 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6497 		return B_BAD_VALUE;
6498 
6499 	if (!IS_USER_ADDRESS(userName)
6500 		|| !IS_USER_ADDRESS(userAddress)
6501 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6502 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6503 		return B_BAD_ADDRESS;
6504 
6505 	if (addressSpec == B_EXACT_ADDRESS
6506 		&& IS_KERNEL_ADDRESS(address))
6507 		return B_BAD_VALUE;
6508 
6509 	if (addressSpec == B_ANY_ADDRESS)
6510 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6511 	if (addressSpec == B_BASE_ADDRESS)
6512 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6513 
6514 	fix_protection(&protection);
6515 
6516 	virtual_address_restrictions virtualRestrictions = {};
6517 	virtualRestrictions.address = address;
6518 	virtualRestrictions.address_specification = addressSpec;
6519 	physical_address_restrictions physicalRestrictions = {};
6520 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6521 		size, lock, protection, 0, 0, &virtualRestrictions,
6522 		&physicalRestrictions, false, &address);
6523 
6524 	if (area >= B_OK
6525 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6526 		delete_area(area);
6527 		return B_BAD_ADDRESS;
6528 	}
6529 
6530 	return area;
6531 }
6532 
6533 
6534 status_t
6535 _user_delete_area(area_id area)
6536 {
6537 	// Unlike the BeOS implementation, you can now only delete areas
6538 	// that you have created yourself from userland.
6539 	// The documentation to delete_area() explicitly states that this
6540 	// will be restricted in the future, and so it will.
6541 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6542 }
6543 
6544 
6545 // TODO: create a BeOS style call for this!
6546 
6547 area_id
6548 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6549 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6550 	int fd, off_t offset)
6551 {
6552 	char name[B_OS_NAME_LENGTH];
6553 	void* address;
6554 	area_id area;
6555 
6556 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6557 		return B_BAD_VALUE;
6558 
6559 	fix_protection(&protection);
6560 
6561 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6562 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6563 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6564 		return B_BAD_ADDRESS;
6565 
6566 	if (addressSpec == B_EXACT_ADDRESS) {
6567 		if ((addr_t)address + size < (addr_t)address
6568 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6569 			return B_BAD_VALUE;
6570 		}
6571 		if (!IS_USER_ADDRESS(address)
6572 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6573 			return B_BAD_ADDRESS;
6574 		}
6575 	}
6576 
6577 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6578 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6579 		false);
6580 	if (area < B_OK)
6581 		return area;
6582 
6583 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6584 		return B_BAD_ADDRESS;
6585 
6586 	return area;
6587 }
6588 
6589 
6590 status_t
6591 _user_unmap_memory(void* _address, size_t size)
6592 {
6593 	addr_t address = (addr_t)_address;
6594 
6595 	// check params
6596 	if (size == 0 || (addr_t)address + size < (addr_t)address
6597 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6598 		return B_BAD_VALUE;
6599 	}
6600 
6601 	if (!IS_USER_ADDRESS(address)
6602 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6603 		return B_BAD_ADDRESS;
6604 	}
6605 
6606 	// Write lock the address space and ensure the address range is not wired.
6607 	AddressSpaceWriteLocker locker;
6608 	do {
6609 		status_t status = locker.SetTo(team_get_current_team_id());
6610 		if (status != B_OK)
6611 			return status;
6612 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6613 			size, &locker));
6614 
6615 	// unmap
6616 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6617 }
6618 
6619 
6620 status_t
6621 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6622 {
6623 	// check address range
6624 	addr_t address = (addr_t)_address;
6625 	size = PAGE_ALIGN(size);
6626 
6627 	if ((address % B_PAGE_SIZE) != 0)
6628 		return B_BAD_VALUE;
6629 	if (!validate_user_memory_range(_address, size)) {
6630 		// weird error code required by POSIX
6631 		return ENOMEM;
6632 	}
6633 
6634 	// extend and check protection
6635 	if ((protection & ~B_USER_PROTECTION) != 0)
6636 		return B_BAD_VALUE;
6637 
6638 	fix_protection(&protection);
6639 
6640 	// We need to write lock the address space, since we're going to play with
6641 	// the areas. Also make sure that none of the areas is wired and that we're
6642 	// actually allowed to change the protection.
6643 	AddressSpaceWriteLocker locker;
6644 
6645 	bool restart;
6646 	do {
6647 		restart = false;
6648 
6649 		status_t status = locker.SetTo(team_get_current_team_id());
6650 		if (status != B_OK)
6651 			return status;
6652 
6653 		// First round: Check whether the whole range is covered by areas and we
6654 		// are allowed to modify them.
6655 		addr_t currentAddress = address;
6656 		size_t sizeLeft = size;
6657 		while (sizeLeft > 0) {
6658 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6659 			if (area == NULL)
6660 				return B_NO_MEMORY;
6661 
6662 			if ((area->protection & B_KERNEL_AREA) != 0)
6663 				return B_NOT_ALLOWED;
6664 			if (area->protection_max != 0
6665 				&& (protection & area->protection_max) != protection) {
6666 				return B_NOT_ALLOWED;
6667 			}
6668 
6669 			addr_t offset = currentAddress - area->Base();
6670 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6671 
6672 			AreaCacheLocker cacheLocker(area);
6673 
6674 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6675 					&locker, &cacheLocker)) {
6676 				restart = true;
6677 				break;
6678 			}
6679 
6680 			cacheLocker.Unlock();
6681 
6682 			currentAddress += rangeSize;
6683 			sizeLeft -= rangeSize;
6684 		}
6685 	} while (restart);
6686 
6687 	// Second round: If the protections differ from that of the area, create a
6688 	// page protection array and re-map mapped pages.
6689 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6690 	addr_t currentAddress = address;
6691 	size_t sizeLeft = size;
6692 	while (sizeLeft > 0) {
6693 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6694 		if (area == NULL)
6695 			return B_NO_MEMORY;
6696 
6697 		addr_t offset = currentAddress - area->Base();
6698 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6699 
6700 		currentAddress += rangeSize;
6701 		sizeLeft -= rangeSize;
6702 
6703 		if (area->page_protections == NULL) {
6704 			if (area->protection == protection)
6705 				continue;
6706 			if (offset == 0 && rangeSize == area->Size()) {
6707 				status_t status = vm_set_area_protection(area->address_space->ID(),
6708 					area->id, protection, false);
6709 				if (status != B_OK)
6710 					return status;
6711 				continue;
6712 			}
6713 
6714 			status_t status = allocate_area_page_protections(area);
6715 			if (status != B_OK)
6716 				return status;
6717 		}
6718 
6719 		// We need to lock the complete cache chain, since we potentially unmap
6720 		// pages of lower caches.
6721 		VMCache* topCache = vm_area_get_locked_cache(area);
6722 		VMCacheChainLocker cacheChainLocker(topCache);
6723 		cacheChainLocker.LockAllSourceCaches();
6724 
6725 		for (addr_t pageAddress = area->Base() + offset;
6726 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6727 			map->Lock();
6728 
6729 			set_area_page_protection(area, pageAddress, protection);
6730 
6731 			phys_addr_t physicalAddress;
6732 			uint32 flags;
6733 
6734 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6735 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6736 				map->Unlock();
6737 				continue;
6738 			}
6739 
6740 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6741 			if (page == NULL) {
6742 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6743 					"\n", area, physicalAddress);
6744 				map->Unlock();
6745 				return B_ERROR;
6746 			}
6747 
6748 			// If the page is not in the topmost cache and write access is
6749 			// requested, we have to unmap it. Otherwise we can re-map it with
6750 			// the new protection.
6751 			bool unmapPage = page->Cache() != topCache
6752 				&& (protection & B_WRITE_AREA) != 0;
6753 
6754 			if (!unmapPage)
6755 				map->ProtectPage(area, pageAddress, protection);
6756 
6757 			map->Unlock();
6758 
6759 			if (unmapPage) {
6760 				DEBUG_PAGE_ACCESS_START(page);
6761 				unmap_page(area, pageAddress);
6762 				DEBUG_PAGE_ACCESS_END(page);
6763 			}
6764 		}
6765 	}
6766 
6767 	return B_OK;
6768 }
6769 
6770 
6771 status_t
6772 _user_sync_memory(void* _address, size_t size, uint32 flags)
6773 {
6774 	addr_t address = (addr_t)_address;
6775 	size = PAGE_ALIGN(size);
6776 
6777 	// check params
6778 	if ((address % B_PAGE_SIZE) != 0)
6779 		return B_BAD_VALUE;
6780 	if (!validate_user_memory_range(_address, size)) {
6781 		// weird error code required by POSIX
6782 		return ENOMEM;
6783 	}
6784 
6785 	bool writeSync = (flags & MS_SYNC) != 0;
6786 	bool writeAsync = (flags & MS_ASYNC) != 0;
6787 	if (writeSync && writeAsync)
6788 		return B_BAD_VALUE;
6789 
6790 	if (size == 0 || (!writeSync && !writeAsync))
6791 		return B_OK;
6792 
6793 	// iterate through the range and sync all concerned areas
6794 	while (size > 0) {
6795 		// read lock the address space
6796 		AddressSpaceReadLocker locker;
6797 		status_t error = locker.SetTo(team_get_current_team_id());
6798 		if (error != B_OK)
6799 			return error;
6800 
6801 		// get the first area
6802 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6803 		if (area == NULL)
6804 			return B_NO_MEMORY;
6805 
6806 		uint32 offset = address - area->Base();
6807 		size_t rangeSize = min_c(area->Size() - offset, size);
6808 		offset += area->cache_offset;
6809 
6810 		// lock the cache
6811 		AreaCacheLocker cacheLocker(area);
6812 		if (!cacheLocker)
6813 			return B_BAD_VALUE;
6814 		VMCache* cache = area->cache;
6815 
6816 		locker.Unlock();
6817 
6818 		uint32 firstPage = offset >> PAGE_SHIFT;
6819 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6820 
6821 		// write the pages
6822 		if (cache->type == CACHE_TYPE_VNODE) {
6823 			if (writeSync) {
6824 				// synchronous
6825 				error = vm_page_write_modified_page_range(cache, firstPage,
6826 					endPage);
6827 				if (error != B_OK)
6828 					return error;
6829 			} else {
6830 				// asynchronous
6831 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6832 				// TODO: This is probably not quite what is supposed to happen.
6833 				// Especially when a lot has to be written, it might take ages
6834 				// until it really hits the disk.
6835 			}
6836 		}
6837 
6838 		address += rangeSize;
6839 		size -= rangeSize;
6840 	}
6841 
6842 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6843 	// synchronize multiple mappings of the same file. In our VM they never get
6844 	// out of sync, though, so we don't have to do anything.
6845 
6846 	return B_OK;
6847 }
6848 
6849 
6850 status_t
6851 _user_memory_advice(void* _address, size_t size, uint32 advice)
6852 {
6853 	addr_t address = (addr_t)_address;
6854 	if ((address % B_PAGE_SIZE) != 0)
6855 		return B_BAD_VALUE;
6856 
6857 	size = PAGE_ALIGN(size);
6858 	if (!validate_user_memory_range(_address, size)) {
6859 		// weird error code required by POSIX
6860 		return B_NO_MEMORY;
6861 	}
6862 
6863 	switch (advice) {
6864 		case MADV_NORMAL:
6865 		case MADV_SEQUENTIAL:
6866 		case MADV_RANDOM:
6867 		case MADV_WILLNEED:
6868 		case MADV_DONTNEED:
6869 			// TODO: Implement!
6870 			break;
6871 
6872 		case MADV_FREE:
6873 		{
6874 			AddressSpaceWriteLocker locker;
6875 			do {
6876 				status_t status = locker.SetTo(team_get_current_team_id());
6877 				if (status != B_OK)
6878 					return status;
6879 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
6880 					address, size, &locker));
6881 
6882 			discard_address_range(locker.AddressSpace(), address, size, false);
6883 			break;
6884 		}
6885 
6886 		default:
6887 			return B_BAD_VALUE;
6888 	}
6889 
6890 	return B_OK;
6891 }
6892 
6893 
6894 status_t
6895 _user_get_memory_properties(team_id teamID, const void* address,
6896 	uint32* _protected, uint32* _lock)
6897 {
6898 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6899 		return B_BAD_ADDRESS;
6900 
6901 	AddressSpaceReadLocker locker;
6902 	status_t error = locker.SetTo(teamID);
6903 	if (error != B_OK)
6904 		return error;
6905 
6906 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6907 	if (area == NULL)
6908 		return B_NO_MEMORY;
6909 
6910 	uint32 protection = get_area_page_protection(area, (addr_t)address);
6911 	uint32 wiring = area->wiring;
6912 
6913 	locker.Unlock();
6914 
6915 	error = user_memcpy(_protected, &protection, sizeof(protection));
6916 	if (error != B_OK)
6917 		return error;
6918 
6919 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6920 
6921 	return error;
6922 }
6923 
6924 
6925 static status_t
6926 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
6927 {
6928 #if ENABLE_SWAP_SUPPORT
6929 	// check address range
6930 	addr_t address = (addr_t)_address;
6931 	size = PAGE_ALIGN(size);
6932 
6933 	if ((address % B_PAGE_SIZE) != 0)
6934 		return EINVAL;
6935 	if (!validate_user_memory_range(_address, size))
6936 		return EINVAL;
6937 
6938 	const addr_t endAddress = address + size;
6939 
6940 	AddressSpaceReadLocker addressSpaceLocker;
6941 	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
6942 	if (error != B_OK)
6943 		return error;
6944 	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
6945 
6946 	// iterate through all concerned areas
6947 	addr_t nextAddress = address;
6948 	while (nextAddress != endAddress) {
6949 		// get the next area
6950 		VMArea* area = addressSpace->LookupArea(nextAddress);
6951 		if (area == NULL) {
6952 			error = B_BAD_ADDRESS;
6953 			break;
6954 		}
6955 
6956 		const addr_t areaStart = nextAddress;
6957 		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
6958 		nextAddress = areaEnd;
6959 
6960 		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6961 		if (error != B_OK) {
6962 			// We don't need to unset or reset things on failure.
6963 			break;
6964 		}
6965 
6966 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6967 		VMAnonymousCache* anonCache = NULL;
6968 		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
6969 			// This memory will aready never be swapped. Nothing to do.
6970 		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
6971 			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
6972 				areaEnd - areaStart, swappable);
6973 		} else {
6974 			// Some other cache type? We cannot affect anything here.
6975 			error = EINVAL;
6976 		}
6977 
6978 		cacheChainLocker.Unlock();
6979 
6980 		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6981 		if (error != B_OK)
6982 			break;
6983 	}
6984 
6985 	return error;
6986 #else
6987 	// No swap support? Nothing to do.
6988 	return B_OK;
6989 #endif
6990 }
6991 
6992 
6993 status_t
6994 _user_mlock(const void* _address, size_t size)
6995 {
6996 	return user_set_memory_swappable(_address, size, false);
6997 }
6998 
6999 
7000 status_t
7001 _user_munlock(const void* _address, size_t size)
7002 {
7003 	// TODO: B_SHARED_AREAs need to be handled a bit differently:
7004 	// if multiple clones of an area had mlock() called on them,
7005 	// munlock() must also be called on all of them to actually unlock.
7006 	// (At present, the first munlock() will unlock all.)
7007 	// TODO: fork() should automatically unlock memory in the child.
7008 	return user_set_memory_swappable(_address, size, true);
7009 }
7010 
7011 
7012 // #pragma mark -- compatibility
7013 
7014 
7015 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7016 
7017 
7018 struct physical_entry_beos {
7019 	uint32	address;
7020 	uint32	size;
7021 };
7022 
7023 
7024 /*!	The physical_entry structure has changed. We need to translate it to the
7025 	old one.
7026 */
7027 extern "C" int32
7028 __get_memory_map_beos(const void* _address, size_t numBytes,
7029 	physical_entry_beos* table, int32 numEntries)
7030 {
7031 	if (numEntries <= 0)
7032 		return B_BAD_VALUE;
7033 
7034 	const uint8* address = (const uint8*)_address;
7035 
7036 	int32 count = 0;
7037 	while (numBytes > 0 && count < numEntries) {
7038 		physical_entry entry;
7039 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
7040 		if (result < 0) {
7041 			if (result != B_BUFFER_OVERFLOW)
7042 				return result;
7043 		}
7044 
7045 		if (entry.address >= (phys_addr_t)1 << 32) {
7046 			panic("get_memory_map(): Address is greater 4 GB!");
7047 			return B_ERROR;
7048 		}
7049 
7050 		table[count].address = entry.address;
7051 		table[count++].size = entry.size;
7052 
7053 		address += entry.size;
7054 		numBytes -= entry.size;
7055 	}
7056 
7057 	// null-terminate the table, if possible
7058 	if (count < numEntries) {
7059 		table[count].address = 0;
7060 		table[count].size = 0;
7061 	}
7062 
7063 	return B_OK;
7064 }
7065 
7066 
7067 /*!	The type of the \a physicalAddress parameter has changed from void* to
7068 	phys_addr_t.
7069 */
7070 extern "C" area_id
7071 __map_physical_memory_beos(const char* name, void* physicalAddress,
7072 	size_t numBytes, uint32 addressSpec, uint32 protection,
7073 	void** _virtualAddress)
7074 {
7075 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7076 		addressSpec, protection, _virtualAddress);
7077 }
7078 
7079 
7080 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7081 	we meddle with the \a lock parameter to force 32 bit.
7082 */
7083 extern "C" area_id
7084 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7085 	size_t size, uint32 lock, uint32 protection)
7086 {
7087 	switch (lock) {
7088 		case B_NO_LOCK:
7089 			break;
7090 		case B_FULL_LOCK:
7091 		case B_LAZY_LOCK:
7092 			lock = B_32_BIT_FULL_LOCK;
7093 			break;
7094 		case B_CONTIGUOUS:
7095 			lock = B_32_BIT_CONTIGUOUS;
7096 			break;
7097 	}
7098 
7099 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7100 		protection);
7101 }
7102 
7103 
7104 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7105 	"BASE");
7106 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7107 	"map_physical_memory@", "BASE");
7108 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7109 	"BASE");
7110 
7111 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7112 	"get_memory_map@@", "1_ALPHA3");
7113 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7114 	"map_physical_memory@@", "1_ALPHA3");
7115 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7116 	"1_ALPHA3");
7117 
7118 
7119 #else
7120 
7121 
7122 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7123 	"get_memory_map@@", "BASE");
7124 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7125 	"map_physical_memory@@", "BASE");
7126 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7127 	"BASE");
7128 
7129 
7130 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7131