xref: /haiku/src/system/kernel/vm/vm.cpp (revision a127b88ecbfab58f64944c98aa47722a18e363b2)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/ThreadAutoLock.h>
51 #include <vm/vm_page.h>
52 #include <vm/vm_priv.h>
53 #include <vm/VMAddressSpace.h>
54 #include <vm/VMArea.h>
55 #include <vm/VMCache.h>
56 
57 #include "VMAddressSpaceLocking.h"
58 #include "VMAnonymousCache.h"
59 #include "VMAnonymousNoSwapCache.h"
60 #include "IORequest.h"
61 
62 
63 //#define TRACE_VM
64 //#define TRACE_FAULTS
65 #ifdef TRACE_VM
66 #	define TRACE(x) dprintf x
67 #else
68 #	define TRACE(x) ;
69 #endif
70 #ifdef TRACE_FAULTS
71 #	define FTRACE(x) dprintf x
72 #else
73 #	define FTRACE(x) ;
74 #endif
75 
76 
77 namespace {
78 
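// Locking policy for AutoLocker<VMCache>: the cache is expected to already be
// locked (via vm_area_get_locked_cache()), so Lock() is a no-op; Unlock()
// releases the cache again through vm_area_put_locked_cache().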
79 class AreaCacheLocking {
80 public:
81 	inline bool Lock(VMCache* lockable)
82 	{
83 		return false;
84 	}
85 
86 	inline void Unlock(VMCache* lockable)
87 	{
88 		vm_area_put_locked_cache(lockable);
89 	}
90 };
91 
92 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
93 public:
94 	inline AreaCacheLocker(VMCache* cache = NULL)
95 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
96 	{
97 	}
98 
99 	inline AreaCacheLocker(VMArea* area)
100 		: AutoLocker<VMCache, AreaCacheLocking>()
101 	{
102 		SetTo(area);
103 	}
104 
105 	inline void SetTo(VMCache* cache, bool alreadyLocked)
106 	{
107 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
108 	}
109 
110 	inline void SetTo(VMArea* area)
111 	{
112 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
113 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
114 	}
115 };
116 
117 
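// Locks a chain of caches from a top (consumer) cache down through its source
// caches. Each cache's user data pointer is used to remember its consumer, so
// that Unlock() can release the chain in source -> consumer order.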
118 class VMCacheChainLocker {
119 public:
120 	VMCacheChainLocker()
121 		:
122 		fTopCache(NULL),
123 		fBottomCache(NULL)
124 	{
125 	}
126 
127 	VMCacheChainLocker(VMCache* topCache)
128 		:
129 		fTopCache(topCache),
130 		fBottomCache(topCache)
131 	{
132 	}
133 
134 	~VMCacheChainLocker()
135 	{
136 		Unlock();
137 	}
138 
139 	void SetTo(VMCache* topCache)
140 	{
141 		fTopCache = topCache;
142 		fBottomCache = topCache;
143 
144 		if (topCache != NULL)
145 			topCache->SetUserData(NULL);
146 	}
147 
148 	VMCache* LockSourceCache()
149 	{
150 		if (fBottomCache == NULL || fBottomCache->source == NULL)
151 			return NULL;
152 
153 		VMCache* previousCache = fBottomCache;
154 
155 		fBottomCache = fBottomCache->source;
156 		fBottomCache->Lock();
157 		fBottomCache->AcquireRefLocked();
158 		fBottomCache->SetUserData(previousCache);
159 
160 		return fBottomCache;
161 	}
162 
163 	void LockAllSourceCaches()
164 	{
165 		while (LockSourceCache() != NULL) {
166 		}
167 	}
168 
169 	void Unlock(VMCache* exceptCache = NULL)
170 	{
171 		if (fTopCache == NULL)
172 			return;
173 
174 		// Unlock caches in source -> consumer direction. This is important to
175 		// avoid double-locking and a reversal of locking order in case a cache
176 		// is eligible for merging.
177 		VMCache* cache = fBottomCache;
178 		while (cache != NULL) {
179 			VMCache* nextCache = (VMCache*)cache->UserData();
180 			if (cache != exceptCache)
181 				cache->ReleaseRefAndUnlock(cache != fTopCache);
182 
183 			if (cache == fTopCache)
184 				break;
185 
186 			cache = nextCache;
187 		}
188 
189 		fTopCache = NULL;
190 		fBottomCache = NULL;
191 	}
192 
193 	void UnlockKeepRefs(bool keepTopCacheLocked)
194 	{
195 		if (fTopCache == NULL)
196 			return;
197 
198 		VMCache* nextCache = fBottomCache;
199 		VMCache* cache = NULL;
200 
201 		while (keepTopCacheLocked
202 				? nextCache != fTopCache : cache != fTopCache) {
203 			cache = nextCache;
204 			nextCache = (VMCache*)cache->UserData();
205 			cache->Unlock(cache != fTopCache);
206 		}
207 	}
208 
209 	void RelockCaches(bool topCacheLocked)
210 	{
211 		if (fTopCache == NULL)
212 			return;
213 
214 		VMCache* nextCache = fTopCache;
215 		VMCache* cache = NULL;
216 		if (topCacheLocked) {
217 			cache = nextCache;
218 			nextCache = cache->source;
219 		}
220 
221 		while (cache != fBottomCache && nextCache != NULL) {
222 			VMCache* consumer = cache;
223 			cache = nextCache;
224 			nextCache = cache->source;
225 			cache->Lock();
226 			cache->SetUserData(consumer);
227 		}
228 	}
229 
230 private:
231 	VMCache*	fTopCache;
232 	VMCache*	fBottomCache;
233 };
234 
235 } // namespace
236 
237 
238 // The memory reserve that an allocation of a given priority must not touch.
239 static const size_t kMemoryReserveForPriority[] = {
240 	VM_MEMORY_RESERVE_USER,		// user
241 	VM_MEMORY_RESERVE_SYSTEM,	// system
242 	0							// VIP
243 };
244 
245 
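// The object cache from which map_page() allocates vm_page_mapping objects.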
246 ObjectCache* gPageMappingsObjectCache;
247 
248 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
249 
250 static off_t sAvailableMemory;
251 static off_t sNeededMemory;
252 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
253 static uint32 sPageFaults;
254 
255 static VMPhysicalPageMapper* sPhysicalPageMapper;
256 
257 #if DEBUG_CACHE_LIST
258 
259 struct cache_info {
260 	VMCache*	cache;
261 	addr_t		page_count;
262 	addr_t		committed;
263 };
264 
265 static const int kCacheInfoTableCount = 100 * 1024;
266 static cache_info* sCacheInfoTable;
267 
268 #endif	// DEBUG_CACHE_LIST
269 
270 
271 // function declarations
272 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
273 	bool addressSpaceCleanup);
274 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
275 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
276 static status_t map_backing_store(VMAddressSpace* addressSpace,
277 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
278 	int protection, int protectionMax, int mapping, uint32 flags,
279 	const virtual_address_restrictions* addressRestrictions, bool kernel,
280 	VMArea** _area, void** _virtualAddress);
281 static void fix_protection(uint32* protection);
282 
283 
284 //	#pragma mark -
285 
286 
287 #if VM_PAGE_FAULT_TRACING
288 
289 namespace VMPageFaultTracing {
290 
291 class PageFaultStart : public AbstractTraceEntry {
292 public:
293 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
294 		:
295 		fAddress(address),
296 		fPC(pc),
297 		fWrite(write),
298 		fUser(user)
299 	{
300 		Initialized();
301 	}
302 
303 	virtual void AddDump(TraceOutput& out)
304 	{
305 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
306 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
307 	}
308 
309 private:
310 	addr_t	fAddress;
311 	addr_t	fPC;
312 	bool	fWrite;
313 	bool	fUser;
314 };
315 
316 
317 // page fault errors
318 enum {
319 	PAGE_FAULT_ERROR_NO_AREA		= 0,
320 	PAGE_FAULT_ERROR_KERNEL_ONLY,
321 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
322 	PAGE_FAULT_ERROR_READ_PROTECTED,
323 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
324 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
325 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
326 };
327 
328 
329 class PageFaultError : public AbstractTraceEntry {
330 public:
331 	PageFaultError(area_id area, status_t error)
332 		:
333 		fArea(area),
334 		fError(error)
335 	{
336 		Initialized();
337 	}
338 
339 	virtual void AddDump(TraceOutput& out)
340 	{
341 		switch (fError) {
342 			case PAGE_FAULT_ERROR_NO_AREA:
343 				out.Print("page fault error: no area");
344 				break;
345 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
346 				out.Print("page fault error: area: %ld, kernel only", fArea);
347 				break;
348 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
349 				out.Print("page fault error: area: %ld, write protected",
350 					fArea);
351 				break;
352 			case PAGE_FAULT_ERROR_READ_PROTECTED:
353 				out.Print("page fault error: area: %ld, read protected", fArea);
354 				break;
355 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
356 				out.Print("page fault error: area: %ld, execute protected",
357 					fArea);
358 				break;
359 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
360 				out.Print("page fault error: kernel touching bad user memory");
361 				break;
362 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
363 				out.Print("page fault error: no address space");
364 				break;
365 			default:
366 				out.Print("page fault error: area: %ld, error: %s", fArea,
367 					strerror(fError));
368 				break;
369 		}
370 	}
371 
372 private:
373 	area_id		fArea;
374 	status_t	fError;
375 };
376 
377 
378 class PageFaultDone : public AbstractTraceEntry {
379 public:
380 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
381 			vm_page* page)
382 		:
383 		fArea(area),
384 		fTopCache(topCache),
385 		fCache(cache),
386 		fPage(page)
387 	{
388 		Initialized();
389 	}
390 
391 	virtual void AddDump(TraceOutput& out)
392 	{
393 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
394 			"page: %p", fArea, fTopCache, fCache, fPage);
395 	}
396 
397 private:
398 	area_id		fArea;
399 	VMCache*	fTopCache;
400 	VMCache*	fCache;
401 	vm_page*	fPage;
402 };
403 
404 }	// namespace VMPageFaultTracing
405 
406 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
407 #else
408 #	define TPF(x) ;
409 #endif	// VM_PAGE_FAULT_TRACING
410 
411 
412 //	#pragma mark -
413 
414 
415 /*!	The page's cache must be locked.
416 */
417 static inline void
418 increment_page_wired_count(vm_page* page)
419 {
420 	if (!page->IsMapped())
421 		atomic_add(&gMappedPagesCount, 1);
422 	page->IncrementWiredCount();
423 }
424 
425 
426 /*!	The page's cache must be locked.
427 */
428 static inline void
429 decrement_page_wired_count(vm_page* page)
430 {
431 	page->DecrementWiredCount();
432 	if (!page->IsMapped())
433 		atomic_add(&gMappedPagesCount, -1);
434 }
435 
436 
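//! Returns the virtual address within \a area at which \a page is mapped,
//! derived from the page's offset in the area's cache.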
437 static inline addr_t
438 virtual_page_address(VMArea* area, vm_page* page)
439 {
440 	return area->Base()
441 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
442 }
443 
444 
445 //! You need to have the address space locked when calling this function
446 static VMArea*
447 lookup_area(VMAddressSpace* addressSpace, area_id id)
448 {
449 	VMAreas::ReadLock();
450 
451 	VMArea* area = VMAreas::LookupLocked(id);
452 	if (area != NULL && area->address_space != addressSpace)
453 		area = NULL;
454 
455 	VMAreas::ReadUnlock();
456 
457 	return area;
458 }
459 
460 
461 static status_t
462 allocate_area_page_protections(VMArea* area)
463 {
464 	// In the page protections we store only the three user protections,
465 	// so we use 4 bits per page.
466 	size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
467 	area->page_protections = (uint8*)malloc_etc(bytes,
468 		area->address_space == VMAddressSpace::Kernel()
469 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
470 	if (area->page_protections == NULL)
471 		return B_NO_MEMORY;
472 
473 	// init the page protections for all pages to that of the area
474 	uint32 areaProtection = area->protection
475 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
476 	memset(area->page_protections, areaProtection | (areaProtection << 4),
477 		bytes);
478 	return B_OK;
479 }
480 
481 
482 static inline void
483 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
484 {
485 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
486 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
487 	uint8& entry = area->page_protections[pageIndex / 2];
488 	if (pageIndex % 2 == 0)
489 		entry = (entry & 0xf0) | protection;
490 	else
491 		entry = (entry & 0x0f) | (protection << 4);
492 }
493 
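/*!	Returns the effective protection of the page at \a pageAddress in \a area.
	The per-page protections store only the three user protection bits, packed
	as one nibble per page (even page index: low nibble, odd page index: high
	nibble); the matching kernel protection bits are derived from them.
*/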
494 
495 static inline uint32
496 get_area_page_protection(VMArea* area, addr_t pageAddress)
497 {
498 	if (area->page_protections == NULL)
499 		return area->protection;
500 
501 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
502 	uint32 protection = area->page_protections[pageIndex / 2];
503 	if (pageIndex % 2 == 0)
504 		protection &= 0x0f;
505 	else
506 		protection >>= 4;
507 
508 	uint32 kernelProtection = 0;
509 	if ((protection & B_READ_AREA) != 0)
510 		kernelProtection |= B_KERNEL_READ_AREA;
511 	if ((protection & B_WRITE_AREA) != 0)
512 		kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 	// If this is a kernel area we return only the kernel flags.
515 	if (area->address_space == VMAddressSpace::Kernel())
516 		return kernelProtection;
517 
518 	return protection | kernelProtection;
519 }
520 
521 
522 /*!	The caller must have reserved enough pages that the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
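		// For wired areas no vm_page_mapping object is allocated; the mapping
		// is tracked via the page's wired count only (see
		// increment_page_wired_count()).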
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue,
575 		// since otherwise the page daemon wouldn't keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
586 
587 
588 /*!	Unmaps the page at \a virtualAddress from \a area.
589 	The caller must hold the lock of the page's cache.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	Unmaps the given address range in \a area.
600 	The caller must hold the locks of all mapped pages' caches.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
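/*!	Clips the given range (\a address, \a size) to its intersection with
	\a area. Returns \c false if the range does not intersect the area at all.
	Otherwise \a address and \a size are adjusted to describe the intersection
	and \a offset is set to its offset within the area.
*/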
609 static inline bool
610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
611 {
612 	if (address < area->Base()) {
613 		offset = area->Base() - address;
614 		if (offset >= size)
615 			return false;
616 
617 		address = area->Base();
618 		size -= offset;
619 		offset = 0;
620 		if (size > area->Size())
621 			size = area->Size();
622 
623 		return true;
624 	}
625 
626 	offset = address - area->Base();
627 	if (offset >= area->Size())
628 		return false;
629 
630 	if (size >= area->Size() - offset)
631 		size = area->Size() - offset;
632 
633 	return true;
634 }
635 
636 
637 /*!	Cuts a piece out of an area. If the given cut range covers the complete
638 	area, it is deleted. If it covers the beginning or the end, the area is
639 	resized accordingly. If the range covers some part in the middle of the
640 	area, it is split in two; in this case the second area is returned via
641 	\a _secondArea (the variable is left untouched in the other cases).
642 	The address space must be write locked.
643 	The caller must ensure that no part of the given range is wired.
644 */
645 static status_t
646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
647 	addr_t size, VMArea** _secondArea, bool kernel)
648 {
649 	addr_t offset;
650 	if (!intersect_area(area, address, size, offset))
651 		return B_OK;
652 
653 	// Is the area fully covered?
654 	if (address == area->Base() && size == area->Size()) {
655 		delete_area(addressSpace, area, false);
656 		return B_OK;
657 	}
658 
659 	int priority;
660 	uint32 allocationFlags;
661 	if (addressSpace == VMAddressSpace::Kernel()) {
662 		priority = VM_PRIORITY_SYSTEM;
663 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
664 			| HEAP_DONT_LOCK_KERNEL_SPACE;
665 	} else {
666 		priority = VM_PRIORITY_USER;
667 		allocationFlags = 0;
668 	}
669 
670 	VMCache* cache = vm_area_get_locked_cache(area);
671 	VMCacheChainLocker cacheChainLocker(cache);
672 	cacheChainLocker.LockAllSourceCaches();
673 
674 	// If no one else uses the area's cache and it's an anonymous cache, we can
675 	// resize or split it, too.
676 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
677 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
678 
679 	// Cut the end only?
680 	if (offset > 0 && size == area->Size() - offset) {
681 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
682 			allocationFlags);
683 		if (error != B_OK)
684 			return error;
685 
686 		// unmap pages
687 		unmap_pages(area, address, size);
688 
689 		if (onlyCacheUser) {
690 			// Since VMCache::Resize() can temporarily drop the lock, we must
691 			// unlock all lower caches to prevent locking order inversion.
692 			cacheChainLocker.Unlock(cache);
693 			cache->Resize(cache->virtual_base + offset, priority);
694 			cache->ReleaseRefAndUnlock();
695 		}
696 
697 		return B_OK;
698 	}
699 
700 	// Cut the beginning only?
701 	if (area->Base() == address) {
702 		// resize the area
703 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
704 			allocationFlags);
705 		if (error != B_OK)
706 			return error;
707 
708 		// unmap pages
709 		unmap_pages(area, address, size);
710 
711 		if (onlyCacheUser) {
712 			// Since VMCache::Rebase() can temporarily drop the lock, we must
713 			// unlock all lower caches to prevent locking order inversion.
714 			cacheChainLocker.Unlock(cache);
715 			cache->Rebase(cache->virtual_base + size, priority);
716 			cache->ReleaseRefAndUnlock();
717 		}
718 		area->cache_offset += size;
719 
720 		return B_OK;
721 	}
722 
723 	// The tough part -- cut a piece out of the middle of the area.
724 	// We do that by shrinking the area to the begin section and creating a
725 	// new area for the end section.
726 	addr_t firstNewSize = offset;
727 	addr_t secondBase = address + size;
728 	addr_t secondSize = area->Size() - offset - size;
729 
730 	// unmap pages
731 	unmap_pages(area, address, area->Size() - firstNewSize);
732 
733 	// resize the area
734 	addr_t oldSize = area->Size();
735 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
736 		allocationFlags);
737 	if (error != B_OK)
738 		return error;
739 
740 	virtual_address_restrictions addressRestrictions = {};
741 	addressRestrictions.address = (void*)secondBase;
742 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
743 	VMArea* secondArea;
744 
745 	if (onlyCacheUser) {
746 		// Create a new cache for the second area.
747 		VMCache* secondCache;
748 		error = VMCacheFactory::CreateAnonymousCache(secondCache,
749 			area->protection & B_OVERCOMMITTING_AREA, 0, 0,
750 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
751 		if (error != B_OK) {
752 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
753 			return error;
754 		}
755 
756 		secondCache->Lock();
757 		secondCache->temporary = cache->temporary;
758 		secondCache->virtual_base = area->cache_offset;
759 		secondCache->virtual_end = area->cache_offset + secondSize;
760 
761 		// Transfer the concerned pages from the first cache.
762 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
763 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
764 			area->cache_offset);
765 
766 		if (error == B_OK) {
767 			// Since VMCache::Resize() can temporarily drop the lock, we must
768 			// unlock all lower caches to prevent locking order inversion.
769 			cacheChainLocker.Unlock(cache);
770 			cache->Resize(cache->virtual_base + firstNewSize, priority);
771 			// Don't unlock the cache yet because we might have to resize it
772 			// back.
773 
774 			// Map the second area.
775 			error = map_backing_store(addressSpace, secondCache,
776 				area->cache_offset, area->name, secondSize, area->wiring,
777 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
778 				&addressRestrictions, kernel, &secondArea, NULL);
779 		}
780 
781 		if (error != B_OK) {
782 			// Restore the original cache.
783 			cache->Resize(cache->virtual_base + oldSize, priority);
784 
785 			// Move the pages back.
786 			status_t readoptStatus = cache->Adopt(secondCache,
787 				area->cache_offset, secondSize, adoptOffset);
788 			if (readoptStatus != B_OK) {
789 				// Some (swap) pages have not been moved back and will be lost
790 				// once the second cache is deleted.
791 				panic("failed to restore cache range: %s",
792 					strerror(readoptStatus));
793 
794 				// TODO: Handle out of memory cases by freeing memory and
795 				// retrying.
796 			}
797 
798 			cache->ReleaseRefAndUnlock();
799 			secondCache->ReleaseRefAndUnlock();
800 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
801 			return error;
802 		}
803 
804 		// Now we can unlock it.
805 		cache->ReleaseRefAndUnlock();
806 		secondCache->Unlock();
807 	} else {
808 		error = map_backing_store(addressSpace, cache, area->cache_offset
809 			+ (secondBase - area->Base()),
810 			area->name, secondSize, area->wiring, area->protection,
811 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
812 			&addressRestrictions, kernel, &secondArea, NULL);
813 		if (error != B_OK) {
814 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
815 			return error;
816 		}
817 		// We need a cache reference for the new area.
818 		cache->AcquireRefLocked();
819 	}
820 
821 	if (_secondArea != NULL)
822 		*_secondArea = secondArea;
823 
824 	return B_OK;
825 }
826 
827 
828 /*!	Deletes or cuts all areas in the given address range.
829 	The address space must be write-locked.
830 	The caller must ensure that no part of the given range is wired.
831 */
832 static status_t
833 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
834 	bool kernel)
835 {
836 	size = PAGE_ALIGN(size);
837 
838 	// Check whether the caller is allowed to modify the concerned areas.
839 	if (!kernel) {
840 		for (VMAddressSpace::AreaRangeIterator it
841 				= addressSpace->GetAreaRangeIterator(address, size);
842 			VMArea* area = it.Next();) {
843 
844 			if ((area->protection & B_KERNEL_AREA) != 0) {
845 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
846 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
847 					team_get_current_team_id(), area->id, area->name);
848 				return B_NOT_ALLOWED;
849 			}
850 		}
851 	}
852 
853 	for (VMAddressSpace::AreaRangeIterator it
854 			= addressSpace->GetAreaRangeIterator(address, size);
855 		VMArea* area = it.Next();) {
856 
857 		status_t error = cut_area(addressSpace, area, address, size, NULL,
858 			kernel);
859 		if (error != B_OK)
860 			return error;
861 			// Failing after already messing with areas is ugly, but we
862 			// can't do anything about it.
863 	}
864 
865 	return B_OK;
866 }
867 
868 
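/*!	Discards the cached pages in the given range of \a area, provided the
	area is the sole user of its anonymous (RAM) cache; otherwise the call is
	a no-op.
*/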
869 static status_t
870 discard_area_range(VMArea* area, addr_t address, addr_t size)
871 {
872 	addr_t offset;
873 	if (!intersect_area(area, address, size, offset))
874 		return B_OK;
875 
876 	// If someone else uses the area's cache or it's not an anonymous cache, we
877 	// can't discard.
878 	VMCache* cache = vm_area_get_locked_cache(area);
879 	if (cache->areas != area || area->cache_next != NULL
880 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
881 		return B_OK;
882 	}
883 
884 	VMCacheChainLocker cacheChainLocker(cache);
885 	cacheChainLocker.LockAllSourceCaches();
886 
887 	unmap_pages(area, address, size);
888 
889 	// Since VMCache::Discard() can temporarily drop the lock, we must
890 	// unlock all lower caches to prevent locking order inversion.
891 	cacheChainLocker.Unlock(cache);
892 	cache->Discard(cache->virtual_base + offset, size);
893 	cache->ReleaseRefAndUnlock();
894 
895 	return B_OK;
896 }
897 
898 
899 static status_t
900 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
901 	bool kernel)
902 {
903 	for (VMAddressSpace::AreaRangeIterator it
904 		= addressSpace->GetAreaRangeIterator(address, size);
905 			VMArea* area = it.Next();) {
906 		status_t error = discard_area_range(area, address, size);
907 		if (error != B_OK)
908 			return error;
909 	}
910 
911 	return B_OK;
912 }
913 
914 
915 /*! You need to hold the lock of the cache and the write lock of the address
916 	space when calling this function.
917 	Note that in case of error your cache will be temporarily unlocked.
918 	If \a addressSpec is \c B_EXACT_ADDRESS and the
919 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
920 	that no part of the specified address range (base \c *_virtualAddress, size
921 	\a size) is wired. The cache will also be temporarily unlocked.
922 */
923 static status_t
924 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
925 	const char* areaName, addr_t size, int wiring, int protection,
926 	int protectionMax, int mapping,
927 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
928 	bool kernel, VMArea** _area, void** _virtualAddress)
929 {
930 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
931 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
932 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
933 		addressSpace, cache, addressRestrictions->address, offset, size,
934 		addressRestrictions->address_specification, wiring, protection,
935 		protectionMax, _area, areaName));
936 	cache->AssertLocked();
937 
938 	if (size == 0) {
939 #if KDEBUG
940 		panic("map_backing_store(): called with size=0 for area '%s'!",
941 			areaName);
942 #endif
943 		return B_BAD_VALUE;
944 	}
945 	if (offset < 0)
946 		return B_BAD_VALUE;
947 
948 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
949 		| HEAP_DONT_LOCK_KERNEL_SPACE;
950 	int priority;
951 	if (addressSpace != VMAddressSpace::Kernel()) {
952 		priority = VM_PRIORITY_USER;
953 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
954 		priority = VM_PRIORITY_VIP;
955 		allocationFlags |= HEAP_PRIORITY_VIP;
956 	} else
957 		priority = VM_PRIORITY_SYSTEM;
958 
959 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
960 		allocationFlags);
961 	if (area == NULL)
962 		return B_NO_MEMORY;
963 	if (mapping != REGION_PRIVATE_MAP)
964 		area->protection_max = protectionMax & B_USER_PROTECTION;
965 
966 	status_t status;
967 
968 	// if this is a private map, we need to create a new cache
969 	// to handle the private copies of pages as they are written to
970 	VMCache* sourceCache = cache;
971 	if (mapping == REGION_PRIVATE_MAP) {
972 		VMCache* newCache;
973 
974 		// create an anonymous cache
975 		status = VMCacheFactory::CreateAnonymousCache(newCache,
976 			(protection & B_STACK_AREA) != 0
977 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
978 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
979 		if (status != B_OK)
980 			goto err1;
981 
982 		newCache->Lock();
983 		newCache->temporary = 1;
984 		newCache->virtual_base = offset;
985 		newCache->virtual_end = offset + size;
986 
987 		cache->AddConsumer(newCache);
988 
989 		cache = newCache;
990 	}
991 
992 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
993 		status = cache->SetMinimalCommitment(size, priority);
994 		if (status != B_OK)
995 			goto err2;
996 	}
997 
998 	// check to see if this address space has entered DELETE state
999 	if (addressSpace->IsBeingDeleted()) {
1000 		// okay, someone is trying to delete this address space now, so we
1001 		// can't insert the area and have to back out
1002 		status = B_BAD_TEAM_ID;
1003 		goto err2;
1004 	}
1005 
1006 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1007 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1008 		// temporarily unlock the current cache since it might be mapped to
1009 		// some existing area, and unmap_address_range also needs to lock that
1010 		// cache to delete the area.
1011 		cache->Unlock();
1012 		status = unmap_address_range(addressSpace,
1013 			(addr_t)addressRestrictions->address, size, kernel);
1014 		cache->Lock();
1015 		if (status != B_OK)
1016 			goto err2;
1017 	}
1018 
1019 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1020 		allocationFlags, _virtualAddress);
1021 	if (status == B_NO_MEMORY
1022 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1023 		// Due to how many locks are held, we cannot wait here for space to be
1024 		// freed up, but we can at least notify the low_resource handler.
1025 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1026 	}
1027 	if (status != B_OK)
1028 		goto err2;
1029 
1030 	// attach the cache to the area
1031 	area->cache = cache;
1032 	area->cache_offset = offset;
1033 
1034 	// point the cache back to the area
1035 	cache->InsertAreaLocked(area);
1036 	if (mapping == REGION_PRIVATE_MAP)
1037 		cache->Unlock();
1038 
1039 	// insert the area in the global areas map
1040 	VMAreas::Insert(area);
1041 
1042 	// grab a ref to the address space (the area holds this)
1043 	addressSpace->Get();
1044 
1045 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1046 //		cache, sourceCache, areaName, area);
1047 
1048 	*_area = area;
1049 	return B_OK;
1050 
1051 err2:
1052 	if (mapping == REGION_PRIVATE_MAP) {
1053 		// We created this cache, so we must delete it again. Note that we
1054 		// need to temporarily unlock the source cache or we'll otherwise
1055 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1056 		sourceCache->Unlock();
1057 		cache->ReleaseRefAndUnlock();
1058 		sourceCache->Lock();
1059 	}
1060 err1:
1061 	addressSpace->DeleteArea(area, allocationFlags);
1062 	return status;
1063 }
1064 
1065 
1066 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1067 	  locker1, locker2).
1068 */
1069 template<typename LockerType1, typename LockerType2>
1070 static inline bool
1071 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1072 {
1073 	area->cache->AssertLocked();
1074 
1075 	VMAreaUnwiredWaiter waiter;
1076 	if (!area->AddWaiterIfWired(&waiter))
1077 		return false;
1078 
1079 	// unlock everything and wait
1080 	if (locker1 != NULL)
1081 		locker1->Unlock();
1082 	if (locker2 != NULL)
1083 		locker2->Unlock();
1084 
1085 	waiter.waitEntry.Wait();
1086 
1087 	return true;
1088 }
1089 
1090 
1091 /*!	Checks whether the given area has any wired ranges intersecting with the
1092 	specified range and waits, if so.
1093 
1094 	When it has to wait, the function calls \c Unlock() on both \a locker1
1095 	and \a locker2, if given.
1096 	The area's top cache must be locked and must be unlocked as a side effect
1097 	of calling \c Unlock() on either \a locker1 or \a locker2.
1098 
1099 	If the function does not have to wait it does not modify or unlock any
1100 	object.
1101 
1102 	\param area The area to be checked.
1103 	\param base The base address of the range to check.
1104 	\param size The size of the address range to check.
1105 	\param locker1 An object to be unlocked before starting to wait (may
1106 		be \c NULL).
1107 	\param locker2 An object to be unlocked before starting to wait (may
1108 		be \c NULL).
1109 	\return \c true, if the function had to wait, \c false otherwise.
1110 */
1111 template<typename LockerType1, typename LockerType2>
1112 static inline bool
1113 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1114 	LockerType1* locker1, LockerType2* locker2)
1115 {
1116 	area->cache->AssertLocked();
1117 
1118 	VMAreaUnwiredWaiter waiter;
1119 	if (!area->AddWaiterIfWired(&waiter, base, size))
1120 		return false;
1121 
1122 	// unlock everything and wait
1123 	if (locker1 != NULL)
1124 		locker1->Unlock();
1125 	if (locker2 != NULL)
1126 		locker2->Unlock();
1127 
1128 	waiter.waitEntry.Wait();
1129 
1130 	return true;
1131 }
1132 
1133 
1134 /*!	Checks whether the given address space has any wired ranges intersecting
1135 	with the specified range and waits, if so.
1136 
1137 	Similar to wait_if_area_range_is_wired(), with the following differences:
1138 	- All areas intersecting with the range are checked (respectively all until
1139 	  one is found that contains a wired range intersecting with the given
1140 	  range).
1141 	- The given address space must at least be read-locked and must be unlocked
1142 	  when \c Unlock() is called on \a locker.
1143 	- None of the areas' caches are allowed to be locked.
1144 */
1145 template<typename LockerType>
1146 static inline bool
1147 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1148 	size_t size, LockerType* locker)
1149 {
1150 	for (VMAddressSpace::AreaRangeIterator it
1151 		= addressSpace->GetAreaRangeIterator(base, size);
1152 			VMArea* area = it.Next();) {
1153 
1154 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1155 
1156 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1157 			return true;
1158 	}
1159 
1160 	return false;
1161 }
1162 
1163 
1164 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1165 	It must be called in a situation where the kernel address space may be
1166 	locked.
1167 */
1168 status_t
1169 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1170 {
1171 	AddressSpaceReadLocker locker;
1172 	VMArea* area;
1173 	status_t status = locker.SetFromArea(id, area);
1174 	if (status != B_OK)
1175 		return status;
1176 
1177 	if (area->page_protections == NULL) {
1178 		status = allocate_area_page_protections(area);
1179 		if (status != B_OK)
1180 			return status;
1181 	}
1182 
1183 	*cookie = (void*)area;
1184 	return B_OK;
1185 }
1186 
1187 
1188 /*!	This is a debug helper function that can only be used with very specific
1189 	use cases.
1190 	Sets protection for the given address range to the protection specified.
1191 	If \a protection is 0 then the involved pages will be marked non-present
1192 	in the translation map to cause a fault on access. The pages aren't
1193 	actually unmapped however so that they can be marked present again with
1194 	additional calls to this function. For this to work the area must be
1195 	fully locked in memory so that the pages aren't otherwise touched.
1196 	This function does not lock the kernel address space and needs to be
1197 	supplied with a \a cookie retrieved from a successful call to
1198 	vm_prepare_kernel_area_debug_protection().
1199 */
1200 status_t
1201 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1202 	uint32 protection)
1203 {
1204 	// check address range
1205 	addr_t address = (addr_t)_address;
1206 	size = PAGE_ALIGN(size);
1207 
1208 	if ((address % B_PAGE_SIZE) != 0
1209 		|| (addr_t)address + size < (addr_t)address
1210 		|| !IS_KERNEL_ADDRESS(address)
1211 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1212 		return B_BAD_VALUE;
1213 	}
1214 
1215 	// Translate the kernel protection to user protection as we only store that.
1216 	if ((protection & B_KERNEL_READ_AREA) != 0)
1217 		protection |= B_READ_AREA;
1218 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1219 		protection |= B_WRITE_AREA;
1220 
1221 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1222 	VMTranslationMap* map = addressSpace->TranslationMap();
1223 	VMArea* area = (VMArea*)cookie;
1224 
1225 	addr_t offset = address - area->Base();
1226 	if (area->Size() - offset < size) {
1227 		panic("protect range not fully within supplied area");
1228 		return B_BAD_VALUE;
1229 	}
1230 
1231 	if (area->page_protections == NULL) {
1232 		panic("area has no page protections");
1233 		return B_BAD_VALUE;
1234 	}
1235 
1236 	// Invalidate the mapping entries so any access to them will fault or
1237 	// restore the mapping entries unchanged so that lookup will succeed again.
1238 	map->Lock();
1239 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1240 	map->Unlock();
1241 
1242 	// And set the proper page protections so that the fault case will actually
1243 	// fail and not simply try to map a new page.
1244 	for (addr_t pageAddress = address; pageAddress < address + size;
1245 			pageAddress += B_PAGE_SIZE) {
1246 		set_area_page_protection(area, pageAddress, protection);
1247 	}
1248 
1249 	return B_OK;
1250 }
1251 
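/*	A minimal usage sketch (the area ID and base address below are assumptions
	made for illustration only): prepare the area once, then toggle page
	presence on a fully locked kernel area to catch stray accesses.

		void* cookie;
		if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
			// make any access to the first page fault
			vm_set_kernel_area_debug_protection(cookie, areaBase, B_PAGE_SIZE,
				0);
			// ... later, restore read/write access again
			vm_set_kernel_area_debug_protection(cookie, areaBase, B_PAGE_SIZE,
				B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		}
*/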
1252 
1253 status_t
1254 vm_block_address_range(const char* name, void* address, addr_t size)
1255 {
1256 	if (!arch_vm_supports_protection(0))
1257 		return B_NOT_SUPPORTED;
1258 
1259 	AddressSpaceWriteLocker locker;
1260 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1261 	if (status != B_OK)
1262 		return status;
1263 
1264 	VMAddressSpace* addressSpace = locker.AddressSpace();
1265 
1266 	// create an anonymous cache
1267 	VMCache* cache;
1268 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1269 		VM_PRIORITY_SYSTEM);
1270 	if (status != B_OK)
1271 		return status;
1272 
1273 	cache->temporary = 1;
1274 	cache->virtual_end = size;
1275 	cache->Lock();
1276 
1277 	VMArea* area;
1278 	virtual_address_restrictions addressRestrictions = {};
1279 	addressRestrictions.address = address;
1280 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1281 	status = map_backing_store(addressSpace, cache, 0, name, size,
1282 		B_ALREADY_WIRED, 0, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1283 		true, &area, NULL);
1284 	if (status != B_OK) {
1285 		cache->ReleaseRefAndUnlock();
1286 		return status;
1287 	}
1288 
1289 	cache->Unlock();
1290 	area->cache_type = CACHE_TYPE_RAM;
1291 	return area->id;
1292 }
1293 
1294 
1295 status_t
1296 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1297 {
1298 	AddressSpaceWriteLocker locker(team);
1299 	if (!locker.IsLocked())
1300 		return B_BAD_TEAM_ID;
1301 
1302 	VMAddressSpace* addressSpace = locker.AddressSpace();
1303 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1304 		addressSpace == VMAddressSpace::Kernel()
1305 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1306 }
1307 
1308 
1309 status_t
1310 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1311 	addr_t size, uint32 flags)
1312 {
1313 	if (size == 0)
1314 		return B_BAD_VALUE;
1315 
1316 	AddressSpaceWriteLocker locker(team);
1317 	if (!locker.IsLocked())
1318 		return B_BAD_TEAM_ID;
1319 
1320 	virtual_address_restrictions addressRestrictions = {};
1321 	addressRestrictions.address = *_address;
1322 	addressRestrictions.address_specification = addressSpec;
1323 	VMAddressSpace* addressSpace = locker.AddressSpace();
1324 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1325 		addressSpace == VMAddressSpace::Kernel()
1326 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1327 		_address);
1328 }
1329 
1330 
1331 area_id
1332 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1333 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1334 	const virtual_address_restrictions* virtualAddressRestrictions,
1335 	const physical_address_restrictions* physicalAddressRestrictions,
1336 	bool kernel, void** _address)
1337 {
1338 	VMArea* area;
1339 	VMCache* cache;
1340 	vm_page* page = NULL;
1341 	bool isStack = (protection & B_STACK_AREA) != 0;
1342 	page_num_t guardPages;
1343 	bool canOvercommit = false;
1344 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1345 		? VM_PAGE_ALLOC_CLEAR : 0;
1346 
1347 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1348 		team, name, size));
1349 
1350 	size = PAGE_ALIGN(size);
1351 	guardSize = PAGE_ALIGN(guardSize);
1352 	guardPages = guardSize / B_PAGE_SIZE;
1353 
1354 	if (size == 0 || size < guardSize)
1355 		return B_BAD_VALUE;
1356 	if (!arch_vm_supports_protection(protection))
1357 		return B_NOT_SUPPORTED;
1358 
1359 	if (team == B_CURRENT_TEAM)
1360 		team = VMAddressSpace::CurrentID();
1361 	if (team < 0)
1362 		return B_BAD_TEAM_ID;
1363 
1364 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1365 		canOvercommit = true;
1366 
1367 #ifdef DEBUG_KERNEL_STACKS
1368 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1369 		isStack = true;
1370 #endif
1371 
1372 	// check parameters
1373 	switch (virtualAddressRestrictions->address_specification) {
1374 		case B_ANY_ADDRESS:
1375 		case B_EXACT_ADDRESS:
1376 		case B_BASE_ADDRESS:
1377 		case B_ANY_KERNEL_ADDRESS:
1378 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1379 		case B_RANDOMIZED_ANY_ADDRESS:
1380 		case B_RANDOMIZED_BASE_ADDRESS:
1381 			break;
1382 
1383 		default:
1384 			return B_BAD_VALUE;
1385 	}
1386 
1387 	// If low or high physical address restrictions are given, we force
1388 	// B_CONTIGUOUS wiring, since only then we'll use
1389 	// vm_page_allocate_page_run() which deals with those restrictions.
1390 	if (physicalAddressRestrictions->low_address != 0
1391 		|| physicalAddressRestrictions->high_address != 0) {
1392 		wiring = B_CONTIGUOUS;
1393 	}
1394 
1395 	physical_address_restrictions stackPhysicalRestrictions;
1396 	bool doReserveMemory = false;
1397 	switch (wiring) {
1398 		case B_NO_LOCK:
1399 			break;
1400 		case B_FULL_LOCK:
1401 		case B_LAZY_LOCK:
1402 		case B_CONTIGUOUS:
1403 			doReserveMemory = true;
1404 			break;
1405 		case B_ALREADY_WIRED:
1406 			break;
1407 		case B_LOMEM:
1408 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1409 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1410 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1411 			wiring = B_CONTIGUOUS;
1412 			doReserveMemory = true;
1413 			break;
1414 		case B_32_BIT_FULL_LOCK:
1415 			if (B_HAIKU_PHYSICAL_BITS <= 32
1416 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1417 				wiring = B_FULL_LOCK;
1418 				doReserveMemory = true;
1419 				break;
1420 			}
1421 			// TODO: We don't really support this mode efficiently. Just fall
1422 			// through for now ...
1423 		case B_32_BIT_CONTIGUOUS:
1424 			#if B_HAIKU_PHYSICAL_BITS > 32
1425 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1426 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1427 					stackPhysicalRestrictions.high_address
1428 						= (phys_addr_t)1 << 32;
1429 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1430 				}
1431 			#endif
1432 			wiring = B_CONTIGUOUS;
1433 			doReserveMemory = true;
1434 			break;
1435 		default:
1436 			return B_BAD_VALUE;
1437 	}
1438 
1439 	// Optimization: For a single-page contiguous allocation without low/high
1440 	// memory restriction B_FULL_LOCK wiring suffices.
1441 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1442 		&& physicalAddressRestrictions->low_address == 0
1443 		&& physicalAddressRestrictions->high_address == 0) {
1444 		wiring = B_FULL_LOCK;
1445 	}
1446 
1447 	// For full lock or contiguous areas we're also going to map the pages and
1448 	// thus need to reserve pages for the mapping backend upfront.
1449 	addr_t reservedMapPages = 0;
1450 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1451 		AddressSpaceWriteLocker locker;
1452 		status_t status = locker.SetTo(team);
1453 		if (status != B_OK)
1454 			return status;
1455 
1456 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1457 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1458 	}
1459 
1460 	int priority;
1461 	if (team != VMAddressSpace::KernelID())
1462 		priority = VM_PRIORITY_USER;
1463 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1464 		priority = VM_PRIORITY_VIP;
1465 	else
1466 		priority = VM_PRIORITY_SYSTEM;
1467 
1468 	// Reserve memory before acquiring the address space lock. This reduces the
1469 	// chances of failure, since while holding the write lock to the address
1470 	// space (if it is the kernel address space that is), the low memory handler
1471 	// won't be able to free anything for us.
1472 	addr_t reservedMemory = 0;
1473 	if (doReserveMemory) {
1474 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1475 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1476 			return B_NO_MEMORY;
1477 		reservedMemory = size;
1478 		// TODO: We don't reserve the memory for the pages backing the page
1479 		// directories/tables. We actually need to, since we currently don't
1480 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1481 		// there are actually fewer physical pages than there should be, which
1482 		// can get the VM into trouble in low memory situations.
1483 	}
1484 
1485 	AddressSpaceWriteLocker locker;
1486 	VMAddressSpace* addressSpace;
1487 	status_t status;
1488 
1489 	// For full lock areas reserve the pages before locking the address
1490 	// space. E.g. block caches can't release their memory while we hold the
1491 	// address space lock.
1492 	page_num_t reservedPages = reservedMapPages;
1493 	if (wiring == B_FULL_LOCK)
1494 		reservedPages += size / B_PAGE_SIZE;
1495 
1496 	vm_page_reservation reservation;
1497 	if (reservedPages > 0) {
1498 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1499 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1500 					priority)) {
1501 				reservedPages = 0;
1502 				status = B_WOULD_BLOCK;
1503 				goto err0;
1504 			}
1505 		} else
1506 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1507 	}
1508 
1509 	if (wiring == B_CONTIGUOUS) {
1510 		// we try to allocate the page run here upfront as this may easily
1511 		// fail for obvious reasons
1512 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1513 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1514 		if (page == NULL) {
1515 			status = B_NO_MEMORY;
1516 			goto err0;
1517 		}
1518 	}
1519 
1520 	// Lock the address space and, if B_EXACT_ADDRESS and
1521 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1522 	// is not wired.
1523 	do {
1524 		status = locker.SetTo(team);
1525 		if (status != B_OK)
1526 			goto err1;
1527 
1528 		addressSpace = locker.AddressSpace();
1529 	} while (virtualAddressRestrictions->address_specification
1530 			== B_EXACT_ADDRESS
1531 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1532 		&& wait_if_address_range_is_wired(addressSpace,
1533 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1534 
1535 	// create an anonymous cache
1536 	// if it's a stack, make sure that two pages are available at least
1537 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1538 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1539 		wiring == B_NO_LOCK, priority);
1540 	if (status != B_OK)
1541 		goto err1;
1542 
1543 	cache->temporary = 1;
1544 	cache->virtual_end = size;
1545 	cache->committed_size = reservedMemory;
1546 		// TODO: This should be done via a method.
1547 	reservedMemory = 0;
1548 
1549 	cache->Lock();
1550 
1551 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1552 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1553 		virtualAddressRestrictions, kernel, &area, _address);
1554 
1555 	if (status != B_OK) {
1556 		cache->ReleaseRefAndUnlock();
1557 		goto err1;
1558 	}
1559 
1560 	locker.DegradeToReadLock();
1561 
1562 	switch (wiring) {
1563 		case B_NO_LOCK:
1564 		case B_LAZY_LOCK:
1565 			// do nothing - the pages are mapped in as needed
1566 			break;
1567 
1568 		case B_FULL_LOCK:
1569 		{
1570 			// Allocate and map all pages for this area
1571 
1572 			off_t offset = 0;
1573 			for (addr_t address = area->Base();
1574 					address < area->Base() + (area->Size() - 1);
1575 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1576 #ifdef DEBUG_KERNEL_STACKS
1577 #	ifdef STACK_GROWS_DOWNWARDS
1578 				if (isStack && address < area->Base()
1579 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1580 #	else
1581 				if (isStack && address >= area->Base() + area->Size()
1582 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1583 #	endif
1584 					continue;
1585 #endif
1586 				vm_page* page = vm_page_allocate_page(&reservation,
1587 					PAGE_STATE_WIRED | pageAllocFlags);
1588 				cache->InsertPage(page, offset);
1589 				map_page(area, page, address, protection, &reservation);
1590 
1591 				DEBUG_PAGE_ACCESS_END(page);
1592 			}
1593 
1594 			break;
1595 		}
1596 
1597 		case B_ALREADY_WIRED:
1598 		{
1599 			// The pages should already be mapped. This is only really useful
1600 			// during boot time. Find the appropriate vm_page objects and stick
1601 			// them in the cache object.
1602 			VMTranslationMap* map = addressSpace->TranslationMap();
1603 			off_t offset = 0;
1604 
1605 			if (!gKernelStartup)
1606 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1607 
1608 			map->Lock();
1609 
1610 			for (addr_t virtualAddress = area->Base();
1611 					virtualAddress < area->Base() + (area->Size() - 1);
1612 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1613 				phys_addr_t physicalAddress;
1614 				uint32 flags;
1615 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1616 				if (status < B_OK) {
1617 					panic("looking up mapping failed for va 0x%lx\n",
1618 						virtualAddress);
1619 				}
1620 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1621 				if (page == NULL) {
1622 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1623 						"\n", physicalAddress);
1624 				}
1625 
1626 				DEBUG_PAGE_ACCESS_START(page);
1627 
1628 				cache->InsertPage(page, offset);
1629 				increment_page_wired_count(page);
1630 				vm_page_set_state(page, PAGE_STATE_WIRED);
1631 				page->busy = false;
1632 
1633 				DEBUG_PAGE_ACCESS_END(page);
1634 			}
1635 
1636 			map->Unlock();
1637 			break;
1638 		}
1639 
1640 		case B_CONTIGUOUS:
1641 		{
1642 			// We have already allocated our contiguous page run, so we can now
1643 			// just map them in the address space
1644 			VMTranslationMap* map = addressSpace->TranslationMap();
1645 			phys_addr_t physicalAddress
1646 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1647 			addr_t virtualAddress = area->Base();
1648 			off_t offset = 0;
1649 
1650 			map->Lock();
1651 
1652 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1653 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1654 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1655 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1656 				if (page == NULL)
1657 					panic("couldn't lookup physical page just allocated\n");
1658 
1659 				status = map->Map(virtualAddress, physicalAddress, protection,
1660 					area->MemoryType(), &reservation);
1661 				if (status < B_OK)
1662 					panic("couldn't map physical page in page run\n");
1663 
1664 				cache->InsertPage(page, offset);
1665 				increment_page_wired_count(page);
1666 
1667 				DEBUG_PAGE_ACCESS_END(page);
1668 			}
1669 
1670 			map->Unlock();
1671 			break;
1672 		}
1673 
1674 		default:
1675 			break;
1676 	}
1677 
1678 	cache->Unlock();
1679 
1680 	if (reservedPages > 0)
1681 		vm_page_unreserve_pages(&reservation);
1682 
1683 	TRACE(("vm_create_anonymous_area: done\n"));
1684 
1685 	area->cache_type = CACHE_TYPE_RAM;
1686 	return area->id;
1687 
1688 err1:
1689 	if (wiring == B_CONTIGUOUS) {
1690 		// we had reserved the area space upfront...
1691 		phys_addr_t pageNumber = page->physical_page_number;
1692 		int32 i;
1693 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1694 			page = vm_lookup_page(pageNumber);
1695 			if (page == NULL)
1696 				panic("couldn't lookup physical page just allocated\n");
1697 
1698 			vm_page_set_state(page, PAGE_STATE_FREE);
1699 		}
1700 	}
1701 
1702 err0:
1703 	if (reservedPages > 0)
1704 		vm_page_unreserve_pages(&reservation);
1705 	if (reservedMemory > 0)
1706 		vm_unreserve_memory(reservedMemory);
1707 
1708 	return status;
1709 }
1710 
1711 
1712 area_id
1713 vm_map_physical_memory(team_id team, const char* name, void** _address,
1714 	uint32 addressSpec, addr_t size, uint32 protection,
1715 	phys_addr_t physicalAddress, bool alreadyWired)
1716 {
1717 	VMArea* area;
1718 	VMCache* cache;
1719 	addr_t mapOffset;
1720 
1721 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1722 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1723 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1724 		addressSpec, size, protection, physicalAddress));
1725 
1726 	if (!arch_vm_supports_protection(protection))
1727 		return B_NOT_SUPPORTED;
1728 
1729 	AddressSpaceWriteLocker locker(team);
1730 	if (!locker.IsLocked())
1731 		return B_BAD_TEAM_ID;
1732 
1733 	// if the physical address is not page aligned, move the mapping down
1734 	// so that it starts on a page boundary
1735 	mapOffset = physicalAddress % B_PAGE_SIZE;
1736 	size += mapOffset;
1737 	physicalAddress -= mapOffset;
1738 
1739 	size = PAGE_ALIGN(size);
1740 
1741 	// create a device cache
1742 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1743 	if (status != B_OK)
1744 		return status;
1745 
1746 	cache->virtual_end = size;
1747 
1748 	cache->Lock();
1749 
1750 	virtual_address_restrictions addressRestrictions = {};
1751 	addressRestrictions.address = *_address;
1752 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1753 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1754 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1755 		true, &area, _address);
1756 
1757 	if (status < B_OK)
1758 		cache->ReleaseRefLocked();
1759 
1760 	cache->Unlock();
1761 
1762 	if (status == B_OK) {
1763 		// set requested memory type -- use uncached, if not given
1764 		uint32 memoryType = addressSpec & B_MTR_MASK;
1765 		if (memoryType == 0)
1766 			memoryType = B_MTR_UC;
1767 
1768 		area->SetMemoryType(memoryType);
1769 
1770 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1771 		if (status != B_OK)
1772 			delete_area(locker.AddressSpace(), area, false);
1773 	}
1774 
1775 	if (status != B_OK)
1776 		return status;
1777 
1778 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1779 
1780 	if (alreadyWired) {
1781 		// The area is already mapped, but possibly not with the right
1782 		// memory type.
1783 		map->Lock();
1784 		map->ProtectArea(area, area->protection);
1785 		map->Unlock();
1786 	} else {
1787 		// Map the area completely.
1788 
1789 		// reserve pages needed for the mapping
1790 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1791 			area->Base() + (size - 1));
1792 		vm_page_reservation reservation;
1793 		vm_page_reserve_pages(&reservation, reservePages,
1794 			team == VMAddressSpace::KernelID()
1795 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1796 
1797 		map->Lock();
1798 
1799 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1800 			map->Map(area->Base() + offset, physicalAddress + offset,
1801 				protection, area->MemoryType(), &reservation);
1802 		}
1803 
1804 		map->Unlock();
1805 
1806 		vm_page_unreserve_pages(&reservation);
1807 	}
1808 
1809 	// modify the pointer returned to be offset back into the new area
1810 	// the same way the physical address in was offset
1811 	*_address = (void*)((addr_t)*_address + mapOffset);
1812 
1813 	area->cache_type = CACHE_TYPE_DEVICE;
1814 	return area->id;
1815 }
1816 
1817 
1818 /*!	Don't use!
1819 	TODO: This function was introduced to map physical page vecs to
1820 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1821 	use a device cache and does not track vm_page::wired_count!
1822 */
1823 area_id
1824 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1825 	uint32 addressSpec, addr_t* _size, uint32 protection,
1826 	struct generic_io_vec* vecs, uint32 vecCount)
1827 {
1828 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1829 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1830 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1831 		addressSpec, _size, protection, vecs, vecCount));
1832 
1833 	if (!arch_vm_supports_protection(protection)
1834 		|| (addressSpec & B_MTR_MASK) != 0) {
1835 		return B_NOT_SUPPORTED;
1836 	}
1837 
1838 	AddressSpaceWriteLocker locker(team);
1839 	if (!locker.IsLocked())
1840 		return B_BAD_TEAM_ID;
1841 
1842 	if (vecCount == 0)
1843 		return B_BAD_VALUE;
1844 
1845 	addr_t size = 0;
1846 	for (uint32 i = 0; i < vecCount; i++) {
1847 		if (vecs[i].base % B_PAGE_SIZE != 0
1848 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1849 			return B_BAD_VALUE;
1850 		}
1851 
1852 		size += vecs[i].length;
1853 	}
1854 
1855 	// create a device cache
1856 	VMCache* cache;
1857 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1858 	if (result != B_OK)
1859 		return result;
1860 
1861 	cache->virtual_end = size;
1862 
1863 	cache->Lock();
1864 
1865 	VMArea* area;
1866 	virtual_address_restrictions addressRestrictions = {};
1867 	addressRestrictions.address = *_address;
1868 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1869 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1870 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
1871 		&addressRestrictions, true, &area, _address);
1872 
1873 	if (result != B_OK)
1874 		cache->ReleaseRefLocked();
1875 
1876 	cache->Unlock();
1877 
1878 	if (result != B_OK)
1879 		return result;
1880 
1881 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1882 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1883 		area->Base() + (size - 1));
1884 
1885 	vm_page_reservation reservation;
1886 	vm_page_reserve_pages(&reservation, reservePages,
1887 			team == VMAddressSpace::KernelID()
1888 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1889 	map->Lock();
1890 
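	// Walk the area page by page, advancing to the next physical vec whenever
	// the current one has been fully consumed.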
1891 	uint32 vecIndex = 0;
1892 	size_t vecOffset = 0;
1893 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1894 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1895 			vecOffset = 0;
1896 			vecIndex++;
1897 		}
1898 
1899 		if (vecIndex >= vecCount)
1900 			break;
1901 
1902 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1903 			protection, area->MemoryType(), &reservation);
1904 
1905 		vecOffset += B_PAGE_SIZE;
1906 	}
1907 
1908 	map->Unlock();
1909 	vm_page_unreserve_pages(&reservation);
1910 
1911 	if (_size != NULL)
1912 		*_size = size;
1913 
1914 	area->cache_type = CACHE_TYPE_DEVICE;
1915 	return area->id;
1916 }
1917 
1918 
1919 area_id
1920 vm_create_null_area(team_id team, const char* name, void** address,
1921 	uint32 addressSpec, addr_t size, uint32 flags)
1922 {
1923 	size = PAGE_ALIGN(size);
1924 
1925 	// Lock the address space and, if B_EXACT_ADDRESS and
1926 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1927 	// is not wired.
1928 	AddressSpaceWriteLocker locker;
1929 	do {
1930 		if (locker.SetTo(team) != B_OK)
1931 			return B_BAD_TEAM_ID;
1932 	} while (addressSpec == B_EXACT_ADDRESS
1933 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1934 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1935 			(addr_t)*address, size, &locker));
1936 
1937 	// create a null cache
1938 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1939 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1940 	VMCache* cache;
1941 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1942 	if (status != B_OK)
1943 		return status;
1944 
1945 	cache->temporary = 1;
1946 	cache->virtual_end = size;
1947 
1948 	cache->Lock();
1949 
1950 	VMArea* area;
1951 	virtual_address_restrictions addressRestrictions = {};
1952 	addressRestrictions.address = *address;
1953 	addressRestrictions.address_specification = addressSpec;
1954 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1955 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
1956 		REGION_NO_PRIVATE_MAP, flags,
1957 		&addressRestrictions, true, &area, address);
1958 
1959 	if (status < B_OK) {
1960 		cache->ReleaseRefAndUnlock();
1961 		return status;
1962 	}
1963 
1964 	cache->Unlock();
1965 
1966 	area->cache_type = CACHE_TYPE_NULL;
1967 	return area->id;
1968 }
1969 
1970 
1971 /*!	Creates the vnode cache for the specified \a vnode.
1972 	The vnode has to be marked busy when calling this function.
1973 */
1974 status_t
1975 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1976 {
1977 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1978 }
1979 
1980 
1981 /*!	\a cache must be locked. The area's address space must be read-locked.
1982 */
1983 static void
1984 pre_map_area_pages(VMArea* area, VMCache* cache,
1985 	vm_page_reservation* reservation)
1986 {
1987 	addr_t baseAddress = area->Base();
1988 	addr_t cacheOffset = area->cache_offset;
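	// compute the range of cache page indices covered by this area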
1989 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1990 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1991 
1992 	for (VMCachePagesTree::Iterator it
1993 				= cache->pages.GetIterator(firstPage, true, true);
1994 			vm_page* page = it.Next();) {
1995 		if (page->cache_offset >= endPage)
1996 			break;
1997 
1998 		// skip busy and inactive pages
1999 		if (page->busy || page->usage_count == 0)
2000 			continue;
2001 
2002 		DEBUG_PAGE_ACCESS_START(page);
2003 		map_page(area, page,
2004 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2005 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2006 		DEBUG_PAGE_ACCESS_END(page);
2007 	}
2008 }
2009 
2010 
2011 /*!	Will map the file specified by \a fd to an area in memory.
2012 	The file will be mirrored beginning at the specified \a offset. The
2013 	\a offset and \a size arguments have to be page aligned.
2014 */
2015 static area_id
2016 _vm_map_file(team_id team, const char* name, void** _address,
2017 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2018 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2019 {
2020 	// TODO: for binary files, we want to make sure that they get a
2021 	//	copy of the file as it is at a given point in time, i.e. later changes
2022 	//	should not make it into the mapped copy -- this will need quite some
2023 	//	changes to be done in a nice way
2024 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2025 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2026 
2027 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2028 	size = PAGE_ALIGN(size);
2029 
2030 	if (mapping == REGION_NO_PRIVATE_MAP)
2031 		protection |= B_SHARED_AREA;
2032 	if (addressSpec != B_EXACT_ADDRESS)
2033 		unmapAddressRange = false;
2034 
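	// A negative fd means no file is involved -- create plain anonymous
	// memory instead.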
2035 	if (fd < 0) {
2036 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2037 		virtual_address_restrictions virtualRestrictions = {};
2038 		virtualRestrictions.address = *_address;
2039 		virtualRestrictions.address_specification = addressSpec;
2040 		physical_address_restrictions physicalRestrictions = {};
2041 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2042 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2043 			_address);
2044 	}
2045 
2046 	// get the open flags of the FD
2047 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2048 	if (descriptor == NULL)
2049 		return EBADF;
2050 	int32 openMode = descriptor->open_mode;
2051 	put_fd(descriptor);
2052 
2053 	// The FD must be open for reading in any case. For a shared mapping with
2054 	// write access, the FD additionally must be open for writing.
2055 	if ((openMode & O_ACCMODE) == O_WRONLY
2056 		|| (mapping == REGION_NO_PRIVATE_MAP
2057 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2058 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2059 		return EACCES;
2060 	}
2061 
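	// For shared mappings the maximum allowed protection depends on the FD's
	// open mode: write access is only permitted if it was opened read/write.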
2062 	uint32 protectionMax = 0;
2063 	if (mapping != REGION_PRIVATE_MAP) {
2064 		if ((openMode & O_ACCMODE) == O_RDWR)
2065 			protectionMax = protection | B_USER_PROTECTION;
2066 		else
2067 			protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA);
2068 	}
2069 
2070 	// get the vnode for the object, this also grabs a ref to it
2071 	struct vnode* vnode = NULL;
2072 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2073 	if (status < B_OK)
2074 		return status;
2075 	VnodePutter vnodePutter(vnode);
2076 
2077 	// If we're going to pre-map pages, we need to reserve the pages needed by
2078 	// the mapping backend upfront.
2079 	page_num_t reservedPreMapPages = 0;
2080 	vm_page_reservation reservation;
2081 	if ((protection & B_READ_AREA) != 0) {
2082 		AddressSpaceWriteLocker locker;
2083 		status = locker.SetTo(team);
2084 		if (status != B_OK)
2085 			return status;
2086 
2087 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2088 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2089 
2090 		locker.Unlock();
2091 
2092 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2093 			team == VMAddressSpace::KernelID()
2094 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2095 	}
2096 
2097 	struct PageUnreserver {
2098 		PageUnreserver(vm_page_reservation* reservation)
2099 			:
2100 			fReservation(reservation)
2101 		{
2102 		}
2103 
2104 		~PageUnreserver()
2105 		{
2106 			if (fReservation != NULL)
2107 				vm_page_unreserve_pages(fReservation);
2108 		}
2109 
2110 		vm_page_reservation* fReservation;
2111 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2112 
2113 	// Lock the address space and, if the specified address range shall be
2114 	// unmapped, ensure it is not wired.
2115 	AddressSpaceWriteLocker locker;
2116 	do {
2117 		if (locker.SetTo(team) != B_OK)
2118 			return B_BAD_TEAM_ID;
2119 	} while (unmapAddressRange
2120 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2121 			(addr_t)*_address, size, &locker));
2122 
2123 	// TODO: this only works for file systems that use the file cache
2124 	VMCache* cache;
2125 	status = vfs_get_vnode_cache(vnode, &cache, false);
2126 	if (status < B_OK)
2127 		return status;
2128 
2129 	cache->Lock();
2130 
2131 	VMArea* area;
2132 	virtual_address_restrictions addressRestrictions = {};
2133 	addressRestrictions.address = *_address;
2134 	addressRestrictions.address_specification = addressSpec;
2135 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2136 		0, protection, protectionMax, mapping,
2137 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
2138 		&addressRestrictions, kernel, &area, _address);
2139 
2140 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2141 		// map_backing_store() cannot know we no longer need the ref
2142 		cache->ReleaseRefLocked();
2143 	}
2144 
2145 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2146 		pre_map_area_pages(area, cache, &reservation);
2147 
2148 	cache->Unlock();
2149 
2150 	if (status == B_OK) {
2151 		// TODO: this probably deserves a smarter solution, i.e. don't always
2152 		// prefetch stuff, and also probably don't trigger it at this place.
2153 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2154 			// prefetches at max 10 MB starting from "offset"
2155 	}
2156 
2157 	if (status != B_OK)
2158 		return status;
2159 
2160 	area->cache_type = CACHE_TYPE_VNODE;
2161 	return area->id;
2162 }
2163 
2164 
2165 area_id
2166 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2167 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2168 	int fd, off_t offset)
2169 {
2170 	if (!arch_vm_supports_protection(protection))
2171 		return B_NOT_SUPPORTED;
2172 
2173 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2174 		mapping, unmapAddressRange, fd, offset, true);
2175 }
2176 
2177 
2178 VMCache*
2179 vm_area_get_locked_cache(VMArea* area)
2180 {
2181 	rw_lock_read_lock(&sAreaCacheLock);
2182 
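	// Acquiring the cache lock requires temporarily dropping sAreaCacheLock,
	// so the area's cache may change underneath us -- retry until it is
	// stable.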
2183 	while (true) {
2184 		VMCache* cache = area->cache;
2185 
2186 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2187 			// cache has been deleted
2188 			rw_lock_read_lock(&sAreaCacheLock);
2189 			continue;
2190 		}
2191 
2192 		rw_lock_read_lock(&sAreaCacheLock);
2193 
2194 		if (cache == area->cache) {
2195 			cache->AcquireRefLocked();
2196 			rw_lock_read_unlock(&sAreaCacheLock);
2197 			return cache;
2198 		}
2199 
2200 		// the cache changed in the meantime
2201 		cache->Unlock();
2202 	}
2203 }
2204 
2205 
2206 void
2207 vm_area_put_locked_cache(VMCache* cache)
2208 {
2209 	cache->ReleaseRefAndUnlock();
2210 }
2211 
2212 
2213 area_id
2214 vm_clone_area(team_id team, const char* name, void** address,
2215 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2216 	bool kernel)
2217 {
2218 	VMArea* newArea = NULL;
2219 	VMArea* sourceArea;
2220 
2221 	// Check whether the source area exists and is cloneable. If so, mark it
2222 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2223 	{
2224 		AddressSpaceWriteLocker locker;
2225 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2226 		if (status != B_OK)
2227 			return status;
2228 
2229 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2230 			return B_NOT_ALLOWED;
2231 
2232 		sourceArea->protection |= B_SHARED_AREA;
2233 		protection |= B_SHARED_AREA;
2234 	}
2235 
2236 	// Now lock both address spaces and actually do the cloning.
2237 
2238 	MultiAddressSpaceLocker locker;
2239 	VMAddressSpace* sourceAddressSpace;
2240 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2241 	if (status != B_OK)
2242 		return status;
2243 
2244 	VMAddressSpace* targetAddressSpace;
2245 	status = locker.AddTeam(team, true, &targetAddressSpace);
2246 	if (status != B_OK)
2247 		return status;
2248 
2249 	status = locker.Lock();
2250 	if (status != B_OK)
2251 		return status;
2252 
2253 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2254 	if (sourceArea == NULL)
2255 		return B_BAD_VALUE;
2256 
2257 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2258 		return B_NOT_ALLOWED;
2259 
2260 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2261 
2262 	if (!kernel && sourceAddressSpace != targetAddressSpace
2263 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2264 #if KDEBUG
2265 		Team* team = thread_get_current_thread()->team;
2266 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2267 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2268 #endif
2269 		status = B_NOT_ALLOWED;
2270 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2271 		status = B_NOT_ALLOWED;
2272 	} else {
2273 		virtual_address_restrictions addressRestrictions = {};
2274 		addressRestrictions.address = *address;
2275 		addressRestrictions.address_specification = addressSpec;
2276 		status = map_backing_store(targetAddressSpace, cache,
2277 			sourceArea->cache_offset, name, sourceArea->Size(),
2278 			sourceArea->wiring, protection, sourceArea->protection_max,
2279 			mapping, 0, &addressRestrictions,
2280 			kernel, &newArea, address);
2281 	}
2282 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2283 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2284 		// to create a new cache, and has therefore already acquired a reference
2285 		// to the source cache - but otherwise it has no idea that we need
2286 		// one.
2287 		cache->AcquireRefLocked();
2288 	}
2289 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2290 		// we need to map in everything at this point
2291 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2292 			// we don't have actual pages to map but a physical area
2293 			VMTranslationMap* map
2294 				= sourceArea->address_space->TranslationMap();
2295 			map->Lock();
2296 
2297 			phys_addr_t physicalAddress;
2298 			uint32 oldProtection;
2299 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2300 
2301 			map->Unlock();
2302 
2303 			map = targetAddressSpace->TranslationMap();
2304 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2305 				newArea->Base() + (newArea->Size() - 1));
2306 
2307 			vm_page_reservation reservation;
2308 			vm_page_reserve_pages(&reservation, reservePages,
2309 				targetAddressSpace == VMAddressSpace::Kernel()
2310 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2311 			map->Lock();
2312 
2313 			for (addr_t offset = 0; offset < newArea->Size();
2314 					offset += B_PAGE_SIZE) {
2315 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2316 					protection, newArea->MemoryType(), &reservation);
2317 			}
2318 
2319 			map->Unlock();
2320 			vm_page_unreserve_pages(&reservation);
2321 		} else {
2322 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2323 			size_t reservePages = map->MaxPagesNeededToMap(
2324 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2325 			vm_page_reservation reservation;
2326 			vm_page_reserve_pages(&reservation, reservePages,
2327 				targetAddressSpace == VMAddressSpace::Kernel()
2328 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2329 
2330 			// map in all pages from source
2331 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2332 					vm_page* page  = it.Next();) {
2333 				if (!page->busy) {
2334 					DEBUG_PAGE_ACCESS_START(page);
2335 					map_page(newArea, page,
2336 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2337 							- newArea->cache_offset),
2338 						protection, &reservation);
2339 					DEBUG_PAGE_ACCESS_END(page);
2340 				}
2341 			}
2342 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2343 			// ensuring that!
2344 
2345 			vm_page_unreserve_pages(&reservation);
2346 		}
2347 	}
2348 	if (status == B_OK)
2349 		newArea->cache_type = sourceArea->cache_type;
2350 
2351 	vm_area_put_locked_cache(cache);
2352 
2353 	if (status < B_OK)
2354 		return status;
2355 
2356 	return newArea->id;
2357 }
2358 
2359 
2360 /*!	Deletes the specified area of the given address space.
2361 
2362 	The address space must be write-locked.
2363 	The caller must ensure that the area does not have any wired ranges.
2364 
2365 	\param addressSpace The address space containing the area.
2366 	\param area The area to be deleted.
2367 	\param deletingAddressSpace \c true, if the address space is in the process
2368 		of being deleted.
2369 */
2370 static void
2371 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2372 	bool deletingAddressSpace)
2373 {
2374 	ASSERT(!area->IsWired());
2375 
2376 	VMAreas::Remove(area);
2377 
2378 	// At this point the area is removed from the global hash table, but
2379 	// still exists in the area list.
2380 
2381 	// Unmap the virtual address space the area occupied.
2382 	{
2383 		// We need to lock the complete cache chain.
2384 		VMCache* topCache = vm_area_get_locked_cache(area);
2385 		VMCacheChainLocker cacheChainLocker(topCache);
2386 		cacheChainLocker.LockAllSourceCaches();
2387 
2388 		// If the area's top cache is a temporary cache and the area is the only
2389 		// one referencing it (besides us currently holding a second reference),
2390 		// the unmapping code doesn't need to care about preserving the accessed
2391 		// and dirty flags of the top cache page mappings.
2392 		bool ignoreTopCachePageFlags
2393 			= topCache->temporary && topCache->RefCount() == 2;
2394 
2395 		area->address_space->TranslationMap()->UnmapArea(area,
2396 			deletingAddressSpace, ignoreTopCachePageFlags);
2397 	}
2398 
2399 	if (!area->cache->temporary)
2400 		area->cache->WriteModified();
2401 
2402 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2403 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2404 
2405 	arch_vm_unset_memory_type(area);
2406 	addressSpace->RemoveArea(area, allocationFlags);
2407 	addressSpace->Put();
2408 
2409 	area->cache->RemoveArea(area);
2410 	area->cache->ReleaseRef();
2411 
2412 	addressSpace->DeleteArea(area, allocationFlags);
2413 }
2414 
2415 
2416 status_t
2417 vm_delete_area(team_id team, area_id id, bool kernel)
2418 {
2419 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2420 		team, id));
2421 
2422 	// lock the address space and make sure the area isn't wired
2423 	AddressSpaceWriteLocker locker;
2424 	VMArea* area;
2425 	AreaCacheLocker cacheLocker;
2426 
2427 	do {
2428 		status_t status = locker.SetFromArea(team, id, area);
2429 		if (status != B_OK)
2430 			return status;
2431 
2432 		cacheLocker.SetTo(area);
2433 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2434 
2435 	cacheLocker.Unlock();
2436 
2437 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2438 		return B_NOT_ALLOWED;
2439 
2440 	delete_area(locker.AddressSpace(), area, false);
2441 	return B_OK;
2442 }
2443 
2444 
2445 /*!	Creates a new cache on top of given cache, moves all areas from
2446 	the old cache to the new one, and changes the protection of all affected
2447 	areas' pages to read-only. If requested, wired pages are moved up to the
2448 	new cache and copies are added to the old cache in their place.
2449 	Preconditions:
2450 	- The given cache must be locked.
2451 	- All of the cache's areas' address spaces must be read locked.
2452 	- Either the cache must not have any wired ranges or a page reservation for
2453 	  all wired pages must be provided, so they can be copied.
2454 
2455 	\param lowerCache The cache on top of which a new cache shall be created.
2456 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2457 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2458 		has wired pages. The wired pages are copied in this case.
2459 */
2460 static status_t
2461 vm_copy_on_write_area(VMCache* lowerCache,
2462 	vm_page_reservation* wiredPagesReservation)
2463 {
2464 	VMCache* upperCache;
2465 
2466 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2467 
2468 	// We need to separate the cache from its areas. The cache goes one level
2469 	// deeper and we create a new cache in between.
2470 
2471 	// create an anonymous cache
2472 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2473 		lowerCache->GuardSize() / B_PAGE_SIZE,
2474 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2475 		VM_PRIORITY_USER);
2476 	if (status != B_OK)
2477 		return status;
2478 
2479 	upperCache->Lock();
2480 
2481 	upperCache->temporary = 1;
2482 	upperCache->virtual_base = lowerCache->virtual_base;
2483 	upperCache->virtual_end = lowerCache->virtual_end;
2484 
2485 	// transfer the lower cache areas to the upper cache
2486 	rw_lock_write_lock(&sAreaCacheLock);
2487 	upperCache->TransferAreas(lowerCache);
2488 	rw_lock_write_unlock(&sAreaCacheLock);
2489 
2490 	lowerCache->AddConsumer(upperCache);
2491 
2492 	// We now need to remap all pages from all of the cache's areas read-only,
2493 	// so that a copy will be created on next write access. If there are wired
2494 	// pages, we keep their protection, move them to the upper cache and create
2495 	// copies for the lower cache.
2496 	if (wiredPagesReservation != NULL) {
2497 		// We need to handle wired pages -- iterate through the cache's pages.
2498 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2499 				vm_page* page = it.Next();) {
2500 			if (page->WiredCount() > 0) {
2501 				// allocate a new page and copy the wired one
2502 				vm_page* copiedPage = vm_page_allocate_page(
2503 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2504 
2505 				vm_memcpy_physical_page(
2506 					copiedPage->physical_page_number * B_PAGE_SIZE,
2507 					page->physical_page_number * B_PAGE_SIZE);
2508 
2509 				// move the wired page to the upper cache (note: removing is OK
2510 				// with the SplayTree iterator) and insert the copy
2511 				upperCache->MovePage(page);
2512 				lowerCache->InsertPage(copiedPage,
2513 					page->cache_offset * B_PAGE_SIZE);
2514 
2515 				DEBUG_PAGE_ACCESS_END(copiedPage);
2516 			} else {
2517 				// Change the protection of this page in all areas.
2518 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2519 						tempArea = tempArea->cache_next) {
2520 					// The area must be readable in the same way it was
2521 					// previously writable.
2522 					addr_t address = virtual_page_address(tempArea, page);
2523 					uint32 protection = 0;
2524 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2525 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2526 						protection |= B_KERNEL_READ_AREA;
2527 					if ((pageProtection & B_READ_AREA) != 0)
2528 						protection |= B_READ_AREA;
2529 
2530 					VMTranslationMap* map
2531 						= tempArea->address_space->TranslationMap();
2532 					map->Lock();
2533 					map->ProtectPage(tempArea, address, protection);
2534 					map->Unlock();
2535 				}
2536 			}
2537 		}
2538 	} else {
2539 		ASSERT(lowerCache->WiredPagesCount() == 0);
2540 
2541 		// just change the protection of all areas
2542 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2543 				tempArea = tempArea->cache_next) {
2544 			if (tempArea->page_protections != NULL) {
2545 				// Change the protection of all pages in this area.
2546 				VMTranslationMap* map = tempArea->address_space->TranslationMap();
2547 				map->Lock();
2548 				for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2549 					vm_page* page = it.Next();) {
2550 					// The area must be readable in the same way it was
2551 					// previously writable.
2552 					addr_t address = virtual_page_address(tempArea, page);
2553 					uint32 protection = 0;
2554 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2555 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2556 						protection |= B_KERNEL_READ_AREA;
2557 					if ((pageProtection & B_READ_AREA) != 0)
2558 						protection |= B_READ_AREA;
2559 
2560 					map->ProtectPage(tempArea, address, protection);
2561 				}
2562 				map->Unlock();
2563 				continue;
2564 			}
2565 			// The area must be readable in the same way it was previously
2566 			// writable.
2567 			uint32 protection = 0;
2568 			if ((tempArea->protection & B_KERNEL_READ_AREA) != 0)
2569 				protection |= B_KERNEL_READ_AREA;
2570 			if ((tempArea->protection & B_READ_AREA) != 0)
2571 				protection |= B_READ_AREA;
2572 
2573 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2574 			map->Lock();
2575 			map->ProtectArea(tempArea, protection);
2576 			map->Unlock();
2577 		}
2578 	}
2579 
2580 	vm_area_put_locked_cache(upperCache);
2581 
2582 	return B_OK;
2583 }
2584 
2585 
2586 area_id
2587 vm_copy_area(team_id team, const char* name, void** _address,
2588 	uint32 addressSpec, area_id sourceID)
2589 {
2590 	// Do the locking: target address space, all address spaces associated with
2591 	// the source cache, and the cache itself.
2592 	MultiAddressSpaceLocker locker;
2593 	VMAddressSpace* targetAddressSpace;
2594 	VMCache* cache;
2595 	VMArea* source;
2596 	AreaCacheLocker cacheLocker;
2597 	status_t status;
2598 	bool sharedArea;
2599 
2600 	page_num_t wiredPages = 0;
2601 	vm_page_reservation wiredPagesReservation;
2602 
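	// If the source area isn't shared, pages need to be reserved for copies
	// of its wired pages. Reserving can block, so it has to happen with the
	// locks dropped; since the wired page count may change in the meantime,
	// retry until the reservation is large enough.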
2603 	bool restart;
2604 	do {
2605 		restart = false;
2606 
2607 		locker.Unset();
2608 		status = locker.AddTeam(team, true, &targetAddressSpace);
2609 		if (status == B_OK) {
2610 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2611 				&cache);
2612 		}
2613 		if (status != B_OK)
2614 			return status;
2615 
2616 		cacheLocker.SetTo(cache, true);	// already locked
2617 
2618 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2619 
2620 		page_num_t oldWiredPages = wiredPages;
2621 		wiredPages = 0;
2622 
2623 		// If the source area isn't shared, count the number of wired pages in
2624 		// the cache and reserve as many pages.
2625 		if (!sharedArea) {
2626 			wiredPages = cache->WiredPagesCount();
2627 
2628 			if (wiredPages > oldWiredPages) {
2629 				cacheLocker.Unlock();
2630 				locker.Unlock();
2631 
2632 				if (oldWiredPages > 0)
2633 					vm_page_unreserve_pages(&wiredPagesReservation);
2634 
2635 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2636 					VM_PRIORITY_USER);
2637 
2638 				restart = true;
2639 			}
2640 		} else if (oldWiredPages > 0)
2641 			vm_page_unreserve_pages(&wiredPagesReservation);
2642 	} while (restart);
2643 
2644 	// unreserve pages later
2645 	struct PagesUnreserver {
2646 		PagesUnreserver(vm_page_reservation* reservation)
2647 			:
2648 			fReservation(reservation)
2649 		{
2650 		}
2651 
2652 		~PagesUnreserver()
2653 		{
2654 			if (fReservation != NULL)
2655 				vm_page_unreserve_pages(fReservation);
2656 		}
2657 
2658 	private:
2659 		vm_page_reservation*	fReservation;
2660 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2661 
2662 	bool writableCopy
2663 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2664 	uint8* targetPageProtections = NULL;
2665 
2666 	if (source->page_protections != NULL) {
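		// page protections are stored as 4 bits per page, i.e. two pages
		// per byte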
2667 		size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2;
2668 		targetPageProtections = (uint8*)malloc_etc(bytes,
2669 			(source->address_space == VMAddressSpace::Kernel()
2670 					|| targetAddressSpace == VMAddressSpace::Kernel())
2671 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2672 		if (targetPageProtections == NULL)
2673 			return B_NO_MEMORY;
2674 
2675 		memcpy(targetPageProtections, source->page_protections, bytes);
2676 
2677 		if (!writableCopy) {
2678 			for (size_t i = 0; i < bytes; i++) {
2679 				if ((targetPageProtections[i]
2680 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2681 					writableCopy = true;
2682 					break;
2683 				}
2684 			}
2685 		}
2686 	}
2687 
2688 	if (addressSpec == B_CLONE_ADDRESS) {
2689 		addressSpec = B_EXACT_ADDRESS;
2690 		*_address = (void*)source->Base();
2691 	}
2692 
2693 	// First, create a cache on top of the source area, or use the existing
2694 	// one if this is a shared area.
2695 
2696 	VMArea* target;
2697 	virtual_address_restrictions addressRestrictions = {};
2698 	addressRestrictions.address = *_address;
2699 	addressRestrictions.address_specification = addressSpec;
2700 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2701 		name, source->Size(), source->wiring, source->protection,
2702 		source->protection_max,
2703 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2704 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2705 		&addressRestrictions, true, &target, _address);
2706 	if (status < B_OK) {
2707 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2708 		return status;
2709 	}
2710 
2711 	if (targetPageProtections != NULL)
2712 		target->page_protections = targetPageProtections;
2713 
2714 	if (sharedArea) {
2715 		// The new area uses the old area's cache, but map_backing_store()
2716 		// hasn't acquired a ref. So we have to do that now.
2717 		cache->AcquireRefLocked();
2718 	}
2719 
2720 	// If the source area is writable, we need to move it one layer up as well
2721 
2722 	if (!sharedArea) {
2723 		if (writableCopy) {
2724 			// TODO: do something more useful if this fails!
2725 			if (vm_copy_on_write_area(cache,
2726 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2727 				panic("vm_copy_on_write_area() failed!\n");
2728 			}
2729 		}
2730 	}
2731 
2732 	// we return the ID of the newly created area
2733 	return target->id;
2734 }
2735 
2736 
2737 status_t
2738 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2739 	bool kernel)
2740 {
2741 	fix_protection(&newProtection);
2742 
2743 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2744 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2745 
2746 	if (!arch_vm_supports_protection(newProtection))
2747 		return B_NOT_SUPPORTED;
2748 
2749 	bool becomesWritable
2750 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2751 
2752 	// lock address spaces and cache
2753 	MultiAddressSpaceLocker locker;
2754 	VMCache* cache;
2755 	VMArea* area;
2756 	status_t status;
2757 	AreaCacheLocker cacheLocker;
2758 	bool isWritable;
2759 
2760 	bool restart;
2761 	do {
2762 		restart = false;
2763 
2764 		locker.Unset();
2765 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2766 		if (status != B_OK)
2767 			return status;
2768 
2769 		cacheLocker.SetTo(cache, true);	// already locked
2770 
2771 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2772 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2773 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2774 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2775 				" (%s)\n", team, newProtection, areaID, area->name);
2776 			return B_NOT_ALLOWED;
2777 		}
2778 		if (!kernel && area->protection_max != 0
2779 			&& (newProtection & area->protection_max)
2780 				!= (newProtection & B_USER_PROTECTION)) {
2781 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2782 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
2783 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2784 				area->protection_max, areaID, area->name);
2785 			return B_NOT_ALLOWED;
2786 		}
2787 
2788 		if (area->protection == newProtection)
2789 			return B_OK;
2790 
2791 		if (team != VMAddressSpace::KernelID()
2792 			&& area->address_space->ID() != team) {
2793 			// unless you're the kernel, you are only allowed to set
2794 			// the protection of your own areas
2795 			return B_NOT_ALLOWED;
2796 		}
2797 
2798 		isWritable
2799 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2800 
2801 		// Make sure the area (respectively, if we're going to call
2802 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2803 		// wired ranges.
2804 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2805 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2806 					otherArea = otherArea->cache_next) {
2807 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2808 					restart = true;
2809 					break;
2810 				}
2811 			}
2812 		} else {
2813 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2814 				restart = true;
2815 		}
2816 	} while (restart);
2817 
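	// changePageProtection: already mapped pages get remapped with the new
	// protection; changeTopCachePagesOnly: restrict that remapping to pages
	// of the top cache (pages of lower caches are mapped read-only anyway)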
2818 	bool changePageProtection = true;
2819 	bool changeTopCachePagesOnly = false;
2820 
2821 	if (isWritable && !becomesWritable) {
2822 		// writable -> !writable
2823 
2824 		if (cache->source != NULL && cache->temporary) {
2825 			if (cache->CountWritableAreas(area) == 0) {
2826 				// Since this cache is now backed by the pages in its source
2827 				// cache, we can reduce its commitment to cover only the pages
2828 				// that really are in this cache.
2829 
2830 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2831 					team == VMAddressSpace::KernelID()
2832 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2833 
2834 				// TODO: we may be able to join with our source cache, if
2835 				// count == 0
2836 			}
2837 		}
2838 
2839 		// If only the writability changes, we can just remap the pages of the
2840 		// top cache, since the pages of lower caches are mapped read-only
2841 		// anyway. That's only advantageous if the number of pages in the cache
2842 		// is significantly smaller than the number of pages in the area,
2843 		// though.
2844 		if (newProtection
2845 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2846 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2847 			changeTopCachePagesOnly = true;
2848 		}
2849 	} else if (!isWritable && becomesWritable) {
2850 		// !writable -> writable
2851 
2852 		if (!cache->consumers.IsEmpty()) {
2853 			// There are consumers -- we have to insert a new cache. Fortunately
2854 			// vm_copy_on_write_area() does everything that's needed.
2855 			changePageProtection = false;
2856 			status = vm_copy_on_write_area(cache, NULL);
2857 		} else {
2858 			// No consumers, so we don't need to insert a new one.
2859 			if (cache->source != NULL && cache->temporary) {
2860 				// the cache's commitment must contain all possible pages
2861 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2862 					team == VMAddressSpace::KernelID()
2863 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2864 			}
2865 
2866 			if (status == B_OK && cache->source != NULL) {
2867 				// There's a source cache, hence we can't just change all pages'
2868 				// protection or we might allow writing into pages belonging to
2869 				// a lower cache.
2870 				changeTopCachePagesOnly = true;
2871 			}
2872 		}
2873 	} else {
2874 		// we don't have anything special to do in all other cases
2875 	}
2876 
2877 	if (status == B_OK) {
2878 		// remap existing pages in this cache
2879 		if (changePageProtection) {
2880 			VMTranslationMap* map = area->address_space->TranslationMap();
2881 			map->Lock();
2882 
2883 			if (changeTopCachePagesOnly) {
2884 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2885 				page_num_t lastPageOffset
2886 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2887 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2888 						vm_page* page = it.Next();) {
2889 					if (page->cache_offset >= firstPageOffset
2890 						&& page->cache_offset <= lastPageOffset) {
2891 						addr_t address = virtual_page_address(area, page);
2892 						map->ProtectPage(area, address, newProtection);
2893 					}
2894 				}
2895 			} else
2896 				map->ProtectArea(area, newProtection);
2897 
2898 			map->Unlock();
2899 		}
2900 
2901 		area->protection = newProtection;
2902 	}
2903 
2904 	return status;
2905 }
2906 
2907 
2908 status_t
2909 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2910 {
2911 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2912 	if (addressSpace == NULL)
2913 		return B_BAD_TEAM_ID;
2914 
2915 	VMTranslationMap* map = addressSpace->TranslationMap();
2916 
2917 	map->Lock();
2918 	uint32 dummyFlags;
2919 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2920 	map->Unlock();
2921 
2922 	addressSpace->Put();
2923 	return status;
2924 }
2925 
2926 
2927 /*!	The page's cache must be locked.
2928 */
2929 bool
2930 vm_test_map_modification(vm_page* page)
2931 {
2932 	if (page->modified)
2933 		return true;
2934 
2935 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2936 	vm_page_mapping* mapping;
2937 	while ((mapping = iterator.Next()) != NULL) {
2938 		VMArea* area = mapping->area;
2939 		VMTranslationMap* map = area->address_space->TranslationMap();
2940 
2941 		phys_addr_t physicalAddress;
2942 		uint32 flags;
2943 		map->Lock();
2944 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2945 		map->Unlock();
2946 
2947 		if ((flags & PAGE_MODIFIED) != 0)
2948 			return true;
2949 	}
2950 
2951 	return false;
2952 }
2953 
2954 
2955 /*!	The page's cache must be locked.
2956 */
2957 void
2958 vm_clear_map_flags(vm_page* page, uint32 flags)
2959 {
2960 	if ((flags & PAGE_ACCESSED) != 0)
2961 		page->accessed = false;
2962 	if ((flags & PAGE_MODIFIED) != 0)
2963 		page->modified = false;
2964 
2965 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2966 	vm_page_mapping* mapping;
2967 	while ((mapping = iterator.Next()) != NULL) {
2968 		VMArea* area = mapping->area;
2969 		VMTranslationMap* map = area->address_space->TranslationMap();
2970 
2971 		map->Lock();
2972 		map->ClearFlags(virtual_page_address(area, page), flags);
2973 		map->Unlock();
2974 	}
2975 }
2976 
2977 
2978 /*!	Removes all mappings from a page.
2979 	After you've called this function, the page is unmapped from memory and
2980 	the page's \c accessed and \c modified flags have been updated according
2981 	to the state of the mappings.
2982 	The page's cache must be locked.
2983 */
2984 void
2985 vm_remove_all_page_mappings(vm_page* page)
2986 {
2987 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2988 		VMArea* area = mapping->area;
2989 		VMTranslationMap* map = area->address_space->TranslationMap();
2990 		addr_t address = virtual_page_address(area, page);
2991 		map->UnmapPage(area, address, false);
2992 	}
2993 }
2994 
2995 
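/*!	Clears the accessed flag of all of the page's mappings (and of the page
	itself) and returns how many of them were set. The page's \c modified flag
	is updated from the mappings.
	The page's cache must be locked.
*/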
2996 int32
2997 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2998 {
2999 	int32 count = 0;
3000 
3001 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3002 	vm_page_mapping* mapping;
3003 	while ((mapping = iterator.Next()) != NULL) {
3004 		VMArea* area = mapping->area;
3005 		VMTranslationMap* map = area->address_space->TranslationMap();
3006 
3007 		bool modified;
3008 		if (map->ClearAccessedAndModified(area,
3009 				virtual_page_address(area, page), false, modified)) {
3010 			count++;
3011 		}
3012 
3013 		page->modified |= modified;
3014 	}
3015 
3016 
3017 	if (page->accessed) {
3018 		count++;
3019 		page->accessed = false;
3020 	}
3021 
3022 	return count;
3023 }
3024 
3025 
3026 /*!	Removes all mappings of a page and/or clears the accessed bits of the
3027 	mappings.
3028 	The function iterates through the page mappings and removes them until
3029 	encountering one that has been accessed. From then on it will continue to
3030 	iterate, but only clear the accessed flag of the mapping. The page's
3031 	\c modified bit will be updated accordingly, the \c accessed bit will be
3032 	cleared.
3033 	\return The number of mapping accessed bits encountered, including the
3034 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3035 		of the page have been removed.
3036 */
3037 int32
3038 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3039 {
3040 	ASSERT(page->WiredCount() == 0);
3041 
3042 	if (page->accessed)
3043 		return vm_clear_page_mapping_accessed_flags(page);
3044 
3045 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3046 		VMArea* area = mapping->area;
3047 		VMTranslationMap* map = area->address_space->TranslationMap();
3048 		addr_t address = virtual_page_address(area, page);
3049 		bool modified = false;
3050 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3051 			page->accessed = true;
3052 			page->modified |= modified;
3053 			return vm_clear_page_mapping_accessed_flags(page);
3054 		}
3055 		page->modified |= modified;
3056 	}
3057 
3058 	return 0;
3059 }
3060 
3061 
3062 static int
3063 display_mem(int argc, char** argv)
3064 {
3065 	bool physical = false;
3066 	addr_t copyAddress;
3067 	int32 displayWidth;
3068 	int32 itemSize;
3069 	int32 num = -1;
3070 	addr_t address;
3071 	int i = 1, j;
3072 
3073 	if (argc > 1 && argv[1][0] == '-') {
3074 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3075 			physical = true;
3076 			i++;
3077 		} else
3078 			i = 99;
3079 	}
3080 
3081 	if (argc < i + 1 || argc > i + 2) {
3082 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3083 			"\tdl - 8 bytes\n"
3084 			"\tdw - 4 bytes\n"
3085 			"\tds - 2 bytes\n"
3086 			"\tdb - 1 byte\n"
3087 			"\tstring - a whole string\n"
3088 			"  -p or --physical only allows memory from a single page to be "
3089 			"displayed.\n");
3090 		return 0;
3091 	}
3092 
3093 	address = parse_expression(argv[i]);
3094 
3095 	if (argc > i + 1)
3096 		num = parse_expression(argv[i + 1]);
3097 
3098 	// determine item size and number of items per line from the command name
3099 	if (strcmp(argv[0], "db") == 0) {
3100 		itemSize = 1;
3101 		displayWidth = 16;
3102 	} else if (strcmp(argv[0], "ds") == 0) {
3103 		itemSize = 2;
3104 		displayWidth = 8;
3105 	} else if (strcmp(argv[0], "dw") == 0) {
3106 		itemSize = 4;
3107 		displayWidth = 4;
3108 	} else if (strcmp(argv[0], "dl") == 0) {
3109 		itemSize = 8;
3110 		displayWidth = 2;
3111 	} else if (strcmp(argv[0], "string") == 0) {
3112 		itemSize = 1;
3113 		displayWidth = -1;
3114 	} else {
3115 		kprintf("display_mem called in an invalid way!\n");
3116 		return 0;
3117 	}
3118 
3119 	if (num <= 0)
3120 		num = displayWidth;
3121 
3122 	void* physicalPageHandle = NULL;
3123 
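	// For a physical address, map the page through the debug physical page
	// mapper and clamp the output to a single page.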
3124 	if (physical) {
3125 		int32 offset = address & (B_PAGE_SIZE - 1);
3126 		if (num * itemSize + offset > B_PAGE_SIZE) {
3127 			num = (B_PAGE_SIZE - offset) / itemSize;
3128 			kprintf("NOTE: number of bytes has been cut to page size\n");
3129 		}
3130 
3131 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3132 
3133 		if (vm_get_physical_page_debug(address, &copyAddress,
3134 				&physicalPageHandle) != B_OK) {
3135 			kprintf("getting the hardware page failed.\n");
3136 			return 0;
3137 		}
3138 
3139 		address += offset;
3140 		copyAddress += offset;
3141 	} else
3142 		copyAddress = address;
3143 
3144 	if (!strcmp(argv[0], "string")) {
3145 		kprintf("%p \"", (char*)copyAddress);
3146 
3147 		// string mode
3148 		for (i = 0; true; i++) {
3149 			char c;
3150 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3151 					!= B_OK
3152 				|| c == '\0') {
3153 				break;
3154 			}
3155 
3156 			if (c == '\n')
3157 				kprintf("\\n");
3158 			else if (c == '\t')
3159 				kprintf("\\t");
3160 			else {
3161 				if (!isprint(c))
3162 					c = '.';
3163 
3164 				kprintf("%c", c);
3165 			}
3166 		}
3167 
3168 		kprintf("\"\n");
3169 	} else {
3170 		// number mode
3171 		for (i = 0; i < num; i++) {
3172 			uint64 value;
3173 
3174 			if ((i % displayWidth) == 0) {
3175 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3176 				if (i != 0)
3177 					kprintf("\n");
3178 
3179 				kprintf("[0x%lx]  ", address + i * itemSize);
3180 
3181 				for (j = 0; j < displayed; j++) {
3182 					char c;
3183 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3184 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3185 						displayed = j;
3186 						break;
3187 					}
3188 					if (!isprint(c))
3189 						c = '.';
3190 
3191 					kprintf("%c", c);
3192 				}
3193 				if (num > displayWidth) {
3194 					// make sure the spacing in the last line is correct
3195 					for (j = displayed; j < displayWidth * itemSize; j++)
3196 						kprintf(" ");
3197 				}
3198 				kprintf("  ");
3199 			}
3200 
3201 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3202 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3203 				kprintf("read fault");
3204 				break;
3205 			}
3206 
3207 			switch (itemSize) {
3208 				case 1:
3209 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3210 					break;
3211 				case 2:
3212 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3213 					break;
3214 				case 4:
3215 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3216 					break;
3217 				case 8:
3218 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3219 					break;
3220 			}
3221 		}
3222 
3223 		kprintf("\n");
3224 	}
3225 
3226 	if (physical) {
3227 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3228 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3229 	}
3230 	return 0;
3231 }
3232 
3233 
3234 static void
3235 dump_cache_tree_recursively(VMCache* cache, int level,
3236 	VMCache* highlightCache)
3237 {
3238 	// print this cache
3239 	for (int i = 0; i < level; i++)
3240 		kprintf("  ");
3241 	if (cache == highlightCache)
3242 		kprintf("%p <--\n", cache);
3243 	else
3244 		kprintf("%p\n", cache);
3245 
3246 	// recursively print its consumers
3247 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3248 			VMCache* consumer = it.Next();) {
3249 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3250 	}
3251 }
3252 
3253 
3254 static int
3255 dump_cache_tree(int argc, char** argv)
3256 {
3257 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3258 		kprintf("usage: %s <address>\n", argv[0]);
3259 		return 0;
3260 	}
3261 
3262 	addr_t address = parse_expression(argv[1]);
3263 	if (address == 0)
3264 		return 0;
3265 
3266 	VMCache* cache = (VMCache*)address;
3267 	VMCache* root = cache;
3268 
3269 	// find the root cache (the transitive source)
3270 	while (root->source != NULL)
3271 		root = root->source;
3272 
3273 	dump_cache_tree_recursively(root, 0, cache);
3274 
3275 	return 0;
3276 }
3277 
3278 
3279 const char*
3280 vm_cache_type_to_string(int32 type)
3281 {
3282 	switch (type) {
3283 		case CACHE_TYPE_RAM:
3284 			return "RAM";
3285 		case CACHE_TYPE_DEVICE:
3286 			return "device";
3287 		case CACHE_TYPE_VNODE:
3288 			return "vnode";
3289 		case CACHE_TYPE_NULL:
3290 			return "null";
3291 
3292 		default:
3293 			return "unknown";
3294 	}
3295 }
3296 
3297 
3298 #if DEBUG_CACHE_LIST
3299 
3300 static void
3301 update_cache_info_recursively(VMCache* cache, cache_info& info)
3302 {
3303 	info.page_count += cache->page_count;
3304 	if (cache->type == CACHE_TYPE_RAM)
3305 		info.committed += cache->committed_size;
3306 
3307 	// recurse
3308 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3309 			VMCache* consumer = it.Next();) {
3310 		update_cache_info_recursively(consumer, info);
3311 	}
3312 }
3313 
3314 
3315 static int
3316 cache_info_compare_page_count(const void* _a, const void* _b)
3317 {
3318 	const cache_info* a = (const cache_info*)_a;
3319 	const cache_info* b = (const cache_info*)_b;
3320 	if (a->page_count == b->page_count)
3321 		return 0;
3322 	return a->page_count < b->page_count ? 1 : -1;
3323 }
3324 
3325 
3326 static int
3327 cache_info_compare_committed(const void* _a, const void* _b)
3328 {
3329 	const cache_info* a = (const cache_info*)_a;
3330 	const cache_info* b = (const cache_info*)_b;
3331 	if (a->committed == b->committed)
3332 		return 0;
3333 	return a->committed < b->committed ? 1 : -1;
3334 }
3335 
3336 
3337 static void
3338 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3339 {
3340 	for (int i = 0; i < level; i++)
3341 		kprintf("  ");
3342 
3343 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3344 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3345 		cache->virtual_base, cache->virtual_end, cache->page_count);
3346 
3347 	if (level == 0)
3348 		kprintf("/%lu", info.page_count);
3349 
3350 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3351 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3352 
3353 		if (level == 0)
3354 			kprintf("/%lu", info.committed);
3355 	}
3356 
3357 	// areas
3358 	if (cache->areas != NULL) {
3359 		VMArea* area = cache->areas;
3360 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3361 			area->name, area->address_space->ID());
3362 
3363 		while (area->cache_next != NULL) {
3364 			area = area->cache_next;
3365 			kprintf(", %" B_PRId32, area->id);
3366 		}
3367 	}
3368 
3369 	kputs("\n");
3370 
3371 	// recurse
3372 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3373 			VMCache* consumer = it.Next();) {
3374 		dump_caches_recursively(consumer, info, level + 1);
3375 	}
3376 }
3377 
3378 
3379 static int
3380 dump_caches(int argc, char** argv)
3381 {
3382 	if (sCacheInfoTable == NULL) {
3383 		kprintf("No cache info table!\n");
3384 		return 0;
3385 	}
3386 
3387 	bool sortByPageCount = true;
3388 
3389 	for (int32 i = 1; i < argc; i++) {
3390 		if (strcmp(argv[i], "-c") == 0) {
3391 			sortByPageCount = false;
3392 		} else {
3393 			print_debugger_command_usage(argv[0]);
3394 			return 0;
3395 		}
3396 	}
3397 
3398 	uint32 totalCount = 0;
3399 	uint32 rootCount = 0;
3400 	off_t totalCommitted = 0;
3401 	page_num_t totalPages = 0;
3402 
3403 	VMCache* cache = gDebugCacheList;
3404 	while (cache) {
3405 		totalCount++;
3406 		if (cache->source == NULL) {
3407 			cache_info stackInfo;
3408 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3409 				? sCacheInfoTable[rootCount] : stackInfo;
3410 			rootCount++;
3411 			info.cache = cache;
3412 			info.page_count = 0;
3413 			info.committed = 0;
3414 			update_cache_info_recursively(cache, info);
3415 			totalCommitted += info.committed;
3416 			totalPages += info.page_count;
3417 		}
3418 
3419 		cache = cache->debug_next;
3420 	}
3421 
3422 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3423 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3424 			sortByPageCount
3425 				? &cache_info_compare_page_count
3426 				: &cache_info_compare_committed);
3427 	}
3428 
3429 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3430 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3431 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3432 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3433 			"page count" : "committed size");
3434 
3435 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3436 		for (uint32 i = 0; i < rootCount; i++) {
3437 			cache_info& info = sCacheInfoTable[i];
3438 			dump_caches_recursively(info.cache, info, 0);
3439 		}
3440 	} else
3441 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3442 
3443 	return 0;
3444 }
3445 
3446 #endif	// DEBUG_CACHE_LIST
3447 
3448 
3449 static int
3450 dump_cache(int argc, char** argv)
3451 {
3452 	VMCache* cache;
3453 	bool showPages = false;
3454 	int i = 1;
3455 
3456 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3457 		kprintf("usage: %s [-ps] <address>\n"
3458 			"  if -p is specified, all pages are shown; if -s is used,\n"
3459 			"  only the cache info is shown.\n", argv[0]);
3460 		return 0;
3461 	}
3462 	while (argv[i][0] == '-') {
3463 		char* arg = argv[i] + 1;
3464 		while (arg[0]) {
3465 			if (arg[0] == 'p')
3466 				showPages = true;
3467 			arg++;
3468 		}
3469 		i++;
3470 	}
3471 	if (argv[i] == NULL) {
3472 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3473 		return 0;
3474 	}
3475 
3476 	addr_t address = parse_expression(argv[i]);
3477 	if (address == 0)
3478 		return 0;
3479 
3480 	cache = (VMCache*)address;
3481 
3482 	cache->Dump(showPages);
3483 
3484 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3485 
3486 	return 0;
3487 }
3488 
3489 
3490 static void
3491 dump_area_struct(VMArea* area, bool mappings)
3492 {
3493 	kprintf("AREA: %p\n", area);
3494 	kprintf("name:\t\t'%s'\n", area->name);
3495 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3496 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3497 	kprintf("base:\t\t0x%lx\n", area->Base());
3498 	kprintf("size:\t\t0x%lx\n", area->Size());
3499 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3500 	kprintf("page_protection:%p\n", area->page_protections);
3501 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3502 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3503 	kprintf("cache:\t\t%p\n", area->cache);
3504 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3505 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3506 	kprintf("cache_next:\t%p\n", area->cache_next);
3507 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3508 
3509 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3510 	if (mappings) {
3511 		kprintf("page mappings:\n");
3512 		while (iterator.HasNext()) {
3513 			vm_page_mapping* mapping = iterator.Next();
3514 			kprintf("  %p", mapping->page);
3515 		}
3516 		kprintf("\n");
3517 	} else {
3518 		uint32 count = 0;
3519 		while (iterator.Next() != NULL) {
3520 			count++;
3521 		}
3522 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3523 	}
3524 }
3525 
3526 
3527 static int
3528 dump_area(int argc, char** argv)
3529 {
3530 	bool mappings = false;
3531 	bool found = false;
3532 	int32 index = 1;
3533 	VMArea* area;
3534 	addr_t num;
3535 
3536 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3537 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3538 			"All areas matching the given id/address/name are listed. You can\n"
3539 			"restrict the check to a specific attribute by prefixing the\n"
3540 			"specifier with one of the id/contains/address/name keywords.\n"
3541 			"-m shows the area's mappings as well.\n");
3542 		return 0;
3543 	}
3544 
3545 	if (!strcmp(argv[1], "-m")) {
3546 		mappings = true;
3547 		index++;
3548 	}
3549 
3550 	int32 mode = 0xf;
3551 	if (!strcmp(argv[index], "id"))
3552 		mode = 1;
3553 	else if (!strcmp(argv[index], "contains"))
3554 		mode = 2;
3555 	else if (!strcmp(argv[index], "name"))
3556 		mode = 4;
3557 	else if (!strcmp(argv[index], "address"))
3558 		mode = 0;
3559 	if (mode != 0xf)
3560 		index++;
3561 
3562 	if (index >= argc) {
3563 		kprintf("No area specifier given.\n");
3564 		return 0;
3565 	}
3566 
3567 	num = parse_expression(argv[index]);
3568 
3569 	if (mode == 0) {
3570 		dump_area_struct((struct VMArea*)num, mappings);
3571 	} else {
3572 		// walk through the area list, looking for the arguments as a name
3573 
3574 		VMAreasTree::Iterator it = VMAreas::GetIterator();
3575 		while ((area = it.Next()) != NULL) {
3576 			if (((mode & 4) != 0
3577 					&& !strcmp(argv[index], area->name))
3578 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3579 					|| (((mode & 2) != 0 && area->Base() <= num
3580 						&& area->Base() + area->Size() > num))))) {
3581 				dump_area_struct(area, mappings);
3582 				found = true;
3583 			}
3584 		}
3585 
3586 		if (!found)
3587 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3588 	}
3589 
3590 	return 0;
3591 }
3592 
3593 
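/*!	Debugger command ("areas"): lists all areas, optionally filtered by the
	owning team's ID or by a substring of the area name.
*/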
3594 static int
3595 dump_area_list(int argc, char** argv)
3596 {
3597 	VMArea* area;
3598 	const char* name = NULL;
3599 	int32 id = 0;
3600 
3601 	if (argc > 1) {
3602 		id = parse_expression(argv[1]);
3603 		if (id == 0)
3604 			name = argv[1];
3605 	}
3606 
3607 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3608 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3609 		B_PRINTF_POINTER_WIDTH, "size");
3610 
3611 	VMAreasTree::Iterator it = VMAreas::GetIterator();
3612 	while ((area = it.Next()) != NULL) {
3613 		if ((id != 0 && area->address_space->ID() != id)
3614 			|| (name != NULL && strstr(area->name, name) == NULL))
3615 			continue;
3616 
3617 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3618 			area->id, (void*)area->Base(), (void*)area->Size(),
3619 			area->protection, area->wiring, area->name);
3620 	}
3621 	return 0;
3622 }
3623 
3624 
3625 static int
3626 dump_available_memory(int argc, char** argv)
3627 {
3628 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3629 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3630 	return 0;
3631 }
3632 
3633 
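/*!	Debugger command ("mapping"): prints low-level page mapping information.
	Without options the argument is a virtual address that is looked up in
	the translation map of the current (or given) team. With "-r" it is a
	physical address, with "-p" the address of a vm_page structure; in both
	cases a reverse lookup over the address space(s) is done.
*/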
3634 static int
3635 dump_mapping_info(int argc, char** argv)
3636 {
3637 	bool reverseLookup = false;
3638 	bool pageLookup = false;
3639 
3640 	int argi = 1;
3641 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3642 		const char* arg = argv[argi];
3643 		if (strcmp(arg, "-r") == 0) {
3644 			reverseLookup = true;
3645 		} else if (strcmp(arg, "-p") == 0) {
3646 			reverseLookup = true;
3647 			pageLookup = true;
3648 		} else {
3649 			print_debugger_command_usage(argv[0]);
3650 			return 0;
3651 		}
3652 	}
3653 
3654 	// We need at least one argument, the address. Optionally a thread ID can be
3655 	// specified.
3656 	if (argi >= argc || argi + 2 < argc) {
3657 		print_debugger_command_usage(argv[0]);
3658 		return 0;
3659 	}
3660 
3661 	uint64 addressValue;
3662 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3663 		return 0;
3664 
3665 	Team* team = NULL;
3666 	if (argi < argc) {
3667 		uint64 threadID;
3668 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3669 			return 0;
3670 
3671 		Thread* thread = Thread::GetDebug(threadID);
3672 		if (thread == NULL) {
3673 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3674 			return 0;
3675 		}
3676 
3677 		team = thread->team;
3678 	}
3679 
3680 	if (reverseLookup) {
3681 		phys_addr_t physicalAddress;
3682 		if (pageLookup) {
3683 			vm_page* page = (vm_page*)(addr_t)addressValue;
3684 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3685 		} else {
3686 			physicalAddress = (phys_addr_t)addressValue;
3687 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3688 		}
3689 
3690 		kprintf("    Team     Virtual Address      Area\n");
3691 		kprintf("--------------------------------------\n");
3692 
3693 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3694 			Callback()
3695 				:
3696 				fAddressSpace(NULL)
3697 			{
3698 			}
3699 
3700 			void SetAddressSpace(VMAddressSpace* addressSpace)
3701 			{
3702 				fAddressSpace = addressSpace;
3703 			}
3704 
3705 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3706 			{
3707 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3708 					virtualAddress);
3709 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3710 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3711 				else
3712 					kprintf("\n");
3713 				return false;
3714 			}
3715 
3716 		private:
3717 			VMAddressSpace*	fAddressSpace;
3718 		} callback;
3719 
3720 		if (team != NULL) {
3721 			// team specified -- get its address space
3722 			VMAddressSpace* addressSpace = team->address_space;
3723 			if (addressSpace == NULL) {
3724 				kprintf("Failed to get address space!\n");
3725 				return 0;
3726 			}
3727 
3728 			callback.SetAddressSpace(addressSpace);
3729 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3730 				physicalAddress, callback);
3731 		} else {
3732 			// no team specified -- iterate through all address spaces
3733 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3734 				addressSpace != NULL;
3735 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3736 				callback.SetAddressSpace(addressSpace);
3737 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3738 					physicalAddress, callback);
3739 			}
3740 		}
3741 	} else {
3742 		// get the address space
3743 		addr_t virtualAddress = (addr_t)addressValue;
3744 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3745 		VMAddressSpace* addressSpace;
3746 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3747 			addressSpace = VMAddressSpace::Kernel();
3748 		} else if (team != NULL) {
3749 			addressSpace = team->address_space;
3750 		} else {
3751 			Thread* thread = debug_get_debugged_thread();
3752 			if (thread == NULL || thread->team == NULL) {
3753 				kprintf("Failed to get team!\n");
3754 				return 0;
3755 			}
3756 
3757 			addressSpace = thread->team->address_space;
3758 		}
3759 
3760 		if (addressSpace == NULL) {
3761 			kprintf("Failed to get address space!\n");
3762 			return 0;
3763 		}
3764 
3765 		// let the translation map implementation do the job
3766 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3767 	}
3768 
3769 	return 0;
3770 }
3771 
3772 
3773 /*!	Deletes all areas and reserved regions in the given address space.
3774 
3775 	The caller must ensure that none of the areas has any wired ranges.
3776 
3777 	\param addressSpace The address space.
3778 	\param deletingAddressSpace \c true, if the address space is in the process
3779 		of being deleted.
3780 */
3781 void
3782 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3783 {
3784 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3785 		addressSpace->ID()));
3786 
3787 	addressSpace->WriteLock();
3788 
3789 	// remove all reserved areas in this address space
3790 	addressSpace->UnreserveAllAddressRanges(0);
3791 
3792 	// delete all the areas in this address space
3793 	while (VMArea* area = addressSpace->FirstArea()) {
3794 		ASSERT(!area->IsWired());
3795 		delete_area(addressSpace, area, deletingAddressSpace);
3796 	}
3797 
3798 	addressSpace->WriteUnlock();
3799 }
3800 
3801 
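/*!	Looks up the area covering \a address in the calling team's address
	space (or in the kernel address space for kernel addresses) and returns
	its ID. If \a kernel is \c false, areas marked B_KERNEL_AREA without any
	user protection are not revealed. Returns \c B_ERROR if no suitable area
	was found.
*/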
3802 static area_id
3803 vm_area_for(addr_t address, bool kernel)
3804 {
3805 	team_id team;
3806 	if (IS_USER_ADDRESS(address)) {
3807 		// we try the user team address space, if any
3808 		team = VMAddressSpace::CurrentID();
3809 		if (team < 0)
3810 			return team;
3811 	} else
3812 		team = VMAddressSpace::KernelID();
3813 
3814 	AddressSpaceReadLocker locker(team);
3815 	if (!locker.IsLocked())
3816 		return B_BAD_TEAM_ID;
3817 
3818 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3819 	if (area != NULL) {
3820 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0
3821 				&& (area->protection & B_KERNEL_AREA) != 0)
3822 			return B_ERROR;
3823 
3824 		return area->id;
3825 	}
3826 
3827 	return B_ERROR;
3828 }
3829 
3830 
3831 /*!	Frees physical pages that were used during the boot process.
3832 	\a end is inclusive.
3833 */
3834 static void
3835 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3836 {
3837 	// free all physical pages in the specified range
3838 
3839 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3840 		phys_addr_t physicalAddress;
3841 		uint32 flags;
3842 
3843 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3844 			&& (flags & PAGE_PRESENT) != 0) {
3845 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3846 			if (page != NULL && page->State() != PAGE_STATE_FREE
3847 					&& page->State() != PAGE_STATE_CLEAR
3848 					&& page->State() != PAGE_STATE_UNUSED) {
3849 				DEBUG_PAGE_ACCESS_START(page);
3850 				vm_page_set_state(page, PAGE_STATE_FREE);
3851 			}
3852 		}
3853 	}
3854 
3855 	// unmap the memory
3856 	map->Unmap(start, end);
3857 }
3858 
3859 
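/*!	Unmaps and frees the physical pages of the given kernel virtual range
	that are not covered by any kernel area, i.e. the gaps that were only
	used by the boot loader.
*/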
3860 void
3861 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3862 {
3863 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3864 	addr_t end = start + (size - 1);
3865 	addr_t lastEnd = start;
3866 
3867 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3868 		(void*)start, (void*)end));
3869 
3870 	// The areas are sorted in virtual address space order, so
3871 	// we just have to find the holes between them that fall
3872 	// into the range we should dispose of
3873 
3874 	map->Lock();
3875 
3876 	for (VMAddressSpace::AreaIterator it
3877 				= VMAddressSpace::Kernel()->GetAreaIterator();
3878 			VMArea* area = it.Next();) {
3879 		addr_t areaStart = area->Base();
3880 		addr_t areaEnd = areaStart + (area->Size() - 1);
3881 
3882 		if (areaEnd < start)
3883 			continue;
3884 
3885 		if (areaStart > end) {
3886 			// we are done, the area is already beyond what we have to free
3887 			break;
3888 		}
3889 
3890 		if (areaStart > lastEnd) {
3891 			// this is something we can free
3892 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3893 				(void*)areaStart));
3894 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3895 		}
3896 
3897 		if (areaEnd >= end) {
3898 			lastEnd = areaEnd;
3899 				// no +1 to prevent potential overflow
3900 			break;
3901 		}
3902 
3903 		lastEnd = areaEnd + 1;
3904 	}
3905 
3906 	if (lastEnd < end) {
3907 		// we can also get rid of some space at the end of the area
3908 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3909 			(void*)end));
3910 		unmap_and_free_physical_pages(map, lastEnd, end);
3911 	}
3912 
3913 	map->Unlock();
3914 }
3915 
3916 
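/*!	Creates "<name>_text" and "<name>_data" areas for the given image that
	was preloaded by the boot loader, so that its already wired memory is
	represented by proper kernel areas.
*/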
3917 static void
3918 create_preloaded_image_areas(struct preloaded_image* _image)
3919 {
3920 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3921 	char name[B_OS_NAME_LENGTH];
3922 	void* address;
3923 	int32 length;
3924 
3925 	// use file name to create a good area name
3926 	char* fileName = strrchr(image->name, '/');
3927 	if (fileName == NULL)
3928 		fileName = image->name;
3929 	else
3930 		fileName++;
3931 
3932 	length = strlen(fileName);
3933 	// make sure there is enough space for the suffix
3934 	if (length > 25)
3935 		length = 25;
3936 
3937 	memcpy(name, fileName, length);
3938 	strcpy(name + length, "_text");
3939 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3940 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3941 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3942 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3943 		// this will later be remapped read-only/executable by the
3944 		// ELF initialization code
3945 
3946 	strcpy(name + length, "_data");
3947 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3948 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3949 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3950 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3951 }
3952 
3953 
3954 /*!	Frees all kernel args areas that were previously allocated from the
3955 	kernel_args structure. Any boot loader resources contained in those
3956 	arguments must not be accessed anymore past this point.
3957 */
3958 void
3959 vm_free_kernel_args(kernel_args* args)
3960 {
3961 	uint32 i;
3962 
3963 	TRACE(("vm_free_kernel_args()\n"));
3964 
3965 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3966 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3967 		if (area >= B_OK)
3968 			delete_area(area);
3969 	}
3970 }
3971 
3972 
3973 static void
3974 allocate_kernel_args(kernel_args* args)
3975 {
3976 	TRACE(("allocate_kernel_args()\n"));
3977 
3978 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3979 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3980 
3981 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3982 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3983 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3984 	}
3985 }
3986 
3987 
3988 static void
3989 unreserve_boot_loader_ranges(kernel_args* args)
3990 {
3991 	TRACE(("unreserve_boot_loader_ranges()\n"));
3992 
3993 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3994 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3995 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3996 			args->virtual_allocated_range[i].size);
3997 	}
3998 }
3999 
4000 
4001 static void
4002 reserve_boot_loader_ranges(kernel_args* args)
4003 {
4004 	TRACE(("reserve_boot_loader_ranges()\n"));
4005 
4006 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4007 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
4008 
4009 		// If the address is not a kernel address, we just skip it. The
4010 		// architecture specific code has to deal with it.
4011 		if (!IS_KERNEL_ADDRESS(address)) {
4012 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
4013 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
4014 			continue;
4015 		}
4016 
4017 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
4018 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4019 		if (status < B_OK)
4020 			panic("could not reserve boot loader ranges\n");
4021 	}
4022 }
4023 
4024 
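/*!	Finds a free virtual address range of \a size bytes (optionally aligned
	to \a alignment) between, after, or before the ranges recorded in the
	kernel_args' virtual_allocated_range array, extends the bookkeeping
	accordingly, and returns the base address. Returns 0 on failure.
*/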
4025 static addr_t
4026 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
4027 {
4028 	size = PAGE_ALIGN(size);
4029 
4030 	// find a slot in the virtual allocation addr range
4031 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
4032 		// check to see if the space between this one and the last is big enough
4033 		addr_t rangeStart = args->virtual_allocated_range[i].start;
4034 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4035 			+ args->virtual_allocated_range[i - 1].size;
4036 
4037 		addr_t base = alignment > 0
4038 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4039 
4040 		if (base >= KERNEL_BASE && base < rangeStart
4041 				&& rangeStart - base >= size) {
4042 			args->virtual_allocated_range[i - 1].size
4043 				+= base + size - previousRangeEnd;
4044 			return base;
4045 		}
4046 	}
4047 
4048 	// we didn't find one between the allocation ranges; that's OK.
4049 	// see if there's a gap after the last one
4050 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4051 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4052 		+ args->virtual_allocated_range[lastEntryIndex].size;
4053 	addr_t base = alignment > 0
4054 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4055 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4056 		args->virtual_allocated_range[lastEntryIndex].size
4057 			+= base + size - lastRangeEnd;
4058 		return base;
4059 	}
4060 
4061 	// see if there's a gap before the first one
4062 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4063 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4064 		base = rangeStart - size;
4065 		if (alignment > 0)
4066 			base = ROUNDDOWN(base, alignment);
4067 
4068 		if (base >= KERNEL_BASE) {
4069 			args->virtual_allocated_range[0].start = base;
4070 			args->virtual_allocated_range[0].size += rangeStart - base;
4071 			return base;
4072 		}
4073 	}
4074 
4075 	return 0;
4076 }
4077 
4078 
4079 static bool
4080 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4081 {
4082 	// TODO: horrible brute-force method of determining if the page can be
4083 	// allocated
4084 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4085 		if (address >= args->physical_memory_range[i].start
4086 			&& address < args->physical_memory_range[i].start
4087 				+ args->physical_memory_range[i].size)
4088 			return true;
4089 	}
4090 	return false;
4091 }
4092 
4093 
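/*!	Allocates a single physical page for early (pre-vm_page_init()) use by
	extending one of the kernel_args' physical_allocated_range entries
	upwards or downwards. Returns the page number, or 0 if no page could be
	allocated.
*/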
4094 page_num_t
4095 vm_allocate_early_physical_page(kernel_args* args)
4096 {
4097 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4098 		phys_addr_t nextPage;
4099 
4100 		nextPage = args->physical_allocated_range[i].start
4101 			+ args->physical_allocated_range[i].size;
4102 		// see if the page right after this allocated paddr run can be allocated
4103 		if (i + 1 < args->num_physical_allocated_ranges
4104 			&& args->physical_allocated_range[i + 1].size != 0) {
4105 			// see if the next page will collide with the next allocated range
4106 			if (nextPage >= args->physical_allocated_range[i+1].start)
4107 				continue;
4108 		}
4109 		// see if the next physical page fits in the memory block
4110 		if (is_page_in_physical_memory_range(args, nextPage)) {
4111 			// we got one!
4112 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4113 			return nextPage / B_PAGE_SIZE;
4114 		}
4115 	}
4116 
4117 	// Expanding upwards didn't work, try going downwards.
4118 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4119 		phys_addr_t nextPage;
4120 
4121 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4122 		// see if the page right before this allocated paddr run can be allocated
4123 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4124 			// see if the page will collide with the previous allocated range
4125 			if (nextPage < args->physical_allocated_range[i-1].start
4126 				+ args->physical_allocated_range[i-1].size)
4127 				continue;
4128 		}
4129 		// see if the next physical page fits in the memory block
4130 		if (is_page_in_physical_memory_range(args, nextPage)) {
4131 			// we got one!
4132 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4133 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4134 			return nextPage / B_PAGE_SIZE;
4135 		}
4136 	}
4137 
4138 	return 0;
4139 		// could not allocate a block
4140 }
4141 
4142 
4143 /*!	Uses the kernel_args' physical and virtual memory ranges to allocate
4144 	some pages before the VM is completely up.
4145 */
4146 addr_t
4147 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4148 	uint32 attributes, addr_t alignment)
4149 {
4150 	if (physicalSize > virtualSize)
4151 		physicalSize = virtualSize;
4152 
4153 	// find the vaddr to allocate at
4154 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4155 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4156 	if (virtualBase == 0) {
4157 		panic("vm_allocate_early: could not allocate virtual address\n");
4158 		return 0;
4159 	}
4160 
4161 	// map the pages
4162 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4163 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4164 		if (physicalAddress == 0)
4165 			panic("error allocating early page!\n");
4166 
4167 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4168 
4169 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4170 			physicalAddress * B_PAGE_SIZE, attributes,
4171 			&vm_allocate_early_physical_page);
4172 	}
4173 
4174 	return virtualBase;
4175 }
4176 
4177 
4178 /*!	The main entrance point to initialize the VM. */
4179 status_t
4180 vm_init(kernel_args* args)
4181 {
4182 	struct preloaded_image* image;
4183 	void* address;
4184 	status_t err = 0;
4185 	uint32 i;
4186 
4187 	TRACE(("vm_init: entry\n"));
4188 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4189 	err = arch_vm_init(args);
4190 
4191 	// initialize some globals
4192 	vm_page_init_num_pages(args);
4193 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4194 
4195 	slab_init(args);
4196 
4197 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4198 	off_t heapSize = INITIAL_HEAP_SIZE;
4199 	// try to accommodate low-memory systems
4200 	while (heapSize > sAvailableMemory / 8)
4201 		heapSize /= 2;
4202 	if (heapSize < 1024 * 1024)
4203 		panic("vm_init: go buy some RAM please.");
4204 
4205 	// map in the new heap and initialize it
4206 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4207 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4208 	TRACE(("heap at 0x%lx\n", heapBase));
4209 	heap_init(heapBase, heapSize);
4210 #endif
4211 
4212 	// initialize the free page list and physical page mapper
4213 	vm_page_init(args);
4214 
4215 	// initialize the cache allocators
4216 	vm_cache_init(args);
4217 
4218 	{
4219 		status_t error = VMAreas::Init();
4220 		if (error != B_OK)
4221 			panic("vm_init: error initializing areas map\n");
4222 	}
4223 
4224 	VMAddressSpace::Init();
4225 	reserve_boot_loader_ranges(args);
4226 
4227 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4228 	heap_init_post_area();
4229 #endif
4230 
4231 	// Do any further initialization that the architecture-dependent layers may
4232 	// need now
4233 	arch_vm_translation_map_init_post_area(args);
4234 	arch_vm_init_post_area(args);
4235 	vm_page_init_post_area(args);
4236 	slab_init_post_area();
4237 
4238 	// allocate areas to represent stuff that already exists
4239 
4240 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4241 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4242 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4243 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4244 #endif
4245 
4246 	allocate_kernel_args(args);
4247 
4248 	create_preloaded_image_areas(args->kernel_image);
4249 
4250 	// allocate areas for preloaded images
4251 	for (image = args->preloaded_images; image != NULL; image = image->next)
4252 		create_preloaded_image_areas(image);
4253 
4254 	// allocate kernel stacks
4255 	for (i = 0; i < args->num_cpus; i++) {
4256 		char name[64];
4257 
4258 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4259 		address = (void*)args->cpu_kstack[i].start;
4260 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4261 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4262 	}
4263 
4264 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4265 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4266 
4267 #if PARANOID_KERNEL_MALLOC
4268 	vm_block_address_range("uninitialized heap memory",
4269 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4270 #endif
4271 #if PARANOID_KERNEL_FREE
4272 	vm_block_address_range("freed heap memory",
4273 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4274 #endif
4275 
4276 	// create the object cache for the page mappings
4277 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4278 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4279 		NULL, NULL);
4280 	if (gPageMappingsObjectCache == NULL)
4281 		panic("failed to create page mappings object cache");
4282 
4283 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4284 
4285 #if DEBUG_CACHE_LIST
4286 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4287 		virtual_address_restrictions virtualRestrictions = {};
4288 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4289 		physical_address_restrictions physicalRestrictions = {};
4290 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4291 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4292 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4293 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4294 			&physicalRestrictions, (void**)&sCacheInfoTable);
4295 	}
4296 #endif	// DEBUG_CACHE_LIST
4297 
4298 	// add some debugger commands
4299 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4300 	add_debugger_command("area", &dump_area,
4301 		"Dump info about a particular area");
4302 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4303 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4304 #if DEBUG_CACHE_LIST
4305 	if (sCacheInfoTable != NULL) {
4306 		add_debugger_command_etc("caches", &dump_caches,
4307 			"List all VMCache trees",
4308 			"[ \"-c\" ]\n"
4309 			"All cache trees are listed sorted in decreasing order by number "
4310 				"of\n"
4311 			"used pages or, if \"-c\" is specified, by size of committed "
4312 				"memory.\n",
4313 			0);
4314 	}
4315 #endif
4316 	add_debugger_command("avail", &dump_available_memory,
4317 		"Dump available memory");
4318 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4319 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4320 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4321 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4322 	add_debugger_command("string", &display_mem, "dump strings");
4323 
4324 	add_debugger_command_etc("mapping", &dump_mapping_info,
4325 		"Print address mapping information",
4326 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4327 		"Prints low-level page mapping information for a given address. If\n"
4328 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4329 		"address that is looked up in the translation map of the current\n"
4330 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4331 		"\"-r\" is specified, <address> is a physical address that is\n"
4332 		"searched in the translation map of all teams, respectively the team\n"
4333 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4334 		"<address> is the address of a vm_page structure. The behavior is\n"
4335 		"equivalent to specifying \"-r\" with the physical address of that\n"
4336 		"page.\n",
4337 		0);
4338 
4339 	TRACE(("vm_init: exit\n"));
4340 
4341 	vm_cache_init_post_heap();
4342 
4343 	return err;
4344 }
4345 
4346 
4347 status_t
4348 vm_init_post_sem(kernel_args* args)
4349 {
4350 	// This frees all unused boot loader resources and makes their space
4351 	// available again
4352 	arch_vm_init_end(args);
4353 	unreserve_boot_loader_ranges(args);
4354 
4355 	// fill in all of the semaphores that were not allocated before
4356 	// since we're still single threaded and only the kernel address space
4357 	// exists, it isn't that hard to find all of the ones we need to create
4358 
4359 	arch_vm_translation_map_init_post_sem(args);
4360 
4361 	slab_init_post_sem();
4362 
4363 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4364 	heap_init_post_sem();
4365 #endif
4366 
4367 	return B_OK;
4368 }
4369 
4370 
4371 status_t
4372 vm_init_post_thread(kernel_args* args)
4373 {
4374 	vm_page_init_post_thread(args);
4375 	slab_init_post_thread();
4376 	return heap_init_post_thread();
4377 }
4378 
4379 
4380 status_t
4381 vm_init_post_modules(kernel_args* args)
4382 {
4383 	return arch_vm_init_post_modules(args);
4384 }
4385 
4386 
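/*!	Increments the calling thread's page_faults_allowed counter;
	forbid_page_faults() below is the matching decrement. The counter lets
	the page fault handling code tell whether faults are currently permitted
	for the thread.
*/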
4387 void
4388 permit_page_faults(void)
4389 {
4390 	Thread* thread = thread_get_current_thread();
4391 	if (thread != NULL)
4392 		atomic_add(&thread->page_faults_allowed, 1);
4393 }
4394 
4395 
4396 void
4397 forbid_page_faults(void)
4398 {
4399 	Thread* thread = thread_get_current_thread();
4400 	if (thread != NULL)
4401 		atomic_add(&thread->page_faults_allowed, -1);
4402 }
4403 
4404 
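/*!	Generic page fault handler, invoked by the architecture specific fault
	handling code. Resolves the fault via vm_soft_fault(). On failure,
	kernel faults either return to the thread's fault handler (by setting
	\a newIP) or panic, while for userland faults the user debugger is
	notified and/or a SIGSEGV signal is sent to the thread.
*/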
4405 status_t
4406 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4407 	bool isUser, addr_t* newIP)
4408 {
4409 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4410 		faultAddress));
4411 
4412 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4413 
4414 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4415 	VMAddressSpace* addressSpace = NULL;
4416 
4417 	status_t status = B_OK;
4418 	*newIP = 0;
4419 	atomic_add((int32*)&sPageFaults, 1);
4420 
4421 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4422 		addressSpace = VMAddressSpace::GetKernel();
4423 	} else if (IS_USER_ADDRESS(pageAddress)) {
4424 		addressSpace = VMAddressSpace::GetCurrent();
4425 		if (addressSpace == NULL) {
4426 			if (!isUser) {
4427 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4428 					"memory!\n");
4429 				status = B_BAD_ADDRESS;
4430 				TPF(PageFaultError(-1,
4431 					VMPageFaultTracing
4432 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4433 			} else {
4434 				// XXX weird state.
4435 				panic("vm_page_fault: non kernel thread accessing user memory "
4436 					"that doesn't exist!\n");
4437 				status = B_BAD_ADDRESS;
4438 			}
4439 		}
4440 	} else {
4441 		// the hit was probably in the 64k DMZ between kernel and user space
4442 		// this keeps a user space thread from passing a buffer that crosses
4443 		// into kernel space
4444 		status = B_BAD_ADDRESS;
4445 		TPF(PageFaultError(-1,
4446 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4447 	}
4448 
4449 	if (status == B_OK) {
4450 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4451 			isUser, NULL);
4452 	}
4453 
4454 	if (status < B_OK) {
4455 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4456 			"0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n",
4457 			strerror(status), address, faultAddress, isWrite, isUser, isExecute,
4458 			thread_get_current_thread_id());
4459 		if (!isUser) {
4460 			Thread* thread = thread_get_current_thread();
4461 			if (thread != NULL && thread->fault_handler != 0) {
4462 				// this will cause the arch-dependent page fault handler to
4463 				// modify the IP on the interrupt frame (or equivalent) to return
4464 				// to this address
4465 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4466 			} else {
4467 				// unhandled page fault in the kernel
4468 				panic("vm_page_fault: unhandled page fault in kernel space at "
4469 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4470 			}
4471 		} else {
4472 			Thread* thread = thread_get_current_thread();
4473 
4474 #ifdef TRACE_FAULTS
4475 			VMArea* area = NULL;
4476 			if (addressSpace != NULL) {
4477 				addressSpace->ReadLock();
4478 				area = addressSpace->LookupArea(faultAddress);
4479 			}
4480 
4481 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4482 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4483 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4484 				thread->team->Name(), thread->team->id,
4485 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4486 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4487 					area->Base() : 0x0));
4488 
4489 			if (addressSpace != NULL)
4490 				addressSpace->ReadUnlock();
4491 #endif
4492 
4493 			// If the thread has a signal handler for SIGSEGV, we simply
4494 			// send it the signal. Otherwise we notify the user debugger
4495 			// first.
4496 			struct sigaction action;
4497 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4498 					&& action.sa_handler != SIG_DFL
4499 					&& action.sa_handler != SIG_IGN)
4500 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4501 					SIGSEGV)) {
4502 				Signal signal(SIGSEGV,
4503 					status == B_PERMISSION_DENIED
4504 						? SEGV_ACCERR : SEGV_MAPERR,
4505 					EFAULT, thread->team->id);
4506 				signal.SetAddress((void*)address);
4507 				send_signal_to_thread(thread, signal, 0);
4508 			}
4509 		}
4510 	}
4511 
4512 	if (addressSpace != NULL)
4513 		addressSpace->Put();
4514 
4515 	return B_HANDLED_INTERRUPT;
4516 }
4517 
4518 
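/*!	Bundles the state (locking, page reservation, top cache, and results)
	that vm_soft_fault() carries across restarts while resolving a single
	page fault.
*/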
4519 struct PageFaultContext {
4520 	AddressSpaceReadLocker	addressSpaceLocker;
4521 	VMCacheChainLocker		cacheChainLocker;
4522 
4523 	VMTranslationMap*		map;
4524 	VMCache*				topCache;
4525 	off_t					cacheOffset;
4526 	vm_page_reservation		reservation;
4527 	bool					isWrite;
4528 
4529 	// return values
4530 	vm_page*				page;
4531 	bool					restart;
4532 	bool					pageAllocated;
4533 
4534 
4535 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4536 		:
4537 		addressSpaceLocker(addressSpace, true),
4538 		map(addressSpace->TranslationMap()),
4539 		isWrite(isWrite)
4540 	{
4541 	}
4542 
4543 	~PageFaultContext()
4544 	{
4545 		UnlockAll();
4546 		vm_page_unreserve_pages(&reservation);
4547 	}
4548 
4549 	void Prepare(VMCache* topCache, off_t cacheOffset)
4550 	{
4551 		this->topCache = topCache;
4552 		this->cacheOffset = cacheOffset;
4553 		page = NULL;
4554 		restart = false;
4555 		pageAllocated = false;
4556 
4557 		cacheChainLocker.SetTo(topCache);
4558 	}
4559 
4560 	void UnlockAll(VMCache* exceptCache = NULL)
4561 	{
4562 		topCache = NULL;
4563 		addressSpaceLocker.Unlock();
4564 		cacheChainLocker.Unlock(exceptCache);
4565 	}
4566 };
4567 
4568 
4569 /*!	Gets the page that should be mapped into the area.
4570 	Returns an error code other than \c B_OK, if the page couldn't be found or
4571 	paged in. The locking state of the address space and the caches is undefined
4572 	in that case.
4573 	Returns \c B_OK with \c context.restart set to \c true, if the function
4574 	had to unlock the address space and all caches and is supposed to be called
4575 	again.
4576 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4577 	found. It is returned in \c context.page. The address space will still be
4578 	locked as well as all caches starting from the top cache to at least the
4579 	cache the page lives in.
4580 */
4581 static status_t
4582 fault_get_page(PageFaultContext& context)
4583 {
4584 	VMCache* cache = context.topCache;
4585 	VMCache* lastCache = NULL;
4586 	vm_page* page = NULL;
4587 
4588 	while (cache != NULL) {
4589 		// We already hold the lock of the cache at this point.
4590 
4591 		lastCache = cache;
4592 
4593 		page = cache->LookupPage(context.cacheOffset);
4594 		if (page != NULL && page->busy) {
4595 			// the page is busy -- wait for it to become unbusy
4596 			context.UnlockAll(cache);
4597 			cache->ReleaseRefLocked();
4598 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4599 
4600 			// restart the whole process
4601 			context.restart = true;
4602 			return B_OK;
4603 		}
4604 
4605 		if (page != NULL)
4606 			break;
4607 
4608 		// The current cache does not contain the page we're looking for.
4609 
4610 		// see if the backing store has it
4611 		if (cache->HasPage(context.cacheOffset)) {
4612 			// insert a fresh page and mark it busy -- we're going to read it in
4613 			page = vm_page_allocate_page(&context.reservation,
4614 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4615 			cache->InsertPage(page, context.cacheOffset);
4616 
4617 			// We need to unlock all caches and the address space while reading
4618 			// the page in. Keep a reference to the cache around.
4619 			cache->AcquireRefLocked();
4620 			context.UnlockAll();
4621 
4622 			// read the page in
4623 			generic_io_vec vec;
4624 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4625 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4626 
4627 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4628 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4629 
4630 			cache->Lock();
4631 
4632 			if (status < B_OK) {
4633 				// on error remove and free the page
4634 				dprintf("reading page from cache %p returned: %s!\n",
4635 					cache, strerror(status));
4636 
4637 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4638 				cache->RemovePage(page);
4639 				vm_page_set_state(page, PAGE_STATE_FREE);
4640 
4641 				cache->ReleaseRefAndUnlock();
4642 				return status;
4643 			}
4644 
4645 			// mark the page unbusy again
4646 			cache->MarkPageUnbusy(page);
4647 
4648 			DEBUG_PAGE_ACCESS_END(page);
4649 
4650 			// Since we needed to unlock everything temporarily, the area
4651 			// situation might have changed. So we need to restart the whole
4652 			// process.
4653 			cache->ReleaseRefAndUnlock();
4654 			context.restart = true;
4655 			return B_OK;
4656 		}
4657 
4658 		cache = context.cacheChainLocker.LockSourceCache();
4659 	}
4660 
4661 	if (page == NULL) {
4662 		// There was no adequate page, determine the cache for a clean one.
4663 		// Read-only pages go into the deepest cache; only the topmost cache
4664 		// may have direct write access.
4665 		cache = context.isWrite ? context.topCache : lastCache;
4666 
4667 		// allocate a clean page
4668 		page = vm_page_allocate_page(&context.reservation,
4669 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4670 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4671 			page->physical_page_number));
4672 
4673 		// insert the new page into our cache
4674 		cache->InsertPage(page, context.cacheOffset);
4675 		context.pageAllocated = true;
4676 	} else if (page->Cache() != context.topCache && context.isWrite) {
4677 		// We have a page that has the data we want, but in the wrong cache
4678 		// object so we need to copy it and stick it into the top cache.
4679 		vm_page* sourcePage = page;
4680 
4681 		// TODO: If memory is low, it might be a good idea to steal the page
4682 		// from our source cache -- if possible, that is.
4683 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4684 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4685 
4686 		// To not needlessly kill concurrency we unlock all caches but the top
4687 		// one while copying the page. Lacking another mechanism to ensure that
4688 		// the source page doesn't disappear, we mark it busy.
4689 		sourcePage->busy = true;
4690 		context.cacheChainLocker.UnlockKeepRefs(true);
4691 
4692 		// copy the page
4693 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4694 			sourcePage->physical_page_number * B_PAGE_SIZE);
4695 
4696 		context.cacheChainLocker.RelockCaches(true);
4697 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4698 
4699 		// insert the new page into our cache
4700 		context.topCache->InsertPage(page, context.cacheOffset);
4701 		context.pageAllocated = true;
4702 	} else
4703 		DEBUG_PAGE_ACCESS_START(page);
4704 
4705 	context.page = page;
4706 	return B_OK;
4707 }
4708 
4709 
4710 /*!	Makes sure the address in the given address space is mapped.
4711 
4712 	\param addressSpace The address space.
4713 	\param originalAddress The address. Doesn't need to be page aligned.
4714 	\param isWrite If \c true the address shall be write-accessible.
	\param isExecute If \c true the address shall be executable.
4715 	\param isUser If \c true the access is requested by a userland team.
4716 	\param wirePage On success, if non \c NULL, the wired count of the page
4717 		mapped at the given address is incremented and the page is returned
4718 		via this parameter.
4719 	\return \c B_OK on success, another error code otherwise.
4720 */
4721 static status_t
4722 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4723 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4724 {
4725 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4726 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4727 		originalAddress, isWrite, isUser));
4728 
4729 	PageFaultContext context(addressSpace, isWrite);
4730 
4731 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4732 	status_t status = B_OK;
4733 
4734 	addressSpace->IncrementFaultCount();
4735 
4736 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4737 	// the pages upfront makes sure we don't have any cache locked, so that the
4738 	// page daemon/thief can do their job without problems.
4739 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4740 		originalAddress);
4741 	context.addressSpaceLocker.Unlock();
4742 	vm_page_reserve_pages(&context.reservation, reservePages,
4743 		addressSpace == VMAddressSpace::Kernel()
4744 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4745 
4746 	while (true) {
4747 		context.addressSpaceLocker.Lock();
4748 
4749 		// get the area the fault was in
4750 		VMArea* area = addressSpace->LookupArea(address);
4751 		if (area == NULL) {
4752 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4753 				"space\n", originalAddress);
4754 			TPF(PageFaultError(-1,
4755 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4756 			status = B_BAD_ADDRESS;
4757 			break;
4758 		}
4759 
4760 		// check permissions
4761 		uint32 protection = get_area_page_protection(area, address);
4762 		if (isUser && (protection & B_USER_PROTECTION) == 0
4763 				&& (area->protection & B_KERNEL_AREA) != 0) {
4764 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4765 				area->id, (void*)originalAddress);
4766 			TPF(PageFaultError(area->id,
4767 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4768 			status = B_PERMISSION_DENIED;
4769 			break;
4770 		}
4771 		if (isWrite && (protection
4772 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4773 			dprintf("write access attempted on write-protected area 0x%"
4774 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4775 			TPF(PageFaultError(area->id,
4776 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4777 			status = B_PERMISSION_DENIED;
4778 			break;
4779 		} else if (isExecute && (protection
4780 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4781 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4782 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4783 			TPF(PageFaultError(area->id,
4784 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4785 			status = B_PERMISSION_DENIED;
4786 			break;
4787 		} else if (!isWrite && !isExecute && (protection
4788 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4789 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4790 				" at %p\n", area->id, (void*)originalAddress);
4791 			TPF(PageFaultError(area->id,
4792 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4793 			status = B_PERMISSION_DENIED;
4794 			break;
4795 		}
4796 
4797 		// We have the area, it was a valid access, so let's try to resolve the
4798 		// page fault now.
4799 		// At first, the top most cache from the area is investigated.
4800 
4801 		context.Prepare(vm_area_get_locked_cache(area),
4802 			address - area->Base() + area->cache_offset);
4803 
4804 		// See if this cache has a fault handler -- this will do all the work
4805 		// for us.
4806 		{
4807 			// Note, since the page fault is resolved with interrupts enabled,
4808 			// the fault handler could be called more than once for the same
4809 			// reason -- the store must take this into account.
4810 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4811 			if (status != B_BAD_HANDLER)
4812 				break;
4813 		}
4814 
4815 		// The top most cache has no fault handler, so let's see if the cache or
4816 		// its sources already have the page we're searching for (we're going
4817 		// from top to bottom).
4818 		status = fault_get_page(context);
4819 		if (status != B_OK) {
4820 			TPF(PageFaultError(area->id, status));
4821 			break;
4822 		}
4823 
4824 		if (context.restart)
4825 			continue;
4826 
4827 		// All went fine, all there is left to do is to map the page into the
4828 		// address space.
4829 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4830 			context.page));
4831 
4832 		// If the page doesn't reside in the area's cache, we need to make sure
4833 		// it's mapped in read-only, so that we cannot overwrite someone else's
4834 		// data (copy-on-write)
4835 		uint32 newProtection = protection;
4836 		if (context.page->Cache() != context.topCache && !isWrite)
4837 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4838 
4839 		bool unmapPage = false;
4840 		bool mapPage = true;
4841 
4842 		// check whether there's already a page mapped at the address
4843 		context.map->Lock();
4844 
4845 		phys_addr_t physicalAddress;
4846 		uint32 flags;
4847 		vm_page* mappedPage = NULL;
4848 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4849 			&& (flags & PAGE_PRESENT) != 0
4850 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4851 				!= NULL) {
4852 			// Yep there's already a page. If it's ours, we can simply adjust
4853 			// its protection. Otherwise we have to unmap it.
4854 			if (mappedPage == context.page) {
4855 				context.map->ProtectPage(area, address, newProtection);
4856 					// Note: We assume that ProtectPage() is atomic (i.e.
4857 					// the page isn't temporarily unmapped), otherwise we'd have
4858 					// to make sure it isn't wired.
4859 				mapPage = false;
4860 			} else
4861 				unmapPage = true;
4862 		}
4863 
4864 		context.map->Unlock();
4865 
4866 		if (unmapPage) {
4867 			// If the page is wired, we can't unmap it. Wait until it is unwired
4868 			// again and restart. Note that the page cannot be wired for
4869 			// writing, since it isn't in the topmost cache. So we can safely
4870 			// ignore ranges wired for writing (our own and other concurrent
4871 			// wiring attempts in progress) and in fact have to do that to avoid
4872 			// a deadlock.
4873 			VMAreaUnwiredWaiter waiter;
4874 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4875 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4876 				// unlock everything and wait
4877 				if (context.pageAllocated) {
4878 					// ... but since we allocated a page and inserted it into
4879 					// the top cache, remove and free it first. Otherwise we'd
4880 					// have a page from a lower cache mapped while an upper
4881 					// cache has a page that would shadow it.
4882 					context.topCache->RemovePage(context.page);
4883 					vm_page_free_etc(context.topCache, context.page,
4884 						&context.reservation);
4885 				} else
4886 					DEBUG_PAGE_ACCESS_END(context.page);
4887 
4888 				context.UnlockAll();
4889 				waiter.waitEntry.Wait();
4890 				continue;
4891 			}
4892 
4893 			// Note: The mapped page is a page of a lower cache. We are
4894 			// guaranteed to have that cache locked, our new page is a copy of
4895 			// that page, and the page is not busy. The logic for that guarantee
4896 			// is as follows: Since the page is mapped, it must live in the top
4897 			// cache (ruled out above) or any of its lower caches, and there is
4898 			// (was before the new page was inserted) no other page in any
4899 			// cache between the top cache and the page's cache (otherwise that
4900 			// would be mapped instead). That in turn means that our algorithm
4901 			// must have found it and therefore it cannot be busy either.
4902 			DEBUG_PAGE_ACCESS_START(mappedPage);
4903 			unmap_page(area, address);
4904 			DEBUG_PAGE_ACCESS_END(mappedPage);
4905 		}
4906 
4907 		if (mapPage) {
4908 			if (map_page(area, context.page, address, newProtection,
4909 					&context.reservation) != B_OK) {
4910 				// Mapping can only fail when the page mapping object couldn't
4911 				// be allocated. Save for the missing mapping, everything is
4912 				// fine, though. If this was a regular page fault, we'll simply
4913 				// leave and probably fault again. To make sure we'll have more
4914 				// luck then, we ensure that the minimum object reserve is
4915 				// available.
4916 				DEBUG_PAGE_ACCESS_END(context.page);
4917 
4918 				context.UnlockAll();
4919 
4920 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4921 						!= B_OK) {
4922 					// Apparently the situation is serious. Let's get ourselves
4923 					// killed.
4924 					status = B_NO_MEMORY;
4925 				} else if (wirePage != NULL) {
4926 					// The caller expects us to wire the page. Since
4927 					// object_cache_reserve() succeeded, we should now be able
4928 					// to allocate a mapping structure. Restart.
4929 					continue;
4930 				}
4931 
4932 				break;
4933 			}
4934 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4935 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4936 
4937 		// also wire the page, if requested
4938 		if (wirePage != NULL && status == B_OK) {
4939 			increment_page_wired_count(context.page);
4940 			*wirePage = context.page;
4941 		}
4942 
4943 		DEBUG_PAGE_ACCESS_END(context.page);
4944 
4945 		break;
4946 	}
4947 
4948 	return status;
4949 }
4950 
4951 
4952 status_t
4953 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4954 {
4955 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4956 }
4957 
4958 status_t
4959 vm_put_physical_page(addr_t vaddr, void* handle)
4960 {
4961 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4962 }
4963 
4964 
4965 status_t
4966 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4967 	void** _handle)
4968 {
4969 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4970 }
4971 
4972 status_t
4973 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4974 {
4975 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4976 }
4977 
4978 
4979 status_t
4980 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4981 {
4982 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4983 }
4984 
4985 status_t
4986 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4987 {
4988 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4989 }
4990 
4991 
4992 void
4993 vm_get_info(system_info* info)
4994 {
4995 	swap_get_info(info);
4996 
4997 	MutexLocker locker(sAvailableMemoryLock);
4998 	info->needed_memory = sNeededMemory;
4999 	info->free_memory = sAvailableMemory;
5000 }
5001 
5002 
5003 uint32
5004 vm_num_page_faults(void)
5005 {
5006 	return sPageFaults;
5007 }
5008 
5009 
5010 off_t
5011 vm_available_memory(void)
5012 {
5013 	MutexLocker locker(sAvailableMemoryLock);
5014 	return sAvailableMemory;
5015 }
5016 
5017 
5018 off_t
5019 vm_available_not_needed_memory(void)
5020 {
5021 	MutexLocker locker(sAvailableMemoryLock);
5022 	return sAvailableMemory - sNeededMemory;
5023 }
5024 
5025 
5026 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
5027 	debugger.
5028 */
5029 off_t
5030 vm_available_not_needed_memory_debug(void)
5031 {
5032 	return sAvailableMemory - sNeededMemory;
5033 }
5034 
5035 
5036 size_t
5037 vm_kernel_address_space_left(void)
5038 {
5039 	return VMAddressSpace::Kernel()->FreeSpace();
5040 }
5041 
5042 
5043 void
5044 vm_unreserve_memory(size_t amount)
5045 {
5046 	mutex_lock(&sAvailableMemoryLock);
5047 
5048 	sAvailableMemory += amount;
5049 
5050 	mutex_unlock(&sAvailableMemoryLock);
5051 }
5052 
5053 
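/*!	Tries to reserve \a amount bytes of the available memory, leaving the
	priority dependent emergency reserve untouched. If the memory isn't
	available immediately, the low resource manager is invoked and the
	function waits up to \a timeout (relative, in microseconds) for it to
	become available.

	A successful reservation has to be undone with vm_unreserve_memory()
	eventually. Minimal usage sketch (illustrative only):

	\code
	if (vm_try_reserve_memory(size, VM_PRIORITY_USER, 1000000) == B_OK) {
		// the memory is now accounted as used/committed
		vm_unreserve_memory(size);
	}
	\endcode

	\return \c B_OK if the memory was reserved, \c B_NO_MEMORY otherwise.
*/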
5054 status_t
5055 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5056 {
5057 	size_t reserve = kMemoryReserveForPriority[priority];
5058 
5059 	MutexLocker locker(sAvailableMemoryLock);
5060 
5061 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5062 
5063 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5064 		sAvailableMemory -= amount;
5065 		return B_OK;
5066 	}
5067 
5068 	if (timeout <= 0)
5069 		return B_NO_MEMORY;
5070 
5071 	// turn timeout into an absolute timeout
5072 	timeout += system_time();
5073 
5074 	// loop until we've got the memory or the timeout occurs
5075 	do {
5076 		sNeededMemory += amount;
5077 
5078 		// call the low resource manager
5079 		locker.Unlock();
5080 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5081 			B_ABSOLUTE_TIMEOUT, timeout);
5082 		locker.Lock();
5083 
5084 		sNeededMemory -= amount;
5085 
5086 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5087 			sAvailableMemory -= amount;
5088 			return B_OK;
5089 		}
5090 	} while (timeout > system_time());
5091 
5092 	return B_NO_MEMORY;
5093 }
5094 
5095 
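/*!	Sets the memory type (caching attributes) of the given area: the new
	type is applied to the area's mapped pages via the translation map and
	to the underlying physical range via arch_vm_set_memory_type(). On
	failure the previous type is restored.
*/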
5096 status_t
5097 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5098 {
5099 	// NOTE: The caller is responsible for synchronizing calls to this function!
5100 
5101 	AddressSpaceReadLocker locker;
5102 	VMArea* area;
5103 	status_t status = locker.SetFromArea(id, area);
5104 	if (status != B_OK)
5105 		return status;
5106 
5107 	// nothing to do, if the type doesn't change
5108 	uint32 oldType = area->MemoryType();
5109 	if (type == oldType)
5110 		return B_OK;
5111 
5112 	// set the memory type of the area and the mapped pages
5113 	VMTranslationMap* map = area->address_space->TranslationMap();
5114 	map->Lock();
5115 	area->SetMemoryType(type);
5116 	map->ProtectArea(area, area->protection);
5117 	map->Unlock();
5118 
5119 	// set the physical memory type
5120 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5121 	if (error != B_OK) {
5122 		// reset the memory type of the area and the mapped pages
5123 		map->Lock();
5124 		area->SetMemoryType(oldType);
5125 		map->ProtectArea(area, area->protection);
5126 		map->Unlock();
5127 		return error;
5128 	}
5129 
5130 	return B_OK;
5131 
5132 }
5133 
5134 
5135 /*!	This function enforces some protection properties:
5136 	 - kernel areas must be W^X (after kernel startup)
5137 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5138 	 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5139 */
5140 static void
5141 fix_protection(uint32* protection)
5142 {
5143 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5144 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5145 			|| (*protection & B_WRITE_AREA) != 0)
5146 		&& !gKernelStartup)
5147 		panic("kernel areas cannot be both writable and executable!");
5148 
5149 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5150 		if ((*protection & B_WRITE_AREA) != 0)
5151 			*protection |= B_KERNEL_WRITE_AREA;
5152 		if ((*protection & B_READ_AREA) != 0)
5153 			*protection |= B_KERNEL_READ_AREA;
5154 	}
5155 }
5156 
5157 
5158 static void
5159 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5160 {
5161 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5162 	info->area = area->id;
5163 	info->address = (void*)area->Base();
5164 	info->size = area->Size();
5165 	info->protection = area->protection;
5166 	info->lock = area->wiring;
5167 	info->team = area->address_space->ID();
5168 	info->copy_count = 0;
5169 	info->in_count = 0;
5170 	info->out_count = 0;
5171 		// TODO: retrieve real values here!
5172 
5173 	VMCache* cache = vm_area_get_locked_cache(area);
5174 
5175 	// Note, this is a simplification; the cache could be larger than this area
5176 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5177 
5178 	vm_area_put_locked_cache(cache);
5179 }
5180 
5181 
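/*!	Resizes the area with the given ID to \a newSize. Only areas backed by a
	RAM cache can be resized; all areas sharing that cache are resized
	together. When shrinking, the affected ranges must not be wired and the
	pages beyond the new size are unmapped. The per-page protections array,
	if in use, is adjusted as well.
*/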
5182 static status_t
5183 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5184 {
5185 	// is newSize a multiple of B_PAGE_SIZE?
5186 	if (newSize & (B_PAGE_SIZE - 1))
5187 		return B_BAD_VALUE;
5188 
5189 	// lock all affected address spaces and the cache
5190 	VMArea* area;
5191 	VMCache* cache;
5192 
5193 	MultiAddressSpaceLocker locker;
5194 	AreaCacheLocker cacheLocker;
5195 
5196 	status_t status;
5197 	size_t oldSize;
5198 	bool anyKernelArea;
5199 	bool restart;
5200 
5201 	do {
5202 		anyKernelArea = false;
5203 		restart = false;
5204 
5205 		locker.Unset();
5206 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5207 		if (status != B_OK)
5208 			return status;
5209 		cacheLocker.SetTo(cache, true);	// already locked
5210 
5211 		// enforce restrictions
5212 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5213 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5214 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5215 				"resize kernel area %" B_PRId32 " (%s)\n",
5216 				team_get_current_team_id(), areaID, area->name);
5217 			return B_NOT_ALLOWED;
5218 		}
5219 		// TODO: Enforce all restrictions (team, etc.)!
5220 
5221 		oldSize = area->Size();
5222 		if (newSize == oldSize)
5223 			return B_OK;
5224 
5225 		if (cache->type != CACHE_TYPE_RAM)
5226 			return B_NOT_ALLOWED;
5227 
5228 		if (oldSize < newSize) {
5229 			// We need to check if all areas of this cache can be resized.
5230 			for (VMArea* current = cache->areas; current != NULL;
5231 					current = current->cache_next) {
5232 				if (!current->address_space->CanResizeArea(current, newSize))
5233 					return B_ERROR;
5234 				anyKernelArea
5235 					|= current->address_space == VMAddressSpace::Kernel();
5236 			}
5237 		} else {
5238 			// We're shrinking the areas, so we must make sure the affected
5239 			// ranges are not wired.
5240 			for (VMArea* current = cache->areas; current != NULL;
5241 					current = current->cache_next) {
5242 				anyKernelArea
5243 					|= current->address_space == VMAddressSpace::Kernel();
5244 
5245 				if (wait_if_area_range_is_wired(current,
5246 						current->Base() + newSize, oldSize - newSize, &locker,
5247 						&cacheLocker)) {
5248 					restart = true;
5249 					break;
5250 				}
5251 			}
5252 		}
5253 	} while (restart);
5254 
5255 	// Okay, looks good so far, so let's do it
5256 
5257 	int priority = kernel && anyKernelArea
5258 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5259 	uint32 allocationFlags = kernel && anyKernelArea
5260 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5261 
5262 	if (oldSize < newSize) {
5263 		// Growing the cache can fail, so we do it first.
5264 		status = cache->Resize(cache->virtual_base + newSize, priority);
5265 		if (status != B_OK)
5266 			return status;
5267 	}
5268 
5269 	for (VMArea* current = cache->areas; current != NULL;
5270 			current = current->cache_next) {
5271 		status = current->address_space->ResizeArea(current, newSize,
5272 			allocationFlags);
5273 		if (status != B_OK)
5274 			break;
5275 
5276 		// We also need to unmap all pages beyond the new size, if the area has
5277 		// shrunk
5278 		if (newSize < oldSize) {
5279 			VMCacheChainLocker cacheChainLocker(cache);
5280 			cacheChainLocker.LockAllSourceCaches();
5281 
5282 			unmap_pages(current, current->Base() + newSize,
5283 				oldSize - newSize);
5284 
5285 			cacheChainLocker.Unlock(cache);
5286 		}
5287 	}
5288 
5289 	if (status == B_OK) {
5290 		// Shrink or grow individual page protections if in use.
5291 		if (area->page_protections != NULL) {
5292 			size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5293 			uint8* newProtections
5294 				= (uint8*)realloc(area->page_protections, bytes);
5295 			if (newProtections == NULL)
5296 				status = B_NO_MEMORY;
5297 			else {
5298 				area->page_protections = newProtections;
5299 
5300 				if (oldSize < newSize) {
5301 					// init the additional page protections to that of the area
5302 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5303 					uint32 areaProtection = area->protection
5304 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5305 					memset(area->page_protections + offset,
5306 						areaProtection | (areaProtection << 4), bytes - offset);
5307 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5308 						uint8& entry = area->page_protections[offset - 1];
5309 						entry = (entry & 0x0f) | (areaProtection << 4);
5310 					}
5311 				}
5312 			}
5313 		}
5314 	}
5315 
5316 	// shrinking the cache can't fail, so we do it now
5317 	if (status == B_OK && newSize < oldSize)
5318 		status = cache->Resize(cache->virtual_base + newSize, priority);
5319 
5320 	if (status != B_OK) {
5321 		// Something failed -- resize the areas back to their original size.
5322 		// This can fail, too, in which case we're seriously screwed.
5323 		for (VMArea* current = cache->areas; current != NULL;
5324 				current = current->cache_next) {
5325 			if (current->address_space->ResizeArea(current, oldSize,
5326 					allocationFlags) != B_OK) {
5327 				panic("vm_resize_area(): Failed and unable to restore the "
5328 					"original state.");
5329 			}
5330 		}
5331 
5332 		cache->Resize(cache->virtual_base + oldSize, priority);
5333 	}
5334 
5335 	// TODO: we must honour the lock restrictions of this area
5336 	return status;
5337 }
5338 
5339 
5340 status_t
5341 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5342 {
5343 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5344 }
5345 
5346 
5347 status_t
5348 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5349 {
5350 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5351 }
5352 
5353 
5354 status_t
5355 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5356 	bool user)
5357 {
5358 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5359 }
5360 
5361 
5362 void
5363 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5364 {
5365 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5366 }
5367 
5368 
5369 /*!	Copies a range of memory directly from/to a page that might not be mapped
5370 	at the moment.
5371 
5372 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5373 	walks through the respective area's cache chain to find the physical page
5374 	and copies from/to it directly.
5375 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5376 	must not cross a page boundary.
5377 
5378 	\param teamID The team ID identifying the address space \a unsafeMemory is
5379 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5380 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5381 		is passed, the address space of the thread returned by
5382 		debug_get_debugged_thread() is used.
5383 	\param unsafeMemory The start of the unsafe memory range to be copied
5384 		from/to.
5385 	\param buffer A safely accessible kernel buffer to be copied from/to.
5386 	\param size The number of bytes to be copied.
5387 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5388 		\a unsafeMemory, the other way around otherwise.
5389 */
5390 status_t
5391 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5392 	size_t size, bool copyToUnsafe)
5393 {
5394 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5395 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5396 		return B_BAD_VALUE;
5397 	}
5398 
5399 	// get the address space for the debugged thread
5400 	VMAddressSpace* addressSpace;
5401 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5402 		addressSpace = VMAddressSpace::Kernel();
5403 	} else if (teamID == B_CURRENT_TEAM) {
5404 		Thread* thread = debug_get_debugged_thread();
5405 		if (thread == NULL || thread->team == NULL)
5406 			return B_BAD_ADDRESS;
5407 
5408 		addressSpace = thread->team->address_space;
5409 	} else
5410 		addressSpace = VMAddressSpace::DebugGet(teamID);
5411 
5412 	if (addressSpace == NULL)
5413 		return B_BAD_ADDRESS;
5414 
5415 	// get the area
5416 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5417 	if (area == NULL)
5418 		return B_BAD_ADDRESS;
5419 
5420 	// search the page
5421 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5422 		+ area->cache_offset;
5423 	VMCache* cache = area->cache;
5424 	vm_page* page = NULL;
5425 	while (cache != NULL) {
5426 		page = cache->DebugLookupPage(cacheOffset);
5427 		if (page != NULL)
5428 			break;
5429 
5430 		// Page not found in this cache -- if it is paged out, we must not try
5431 		// to get it from lower caches.
5432 		if (cache->DebugHasPage(cacheOffset))
5433 			break;
5434 
5435 		cache = cache->source;
5436 	}
5437 
5438 	if (page == NULL)
5439 		return B_UNSUPPORTED;
5440 
5441 	// copy from/to physical memory
5442 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5443 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5444 
5445 	if (copyToUnsafe) {
5446 		if (page->Cache() != area->cache)
5447 			return B_UNSUPPORTED;
5448 
5449 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5450 	}
5451 
5452 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5453 }
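
/*	Usage sketch (illustrative; \c unsafeAddress is a hypothetical pointer
	obtained from a debugged team): reading a value from a page that may not
	currently be mapped, from within a kernel debugger command.

		uint32 value;
		if (vm_debug_copy_page_memory(B_CURRENT_TEAM, unsafeAddress, &value,
				sizeof(value), false) == B_OK) {
			// value now contains the data, fetched via the physical page
		}
*/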
5454 
5455 
5456 /** Validate that a memory range is either fully in kernel space, or fully in
5457  *  userspace */
5458 static inline bool
5459 validate_memory_range(const void* addr, size_t size)
5460 {
5461 	addr_t address = (addr_t)addr;
5462 
5463 	// Check for overflows on all addresses.
5464 	if ((address + size) < address)
5465 		return false;
5466 
5467 	// Validate that the address range does not cross the kernel/user boundary.
5468 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5469 }
5470 
5471 
5472 //	#pragma mark - kernel public API
5473 
5474 
5475 status_t
5476 user_memcpy(void* to, const void* from, size_t size)
5477 {
5478 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5479 		return B_BAD_ADDRESS;
5480 
5481 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5482 		return B_BAD_ADDRESS;
5483 
5484 	return B_OK;
5485 }
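
/*	Typical usage sketch (illustrative; \c userInfo is a hypothetical pointer
	passed in by a syscall): validate the userland address first, then copy
	the data into a kernel-side buffer before using it.

		area_info info;
		if (!IS_USER_ADDRESS(userInfo)
			|| user_memcpy(&info, userInfo, sizeof(area_info)) != B_OK)
			return B_BAD_ADDRESS;
*/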
5486 
5487 
5488 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5489 	the string in \a to, NULL-terminating the result.
5490 
5491 	\param to Pointer to the destination C-string.
5492 	\param from Pointer to the source C-string.
5493 	\param size Size in bytes of the string buffer pointed to by \a to.
5494 
5495 	\return strlen(\a from).
5496 */
5497 ssize_t
5498 user_strlcpy(char* to, const char* from, size_t size)
5499 {
5500 	if (to == NULL && size != 0)
5501 		return B_BAD_VALUE;
5502 	if (from == NULL)
5503 		return B_BAD_ADDRESS;
5504 
5505 	// Protect the source address from overflows.
5506 	size_t maxSize = size;
5507 	if ((addr_t)from + maxSize < (addr_t)from)
5508 		maxSize -= (addr_t)from + maxSize;
5509 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5510 		maxSize = USER_TOP - (addr_t)from;
5511 
5512 	if (!validate_memory_range(to, maxSize))
5513 		return B_BAD_ADDRESS;
5514 
5515 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5516 	if (result < 0)
5517 		return result;
5518 
5519 	// If we hit the address overflow boundary, fail.
5520 	if ((size_t)result >= maxSize && maxSize < size)
5521 		return B_BAD_ADDRESS;
5522 
5523 	return result;
5524 }
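
/*	Usage sketch (illustrative; \c userName is a hypothetical userland
	pointer): copy a name into a kernel buffer and detect truncation. A
	negative result indicates a fault; a result >= the buffer size means the
	source string did not fit.

		char name[B_OS_NAME_LENGTH];
		ssize_t length = user_strlcpy(name, userName, sizeof(name));
		if (length < 0)
			return B_BAD_ADDRESS;
		if ((size_t)length >= sizeof(name))
			return B_NAME_TOO_LONG;
*/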
5525 
5526 
5527 status_t
5528 user_memset(void* s, char c, size_t count)
5529 {
5530 	if (!validate_memory_range(s, count))
5531 		return B_BAD_ADDRESS;
5532 
5533 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5534 		return B_BAD_ADDRESS;
5535 
5536 	return B_OK;
5537 }
5538 
5539 
5540 /*!	Wires a single page at the given address.
5541 
5542 	\param team The team whose address space the address belongs to. Supports
5543 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5544 		parameter is ignored.
5545 	\param address The virtual address to wire down. Does not need to
5546 		be page aligned.
5547 	\param writable If \c true the page shall be writable.
5548 	\param info On success the info is filled in, among other things
5549 		containing the physical address the given virtual one translates to.
5550 	\return \c B_OK if the page could be wired, another error code otherwise.
5551 */
5552 status_t
5553 vm_wire_page(team_id team, addr_t address, bool writable,
5554 	VMPageWiringInfo* info)
5555 {
5556 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5557 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5558 
5559 	// compute the page protection that is required
5560 	bool isUser = IS_USER_ADDRESS(address);
5561 	uint32 requiredProtection = PAGE_PRESENT
5562 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5563 	if (writable)
5564 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5565 
5566 	// get and read lock the address space
5567 	VMAddressSpace* addressSpace = NULL;
5568 	if (isUser) {
5569 		if (team == B_CURRENT_TEAM)
5570 			addressSpace = VMAddressSpace::GetCurrent();
5571 		else
5572 			addressSpace = VMAddressSpace::Get(team);
5573 	} else
5574 		addressSpace = VMAddressSpace::GetKernel();
5575 	if (addressSpace == NULL)
5576 		return B_ERROR;
5577 
5578 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5579 
5580 	VMTranslationMap* map = addressSpace->TranslationMap();
5581 	status_t error = B_OK;
5582 
5583 	// get the area
5584 	VMArea* area = addressSpace->LookupArea(pageAddress);
5585 	if (area == NULL) {
5586 		addressSpace->Put();
5587 		return B_BAD_ADDRESS;
5588 	}
5589 
5590 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5591 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5592 
5593 	// mark the area range wired
5594 	area->Wire(&info->range);
5595 
5596 	// Lock the area's cache chain and the translation map. Needed to look
5597 	// up the page and play with its wired count.
5598 	cacheChainLocker.LockAllSourceCaches();
5599 	map->Lock();
5600 
5601 	phys_addr_t physicalAddress;
5602 	uint32 flags;
5603 	vm_page* page;
5604 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5605 		&& (flags & requiredProtection) == requiredProtection
5606 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5607 			!= NULL) {
5608 		// Already mapped with the correct permissions -- just increment
5609 		// the page's wired count.
5610 		increment_page_wired_count(page);
5611 
5612 		map->Unlock();
5613 		cacheChainLocker.Unlock();
5614 		addressSpaceLocker.Unlock();
5615 	} else {
5616 		// Let vm_soft_fault() map the page for us, if possible. We need
5617 		// to fully unlock to avoid deadlocks. Since we have already
5618 		// wired the area itself, nothing disturbing will happen with it
5619 		// in the meantime.
5620 		map->Unlock();
5621 		cacheChainLocker.Unlock();
5622 		addressSpaceLocker.Unlock();
5623 
5624 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5625 			isUser, &page);
5626 
5627 		if (error != B_OK) {
5628 			// The page could not be mapped -- clean up.
5629 			VMCache* cache = vm_area_get_locked_cache(area);
5630 			area->Unwire(&info->range);
5631 			cache->ReleaseRefAndUnlock();
5632 			addressSpace->Put();
5633 			return error;
5634 		}
5635 	}
5636 
5637 	info->physicalAddress
5638 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5639 			+ address % B_PAGE_SIZE;
5640 	info->page = page;
5641 
5642 	return B_OK;
5643 }
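
/*	Usage sketch (illustrative; \c userBuffer is a hypothetical userland
	address): wire a single page to obtain a stable physical address, use it,
	then unwire it again via vm_unwire_page().

		VMPageWiringInfo info;
		status_t error = vm_wire_page(B_CURRENT_TEAM, (addr_t)userBuffer,
			true, &info);
		if (error != B_OK)
			return error;
		// ... use info.physicalAddress (and info.page) ...
		vm_unwire_page(&info);
*/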
5644 
5645 
5646 /*!	Unwires a single page previously wired via vm_wire_page().
5647 
5648 	\param info The same object passed to vm_wire_page() before.
5649 */
5650 void
5651 vm_unwire_page(VMPageWiringInfo* info)
5652 {
5653 	// lock the address space
5654 	VMArea* area = info->range.area;
5655 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5656 		// takes over our reference
5657 
5658 	// lock the top cache
5659 	VMCache* cache = vm_area_get_locked_cache(area);
5660 	VMCacheChainLocker cacheChainLocker(cache);
5661 
5662 	if (info->page->Cache() != cache) {
5663 		// The page is not in the top cache, so we lock the whole cache chain
5664 		// before touching the page's wired count.
5665 		cacheChainLocker.LockAllSourceCaches();
5666 	}
5667 
5668 	decrement_page_wired_count(info->page);
5669 
5670 	// remove the wired range from the area
5671 	area->Unwire(&info->range);
5672 
5673 	cacheChainLocker.Unlock();
5674 }
5675 
5676 
5677 /*!	Wires down the given address range in the specified team's address space.
5678 
5679 	If successful the function
5680 	- acquires a reference to the specified team's address space,
5681 	- adds respective wired ranges to all areas that intersect with the given
5682 	  address range,
5683 	- makes sure all pages in the given address range are mapped with the
5684 	  requested access permissions and increments their wired count.
5685 
5686 	It fails when \a team doesn't specify a valid address space, when any part
5687 	of the specified address range is not covered by areas, when the concerned
5688 	areas don't allow mapping with the requested permissions, or when mapping
5689 	failed for another reason.
5690 
5691 	When successful the call must be balanced by an unlock_memory_etc() call with
5692 	the exact same parameters.
5693 
5694 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5695 		supported.
5696 	\param address The start of the address range to be wired.
5697 	\param numBytes The size of the address range to be wired.
5698 	\param flags Flags. Currently only \c B_READ_DEVICE is defined; when set,
5699 		the memory is only read (the device reads from memory), so the range
5700 		does not need to be wired writable. Otherwise it is wired writable.
5701 	\return \c B_OK on success, another error code otherwise.
5702 */
5703 status_t
5704 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5705 {
5706 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5707 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5708 
5709 	// compute the page protection that is required
5710 	bool isUser = IS_USER_ADDRESS(address);
5711 	bool writable = (flags & B_READ_DEVICE) == 0;
5712 	uint32 requiredProtection = PAGE_PRESENT
5713 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5714 	if (writable)
5715 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5716 
5717 	uint32 mallocFlags = isUser
5718 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5719 
5720 	// get and read lock the address space
5721 	VMAddressSpace* addressSpace = NULL;
5722 	if (isUser) {
5723 		if (team == B_CURRENT_TEAM)
5724 			addressSpace = VMAddressSpace::GetCurrent();
5725 		else
5726 			addressSpace = VMAddressSpace::Get(team);
5727 	} else
5728 		addressSpace = VMAddressSpace::GetKernel();
5729 	if (addressSpace == NULL)
5730 		return B_ERROR;
5731 
5732 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5733 		// We get a new address space reference here. The one we got above will
5734 		// be freed by unlock_memory_etc().
5735 
5736 	VMTranslationMap* map = addressSpace->TranslationMap();
5737 	status_t error = B_OK;
5738 
5739 	// iterate through all concerned areas
5740 	addr_t nextAddress = lockBaseAddress;
5741 	while (nextAddress != lockEndAddress) {
5742 		// get the next area
5743 		VMArea* area = addressSpace->LookupArea(nextAddress);
5744 		if (area == NULL) {
5745 			error = B_BAD_ADDRESS;
5746 			break;
5747 		}
5748 
5749 		addr_t areaStart = nextAddress;
5750 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5751 
5752 		// allocate the wired range (do that before locking the cache to avoid
5753 		// deadlocks)
5754 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5755 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5756 		if (range == NULL) {
5757 			error = B_NO_MEMORY;
5758 			break;
5759 		}
5760 
5761 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5762 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5763 
5764 		// mark the area range wired
5765 		area->Wire(range);
5766 
5767 		// Depending on the area cache type and the wiring, we may not need to
5768 		// look at the individual pages.
5769 		if (area->cache_type == CACHE_TYPE_NULL
5770 			|| area->cache_type == CACHE_TYPE_DEVICE
5771 			|| area->wiring == B_FULL_LOCK
5772 			|| area->wiring == B_CONTIGUOUS) {
5773 			nextAddress = areaEnd;
5774 			continue;
5775 		}
5776 
5777 		// Lock the area's cache chain and the translation map. Needed to look
5778 		// up pages and play with their wired count.
5779 		cacheChainLocker.LockAllSourceCaches();
5780 		map->Lock();
5781 
5782 		// iterate through the pages and wire them
5783 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5784 			phys_addr_t physicalAddress;
5785 			uint32 flags;
5786 
5787 			vm_page* page;
5788 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5789 				&& (flags & requiredProtection) == requiredProtection
5790 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5791 					!= NULL) {
5792 				// Already mapped with the correct permissions -- just increment
5793 				// the page's wired count.
5794 				increment_page_wired_count(page);
5795 			} else {
5796 				// Let vm_soft_fault() map the page for us, if possible. We need
5797 				// to fully unlock to avoid deadlocks. Since we have already
5798 				// wired the area itself, nothing disturbing will happen with it
5799 				// in the meantime.
5800 				map->Unlock();
5801 				cacheChainLocker.Unlock();
5802 				addressSpaceLocker.Unlock();
5803 
5804 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5805 					false, isUser, &page);
5806 
5807 				addressSpaceLocker.Lock();
5808 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5809 				cacheChainLocker.LockAllSourceCaches();
5810 				map->Lock();
5811 			}
5812 
5813 			if (error != B_OK)
5814 				break;
5815 		}
5816 
5817 		map->Unlock();
5818 
5819 		if (error == B_OK) {
5820 			cacheChainLocker.Unlock();
5821 		} else {
5822 			// An error occurred, so abort right here. If the current address
5823 			// is the first in this area, unwire the area, since we won't get
5824 			// to it when reverting what we've done so far.
5825 			if (nextAddress == areaStart) {
5826 				area->Unwire(range);
5827 				cacheChainLocker.Unlock();
5828 				range->~VMAreaWiredRange();
5829 				free_etc(range, mallocFlags);
5830 			} else
5831 				cacheChainLocker.Unlock();
5832 
5833 			break;
5834 		}
5835 	}
5836 
5837 	if (error != B_OK) {
5838 		// An error occurred, so unwire all that we've already wired. Note that
5839 		// even if not a single page was wired, unlock_memory_etc() is called
5840 		// to put the address space reference.
5841 		addressSpaceLocker.Unlock();
5842 		unlock_memory_etc(team, (void*)lockBaseAddress,
5843 			nextAddress - lockBaseAddress, flags);
5844 	}
5845 
5846 	return error;
5847 }
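
/*	Usage sketch (illustrative; \c buffer and \c length are hypothetical): a
	driver wires a userland buffer writable before a transfer into it, and
	releases it afterwards with exactly the same parameters.

		status_t error = lock_memory_etc(team, buffer, length, 0);
		if (error != B_OK)
			return error;
		// ... transfer data into the buffer ...
		unlock_memory_etc(team, buffer, length, 0);
*/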
5848 
5849 
5850 status_t
5851 lock_memory(void* address, size_t numBytes, uint32 flags)
5852 {
5853 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5854 }
5855 
5856 
5857 /*!	Unwires an address range previously wired with lock_memory_etc().
5858 
5859 	Note that a call to this function must balance a previous lock_memory_etc()
5860 	call with exactly the same parameters.
5861 */
5862 status_t
5863 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5864 {
5865 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5866 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5867 
5868 	// compute the page protection that is required
5869 	bool isUser = IS_USER_ADDRESS(address);
5870 	bool writable = (flags & B_READ_DEVICE) == 0;
5871 	uint32 requiredProtection = PAGE_PRESENT
5872 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5873 	if (writable)
5874 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5875 
5876 	uint32 mallocFlags = isUser
5877 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5878 
5879 	// get and read lock the address space
5880 	VMAddressSpace* addressSpace = NULL;
5881 	if (isUser) {
5882 		if (team == B_CURRENT_TEAM)
5883 			addressSpace = VMAddressSpace::GetCurrent();
5884 		else
5885 			addressSpace = VMAddressSpace::Get(team);
5886 	} else
5887 		addressSpace = VMAddressSpace::GetKernel();
5888 	if (addressSpace == NULL)
5889 		return B_ERROR;
5890 
5891 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5892 		// Take over the address space reference. We don't unlock until we're
5893 		// done.
5894 
5895 	VMTranslationMap* map = addressSpace->TranslationMap();
5896 	status_t error = B_OK;
5897 
5898 	// iterate through all concerned areas
5899 	addr_t nextAddress = lockBaseAddress;
5900 	while (nextAddress != lockEndAddress) {
5901 		// get the next area
5902 		VMArea* area = addressSpace->LookupArea(nextAddress);
5903 		if (area == NULL) {
5904 			error = B_BAD_ADDRESS;
5905 			break;
5906 		}
5907 
5908 		addr_t areaStart = nextAddress;
5909 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5910 
5911 		// Lock the area's top cache. This is a requirement for
5912 		// VMArea::Unwire().
5913 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5914 
5915 		// Depending on the area cache type and the wiring, we may not need to
5916 		// look at the individual pages.
5917 		if (area->cache_type == CACHE_TYPE_NULL
5918 			|| area->cache_type == CACHE_TYPE_DEVICE
5919 			|| area->wiring == B_FULL_LOCK
5920 			|| area->wiring == B_CONTIGUOUS) {
5921 			// unwire the range (to avoid deadlocks we delete the range after
5922 			// unlocking the cache)
5923 			nextAddress = areaEnd;
5924 			VMAreaWiredRange* range = area->Unwire(areaStart,
5925 				areaEnd - areaStart, writable);
5926 			cacheChainLocker.Unlock();
5927 			if (range != NULL) {
5928 				range->~VMAreaWiredRange();
5929 				free_etc(range, mallocFlags);
5930 			}
5931 			continue;
5932 		}
5933 
5934 		// Lock the area's cache chain and the translation map. Needed to look
5935 		// up pages and play with their wired count.
5936 		cacheChainLocker.LockAllSourceCaches();
5937 		map->Lock();
5938 
5939 		// iterate through the pages and unwire them
5940 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5941 			phys_addr_t physicalAddress;
5942 			uint32 flags;
5943 
5944 			vm_page* page;
5945 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5946 				&& (flags & PAGE_PRESENT) != 0
5947 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5948 					!= NULL) {
5949 				// The page is still mapped -- just decrement
5950 				// its wired count.
5951 				decrement_page_wired_count(page);
5952 			} else {
5953 				panic("unlock_memory_etc(): Failed to unwire page: address "
5954 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5955 					nextAddress);
5956 				error = B_BAD_VALUE;
5957 				break;
5958 			}
5959 		}
5960 
5961 		map->Unlock();
5962 
5963 		// All pages are unwired. Remove the area's wired range as well (to
5964 		// avoid deadlocks we delete the range after unlocking the cache).
5965 		VMAreaWiredRange* range = area->Unwire(areaStart,
5966 			areaEnd - areaStart, writable);
5967 
5968 		cacheChainLocker.Unlock();
5969 
5970 		if (range != NULL) {
5971 			range->~VMAreaWiredRange();
5972 			free_etc(range, mallocFlags);
5973 		}
5974 
5975 		if (error != B_OK)
5976 			break;
5977 	}
5978 
5979 	// get rid of the address space reference lock_memory_etc() acquired
5980 	addressSpace->Put();
5981 
5982 	return error;
5983 }
5984 
5985 
5986 status_t
5987 unlock_memory(void* address, size_t numBytes, uint32 flags)
5988 {
5989 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5990 }
5991 
5992 
5993 /*!	Similar to get_memory_map(), but also allows specifying the address space
5994 	for the memory in question and has saner semantics.
5995 	Returns \c B_OK when the complete range could be translated or
5996 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5997 	case the actual number of entries is written to \c *_numEntries. Any other
5998 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5999 	in this case.
6000 */
6001 status_t
6002 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
6003 	physical_entry* table, uint32* _numEntries)
6004 {
6005 	uint32 numEntries = *_numEntries;
6006 	*_numEntries = 0;
6007 
6008 	VMAddressSpace* addressSpace;
6009 	addr_t virtualAddress = (addr_t)address;
6010 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
6011 	phys_addr_t physicalAddress;
6012 	status_t status = B_OK;
6013 	int32 index = -1;
6014 	addr_t offset = 0;
6015 	bool interrupts = are_interrupts_enabled();
6016 
6017 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6018 		"entries)\n", team, address, numBytes, numEntries));
6019 
6020 	if (numEntries == 0 || numBytes == 0)
6021 		return B_BAD_VALUE;
6022 
6023 	// in which address space is the address to be found?
6024 	if (IS_USER_ADDRESS(virtualAddress)) {
6025 		if (team == B_CURRENT_TEAM)
6026 			addressSpace = VMAddressSpace::GetCurrent();
6027 		else
6028 			addressSpace = VMAddressSpace::Get(team);
6029 	} else
6030 		addressSpace = VMAddressSpace::GetKernel();
6031 
6032 	if (addressSpace == NULL)
6033 		return B_ERROR;
6034 
6035 	VMTranslationMap* map = addressSpace->TranslationMap();
6036 
6037 	if (interrupts)
6038 		map->Lock();
6039 
6040 	while (offset < numBytes) {
6041 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6042 		uint32 flags;
6043 
6044 		if (interrupts) {
6045 			status = map->Query((addr_t)address + offset, &physicalAddress,
6046 				&flags);
6047 		} else {
6048 			status = map->QueryInterrupt((addr_t)address + offset,
6049 				&physicalAddress, &flags);
6050 		}
6051 		if (status < B_OK)
6052 			break;
6053 		if ((flags & PAGE_PRESENT) == 0) {
6054 			panic("get_memory_map() called on unmapped memory!");
6055 			return B_BAD_ADDRESS;
6056 		}
6057 
6058 		if (index < 0 && pageOffset > 0) {
6059 			physicalAddress += pageOffset;
6060 			if (bytes > B_PAGE_SIZE - pageOffset)
6061 				bytes = B_PAGE_SIZE - pageOffset;
6062 		}
6063 
6064 		// need to switch to the next physical_entry?
6065 		if (index < 0 || table[index].address
6066 				!= physicalAddress - table[index].size) {
6067 			if ((uint32)++index + 1 > numEntries) {
6068 				// table too small
6069 				break;
6070 			}
6071 			table[index].address = physicalAddress;
6072 			table[index].size = bytes;
6073 		} else {
6074 			// page fits into the current entry
6075 			table[index].size += bytes;
6076 		}
6077 
6078 		offset += bytes;
6079 	}
6080 
6081 	if (interrupts)
6082 		map->Unlock();
6083 
6084 	if (status != B_OK)
6085 		return status;
6086 
6087 	if ((uint32)index + 1 > numEntries) {
6088 		*_numEntries = index;
6089 		return B_BUFFER_OVERFLOW;
6090 	}
6091 
6092 	*_numEntries = index + 1;
6093 	return B_OK;
6094 }
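
/*	Usage sketch (illustrative; \c buffer and \c length are hypothetical):
	translating a previously wired buffer into a list of physical ranges,
	e.g. for building a scatter/gather table.

		physical_entry table[8];
		uint32 numEntries = 8;
		status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
			table, &numEntries);
		if (error == B_OK || error == B_BUFFER_OVERFLOW) {
			// numEntries now holds the number of valid entries in table
		}
*/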
6095 
6096 
6097 /*!	According to the BeBook, this function should always succeed.
6098 	This is no longer the case.
6099 */
6100 extern "C" int32
6101 __get_memory_map_haiku(const void* address, size_t numBytes,
6102 	physical_entry* table, int32 numEntries)
6103 {
6104 	uint32 entriesRead = numEntries;
6105 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6106 		table, &entriesRead);
6107 	if (error != B_OK)
6108 		return error;
6109 
6110 	// close the entry list
6111 
6112 	// if it's only one entry, we will silently accept the missing ending
6113 	if (numEntries == 1)
6114 		return B_OK;
6115 
6116 	if (entriesRead + 1 > (uint32)numEntries)
6117 		return B_BUFFER_OVERFLOW;
6118 
6119 	table[entriesRead].address = 0;
6120 	table[entriesRead].size = 0;
6121 
6122 	return B_OK;
6123 }
6124 
6125 
6126 area_id
6127 area_for(void* address)
6128 {
6129 	return vm_area_for((addr_t)address, true);
6130 }
6131 
6132 
6133 area_id
6134 find_area(const char* name)
6135 {
6136 	return VMAreas::Find(name);
6137 }
6138 
6139 
6140 status_t
6141 _get_area_info(area_id id, area_info* info, size_t size)
6142 {
6143 	if (size != sizeof(area_info) || info == NULL)
6144 		return B_BAD_VALUE;
6145 
6146 	AddressSpaceReadLocker locker;
6147 	VMArea* area;
6148 	status_t status = locker.SetFromArea(id, area);
6149 	if (status != B_OK)
6150 		return status;
6151 
6152 	fill_area_info(area, info, size);
6153 	return B_OK;
6154 }
6155 
6156 
6157 status_t
6158 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6159 {
6160 	addr_t nextBase = *(addr_t*)cookie;
6161 
6162 	// we're already through the list
6163 	if (nextBase == (addr_t)-1)
6164 		return B_ENTRY_NOT_FOUND;
6165 
6166 	if (team == B_CURRENT_TEAM)
6167 		team = team_get_current_team_id();
6168 
6169 	AddressSpaceReadLocker locker(team);
6170 	if (!locker.IsLocked())
6171 		return B_BAD_TEAM_ID;
6172 
6173 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6174 	if (area == NULL) {
6175 		nextBase = (addr_t)-1;
6176 		return B_ENTRY_NOT_FOUND;
6177 	}
6178 
6179 	fill_area_info(area, info, size);
6180 	*cookie = (ssize_t)(area->Base() + 1);
6181 
6182 	return B_OK;
6183 }
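
/*	Usage sketch (illustrative): iterating over all areas of a team. The
	cookie must be initialized to 0 and is advanced by each call.

		ssize_t cookie = 0;
		area_info info;
		while (_get_next_area_info(team, &cookie, &info, sizeof(info))
				== B_OK) {
			// ... inspect info.name, info.address, info.size, ...
		}
*/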
6184 
6185 
6186 status_t
6187 set_area_protection(area_id area, uint32 newProtection)
6188 {
6189 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6190 		newProtection, true);
6191 }
6192 
6193 
6194 status_t
6195 resize_area(area_id areaID, size_t newSize)
6196 {
6197 	return vm_resize_area(areaID, newSize, true);
6198 }
6199 
6200 
6201 /*!	Transfers the specified area to a new team. The caller must be the owner
6202 	of the area.
6203 */
6204 area_id
6205 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6206 	bool kernel)
6207 {
6208 	area_info info;
6209 	status_t status = get_area_info(id, &info);
6210 	if (status != B_OK)
6211 		return status;
6212 
6213 	if (info.team != thread_get_current_thread()->team->id)
6214 		return B_PERMISSION_DENIED;
6215 
6216 	// We need to mark the area cloneable so the following operations work.
6217 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6218 	if (status != B_OK)
6219 		return status;
6220 
6221 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6222 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6223 	if (clonedArea < 0)
6224 		return clonedArea;
6225 
6226 	status = vm_delete_area(info.team, id, kernel);
6227 	if (status != B_OK) {
6228 		vm_delete_area(target, clonedArea, kernel);
6229 		return status;
6230 	}
6231 
6232 	// Now we can reset the protection to whatever it was before.
6233 	set_area_protection(clonedArea, info.protection);
6234 
6235 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6236 
6237 	return clonedArea;
6238 }
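
/*	Usage sketch (illustrative; \c targetTeam is hypothetical): handing an
	area owned by the current team over to another team, letting the kernel
	pick the address in the target address space.

		void* address = NULL;
		area_id newArea = transfer_area(id, &address, B_ANY_ADDRESS,
			targetTeam, true);
		if (newArea < 0)
			return newArea;
*/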
6239 
6240 
6241 extern "C" area_id
6242 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6243 	size_t numBytes, uint32 addressSpec, uint32 protection,
6244 	void** _virtualAddress)
6245 {
6246 	if (!arch_vm_supports_protection(protection))
6247 		return B_NOT_SUPPORTED;
6248 
6249 	fix_protection(&protection);
6250 
6251 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6252 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6253 		false);
6254 }
6255 
6256 
6257 area_id
6258 clone_area(const char* name, void** _address, uint32 addressSpec,
6259 	uint32 protection, area_id source)
6260 {
6261 	if ((protection & B_KERNEL_PROTECTION) == 0)
6262 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6263 
6264 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6265 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6266 }
6267 
6268 
6269 area_id
6270 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6271 	uint32 protection, uint32 flags, uint32 guardSize,
6272 	const virtual_address_restrictions* virtualAddressRestrictions,
6273 	const physical_address_restrictions* physicalAddressRestrictions,
6274 	void** _address)
6275 {
6276 	fix_protection(&protection);
6277 
6278 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6279 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6280 		true, _address);
6281 }
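
/*	Usage sketch (illustrative; the area name and \c size are hypothetical):
	creating a fully locked kernel area at any kernel address, the way many
	drivers allocate wired buffers.

		void* address;
		virtual_address_restrictions virtualRestrictions = {};
		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
		physical_address_restrictions physicalRestrictions = {};
		area_id area = create_area_etc(B_SYSTEM_TEAM, "some buffer", size,
			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
			&virtualRestrictions, &physicalRestrictions, &address);
*/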
6282 
6283 
6284 extern "C" area_id
6285 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6286 	size_t size, uint32 lock, uint32 protection)
6287 {
6288 	fix_protection(&protection);
6289 
6290 	virtual_address_restrictions virtualRestrictions = {};
6291 	virtualRestrictions.address = *_address;
6292 	virtualRestrictions.address_specification = addressSpec;
6293 	physical_address_restrictions physicalRestrictions = {};
6294 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6295 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6296 		true, _address);
6297 }
6298 
6299 
6300 status_t
6301 delete_area(area_id area)
6302 {
6303 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6304 }
6305 
6306 
6307 //	#pragma mark - Userland syscalls
6308 
6309 
6310 status_t
6311 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6312 	addr_t size)
6313 {
6314 	// filter out some unavailable values (for userland)
6315 	switch (addressSpec) {
6316 		case B_ANY_KERNEL_ADDRESS:
6317 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6318 			return B_BAD_VALUE;
6319 	}
6320 
6321 	addr_t address;
6322 
6323 	if (!IS_USER_ADDRESS(userAddress)
6324 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6325 		return B_BAD_ADDRESS;
6326 
6327 	status_t status = vm_reserve_address_range(
6328 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6329 		RESERVED_AVOID_BASE);
6330 	if (status != B_OK)
6331 		return status;
6332 
6333 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6334 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6335 			(void*)address, size);
6336 		return B_BAD_ADDRESS;
6337 	}
6338 
6339 	return B_OK;
6340 }
6341 
6342 
6343 status_t
6344 _user_unreserve_address_range(addr_t address, addr_t size)
6345 {
6346 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6347 		(void*)address, size);
6348 }
6349 
6350 
6351 area_id
6352 _user_area_for(void* address)
6353 {
6354 	return vm_area_for((addr_t)address, false);
6355 }
6356 
6357 
6358 area_id
6359 _user_find_area(const char* userName)
6360 {
6361 	char name[B_OS_NAME_LENGTH];
6362 
6363 	if (!IS_USER_ADDRESS(userName)
6364 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6365 		return B_BAD_ADDRESS;
6366 
6367 	return find_area(name);
6368 }
6369 
6370 
6371 status_t
6372 _user_get_area_info(area_id area, area_info* userInfo)
6373 {
6374 	if (!IS_USER_ADDRESS(userInfo))
6375 		return B_BAD_ADDRESS;
6376 
6377 	area_info info;
6378 	status_t status = get_area_info(area, &info);
6379 	if (status < B_OK)
6380 		return status;
6381 
6382 	// TODO: do we want to prevent userland from seeing kernel protections?
6383 	//info.protection &= B_USER_PROTECTION;
6384 
6385 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6386 		return B_BAD_ADDRESS;
6387 
6388 	return status;
6389 }
6390 
6391 
6392 status_t
6393 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6394 {
6395 	ssize_t cookie;
6396 
6397 	if (!IS_USER_ADDRESS(userCookie)
6398 		|| !IS_USER_ADDRESS(userInfo)
6399 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6400 		return B_BAD_ADDRESS;
6401 
6402 	area_info info;
6403 	status_t status = _get_next_area_info(team, &cookie, &info,
6404 		sizeof(area_info));
6405 	if (status != B_OK)
6406 		return status;
6407 
6408 	//info.protection &= B_USER_PROTECTION;
6409 
6410 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6411 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6412 		return B_BAD_ADDRESS;
6413 
6414 	return status;
6415 }
6416 
6417 
6418 status_t
6419 _user_set_area_protection(area_id area, uint32 newProtection)
6420 {
6421 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6422 		return B_BAD_VALUE;
6423 
6424 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6425 		newProtection, false);
6426 }
6427 
6428 
6429 status_t
6430 _user_resize_area(area_id area, size_t newSize)
6431 {
6432 	// TODO: Since we restrict deleting of areas to those owned by the team,
6433 	// we should also do that for resizing (check other functions, too).
6434 	return vm_resize_area(area, newSize, false);
6435 }
6436 
6437 
6438 area_id
6439 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6440 	team_id target)
6441 {
6442 	// filter out some unavailable values (for userland)
6443 	switch (addressSpec) {
6444 		case B_ANY_KERNEL_ADDRESS:
6445 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6446 			return B_BAD_VALUE;
6447 	}
6448 
6449 	void* address;
6450 	if (!IS_USER_ADDRESS(userAddress)
6451 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6452 		return B_BAD_ADDRESS;
6453 
6454 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6455 	if (newArea < B_OK)
6456 		return newArea;
6457 
6458 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6459 		return B_BAD_ADDRESS;
6460 
6461 	return newArea;
6462 }
6463 
6464 
6465 area_id
6466 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6467 	uint32 protection, area_id sourceArea)
6468 {
6469 	char name[B_OS_NAME_LENGTH];
6470 	void* address;
6471 
6472 	// filter out some unavailable values (for userland)
6473 	switch (addressSpec) {
6474 		case B_ANY_KERNEL_ADDRESS:
6475 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6476 			return B_BAD_VALUE;
6477 	}
6478 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6479 		return B_BAD_VALUE;
6480 
6481 	if (!IS_USER_ADDRESS(userName)
6482 		|| !IS_USER_ADDRESS(userAddress)
6483 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6484 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6485 		return B_BAD_ADDRESS;
6486 
6487 	fix_protection(&protection);
6488 
6489 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6490 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6491 		false);
6492 	if (clonedArea < B_OK)
6493 		return clonedArea;
6494 
6495 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6496 		delete_area(clonedArea);
6497 		return B_BAD_ADDRESS;
6498 	}
6499 
6500 	return clonedArea;
6501 }
6502 
6503 
6504 area_id
6505 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6506 	size_t size, uint32 lock, uint32 protection)
6507 {
6508 	char name[B_OS_NAME_LENGTH];
6509 	void* address;
6510 
6511 	// filter out some unavailable values (for userland)
6512 	switch (addressSpec) {
6513 		case B_ANY_KERNEL_ADDRESS:
6514 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6515 			return B_BAD_VALUE;
6516 	}
6517 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6518 		return B_BAD_VALUE;
6519 
6520 	if (!IS_USER_ADDRESS(userName)
6521 		|| !IS_USER_ADDRESS(userAddress)
6522 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6523 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6524 		return B_BAD_ADDRESS;
6525 
6526 	if (addressSpec == B_EXACT_ADDRESS
6527 		&& IS_KERNEL_ADDRESS(address))
6528 		return B_BAD_VALUE;
6529 
6530 	if (addressSpec == B_ANY_ADDRESS)
6531 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6532 	if (addressSpec == B_BASE_ADDRESS)
6533 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6534 
6535 	fix_protection(&protection);
6536 
6537 	virtual_address_restrictions virtualRestrictions = {};
6538 	virtualRestrictions.address = address;
6539 	virtualRestrictions.address_specification = addressSpec;
6540 	physical_address_restrictions physicalRestrictions = {};
6541 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6542 		size, lock, protection, 0, 0, &virtualRestrictions,
6543 		&physicalRestrictions, false, &address);
6544 
6545 	if (area >= B_OK
6546 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6547 		delete_area(area);
6548 		return B_BAD_ADDRESS;
6549 	}
6550 
6551 	return area;
6552 }
6553 
6554 
6555 status_t
6556 _user_delete_area(area_id area)
6557 {
6558 	// Unlike the BeOS implementation, you can now only delete areas
6559 	// that you have created yourself from userland.
6560 	// The documentation to delete_area() explicitly states that this
6561 	// will be restricted in the future, and so it will.
6562 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6563 }
6564 
6565 
6566 // TODO: create a BeOS style call for this!
6567 
6568 area_id
6569 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6570 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6571 	int fd, off_t offset)
6572 {
6573 	char name[B_OS_NAME_LENGTH];
6574 	void* address;
6575 	area_id area;
6576 
6577 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6578 		return B_BAD_VALUE;
6579 
6580 	fix_protection(&protection);
6581 
6582 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6583 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6584 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6585 		return B_BAD_ADDRESS;
6586 
6587 	if (addressSpec == B_EXACT_ADDRESS) {
6588 		if ((addr_t)address + size < (addr_t)address
6589 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6590 			return B_BAD_VALUE;
6591 		}
6592 		if (!IS_USER_ADDRESS(address)
6593 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6594 			return B_BAD_ADDRESS;
6595 		}
6596 	}
6597 
6598 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6599 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6600 		false);
6601 	if (area < B_OK)
6602 		return area;
6603 
6604 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6605 		return B_BAD_ADDRESS;
6606 
6607 	return area;
6608 }
6609 
6610 
6611 status_t
6612 _user_unmap_memory(void* _address, size_t size)
6613 {
6614 	addr_t address = (addr_t)_address;
6615 
6616 	// check params
6617 	if (size == 0 || (addr_t)address + size < (addr_t)address
6618 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6619 		return B_BAD_VALUE;
6620 	}
6621 
6622 	if (!IS_USER_ADDRESS(address)
6623 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6624 		return B_BAD_ADDRESS;
6625 	}
6626 
6627 	// Write lock the address space and ensure the address range is not wired.
6628 	AddressSpaceWriteLocker locker;
6629 	do {
6630 		status_t status = locker.SetTo(team_get_current_team_id());
6631 		if (status != B_OK)
6632 			return status;
6633 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6634 			size, &locker));
6635 
6636 	// unmap
6637 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6638 }
6639 
6640 
6641 status_t
6642 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6643 {
6644 	// check address range
6645 	addr_t address = (addr_t)_address;
6646 	size = PAGE_ALIGN(size);
6647 
6648 	if ((address % B_PAGE_SIZE) != 0)
6649 		return B_BAD_VALUE;
6650 	if (!is_user_address_range(_address, size)) {
6651 		// weird error code required by POSIX
6652 		return ENOMEM;
6653 	}
6654 
6655 	// extend and check protection
6656 	if ((protection & ~B_USER_PROTECTION) != 0)
6657 		return B_BAD_VALUE;
6658 
6659 	fix_protection(&protection);
6660 
6661 	// We need to write lock the address space, since we're going to play with
6662 	// the areas. Also make sure that none of the areas is wired and that we're
6663 	// actually allowed to change the protection.
6664 	AddressSpaceWriteLocker locker;
6665 
6666 	bool restart;
6667 	do {
6668 		restart = false;
6669 
6670 		status_t status = locker.SetTo(team_get_current_team_id());
6671 		if (status != B_OK)
6672 			return status;
6673 
6674 		// First round: Check whether the whole range is covered by areas and we
6675 		// are allowed to modify them.
6676 		addr_t currentAddress = address;
6677 		size_t sizeLeft = size;
6678 		while (sizeLeft > 0) {
6679 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6680 			if (area == NULL)
6681 				return B_NO_MEMORY;
6682 
6683 			if ((area->protection & B_KERNEL_AREA) != 0)
6684 				return B_NOT_ALLOWED;
6685 			if (area->protection_max != 0
6686 				&& (protection & area->protection_max) != (protection & B_USER_PROTECTION)) {
6687 				return B_NOT_ALLOWED;
6688 			}
6689 
6690 			addr_t offset = currentAddress - area->Base();
6691 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6692 
6693 			AreaCacheLocker cacheLocker(area);
6694 
6695 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6696 					&locker, &cacheLocker)) {
6697 				restart = true;
6698 				break;
6699 			}
6700 
6701 			cacheLocker.Unlock();
6702 
6703 			currentAddress += rangeSize;
6704 			sizeLeft -= rangeSize;
6705 		}
6706 	} while (restart);
6707 
6708 	// Second round: If the protections differ from that of the area, create a
6709 	// page protection array and re-map mapped pages.
6710 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6711 	addr_t currentAddress = address;
6712 	size_t sizeLeft = size;
6713 	while (sizeLeft > 0) {
6714 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6715 		if (area == NULL)
6716 			return B_NO_MEMORY;
6717 
6718 		addr_t offset = currentAddress - area->Base();
6719 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6720 
6721 		currentAddress += rangeSize;
6722 		sizeLeft -= rangeSize;
6723 
6724 		if (area->page_protections == NULL) {
6725 			if (area->protection == protection)
6726 				continue;
6727 			if (offset == 0 && rangeSize == area->Size()) {
6728 				status_t status = vm_set_area_protection(area->address_space->ID(),
6729 					area->id, protection, false);
6730 				if (status != B_OK)
6731 					return status;
6732 				continue;
6733 			}
6734 
6735 			status_t status = allocate_area_page_protections(area);
6736 			if (status != B_OK)
6737 				return status;
6738 		}
6739 
6740 		// We need to lock the complete cache chain, since we potentially unmap
6741 		// pages of lower caches.
6742 		VMCache* topCache = vm_area_get_locked_cache(area);
6743 		VMCacheChainLocker cacheChainLocker(topCache);
6744 		cacheChainLocker.LockAllSourceCaches();
6745 
6746 		for (addr_t pageAddress = area->Base() + offset;
6747 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6748 			map->Lock();
6749 
6750 			set_area_page_protection(area, pageAddress, protection);
6751 
6752 			phys_addr_t physicalAddress;
6753 			uint32 flags;
6754 
6755 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6756 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6757 				map->Unlock();
6758 				continue;
6759 			}
6760 
6761 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6762 			if (page == NULL) {
6763 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6764 					"\n", area, physicalAddress);
6765 				map->Unlock();
6766 				return B_ERROR;
6767 			}
6768 
6769 			// If the page is not in the topmost cache and write access is
6770 			// requested, we have to unmap it. Otherwise we can re-map it with
6771 			// the new protection.
6772 			bool unmapPage = page->Cache() != topCache
6773 				&& (protection & B_WRITE_AREA) != 0;
6774 
6775 			if (!unmapPage)
6776 				map->ProtectPage(area, pageAddress, protection);
6777 
6778 			map->Unlock();
6779 
6780 			if (unmapPage) {
6781 				DEBUG_PAGE_ACCESS_START(page);
6782 				unmap_page(area, pageAddress);
6783 				DEBUG_PAGE_ACCESS_END(page);
6784 			}
6785 		}
6786 	}
6787 
6788 	return B_OK;
6789 }
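
/*	Userland view (a sketch, assuming the standard POSIX wrapper in libroot
	ends up in this syscall): making a page-aligned region read-only.

		if (mprotect(address, length, PROT_READ) != 0)
			return errno;
*/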
6790 
6791 
6792 status_t
6793 _user_sync_memory(void* _address, size_t size, uint32 flags)
6794 {
6795 	addr_t address = (addr_t)_address;
6796 	size = PAGE_ALIGN(size);
6797 
6798 	// check params
6799 	if ((address % B_PAGE_SIZE) != 0)
6800 		return B_BAD_VALUE;
6801 	if (!is_user_address_range(_address, size)) {
6802 		// weird error code required by POSIX
6803 		return ENOMEM;
6804 	}
6805 
6806 	bool writeSync = (flags & MS_SYNC) != 0;
6807 	bool writeAsync = (flags & MS_ASYNC) != 0;
6808 	if (writeSync && writeAsync)
6809 		return B_BAD_VALUE;
6810 
6811 	if (size == 0 || (!writeSync && !writeAsync))
6812 		return B_OK;
6813 
6814 	// iterate through the range and sync all concerned areas
6815 	while (size > 0) {
6816 		// read lock the address space
6817 		AddressSpaceReadLocker locker;
6818 		status_t error = locker.SetTo(team_get_current_team_id());
6819 		if (error != B_OK)
6820 			return error;
6821 
6822 		// get the first area
6823 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6824 		if (area == NULL)
6825 			return B_NO_MEMORY;
6826 
6827 		uint32 offset = address - area->Base();
6828 		size_t rangeSize = min_c(area->Size() - offset, size);
6829 		offset += area->cache_offset;
6830 
6831 		// lock the cache
6832 		AreaCacheLocker cacheLocker(area);
6833 		if (!cacheLocker)
6834 			return B_BAD_VALUE;
6835 		VMCache* cache = area->cache;
6836 
6837 		locker.Unlock();
6838 
6839 		uint32 firstPage = offset >> PAGE_SHIFT;
6840 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6841 
6842 		// write the pages
6843 		if (cache->type == CACHE_TYPE_VNODE) {
6844 			if (writeSync) {
6845 				// synchronous
6846 				error = vm_page_write_modified_page_range(cache, firstPage,
6847 					endPage);
6848 				if (error != B_OK)
6849 					return error;
6850 			} else {
6851 				// asynchronous
6852 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6853 				// TODO: This is probably not quite what is supposed to happen.
6854 				// Especially when a lot has to be written, it might take ages
6855 				// until it really hits the disk.
6856 			}
6857 		}
6858 
6859 		address += rangeSize;
6860 		size -= rangeSize;
6861 	}
6862 
6863 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6864 	// synchronize multiple mappings of the same file. In our VM they never get
6865 	// out of sync, though, so we don't have to do anything.
6866 
6867 	return B_OK;
6868 }
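
/*	Userland view (a sketch, assuming the standard POSIX wrapper in libroot
	ends up in this syscall): synchronously flushing a file mapping.

		if (msync(mapping, length, MS_SYNC) != 0)
			return errno;
*/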
6869 
6870 
6871 status_t
6872 _user_memory_advice(void* _address, size_t size, uint32 advice)
6873 {
6874 	addr_t address = (addr_t)_address;
6875 	if ((address % B_PAGE_SIZE) != 0)
6876 		return B_BAD_VALUE;
6877 
6878 	size = PAGE_ALIGN(size);
6879 	if (!is_user_address_range(_address, size)) {
6880 		// weird error code required by POSIX
6881 		return B_NO_MEMORY;
6882 	}
6883 
6884 	switch (advice) {
6885 		case MADV_NORMAL:
6886 		case MADV_SEQUENTIAL:
6887 		case MADV_RANDOM:
6888 		case MADV_WILLNEED:
6889 		case MADV_DONTNEED:
6890 			// TODO: Implement!
6891 			break;
6892 
6893 		case MADV_FREE:
6894 		{
6895 			AddressSpaceWriteLocker locker;
6896 			do {
6897 				status_t status = locker.SetTo(team_get_current_team_id());
6898 				if (status != B_OK)
6899 					return status;
6900 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
6901 					address, size, &locker));
6902 
6903 			discard_address_range(locker.AddressSpace(), address, size, false);
6904 			break;
6905 		}
6906 
6907 		default:
6908 			return B_BAD_VALUE;
6909 	}
6910 
6911 	return B_OK;
6912 }
6913 
6914 
6915 status_t
6916 _user_get_memory_properties(team_id teamID, const void* address,
6917 	uint32* _protected, uint32* _lock)
6918 {
6919 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6920 		return B_BAD_ADDRESS;
6921 
6922 	AddressSpaceReadLocker locker;
6923 	status_t error = locker.SetTo(teamID);
6924 	if (error != B_OK)
6925 		return error;
6926 
6927 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6928 	if (area == NULL)
6929 		return B_NO_MEMORY;
6930 
6931 	uint32 protection = get_area_page_protection(area, (addr_t)address);
6932 	uint32 wiring = area->wiring;
6933 
6934 	locker.Unlock();
6935 
6936 	error = user_memcpy(_protected, &protection, sizeof(protection));
6937 	if (error != B_OK)
6938 		return error;
6939 
6940 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6941 
6942 	return error;
6943 }
6944 
6945 
6946 static status_t
6947 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
6948 {
6949 #if ENABLE_SWAP_SUPPORT
6950 	// check address range
6951 	addr_t address = (addr_t)_address;
6952 	size = PAGE_ALIGN(size);
6953 
6954 	if ((address % B_PAGE_SIZE) != 0)
6955 		return EINVAL;
6956 	if (!is_user_address_range(_address, size))
6957 		return EINVAL;
6958 
6959 	const addr_t endAddress = address + size;
6960 
6961 	AddressSpaceReadLocker addressSpaceLocker;
6962 	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
6963 	if (error != B_OK)
6964 		return error;
6965 	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
6966 
6967 	// iterate through all concerned areas
6968 	addr_t nextAddress = address;
6969 	while (nextAddress != endAddress) {
6970 		// get the next area
6971 		VMArea* area = addressSpace->LookupArea(nextAddress);
6972 		if (area == NULL) {
6973 			error = B_BAD_ADDRESS;
6974 			break;
6975 		}
6976 
6977 		const addr_t areaStart = nextAddress;
6978 		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
6979 		nextAddress = areaEnd;
6980 
6981 		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6982 		if (error != B_OK) {
6983 			// We don't need to unset or reset things on failure.
6984 			break;
6985 		}
6986 
6987 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6988 		VMAnonymousCache* anonCache = NULL;
6989 		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
6990 			// This memory will never be swapped anyway. Nothing to do.
6991 		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
6992 			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
6993 				areaEnd - areaStart, swappable);
6994 		} else {
6995 			// Some other cache type? We cannot affect anything here.
6996 			error = EINVAL;
6997 		}
6998 
6999 		cacheChainLocker.Unlock();
7000 
7001 		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7002 		if (error != B_OK)
7003 			break;
7004 	}
7005 
7006 	return error;
7007 #else
7008 	// No swap support? Nothing to do.
7009 	return B_OK;
7010 #endif
7011 }
7012 
7013 
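/*!	Implements the mlock() syscall: excludes the given (page aligned) range
	from being swapped out. See user_set_memory_swappable().
*/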
7014 status_t
7015 _user_mlock(const void* _address, size_t size)
7016 {
7017 	return user_set_memory_swappable(_address, size, false);
7018 }
7019 
7020 
7021 status_t
7022 _user_munlock(const void* _address, size_t size)
7023 {
7024 	// TODO: B_SHARED_AREAs need to be handled a bit differently:
7025 	// if multiple clones of an area had mlock() called on them,
7026 	// munlock() must also be called on all of them to actually unlock.
7027 	// (At present, the first munlock() will unlock all.)
7028 	// TODO: fork() should automatically unlock memory in the child.
7029 	return user_set_memory_swappable(_address, size, true);
7030 }
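
/*	Illustrative only (not part of the original file; names are hypothetical):
	the two syscalls above back the POSIX mlock()/munlock() functions. Note
	that this implementation requires a page aligned address, so a caller
	would typically pin an mmap()ed region:

		void* buffer = mmap(NULL, B_PAGE_SIZE, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (buffer != MAP_FAILED && mlock(buffer, B_PAGE_SIZE) == 0) {
			// the pages backing "buffer" may no longer be swapped out
			// ... use the buffer ...
			munlock(buffer, B_PAGE_SIZE);
		}
*/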
7031 
7032 
7033 // #pragma mark -- compatibility
7034 
7035 
7036 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7037 
7038 
7039 struct physical_entry_beos {
7040 	uint32	address;
7041 	uint32	size;
7042 };
7043 
7044 
7045 /*!	The physical_entry structure has changed. We need to translate it to the
7046 	old one.
7047 */
7048 extern "C" int32
7049 __get_memory_map_beos(const void* _address, size_t numBytes,
7050 	physical_entry_beos* table, int32 numEntries)
7051 {
7052 	if (numEntries <= 0)
7053 		return B_BAD_VALUE;
7054 
7055 	const uint8* address = (const uint8*)_address;
7056 
7057 	int32 count = 0;
7058 	while (numBytes > 0 && count < numEntries) {
7059 		physical_entry entry;
7060 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
7061 		if (result < 0) {
7062 			if (result != B_BUFFER_OVERFLOW)
7063 				return result;
7064 		}
7065 
7066 		if (entry.address >= (phys_addr_t)1 << 32) {
7067 			panic("get_memory_map(): Address is greater than 4 GB!");
7068 			return B_ERROR;
7069 		}
7070 
7071 		table[count].address = entry.address;
7072 		table[count++].size = entry.size;
7073 
7074 		address += entry.size;
7075 		numBytes -= entry.size;
7076 	}
7077 
7078 	// null-terminate the table, if possible
7079 	if (count < numEntries) {
7080 		table[count].address = 0;
7081 		table[count].size = 0;
7082 	}
7083 
7084 	return B_OK;
7085 }
7086 
7087 
7088 /*!	The type of the \a physicalAddress parameter has changed from void* to
7089 	phys_addr_t.
7090 */
7091 extern "C" area_id
7092 __map_physical_memory_beos(const char* name, void* physicalAddress,
7093 	size_t numBytes, uint32 addressSpec, uint32 protection,
7094 	void** _virtualAddress)
7095 {
7096 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7097 		addressSpec, protection, _virtualAddress);
7098 }
7099 
7100 
7101 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7102 	we meddle with the \a lock parameter to force 32-bit physical memory.
7103 */
7104 extern "C" area_id
7105 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7106 	size_t size, uint32 lock, uint32 protection)
7107 {
7108 	switch (lock) {
7109 		case B_NO_LOCK:
7110 			break;
7111 		case B_FULL_LOCK:
7112 		case B_LAZY_LOCK:
7113 			lock = B_32_BIT_FULL_LOCK;
7114 			break;
7115 		case B_CONTIGUOUS:
7116 			lock = B_32_BIT_CONTIGUOUS;
7117 			break;
7118 	}
7119 
7120 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7121 		protection);
7122 }
7123 
7124 
7125 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7126 	"BASE");
7127 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7128 	"map_physical_memory@", "BASE");
7129 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7130 	"BASE");
7131 
7132 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7133 	"get_memory_map@@", "1_ALPHA3");
7134 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7135 	"map_physical_memory@@", "1_ALPHA3");
7136 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7137 	"1_ALPHA3");
7138 
7139 
7140 #else
7141 
7142 
7143 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7144 	"get_memory_map@@", "BASE");
7145 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7146 	"map_physical_memory@@", "BASE");
7147 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7148 	"BASE");
7149 
7150 
7151 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7152