xref: /haiku/src/system/kernel/vm/vm.cpp (revision 1fe24d0cd0b547a771c00f6fca8f50ba6ca2fb2c)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <boot/elf.h>
31 #include <boot/stage2.h>
32 #include <condition_variable.h>
33 #include <console.h>
34 #include <debug.h>
35 #include <file_cache.h>
36 #include <fs/fd.h>
37 #include <heap.h>
38 #include <kernel.h>
39 #include <int.h>
40 #include <lock.h>
41 #include <low_resource_manager.h>
42 #include <slab/Slab.h>
43 #include <smp.h>
44 #include <system_info.h>
45 #include <thread.h>
46 #include <team.h>
47 #include <tracing.h>
48 #include <util/AutoLock.h>
49 #include <util/khash.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "IORequest.h"
59 
60 
61 //#define TRACE_VM
62 //#define TRACE_FAULTS
63 #ifdef TRACE_VM
64 #	define TRACE(x) dprintf x
65 #else
66 #	define TRACE(x) ;
67 #endif
68 #ifdef TRACE_FAULTS
69 #	define FTRACE(x) dprintf x
70 #else
71 #	define FTRACE(x) ;
72 #endif
73 
74 
75 class AreaCacheLocking {
76 public:
77 	inline bool Lock(VMCache* lockable)
78 	{
79 		return false;
80 	}
81 
82 	inline void Unlock(VMCache* lockable)
83 	{
84 		vm_area_put_locked_cache(lockable);
85 	}
86 };
87 
88 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
89 public:
90 	inline AreaCacheLocker(VMCache* cache = NULL)
91 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
92 	{
93 	}
94 
95 	inline AreaCacheLocker(VMArea* area)
96 		: AutoLocker<VMCache, AreaCacheLocking>()
97 	{
98 		SetTo(area);
99 	}
100 
101 	inline void SetTo(VMCache* cache, bool alreadyLocked)
102 	{
103 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
104 	}
105 
106 	inline void SetTo(VMArea* area)
107 	{
108 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
109 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
110 	}
111 };
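
/*!	Minimal usage sketch for AreaCacheLocker, assuming a valid \a area whose
	cache is not yet locked (the variable names are placeholders):

		AreaCacheLocker cacheLocker(area);
			// locks area->cache via vm_area_get_locked_cache()
		// ... work with the locked cache ...
		// vm_area_put_locked_cache() is called automatically when
		// cacheLocker goes out of scope
*/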
112 
113 
114 class VMCacheChainLocker {
115 public:
116 	VMCacheChainLocker()
117 		:
118 		fTopCache(NULL),
119 		fBottomCache(NULL)
120 	{
121 	}
122 
123 	VMCacheChainLocker(VMCache* topCache)
124 		:
125 		fTopCache(topCache),
126 		fBottomCache(topCache)
127 	{
128 	}
129 
130 	~VMCacheChainLocker()
131 	{
132 		Unlock();
133 	}
134 
135 	void SetTo(VMCache* topCache)
136 	{
137 		fTopCache = topCache;
138 		fBottomCache = topCache;
139 
140 		if (topCache != NULL)
141 			topCache->SetUserData(NULL);
142 	}
143 
144 	VMCache* LockSourceCache()
145 	{
146 		if (fBottomCache == NULL || fBottomCache->source == NULL)
147 			return NULL;
148 
149 		VMCache* previousCache = fBottomCache;
150 
151 		fBottomCache = fBottomCache->source;
152 		fBottomCache->Lock();
153 		fBottomCache->AcquireRefLocked();
154 		fBottomCache->SetUserData(previousCache);
155 
156 		return fBottomCache;
157 	}
158 
159 	void LockAllSourceCaches()
160 	{
161 		while (LockSourceCache() != NULL) {
162 		}
163 	}
164 
165 	void Unlock(VMCache* exceptCache = NULL)
166 	{
167 		if (fTopCache == NULL)
168 			return;
169 
170 		// Unlock caches in source -> consumer direction. This is important to
171 		// avoid double-locking and a reversal of locking order in case a cache
172 		// is eligible for merging.
173 		VMCache* cache = fBottomCache;
174 		while (cache != NULL) {
175 			VMCache* nextCache = (VMCache*)cache->UserData();
176 			if (cache != exceptCache)
177 				cache->ReleaseRefAndUnlock(cache != fTopCache);
178 
179 			if (cache == fTopCache)
180 				break;
181 
182 			cache = nextCache;
183 		}
184 
185 		fTopCache = NULL;
186 		fBottomCache = NULL;
187 	}
188 
189 	void UnlockKeepRefs(bool keepTopCacheLocked)
190 	{
191 		if (fTopCache == NULL)
192 			return;
193 
194 		VMCache* nextCache = fBottomCache;
195 		VMCache* cache = NULL;
196 
197 		while (keepTopCacheLocked
198 				? nextCache != fTopCache : cache != fTopCache) {
199 			cache = nextCache;
200 			nextCache = (VMCache*)cache->UserData();
201 			cache->Unlock(cache != fTopCache);
202 		}
203 	}
204 
205 	void RelockCaches(bool topCacheLocked)
206 	{
207 		if (fTopCache == NULL)
208 			return;
209 
210 		VMCache* nextCache = fTopCache;
211 		VMCache* cache = NULL;
212 		if (topCacheLocked) {
213 			cache = nextCache;
214 			nextCache = cache->source;
215 		}
216 
217 		while (cache != fBottomCache && nextCache != NULL) {
218 			VMCache* consumer = cache;
219 			cache = nextCache;
220 			nextCache = cache->source;
221 			cache->Lock();
222 			cache->SetUserData(consumer);
223 		}
224 	}
225 
226 private:
227 	VMCache*	fTopCache;
228 	VMCache*	fBottomCache;
229 };
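
/*!	Minimal usage sketch for VMCacheChainLocker, assuming \a topCache is
	already locked and referenced (as the page fault handling code does):

		VMCacheChainLocker cacheChainLocker(topCache);
		cacheChainLocker.LockAllSourceCaches();
			// walks and locks the chain along the source links

		// ... look up pages anywhere in the chain ...

		cacheChainLocker.Unlock();
			// unlocks and releases the references, bottom -> top
*/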
230 
231 
232 // The memory reserve an allocation of a certain priority must not touch.
233 static const size_t kMemoryReserveForPriority[] = {
234 	VM_MEMORY_RESERVE_USER,		// user
235 	VM_MEMORY_RESERVE_SYSTEM,	// system
236 	0							// VIP
237 };
238 
239 
240 ObjectCache* gPageMappingsObjectCache;
241 
242 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
243 
244 static off_t sAvailableMemory;
245 static off_t sNeededMemory;
246 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
247 static uint32 sPageFaults;
248 
249 static VMPhysicalPageMapper* sPhysicalPageMapper;
250 
251 #if DEBUG_CACHE_LIST
252 
253 struct cache_info {
254 	VMCache*	cache;
255 	addr_t		page_count;
256 	addr_t		committed;
257 };
258 
259 static const int kCacheInfoTableCount = 100 * 1024;
260 static cache_info* sCacheInfoTable;
261 
262 #endif	// DEBUG_CACHE_LIST
263 
264 
265 // function declarations
266 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
267 	bool addressSpaceCleanup);
268 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
269 	bool isWrite, bool isUser, vm_page** wirePage,
270 	VMAreaWiredRange* wiredRange = NULL);
271 static status_t map_backing_store(VMAddressSpace* addressSpace,
272 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
273 	int protection, int mapping, uint32 flags,
274 	const virtual_address_restrictions* addressRestrictions, bool kernel,
275 	VMArea** _area, void** _virtualAddress);
276 
277 
278 //	#pragma mark -
279 
280 
281 #if VM_PAGE_FAULT_TRACING
282 
283 namespace VMPageFaultTracing {
284 
285 class PageFaultStart : public AbstractTraceEntry {
286 public:
287 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
288 		:
289 		fAddress(address),
290 		fPC(pc),
291 		fWrite(write),
292 		fUser(user)
293 	{
294 		Initialized();
295 	}
296 
297 	virtual void AddDump(TraceOutput& out)
298 	{
299 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
300 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
301 	}
302 
303 private:
304 	addr_t	fAddress;
305 	addr_t	fPC;
306 	bool	fWrite;
307 	bool	fUser;
308 };
309 
310 
311 // page fault errors
312 enum {
313 	PAGE_FAULT_ERROR_NO_AREA		= 0,
314 	PAGE_FAULT_ERROR_KERNEL_ONLY,
315 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
316 	PAGE_FAULT_ERROR_READ_PROTECTED,
317 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
318 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
319 };
320 
321 
322 class PageFaultError : public AbstractTraceEntry {
323 public:
324 	PageFaultError(area_id area, status_t error)
325 		:
326 		fArea(area),
327 		fError(error)
328 	{
329 		Initialized();
330 	}
331 
332 	virtual void AddDump(TraceOutput& out)
333 	{
334 		switch (fError) {
335 			case PAGE_FAULT_ERROR_NO_AREA:
336 				out.Print("page fault error: no area");
337 				break;
338 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
339 				out.Print("page fault error: area: %ld, kernel only", fArea);
340 				break;
341 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
342 				out.Print("page fault error: area: %ld, write protected",
343 					fArea);
344 				break;
345 			case PAGE_FAULT_ERROR_READ_PROTECTED:
346 				out.Print("page fault error: area: %ld, read protected", fArea);
347 				break;
348 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
349 				out.Print("page fault error: kernel touching bad user memory");
350 				break;
351 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
352 				out.Print("page fault error: no address space");
353 				break;
354 			default:
355 				out.Print("page fault error: area: %ld, error: %s", fArea,
356 					strerror(fError));
357 				break;
358 		}
359 	}
360 
361 private:
362 	area_id		fArea;
363 	status_t	fError;
364 };
365 
366 
367 class PageFaultDone : public AbstractTraceEntry {
368 public:
369 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
370 			vm_page* page)
371 		:
372 		fArea(area),
373 		fTopCache(topCache),
374 		fCache(cache),
375 		fPage(page)
376 	{
377 		Initialized();
378 	}
379 
380 	virtual void AddDump(TraceOutput& out)
381 	{
382 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
383 			"page: %p", fArea, fTopCache, fCache, fPage);
384 	}
385 
386 private:
387 	area_id		fArea;
388 	VMCache*	fTopCache;
389 	VMCache*	fCache;
390 	vm_page*	fPage;
391 };
392 
393 }	// namespace VMPageFaultTracing
394 
395 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
396 #else
397 #	define TPF(x) ;
398 #endif	// VM_PAGE_FAULT_TRACING
399 
400 
401 //	#pragma mark -
402 
403 
404 /*!	The page's cache must be locked.
405 */
406 static inline void
407 increment_page_wired_count(vm_page* page)
408 {
409 	if (!page->IsMapped())
410 		atomic_add(&gMappedPagesCount, 1);
411 	page->IncrementWiredCount();
412 }
413 
414 
415 /*!	The page's cache must be locked.
416 */
417 static inline void
418 decrement_page_wired_count(vm_page* page)
419 {
420 	page->DecrementWiredCount();
421 	if (!page->IsMapped())
422 		atomic_add(&gMappedPagesCount, -1);
423 }
424 
425 
426 static inline addr_t
427 virtual_page_address(VMArea* area, vm_page* page)
428 {
429 	return area->Base()
430 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
431 }
432 
433 
434 //! You need to have the address space locked when calling this function
435 static VMArea*
436 lookup_area(VMAddressSpace* addressSpace, area_id id)
437 {
438 	VMAreaHash::ReadLock();
439 
440 	VMArea* area = VMAreaHash::LookupLocked(id);
441 	if (area != NULL && area->address_space != addressSpace)
442 		area = NULL;
443 
444 	VMAreaHash::ReadUnlock();
445 
446 	return area;
447 }
448 
449 
450 static status_t
451 allocate_area_page_protections(VMArea* area)
452 {
453 	// In the page protections we store only the three user protections,
454 	// so we use 4 bits per page.
455 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
456 	area->page_protections = (uint8*)malloc_etc(bytes,
457 		HEAP_DONT_LOCK_KERNEL_SPACE);
458 	if (area->page_protections == NULL)
459 		return B_NO_MEMORY;
460 
461 	// init the page protections for all pages to that of the area
462 	uint32 areaProtection = area->protection
463 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
464 	memset(area->page_protections, areaProtection | (areaProtection << 4),
465 		bytes);
466 	return B_OK;
467 }
468 
469 
470 static inline void
471 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
472 {
473 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
474 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
475 	uint8& entry = area->page_protections[pageIndex / 2];
476 	if (pageIndex % 2 == 0)
477 		entry = (entry & 0xf0) | protection;
478 	else
479 		entry = (entry & 0x0f) | (protection << 4);
480 }
481 
482 
483 static inline uint32
484 get_area_page_protection(VMArea* area, addr_t pageAddress)
485 {
486 	if (area->page_protections == NULL)
487 		return area->protection;
488 
489 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
490 	uint32 protection = area->page_protections[pageIndex / 2];
491 	if (pageIndex % 2 == 0)
492 		protection &= 0x0f;
493 	else
494 		protection >>= 4;
495 
496 	// If this is a kernel area we translate the user flags to kernel flags.
497 	if (area->address_space == VMAddressSpace::Kernel()) {
498 		uint32 kernelProtection = 0;
499 		if ((protection & B_READ_AREA) != 0)
500 			kernelProtection |= B_KERNEL_READ_AREA;
501 		if ((protection & B_WRITE_AREA) != 0)
502 			kernelProtection |= B_KERNEL_WRITE_AREA;
503 
504 		return kernelProtection;
505 	}
506 
507 	return protection | B_KERNEL_READ_AREA
508 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
509 }
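
/*!	Sketch of the nibble packing used by the three page protection helpers
	above: page 2 * i is stored in the low nibble of byte i, page 2 * i + 1
	in the high nibble. Decoding a page's bits therefore looks like this
	(\a area and \a pageAddress are placeholders):

		uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
		uint8 entry = area->page_protections[pageIndex / 2];
		uint8 bits = pageIndex % 2 == 0 ? (entry & 0x0f) : (entry >> 4);
			// bits is a combination of B_READ_AREA, B_WRITE_AREA and
			// B_EXECUTE_AREA
*/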
510 
511 
512 /*!	The caller must have reserved enough pages that the translation map
513 	implementation might need in order to map this page.
514 	The page's cache must be locked.
515 */
516 static status_t
517 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
518 	vm_page_reservation* reservation)
519 {
520 	VMTranslationMap* map = area->address_space->TranslationMap();
521 
522 	bool wasMapped = page->IsMapped();
523 
524 	if (area->wiring == B_NO_LOCK) {
525 		DEBUG_PAGE_ACCESS_CHECK(page);
526 
527 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
528 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
529 			gPageMappingsObjectCache,
530 			CACHE_DONT_WAIT_FOR_MEMORY
531 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
532 		if (mapping == NULL)
533 			return B_NO_MEMORY;
534 
535 		mapping->page = page;
536 		mapping->area = area;
537 
538 		map->Lock();
539 
540 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
541 			area->MemoryType(), reservation);
542 
543 		// insert mapping into lists
544 		if (!page->IsMapped())
545 			atomic_add(&gMappedPagesCount, 1);
546 
547 		page->mappings.Add(mapping);
548 		area->mappings.Add(mapping);
549 
550 		map->Unlock();
551 	} else {
552 		DEBUG_PAGE_ACCESS_CHECK(page);
553 
554 		map->Lock();
555 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
556 			area->MemoryType(), reservation);
557 		map->Unlock();
558 
559 		increment_page_wired_count(page);
560 	}
561 
562 	if (!wasMapped) {
563 		// The page is mapped now, so it must not remain in the cached queue.
564 		// It also makes sense to move it from the inactive to the active
565 		// queue, since otherwise the page daemon wouldn't come to keep track
566 		// of it (in idle mode) -- if the page isn't touched, it will be
567 		// deactivated after a full iteration through the queue at the latest.
568 		if (page->State() == PAGE_STATE_CACHED
569 				|| page->State() == PAGE_STATE_INACTIVE) {
570 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
571 		}
572 	}
573 
574 	return B_OK;
575 }
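
/*!	The reservation/insert/map pattern map_page() expects from its callers,
	as a sketch (it mirrors the B_FULL_LOCK path in vm_create_anonymous_area()
	below; \a cache must be locked, and the names are placeholders):

		vm_page_reservation reservation;
		vm_page_reserve_pages(&reservation, reservedPages, priority);

		vm_page* page = vm_page_allocate_page(&reservation, PAGE_STATE_WIRED);
		cache->InsertPage(page, offset);
		map_page(area, page, address, protection, &reservation);
		DEBUG_PAGE_ACCESS_END(page);

		vm_page_unreserve_pages(&reservation);
*/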
576 
577 
578 /*!	The caller must hold the lock of the page's cache when calling this
579 	function.
580 */
581 static inline bool
582 unmap_page(VMArea* area, addr_t virtualAddress)
583 {
584 	return area->address_space->TranslationMap()->UnmapPage(area,
585 		virtualAddress, true);
586 }
587 
588 
589 /*!	The caller must hold the locks of all mapped pages' caches when calling
590 	this function.
591 */
592 static inline void
593 unmap_pages(VMArea* area, addr_t base, size_t size)
594 {
595 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
596 }
597 
598 
599 /*!	Cuts a piece out of an area. If the given cut range covers the complete
600 	area, it is deleted. If it covers the beginning or the end, the area is
601 	resized accordingly. If the range covers some part in the middle of the
602 	area, it is split in two; in this case the second area is returned via
603 	\a _secondArea (the variable is left untouched in the other cases).
604 	The address space must be write locked.
605 	The caller must ensure that no part of the given range is wired.
606 */
607 static status_t
608 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
609 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
610 {
611 	// Does the cut range intersect with the area at all?
612 	addr_t areaLast = area->Base() + (area->Size() - 1);
613 	if (area->Base() > lastAddress || areaLast < address)
614 		return B_OK;
615 
616 	// Is the area fully covered?
617 	if (area->Base() >= address && areaLast <= lastAddress) {
618 		delete_area(addressSpace, area, false);
619 		return B_OK;
620 	}
621 
622 	int priority;
623 	uint32 allocationFlags;
624 	if (addressSpace == VMAddressSpace::Kernel()) {
625 		priority = VM_PRIORITY_SYSTEM;
626 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
627 			| HEAP_DONT_LOCK_KERNEL_SPACE;
628 	} else {
629 		priority = VM_PRIORITY_USER;
630 		allocationFlags = 0;
631 	}
632 
633 	VMCache* cache = vm_area_get_locked_cache(area);
634 	VMCacheChainLocker cacheChainLocker(cache);
635 	cacheChainLocker.LockAllSourceCaches();
636 
637 	// Cut the end only?
638 	if (areaLast <= lastAddress) {
639 		size_t oldSize = area->Size();
640 		size_t newSize = address - area->Base();
641 
642 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
643 			allocationFlags);
644 		if (error != B_OK)
645 			return error;
646 
647 		// unmap pages
648 		unmap_pages(area, address, oldSize - newSize);
649 
650 		// If no one else uses the area's cache, we can resize it, too.
651 		if (cache->areas == area && area->cache_next == NULL
652 			&& cache->consumers.IsEmpty()
653 			&& cache->type == CACHE_TYPE_RAM) {
654 			// Since VMCache::Resize() can temporarily drop the lock, we must
655 			// unlock all lower caches to prevent locking order inversion.
656 			cacheChainLocker.Unlock(cache);
657 			cache->Resize(cache->virtual_base + newSize, priority);
658 			cache->ReleaseRefAndUnlock();
659 		}
660 
661 		return B_OK;
662 	}
663 
664 	// Cut the beginning only?
665 	if (area->Base() >= address) {
666 		addr_t oldBase = area->Base();
667 		addr_t newBase = lastAddress + 1;
668 		size_t newSize = areaLast - lastAddress;
669 
670 		// unmap pages
671 		unmap_pages(area, oldBase, newBase - oldBase);
672 
673 		// resize the area
674 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
675 			allocationFlags);
676 		if (error != B_OK)
677 			return error;
678 
679 		// TODO: If no one else uses the area's cache, we should resize it, too!
680 
681 		area->cache_offset += newBase - oldBase;
682 
683 		return B_OK;
684 	}
685 
686 	// The tough part -- cut a piece out of the middle of the area.
687 	// We do that by shrinking the area to the beginning section and creating
688 	// new area for the end section.
689 
690 	addr_t firstNewSize = address - area->Base();
691 	addr_t secondBase = lastAddress + 1;
692 	addr_t secondSize = areaLast - lastAddress;
693 
694 	// unmap pages
695 	unmap_pages(area, address, area->Size() - firstNewSize);
696 
697 	// resize the area
698 	addr_t oldSize = area->Size();
699 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
700 		allocationFlags);
701 	if (error != B_OK)
702 		return error;
703 
704 	// TODO: If no one else uses the area's cache, we might want to create a
705 	// new cache for the second area, transfer the concerned pages from the
706 	// first cache to it and resize the first cache.
707 
708 	// map the second area
709 	virtual_address_restrictions addressRestrictions = {};
710 	addressRestrictions.address = (void*)secondBase;
711 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
712 	VMArea* secondArea;
713 	error = map_backing_store(addressSpace, cache,
714 		area->cache_offset + (secondBase - area->Base()), area->name,
715 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
716 		&addressRestrictions, kernel, &secondArea, NULL);
717 	if (error != B_OK) {
718 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
719 		return error;
720 	}
721 
722 	// We need a cache reference for the new area.
723 	cache->AcquireRefLocked();
724 
725 	if (_secondArea != NULL)
726 		*_secondArea = secondArea;
727 
728 	return B_OK;
729 }
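
/*!	Worked example for the cut_area() cases above, for an area covering
	[0x1000, 0x5000) (all addresses are hypothetical):
	- cut range [0x1000, 0x4fff]: covers the whole area -> the area is
	  deleted.
	- cut range [0x4000, 0x4fff]: covers the end -> the area is shrunk to
	  [0x1000, 0x4000).
	- cut range [0x1000, 0x1fff]: covers the beginning -> the area is shrunk
	  to [0x2000, 0x5000) and its cache_offset is adjusted.
	- cut range [0x2000, 0x2fff]: covers the middle -> the area is shrunk to
	  [0x1000, 0x2000) and a second area [0x3000, 0x5000) is returned via
	  \a _secondArea.
*/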
730 
731 
732 /*!	Deletes all areas in the given address range.
733 	The address space must be write-locked.
734 	The caller must ensure that no part of the given range is wired.
735 */
736 static status_t
737 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
738 	bool kernel)
739 {
740 	size = PAGE_ALIGN(size);
741 	addr_t lastAddress = address + (size - 1);
742 
743 	// Check whether the caller is allowed to modify the concerned areas.
744 	if (!kernel) {
745 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
746 				VMArea* area = it.Next();) {
747 			addr_t areaLast = area->Base() + (area->Size() - 1);
748 			if (area->Base() < lastAddress && address < areaLast) {
749 				if ((area->protection & B_KERNEL_AREA) != 0)
750 					return B_NOT_ALLOWED;
751 			}
752 		}
753 	}
754 
755 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
756 			VMArea* area = it.Next();) {
757 		addr_t areaLast = area->Base() + (area->Size() - 1);
758 		if (area->Base() < lastAddress && address < areaLast) {
759 			status_t error = cut_area(addressSpace, area, address,
760 				lastAddress, NULL, kernel);
761 			if (error != B_OK)
762 				return error;
763 				// Failing after already messing with areas is ugly, but we
764 				// can't do anything about it.
765 		}
766 	}
767 
768 	return B_OK;
769 }
770 
771 
772 /*! You need to hold the lock of the cache and the write lock of the address
773 	space when calling this function.
774 	Note that in case of error your cache will be temporarily unlocked.
775 	If \a addressSpec is \c B_EXACT_ADDRESS and the
776 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
777 	that no part of the specified address range (base \c *_virtualAddress, size
778 	\a size) is wired.
779 */
780 static status_t
781 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
782 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
783 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
784 	bool kernel, VMArea** _area, void** _virtualAddress)
785 {
786 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%Lx, "
787 		"size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName "
788 		"'%s'\n", addressSpace, cache, addressRestrictions->address, offset,
789 		size, addressRestrictions->address_specification, wiring, protection,
790 		_area, areaName));
791 	cache->AssertLocked();
792 
793 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
794 		| HEAP_DONT_LOCK_KERNEL_SPACE;
795 	int priority;
796 	if (addressSpace != VMAddressSpace::Kernel()) {
797 		priority = VM_PRIORITY_USER;
798 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
799 		priority = VM_PRIORITY_VIP;
800 		allocationFlags |= HEAP_PRIORITY_VIP;
801 	} else
802 		priority = VM_PRIORITY_SYSTEM;
803 
804 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
805 		allocationFlags);
806 	if (area == NULL)
807 		return B_NO_MEMORY;
808 
809 	status_t status;
810 
811 	// if this is a private map, we need to create a new cache
812 	// to handle the private copies of pages as they are written to
813 	VMCache* sourceCache = cache;
814 	if (mapping == REGION_PRIVATE_MAP) {
815 		VMCache* newCache;
816 
817 		// create an anonymous cache
818 		bool isStack = (protection & B_STACK_AREA) != 0;
819 		status = VMCacheFactory::CreateAnonymousCache(newCache,
820 			isStack || (protection & B_OVERCOMMITTING_AREA) != 0, 0,
821 			isStack ? USER_STACK_GUARD_PAGES : 0, true, VM_PRIORITY_USER);
822 		if (status != B_OK)
823 			goto err1;
824 
825 		newCache->Lock();
826 		newCache->temporary = 1;
827 		newCache->virtual_base = offset;
828 		newCache->virtual_end = offset + size;
829 
830 		cache->AddConsumer(newCache);
831 
832 		cache = newCache;
833 	}
834 
835 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
836 		status = cache->SetMinimalCommitment(size, priority);
837 		if (status != B_OK)
838 			goto err2;
839 	}
840 
841 	// check to see if this address space has entered DELETE state
842 	if (addressSpace->IsBeingDeleted()) {
843 		// okay, someone is trying to delete this address space now, so we can't
844 		// insert the area; back out
845 		status = B_BAD_TEAM_ID;
846 		goto err2;
847 	}
848 
849 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
850 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
851 		status = unmap_address_range(addressSpace,
852 			(addr_t)addressRestrictions->address, size, kernel);
853 		if (status != B_OK)
854 			goto err2;
855 	}
856 
857 	status = addressSpace->InsertArea(area, size, addressRestrictions,
858 		allocationFlags, _virtualAddress);
859 	if (status != B_OK) {
860 		// TODO: wait and try again once this is working in the backend
861 #if 0
862 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
863 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
864 				0, 0);
865 		}
866 #endif
867 		goto err2;
868 	}
869 
870 	// attach the cache to the area
871 	area->cache = cache;
872 	area->cache_offset = offset;
873 
874 	// point the cache back to the area
875 	cache->InsertAreaLocked(area);
876 	if (mapping == REGION_PRIVATE_MAP)
877 		cache->Unlock();
878 
879 	// insert the area in the global area hash table
880 	VMAreaHash::Insert(area);
881 
882 	// grab a ref to the address space (the area holds this)
883 	addressSpace->Get();
884 
885 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
886 //		cache, sourceCache, areaName, area);
887 
888 	*_area = area;
889 	return B_OK;
890 
891 err2:
892 	if (mapping == REGION_PRIVATE_MAP) {
893 		// We created this cache, so we must delete it again. Note that we
894 		// need to temporarily unlock the source cache or we'll otherwise
895 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
896 		sourceCache->Unlock();
897 		cache->ReleaseRefAndUnlock();
898 		sourceCache->Lock();
899 	}
900 err1:
901 	addressSpace->DeleteArea(area, allocationFlags);
902 	return status;
903 }
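
/*!	The calling pattern expected by map_backing_store(), as a sketch (it
	mirrors vm_block_address_range() below; \a cache, \a addressSpace and the
	other names are placeholders):

		cache->Lock();

		virtual_address_restrictions addressRestrictions = {};
		addressRestrictions.address = address;
		addressRestrictions.address_specification = B_EXACT_ADDRESS;

		VMArea* area;
		status_t status = map_backing_store(addressSpace, cache, 0, name,
			size, wiring, protection, REGION_NO_PRIVATE_MAP, 0,
			&addressRestrictions, true, &area, NULL);
		if (status != B_OK) {
			cache->ReleaseRefAndUnlock();
			return status;
		}

		cache->Unlock();
*/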
904 
905 
906 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
907 	  locker1, locker2).
908 */
909 template<typename LockerType1, typename LockerType2>
910 static inline bool
911 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
912 {
913 	area->cache->AssertLocked();
914 
915 	VMAreaUnwiredWaiter waiter;
916 	if (!area->AddWaiterIfWired(&waiter))
917 		return false;
918 
919 	// unlock everything and wait
920 	if (locker1 != NULL)
921 		locker1->Unlock();
922 	if (locker2 != NULL)
923 		locker2->Unlock();
924 
925 	waiter.waitEntry.Wait();
926 
927 	return true;
928 }
929 
930 
931 /*!	Checks whether the given area has any wired ranges intersecting with the
932 	specified range and waits, if so.
933 
934 	When it has to wait, the function calls \c Unlock() on both \a locker1
935 	and \a locker2, if given.
936 	The area's top cache must be locked and must be unlocked as a side effect
937 	of calling \c Unlock() on either \a locker1 or \a locker2.
938 
939 	If the function does not have to wait it does not modify or unlock any
940 	object.
941 
942 	\param area The area to be checked.
943 	\param base The base address of the range to check.
944 	\param size The size of the address range to check.
945 	\param locker1 An object to be unlocked before starting to wait (may
946 		be \c NULL).
947 	\param locker2 An object to be unlocked before starting to wait (may
948 		be \c NULL).
949 	\return \c true, if the function had to wait, \c false otherwise.
950 */
951 template<typename LockerType1, typename LockerType2>
952 static inline bool
953 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
954 	LockerType1* locker1, LockerType2* locker2)
955 {
956 	area->cache->AssertLocked();
957 
958 	VMAreaUnwiredWaiter waiter;
959 	if (!area->AddWaiterIfWired(&waiter, base, size))
960 		return false;
961 
962 	// unlock everything and wait
963 	if (locker1 != NULL)
964 		locker1->Unlock();
965 	if (locker2 != NULL)
966 		locker2->Unlock();
967 
968 	waiter.waitEntry.Wait();
969 
970 	return true;
971 }
972 
973 
974 /*!	Checks whether the given address space has any wired ranges intersecting
975 	with the specified range and waits, if so.
976 
977 	Similar to wait_if_area_range_is_wired(), with the following differences:
978 	- All areas intersecting with the range are checked (respectively all until
979 	  one is found that contains a wired range intersecting with the given
980 	  range).
981 	- The given address space must at least be read-locked and must be unlocked
982 	  when \c Unlock() is called on \a locker.
983 	- None of the areas' caches are allowed to be locked.
984 */
985 template<typename LockerType>
986 static inline bool
987 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
988 	size_t size, LockerType* locker)
989 {
990 	addr_t end = base + size - 1;
991 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
992 			VMArea* area = it.Next();) {
993 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
994 		if (area->Base() > end)
995 			return false;
996 
997 		if (base >= area->Base() + area->Size() - 1)
998 			continue;
999 
1000 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1001 
1002 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1003 			return true;
1004 	}
1005 
1006 	return false;
1007 }
1008 
1009 
1010 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1011 	It must be called in a situation where the kernel address space may be
1012 	locked.
1013 */
1014 status_t
1015 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1016 {
1017 	AddressSpaceReadLocker locker;
1018 	VMArea* area;
1019 	status_t status = locker.SetFromArea(id, area);
1020 	if (status != B_OK)
1021 		return status;
1022 
1023 	if (area->page_protections == NULL) {
1024 		status = allocate_area_page_protections(area);
1025 		if (status != B_OK)
1026 			return status;
1027 	}
1028 
1029 	*cookie = (void*)area;
1030 	return B_OK;
1031 }
1032 
1033 
1034 /*!	This is a debug helper function that can only be used in very specific
1035 	situations.
1036 	Sets protection for the given address range to the protection specified.
1037 	If \a protection is 0 then the involved pages will be marked non-present
1038 	in the translation map to cause a fault on access. The pages aren't
1039 	actually unmapped however so that they can be marked present again with
1040 	additional calls to this function. For this to work the area must be
1041 	fully locked in memory so that the pages aren't otherwise touched.
1042 	This function does not lock the kernel address space and needs to be
1043 	supplied with a \a cookie retrieved from a successful call to
1044 	vm_prepare_kernel_area_debug_protection().
1045 */
1046 status_t
1047 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1048 	uint32 protection)
1049 {
1050 	// check address range
1051 	addr_t address = (addr_t)_address;
1052 	size = PAGE_ALIGN(size);
1053 
1054 	if ((address % B_PAGE_SIZE) != 0
1055 		|| (addr_t)address + size < (addr_t)address
1056 		|| !IS_KERNEL_ADDRESS(address)
1057 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1058 		return B_BAD_VALUE;
1059 	}
1060 
1061 	// Translate the kernel protection to user protection as we only store that.
1062 	if ((protection & B_KERNEL_READ_AREA) != 0)
1063 		protection |= B_READ_AREA;
1064 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1065 		protection |= B_WRITE_AREA;
1066 
1067 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1068 	VMTranslationMap* map = addressSpace->TranslationMap();
1069 	VMArea* area = (VMArea*)cookie;
1070 
1071 	addr_t offset = address - area->Base();
1072 	if (area->Size() - offset < size) {
1073 		panic("protect range not fully within supplied area");
1074 		return B_BAD_VALUE;
1075 	}
1076 
1077 	if (area->page_protections == NULL) {
1078 		panic("area has no page protections");
1079 		return B_BAD_VALUE;
1080 	}
1081 
1082 	// Invalidate the mapping entries so any access to them will fault or
1083 	// restore the mapping entries unchanged so that lookup will succeed again.
1084 	map->Lock();
1085 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1086 	map->Unlock();
1087 
1088 	// And set the proper page protections so that the fault case will actually
1089 	// fail and not simply try to map a new page.
1090 	for (addr_t pageAddress = address; pageAddress < address + size;
1091 			pageAddress += B_PAGE_SIZE) {
1092 		set_area_page_protection(area, pageAddress, protection);
1093 	}
1094 
1095 	return B_OK;
1096 }
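
/*!	Usage sketch for the two debug protection functions above (\a id,
	\a address and \a size are placeholders; the area must be fully locked in
	memory as described above):

		void* cookie;
		status_t status = vm_prepare_kernel_area_debug_protection(id, &cookie);
		if (status == B_OK) {
			// make the range fault on any access ...
			vm_set_kernel_area_debug_protection(cookie, address, size, 0);
			// ... and later restore full kernel access again
			vm_set_kernel_area_debug_protection(cookie, address, size,
				B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		}
*/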
1097 
1098 
1099 status_t
1100 vm_block_address_range(const char* name, void* address, addr_t size)
1101 {
1102 	if (!arch_vm_supports_protection(0))
1103 		return B_NOT_SUPPORTED;
1104 
1105 	AddressSpaceWriteLocker locker;
1106 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1107 	if (status != B_OK)
1108 		return status;
1109 
1110 	VMAddressSpace* addressSpace = locker.AddressSpace();
1111 
1112 	// create an anonymous cache
1113 	VMCache* cache;
1114 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1115 		VM_PRIORITY_SYSTEM);
1116 	if (status != B_OK)
1117 		return status;
1118 
1119 	cache->temporary = 1;
1120 	cache->virtual_end = size;
1121 	cache->Lock();
1122 
1123 	VMArea* area;
1124 	virtual_address_restrictions addressRestrictions = {};
1125 	addressRestrictions.address = address;
1126 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1127 	status = map_backing_store(addressSpace, cache, 0, name, size,
1128 		B_ALREADY_WIRED, B_ALREADY_WIRED, REGION_NO_PRIVATE_MAP, 0,
1129 		&addressRestrictions, true, &area, NULL);
1130 	if (status != B_OK) {
1131 		cache->ReleaseRefAndUnlock();
1132 		return status;
1133 	}
1134 
1135 	cache->Unlock();
1136 	area->cache_type = CACHE_TYPE_RAM;
1137 	return area->id;
1138 }
1139 
1140 
1141 status_t
1142 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1143 {
1144 	AddressSpaceWriteLocker locker(team);
1145 	if (!locker.IsLocked())
1146 		return B_BAD_TEAM_ID;
1147 
1148 	VMAddressSpace* addressSpace = locker.AddressSpace();
1149 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1150 		addressSpace == VMAddressSpace::Kernel()
1151 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1152 }
1153 
1154 
1155 status_t
1156 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1157 	addr_t size, uint32 flags)
1158 {
1159 	if (size == 0)
1160 		return B_BAD_VALUE;
1161 
1162 	AddressSpaceWriteLocker locker(team);
1163 	if (!locker.IsLocked())
1164 		return B_BAD_TEAM_ID;
1165 
1166 	virtual_address_restrictions addressRestrictions = {};
1167 	addressRestrictions.address = *_address;
1168 	addressRestrictions.address_specification = addressSpec;
1169 	VMAddressSpace* addressSpace = locker.AddressSpace();
1170 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1171 		addressSpace == VMAddressSpace::Kernel()
1172 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1173 		_address);
1174 }
1175 
1176 
1177 area_id
1178 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1179 	uint32 wiring, uint32 protection, uint32 flags,
1180 	const virtual_address_restrictions* virtualAddressRestrictions,
1181 	const physical_address_restrictions* physicalAddressRestrictions,
1182 	bool kernel, void** _address)
1183 {
1184 	VMArea* area;
1185 	VMCache* cache;
1186 	vm_page* page = NULL;
1187 	bool isStack = (protection & B_STACK_AREA) != 0;
1188 	page_num_t guardPages;
1189 	bool canOvercommit = false;
1190 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1191 		? VM_PAGE_ALLOC_CLEAR : 0;
1192 
1193 	TRACE(("create_anonymous_area [%ld] %s: size 0x%lx\n", team, name, size));
1194 
1195 	size = PAGE_ALIGN(size);
1196 
1197 	if (size == 0)
1198 		return B_BAD_VALUE;
1199 	if (!arch_vm_supports_protection(protection))
1200 		return B_NOT_SUPPORTED;
1201 
1202 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1203 		canOvercommit = true;
1204 
1205 #ifdef DEBUG_KERNEL_STACKS
1206 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1207 		isStack = true;
1208 #endif
1209 
1210 	// check parameters
1211 	switch (virtualAddressRestrictions->address_specification) {
1212 		case B_ANY_ADDRESS:
1213 		case B_EXACT_ADDRESS:
1214 		case B_BASE_ADDRESS:
1215 		case B_ANY_KERNEL_ADDRESS:
1216 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1217 			break;
1218 
1219 		default:
1220 			return B_BAD_VALUE;
1221 	}
1222 
1223 	// If low or high physical address restrictions are given, we force
1224 	// B_CONTIGUOUS wiring, since only then we'll use
1225 	// vm_page_allocate_page_run() which deals with those restrictions.
1226 	if (physicalAddressRestrictions->low_address != 0
1227 		|| physicalAddressRestrictions->high_address != 0) {
1228 		wiring = B_CONTIGUOUS;
1229 	}
1230 
1231 	physical_address_restrictions stackPhysicalRestrictions;
1232 	bool doReserveMemory = false;
1233 	switch (wiring) {
1234 		case B_NO_LOCK:
1235 			break;
1236 		case B_FULL_LOCK:
1237 		case B_LAZY_LOCK:
1238 		case B_CONTIGUOUS:
1239 			doReserveMemory = true;
1240 			break;
1241 		case B_ALREADY_WIRED:
1242 			break;
1243 		case B_LOMEM:
1244 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1245 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1246 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1247 			wiring = B_CONTIGUOUS;
1248 			doReserveMemory = true;
1249 			break;
1250 		case B_32_BIT_FULL_LOCK:
1251 			if (B_HAIKU_PHYSICAL_BITS <= 32
1252 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1253 				wiring = B_FULL_LOCK;
1254 				doReserveMemory = true;
1255 				break;
1256 			}
1257 			// TODO: We don't really support this mode efficiently. Just fall
1258 			// through for now ...
1259 		case B_32_BIT_CONTIGUOUS:
1260 			#if B_HAIKU_PHYSICAL_BITS > 32
1261 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1262 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1263 					stackPhysicalRestrictions.high_address
1264 						= (phys_addr_t)1 << 32;
1265 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1266 				}
1267 			#endif
1268 			wiring = B_CONTIGUOUS;
1269 			doReserveMemory = true;
1270 			break;
1271 		default:
1272 			return B_BAD_VALUE;
1273 	}
1274 
1275 	// Optimization: For a single-page contiguous allocation without low/high
1276 	// memory restriction B_FULL_LOCK wiring suffices.
1277 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1278 		&& physicalAddressRestrictions->low_address == 0
1279 		&& physicalAddressRestrictions->high_address == 0) {
1280 		wiring = B_FULL_LOCK;
1281 	}
1282 
1283 	// For full lock or contiguous areas we're also going to map the pages and
1284 	// thus need to reserve pages for the mapping backend upfront.
1285 	addr_t reservedMapPages = 0;
1286 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1287 		AddressSpaceWriteLocker locker;
1288 		status_t status = locker.SetTo(team);
1289 		if (status != B_OK)
1290 			return status;
1291 
1292 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1293 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1294 	}
1295 
1296 	int priority;
1297 	if (team != VMAddressSpace::KernelID())
1298 		priority = VM_PRIORITY_USER;
1299 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1300 		priority = VM_PRIORITY_VIP;
1301 	else
1302 		priority = VM_PRIORITY_SYSTEM;
1303 
1304 	// Reserve memory before acquiring the address space lock. This reduces the
1305 	// chances of failure, since while holding the write lock to the address
1306 	// space (if it is the kernel address space that is), the low memory handler
1307 	// won't be able to free anything for us.
1308 	addr_t reservedMemory = 0;
1309 	if (doReserveMemory) {
1310 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1311 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1312 			return B_NO_MEMORY;
1313 		reservedMemory = size;
1314 		// TODO: We don't reserve the memory for the pages for the page
1315 		// directories/tables. We actually need to do so, since we currently don't
1316 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1317 		// there are actually fewer physical pages than there should be, which
1318 		// can get the VM into trouble in low memory situations.
1319 	}
1320 
1321 	AddressSpaceWriteLocker locker;
1322 	VMAddressSpace* addressSpace;
1323 	status_t status;
1324 
1325 	// For full lock areas reserve the pages before locking the address
1326 	// space. E.g. block caches can't release their memory while we hold the
1327 	// address space lock.
1328 	page_num_t reservedPages = reservedMapPages;
1329 	if (wiring == B_FULL_LOCK)
1330 		reservedPages += size / B_PAGE_SIZE;
1331 
1332 	vm_page_reservation reservation;
1333 	if (reservedPages > 0) {
1334 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1335 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1336 					priority)) {
1337 				reservedPages = 0;
1338 				status = B_WOULD_BLOCK;
1339 				goto err0;
1340 			}
1341 		} else
1342 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1343 	}
1344 
1345 	if (wiring == B_CONTIGUOUS) {
1346 		// we try to allocate the page run here upfront as this may easily
1347 		// fail for obvious reasons
1348 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1349 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1350 		if (page == NULL) {
1351 			status = B_NO_MEMORY;
1352 			goto err0;
1353 		}
1354 	}
1355 
1356 	// Lock the address space and, if B_EXACT_ADDRESS and
1357 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1358 	// is not wired.
1359 	do {
1360 		status = locker.SetTo(team);
1361 		if (status != B_OK)
1362 			goto err1;
1363 
1364 		addressSpace = locker.AddressSpace();
1365 	} while (virtualAddressRestrictions->address_specification
1366 			== B_EXACT_ADDRESS
1367 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1368 		&& wait_if_address_range_is_wired(addressSpace,
1369 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1370 
1371 	// create an anonymous cache
1372 	// if it's a stack, make sure that at least two pages are available
1373 	guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0
1374 		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
1375 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1376 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1377 		wiring == B_NO_LOCK, priority);
1378 	if (status != B_OK)
1379 		goto err1;
1380 
1381 	cache->temporary = 1;
1382 	cache->virtual_end = size;
1383 	cache->committed_size = reservedMemory;
1384 		// TODO: This should be done via a method.
1385 	reservedMemory = 0;
1386 
1387 	cache->Lock();
1388 
1389 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1390 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1391 		kernel, &area, _address);
1392 
1393 	if (status != B_OK) {
1394 		cache->ReleaseRefAndUnlock();
1395 		goto err1;
1396 	}
1397 
1398 	locker.DegradeToReadLock();
1399 
1400 	switch (wiring) {
1401 		case B_NO_LOCK:
1402 		case B_LAZY_LOCK:
1403 			// do nothing - the pages are mapped in as needed
1404 			break;
1405 
1406 		case B_FULL_LOCK:
1407 		{
1408 			// Allocate and map all pages for this area
1409 
1410 			off_t offset = 0;
1411 			for (addr_t address = area->Base();
1412 					address < area->Base() + (area->Size() - 1);
1413 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1414 #ifdef DEBUG_KERNEL_STACKS
1415 #	ifdef STACK_GROWS_DOWNWARDS
1416 				if (isStack && address < area->Base()
1417 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1418 #	else
1419 				if (isStack && address >= area->Base() + area->Size()
1420 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1421 #	endif
1422 					continue;
1423 #endif
1424 				vm_page* page = vm_page_allocate_page(&reservation,
1425 					PAGE_STATE_WIRED | pageAllocFlags);
1426 				cache->InsertPage(page, offset);
1427 				map_page(area, page, address, protection, &reservation);
1428 
1429 				DEBUG_PAGE_ACCESS_END(page);
1430 			}
1431 
1432 			break;
1433 		}
1434 
1435 		case B_ALREADY_WIRED:
1436 		{
1437 			// The pages should already be mapped. This is only really useful
1438 			// during boot time. Find the appropriate vm_page objects and stick
1439 			// them in the cache object.
1440 			VMTranslationMap* map = addressSpace->TranslationMap();
1441 			off_t offset = 0;
1442 
1443 			if (!gKernelStartup)
1444 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1445 
1446 			map->Lock();
1447 
1448 			for (addr_t virtualAddress = area->Base();
1449 					virtualAddress < area->Base() + (area->Size() - 1);
1450 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1451 				phys_addr_t physicalAddress;
1452 				uint32 flags;
1453 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1454 				if (status < B_OK) {
1455 					panic("looking up mapping failed for va 0x%lx\n",
1456 						virtualAddress);
1457 				}
1458 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1459 				if (page == NULL) {
1460 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1461 						"\n", physicalAddress);
1462 				}
1463 
1464 				DEBUG_PAGE_ACCESS_START(page);
1465 
1466 				cache->InsertPage(page, offset);
1467 				increment_page_wired_count(page);
1468 				vm_page_set_state(page, PAGE_STATE_WIRED);
1469 				page->busy = false;
1470 
1471 				DEBUG_PAGE_ACCESS_END(page);
1472 			}
1473 
1474 			map->Unlock();
1475 			break;
1476 		}
1477 
1478 		case B_CONTIGUOUS:
1479 		{
1480 			// We have already allocated our contiguous page run, so we can now
1481 			// just map them in the address space
1482 			VMTranslationMap* map = addressSpace->TranslationMap();
1483 			phys_addr_t physicalAddress
1484 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1485 			addr_t virtualAddress = area->Base();
1486 			off_t offset = 0;
1487 
1488 			map->Lock();
1489 
1490 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1491 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1492 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1493 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1494 				if (page == NULL)
1495 					panic("couldn't lookup physical page just allocated\n");
1496 
1497 				status = map->Map(virtualAddress, physicalAddress, protection,
1498 					area->MemoryType(), &reservation);
1499 				if (status < B_OK)
1500 					panic("couldn't map physical page in page run\n");
1501 
1502 				cache->InsertPage(page, offset);
1503 				increment_page_wired_count(page);
1504 
1505 				DEBUG_PAGE_ACCESS_END(page);
1506 			}
1507 
1508 			map->Unlock();
1509 			break;
1510 		}
1511 
1512 		default:
1513 			break;
1514 	}
1515 
1516 	cache->Unlock();
1517 
1518 	if (reservedPages > 0)
1519 		vm_page_unreserve_pages(&reservation);
1520 
1521 	TRACE(("vm_create_anonymous_area: done\n"));
1522 
1523 	area->cache_type = CACHE_TYPE_RAM;
1524 	return area->id;
1525 
1526 err1:
1527 	if (wiring == B_CONTIGUOUS) {
1528 		// we had reserved the area space upfront...
1529 		phys_addr_t pageNumber = page->physical_page_number;
1530 		int32 i;
1531 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1532 			page = vm_lookup_page(pageNumber);
1533 			if (page == NULL)
1534 				panic("couldn't lookup physical page just allocated\n");
1535 
1536 			vm_page_set_state(page, PAGE_STATE_FREE);
1537 		}
1538 	}
1539 
1540 err0:
1541 	if (reservedPages > 0)
1542 		vm_page_unreserve_pages(&reservation);
1543 	if (reservedMemory > 0)
1544 		vm_unreserve_memory(reservedMemory);
1545 
1546 	return status;
1547 }
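
/*!	Usage sketch for vm_create_anonymous_area() (the area name and size are
	placeholders):

		virtual_address_restrictions virtualRestrictions = {};
		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
		physical_address_restrictions physicalRestrictions = {};

		void* address;
		area_id area = vm_create_anonymous_area(VMAddressSpace::KernelID(),
			"example buffer", 4 * B_PAGE_SIZE, B_FULL_LOCK,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, &virtualRestrictions,
			&physicalRestrictions, true, &address);
		if (area < 0)
			return area;
*/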
1548 
1549 
1550 area_id
1551 vm_map_physical_memory(team_id team, const char* name, void** _address,
1552 	uint32 addressSpec, addr_t size, uint32 protection,
1553 	phys_addr_t physicalAddress, bool alreadyWired)
1554 {
1555 	VMArea* area;
1556 	VMCache* cache;
1557 	addr_t mapOffset;
1558 
1559 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
1560 		"spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team,
1561 		name, *_address, addressSpec, size, protection, physicalAddress));
1562 
1563 	if (!arch_vm_supports_protection(protection))
1564 		return B_NOT_SUPPORTED;
1565 
1566 	AddressSpaceWriteLocker locker(team);
1567 	if (!locker.IsLocked())
1568 		return B_BAD_TEAM_ID;
1569 
1570 	// if the physical address is somewhere inside a page,
1571 	// move the actual area down to align on a page boundary
1572 	mapOffset = physicalAddress % B_PAGE_SIZE;
1573 	size += mapOffset;
1574 	physicalAddress -= mapOffset;
1575 
1576 	size = PAGE_ALIGN(size);
1577 
1578 	// create a device cache
1579 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1580 	if (status != B_OK)
1581 		return status;
1582 
1583 	cache->virtual_end = size;
1584 
1585 	cache->Lock();
1586 
1587 	virtual_address_restrictions addressRestrictions = {};
1588 	addressRestrictions.address = *_address;
1589 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1590 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1591 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1592 		true, &area, _address);
1593 
1594 	if (status < B_OK)
1595 		cache->ReleaseRefLocked();
1596 
1597 	cache->Unlock();
1598 
1599 	if (status == B_OK) {
1600 		// set requested memory type -- use uncached, if not given
1601 		uint32 memoryType = addressSpec & B_MTR_MASK;
1602 		if (memoryType == 0)
1603 			memoryType = B_MTR_UC;
1604 
1605 		area->SetMemoryType(memoryType);
1606 
1607 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1608 		if (status != B_OK)
1609 			delete_area(locker.AddressSpace(), area, false);
1610 	}
1611 
1612 	if (status != B_OK)
1613 		return status;
1614 
1615 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1616 
1617 	if (alreadyWired) {
1618 		// The area is already mapped, but possibly not with the right
1619 		// memory type.
1620 		map->Lock();
1621 		map->ProtectArea(area, area->protection);
1622 		map->Unlock();
1623 	} else {
1624 		// Map the area completely.
1625 
1626 		// reserve pages needed for the mapping
1627 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1628 			area->Base() + (size - 1));
1629 		vm_page_reservation reservation;
1630 		vm_page_reserve_pages(&reservation, reservePages,
1631 			team == VMAddressSpace::KernelID()
1632 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1633 
1634 		map->Lock();
1635 
1636 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1637 			map->Map(area->Base() + offset, physicalAddress + offset,
1638 				protection, area->MemoryType(), &reservation);
1639 		}
1640 
1641 		map->Unlock();
1642 
1643 		vm_page_unreserve_pages(&reservation);
1644 	}
1645 
1646 	// modify the pointer returned to be offset back into the new area
1647 	// the same way the incoming physical address was offset
1648 	*_address = (void*)((addr_t)*_address + mapOffset);
1649 
1650 	area->cache_type = CACHE_TYPE_DEVICE;
1651 	return area->id;
1652 }
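
/*!	Usage sketch for vm_map_physical_memory() (the physical address, size and
	area name are placeholders):

		void* virtualBase = NULL;
		area_id area = vm_map_physical_memory(VMAddressSpace::KernelID(),
			"device registers", &virtualBase, B_ANY_KERNEL_ADDRESS,
			B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
			(phys_addr_t)0xfe000000, false);
		// on success virtualBase points to the mapped registers; since no
		// B_MTR_* flag was given, the memory type defaults to uncached
		// (B_MTR_UC)
*/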
1653 
1654 
1655 /*!	Don't use!
1656 	TODO: This function was introduced to map physical page vecs to
1657 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1658 	use a device cache and does not track vm_page::wired_count!
1659 */
1660 area_id
1661 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1662 	uint32 addressSpec, addr_t* _size, uint32 protection,
1663 	struct generic_io_vec* vecs, uint32 vecCount)
1664 {
1665 	TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, "
1666 		"spec = %ld, _size = %p, protection = %ld, vecs = %p, "
1667 		"vecCount = %ld)\n", team, name, *_address, addressSpec, _size,
1668 		protection, vecs, vecCount));
1669 
1670 	if (!arch_vm_supports_protection(protection)
1671 		|| (addressSpec & B_MTR_MASK) != 0) {
1672 		return B_NOT_SUPPORTED;
1673 	}
1674 
1675 	AddressSpaceWriteLocker locker(team);
1676 	if (!locker.IsLocked())
1677 		return B_BAD_TEAM_ID;
1678 
1679 	if (vecCount == 0)
1680 		return B_BAD_VALUE;
1681 
1682 	addr_t size = 0;
1683 	for (uint32 i = 0; i < vecCount; i++) {
1684 		if (vecs[i].base % B_PAGE_SIZE != 0
1685 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1686 			return B_BAD_VALUE;
1687 		}
1688 
1689 		size += vecs[i].length;
1690 	}
1691 
1692 	// create a device cache
1693 	VMCache* cache;
1694 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1695 	if (result != B_OK)
1696 		return result;
1697 
1698 	cache->virtual_end = size;
1699 
1700 	cache->Lock();
1701 
1702 	VMArea* area;
1703 	virtual_address_restrictions addressRestrictions = {};
1704 	addressRestrictions.address = *_address;
1705 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1706 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1707 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1708 		&addressRestrictions, true, &area, _address);
1709 
1710 	if (result != B_OK)
1711 		cache->ReleaseRefLocked();
1712 
1713 	cache->Unlock();
1714 
1715 	if (result != B_OK)
1716 		return result;
1717 
1718 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1719 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1720 		area->Base() + (size - 1));
1721 
1722 	vm_page_reservation reservation;
1723 	vm_page_reserve_pages(&reservation, reservePages,
1724 			team == VMAddressSpace::KernelID()
1725 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1726 	map->Lock();
1727 
1728 	uint32 vecIndex = 0;
1729 	size_t vecOffset = 0;
1730 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1731 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1732 			vecOffset = 0;
1733 			vecIndex++;
1734 		}
1735 
1736 		if (vecIndex >= vecCount)
1737 			break;
1738 
1739 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1740 			protection, area->MemoryType(), &reservation);
1741 
1742 		vecOffset += B_PAGE_SIZE;
1743 	}
1744 
1745 	map->Unlock();
1746 	vm_page_unreserve_pages(&reservation);
1747 
1748 	if (_size != NULL)
1749 		*_size = size;
1750 
1751 	area->cache_type = CACHE_TYPE_DEVICE;
1752 	return area->id;
1753 }
1754 
1755 
1756 area_id
1757 vm_create_null_area(team_id team, const char* name, void** address,
1758 	uint32 addressSpec, addr_t size, uint32 flags)
1759 {
1760 	size = PAGE_ALIGN(size);
1761 
1762 	// Lock the address space and, if B_EXACT_ADDRESS and
1763 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1764 	// is not wired.
1765 	AddressSpaceWriteLocker locker;
1766 	do {
1767 		if (locker.SetTo(team) != B_OK)
1768 			return B_BAD_TEAM_ID;
1769 	} while (addressSpec == B_EXACT_ADDRESS
1770 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1771 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1772 			(addr_t)*address, size, &locker));
1773 
1774 	// create a null cache
1775 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1776 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1777 	VMCache* cache;
1778 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1779 	if (status != B_OK)
1780 		return status;
1781 
1782 	cache->temporary = 1;
1783 	cache->virtual_end = size;
1784 
1785 	cache->Lock();
1786 
1787 	VMArea* area;
1788 	virtual_address_restrictions addressRestrictions = {};
1789 	addressRestrictions.address = *address;
1790 	addressRestrictions.address_specification = addressSpec;
1791 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1792 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1793 		&addressRestrictions, true, &area, address);
1794 
1795 	if (status < B_OK) {
1796 		cache->ReleaseRefAndUnlock();
1797 		return status;
1798 	}
1799 
1800 	cache->Unlock();
1801 
1802 	area->cache_type = CACHE_TYPE_NULL;
1803 	return area->id;
1804 }
1805 
1806 
1807 /*!	Creates the vnode cache for the specified \a vnode.
1808 	The vnode has to be marked busy when calling this function.
1809 */
1810 status_t
1811 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1812 {
1813 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1814 }
1815 
1816 
1817 /*!	\a cache must be locked. The area's address space must be read-locked.
1818 */
1819 static void
1820 pre_map_area_pages(VMArea* area, VMCache* cache,
1821 	vm_page_reservation* reservation)
1822 {
1823 	addr_t baseAddress = area->Base();
1824 	addr_t cacheOffset = area->cache_offset;
1825 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1826 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1827 
1828 	for (VMCachePagesTree::Iterator it
1829 				= cache->pages.GetIterator(firstPage, true, true);
1830 			vm_page* page = it.Next();) {
1831 		if (page->cache_offset >= endPage)
1832 			break;
1833 
1834 		// skip busy and inactive pages
1835 		if (page->busy || page->usage_count == 0)
1836 			continue;
1837 
1838 		DEBUG_PAGE_ACCESS_START(page);
1839 		map_page(area, page,
1840 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1841 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1842 		DEBUG_PAGE_ACCESS_END(page);
1843 	}
1844 }
1845 
1846 
1847 /*!	Will map the file specified by \a fd to an area in memory.
1848 	The file will be mirrored beginning at the specified \a offset. The
1849 	\a offset and \a size arguments have to be page aligned.
1850 */
1851 static area_id
1852 _vm_map_file(team_id team, const char* name, void** _address,
1853 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1854 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1855 {
1856 	// TODO: for binary files, we want to make sure that they get a
1857 	//	snapshot of the file at mapping time, i.e. later changes should not
1858 	//	make it into the mapped copy -- this will need quite some changes
1859 	//	to be done in a nice way
1860 	TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n",
1861 		fd, offset, size, mapping));
1862 
1863 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1864 	size = PAGE_ALIGN(size);
1865 
1866 	if (mapping == REGION_NO_PRIVATE_MAP)
1867 		protection |= B_SHARED_AREA;
1868 	if (addressSpec != B_EXACT_ADDRESS)
1869 		unmapAddressRange = false;
1870 
1871 	if (fd < 0) {
1872 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1873 		virtual_address_restrictions virtualRestrictions = {};
1874 		virtualRestrictions.address = *_address;
1875 		virtualRestrictions.address_specification = addressSpec;
1876 		physical_address_restrictions physicalRestrictions = {};
1877 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1878 			flags, &virtualRestrictions, &physicalRestrictions, kernel,
1879 			_address);
1880 	}
1881 
1882 	// get the open flags of the FD
1883 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1884 	if (descriptor == NULL)
1885 		return EBADF;
1886 	int32 openMode = descriptor->open_mode;
1887 	put_fd(descriptor);
1888 
1889 	// The FD must be open for reading in any case. For a shared mapping with
1890 	// write access, the FD must additionally be open for writing.
1891 	if ((openMode & O_ACCMODE) == O_WRONLY
1892 		|| (mapping == REGION_NO_PRIVATE_MAP
1893 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1894 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1895 		return EACCES;
1896 	}
1897 
1898 	// get the vnode for the object, this also grabs a ref to it
1899 	struct vnode* vnode = NULL;
1900 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1901 	if (status < B_OK)
1902 		return status;
1903 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1904 
1905 	// If we're going to pre-map pages, we need to reserve the pages needed by
1906 	// the mapping backend upfront.
1907 	page_num_t reservedPreMapPages = 0;
1908 	vm_page_reservation reservation;
1909 	if ((protection & B_READ_AREA) != 0) {
1910 		AddressSpaceWriteLocker locker;
1911 		status = locker.SetTo(team);
1912 		if (status != B_OK)
1913 			return status;
1914 
1915 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1916 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1917 
1918 		locker.Unlock();
1919 
1920 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1921 			team == VMAddressSpace::KernelID()
1922 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1923 	}
1924 
1925 	struct PageUnreserver {
1926 		PageUnreserver(vm_page_reservation* reservation)
1927 			:
1928 			fReservation(reservation)
1929 		{
1930 		}
1931 
1932 		~PageUnreserver()
1933 		{
1934 			if (fReservation != NULL)
1935 				vm_page_unreserve_pages(fReservation);
1936 		}
1937 
1938 		vm_page_reservation* fReservation;
1939 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1940 
1941 	// Lock the address space and, if the specified address range shall be
1942 	// unmapped, ensure it is not wired.
1943 	AddressSpaceWriteLocker locker;
1944 	do {
1945 		if (locker.SetTo(team) != B_OK)
1946 			return B_BAD_TEAM_ID;
1947 	} while (unmapAddressRange
1948 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1949 			(addr_t)*_address, size, &locker));
1950 
1951 	// TODO: this only works for file systems that use the file cache
1952 	VMCache* cache;
1953 	status = vfs_get_vnode_cache(vnode, &cache, false);
1954 	if (status < B_OK)
1955 		return status;
1956 
1957 	cache->Lock();
1958 
1959 	VMArea* area;
1960 	virtual_address_restrictions addressRestrictions = {};
1961 	addressRestrictions.address = *_address;
1962 	addressRestrictions.address_specification = addressSpec;
1963 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1964 		0, protection, mapping,
1965 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1966 		&addressRestrictions, kernel, &area, _address);
1967 
1968 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1969 		// map_backing_store() cannot know we no longer need the ref
1970 		cache->ReleaseRefLocked();
1971 	}
1972 
1973 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1974 		pre_map_area_pages(area, cache, &reservation);
1975 
1976 	cache->Unlock();
1977 
1978 	if (status == B_OK) {
1979 		// TODO: this probably deserves a smarter solution, ie. don't always
1980 		// prefetch stuff, and also, probably don't trigger it at this place.
1981 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1982 			// prefetches at most 10 MB starting from "offset"
1983 	}
1984 
1985 	if (status != B_OK)
1986 		return status;
1987 
1988 	area->cache_type = CACHE_TYPE_VNODE;
1989 	return area->id;
1990 }
1991 
1992 
1993 area_id
1994 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
1995 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
1996 	int fd, off_t offset)
1997 {
1998 	if (!arch_vm_supports_protection(protection))
1999 		return B_NOT_SUPPORTED;
2000 
2001 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2002 		mapping, unmapAddressRange, fd, offset, true);
2003 }
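

// A commented usage sketch for vm_map_file() (illustrative only, not
// compiled): maps the first 64 KB of an already opened kernel file
// descriptor read-only into the kernel address space. The descriptor "fd"
// and the area name are placeholders.
//
//	void* address = NULL;
//	area_id area = vm_map_file(VMAddressSpace::KernelID(), "example mapping",
//		&address, B_ANY_KERNEL_ADDRESS, 64 * 1024, B_KERNEL_READ_AREA,
//		REGION_NO_PRIVATE_MAP, false, fd, 0);
//	if (area < B_OK)
//		return area;
//	// ... read from "address" ...
//	delete_area(area);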
2004 
2005 
2006 VMCache*
2007 vm_area_get_locked_cache(VMArea* area)
2008 {
2009 	rw_lock_read_lock(&sAreaCacheLock);
2010 
2011 	while (true) {
2012 		VMCache* cache = area->cache;
2013 
2014 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2015 			// cache has been deleted
2016 			rw_lock_read_lock(&sAreaCacheLock);
2017 			continue;
2018 		}
2019 
2020 		rw_lock_read_lock(&sAreaCacheLock);
2021 
2022 		if (cache == area->cache) {
2023 			cache->AcquireRefLocked();
2024 			rw_lock_read_unlock(&sAreaCacheLock);
2025 			return cache;
2026 		}
2027 
2028 		// the cache changed in the meantime
2029 		cache->Unlock();
2030 	}
2031 }
2032 
2033 
2034 void
2035 vm_area_put_locked_cache(VMCache* cache)
2036 {
2037 	cache->ReleaseRefAndUnlock();
2038 }
2039 
2040 
2041 area_id
2042 vm_clone_area(team_id team, const char* name, void** address,
2043 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2044 	bool kernel)
2045 {
2046 	VMArea* newArea = NULL;
2047 	VMArea* sourceArea;
2048 
2049 	// Check whether the source area exists and is cloneable. If so, mark it
2050 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2051 	{
2052 		AddressSpaceWriteLocker locker;
2053 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2054 		if (status != B_OK)
2055 			return status;
2056 
2057 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2058 			return B_NOT_ALLOWED;
2059 
2060 		sourceArea->protection |= B_SHARED_AREA;
2061 		protection |= B_SHARED_AREA;
2062 	}
2063 
2064 	// Now lock both address spaces and actually do the cloning.
2065 
2066 	MultiAddressSpaceLocker locker;
2067 	VMAddressSpace* sourceAddressSpace;
2068 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2069 	if (status != B_OK)
2070 		return status;
2071 
2072 	VMAddressSpace* targetAddressSpace;
2073 	status = locker.AddTeam(team, true, &targetAddressSpace);
2074 	if (status != B_OK)
2075 		return status;
2076 
2077 	status = locker.Lock();
2078 	if (status != B_OK)
2079 		return status;
2080 
2081 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2082 	if (sourceArea == NULL)
2083 		return B_BAD_VALUE;
2084 
2085 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2086 		return B_NOT_ALLOWED;
2087 
2088 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2089 
2090 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2091 	//	have been adapted. Maybe it should be part of the kernel settings,
2092 	//	anyway (so that old drivers can always work).
2093 #if 0
2094 	if (sourceArea->aspace == VMAddressSpace::Kernel()
2095 		&& addressSpace != VMAddressSpace::Kernel()
2096 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2097 		// kernel areas must not be cloned in userland, unless explicitly
2098 		// declared user-cloneable upon construction
2099 		status = B_NOT_ALLOWED;
2100 	} else
2101 #endif
2102 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2103 		status = B_NOT_ALLOWED;
2104 	else {
2105 		virtual_address_restrictions addressRestrictions = {};
2106 		addressRestrictions.address = *address;
2107 		addressRestrictions.address_specification = addressSpec;
2108 		status = map_backing_store(targetAddressSpace, cache,
2109 			sourceArea->cache_offset, name, sourceArea->Size(),
2110 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2111 			kernel, &newArea, address);
2112 	}
2113 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2114 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2115 		// to create a new cache, and has therefore already acquired a reference
2116 		// to the source cache - but otherwise it has no idea that we need
2117 		// one.
2118 		cache->AcquireRefLocked();
2119 	}
2120 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2121 		// we need to map in everything at this point
2122 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2123 			// we don't have actual pages to map but a physical area
2124 			VMTranslationMap* map
2125 				= sourceArea->address_space->TranslationMap();
2126 			map->Lock();
2127 
2128 			phys_addr_t physicalAddress;
2129 			uint32 oldProtection;
2130 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2131 
2132 			map->Unlock();
2133 
2134 			map = targetAddressSpace->TranslationMap();
2135 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2136 				newArea->Base() + (newArea->Size() - 1));
2137 
2138 			vm_page_reservation reservation;
2139 			vm_page_reserve_pages(&reservation, reservePages,
2140 				targetAddressSpace == VMAddressSpace::Kernel()
2141 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2142 			map->Lock();
2143 
2144 			for (addr_t offset = 0; offset < newArea->Size();
2145 					offset += B_PAGE_SIZE) {
2146 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2147 					protection, newArea->MemoryType(), &reservation);
2148 			}
2149 
2150 			map->Unlock();
2151 			vm_page_unreserve_pages(&reservation);
2152 		} else {
2153 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2154 			size_t reservePages = map->MaxPagesNeededToMap(
2155 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2156 			vm_page_reservation reservation;
2157 			vm_page_reserve_pages(&reservation, reservePages,
2158 				targetAddressSpace == VMAddressSpace::Kernel()
2159 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2160 
2161 			// map in all pages from source
2162 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2163 					vm_page* page  = it.Next();) {
2164 				if (!page->busy) {
2165 					DEBUG_PAGE_ACCESS_START(page);
2166 					map_page(newArea, page,
2167 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2168 							- newArea->cache_offset),
2169 						protection, &reservation);
2170 					DEBUG_PAGE_ACCESS_END(page);
2171 				}
2172 			}
2173 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2174 			// ensuring that!
2175 
2176 			vm_page_unreserve_pages(&reservation);
2177 		}
2178 	}
2179 	if (status == B_OK)
2180 		newArea->cache_type = sourceArea->cache_type;
2181 
2182 	vm_area_put_locked_cache(cache);
2183 
2184 	if (status < B_OK)
2185 		return status;
2186 
2187 	return newArea->id;
2188 }
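

// A commented usage sketch for vm_clone_area() (illustrative only, not
// compiled): clones a hypothetical source area "sourceID" read-only into the
// kernel address space. With REGION_NO_PRIVATE_MAP both areas share the same
// cache and thus see the same pages.
//
//	void* clonedAddress = NULL;
//	area_id clone = vm_clone_area(VMAddressSpace::KernelID(), "clone",
//		&clonedAddress, B_ANY_KERNEL_ADDRESS, B_KERNEL_READ_AREA,
//		REGION_NO_PRIVATE_MAP, sourceID, true);
//	if (clone < B_OK)
//		return clone;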
2189 
2190 
2191 /*!	Deletes the specified area of the given address space.
2192 
2193 	The address space must be write-locked.
2194 	The caller must ensure that the area does not have any wired ranges.
2195 
2196 	\param addressSpace The address space containing the area.
2197 	\param area The area to be deleted.
2198 	\param deletingAddressSpace \c true, if the address space is in the process
2199 		of being deleted.
2200 */
2201 static void
2202 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2203 	bool deletingAddressSpace)
2204 {
2205 	ASSERT(!area->IsWired());
2206 
2207 	VMAreaHash::Remove(area);
2208 
2209 	// At this point the area is removed from the global hash table, but
2210 	// still exists in the area list.
2211 
2212 	// Unmap the virtual address space the area occupied.
2213 	{
2214 		// We need to lock the complete cache chain.
2215 		VMCache* topCache = vm_area_get_locked_cache(area);
2216 		VMCacheChainLocker cacheChainLocker(topCache);
2217 		cacheChainLocker.LockAllSourceCaches();
2218 
2219 		// If the area's top cache is a temporary cache and the area is the only
2220 		// one referencing it (besides us currently holding a second reference),
2221 		// the unmapping code doesn't need to care about preserving the accessed
2222 		// and dirty flags of the top cache page mappings.
2223 		bool ignoreTopCachePageFlags
2224 			= topCache->temporary && topCache->RefCount() == 2;
2225 
2226 		area->address_space->TranslationMap()->UnmapArea(area,
2227 			deletingAddressSpace, ignoreTopCachePageFlags);
2228 	}
2229 
2230 	if (!area->cache->temporary)
2231 		area->cache->WriteModified();
2232 
2233 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2234 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2235 
2236 	arch_vm_unset_memory_type(area);
2237 	addressSpace->RemoveArea(area, allocationFlags);
2238 	addressSpace->Put();
2239 
2240 	area->cache->RemoveArea(area);
2241 	area->cache->ReleaseRef();
2242 
2243 	addressSpace->DeleteArea(area, allocationFlags);
2244 }
2245 
2246 
2247 status_t
2248 vm_delete_area(team_id team, area_id id, bool kernel)
2249 {
2250 	TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id));
2251 
2252 	// lock the address space and make sure the area isn't wired
2253 	AddressSpaceWriteLocker locker;
2254 	VMArea* area;
2255 	AreaCacheLocker cacheLocker;
2256 
2257 	do {
2258 		status_t status = locker.SetFromArea(team, id, area);
2259 		if (status != B_OK)
2260 			return status;
2261 
2262 		cacheLocker.SetTo(area);
2263 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2264 
2265 	cacheLocker.Unlock();
2266 
2267 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2268 		return B_NOT_ALLOWED;
2269 
2270 	delete_area(locker.AddressSpace(), area, false);
2271 	return B_OK;
2272 }
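

// A commented sketch (illustrative only, not compiled) of the public
// create_area()/delete_area() pair; the latter presumably ends up in
// vm_delete_area() above. The same calls appear later in this file for the
// kernel args areas; the name and size here are arbitrary examples.
//
//	void* address = NULL;
//	area_id area = create_area("scratch buffer", &address,
//		B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE, B_FULL_LOCK,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	if (area < B_OK)
//		return area;
//	// ... use the buffer ...
//	delete_area(area);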
2273 
2274 
2275 /*!	Creates a new cache on top of the given cache, moves all areas from
2276 	the old cache to the new one, and changes the protection of all affected
2277 	areas' pages to read-only. If requested, wired pages are moved up to the
2278 	new cache and copies are added to the old cache in their place.
2279 	Preconditions:
2280 	- The given cache must be locked.
2281 	- All of the cache's areas' address spaces must be read locked.
2282 	- Either the cache must not have any wired ranges or a page reservation for
2283 	  all wired pages must be provided, so they can be copied.
2284 
2285 	\param lowerCache The cache on top of which a new cache shall be created.
2286 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2287 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2288 		has wired pages. The wired pages are copied in this case.
2289 */
2290 static status_t
2291 vm_copy_on_write_area(VMCache* lowerCache,
2292 	vm_page_reservation* wiredPagesReservation)
2293 {
2294 	VMCache* upperCache;
2295 
2296 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2297 
2298 	// We need to separate the cache from its areas. The cache goes one level
2299 	// deeper and we create a new cache in between.
2300 
2301 	// create an anonymous cache
2302 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2303 		0, true, VM_PRIORITY_USER);
2304 	if (status != B_OK)
2305 		return status;
2306 
2307 	upperCache->Lock();
2308 
2309 	upperCache->temporary = 1;
2310 	upperCache->virtual_base = lowerCache->virtual_base;
2311 	upperCache->virtual_end = lowerCache->virtual_end;
2312 
2313 	// transfer the lower cache areas to the upper cache
2314 	rw_lock_write_lock(&sAreaCacheLock);
2315 	upperCache->TransferAreas(lowerCache);
2316 	rw_lock_write_unlock(&sAreaCacheLock);
2317 
2318 	lowerCache->AddConsumer(upperCache);
2319 
2320 	// We now need to remap all pages from all of the cache's areas read-only,
2321 	// so that a copy will be created on next write access. If there are wired
2322 	// pages, we keep their protection, move them to the upper cache and create
2323 	// copies for the lower cache.
2324 	if (wiredPagesReservation != NULL) {
2325 		// We need to handle wired pages -- iterate through the cache's pages.
2326 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2327 				vm_page* page = it.Next();) {
2328 			if (page->WiredCount() > 0) {
2329 				// allocate a new page and copy the wired one
2330 				vm_page* copiedPage = vm_page_allocate_page(
2331 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2332 
2333 				vm_memcpy_physical_page(
2334 					copiedPage->physical_page_number * B_PAGE_SIZE,
2335 					page->physical_page_number * B_PAGE_SIZE);
2336 
2337 				// move the wired page to the upper cache (note: removing is OK
2338 				// with the SplayTree iterator) and insert the copy
2339 				upperCache->MovePage(page);
2340 				lowerCache->InsertPage(copiedPage,
2341 					page->cache_offset * B_PAGE_SIZE);
2342 
2343 				DEBUG_PAGE_ACCESS_END(copiedPage);
2344 			} else {
2345 				// Change the protection of this page in all areas.
2346 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2347 						tempArea = tempArea->cache_next) {
2348 					// Keep the page readable at the same privilege level
2349 					// (user/kernel) as before, but remove write access.
2350 					uint32 protection = B_KERNEL_READ_AREA;
2351 					if ((tempArea->protection & B_READ_AREA) != 0)
2352 						protection |= B_READ_AREA;
2353 
2354 					VMTranslationMap* map
2355 						= tempArea->address_space->TranslationMap();
2356 					map->Lock();
2357 					map->ProtectPage(tempArea,
2358 						virtual_page_address(tempArea, page), protection);
2359 					map->Unlock();
2360 				}
2361 			}
2362 		}
2363 	} else {
2364 		// just change the protection of all areas
2365 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2366 				tempArea = tempArea->cache_next) {
2367 			// Keep the pages readable at the same privilege level
2368 			// (user/kernel) as before, but remove write access.
2369 			uint32 protection = B_KERNEL_READ_AREA;
2370 			if ((tempArea->protection & B_READ_AREA) != 0)
2371 				protection |= B_READ_AREA;
2372 
2373 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2374 			map->Lock();
2375 			map->ProtectArea(tempArea, protection);
2376 			map->Unlock();
2377 		}
2378 	}
2379 
2380 	vm_area_put_locked_cache(upperCache);
2381 
2382 	return B_OK;
2383 }
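

// An informal sketch of the cache chain transformation performed by
// vm_copy_on_write_area() above, based on the code in this function: the
// areas are moved to a fresh, empty anonymous cache that consumes the old
// one, while all existing pages stay in the lower cache, now mapped
// read-only.
//
//	before:  area A, area B ---> lowerCache (all pages)
//
//	after:   area A, area B ---> upperCache (temporary, initially empty)
//	                                  |
//	                                  | source
//	                                  v
//	                             lowerCache (all pages, mapped read-only)
//
// A later write fault allocates a copy of the faulted page in upperCache,
// leaving the original page in lowerCache untouched.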
2384 
2385 
2386 area_id
2387 vm_copy_area(team_id team, const char* name, void** _address,
2388 	uint32 addressSpec, uint32 protection, area_id sourceID)
2389 {
2390 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2391 
2392 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2393 		// set the same protection for the kernel as for userland
2394 		protection |= B_KERNEL_READ_AREA;
2395 		if (writableCopy)
2396 			protection |= B_KERNEL_WRITE_AREA;
2397 	}
2398 
2399 	// Do the locking: target address space, all address spaces associated with
2400 	// the source cache, and the cache itself.
2401 	MultiAddressSpaceLocker locker;
2402 	VMAddressSpace* targetAddressSpace;
2403 	VMCache* cache;
2404 	VMArea* source;
2405 	AreaCacheLocker cacheLocker;
2406 	status_t status;
2407 	bool sharedArea;
2408 
2409 	page_num_t wiredPages = 0;
2410 	vm_page_reservation wiredPagesReservation;
2411 
2412 	bool restart;
2413 	do {
2414 		restart = false;
2415 
2416 		locker.Unset();
2417 		status = locker.AddTeam(team, true, &targetAddressSpace);
2418 		if (status == B_OK) {
2419 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2420 				&cache);
2421 		}
2422 		if (status != B_OK)
2423 			return status;
2424 
2425 		cacheLocker.SetTo(cache, true);	// already locked
2426 
2427 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2428 
2429 		page_num_t oldWiredPages = wiredPages;
2430 		wiredPages = 0;
2431 
2432 		// If the source area isn't shared, count the number of wired pages in
2433 		// the cache and reserve as many pages.
2434 		if (!sharedArea) {
2435 			wiredPages = cache->WiredPagesCount();
2436 
2437 			if (wiredPages > oldWiredPages) {
2438 				cacheLocker.Unlock();
2439 				locker.Unlock();
2440 
2441 				if (oldWiredPages > 0)
2442 					vm_page_unreserve_pages(&wiredPagesReservation);
2443 
2444 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2445 					VM_PRIORITY_USER);
2446 
2447 				restart = true;
2448 			}
2449 		} else if (oldWiredPages > 0)
2450 			vm_page_unreserve_pages(&wiredPagesReservation);
2451 	} while (restart);
2452 
2453 	// unreserve pages later
2454 	struct PagesUnreserver {
2455 		PagesUnreserver(vm_page_reservation* reservation)
2456 			:
2457 			fReservation(reservation)
2458 		{
2459 		}
2460 
2461 		~PagesUnreserver()
2462 		{
2463 			if (fReservation != NULL)
2464 				vm_page_unreserve_pages(fReservation);
2465 		}
2466 
2467 	private:
2468 		vm_page_reservation*	fReservation;
2469 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2470 
2471 	if (addressSpec == B_CLONE_ADDRESS) {
2472 		addressSpec = B_EXACT_ADDRESS;
2473 		*_address = (void*)source->Base();
2474 	}
2475 
2476 	// First, create a cache on top of the source area, or use the existing
2477 	// one, if this is a shared area.
2478 
2479 	VMArea* target;
2480 	virtual_address_restrictions addressRestrictions = {};
2481 	addressRestrictions.address = *_address;
2482 	addressRestrictions.address_specification = addressSpec;
2483 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2484 		name, source->Size(), source->wiring, protection,
2485 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2486 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2487 		&addressRestrictions, true, &target, _address);
2488 	if (status < B_OK)
2489 		return status;
2490 
2491 	if (sharedArea) {
2492 		// The new area uses the old area's cache, but map_backing_store()
2493 		// hasn't acquired a ref. So we have to do that now.
2494 		cache->AcquireRefLocked();
2495 	}
2496 
2497 	// If the source area is writable, we need to move it one layer up as well
2498 
2499 	if (!sharedArea) {
2500 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2501 			// TODO: do something more useful if this fails!
2502 			if (vm_copy_on_write_area(cache,
2503 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2504 				panic("vm_copy_on_write_area() failed!\n");
2505 			}
2506 		}
2507 	}
2508 
2509 	// we return the ID of the newly created area
2510 	return target->id;
2511 }
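

// A commented usage sketch for vm_copy_area() (illustrative only, not
// compiled): creates a private copy-on-write duplicate of a hypothetical
// source area at the same base address in another team. "targetTeam" and
// "sourceID" are placeholders.
//
//	void* address = NULL;
//	area_id copy = vm_copy_area(targetTeam, "data copy", &address,
//		B_CLONE_ADDRESS, B_READ_AREA | B_WRITE_AREA, sourceID);
//	if (copy < B_OK)
//		return copy;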
2512 
2513 
2514 static status_t
2515 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2516 	bool kernel)
2517 {
2518 	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = "
2519 		"%#lx)\n", team, areaID, newProtection));
2520 
2521 	if (!arch_vm_supports_protection(newProtection))
2522 		return B_NOT_SUPPORTED;
2523 
2524 	bool becomesWritable
2525 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2526 
2527 	// lock address spaces and cache
2528 	MultiAddressSpaceLocker locker;
2529 	VMCache* cache;
2530 	VMArea* area;
2531 	status_t status;
2532 	AreaCacheLocker cacheLocker;
2533 	bool isWritable;
2534 
2535 	bool restart;
2536 	do {
2537 		restart = false;
2538 
2539 		locker.Unset();
2540 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2541 		if (status != B_OK)
2542 			return status;
2543 
2544 		cacheLocker.SetTo(cache, true);	// already locked
2545 
2546 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2547 			return B_NOT_ALLOWED;
2548 
2549 		if (area->protection == newProtection)
2550 			return B_OK;
2551 
2552 		if (team != VMAddressSpace::KernelID()
2553 			&& area->address_space->ID() != team) {
2554 			// unless you're the kernel, you are only allowed to set
2555 			// the protection of your own areas
2556 			return B_NOT_ALLOWED;
2557 		}
2558 
2559 		isWritable
2560 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2561 
2562 		// Make sure the area (or, if we're going to call
2563 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2564 		// wired ranges.
2565 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2566 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2567 					otherArea = otherArea->cache_next) {
2568 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2569 					restart = true;
2570 					break;
2571 				}
2572 			}
2573 		} else {
2574 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2575 				restart = true;
2576 		}
2577 	} while (restart);
2578 
2579 	bool changePageProtection = true;
2580 	bool changeTopCachePagesOnly = false;
2581 
2582 	if (isWritable && !becomesWritable) {
2583 		// writable -> !writable
2584 
2585 		if (cache->source != NULL && cache->temporary) {
2586 			if (cache->CountWritableAreas(area) == 0) {
2587 				// Since this cache is now backed by the pages in its
2588 				// source cache, we can change its commitment to account
2589 				// only for the pages that really are in this cache.
2590 
2591 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2592 					team == VMAddressSpace::KernelID()
2593 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2594 
2595 				// TODO: we may be able to join with our source cache, if
2596 				// count == 0
2597 			}
2598 		}
2599 
2600 		// If only the writability changes, we can just remap the pages of the
2601 		// top cache, since the pages of lower caches are mapped read-only
2602 		// anyway. That's only advantageous if the number of pages in the cache
2603 		// is significantly smaller than the number of pages in the area,
2604 		// though.
2605 		if (newProtection
2606 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2607 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2608 			changeTopCachePagesOnly = true;
2609 		}
2610 	} else if (!isWritable && becomesWritable) {
2611 		// !writable -> writable
2612 
2613 		if (!cache->consumers.IsEmpty()) {
2614 			// There are consumers -- we have to insert a new cache. Fortunately
2615 			// vm_copy_on_write_area() does everything that's needed.
2616 			changePageProtection = false;
2617 			status = vm_copy_on_write_area(cache, NULL);
2618 		} else {
2619 			// No consumers, so we don't need to insert a new one.
2620 			if (cache->source != NULL && cache->temporary) {
2621 				// the cache's commitment must contain all possible pages
2622 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2623 					team == VMAddressSpace::KernelID()
2624 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2625 			}
2626 
2627 			if (status == B_OK && cache->source != NULL) {
2628 				// There's a source cache, hence we can't just change all pages'
2629 				// protection or we might allow writing into pages belonging to
2630 				// a lower cache.
2631 				changeTopCachePagesOnly = true;
2632 			}
2633 		}
2634 	} else {
2635 		// we don't have anything special to do in all other cases
2636 	}
2637 
2638 	if (status == B_OK) {
2639 		// remap existing pages in this cache
2640 		if (changePageProtection) {
2641 			VMTranslationMap* map = area->address_space->TranslationMap();
2642 			map->Lock();
2643 
2644 			if (changeTopCachePagesOnly) {
2645 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2646 				page_num_t lastPageOffset
2647 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2648 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2649 						vm_page* page = it.Next();) {
2650 					if (page->cache_offset >= firstPageOffset
2651 						&& page->cache_offset < lastPageOffset) {
2652 						addr_t address = virtual_page_address(area, page);
2653 						map->ProtectPage(area, address, newProtection);
2654 					}
2655 				}
2656 			} else
2657 				map->ProtectArea(area, newProtection);
2658 
2659 			map->Unlock();
2660 		}
2661 
2662 		area->protection = newProtection;
2663 	}
2664 
2665 	return status;
2666 }
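

// A commented sketch (illustrative only, not compiled) of the public
// counterpart: set_area_protection() is assumed to be routed to the function
// above. Dropping write access makes later writes fault; re-enabling it may
// trigger the copy-on-write handling shown above. "someAddress" is a
// placeholder.
//
//	area_id area = area_for(someAddress);
//	if (area >= B_OK)
//		set_area_protection(area, B_READ_AREA | B_KERNEL_READ_AREA);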
2667 
2668 
2669 status_t
2670 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2671 {
2672 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2673 	if (addressSpace == NULL)
2674 		return B_BAD_TEAM_ID;
2675 
2676 	VMTranslationMap* map = addressSpace->TranslationMap();
2677 
2678 	map->Lock();
2679 	uint32 dummyFlags;
2680 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2681 	map->Unlock();
2682 
2683 	addressSpace->Put();
2684 	return status;
2685 }
2686 
2687 
2688 /*!	The page's cache must be locked.
2689 */
2690 bool
2691 vm_test_map_modification(vm_page* page)
2692 {
2693 	if (page->modified)
2694 		return true;
2695 
2696 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2697 	vm_page_mapping* mapping;
2698 	while ((mapping = iterator.Next()) != NULL) {
2699 		VMArea* area = mapping->area;
2700 		VMTranslationMap* map = area->address_space->TranslationMap();
2701 
2702 		phys_addr_t physicalAddress;
2703 		uint32 flags;
2704 		map->Lock();
2705 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2706 		map->Unlock();
2707 
2708 		if ((flags & PAGE_MODIFIED) != 0)
2709 			return true;
2710 	}
2711 
2712 	return false;
2713 }
2714 
2715 
2716 /*!	The page's cache must be locked.
2717 */
2718 void
2719 vm_clear_map_flags(vm_page* page, uint32 flags)
2720 {
2721 	if ((flags & PAGE_ACCESSED) != 0)
2722 		page->accessed = false;
2723 	if ((flags & PAGE_MODIFIED) != 0)
2724 		page->modified = false;
2725 
2726 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2727 	vm_page_mapping* mapping;
2728 	while ((mapping = iterator.Next()) != NULL) {
2729 		VMArea* area = mapping->area;
2730 		VMTranslationMap* map = area->address_space->TranslationMap();
2731 
2732 		map->Lock();
2733 		map->ClearFlags(virtual_page_address(area, page), flags);
2734 		map->Unlock();
2735 	}
2736 }
2737 
2738 
2739 /*!	Removes all mappings from a page.
2740 	After you've called this function, the page is unmapped from memory and
2741 	the page's \c accessed and \c modified flags have been updated according
2742 	to the state of the mappings.
2743 	The page's cache must be locked.
2744 */
2745 void
2746 vm_remove_all_page_mappings(vm_page* page)
2747 {
2748 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2749 		VMArea* area = mapping->area;
2750 		VMTranslationMap* map = area->address_space->TranslationMap();
2751 		addr_t address = virtual_page_address(area, page);
2752 		map->UnmapPage(area, address, false);
2753 	}
2754 }
2755 
2756 
2757 int32
2758 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2759 {
2760 	int32 count = 0;
2761 
2762 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2763 	vm_page_mapping* mapping;
2764 	while ((mapping = iterator.Next()) != NULL) {
2765 		VMArea* area = mapping->area;
2766 		VMTranslationMap* map = area->address_space->TranslationMap();
2767 
2768 		bool modified;
2769 		if (map->ClearAccessedAndModified(area,
2770 				virtual_page_address(area, page), false, modified)) {
2771 			count++;
2772 		}
2773 
2774 		page->modified |= modified;
2775 	}
2776
2778 	if (page->accessed) {
2779 		count++;
2780 		page->accessed = false;
2781 	}
2782 
2783 	return count;
2784 }
2785 
2786 
2787 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2788 	mappings.
2789 	The function iterates through the page mappings and removes them until
2790 	encountering one that has been accessed. From then on it will continue to
2791 	iterate, but only clear the accessed flag of the mapping. The page's
2792 	\c modified bit will be updated accordingly, the \c accessed bit will be
2793 	cleared.
2794 	\return The number of mapping accessed bits encountered, including the
2795 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2796 		of the page have been removed.
2797 */
2798 int32
2799 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2800 {
2801 	ASSERT(page->WiredCount() == 0);
2802 
2803 	if (page->accessed)
2804 		return vm_clear_page_mapping_accessed_flags(page);
2805 
2806 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2807 		VMArea* area = mapping->area;
2808 		VMTranslationMap* map = area->address_space->TranslationMap();
2809 		addr_t address = virtual_page_address(area, page);
2810 		bool modified = false;
2811 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2812 			page->accessed = true;
2813 			page->modified |= modified;
2814 			return vm_clear_page_mapping_accessed_flags(page);
2815 		}
2816 		page->modified |= modified;
2817 	}
2818 
2819 	return 0;
2820 }
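

// A commented sketch (illustrative only, not compiled) of how a caller such
// as the page daemon might use the return value documented above; the
// surrounding policy is an assumption of the example, not part of this file,
// and "page" is a placeholder.
//
//	int32 accessedCount = vm_remove_all_page_mappings_if_unaccessed(page);
//	if (accessedCount == 0) {
//		// all mappings are gone -- the page is a candidate for reuse
//	} else {
//		// the page was accessed recently -- keep it mapped and note the
//		// activity (e.g. by bumping its usage count)
//	}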
2821 
2822 
2823 static int
2824 display_mem(int argc, char** argv)
2825 {
2826 	bool physical = false;
2827 	addr_t copyAddress;
2828 	int32 displayWidth;
2829 	int32 itemSize;
2830 	int32 num = -1;
2831 	addr_t address;
2832 	int i = 1, j;
2833 
2834 	if (argc > 1 && argv[1][0] == '-') {
2835 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2836 			physical = true;
2837 			i++;
2838 		} else
2839 			i = 99;
2840 	}
2841 
2842 	if (argc < i + 1 || argc > i + 2) {
2843 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2844 			"\tdl - 8 bytes\n"
2845 			"\tdw - 4 bytes\n"
2846 			"\tds - 2 bytes\n"
2847 			"\tdb - 1 byte\n"
2848 			"\tstring - a whole string\n"
2849 			"  -p or --physical only allows memory from a single page to be "
2850 			"displayed.\n");
2851 		return 0;
2852 	}
2853 
2854 	address = parse_expression(argv[i]);
2855 
2856 	if (argc > i + 1)
2857 		num = parse_expression(argv[i + 1]);
2858 
2859 	// build the format string
2860 	if (strcmp(argv[0], "db") == 0) {
2861 		itemSize = 1;
2862 		displayWidth = 16;
2863 	} else if (strcmp(argv[0], "ds") == 0) {
2864 		itemSize = 2;
2865 		displayWidth = 8;
2866 	} else if (strcmp(argv[0], "dw") == 0) {
2867 		itemSize = 4;
2868 		displayWidth = 4;
2869 	} else if (strcmp(argv[0], "dl") == 0) {
2870 		itemSize = 8;
2871 		displayWidth = 2;
2872 	} else if (strcmp(argv[0], "string") == 0) {
2873 		itemSize = 1;
2874 		displayWidth = -1;
2875 	} else {
2876 		kprintf("display_mem called in an invalid way!\n");
2877 		return 0;
2878 	}
2879 
2880 	if (num <= 0)
2881 		num = displayWidth;
2882 
2883 	void* physicalPageHandle = NULL;
2884 
2885 	if (physical) {
2886 		int32 offset = address & (B_PAGE_SIZE - 1);
2887 		if (num * itemSize + offset > B_PAGE_SIZE) {
2888 			num = (B_PAGE_SIZE - offset) / itemSize;
2889 			kprintf("NOTE: number of bytes has been cut to page size\n");
2890 		}
2891 
2892 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2893 
2894 		if (vm_get_physical_page_debug(address, &copyAddress,
2895 				&physicalPageHandle) != B_OK) {
2896 			kprintf("getting the hardware page failed.\n");
2897 			return 0;
2898 		}
2899 
2900 		address += offset;
2901 		copyAddress += offset;
2902 	} else
2903 		copyAddress = address;
2904 
2905 	if (!strcmp(argv[0], "string")) {
2906 		kprintf("%p \"", (char*)copyAddress);
2907 
2908 		// string mode
2909 		for (i = 0; true; i++) {
2910 			char c;
2911 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2912 					!= B_OK
2913 				|| c == '\0') {
2914 				break;
2915 			}
2916 
2917 			if (c == '\n')
2918 				kprintf("\\n");
2919 			else if (c == '\t')
2920 				kprintf("\\t");
2921 			else {
2922 				if (!isprint(c))
2923 					c = '.';
2924 
2925 				kprintf("%c", c);
2926 			}
2927 		}
2928 
2929 		kprintf("\"\n");
2930 	} else {
2931 		// number mode
2932 		for (i = 0; i < num; i++) {
2933 			uint64 value;	// wide enough for the 8-byte "dl" items
2934 
2935 			if ((i % displayWidth) == 0) {
2936 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2937 				if (i != 0)
2938 					kprintf("\n");
2939 
2940 				kprintf("[0x%lx]  ", address + i * itemSize);
2941 
2942 				for (j = 0; j < displayed; j++) {
2943 					char c;
2944 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2945 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2946 						displayed = j;
2947 						break;
2948 					}
2949 					if (!isprint(c))
2950 						c = '.';
2951 
2952 					kprintf("%c", c);
2953 				}
2954 				if (num > displayWidth) {
2955 					// make sure the spacing in the last line is correct
2956 					for (j = displayed; j < displayWidth * itemSize; j++)
2957 						kprintf(" ");
2958 				}
2959 				kprintf("  ");
2960 			}
2961 
2962 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2963 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2964 				kprintf("read fault");
2965 				break;
2966 			}
2967 
2968 			switch (itemSize) {
2969 				case 1:
2970 					kprintf(" %02x", *(uint8*)&value);
2971 					break;
2972 				case 2:
2973 					kprintf(" %04x", *(uint16*)&value);
2974 					break;
2975 				case 4:
2976 					kprintf(" %08lx", *(uint32*)&value);
2977 					break;
2978 				case 8:
2979 					kprintf(" %016Lx", *(uint64*)&value);
2980 					break;
2981 			}
2982 		}
2983 
2984 		kprintf("\n");
2985 	}
2986 
2987 	if (physical) {
2988 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
2989 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
2990 	}
2991 	return 0;
2992 }
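

// Example kernel debugger invocations matching the usage text above (the
// addresses are placeholders):
//
//	dw 0x80001000 8		dump eight 4-byte words starting at the address
//	db -p 0x9f000 16	dump 16 bytes of the physical page at 0x9f000
//	string 0x80002000	print the NUL-terminated string at the address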
2993 
2994 
2995 static void
2996 dump_cache_tree_recursively(VMCache* cache, int level,
2997 	VMCache* highlightCache)
2998 {
2999 	// print this cache
3000 	for (int i = 0; i < level; i++)
3001 		kprintf("  ");
3002 	if (cache == highlightCache)
3003 		kprintf("%p <--\n", cache);
3004 	else
3005 		kprintf("%p\n", cache);
3006 
3007 	// recursively print its consumers
3008 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3009 			VMCache* consumer = it.Next();) {
3010 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3011 	}
3012 }
3013 
3014 
3015 static int
3016 dump_cache_tree(int argc, char** argv)
3017 {
3018 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3019 		kprintf("usage: %s <address>\n", argv[0]);
3020 		return 0;
3021 	}
3022 
3023 	addr_t address = parse_expression(argv[1]);
3024 	if (address == 0)
3025 		return 0;
3026 
3027 	VMCache* cache = (VMCache*)address;
3028 	VMCache* root = cache;
3029 
3030 	// find the root cache (the transitive source)
3031 	while (root->source != NULL)
3032 		root = root->source;
3033 
3034 	dump_cache_tree_recursively(root, 0, cache);
3035 
3036 	return 0;
3037 }
3038 
3039 
3040 const char*
3041 vm_cache_type_to_string(int32 type)
3042 {
3043 	switch (type) {
3044 		case CACHE_TYPE_RAM:
3045 			return "RAM";
3046 		case CACHE_TYPE_DEVICE:
3047 			return "device";
3048 		case CACHE_TYPE_VNODE:
3049 			return "vnode";
3050 		case CACHE_TYPE_NULL:
3051 			return "null";
3052 
3053 		default:
3054 			return "unknown";
3055 	}
3056 }
3057 
3058 
3059 #if DEBUG_CACHE_LIST
3060 
3061 static void
3062 update_cache_info_recursively(VMCache* cache, cache_info& info)
3063 {
3064 	info.page_count += cache->page_count;
3065 	if (cache->type == CACHE_TYPE_RAM)
3066 		info.committed += cache->committed_size;
3067 
3068 	// recurse
3069 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3070 			VMCache* consumer = it.Next();) {
3071 		update_cache_info_recursively(consumer, info);
3072 	}
3073 }
3074 
3075 
3076 static int
3077 cache_info_compare_page_count(const void* _a, const void* _b)
3078 {
3079 	const cache_info* a = (const cache_info*)_a;
3080 	const cache_info* b = (const cache_info*)_b;
3081 	if (a->page_count == b->page_count)
3082 		return 0;
3083 	return a->page_count < b->page_count ? 1 : -1;
3084 }
3085 
3086 
3087 static int
3088 cache_info_compare_committed(const void* _a, const void* _b)
3089 {
3090 	const cache_info* a = (const cache_info*)_a;
3091 	const cache_info* b = (const cache_info*)_b;
3092 	if (a->committed == b->committed)
3093 		return 0;
3094 	return a->committed < b->committed ? 1 : -1;
3095 }
3096 
3097 
3098 static void
3099 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3100 {
3101 	for (int i = 0; i < level; i++)
3102 		kprintf("  ");
3103 
3104 	kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache,
3105 		vm_cache_type_to_string(cache->type), cache->virtual_base,
3106 		cache->virtual_end, cache->page_count);
3107 
3108 	if (level == 0)
3109 		kprintf("/%lu", info.page_count);
3110 
3111 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3112 		kprintf(", committed: %lld", cache->committed_size);
3113 
3114 		if (level == 0)
3115 			kprintf("/%lld", info.committed);
3116 	}
3117 
3118 	// areas
3119 	if (cache->areas != NULL) {
3120 		VMArea* area = cache->areas;
3121 		kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name,
3122 			area->address_space->ID());
3123 
3124 		while (area->cache_next != NULL) {
3125 			area = area->cache_next;
3126 			kprintf(", %ld", area->id);
3127 		}
3128 	}
3129 
3130 	kputs("\n");
3131 
3132 	// recurse
3133 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3134 			VMCache* consumer = it.Next();) {
3135 		dump_caches_recursively(consumer, info, level + 1);
3136 	}
3137 }
3138 
3139 
3140 static int
3141 dump_caches(int argc, char** argv)
3142 {
3143 	if (sCacheInfoTable == NULL) {
3144 		kprintf("No cache info table!\n");
3145 		return 0;
3146 	}
3147 
3148 	bool sortByPageCount = true;
3149 
3150 	for (int32 i = 1; i < argc; i++) {
3151 		if (strcmp(argv[i], "-c") == 0) {
3152 			sortByPageCount = false;
3153 		} else {
3154 			print_debugger_command_usage(argv[0]);
3155 			return 0;
3156 		}
3157 	}
3158 
3159 	uint32 totalCount = 0;
3160 	uint32 rootCount = 0;
3161 	off_t totalCommitted = 0;
3162 	page_num_t totalPages = 0;
3163 
3164 	VMCache* cache = gDebugCacheList;
3165 	while (cache) {
3166 		totalCount++;
3167 		if (cache->source == NULL) {
3168 			cache_info stackInfo;
3169 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3170 				? sCacheInfoTable[rootCount] : stackInfo;
3171 			rootCount++;
3172 			info.cache = cache;
3173 			info.page_count = 0;
3174 			info.committed = 0;
3175 			update_cache_info_recursively(cache, info);
3176 			totalCommitted += info.committed;
3177 			totalPages += info.page_count;
3178 		}
3179 
3180 		cache = cache->debug_next;
3181 	}
3182 
3183 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3184 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3185 			sortByPageCount
3186 				? &cache_info_compare_page_count
3187 				: &cache_info_compare_committed);
3188 	}
3189 
3190 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3191 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3192 	kprintf("%lu caches (%lu root caches), sorted by %s per cache "
3193 		"tree...\n\n", totalCount, rootCount,
3194 		sortByPageCount ? "page count" : "committed size");
3195 
3196 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3197 		for (uint32 i = 0; i < rootCount; i++) {
3198 			cache_info& info = sCacheInfoTable[i];
3199 			dump_caches_recursively(info.cache, info, 0);
3200 		}
3201 	} else
3202 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3203 
3204 	return 0;
3205 }
3206 
3207 #endif	// DEBUG_CACHE_LIST
3208 
3209 
3210 static int
3211 dump_cache(int argc, char** argv)
3212 {
3213 	VMCache* cache;
3214 	bool showPages = false;
3215 	int i = 1;
3216 
3217 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3218 		kprintf("usage: %s [-ps] <address>\n"
3219 			"  if -p is specified, all pages are shown; if -s is used,\n"
3220 			"  only the cache info itself is shown.\n", argv[0]);
3221 		return 0;
3222 	}
3223 	while (argv[i][0] == '-') {
3224 		char* arg = argv[i] + 1;
3225 		while (arg[0]) {
3226 			if (arg[0] == 'p')
3227 				showPages = true;
3228 			arg++;
3229 		}
3230 		i++;
3231 	}
3232 	if (argv[i] == NULL) {
3233 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3234 		return 0;
3235 	}
3236 
3237 	addr_t address = parse_expression(argv[i]);
3238 	if (address == 0)
3239 		return 0;
3240 
3241 	cache = (VMCache*)address;
3242 
3243 	cache->Dump(showPages);
3244 
3245 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3246 
3247 	return 0;
3248 }
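

// Example kernel debugger usage, assuming this function is registered as the
// "cache" command (the registration itself is not part of this excerpt; the
// address is a placeholder):
//
//	cache 0x82345000	print the VMCache structure at that address
//	cache -p 0x82345000	additionally list all of its pages
//
// The command also stores the cache's source in the "_sourceCache" debug
// variable (see above), so the chain can be walked interactively.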
3249 
3250 
3251 static void
3252 dump_area_struct(VMArea* area, bool mappings)
3253 {
3254 	kprintf("AREA: %p\n", area);
3255 	kprintf("name:\t\t'%s'\n", area->name);
3256 	kprintf("owner:\t\t0x%lx\n", area->address_space->ID());
3257 	kprintf("id:\t\t0x%lx\n", area->id);
3258 	kprintf("base:\t\t0x%lx\n", area->Base());
3259 	kprintf("size:\t\t0x%lx\n", area->Size());
3260 	kprintf("protection:\t0x%lx\n", area->protection);
3261 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3262 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3263 	kprintf("cache:\t\t%p\n", area->cache);
3264 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3265 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
3266 	kprintf("cache_next:\t%p\n", area->cache_next);
3267 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3268 
3269 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3270 	if (mappings) {
3271 		kprintf("page mappings:\n");
3272 		while (iterator.HasNext()) {
3273 			vm_page_mapping* mapping = iterator.Next();
3274 			kprintf("  %p", mapping->page);
3275 		}
3276 		kprintf("\n");
3277 	} else {
3278 		uint32 count = 0;
3279 		while (iterator.Next() != NULL) {
3280 			count++;
3281 		}
3282 		kprintf("page mappings:\t%lu\n", count);
3283 	}
3284 }
3285 
3286 
3287 static int
3288 dump_area(int argc, char** argv)
3289 {
3290 	bool mappings = false;
3291 	bool found = false;
3292 	int32 index = 1;
3293 	VMArea* area;
3294 	addr_t num;
3295 
3296 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3297 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3298 			"All areas matching either id/address/name are listed. You can\n"
3299 			"restrict the check to a specific item by prefixing the specifier\n"
3300 			"with the id/contains/address/name keywords.\n"
3301 			"-m shows the area's mappings as well.\n");
3302 		return 0;
3303 	}
3304 
3305 	if (!strcmp(argv[1], "-m")) {
3306 		mappings = true;
3307 		index++;
3308 	}
3309 
3310 	int32 mode = 0xf;
3311 	if (!strcmp(argv[index], "id"))
3312 		mode = 1;
3313 	else if (!strcmp(argv[index], "contains"))
3314 		mode = 2;
3315 	else if (!strcmp(argv[index], "name"))
3316 		mode = 4;
3317 	else if (!strcmp(argv[index], "address"))
3318 		mode = 0;
3319 	if (mode != 0xf)
3320 		index++;
3321 
3322 	if (index >= argc) {
3323 		kprintf("No area specifier given.\n");
3324 		return 0;
3325 	}
3326 
3327 	num = parse_expression(argv[index]);
3328 
3329 	if (mode == 0) {
3330 		dump_area_struct((struct VMArea*)num, mappings);
3331 	} else {
3332 		// walk through the area list, looking for the arguments as a name
3333 
3334 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3335 		while ((area = it.Next()) != NULL) {
3336 			if (((mode & 4) != 0 && area->name != NULL
3337 					&& !strcmp(argv[index], area->name))
3338 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3339 					|| (((mode & 2) != 0 && area->Base() <= num
3340 						&& area->Base() + area->Size() > num))))) {
3341 				dump_area_struct(area, mappings);
3342 				found = true;
3343 			}
3344 		}
3345 
3346 		if (!found)
3347 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3348 	}
3349 
3350 	return 0;
3351 }
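

// Example kernel debugger usage, assuming this function is registered as the
// "area" command (the registration itself is not part of this excerpt; the
// values are placeholders):
//
//	area 0x1f3			list areas matching the id, a containing
//						address, or the name
//	area contains 0x80001234	only match areas containing that address
//	area -m name swap_file		dump areas named "swap_file", including
//						their page mappings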
3352 
3353 
3354 static int
3355 dump_area_list(int argc, char** argv)
3356 {
3357 	VMArea* area;
3358 	const char* name = NULL;
3359 	int32 id = 0;
3360 
3361 	if (argc > 1) {
3362 		id = parse_expression(argv[1]);
3363 		if (id == 0)
3364 			name = argv[1];
3365 	}
3366 
3367 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
3368 
3369 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3370 	while ((area = it.Next()) != NULL) {
3371 		if ((id != 0 && area->address_space->ID() != id)
3372 			|| (name != NULL && strstr(area->name, name) == NULL))
3373 			continue;
3374 
3375 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id,
3376 			(void*)area->Base(), (void*)area->Size(), area->protection,
3377 			area->wiring, area->name);
3378 	}
3379 	return 0;
3380 }
3381 
3382 
3383 static int
3384 dump_available_memory(int argc, char** argv)
3385 {
3386 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3387 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3388 	return 0;
3389 }
3390 
3391 
3392 /*!	Deletes all areas and reserved regions in the given address space.
3393 
3394 	The caller must ensure that none of the areas has any wired ranges.
3395 
3396 	\param addressSpace The address space.
3397 	\param deletingAddressSpace \c true, if the address space is in the process
3398 		of being deleted.
3399 */
3400 void
3401 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3402 {
3403 	TRACE(("vm_delete_areas: called on address space 0x%lx\n",
3404 		addressSpace->ID()));
3405 
3406 	addressSpace->WriteLock();
3407 
3408 	// remove all reserved areas in this address space
3409 	addressSpace->UnreserveAllAddressRanges(0);
3410 
3411 	// delete all the areas in this address space
3412 	while (VMArea* area = addressSpace->FirstArea()) {
3413 		ASSERT(!area->IsWired());
3414 		delete_area(addressSpace, area, deletingAddressSpace);
3415 	}
3416 
3417 	addressSpace->WriteUnlock();
3418 }
3419 
3420 
3421 static area_id
3422 vm_area_for(addr_t address, bool kernel)
3423 {
3424 	team_id team;
3425 	if (IS_USER_ADDRESS(address)) {
3426 		// we try the user team address space, if any
3427 		team = VMAddressSpace::CurrentID();
3428 		if (team < 0)
3429 			return team;
3430 	} else
3431 		team = VMAddressSpace::KernelID();
3432 
3433 	AddressSpaceReadLocker locker(team);
3434 	if (!locker.IsLocked())
3435 		return B_BAD_TEAM_ID;
3436 
3437 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3438 	if (area != NULL) {
3439 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3440 			return B_ERROR;
3441 
3442 		return area->id;
3443 	}
3444 
3445 	return B_ERROR;
3446 }
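

// A commented sketch (illustrative only, not compiled): the public area_for()
// used later in this file for the kernel args ranges is assumed to build on
// this lookup. "somePointer" is a placeholder.
//
//	area_id id = area_for(somePointer);
//	if (id >= B_OK)
//		dprintf("%p belongs to area %ld\n", somePointer, id);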
3447 
3448 
3449 /*!	Frees physical pages that were used during the boot process.
3450 	\a end is inclusive.
3451 */
3452 static void
3453 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3454 {
3455 	// free all physical pages in the specified range
3456 
3457 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3458 		phys_addr_t physicalAddress;
3459 		uint32 flags;
3460 
3461 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3462 			&& (flags & PAGE_PRESENT) != 0) {
3463 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3464 			if (page != NULL && page->State() != PAGE_STATE_FREE
3465 					 && page->State() != PAGE_STATE_CLEAR
3466 					 && page->State() != PAGE_STATE_UNUSED) {
3467 				DEBUG_PAGE_ACCESS_START(page);
3468 				vm_page_set_state(page, PAGE_STATE_FREE);
3469 			}
3470 		}
3471 	}
3472 
3473 	// unmap the memory
3474 	map->Unmap(start, end);
3475 }
3476 
3477 
3478 void
3479 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3480 {
3481 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3482 	addr_t end = start + (size - 1);
3483 	addr_t lastEnd = start;
3484 
3485 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3486 		(void*)start, (void*)end));
3487 
3488 	// The areas are sorted in virtual address space order, so
3489 	// we just have to find the holes between them that fall
3490 	// into the range we should dispose of.
3491 
3492 	map->Lock();
3493 
3494 	for (VMAddressSpace::AreaIterator it
3495 				= VMAddressSpace::Kernel()->GetAreaIterator();
3496 			VMArea* area = it.Next();) {
3497 		addr_t areaStart = area->Base();
3498 		addr_t areaEnd = areaStart + (area->Size() - 1);
3499 
3500 		if (areaEnd < start)
3501 			continue;
3502 
3503 		if (areaStart > end) {
3504 			// we are done, the area is already beyond what we have to free
3505 			break;
3506 		}
3507 
3508 		if (areaStart > lastEnd) {
3509 			// this is something we can free
3510 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3511 				(void*)areaStart));
3512 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3513 		}
3514 
3515 		if (areaEnd >= end) {
3516 			lastEnd = areaEnd;
3517 				// no +1 to prevent potential overflow
3518 			break;
3519 		}
3520 
3521 		lastEnd = areaEnd + 1;
3522 	}
3523 
3524 	if (lastEnd < end) {
3525 		// we can also get rid of some space at the end of the area
3526 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3527 			(void*)end));
3528 		unmap_and_free_physical_pages(map, lastEnd, end);
3529 	}
3530 
3531 	map->Unlock();
3532 }
3533 
3534 
3535 static void
3536 create_preloaded_image_areas(struct preloaded_image* image)
3537 {
3538 	char name[B_OS_NAME_LENGTH];
3539 	void* address;
3540 	int32 length;
3541 
3542 	// use file name to create a good area name
3543 	char* fileName = strrchr(image->name, '/');
3544 	if (fileName == NULL)
3545 		fileName = image->name;
3546 	else
3547 		fileName++;
3548 
3549 	length = strlen(fileName);
3550 	// make sure there is enough space for the suffix
3551 	if (length > 25)
3552 		length = 25;
3553 
3554 	memcpy(name, fileName, length);
3555 	strcpy(name + length, "_text");
3556 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3557 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3558 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3559 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3560 		// this will later be remapped read-only/executable by the
3561 		// ELF initialization code
3562 
3563 	strcpy(name + length, "_data");
3564 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3565 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3566 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3567 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3568 }
3569 
3570 
3571 /*!	Frees all areas previously created for the kernel arguments ranges in
3572 	the kernel_args structure. Any boot loader resources contained in those
3573 	arguments must not be accessed anymore past this point.
3574 */
3575 void
3576 vm_free_kernel_args(kernel_args* args)
3577 {
3578 	uint32 i;
3579 
3580 	TRACE(("vm_free_kernel_args()\n"));
3581 
3582 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3583 		area_id area = area_for((void*)args->kernel_args_range[i].start);
3584 		if (area >= B_OK)
3585 			delete_area(area);
3586 	}
3587 }
3588 
3589 
3590 static void
3591 allocate_kernel_args(kernel_args* args)
3592 {
3593 	TRACE(("allocate_kernel_args()\n"));
3594 
3595 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3596 		void* address = (void*)args->kernel_args_range[i].start;
3597 
3598 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3599 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3600 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3601 	}
3602 }
3603 
3604 
3605 static void
3606 unreserve_boot_loader_ranges(kernel_args* args)
3607 {
3608 	TRACE(("unreserve_boot_loader_ranges()\n"));
3609 
3610 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3611 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3612 			(void*)args->virtual_allocated_range[i].start,
3613 			args->virtual_allocated_range[i].size);
3614 	}
3615 }
3616 
3617 
3618 static void
3619 reserve_boot_loader_ranges(kernel_args* args)
3620 {
3621 	TRACE(("reserve_boot_loader_ranges()\n"));
3622 
3623 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3624 		void* address = (void*)args->virtual_allocated_range[i].start;
3625 
3626 		// If the address is not a kernel address, we just skip it. The
3627 		// architecture-specific code has to deal with it.
3628 		if (!IS_KERNEL_ADDRESS(address)) {
3629 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
3630 				address, args->virtual_allocated_range[i].size);
3631 			continue;
3632 		}
3633 
3634 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3635 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3636 		if (status < B_OK)
3637 			panic("could not reserve boot loader ranges\n");
3638 	}
3639 }
3640 
3641 
3642 static addr_t
3643 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3644 {
3645 	size = PAGE_ALIGN(size);
3646 
3647 	// find a slot in the virtual allocation addr range
3648 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3649 		// check to see if the space between this one and the last is big enough
3650 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3651 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3652 			+ args->virtual_allocated_range[i - 1].size;
3653 
3654 		addr_t base = alignment > 0
3655 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3656 
3657 		if (base >= KERNEL_BASE && base < rangeStart
3658 				&& rangeStart - base >= size) {
3659 			args->virtual_allocated_range[i - 1].size
3660 				+= base + size - previousRangeEnd;
3661 			return base;
3662 		}
3663 	}
3664 
3665 	// We didn't find a gap between the allocated ranges. That's OK;
3666 	// see if there's a gap after the last one.
3667 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3668 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3669 		+ args->virtual_allocated_range[lastEntryIndex].size;
3670 	addr_t base = alignment > 0
3671 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3672 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3673 		args->virtual_allocated_range[lastEntryIndex].size
3674 			+= base + size - lastRangeEnd;
3675 		return base;
3676 	}
3677 
3678 	// see if there's a gap before the first one
3679 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3680 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3681 		base = rangeStart - size;
3682 		if (alignment > 0)
3683 			base = ROUNDDOWN(base, alignment);
3684 
3685 		if (base >= KERNEL_BASE) {
3686 			args->virtual_allocated_range[0].start = base;
3687 			args->virtual_allocated_range[0].size += rangeStart - base;
3688 			return base;
3689 		}
3690 	}
3691 
3692 	return 0;
3693 }
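// Illustrative example of the gap search above (hypothetical numbers, 4 KiB
// pages, assuming KERNEL_BASE is 0x80000000): with allocated ranges
// [0x80000000, 0x80004000) and [0x80010000, 0x80020000) and a request for
// 0x2000 bytes without alignment, the loop picks base 0x80004000, grows the
// first range to [0x80000000, 0x80006000), and returns 0x80004000.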
3694 
3695 
3696 static bool
3697 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3698 {
3699 	// TODO: horrible brute-force method of determining if the page can be
3700 	// allocated
3701 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3702 		if (address >= args->physical_memory_range[i].start
3703 			&& address < args->physical_memory_range[i].start
3704 				+ args->physical_memory_range[i].size)
3705 			return true;
3706 	}
3707 	return false;
3708 }
3709 
3710 
3711 page_num_t
3712 vm_allocate_early_physical_page(kernel_args* args)
3713 {
3714 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3715 		phys_addr_t nextPage;
3716 
3717 		nextPage = args->physical_allocated_range[i].start
3718 			+ args->physical_allocated_range[i].size;
3719 		// see if the page right after this allocated paddr run can be allocated
3720 		if (i + 1 < args->num_physical_allocated_ranges
3721 			&& args->physical_allocated_range[i + 1].size != 0) {
3722 			// see if the next page will collide with the next allocated range
3723 			if (nextPage >= args->physical_allocated_range[i+1].start)
3724 				continue;
3725 		}
3726 		// see if the next physical page fits in the memory block
3727 		if (is_page_in_physical_memory_range(args, nextPage)) {
3728 			// we got one!
3729 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3730 			return nextPage / B_PAGE_SIZE;
3731 		}
3732 	}
3733 
3734 	return 0;
3735 		// could not allocate a block
3736 }
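// Note (illustrative): the value returned above is a page *number*, not an
// address. Callers such as vm_allocate_early() below convert it to a physical
// address by multiplying with B_PAGE_SIZE, e.g. page 0x123 maps to physical
// address 0x123000 with 4 KiB pages.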
3737 
3738 
3739 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3740 	allocate some pages before the VM is completely up.
3741 */
3742 addr_t
3743 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3744 	uint32 attributes, addr_t alignment)
3745 {
3746 	if (physicalSize > virtualSize)
3747 		physicalSize = virtualSize;
3748 
3749 	// find the vaddr to allocate at
3750 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3751 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3752 
3753 	// map the pages
3754 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3755 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3756 		if (physicalAddress == 0)
3757 			panic("error allocating early page!\n");
3758 
3759 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3760 
3761 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3762 			physicalAddress * B_PAGE_SIZE, attributes,
3763 			&vm_allocate_early_physical_page);
3764 	}
3765 
3766 	return virtualBase;
3767 }
3768 
3769 
3770 /*!	The main entrance point to initialize the VM. */
3771 status_t
3772 vm_init(kernel_args* args)
3773 {
3774 	struct preloaded_image* image;
3775 	void* address;
3776 	status_t err = 0;
3777 	uint32 i;
3778 
3779 	TRACE(("vm_init: entry\n"));
3780 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3781 	err = arch_vm_init(args);
3782 
3783 	// initialize some globals
3784 	vm_page_init_num_pages(args);
3785 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3786 
3787 	slab_init(args);
3788 
3789 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3790 	size_t heapSize = INITIAL_HEAP_SIZE;
3791 	// try to accommodate low-memory systems
3792 	while (heapSize > sAvailableMemory / 8)
3793 		heapSize /= 2;
3794 	if (heapSize < 1024 * 1024)
3795 		panic("vm_init: go buy some RAM please.");
3796 
3797 	// map in the new heap and initialize it
3798 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3799 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3800 	TRACE(("heap at 0x%lx\n", heapBase));
3801 	heap_init(heapBase, heapSize);
3802 #endif
3803 
3804 	// initialize the free page list and physical page mapper
3805 	vm_page_init(args);
3806 
3807 	// initialize the cache allocators
3808 	vm_cache_init(args);
3809 
3810 	{
3811 		status_t error = VMAreaHash::Init();
3812 		if (error != B_OK)
3813 			panic("vm_init: error initializing area hash table\n");
3814 	}
3815 
3816 	VMAddressSpace::Init();
3817 	reserve_boot_loader_ranges(args);
3818 
3819 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3820 	heap_init_post_area();
3821 #endif
3822 
3823 	// Do any further initialization that the architecture-dependent layers may
3824 	// need now
3825 	arch_vm_translation_map_init_post_area(args);
3826 	arch_vm_init_post_area(args);
3827 	vm_page_init_post_area(args);
3828 	slab_init_post_area();
3829 
3830 	// allocate areas to represent stuff that already exists
3831 
3832 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3833 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
3834 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
3835 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3836 #endif
3837 
3838 	allocate_kernel_args(args);
3839 
3840 	create_preloaded_image_areas(&args->kernel_image);
3841 
3842 	// allocate areas for preloaded images
3843 	for (image = args->preloaded_images; image != NULL; image = image->next)
3844 		create_preloaded_image_areas(image);
3845 
3846 	// allocate kernel stacks
3847 	for (i = 0; i < args->num_cpus; i++) {
3848 		char name[64];
3849 
3850 		sprintf(name, "idle thread %lu kstack", i + 1);
3851 		address = (void*)args->cpu_kstack[i].start;
3852 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
3853 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3854 	}
3855 
3856 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
3857 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
3858 
3859 #if PARANOID_KERNEL_MALLOC
3860 	vm_block_address_range("uninitialized heap memory",
3861 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3862 #endif
3863 #if PARANOID_KERNEL_FREE
3864 	vm_block_address_range("freed heap memory",
3865 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3866 #endif
3867 
3868 	// create the object cache for the page mappings
3869 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
3870 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
3871 		NULL, NULL);
3872 	if (gPageMappingsObjectCache == NULL)
3873 		panic("failed to create page mappings object cache");
3874 
3875 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
3876 
3877 #if DEBUG_CACHE_LIST
3878 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
3879 		virtual_address_restrictions virtualRestrictions = {};
3880 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
3881 		physical_address_restrictions physicalRestrictions = {};
3882 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
3883 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
3884 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
3885 			CREATE_AREA_DONT_WAIT, &virtualRestrictions, &physicalRestrictions,
3886 			(void**)&sCacheInfoTable);
3887 	}
3888 #endif	// DEBUG_CACHE_LIST
3889 
3890 	// add some debugger commands
3891 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
3892 	add_debugger_command("area", &dump_area,
3893 		"Dump info about a particular area");
3894 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
3895 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
3896 #if DEBUG_CACHE_LIST
3897 	if (sCacheInfoTable != NULL) {
3898 		add_debugger_command_etc("caches", &dump_caches,
3899 			"List all VMCache trees",
3900 			"[ \"-c\" ]\n"
3901 			"All cache trees are listed sorted in decreasing order by number "
3902 				"of\n"
3903 			"used pages or, if \"-c\" is specified, by size of committed "
3904 				"memory.\n",
3905 			0);
3906 	}
3907 #endif
3908 	add_debugger_command("avail", &dump_available_memory,
3909 		"Dump available memory");
3910 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
3911 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
3912 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
3913 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
3914 	add_debugger_command("string", &display_mem, "dump strings");
3915 
3916 	TRACE(("vm_init: exit\n"));
3917 
3918 	vm_cache_init_post_heap();
3919 
3920 	return err;
3921 }
3922 
3923 
3924 status_t
3925 vm_init_post_sem(kernel_args* args)
3926 {
3927 	// This frees all unused boot loader resources and makes their space
3928 	// available again
3929 	arch_vm_init_end(args);
3930 	unreserve_boot_loader_ranges(args);
3931 
3932 	// Fill in all of the semaphores that were not allocated before. Since
3933 	// we're still single-threaded and only the kernel address space exists,
3934 	// it isn't that hard to find all of the ones we need to create.
3935 
3936 	arch_vm_translation_map_init_post_sem(args);
3937 
3938 	slab_init_post_sem();
3939 
3940 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3941 	heap_init_post_sem();
3942 #endif
3943 
3944 	return B_OK;
3945 }
3946 
3947 
3948 status_t
3949 vm_init_post_thread(kernel_args* args)
3950 {
3951 	vm_page_init_post_thread(args);
3952 	slab_init_post_thread();
3953 	return heap_init_post_thread();
3954 }
3955 
3956 
3957 status_t
3958 vm_init_post_modules(kernel_args* args)
3959 {
3960 	return arch_vm_init_post_modules(args);
3961 }
3962 
3963 
3964 void
3965 permit_page_faults(void)
3966 {
3967 	Thread* thread = thread_get_current_thread();
3968 	if (thread != NULL)
3969 		atomic_add(&thread->page_faults_allowed, 1);
3970 }
3971 
3972 
3973 void
3974 forbid_page_faults(void)
3975 {
3976 	Thread* thread = thread_get_current_thread();
3977 	if (thread != NULL)
3978 		atomic_add(&thread->page_faults_allowed, -1);
3979 }
3980 
3981 
3982 status_t
3983 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
3984 	addr_t* newIP)
3985 {
3986 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
3987 		faultAddress));
3988 
3989 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
3990 
3991 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
3992 	VMAddressSpace* addressSpace = NULL;
3993 
3994 	status_t status = B_OK;
3995 	*newIP = 0;
3996 	atomic_add((int32*)&sPageFaults, 1);
3997 
3998 	if (IS_KERNEL_ADDRESS(pageAddress)) {
3999 		addressSpace = VMAddressSpace::GetKernel();
4000 	} else if (IS_USER_ADDRESS(pageAddress)) {
4001 		addressSpace = VMAddressSpace::GetCurrent();
4002 		if (addressSpace == NULL) {
4003 			if (!isUser) {
4004 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4005 					"memory!\n");
4006 				status = B_BAD_ADDRESS;
4007 				TPF(PageFaultError(-1,
4008 					VMPageFaultTracing
4009 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4010 			} else {
4011 				// XXX weird state.
4012 				panic("vm_page_fault: non kernel thread accessing user memory "
4013 					"that doesn't exist!\n");
4014 				status = B_BAD_ADDRESS;
4015 			}
4016 		}
4017 	} else {
4018 		// the hit was probably in the 64k DMZ between kernel and user space
4019 		// this keeps a user space thread from passing a buffer that crosses
4020 		// into kernel space
4021 		status = B_BAD_ADDRESS;
4022 		TPF(PageFaultError(-1,
4023 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4024 	}
4025 
4026 	if (status == B_OK) {
4027 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser,
4028 			NULL);
4029 	}
4030 
4031 	if (status < B_OK) {
4032 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4033 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
4034 			strerror(status), address, faultAddress, isWrite, isUser,
4035 			thread_get_current_thread_id());
4036 		if (!isUser) {
4037 			Thread* thread = thread_get_current_thread();
4038 			if (thread != NULL && thread->fault_handler != 0) {
4039 				// this will cause the arch-dependent page fault handler to
4040 				// modify the IP on the interrupt frame or whatever to return
4041 				// to this address
4042 				*newIP = thread->fault_handler;
4043 			} else {
4044 				// unhandled page fault in the kernel
4045 				panic("vm_page_fault: unhandled page fault in kernel space at "
4046 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4047 			}
4048 		} else {
4049 #if 1
4050 			addressSpace->ReadLock();
4051 
4052 			// TODO: remove me once we have proper userland debugging support
4053 			// (and tools)
4054 			VMArea* area = addressSpace->LookupArea(faultAddress);
4055 
4056 			Thread* thread = thread_get_current_thread();
4057 			dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) "
4058 				"tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n",
4059 				thread->name, thread->id, thread->team->Name(),
4060 				thread->team->id, isWrite ? "write" : "read", address,
4061 				faultAddress, area ? area->name : "???",
4062 				faultAddress - (area ? area->Base() : 0x0));
4063 
4064 			// We can print a stack trace of the userland thread here.
4065 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4066 // fault and someone is already waiting for a write lock on the same address
4067 // space. This thread will then try to acquire the lock again and will
4068 // be queued after the writer.
4069 #	if 0
4070 			if (area) {
4071 				struct stack_frame {
4072 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4073 						struct stack_frame*	previous;
4074 						void*				return_address;
4075 					#else
4076 						// ...
4077 					#warning writeme
4078 					#endif
4079 				} frame;
4080 #		ifdef __INTEL__
4081 				struct iframe* iframe = i386_get_user_iframe();
4082 				if (iframe == NULL)
4083 					panic("iframe is NULL!");
4084 
4085 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4086 					sizeof(struct stack_frame));
4087 #		elif defined(__POWERPC__)
4088 				struct iframe* iframe = ppc_get_user_iframe();
4089 				if (iframe == NULL)
4090 					panic("iframe is NULL!");
4091 
4092 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4093 					sizeof(struct stack_frame));
4094 #		else
4095 #			warning "vm_page_fault() stack trace won't work"
4096 				status = B_ERROR;
4097 #		endif
4098 
4099 				dprintf("stack trace:\n");
4100 				int32 maxFrames = 50;
4101 				while (status == B_OK && --maxFrames >= 0
4102 						&& frame.return_address != NULL) {
4103 					dprintf("  %p", frame.return_address);
4104 					area = addressSpace->LookupArea(
4105 						(addr_t)frame.return_address);
4106 					if (area) {
4107 						dprintf(" (%s + %#lx)", area->name,
4108 							(addr_t)frame.return_address - area->Base());
4109 					}
4110 					dprintf("\n");
4111 
4112 					status = user_memcpy(&frame, frame.previous,
4113 						sizeof(struct stack_frame));
4114 				}
4115 			}
4116 #	endif	// 0 (stack trace)
4117 
4118 			addressSpace->ReadUnlock();
4119 #endif
4120 
4121 			// TODO: the fault_callback is a temporary solution for vm86
4122 			if (thread->fault_callback == NULL
4123 				|| thread->fault_callback(address, faultAddress, isWrite)) {
4124 				// If the thread has a signal handler for SIGSEGV, we simply
4125 				// send it the signal. Otherwise we notify the user debugger
4126 				// first.
4127 				struct sigaction action;
4128 				if ((sigaction(SIGSEGV, NULL, &action) == 0
4129 						&& action.sa_handler != SIG_DFL
4130 						&& action.sa_handler != SIG_IGN)
4131 					|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4132 						SIGSEGV)) {
4133 					Signal signal(SIGSEGV,
4134 						status == B_PERMISSION_DENIED
4135 							? SEGV_ACCERR : SEGV_MAPERR,
4136 						EFAULT, thread->team->id);
4137 					signal.SetAddress((void*)address);
4138 					send_signal_to_thread(thread, signal, 0);
4139 				}
4140 			}
4141 		}
4142 	}
4143 
4144 	if (addressSpace != NULL)
4145 		addressSpace->Put();
4146 
4147 	return B_HANDLED_INTERRUPT;
4148 }
4149 
4150 
4151 struct PageFaultContext {
4152 	AddressSpaceReadLocker	addressSpaceLocker;
4153 	VMCacheChainLocker		cacheChainLocker;
4154 
4155 	VMTranslationMap*		map;
4156 	VMCache*				topCache;
4157 	off_t					cacheOffset;
4158 	vm_page_reservation		reservation;
4159 	bool					isWrite;
4160 
4161 	// return values
4162 	vm_page*				page;
4163 	bool					restart;
4164 
4165 
4166 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4167 		:
4168 		addressSpaceLocker(addressSpace, true),
4169 		map(addressSpace->TranslationMap()),
4170 		isWrite(isWrite)
4171 	{
4172 	}
4173 
4174 	~PageFaultContext()
4175 	{
4176 		UnlockAll();
4177 		vm_page_unreserve_pages(&reservation);
4178 	}
4179 
4180 	void Prepare(VMCache* topCache, off_t cacheOffset)
4181 	{
4182 		this->topCache = topCache;
4183 		this->cacheOffset = cacheOffset;
4184 		page = NULL;
4185 		restart = false;
4186 
4187 		cacheChainLocker.SetTo(topCache);
4188 	}
4189 
4190 	void UnlockAll(VMCache* exceptCache = NULL)
4191 	{
4192 		topCache = NULL;
4193 		addressSpaceLocker.Unlock();
4194 		cacheChainLocker.Unlock(exceptCache);
4195 	}
4196 };
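// Illustrative sketch (simplified) of how this context is meant to be driven;
// the real loop, including permission checks and mapping, lives in
// vm_soft_fault() below:
//
//     PageFaultContext context(addressSpace, isWrite);
//     while (true) {
//         context.addressSpaceLocker.Lock();
//         // ... look up the area and check its protection ...
//         context.Prepare(vm_area_get_locked_cache(area), cacheOffset);
//         if (fault_get_page(context) != B_OK)
//             break;
//         if (context.restart)
//             continue;    // everything was unlocked -- try again
//         // ... map context.page into the address space ...
//         break;
//     }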
4197 
4198 
4199 /*!	Gets the page that should be mapped into the area.
4200 	Returns an error code other than \c B_OK, if the page couldn't be found or
4201 	paged in. The locking state of the address space and the caches is undefined
4202 	in that case.
4203 	Returns \c B_OK with \c context.restart set to \c true, if the function
4204 	had to unlock the address space and all caches and is supposed to be called
4205 	again.
4206 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4207 	found. It is returned in \c context.page. The address space will still be
4208 	locked as well as all caches starting from the top cache to at least the
4209 	cache the page lives in.
4210 */
4211 static status_t
4212 fault_get_page(PageFaultContext& context)
4213 {
4214 	VMCache* cache = context.topCache;
4215 	VMCache* lastCache = NULL;
4216 	vm_page* page = NULL;
4217 
4218 	while (cache != NULL) {
4219 		// We already hold the lock of the cache at this point.
4220 
4221 		lastCache = cache;
4222 
4223 		page = cache->LookupPage(context.cacheOffset);
4224 		if (page != NULL && page->busy) {
4225 			// the page is busy -- wait for it to become unbusy
4226 			context.UnlockAll(cache);
4227 			cache->ReleaseRefLocked();
4228 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4229 
4230 			// restart the whole process
4231 			context.restart = true;
4232 			return B_OK;
4233 		}
4234 
4235 		if (page != NULL)
4236 			break;
4237 
4238 		// The current cache does not contain the page we're looking for.
4239 
4240 		// see if the backing store has it
4241 		if (cache->HasPage(context.cacheOffset)) {
4242 			// insert a fresh page and mark it busy -- we're going to read it in
4243 			page = vm_page_allocate_page(&context.reservation,
4244 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4245 			cache->InsertPage(page, context.cacheOffset);
4246 
4247 			// We need to unlock all caches and the address space while reading
4248 			// the page in. Keep a reference to the cache around.
4249 			cache->AcquireRefLocked();
4250 			context.UnlockAll();
4251 
4252 			// read the page in
4253 			generic_io_vec vec;
4254 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4255 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4256 
4257 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4258 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4259 
4260 			cache->Lock();
4261 
4262 			if (status < B_OK) {
4263 				// on error remove and free the page
4264 				dprintf("reading page from cache %p returned: %s!\n",
4265 					cache, strerror(status));
4266 
4267 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4268 				cache->RemovePage(page);
4269 				vm_page_set_state(page, PAGE_STATE_FREE);
4270 
4271 				cache->ReleaseRefAndUnlock();
4272 				return status;
4273 			}
4274 
4275 			// mark the page unbusy again
4276 			cache->MarkPageUnbusy(page);
4277 
4278 			DEBUG_PAGE_ACCESS_END(page);
4279 
4280 			// Since we needed to unlock everything temporarily, the area
4281 			// situation might have changed. So we need to restart the whole
4282 			// process.
4283 			cache->ReleaseRefAndUnlock();
4284 			context.restart = true;
4285 			return B_OK;
4286 		}
4287 
4288 		cache = context.cacheChainLocker.LockSourceCache();
4289 	}
4290 
4291 	if (page == NULL) {
4292 		// There was no adequate page; determine the cache for a clean one.
4293 		// Read-only pages go into the deepest cache; only the topmost cache
4294 		// may have direct write access.
4295 		cache = context.isWrite ? context.topCache : lastCache;
4296 
4297 		// allocate a clean page
4298 		page = vm_page_allocate_page(&context.reservation,
4299 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4300 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n",
4301 			page->physical_page_number));
4302 
4303 		// insert the new page into our cache
4304 		cache->InsertPage(page, context.cacheOffset);
4305 	} else if (page->Cache() != context.topCache && context.isWrite) {
4306 		// We have a page that has the data we want, but in the wrong cache
4307 		// object so we need to copy it and stick it into the top cache.
4308 		vm_page* sourcePage = page;
4309 
4310 		// TODO: If memory is low, it might be a good idea to steal the page
4311 		// from our source cache -- if possible, that is.
4312 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4313 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4314 
4315 		// To not needlessly kill concurrency we unlock all caches but the top
4316 		// one while copying the page. Lacking another mechanism to ensure that
4317 		// the source page doesn't disappear, we mark it busy.
4318 		sourcePage->busy = true;
4319 		context.cacheChainLocker.UnlockKeepRefs(true);
4320 
4321 		// copy the page
4322 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4323 			sourcePage->physical_page_number * B_PAGE_SIZE);
4324 
4325 		context.cacheChainLocker.RelockCaches(true);
4326 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4327 
4328 		// insert the new page into our cache
4329 		context.topCache->InsertPage(page, context.cacheOffset);
4330 	} else
4331 		DEBUG_PAGE_ACCESS_START(page);
4332 
4333 	context.page = page;
4334 	return B_OK;
4335 }
4336 
4337 
4338 /*!	Makes sure the address in the given address space is mapped.
4339 
4340 	\param addressSpace The address space.
4341 	\param originalAddress The address. Doesn't need to be page aligned.
4342 	\param isWrite If \c true the address shall be write-accessible.
4343 	\param isUser If \c true the access is requested by a userland team.
4344 	\param wirePage On success, if non \c NULL, the wired count of the page
4345 		mapped at the given address is incremented and the page is returned
4346 		via this parameter.
4347 	\param wiredRange If given, this wiredRange is ignored when checking whether
4348 		an already mapped page at the virtual address can be unmapped.
4349 	\return \c B_OK on success, another error code otherwise.
4350 */
4351 static status_t
4352 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4353 	bool isWrite, bool isUser, vm_page** wirePage, VMAreaWiredRange* wiredRange)
4354 {
4355 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
4356 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
4357 
4358 	PageFaultContext context(addressSpace, isWrite);
4359 
4360 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4361 	status_t status = B_OK;
4362 
4363 	addressSpace->IncrementFaultCount();
4364 
4365 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4366 	// the pages upfront makes sure we don't have any cache locked, so that the
4367 	// page daemon/thief can do their job without problems.
4368 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4369 		originalAddress);
4370 	context.addressSpaceLocker.Unlock();
4371 	vm_page_reserve_pages(&context.reservation, reservePages,
4372 		addressSpace == VMAddressSpace::Kernel()
4373 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4374 
4375 	while (true) {
4376 		context.addressSpaceLocker.Lock();
4377 
4378 		// get the area the fault was in
4379 		VMArea* area = addressSpace->LookupArea(address);
4380 		if (area == NULL) {
4381 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4382 				"space\n", originalAddress);
4383 			TPF(PageFaultError(-1,
4384 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4385 			status = B_BAD_ADDRESS;
4386 			break;
4387 		}
4388 
4389 		// check permissions
4390 		uint32 protection = get_area_page_protection(area, address);
4391 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4392 			dprintf("user access on kernel area 0x%lx at %p\n", area->id,
4393 				(void*)originalAddress);
4394 			TPF(PageFaultError(area->id,
4395 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4396 			status = B_PERMISSION_DENIED;
4397 			break;
4398 		}
4399 		if (isWrite && (protection
4400 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4401 			dprintf("write access attempted on write-protected area 0x%lx at"
4402 				" %p\n", area->id, (void*)originalAddress);
4403 			TPF(PageFaultError(area->id,
4404 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4405 			status = B_PERMISSION_DENIED;
4406 			break;
4407 		} else if (!isWrite && (protection
4408 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4409 			dprintf("read access attempted on read-protected area 0x%lx at"
4410 				" %p\n", area->id, (void*)originalAddress);
4411 			TPF(PageFaultError(area->id,
4412 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4413 			status = B_PERMISSION_DENIED;
4414 			break;
4415 		}
4416 
4417 		// We have the area, it was a valid access, so let's try to resolve the
4418 		// page fault now.
4419 		// At first, the topmost cache from the area is investigated.
4420 
4421 		context.Prepare(vm_area_get_locked_cache(area),
4422 			address - area->Base() + area->cache_offset);
4423 
4424 		// See if this cache has a fault handler -- this will do all the work
4425 		// for us.
4426 		{
4427 			// Note, since the page fault is resolved with interrupts enabled,
4428 			// the fault handler could be called more than once for the same
4429 			// reason -- the store must take this into account.
4430 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4431 			if (status != B_BAD_HANDLER)
4432 				break;
4433 		}
4434 
4435 		// The topmost cache has no fault handler, so let's see if the cache or
4436 		// its sources already have the page we're searching for (we're going
4437 		// from top to bottom).
4438 		status = fault_get_page(context);
4439 		if (status != B_OK) {
4440 			TPF(PageFaultError(area->id, status));
4441 			break;
4442 		}
4443 
4444 		if (context.restart)
4445 			continue;
4446 
4447 		// All went fine, all there is left to do is to map the page into the
4448 		// address space.
4449 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4450 			context.page));
4451 
4452 		// If the page doesn't reside in the area's cache, we need to make sure
4453 		// it's mapped read-only, so that we cannot overwrite someone else's
4454 		// data (copy-on-write)
4455 		uint32 newProtection = protection;
4456 		if (context.page->Cache() != context.topCache && !isWrite)
4457 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4458 
4459 		bool unmapPage = false;
4460 		bool mapPage = true;
4461 
4462 		// check whether there's already a page mapped at the address
4463 		context.map->Lock();
4464 
4465 		phys_addr_t physicalAddress;
4466 		uint32 flags;
4467 		vm_page* mappedPage = NULL;
4468 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4469 			&& (flags & PAGE_PRESENT) != 0
4470 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4471 				!= NULL) {
4472 			// Yep there's already a page. If it's ours, we can simply adjust
4473 			// its protection. Otherwise we have to unmap it.
4474 			if (mappedPage == context.page) {
4475 				context.map->ProtectPage(area, address, newProtection);
4476 					// Note: We assume that ProtectPage() is atomic (i.e.
4477 					// the page isn't temporarily unmapped), otherwise we'd have
4478 					// to make sure it isn't wired.
4479 				mapPage = false;
4480 			} else
4481 				unmapPage = true;
4482 		}
4483 
4484 		context.map->Unlock();
4485 
4486 		if (unmapPage) {
4487 			// If the page is wired, we can't unmap it. Wait until it is unwired
4488 			// again and restart.
4489 			VMAreaUnwiredWaiter waiter;
4490 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4491 					wiredRange)) {
4492 				// unlock everything and wait
4493 				context.UnlockAll();
4494 				waiter.waitEntry.Wait();
4495 				continue;
4496 			}
4497 
4498 			// Note: The mapped page is a page of a lower cache. We are
4499 			// guaranteed to have that cache locked, our new page is a copy of
4500 			// that page, and the page is not busy. The logic for that guarantee
4501 			// is as follows: Since the page is mapped, it must live in the top
4502 			// cache (ruled out above) or any of its lower caches, and there is
4503 			// (was before the new page was inserted) no other page in any
4504 			// cache between the top cache and the page's cache (otherwise that
4505 			// would be mapped instead). That in turn means that our algorithm
4506 			// must have found it and therefore it cannot be busy either.
4507 			DEBUG_PAGE_ACCESS_START(mappedPage);
4508 			unmap_page(area, address);
4509 			DEBUG_PAGE_ACCESS_END(mappedPage);
4510 		}
4511 
4512 		if (mapPage) {
4513 			if (map_page(area, context.page, address, newProtection,
4514 					&context.reservation) != B_OK) {
4515 				// Mapping can only fail when the page mapping object couldn't
4516 				// be allocated. Save for the missing mapping, everything is
4517 				// fine, though. If this was a regular page fault, we'll simply
4518 				// leave and probably fault again. To make sure we'll have more
4519 				// luck then, we ensure that the minimum object reserve is
4520 				// available.
4521 				DEBUG_PAGE_ACCESS_END(context.page);
4522 
4523 				context.UnlockAll();
4524 
4525 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4526 						!= B_OK) {
4527 					// Apparently the situation is serious. Let's get ourselves
4528 					// killed.
4529 					status = B_NO_MEMORY;
4530 				} else if (wirePage != NULL) {
4531 					// The caller expects us to wire the page. Since
4532 					// object_cache_reserve() succeeded, we should now be able
4533 					// to allocate a mapping structure. Restart.
4534 					continue;
4535 				}
4536 
4537 				break;
4538 			}
4539 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4540 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4541 
4542 		// also wire the page, if requested
4543 		if (wirePage != NULL && status == B_OK) {
4544 			increment_page_wired_count(context.page);
4545 			*wirePage = context.page;
4546 		}
4547 
4548 		DEBUG_PAGE_ACCESS_END(context.page);
4549 
4550 		break;
4551 	}
4552 
4553 	return status;
4554 }
4555 
4556 
4557 status_t
4558 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4559 {
4560 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4561 }
4562 
4563 status_t
4564 vm_put_physical_page(addr_t vaddr, void* handle)
4565 {
4566 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4567 }
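// Illustrative sketch (hypothetical caller): temporarily mapping a physical
// page, accessing it, and releasing the mapping again:
//
//     addr_t virtualAddress;
//     void* handle;
//     if (vm_get_physical_page(physicalAddress, &virtualAddress, &handle)
//             == B_OK) {
//         // ... read/write through virtualAddress ...
//         vm_put_physical_page(virtualAddress, handle);
//     }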
4568 
4569 
4570 status_t
4571 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4572 	void** _handle)
4573 {
4574 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4575 }
4576 
4577 status_t
4578 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4579 {
4580 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4581 }
4582 
4583 
4584 status_t
4585 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4586 {
4587 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4588 }
4589 
4590 status_t
4591 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4592 {
4593 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4594 }
4595 
4596 
4597 void
4598 vm_get_info(system_memory_info* info)
4599 {
4600 	swap_get_info(info);
4601 
4602 	info->max_memory = vm_page_num_pages() * B_PAGE_SIZE;
4603 	info->page_faults = sPageFaults;
4604 
4605 	MutexLocker locker(sAvailableMemoryLock);
4606 	info->free_memory = sAvailableMemory;
4607 	info->needed_memory = sNeededMemory;
4608 }
4609 
4610 
4611 uint32
4612 vm_num_page_faults(void)
4613 {
4614 	return sPageFaults;
4615 }
4616 
4617 
4618 off_t
4619 vm_available_memory(void)
4620 {
4621 	MutexLocker locker(sAvailableMemoryLock);
4622 	return sAvailableMemory;
4623 }
4624 
4625 
4626 off_t
4627 vm_available_not_needed_memory(void)
4628 {
4629 	MutexLocker locker(sAvailableMemoryLock);
4630 	return sAvailableMemory - sNeededMemory;
4631 }
4632 
4633 
4634 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4635 	debugger.
4636 */
4637 off_t
4638 vm_available_not_needed_memory_debug(void)
4639 {
4640 	return sAvailableMemory - sNeededMemory;
4641 }
4642 
4643 
4644 size_t
4645 vm_kernel_address_space_left(void)
4646 {
4647 	return VMAddressSpace::Kernel()->FreeSpace();
4648 }
4649 
4650 
4651 void
4652 vm_unreserve_memory(size_t amount)
4653 {
4654 	mutex_lock(&sAvailableMemoryLock);
4655 
4656 	sAvailableMemory += amount;
4657 
4658 	mutex_unlock(&sAvailableMemoryLock);
4659 }
4660 
4661 
4662 status_t
4663 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4664 {
4665 	size_t reserve = kMemoryReserveForPriority[priority];
4666 
4667 	MutexLocker locker(sAvailableMemoryLock);
4668 
4669 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4670 
4671 	if (sAvailableMemory >= amount + reserve) {
4672 		sAvailableMemory -= amount;
4673 		return B_OK;
4674 	}
4675 
4676 	if (timeout <= 0)
4677 		return B_NO_MEMORY;
4678 
4679 	// turn timeout into an absolute timeout
4680 	timeout += system_time();
4681 
4682 	// loop until we've got the memory or the timeout occurs
4683 	do {
4684 		sNeededMemory += amount;
4685 
4686 		// call the low resource manager
4687 		locker.Unlock();
4688 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4689 			B_ABSOLUTE_TIMEOUT, timeout);
4690 		locker.Lock();
4691 
4692 		sNeededMemory -= amount;
4693 
4694 		if (sAvailableMemory >= amount + reserve) {
4695 			sAvailableMemory -= amount;
4696 			return B_OK;
4697 		}
4698 	} while (timeout > system_time());
4699 
4700 	return B_NO_MEMORY;
4701 }
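// Illustrative usage sketch (hypothetical caller): a successful reservation
// must eventually be balanced by vm_unreserve_memory() above:
//
//     if (vm_try_reserve_memory(size, VM_PRIORITY_USER, 1000000) != B_OK)
//         return B_NO_MEMORY;
//     // ... commit the memory ...
//     // later, when releasing it again:
//     vm_unreserve_memory(size);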
4702 
4703 
4704 status_t
4705 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4706 {
4707 	// NOTE: The caller is responsible for synchronizing calls to this function!
4708 
4709 	AddressSpaceReadLocker locker;
4710 	VMArea* area;
4711 	status_t status = locker.SetFromArea(id, area);
4712 	if (status != B_OK)
4713 		return status;
4714 
4715 	// nothing to do, if the type doesn't change
4716 	uint32 oldType = area->MemoryType();
4717 	if (type == oldType)
4718 		return B_OK;
4719 
4720 	// set the memory type of the area and the mapped pages
4721 	VMTranslationMap* map = area->address_space->TranslationMap();
4722 	map->Lock();
4723 	area->SetMemoryType(type);
4724 	map->ProtectArea(area, area->protection);
4725 	map->Unlock();
4726 
4727 	// set the physical memory type
4728 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4729 	if (error != B_OK) {
4730 		// reset the memory type of the area and the mapped pages
4731 		map->Lock();
4732 		area->SetMemoryType(oldType);
4733 		map->ProtectArea(area, area->protection);
4734 		map->Unlock();
4735 		return error;
4736 	}
4737 
4738 	return B_OK;
4739 
4740 }
4741 
4742 
4743 /*!	This function enforces some protection properties:
4744 	 - if B_WRITE_AREA is set, B_WRITE_KERNEL_AREA is set as well
4745 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4746 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4747 	   and B_KERNEL_WRITE_AREA.
4748 */
4749 static void
4750 fix_protection(uint32* protection)
4751 {
4752 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4753 		if ((*protection & B_USER_PROTECTION) == 0
4754 			|| (*protection & B_WRITE_AREA) != 0)
4755 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4756 		else
4757 			*protection |= B_KERNEL_READ_AREA;
4758 	}
4759 }
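// Illustrative examples of the resulting protection (derived from the rules
// above):
//     B_READ_AREA                -> B_READ_AREA | B_KERNEL_READ_AREA
//     B_READ_AREA | B_WRITE_AREA -> ... | B_KERNEL_READ_AREA
//                                       | B_KERNEL_WRITE_AREA
//     0                          -> B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA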
4760 
4761 
4762 static void
4763 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4764 {
4765 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4766 	info->area = area->id;
4767 	info->address = (void*)area->Base();
4768 	info->size = area->Size();
4769 	info->protection = area->protection;
4770 	info->lock = B_FULL_LOCK;
4771 	info->team = area->address_space->ID();
4772 	info->copy_count = 0;
4773 	info->in_count = 0;
4774 	info->out_count = 0;
4775 		// TODO: retrieve real values here!
4776 
4777 	VMCache* cache = vm_area_get_locked_cache(area);
4778 
4779 	// Note, this is a simplification; the cache could be larger than this area
4780 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4781 
4782 	vm_area_put_locked_cache(cache);
4783 }
4784 
4785 
4786 static status_t
4787 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4788 {
4789 	// is newSize a multiple of B_PAGE_SIZE?
4790 	if (newSize & (B_PAGE_SIZE - 1))
4791 		return B_BAD_VALUE;
4792 
4793 	// lock all affected address spaces and the cache
4794 	VMArea* area;
4795 	VMCache* cache;
4796 
4797 	MultiAddressSpaceLocker locker;
4798 	AreaCacheLocker cacheLocker;
4799 
4800 	status_t status;
4801 	size_t oldSize;
4802 	bool anyKernelArea;
4803 	bool restart;
4804 
4805 	do {
4806 		anyKernelArea = false;
4807 		restart = false;
4808 
4809 		locker.Unset();
4810 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4811 		if (status != B_OK)
4812 			return status;
4813 		cacheLocker.SetTo(cache, true);	// already locked
4814 
4815 		// enforce restrictions
4816 		if (!kernel) {
4817 			if ((area->protection & B_KERNEL_AREA) != 0)
4818 				return B_NOT_ALLOWED;
4819 			// TODO: Enforce all restrictions (team, etc.)!
4820 		}
4821 
4822 		oldSize = area->Size();
4823 		if (newSize == oldSize)
4824 			return B_OK;
4825 
4826 		if (cache->type != CACHE_TYPE_RAM)
4827 			return B_NOT_ALLOWED;
4828 
4829 		if (oldSize < newSize) {
4830 			// We need to check if all areas of this cache can be resized.
4831 			for (VMArea* current = cache->areas; current != NULL;
4832 					current = current->cache_next) {
4833 				if (!current->address_space->CanResizeArea(current, newSize))
4834 					return B_ERROR;
4835 				anyKernelArea
4836 					|= current->address_space == VMAddressSpace::Kernel();
4837 			}
4838 		} else {
4839 			// We're shrinking the areas, so we must make sure the affected
4840 			// ranges are not wired.
4841 			for (VMArea* current = cache->areas; current != NULL;
4842 					current = current->cache_next) {
4843 				anyKernelArea
4844 					|= current->address_space == VMAddressSpace::Kernel();
4845 
4846 				if (wait_if_area_range_is_wired(current,
4847 						current->Base() + newSize, oldSize - newSize, &locker,
4848 						&cacheLocker)) {
4849 					restart = true;
4850 					break;
4851 				}
4852 			}
4853 		}
4854 	} while (restart);
4855 
4856 	// Okay, looks good so far, so let's do it
4857 
4858 	int priority = kernel && anyKernelArea
4859 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
4860 	uint32 allocationFlags = kernel && anyKernelArea
4861 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
4862 
4863 	if (oldSize < newSize) {
4864 		// Growing the cache can fail, so we do it first.
4865 		status = cache->Resize(cache->virtual_base + newSize, priority);
4866 		if (status != B_OK)
4867 			return status;
4868 	}
4869 
4870 	for (VMArea* current = cache->areas; current != NULL;
4871 			current = current->cache_next) {
4872 		status = current->address_space->ResizeArea(current, newSize,
4873 			allocationFlags);
4874 		if (status != B_OK)
4875 			break;
4876 
4877 		// We also need to unmap all pages beyond the new size, if the area has
4878 		// shrunk
4879 		if (newSize < oldSize) {
4880 			VMCacheChainLocker cacheChainLocker(cache);
4881 			cacheChainLocker.LockAllSourceCaches();
4882 
4883 			unmap_pages(current, current->Base() + newSize,
4884 				oldSize - newSize);
4885 
4886 			cacheChainLocker.Unlock(cache);
4887 		}
4888 	}
4889 
4890 	if (status == B_OK) {
4891 		// Shrink or grow individual page protections if in use.
4892 		if (area->page_protections != NULL) {
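			// Note (illustrative): page protections are stored as 4 bits per
			// page, i.e. two pages per byte, so the byte count below is
			// rounded up; e.g. 10 pages need 5 bytes, 11 pages need 6.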
4893 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
4894 			uint8* newProtections
4895 				= (uint8*)realloc(area->page_protections, bytes);
4896 			if (newProtections == NULL)
4897 				status = B_NO_MEMORY;
4898 			else {
4899 				area->page_protections = newProtections;
4900 
4901 				if (oldSize < newSize) {
4902 					// init the additional page protections to that of the area
4903 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
4904 					uint32 areaProtection = area->protection
4905 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
4906 					memset(area->page_protections + offset,
4907 						areaProtection | (areaProtection << 4), bytes - offset);
4908 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
4909 						uint8& entry = area->page_protections[offset - 1];
4910 						entry = (entry & 0x0f) | (areaProtection << 4);
4911 					}
4912 				}
4913 			}
4914 		}
4915 	}
4916 
4917 	// shrinking the cache can't fail, so we do it now
4918 	if (status == B_OK && newSize < oldSize)
4919 		status = cache->Resize(cache->virtual_base + newSize, priority);
4920 
4921 	if (status != B_OK) {
4922 		// Something failed -- resize the areas back to their original size.
4923 		// This can fail, too, in which case we're seriously screwed.
4924 		for (VMArea* current = cache->areas; current != NULL;
4925 				current = current->cache_next) {
4926 			if (current->address_space->ResizeArea(current, oldSize,
4927 					allocationFlags) != B_OK) {
4928 				panic("vm_resize_area(): Failed and not being able to restore "
4929 					"original state.");
4930 			}
4931 		}
4932 
4933 		cache->Resize(cache->virtual_base + oldSize, priority);
4934 	}
4935 
4936 	// TODO: we must honour the lock restrictions of this area
4937 	return status;
4938 }
4939 
4940 
4941 status_t
4942 vm_memset_physical(phys_addr_t address, int value, size_t length)
4943 {
4944 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
4945 }
4946 
4947 
4948 status_t
4949 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
4950 {
4951 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
4952 }
4953 
4954 
4955 status_t
4956 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
4957 	bool user)
4958 {
4959 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
4960 }
4961 
4962 
4963 void
4964 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
4965 {
4966 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
4967 }
4968 
4969 
4970 /*!	Copies a range of memory directly from/to a page that might not be mapped
4971 	at the moment.
4972 
4973 	For \a unsafeMemory the current mapping (if any) is ignored. The function
4974 	walks through the respective area's cache chain to find the physical page
4975 	and copies from/to it directly.
4976 	The memory range starting at \a unsafeMemory with a length of \a size bytes
4977 	must not cross a page boundary.
4978 
4979 	\param teamID The team ID identifying the address space \a unsafeMemory is
4980 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
4981 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
4982 		is passed, the address space of the thread returned by
4983 		debug_get_debugged_thread() is used.
4984 	\param unsafeMemory The start of the unsafe memory range to be copied
4985 		from/to.
4986 	\param buffer A safely accessible kernel buffer to be copied from/to.
4987 	\param size The number of bytes to be copied.
4988 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
4989 		\a unsafeMemory, the other way around otherwise.
4990 */
4991 status_t
4992 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
4993 	size_t size, bool copyToUnsafe)
4994 {
4995 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
4996 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
4997 		return B_BAD_VALUE;
4998 	}
4999 
5000 	// get the address space for the debugged thread
5001 	VMAddressSpace* addressSpace;
5002 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5003 		addressSpace = VMAddressSpace::Kernel();
5004 	} else if (teamID == B_CURRENT_TEAM) {
5005 		Thread* thread = debug_get_debugged_thread();
5006 		if (thread == NULL || thread->team == NULL)
5007 			return B_BAD_ADDRESS;
5008 
5009 		addressSpace = thread->team->address_space;
5010 	} else
5011 		addressSpace = VMAddressSpace::DebugGet(teamID);
5012 
5013 	if (addressSpace == NULL)
5014 		return B_BAD_ADDRESS;
5015 
5016 	// get the area
5017 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5018 	if (area == NULL)
5019 		return B_BAD_ADDRESS;
5020 
5021 	// search the page
5022 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5023 		+ area->cache_offset;
5024 	VMCache* cache = area->cache;
5025 	vm_page* page = NULL;
5026 	while (cache != NULL) {
5027 		page = cache->DebugLookupPage(cacheOffset);
5028 		if (page != NULL)
5029 			break;
5030 
5031 		// Page not found in this cache -- if it is paged out, we must not try
5032 		// to get it from lower caches.
5033 		if (cache->DebugHasPage(cacheOffset))
5034 			break;
5035 
5036 		cache = cache->source;
5037 	}
5038 
5039 	if (page == NULL)
5040 		return B_UNSUPPORTED;
5041 
5042 	// copy from/to physical memory
5043 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5044 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5045 
5046 	if (copyToUnsafe) {
5047 		if (page->Cache() != area->cache)
5048 			return B_UNSUPPORTED;
5049 
5050 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5051 	}
5052 
5053 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5054 }
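// Illustrative sketch (hypothetical debugger-command caller): reading a
// 32 bit value from a possibly unmapped user page of the debugged team:
//
//     uint32 value;
//     if (vm_debug_copy_page_memory(B_CURRENT_TEAM, (void*)userAddress,
//             &value, sizeof(value), false) == B_OK)
//         kprintf("%#" B_PRIx32 "\n", value);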
5055 
5056 
5057 //	#pragma mark - kernel public API
5058 
5059 
5060 status_t
5061 user_memcpy(void* to, const void* from, size_t size)
5062 {
5063 	// don't allow address overflows
5064 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5065 		return B_BAD_ADDRESS;
5066 
5067 	if (arch_cpu_user_memcpy(to, from, size,
5068 			&thread_get_current_thread()->fault_handler) < B_OK)
5069 		return B_BAD_ADDRESS;
5070 
5071 	return B_OK;
5072 }
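// Illustrative sketch (hypothetical syscall): copying a small argument
// structure from userland before using it:
//
//     struct some_args args;    // hypothetical structure
//     if (!IS_USER_ADDRESS(userArgs)
//         || user_memcpy(&args, userArgs, sizeof(args)) != B_OK)
//         return B_BAD_ADDRESS;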
5073 
5074 
5075 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5076 	the string in \a to, NULL-terminating the result.
5077 
5078 	\param to Pointer to the destination C-string.
5079 	\param from Pointer to the source C-string.
5080 	\param size Size in bytes of the string buffer pointed to by \a to.
5081 
5082 	\return strlen(\a from), or an error code if copying failed.
5083 */
5084 ssize_t
5085 user_strlcpy(char* to, const char* from, size_t size)
5086 {
5087 	if (to == NULL && size != 0)
5088 		return B_BAD_VALUE;
5089 	if (from == NULL)
5090 		return B_BAD_ADDRESS;
5091 
5092 	// limit size to avoid address overflows
5093 	size_t maxSize = std::min(size,
5094 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5095 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5096 		// the source address might still overflow.
5097 
5098 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize,
5099 		&thread_get_current_thread()->fault_handler);
5100 
5101 	// If we hit the address overflow boundary, fail.
5102 	if (result >= 0 && (size_t)result >= maxSize && maxSize < size)
5103 		return B_BAD_ADDRESS;
5104 
5105 	return result;
5106 }
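// Illustrative sketch (hypothetical caller): since the return value is the
// source length, truncation can be detected by comparing it to the buffer
// size:
//
//     char name[B_OS_NAME_LENGTH];
//     ssize_t length = user_strlcpy(name, userName, sizeof(name));
//     if (length < 0)
//         return length;                // B_BAD_ADDRESS or B_BAD_VALUE
//     if ((size_t)length >= sizeof(name))
//         return B_NAME_TOO_LONG;       // source string did not fit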
5107 
5108 
5109 status_t
5110 user_memset(void* s, char c, size_t count)
5111 {
5112 	// don't allow address overflows
5113 	if ((addr_t)s + count < (addr_t)s)
5114 		return B_BAD_ADDRESS;
5115 
5116 	if (arch_cpu_user_memset(s, c, count,
5117 			&thread_get_current_thread()->fault_handler) < B_OK)
5118 		return B_BAD_ADDRESS;
5119 
5120 	return B_OK;
5121 }
5122 
5123 
5124 /*!	Wires a single page at the given address.
5125 
5126 	\param team The team whose address space the address belongs to. Supports
5127 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5128 		parameter is ignored.
5129 	\param address The virtual address to wire down. Does not need to
5130 		be page aligned.
5131 	\param writable If \c true the page shall be writable.
5132 	\param info On success the info is filled in, among other things
5133 		containing the physical address the given virtual one translates to.
5134 	\return \c B_OK, when the page could be wired, another error code otherwise.
5135 */
5136 status_t
5137 vm_wire_page(team_id team, addr_t address, bool writable,
5138 	VMPageWiringInfo* info)
5139 {
5140 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5141 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5142 
5143 	// compute the page protection that is required
5144 	bool isUser = IS_USER_ADDRESS(address);
5145 	uint32 requiredProtection = PAGE_PRESENT
5146 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5147 	if (writable)
5148 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5149 
5150 	// get and read lock the address space
5151 	VMAddressSpace* addressSpace = NULL;
5152 	if (isUser) {
5153 		if (team == B_CURRENT_TEAM)
5154 			addressSpace = VMAddressSpace::GetCurrent();
5155 		else
5156 			addressSpace = VMAddressSpace::Get(team);
5157 	} else
5158 		addressSpace = VMAddressSpace::GetKernel();
5159 	if (addressSpace == NULL)
5160 		return B_ERROR;
5161 
5162 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5163 
5164 	VMTranslationMap* map = addressSpace->TranslationMap();
5165 	status_t error = B_OK;
5166 
5167 	// get the area
5168 	VMArea* area = addressSpace->LookupArea(pageAddress);
5169 	if (area == NULL) {
5170 		addressSpace->Put();
5171 		return B_BAD_ADDRESS;
5172 	}
5173 
5174 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5175 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5176 
5177 	// mark the area range wired
5178 	area->Wire(&info->range);
5179 
5180 	// Lock the area's cache chain and the translation map. Needed to look
5181 	// up the page and play with its wired count.
5182 	cacheChainLocker.LockAllSourceCaches();
5183 	map->Lock();
5184 
5185 	phys_addr_t physicalAddress;
5186 	uint32 flags;
5187 	vm_page* page;
5188 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5189 		&& (flags & requiredProtection) == requiredProtection
5190 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5191 			!= NULL) {
5192 		// Already mapped with the correct permissions -- just increment
5193 		// the page's wired count.
5194 		increment_page_wired_count(page);
5195 
5196 		map->Unlock();
5197 		cacheChainLocker.Unlock();
5198 		addressSpaceLocker.Unlock();
5199 	} else {
5200 		// Let vm_soft_fault() map the page for us, if possible. We need
5201 		// to fully unlock to avoid deadlocks. Since we have already
5202 		// wired the area itself, nothing disturbing will happen with it
5203 		// in the meantime.
5204 		map->Unlock();
5205 		cacheChainLocker.Unlock();
5206 		addressSpaceLocker.Unlock();
5207 
5208 		error = vm_soft_fault(addressSpace, pageAddress, writable, isUser,
5209 			&page, &info->range);
5210 
5211 		if (error != B_OK) {
5212 			// The page could not be mapped -- clean up.
5213 			VMCache* cache = vm_area_get_locked_cache(area);
5214 			area->Unwire(&info->range);
5215 			cache->ReleaseRefAndUnlock();
5216 			addressSpace->Put();
5217 			return error;
5218 		}
5219 	}
5220 
5221 	info->physicalAddress
5222 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5223 			+ address % B_PAGE_SIZE;
5224 	info->page = page;
5225 
5226 	return B_OK;
5227 }
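// Illustrative sketch (hypothetical caller): wiring a page, using its
// physical address, and unwiring it again via vm_unwire_page() below:
//
//     VMPageWiringInfo info;
//     if (vm_wire_page(B_CURRENT_TEAM, virtualAddress, true, &info) == B_OK) {
//         // ... info.physicalAddress is valid and the page stays mapped ...
//         vm_unwire_page(&info);
//     }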
5228 
5229 
5230 /*!	Unwires a single page previously wired via vm_wire_page().
5231 
5232 	\param info The same object passed to vm_wire_page() before.
5233 */
5234 void
5235 vm_unwire_page(VMPageWiringInfo* info)
5236 {
5237 	// lock the address space
5238 	VMArea* area = info->range.area;
5239 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5240 		// takes over our reference
5241 
5242 	// lock the top cache
5243 	VMCache* cache = vm_area_get_locked_cache(area);
5244 	VMCacheChainLocker cacheChainLocker(cache);
5245 
5246 	if (info->page->Cache() != cache) {
5247 		// The page is not in the top cache, so we lock the whole cache chain
5248 		// before touching the page's wired count.
5249 		cacheChainLocker.LockAllSourceCaches();
5250 	}
5251 
5252 	decrement_page_wired_count(info->page);
5253 
5254 	// remove the wired range from the area
5255 	area->Unwire(&info->range);
5256 
5257 	cacheChainLocker.Unlock();
5258 }
5259 
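// Usage sketch (illustrative only, kept out of the build): wire a single page
// of the current team without requiring write access, look at the physical
// address that vm_wire_page() filled in, and balance the call with
// vm_unwire_page() using the very same VMPageWiringInfo object. The helper
// name example_wire_one_page() is made up for this example.
#if 0
static status_t
example_wire_one_page(addr_t userAddress)
{
	VMPageWiringInfo info;
	status_t error = vm_wire_page(B_CURRENT_TEAM, userAddress, false, &info);
	if (error != B_OK)
		return error;

	dprintf("page at %#" B_PRIxADDR " -> physical %#" B_PRIxPHYSADDR "\n",
		userAddress, info.physicalAddress);

	vm_unwire_page(&info);
	return B_OK;
}
#endif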
5260 
5261 /*!	Wires down the given address range in the specified team's address space.
5262 
5263 	If successful the function
5264 	- acquires a reference to the specified team's address space,
5265 	- adds respective wired ranges to all areas that intersect with the given
5266 	  address range,
5267 	- makes sure all pages in the given address range are mapped with the
5268 	  requested access permissions and increments their wired count.
5269 
5270 	It fails when \a team doesn't specify a valid address space, when any part
5271 	of the specified address range is not covered by areas, when the concerned
5272 	areas don't allow mapping with the requested permissions, or when mapping
5273 	fails for another reason.
5274 
5275 	When successful the call must be balanced by an unlock_memory_etc() call
5276 	with the exact same parameters.
5277 
5278 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
5279 		is supported.
5280 	\param address The start of the address range to be wired.
5281 	\param numBytes The size of the address range to be wired.
5282 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5283 		requests that the range must be wired writable ("read from device
5284 		into memory").
5285 	\return \c B_OK on success, another error code otherwise.
5286 */
5287 status_t
5288 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5289 {
5290 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5291 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5292 
5293 	// compute the page protection that is required
5294 	bool isUser = IS_USER_ADDRESS(address);
5295 	bool writable = (flags & B_READ_DEVICE) == 0;
5296 	uint32 requiredProtection = PAGE_PRESENT
5297 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5298 	if (writable)
5299 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5300 
5301 	uint32 mallocFlags = isUser
5302 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5303 
5304 	// get and read lock the address space
5305 	VMAddressSpace* addressSpace = NULL;
5306 	if (isUser) {
5307 		if (team == B_CURRENT_TEAM)
5308 			addressSpace = VMAddressSpace::GetCurrent();
5309 		else
5310 			addressSpace = VMAddressSpace::Get(team);
5311 	} else
5312 		addressSpace = VMAddressSpace::GetKernel();
5313 	if (addressSpace == NULL)
5314 		return B_ERROR;
5315 
5316 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5317 
5318 	VMTranslationMap* map = addressSpace->TranslationMap();
5319 	status_t error = B_OK;
5320 
5321 	// iterate through all concerned areas
5322 	addr_t nextAddress = lockBaseAddress;
5323 	while (nextAddress != lockEndAddress) {
5324 		// get the next area
5325 		VMArea* area = addressSpace->LookupArea(nextAddress);
5326 		if (area == NULL) {
5327 			error = B_BAD_ADDRESS;
5328 			break;
5329 		}
5330 
5331 		addr_t areaStart = nextAddress;
5332 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5333 
5334 		// allocate the wired range (do that before locking the cache to avoid
5335 		// deadlocks)
5336 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5337 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5338 		if (range == NULL) {
5339 			error = B_NO_MEMORY;
5340 			break;
5341 		}
5342 
5343 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5344 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5345 
5346 		// mark the area range wired
5347 		area->Wire(range);
5348 
5349 		// Depending on the area cache type and the wiring, we may not need to
5350 		// look at the individual pages.
5351 		if (area->cache_type == CACHE_TYPE_NULL
5352 			|| area->cache_type == CACHE_TYPE_DEVICE
5353 			|| area->wiring == B_FULL_LOCK
5354 			|| area->wiring == B_CONTIGUOUS) {
5355 			nextAddress = areaEnd;
5356 			continue;
5357 		}
5358 
5359 		// Lock the area's cache chain and the translation map. Needed to look
5360 		// up pages and play with their wired count.
5361 		cacheChainLocker.LockAllSourceCaches();
5362 		map->Lock();
5363 
5364 		// iterate through the pages and wire them
5365 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5366 			phys_addr_t physicalAddress;
5367 			uint32 flags;
5368 
5369 			vm_page* page;
5370 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5371 				&& (flags & requiredProtection) == requiredProtection
5372 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5373 					!= NULL) {
5374 				// Already mapped with the correct permissions -- just increment
5375 				// the page's wired count.
5376 				increment_page_wired_count(page);
5377 			} else {
5378 				// Let vm_soft_fault() map the page for us, if possible. We need
5379 				// to fully unlock to avoid deadlocks. Since we have already
5380 				// wired the area itself, nothing disturbing will happen with it
5381 				// in the meantime.
5382 				map->Unlock();
5383 				cacheChainLocker.Unlock();
5384 				addressSpaceLocker.Unlock();
5385 
5386 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5387 					isUser, &page, range);
5388 
5389 				addressSpaceLocker.Lock();
5390 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5391 				cacheChainLocker.LockAllSourceCaches();
5392 				map->Lock();
5393 			}
5394 
5395 			if (error != B_OK)
5396 				break;
5397 		}
5398 
5399 		map->Unlock();
5400 
5401 		if (error == B_OK) {
5402 			cacheChainLocker.Unlock();
5403 		} else {
5404 			// An error occurred, so abort right here. If the current address
5405 			// is the first in this area, unwire the area, since we won't get
5406 			// to it when reverting what we've done so far.
5407 			if (nextAddress == areaStart) {
5408 				area->Unwire(range);
5409 				cacheChainLocker.Unlock();
5410 				range->~VMAreaWiredRange();
5411 				free_etc(range, mallocFlags);
5412 			} else
5413 				cacheChainLocker.Unlock();
5414 
5415 			break;
5416 		}
5417 	}
5418 
5419 	if (error != B_OK) {
5420 		// An error occurred, so unwire all that we've already wired. Note that
5421 		// even if not a single page was wired, unlock_memory_etc() is called
5422 		// to put the address space reference.
5423 		addressSpaceLocker.Unlock();
5424 		unlock_memory_etc(team, (void*)address, nextAddress - lockBaseAddress,
5425 			flags);
5426 	}
5427 
5428 	return error;
5429 }
5430 
5431 
5432 status_t
5433 lock_memory(void* address, size_t numBytes, uint32 flags)
5434 {
5435 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5436 }
5437 
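// Usage sketch (illustrative only, kept out of the build): wire a buffer of
// the current team (writable, the default when no flags are passed) and
// balance the call with unlock_memory_etc() using the exact same arguments,
// as required by the documentation above. The helper name
// example_with_wired_buffer() is made up for this example.
#if 0
static status_t
example_with_wired_buffer(void* buffer, size_t length)
{
	status_t error = lock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
	if (error != B_OK)
		return error;

	// ... the pages of [buffer, buffer + length) stay mapped and wired here ...

	return unlock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
}
#endif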
5438 
5439 /*!	Unwires an address range previously wired with lock_memory_etc().
5440 
5441 	Note that a call to this function must balance a previous lock_memory_etc()
5442 	call with exactly the same parameters.
5443 */
5444 status_t
5445 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5446 {
5447 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5448 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5449 
5450 	// compute the page protection that is required
5451 	bool isUser = IS_USER_ADDRESS(address);
5452 	bool writable = (flags & B_READ_DEVICE) == 0;
5453 	uint32 requiredProtection = PAGE_PRESENT
5454 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5455 	if (writable)
5456 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5457 
5458 	uint32 mallocFlags = isUser
5459 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5460 
5461 	// get and read lock the address space
5462 	VMAddressSpace* addressSpace = NULL;
5463 	if (isUser) {
5464 		if (team == B_CURRENT_TEAM)
5465 			addressSpace = VMAddressSpace::GetCurrent();
5466 		else
5467 			addressSpace = VMAddressSpace::Get(team);
5468 	} else
5469 		addressSpace = VMAddressSpace::GetKernel();
5470 	if (addressSpace == NULL)
5471 		return B_ERROR;
5472 
5473 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5474 
5475 	VMTranslationMap* map = addressSpace->TranslationMap();
5476 	status_t error = B_OK;
5477 
5478 	// iterate through all concerned areas
5479 	addr_t nextAddress = lockBaseAddress;
5480 	while (nextAddress != lockEndAddress) {
5481 		// get the next area
5482 		VMArea* area = addressSpace->LookupArea(nextAddress);
5483 		if (area == NULL) {
5484 			error = B_BAD_ADDRESS;
5485 			break;
5486 		}
5487 
5488 		addr_t areaStart = nextAddress;
5489 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5490 
5491 		// Lock the area's top cache. This is a requirement for
5492 		// VMArea::Unwire().
5493 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5494 
5495 		// Depending on the area cache type and the wiring, we may not need to
5496 		// look at the individual pages.
5497 		if (area->cache_type == CACHE_TYPE_NULL
5498 			|| area->cache_type == CACHE_TYPE_DEVICE
5499 			|| area->wiring == B_FULL_LOCK
5500 			|| area->wiring == B_CONTIGUOUS) {
5501 			// unwire the range (to avoid deadlocks we delete the range after
5502 			// unlocking the cache)
5503 			nextAddress = areaEnd;
5504 			VMAreaWiredRange* range = area->Unwire(areaStart,
5505 				areaEnd - areaStart, writable);
5506 			cacheChainLocker.Unlock();
5507 			if (range != NULL) {
5508 				range->~VMAreaWiredRange();
5509 				free_etc(range, mallocFlags);
5510 			}
5511 			continue;
5512 		}
5513 
5514 		// Lock the area's cache chain and the translation map. Needed to look
5515 		// up pages and play with their wired count.
5516 		cacheChainLocker.LockAllSourceCaches();
5517 		map->Lock();
5518 
5519 		// iterate through the pages and unwire them
5520 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5521 			phys_addr_t physicalAddress;
5522 			uint32 flags;
5523 
5524 			vm_page* page;
5525 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5526 				&& (flags & PAGE_PRESENT) != 0
5527 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5528 					!= NULL) {
5529 				// Already mapped with the correct permissions -- just increment
5530 				// The page is still mapped (PAGE_PRESENT is set) -- just
5531 				// decrement its wired count.
5532 			} else {
5533 				panic("unlock_memory_etc(): Failed to unwire page: address "
5534 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5535 					nextAddress);
5536 				error = B_BAD_VALUE;
5537 				break;
5538 			}
5539 		}
5540 
5541 		map->Unlock();
5542 
5543 		// All pages are unwired. Remove the area's wired range as well (to
5544 		// avoid deadlocks we delete the range after unlocking the cache).
5545 		VMAreaWiredRange* range = area->Unwire(areaStart,
5546 			areaEnd - areaStart, writable);
5547 
5548 		cacheChainLocker.Unlock();
5549 
5550 		if (range != NULL) {
5551 			range->~VMAreaWiredRange();
5552 			free_etc(range, mallocFlags);
5553 		}
5554 
5555 		if (error != B_OK)
5556 			break;
5557 	}
5558 
5559 	// get rid of the address space reference
5560 	addressSpace->Put();
5561 
5562 	return error;
5563 }
5564 
5565 
5566 status_t
5567 unlock_memory(void* address, size_t numBytes, uint32 flags)
5568 {
5569 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5570 }
5571 
5572 
5573 /*!	Similar to get_memory_map(), but also allows specifying the address space
5574 	for the memory in question and has saner semantics.
5575 	Returns \c B_OK when the complete range could be translated or
5576 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5577 	case the actual number of entries is written to \c *_numEntries. Any other
5578 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5579 	in this case.
5580 */
5581 status_t
5582 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5583 	physical_entry* table, uint32* _numEntries)
5584 {
5585 	uint32 numEntries = *_numEntries;
5586 	*_numEntries = 0;
5587 
5588 	VMAddressSpace* addressSpace;
5589 	addr_t virtualAddress = (addr_t)address;
5590 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5591 	phys_addr_t physicalAddress;
5592 	status_t status = B_OK;
5593 	int32 index = -1;
5594 	addr_t offset = 0;
5595 	bool interrupts = are_interrupts_enabled();
5596 
5597 	TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team,
5598 		address, numBytes, numEntries));
5599 
5600 	if (numEntries == 0 || numBytes == 0)
5601 		return B_BAD_VALUE;
5602 
5603 	// in which address space is the address to be found?
5604 	if (IS_USER_ADDRESS(virtualAddress)) {
5605 		if (team == B_CURRENT_TEAM)
5606 			addressSpace = VMAddressSpace::GetCurrent();
5607 		else
5608 			addressSpace = VMAddressSpace::Get(team);
5609 	} else
5610 		addressSpace = VMAddressSpace::GetKernel();
5611 
5612 	if (addressSpace == NULL)
5613 		return B_ERROR;
5614 
5615 	VMTranslationMap* map = addressSpace->TranslationMap();
5616 
5617 	if (interrupts)
5618 		map->Lock();
5619 
5620 	while (offset < numBytes) {
5621 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5622 		uint32 flags;
5623 
5624 		if (interrupts) {
5625 			status = map->Query((addr_t)address + offset, &physicalAddress,
5626 				&flags);
5627 		} else {
5628 			status = map->QueryInterrupt((addr_t)address + offset,
5629 				&physicalAddress, &flags);
5630 		}
5631 		if (status < B_OK)
5632 			break;
5633 		if ((flags & PAGE_PRESENT) == 0) {
5634 			panic("get_memory_map() called on unmapped memory!");
5635 			return B_BAD_ADDRESS;
5636 		}
5637 
5638 		if (index < 0 && pageOffset > 0) {
5639 			physicalAddress += pageOffset;
5640 			if (bytes > B_PAGE_SIZE - pageOffset)
5641 				bytes = B_PAGE_SIZE - pageOffset;
5642 		}
5643 
5644 		// need to switch to the next physical_entry?
5645 		if (index < 0 || table[index].address
5646 				!= physicalAddress - table[index].size) {
5647 			if ((uint32)++index + 1 > numEntries) {
5648 				// table too small
5649 				break;
5650 			}
5651 			table[index].address = physicalAddress;
5652 			table[index].size = bytes;
5653 		} else {
5654 			// the page fits into the current entry
5655 			table[index].size += bytes;
5656 		}
5657 
5658 		offset += bytes;
5659 	}
5660 
5661 	if (interrupts)
5662 		map->Unlock();
5663 
5664 	if (status != B_OK)
5665 		return status;
5666 
5667 	if ((uint32)index + 1 > numEntries) {
5668 		*_numEntries = index;
5669 		return B_BUFFER_OVERFLOW;
5670 	}
5671 
5672 	*_numEntries = index + 1;
5673 	return B_OK;
5674 }
5675 
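// Usage sketch (illustrative only, kept out of the build): translate a buffer
// of the current team into a small run list of physical ranges. The range must
// already be mapped (e.g. wired via lock_memory_etc()), since
// get_memory_map_etc() panics on unmapped pages. A B_BUFFER_OVERFLOW result
// still yields valid entries -- it only signals that the table was too small.
// The helper name example_physical_runs() is made up for this example.
#if 0
static status_t
example_physical_runs(const void* buffer, size_t length)
{
	physical_entry table[8];
	uint32 numEntries = 8;
	status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, length, table,
		&numEntries);
	if (error != B_OK && error != B_BUFFER_OVERFLOW)
		return error;

	for (uint32 i = 0; i < numEntries; i++) {
		dprintf("run %" B_PRIu32 ": %#" B_PRIxPHYSADDR ", %" B_PRIu64
			" bytes\n", i, table[i].address, (uint64)table[i].size);
	}

	return error;
}
#endif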
5676 
5677 /*!	According to the BeBook, this function should always succeed.
5678 	This is no longer the case.
5679 */
5680 extern "C" int32
5681 __get_memory_map_haiku(const void* address, size_t numBytes,
5682 	physical_entry* table, int32 numEntries)
5683 {
5684 	uint32 entriesRead = numEntries;
5685 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5686 		table, &entriesRead);
5687 	if (error != B_OK)
5688 		return error;
5689 
5690 	// close the entry list
5691 
5692 	// if it's only one entry, we will silently accept the missing ending
5693 	if (numEntries == 1)
5694 		return B_OK;
5695 
5696 	if (entriesRead + 1 > (uint32)numEntries)
5697 		return B_BUFFER_OVERFLOW;
5698 
5699 	table[entriesRead].address = 0;
5700 	table[entriesRead].size = 0;
5701 
5702 	return B_OK;
5703 }
5704 
5705 
5706 area_id
5707 area_for(void* address)
5708 {
5709 	return vm_area_for((addr_t)address, true);
5710 }
5711 
5712 
5713 area_id
5714 find_area(const char* name)
5715 {
5716 	return VMAreaHash::Find(name);
5717 }
5718 
5719 
5720 status_t
5721 _get_area_info(area_id id, area_info* info, size_t size)
5722 {
5723 	if (size != sizeof(area_info) || info == NULL)
5724 		return B_BAD_VALUE;
5725 
5726 	AddressSpaceReadLocker locker;
5727 	VMArea* area;
5728 	status_t status = locker.SetFromArea(id, area);
5729 	if (status != B_OK)
5730 		return status;
5731 
5732 	fill_area_info(area, info, size);
5733 	return B_OK;
5734 }
5735 
5736 
5737 status_t
5738 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size)
5739 {
5740 	addr_t nextBase = *(addr_t*)cookie;
5741 
5742 	// we're already through the list
5743 	if (nextBase == (addr_t)-1)
5744 		return B_ENTRY_NOT_FOUND;
5745 
5746 	if (team == B_CURRENT_TEAM)
5747 		team = team_get_current_team_id();
5748 
5749 	AddressSpaceReadLocker locker(team);
5750 	if (!locker.IsLocked())
5751 		return B_BAD_TEAM_ID;
5752 
5753 	VMArea* area;
5754 	for (VMAddressSpace::AreaIterator it
5755 				= locker.AddressSpace()->GetAreaIterator();
5756 			(area = it.Next()) != NULL;) {
5757 		if (area->Base() > nextBase)
5758 			break;
5759 	}
5760 
5761 	if (area == NULL) {
5762 		nextBase = (addr_t)-1;
5763 		return B_ENTRY_NOT_FOUND;
5764 	}
5765 
5766 	fill_area_info(area, info, size);
5767 	*cookie = (int32)(area->Base());
5768 		// TODO: Not 64 bit safe!
5769 
5770 	return B_OK;
5771 }
5772 
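// Usage sketch (illustrative only, kept out of the build): iterate over a
// team's areas with the cookie based protocol implemented above. The helper
// name example_dump_team_areas() is made up for this example.
#if 0
static void
example_dump_team_areas(team_id team)
{
	int32 cookie = 0;
	area_info info;
	while (_get_next_area_info(team, &cookie, &info, sizeof(info)) == B_OK) {
		dprintf("area %" B_PRId32 " \"%s\": base %p, %" B_PRIu64 " bytes\n",
			info.area, info.name, info.address, (uint64)info.size);
	}
}
#endif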
5773 
5774 status_t
5775 set_area_protection(area_id area, uint32 newProtection)
5776 {
5777 	fix_protection(&newProtection);
5778 
5779 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5780 		newProtection, true);
5781 }
5782 
5783 
5784 status_t
5785 resize_area(area_id areaID, size_t newSize)
5786 {
5787 	return vm_resize_area(areaID, newSize, true);
5788 }
5789 
5790 
5791 /*!	Transfers the specified area to a new team. The caller must be the owner
5792 	of the area.
5793 */
5794 area_id
5795 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5796 	bool kernel)
5797 {
5798 	area_info info;
5799 	status_t status = get_area_info(id, &info);
5800 	if (status != B_OK)
5801 		return status;
5802 
5803 	if (info.team != thread_get_current_thread()->team->id)
5804 		return B_PERMISSION_DENIED;
5805 
5806 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5807 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5808 	if (clonedArea < 0)
5809 		return clonedArea;
5810 
5811 	status = vm_delete_area(info.team, id, kernel);
5812 	if (status != B_OK) {
5813 		vm_delete_area(target, clonedArea, kernel);
5814 		return status;
5815 	}
5816 
5817 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5818 
5819 	return clonedArea;
5820 }
5821 
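// Usage sketch (illustrative only, kept out of the build): hand an area that
// the calling team owns over to another team, letting the kernel pick the
// target address. On success the returned ID names the clone in the target
// team and the original area has been deleted. The helper name
// example_hand_over_area() is made up for this example.
#if 0
static area_id
example_hand_over_area(area_id id, team_id target)
{
	void* address = NULL;
	return transfer_area(id, &address, B_ANY_ADDRESS, target, true);
}
#endif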
5822 
5823 extern "C" area_id
5824 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
5825 	size_t numBytes, uint32 addressSpec, uint32 protection,
5826 	void** _virtualAddress)
5827 {
5828 	if (!arch_vm_supports_protection(protection))
5829 		return B_NOT_SUPPORTED;
5830 
5831 	fix_protection(&protection);
5832 
5833 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
5834 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
5835 		false);
5836 }
5837 
5838 
5839 area_id
5840 clone_area(const char* name, void** _address, uint32 addressSpec,
5841 	uint32 protection, area_id source)
5842 {
5843 	if ((protection & B_KERNEL_PROTECTION) == 0)
5844 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5845 
5846 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
5847 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
5848 }
5849 
5850 
5851 area_id
5852 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock,
5853 	uint32 protection, uint32 flags,
5854 	const virtual_address_restrictions* virtualAddressRestrictions,
5855 	const physical_address_restrictions* physicalAddressRestrictions,
5856 	void** _address)
5857 {
5858 	fix_protection(&protection);
5859 
5860 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
5861 		virtualAddressRestrictions, physicalAddressRestrictions, true,
5862 		_address);
5863 }
5864 
5865 
5866 extern "C" area_id
5867 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
5868 	size_t size, uint32 lock, uint32 protection)
5869 {
5870 	fix_protection(&protection);
5871 
5872 	virtual_address_restrictions virtualRestrictions = {};
5873 	virtualRestrictions.address = *_address;
5874 	virtualRestrictions.address_specification = addressSpec;
5875 	physical_address_restrictions physicalRestrictions = {};
5876 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
5877 		lock, protection, 0, &virtualRestrictions, &physicalRestrictions, true,
5878 		_address);
5879 }
5880 
5881 
5882 status_t
5883 delete_area(area_id area)
5884 {
5885 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
5886 }
5887 
5888 
5889 //	#pragma mark - Userland syscalls
5890 
5891 
5892 status_t
5893 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
5894 	addr_t size)
5895 {
5896 	// filter out some unavailable values (for userland)
5897 	switch (addressSpec) {
5898 		case B_ANY_KERNEL_ADDRESS:
5899 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5900 			return B_BAD_VALUE;
5901 	}
5902 
5903 	addr_t address;
5904 
5905 	if (!IS_USER_ADDRESS(userAddress)
5906 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
5907 		return B_BAD_ADDRESS;
5908 
5909 	status_t status = vm_reserve_address_range(
5910 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
5911 		RESERVED_AVOID_BASE);
5912 	if (status != B_OK)
5913 		return status;
5914 
5915 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
5916 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5917 			(void*)address, size);
5918 		return B_BAD_ADDRESS;
5919 	}
5920 
5921 	return B_OK;
5922 }
5923 
5924 
5925 status_t
5926 _user_unreserve_address_range(addr_t address, addr_t size)
5927 {
5928 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5929 		(void*)address, size);
5930 }
5931 
5932 
5933 area_id
5934 _user_area_for(void* address)
5935 {
5936 	return vm_area_for((addr_t)address, false);
5937 }
5938 
5939 
5940 area_id
5941 _user_find_area(const char* userName)
5942 {
5943 	char name[B_OS_NAME_LENGTH];
5944 
5945 	if (!IS_USER_ADDRESS(userName)
5946 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5947 		return B_BAD_ADDRESS;
5948 
5949 	return find_area(name);
5950 }
5951 
5952 
5953 status_t
5954 _user_get_area_info(area_id area, area_info* userInfo)
5955 {
5956 	if (!IS_USER_ADDRESS(userInfo))
5957 		return B_BAD_ADDRESS;
5958 
5959 	area_info info;
5960 	status_t status = get_area_info(area, &info);
5961 	if (status < B_OK)
5962 		return status;
5963 
5964 	// TODO: do we want to prevent userland from seeing kernel protections?
5965 	//info.protection &= B_USER_PROTECTION;
5966 
5967 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5968 		return B_BAD_ADDRESS;
5969 
5970 	return status;
5971 }
5972 
5973 
5974 status_t
5975 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo)
5976 {
5977 	int32 cookie;
5978 
5979 	if (!IS_USER_ADDRESS(userCookie)
5980 		|| !IS_USER_ADDRESS(userInfo)
5981 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
5982 		return B_BAD_ADDRESS;
5983 
5984 	area_info info;
5985 	status_t status = _get_next_area_info(team, &cookie, &info,
5986 		sizeof(area_info));
5987 	if (status != B_OK)
5988 		return status;
5989 
5990 	//info.protection &= B_USER_PROTECTION;
5991 
5992 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
5993 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5994 		return B_BAD_ADDRESS;
5995 
5996 	return status;
5997 }
5998 
5999 
6000 status_t
6001 _user_set_area_protection(area_id area, uint32 newProtection)
6002 {
6003 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6004 		return B_BAD_VALUE;
6005 
6006 	fix_protection(&newProtection);
6007 
6008 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6009 		newProtection, false);
6010 }
6011 
6012 
6013 status_t
6014 _user_resize_area(area_id area, size_t newSize)
6015 {
6016 	// TODO: Since we restrict deleting of areas to those owned by the team,
6017 	// we should also do that for resizing (check other functions, too).
6018 	return vm_resize_area(area, newSize, false);
6019 }
6020 
6021 
6022 area_id
6023 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6024 	team_id target)
6025 {
6026 	// filter out some unavailable values (for userland)
6027 	switch (addressSpec) {
6028 		case B_ANY_KERNEL_ADDRESS:
6029 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6030 			return B_BAD_VALUE;
6031 	}
6032 
6033 	void* address;
6034 	if (!IS_USER_ADDRESS(userAddress)
6035 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6036 		return B_BAD_ADDRESS;
6037 
6038 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6039 	if (newArea < B_OK)
6040 		return newArea;
6041 
6042 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6043 		return B_BAD_ADDRESS;
6044 
6045 	return newArea;
6046 }
6047 
6048 
6049 area_id
6050 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6051 	uint32 protection, area_id sourceArea)
6052 {
6053 	char name[B_OS_NAME_LENGTH];
6054 	void* address;
6055 
6056 	// filter out some unavailable values (for userland)
6057 	switch (addressSpec) {
6058 		case B_ANY_KERNEL_ADDRESS:
6059 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6060 			return B_BAD_VALUE;
6061 	}
6062 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6063 		return B_BAD_VALUE;
6064 
6065 	if (!IS_USER_ADDRESS(userName)
6066 		|| !IS_USER_ADDRESS(userAddress)
6067 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6068 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6069 		return B_BAD_ADDRESS;
6070 
6071 	fix_protection(&protection);
6072 
6073 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6074 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6075 		false);
6076 	if (clonedArea < B_OK)
6077 		return clonedArea;
6078 
6079 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6080 		delete_area(clonedArea);
6081 		return B_BAD_ADDRESS;
6082 	}
6083 
6084 	return clonedArea;
6085 }
6086 
6087 
6088 area_id
6089 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6090 	size_t size, uint32 lock, uint32 protection)
6091 {
6092 	char name[B_OS_NAME_LENGTH];
6093 	void* address;
6094 
6095 	// filter out some unavailable values (for userland)
6096 	switch (addressSpec) {
6097 		case B_ANY_KERNEL_ADDRESS:
6098 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6099 			return B_BAD_VALUE;
6100 	}
6101 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6102 		return B_BAD_VALUE;
6103 
6104 	if (!IS_USER_ADDRESS(userName)
6105 		|| !IS_USER_ADDRESS(userAddress)
6106 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6107 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6108 		return B_BAD_ADDRESS;
6109 
6110 	if (addressSpec == B_EXACT_ADDRESS
6111 		&& IS_KERNEL_ADDRESS(address))
6112 		return B_BAD_VALUE;
6113 
6114 	fix_protection(&protection);
6115 
6116 	virtual_address_restrictions virtualRestrictions = {};
6117 	virtualRestrictions.address = address;
6118 	virtualRestrictions.address_specification = addressSpec;
6119 	physical_address_restrictions physicalRestrictions = {};
6120 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6121 		size, lock, protection, 0, &virtualRestrictions, &physicalRestrictions,
6122 		false, &address);
6123 
6124 	if (area >= B_OK
6125 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6126 		delete_area(area);
6127 		return B_BAD_ADDRESS;
6128 	}
6129 
6130 	return area;
6131 }
6132 
6133 
6134 status_t
6135 _user_delete_area(area_id area)
6136 {
6137 	// Unlike the BeOS implementation, you can now only delete areas
6138 	// that you have created yourself from userland.
6139 	// The documentation for delete_area() explicitly states that this
6140 	// will be restricted in the future, and so it will.
6141 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6142 }
6143 
6144 
6145 // TODO: create a BeOS style call for this!
6146 
6147 area_id
6148 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6149 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6150 	int fd, off_t offset)
6151 {
6152 	char name[B_OS_NAME_LENGTH];
6153 	void* address;
6154 	area_id area;
6155 
6156 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6157 		return B_BAD_VALUE;
6158 
6159 	fix_protection(&protection);
6160 
6161 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6162 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6163 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6164 		return B_BAD_ADDRESS;
6165 
6166 	if (addressSpec == B_EXACT_ADDRESS) {
6167 		if ((addr_t)address + size < (addr_t)address
6168 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6169 			return B_BAD_VALUE;
6170 		}
6171 		if (!IS_USER_ADDRESS(address)
6172 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6173 			return B_BAD_ADDRESS;
6174 		}
6175 	}
6176 
6177 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6178 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6179 		false);
6180 	if (area < B_OK)
6181 		return area;
6182 
6183 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6184 		return B_BAD_ADDRESS;
6185 
6186 	return area;
6187 }
6188 
6189 
6190 status_t
6191 _user_unmap_memory(void* _address, size_t size)
6192 {
6193 	addr_t address = (addr_t)_address;
6194 
6195 	// check params
6196 	if (size == 0 || (addr_t)address + size < (addr_t)address
6197 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6198 		return B_BAD_VALUE;
6199 	}
6200 
6201 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6202 		return B_BAD_ADDRESS;
6203 
6204 	// Write lock the address space and ensure the address range is not wired.
6205 	AddressSpaceWriteLocker locker;
6206 	do {
6207 		status_t status = locker.SetTo(team_get_current_team_id());
6208 		if (status != B_OK)
6209 			return status;
6210 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6211 			size, &locker));
6212 
6213 	// unmap
6214 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6215 }
6216 
6217 
6218 status_t
6219 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6220 {
6221 	// check address range
6222 	addr_t address = (addr_t)_address;
6223 	size = PAGE_ALIGN(size);
6224 
6225 	if ((address % B_PAGE_SIZE) != 0)
6226 		return B_BAD_VALUE;
6227 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6228 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6229 		// weird error code required by POSIX
6230 		return ENOMEM;
6231 	}
6232 
6233 	// extend and check protection
6234 	if ((protection & ~B_USER_PROTECTION) != 0)
6235 		return B_BAD_VALUE;
6236 
6237 	fix_protection(&protection);
6238 
6239 	// We need to write lock the address space, since we're going to play with
6240 	// the areas. Also make sure that none of the areas is wired and that we're
6241 	// actually allowed to change the protection.
6242 	AddressSpaceWriteLocker locker;
6243 
6244 	bool restart;
6245 	do {
6246 		restart = false;
6247 
6248 		status_t status = locker.SetTo(team_get_current_team_id());
6249 		if (status != B_OK)
6250 			return status;
6251 
6252 		// First round: Check whether the whole range is covered by areas and we
6253 		// are allowed to modify them.
6254 		addr_t currentAddress = address;
6255 		size_t sizeLeft = size;
6256 		while (sizeLeft > 0) {
6257 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6258 			if (area == NULL)
6259 				return B_NO_MEMORY;
6260 
6261 			if ((area->protection & B_KERNEL_AREA) != 0)
6262 				return B_NOT_ALLOWED;
6263 
6264 			AreaCacheLocker cacheLocker(area);
6265 
6266 			if (wait_if_area_is_wired(area, &locker, &cacheLocker)) {
6267 				restart = true;
6268 				break;
6269 			}
6270 
6271 			cacheLocker.Unlock();
6272 
6273 			// TODO: For (shared) mapped files we should check whether the new
6274 			// protections are compatible with the file permissions. We don't
6275 			// have a way to do that yet, though.
6276 
6277 			addr_t offset = currentAddress - area->Base();
6278 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6279 
6280 			currentAddress += rangeSize;
6281 			sizeLeft -= rangeSize;
6282 		}
6283 	} while (restart);
6284 
6285 	// Second round: If the protections differ from that of the area, create a
6286 	// page protection array and re-map mapped pages.
6287 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6288 	addr_t currentAddress = address;
6289 	size_t sizeLeft = size;
6290 	while (sizeLeft > 0) {
6291 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6292 		if (area == NULL)
6293 			return B_NO_MEMORY;
6294 
6295 		addr_t offset = currentAddress - area->Base();
6296 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6297 
6298 		currentAddress += rangeSize;
6299 		sizeLeft -= rangeSize;
6300 
6301 		if (area->page_protections == NULL) {
6302 			if (area->protection == protection)
6303 				continue;
6304 
6305 			status_t status = allocate_area_page_protections(area);
6306 			if (status != B_OK)
6307 				return status;
6308 		}
6309 
6310 		// We need to lock the complete cache chain, since we potentially unmap
6311 		// pages of lower caches.
6312 		VMCache* topCache = vm_area_get_locked_cache(area);
6313 		VMCacheChainLocker cacheChainLocker(topCache);
6314 		cacheChainLocker.LockAllSourceCaches();
6315 
6316 		for (addr_t pageAddress = area->Base() + offset;
6317 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6318 			map->Lock();
6319 
6320 			set_area_page_protection(area, pageAddress, protection);
6321 
6322 			phys_addr_t physicalAddress;
6323 			uint32 flags;
6324 
6325 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6326 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6327 				map->Unlock();
6328 				continue;
6329 			}
6330 
6331 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6332 			if (page == NULL) {
6333 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6334 					"\n", area, physicalAddress);
6335 				map->Unlock();
6336 				return B_ERROR;
6337 			}
6338 
6339 			// If the page is not in the topmost cache and write access is
6340 			// requested, we have to unmap it. Otherwise we can re-map it with
6341 			// the new protection.
6342 			bool unmapPage = page->Cache() != topCache
6343 				&& (protection & B_WRITE_AREA) != 0;
6344 
6345 			if (!unmapPage)
6346 				map->ProtectPage(area, pageAddress, protection);
6347 
6348 			map->Unlock();
6349 
6350 			if (unmapPage) {
6351 				DEBUG_PAGE_ACCESS_START(page);
6352 				unmap_page(area, pageAddress);
6353 				DEBUG_PAGE_ACCESS_END(page);
6354 			}
6355 		}
6356 	}
6357 
6358 	return B_OK;
6359 }
6360 
6361 
6362 status_t
6363 _user_sync_memory(void* _address, size_t size, uint32 flags)
6364 {
6365 	addr_t address = (addr_t)_address;
6366 	size = PAGE_ALIGN(size);
6367 
6368 	// check params
6369 	if ((address % B_PAGE_SIZE) != 0)
6370 		return B_BAD_VALUE;
6371 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6372 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6373 		// weird error code required by POSIX
6374 		return ENOMEM;
6375 	}
6376 
6377 	bool writeSync = (flags & MS_SYNC) != 0;
6378 	bool writeAsync = (flags & MS_ASYNC) != 0;
6379 	if (writeSync && writeAsync)
6380 		return B_BAD_VALUE;
6381 
6382 	if (size == 0 || (!writeSync && !writeAsync))
6383 		return B_OK;
6384 
6385 	// iterate through the range and sync all concerned areas
6386 	while (size > 0) {
6387 		// read lock the address space
6388 		AddressSpaceReadLocker locker;
6389 		status_t error = locker.SetTo(team_get_current_team_id());
6390 		if (error != B_OK)
6391 			return error;
6392 
6393 		// get the first area
6394 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6395 		if (area == NULL)
6396 			return B_NO_MEMORY;
6397 
6398 		uint32 offset = address - area->Base();
6399 		size_t rangeSize = min_c(area->Size() - offset, size);
6400 		offset += area->cache_offset;
6401 
6402 		// lock the cache
6403 		AreaCacheLocker cacheLocker(area);
6404 		if (!cacheLocker)
6405 			return B_BAD_VALUE;
6406 		VMCache* cache = area->cache;
6407 
6408 		locker.Unlock();
6409 
6410 		uint32 firstPage = offset >> PAGE_SHIFT;
6411 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6412 
6413 		// write the pages
6414 		if (cache->type == CACHE_TYPE_VNODE) {
6415 			if (writeSync) {
6416 				// synchronous
6417 				error = vm_page_write_modified_page_range(cache, firstPage,
6418 					endPage);
6419 				if (error != B_OK)
6420 					return error;
6421 			} else {
6422 				// asynchronous
6423 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6424 				// TODO: This is probably not quite what is supposed to happen.
6425 				// Especially when a lot has to be written, it might take ages
6426 				// until it really hits the disk.
6427 			}
6428 		}
6429 
6430 		address += rangeSize;
6431 		size -= rangeSize;
6432 	}
6433 
6434 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6435 	// synchronize multiple mappings of the same file. In our VM they never get
6436 	// out of sync, though, so we don't have to do anything.
6437 
6438 	return B_OK;
6439 }
6440 
6441 
6442 status_t
6443 _user_memory_advice(void* address, size_t size, uint32 advice)
6444 {
6445 	// TODO: Implement!
6446 	return B_OK;
6447 }
6448 
6449 
6450 status_t
6451 _user_get_memory_properties(team_id teamID, const void* address,
6452 	uint32* _protected, uint32* _lock)
6453 {
6454 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6455 		return B_BAD_ADDRESS;
6456 
6457 	AddressSpaceReadLocker locker;
6458 	status_t error = locker.SetTo(teamID);
6459 	if (error != B_OK)
6460 		return error;
6461 
6462 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6463 	if (area == NULL)
6464 		return B_NO_MEMORY;
6465 
6466 
6467 	uint32 protection = area->protection;
6468 	if (area->page_protections != NULL)
6469 		protection = get_area_page_protection(area, (addr_t)address);
6470 
6471 	uint32 wiring = area->wiring;
6472 
6473 	locker.Unlock();
6474 
6475 	error = user_memcpy(_protected, &protection, sizeof(protection));
6476 	if (error != B_OK)
6477 		return error;
6478 
6479 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6480 
6481 	return error;
6482 }
6483 
6484 
6485 // #pragma mark -- compatibility
6486 
6487 
6488 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6489 
6490 
6491 struct physical_entry_beos {
6492 	uint32	address;
6493 	uint32	size;
6494 };
6495 
6496 
6497 /*!	The physical_entry structure has changed. We need to translate it to the
6498 	old one.
6499 */
6500 extern "C" int32
6501 __get_memory_map_beos(const void* _address, size_t numBytes,
6502 	physical_entry_beos* table, int32 numEntries)
6503 {
6504 	if (numEntries <= 0)
6505 		return B_BAD_VALUE;
6506 
6507 	const uint8* address = (const uint8*)_address;
6508 
6509 	int32 count = 0;
6510 	while (numBytes > 0 && count < numEntries) {
6511 		physical_entry entry;
6512 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6513 		if (result < 0) {
6514 			if (result != B_BUFFER_OVERFLOW)
6515 				return result;
6516 		}
6517 
6518 		if (entry.address >= (phys_addr_t)1 << 32) {
6519 			panic("get_memory_map(): Address is greater than 4 GB!");
6520 			return B_ERROR;
6521 		}
6522 
6523 		table[count].address = entry.address;
6524 		table[count++].size = entry.size;
6525 
6526 		address += entry.size;
6527 		numBytes -= entry.size;
6528 	}
6529 
6530 	// null-terminate the table, if possible
6531 	if (count < numEntries) {
6532 		table[count].address = 0;
6533 		table[count].size = 0;
6534 	}
6535 
6536 	return B_OK;
6537 }
6538 
6539 
6540 /*!	The type of the \a physicalAddress parameter has changed from void* to
6541 	phys_addr_t.
6542 */
6543 extern "C" area_id
6544 __map_physical_memory_beos(const char* name, void* physicalAddress,
6545 	size_t numBytes, uint32 addressSpec, uint32 protection,
6546 	void** _virtualAddress)
6547 {
6548 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6549 		addressSpec, protection, _virtualAddress);
6550 }
6551 
6552 
6553 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6554 	we meddle with the \a lock parameter to force 32 bit.
6555 */
6556 extern "C" area_id
6557 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6558 	size_t size, uint32 lock, uint32 protection)
6559 {
6560 	switch (lock) {
6561 		case B_NO_LOCK:
6562 			break;
6563 		case B_FULL_LOCK:
6564 		case B_LAZY_LOCK:
6565 			lock = B_32_BIT_FULL_LOCK;
6566 			break;
6567 		case B_CONTIGUOUS:
6568 			lock = B_32_BIT_CONTIGUOUS;
6569 			break;
6570 	}
6571 
6572 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6573 		protection);
6574 }
6575 
6576 
6577 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6578 	"BASE");
6579 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6580 	"map_physical_memory@", "BASE");
6581 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6582 	"BASE");
6583 
6584 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6585 	"get_memory_map@@", "1_ALPHA3");
6586 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6587 	"map_physical_memory@@", "1_ALPHA3");
6588 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6589 	"1_ALPHA3");
6590 
6591 
6592 #else
6593 
6594 
6595 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6596 	"get_memory_map@@", "BASE");
6597 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6598 	"map_physical_memory@@", "BASE");
6599 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6600 	"BASE");
6601 
6602 
6603 #endif	// defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6604