xref: /haiku/src/system/kernel/vm/vm.cpp (revision 23d878482ed22e55dad6d1fca1df7bea42eb157c)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <boot/elf.h>
31 #include <boot/stage2.h>
32 #include <condition_variable.h>
33 #include <console.h>
34 #include <debug.h>
35 #include <file_cache.h>
36 #include <fs/fd.h>
37 #include <heap.h>
38 #include <kernel.h>
39 #include <int.h>
40 #include <lock.h>
41 #include <low_resource_manager.h>
42 #include <slab/Slab.h>
43 #include <smp.h>
44 #include <system_info.h>
45 #include <thread.h>
46 #include <team.h>
47 #include <tracing.h>
48 #include <util/AutoLock.h>
49 #include <util/khash.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
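/*!	Locking policy and locker for area caches: the cache is expected to be
	obtained already locked via vm_area_get_locked_cache() (Lock() is therefore
	a no-op), and is released via vm_area_put_locked_cache() on Unlock().
*/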
76 class AreaCacheLocking {
77 public:
78 	inline bool Lock(VMCache* lockable)
79 	{
80 		return false;
81 	}
82 
83 	inline void Unlock(VMCache* lockable)
84 	{
85 		vm_area_put_locked_cache(lockable);
86 	}
87 };
88 
89 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
90 public:
91 	inline AreaCacheLocker(VMCache* cache = NULL)
92 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
93 	{
94 	}
95 
96 	inline AreaCacheLocker(VMArea* area)
97 		: AutoLocker<VMCache, AreaCacheLocking>()
98 	{
99 		SetTo(area);
100 	}
101 
102 	inline void SetTo(VMCache* cache, bool alreadyLocked)
103 	{
104 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
105 	}
106 
107 	inline void SetTo(VMArea* area)
108 	{
109 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
110 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
111 	}
112 };
113 
114 
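/*!	Locks a chain of caches, from a given top cache down through its source
	caches. Each locked cache's user data pointer is used to remember its
	consumer, so that Unlock() can release the chain in source -> consumer
	order (see the comment there for why that order matters).
*/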
115 class VMCacheChainLocker {
116 public:
117 	VMCacheChainLocker()
118 		:
119 		fTopCache(NULL),
120 		fBottomCache(NULL)
121 	{
122 	}
123 
124 	VMCacheChainLocker(VMCache* topCache)
125 		:
126 		fTopCache(topCache),
127 		fBottomCache(topCache)
128 	{
129 	}
130 
131 	~VMCacheChainLocker()
132 	{
133 		Unlock();
134 	}
135 
136 	void SetTo(VMCache* topCache)
137 	{
138 		fTopCache = topCache;
139 		fBottomCache = topCache;
140 
141 		if (topCache != NULL)
142 			topCache->SetUserData(NULL);
143 	}
144 
145 	VMCache* LockSourceCache()
146 	{
147 		if (fBottomCache == NULL || fBottomCache->source == NULL)
148 			return NULL;
149 
150 		VMCache* previousCache = fBottomCache;
151 
152 		fBottomCache = fBottomCache->source;
153 		fBottomCache->Lock();
154 		fBottomCache->AcquireRefLocked();
155 		fBottomCache->SetUserData(previousCache);
156 
157 		return fBottomCache;
158 	}
159 
160 	void LockAllSourceCaches()
161 	{
162 		while (LockSourceCache() != NULL) {
163 		}
164 	}
165 
166 	void Unlock(VMCache* exceptCache = NULL)
167 	{
168 		if (fTopCache == NULL)
169 			return;
170 
171 		// Unlock caches in source -> consumer direction. This is important to
172 		// avoid double-locking and a reversal of locking order in case a cache
173 		// is eligible for merging.
174 		VMCache* cache = fBottomCache;
175 		while (cache != NULL) {
176 			VMCache* nextCache = (VMCache*)cache->UserData();
177 			if (cache != exceptCache)
178 				cache->ReleaseRefAndUnlock(cache != fTopCache);
179 
180 			if (cache == fTopCache)
181 				break;
182 
183 			cache = nextCache;
184 		}
185 
186 		fTopCache = NULL;
187 		fBottomCache = NULL;
188 	}
189 
190 	void UnlockKeepRefs(bool keepTopCacheLocked)
191 	{
192 		if (fTopCache == NULL)
193 			return;
194 
195 		VMCache* nextCache = fBottomCache;
196 		VMCache* cache = NULL;
197 
198 		while (keepTopCacheLocked
199 				? nextCache != fTopCache : cache != fTopCache) {
200 			cache = nextCache;
201 			nextCache = (VMCache*)cache->UserData();
202 			cache->Unlock(cache != fTopCache);
203 		}
204 	}
205 
206 	void RelockCaches(bool topCacheLocked)
207 	{
208 		if (fTopCache == NULL)
209 			return;
210 
211 		VMCache* nextCache = fTopCache;
212 		VMCache* cache = NULL;
213 		if (topCacheLocked) {
214 			cache = nextCache;
215 			nextCache = cache->source;
216 		}
217 
218 		while (cache != fBottomCache && nextCache != NULL) {
219 			VMCache* consumer = cache;
220 			cache = nextCache;
221 			nextCache = cache->source;
222 			cache->Lock();
223 			cache->SetUserData(consumer);
224 		}
225 	}
226 
227 private:
228 	VMCache*	fTopCache;
229 	VMCache*	fBottomCache;
230 };
231 
232 
233 // The memory reserve that an allocation of a certain priority must not touch.
234 static const size_t kMemoryReserveForPriority[] = {
235 	VM_MEMORY_RESERVE_USER,		// user
236 	VM_MEMORY_RESERVE_SYSTEM,	// system
237 	0							// VIP
238 };
239 
240 
241 ObjectCache* gPageMappingsObjectCache;
242 
243 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
244 
245 static off_t sAvailableMemory;
246 static off_t sNeededMemory;
247 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
248 static uint32 sPageFaults;
249 
250 static VMPhysicalPageMapper* sPhysicalPageMapper;
251 
252 #if DEBUG_CACHE_LIST
253 
254 struct cache_info {
255 	VMCache*	cache;
256 	addr_t		page_count;
257 	addr_t		committed;
258 };
259 
260 static const int kCacheInfoTableCount = 100 * 1024;
261 static cache_info* sCacheInfoTable;
262 
263 #endif	// DEBUG_CACHE_LIST
264 
265 
266 // function declarations
267 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
268 	bool addressSpaceCleanup);
269 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
270 	bool isWrite, bool isUser, vm_page** wirePage,
271 	VMAreaWiredRange* wiredRange = NULL);
272 static status_t map_backing_store(VMAddressSpace* addressSpace,
273 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
274 	int protection, int mapping, uint32 flags,
275 	const virtual_address_restrictions* addressRestrictions, bool kernel,
276 	VMArea** _area, void** _virtualAddress);
277 
278 
279 //	#pragma mark -
280 
281 
282 #if VM_PAGE_FAULT_TRACING
283 
284 namespace VMPageFaultTracing {
285 
286 class PageFaultStart : public AbstractTraceEntry {
287 public:
288 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
289 		:
290 		fAddress(address),
291 		fPC(pc),
292 		fWrite(write),
293 		fUser(user)
294 	{
295 		Initialized();
296 	}
297 
298 	virtual void AddDump(TraceOutput& out)
299 	{
300 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
301 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
302 	}
303 
304 private:
305 	addr_t	fAddress;
306 	addr_t	fPC;
307 	bool	fWrite;
308 	bool	fUser;
309 };
310 
311 
312 // page fault errors
313 enum {
314 	PAGE_FAULT_ERROR_NO_AREA		= 0,
315 	PAGE_FAULT_ERROR_KERNEL_ONLY,
316 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
317 	PAGE_FAULT_ERROR_READ_PROTECTED,
318 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
319 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
320 };
321 
322 
323 class PageFaultError : public AbstractTraceEntry {
324 public:
325 	PageFaultError(area_id area, status_t error)
326 		:
327 		fArea(area),
328 		fError(error)
329 	{
330 		Initialized();
331 	}
332 
333 	virtual void AddDump(TraceOutput& out)
334 	{
335 		switch (fError) {
336 			case PAGE_FAULT_ERROR_NO_AREA:
337 				out.Print("page fault error: no area");
338 				break;
339 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
340 				out.Print("page fault error: area: %ld, kernel only", fArea);
341 				break;
342 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
343 				out.Print("page fault error: area: %ld, write protected",
344 					fArea);
345 				break;
346 			case PAGE_FAULT_ERROR_READ_PROTECTED:
347 				out.Print("page fault error: area: %ld, read protected", fArea);
348 				break;
349 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
350 				out.Print("page fault error: kernel touching bad user memory");
351 				break;
352 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
353 				out.Print("page fault error: no address space");
354 				break;
355 			default:
356 				out.Print("page fault error: area: %ld, error: %s", fArea,
357 					strerror(fError));
358 				break;
359 		}
360 	}
361 
362 private:
363 	area_id		fArea;
364 	status_t	fError;
365 };
366 
367 
368 class PageFaultDone : public AbstractTraceEntry {
369 public:
370 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
371 			vm_page* page)
372 		:
373 		fArea(area),
374 		fTopCache(topCache),
375 		fCache(cache),
376 		fPage(page)
377 	{
378 		Initialized();
379 	}
380 
381 	virtual void AddDump(TraceOutput& out)
382 	{
383 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
384 			"page: %p", fArea, fTopCache, fCache, fPage);
385 	}
386 
387 private:
388 	area_id		fArea;
389 	VMCache*	fTopCache;
390 	VMCache*	fCache;
391 	vm_page*	fPage;
392 };
393 
394 }	// namespace VMPageFaultTracing
395 
396 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
397 #else
398 #	define TPF(x) ;
399 #endif	// VM_PAGE_FAULT_TRACING
400 
401 
402 //	#pragma mark -
403 
404 
405 /*!	The page's cache must be locked.
406 */
407 static inline void
408 increment_page_wired_count(vm_page* page)
409 {
410 	if (!page->IsMapped())
411 		atomic_add(&gMappedPagesCount, 1);
412 	page->IncrementWiredCount();
413 }
414 
415 
416 /*!	The page's cache must be locked.
417 */
418 static inline void
419 decrement_page_wired_count(vm_page* page)
420 {
421 	page->DecrementWiredCount();
422 	if (!page->IsMapped())
423 		atomic_add(&gMappedPagesCount, -1);
424 }
425 
426 
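/*!	Returns the virtual address within \a area at which the given \a page of
	the area's cache is mapped, computed from the page's cache offset and the
	area's base and cache offset.
*/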
427 static inline addr_t
428 virtual_page_address(VMArea* area, vm_page* page)
429 {
430 	return area->Base()
431 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
432 }
433 
434 
435 //! You need to have the address space locked when calling this function
436 static VMArea*
437 lookup_area(VMAddressSpace* addressSpace, area_id id)
438 {
439 	VMAreaHash::ReadLock();
440 
441 	VMArea* area = VMAreaHash::LookupLocked(id);
442 	if (area != NULL && area->address_space != addressSpace)
443 		area = NULL;
444 
445 	VMAreaHash::ReadUnlock();
446 
447 	return area;
448 }
449 
450 
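/*!	Allocates and initializes the per-page protection array for \a area.
	Only the three user protection bits are stored, using one nibble per page:
	the low nibble of a byte holds the protection of the even page index, the
	high nibble that of the odd one. All entries are initialized to the area's
	protection.
*/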
451 static status_t
452 allocate_area_page_protections(VMArea* area)
453 {
454 	// In the page protections we store only the three user protections,
455 	// so we use 4 bits per page.
456 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
457 	area->page_protections = (uint8*)malloc_etc(bytes,
458 		HEAP_DONT_LOCK_KERNEL_SPACE);
459 	if (area->page_protections == NULL)
460 		return B_NO_MEMORY;
461 
462 	// init the page protections for all pages to that of the area
463 	uint32 areaProtection = area->protection
464 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
465 	memset(area->page_protections, areaProtection | (areaProtection << 4),
466 		bytes);
467 	return B_OK;
468 }
469 
470 
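/*!	Stores the user protection bits for the page at \a pageAddress in the
	area's page protection array (see allocate_area_page_protections()).
*/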
471 static inline void
472 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
473 {
474 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
475 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
476 	uint8& entry = area->page_protections[pageIndex / 2];
477 	if (pageIndex % 2 == 0)
478 		entry = (entry & 0xf0) | protection;
479 	else
480 		entry = (entry & 0x0f) | (protection << 4);
481 }
482 
483 
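/*!	Returns the protection of the page at \a pageAddress. For kernel areas the
	stored user bits are translated to the corresponding kernel bits; for user
	areas the implicit kernel access (read always, write if user-writable) is
	added.
*/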
484 static inline uint32
485 get_area_page_protection(VMArea* area, addr_t pageAddress)
486 {
487 	if (area->page_protections == NULL)
488 		return area->protection;
489 
490 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
491 	uint32 protection = area->page_protections[pageIndex / 2];
492 	if (pageIndex % 2 == 0)
493 		protection &= 0x0f;
494 	else
495 		protection >>= 4;
496 
497 	// If this is a kernel area we translate the user flags to kernel flags.
498 	if (area->address_space == VMAddressSpace::Kernel()) {
499 		uint32 kernelProtection = 0;
500 		if ((protection & B_READ_AREA) != 0)
501 			kernelProtection |= B_KERNEL_READ_AREA;
502 		if ((protection & B_WRITE_AREA) != 0)
503 			kernelProtection |= B_KERNEL_WRITE_AREA;
504 
505 		return kernelProtection;
506 	}
507 
508 	return protection | B_KERNEL_READ_AREA
509 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
510 }
511 
512 
513 /*!	The caller must have reserved enough pages that the translation map
514 	implementation might need to map this page.
515 	The page's cache must be locked.
516 */
517 static status_t
518 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
519 	vm_page_reservation* reservation)
520 {
521 	VMTranslationMap* map = area->address_space->TranslationMap();
522 
523 	bool wasMapped = page->IsMapped();
524 
525 	if (area->wiring == B_NO_LOCK) {
526 		DEBUG_PAGE_ACCESS_CHECK(page);
527 
528 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
529 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
530 			gPageMappingsObjectCache,
531 			CACHE_DONT_WAIT_FOR_MEMORY
532 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
533 		if (mapping == NULL)
534 			return B_NO_MEMORY;
535 
536 		mapping->page = page;
537 		mapping->area = area;
538 
539 		map->Lock();
540 
541 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
542 			area->MemoryType(), reservation);
543 
544 		// insert mapping into lists
545 		if (!page->IsMapped())
546 			atomic_add(&gMappedPagesCount, 1);
547 
548 		page->mappings.Add(mapping);
549 		area->mappings.Add(mapping);
550 
551 		map->Unlock();
552 	} else {
553 		DEBUG_PAGE_ACCESS_CHECK(page);
554 
555 		map->Lock();
556 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
557 			area->MemoryType(), reservation);
558 		map->Unlock();
559 
560 		increment_page_wired_count(page);
561 	}
562 
563 	if (!wasMapped) {
564 		// The page is mapped now, so it must not remain in the cached queue.
565 		// It also makes sense to move it from the inactive to the active queue,
566 		// since otherwise the page daemon wouldn't come to keep track of it (in
567 		// idle mode) -- if the page isn't touched, it will be deactivated after
568 		// a full iteration through the queue at the latest.
569 		if (page->State() == PAGE_STATE_CACHED
570 				|| page->State() == PAGE_STATE_INACTIVE) {
571 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
572 		}
573 	}
574 
575 	return B_OK;
576 }
577 
578 
579 /*!	Unmaps the page at \a virtualAddress from \a area. The caller must hold
580 	the lock of the page's cache.
581 */
582 static inline bool
583 unmap_page(VMArea* area, addr_t virtualAddress)
584 {
585 	return area->address_space->TranslationMap()->UnmapPage(area,
586 		virtualAddress, true);
587 }
588 
589 
590 /*!	Unmaps all pages in the given range from \a area. The caller must hold
591 	the locks of all mapped pages' caches.
592 */
593 static inline void
594 unmap_pages(VMArea* area, addr_t base, size_t size)
595 {
596 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
597 }
598 
599 
600 /*!	Cuts a piece out of an area. If the given cut range covers the complete
601 	area, it is deleted. If it covers the beginning or the end, the area is
602 	resized accordingly. If the range covers some part in the middle of the
603 	area, it is split in two; in this case the second area is returned via
604 	\a _secondArea (the variable is left untouched in the other cases).
605 	The address space must be write locked.
606 	The caller must ensure that no part of the given range is wired.
607 */
608 static status_t
609 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
610 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
611 {
612 	// Does the cut range intersect with the area at all?
613 	addr_t areaLast = area->Base() + (area->Size() - 1);
614 	if (area->Base() > lastAddress || areaLast < address)
615 		return B_OK;
616 
617 	// Is the area fully covered?
618 	if (area->Base() >= address && areaLast <= lastAddress) {
619 		delete_area(addressSpace, area, false);
620 		return B_OK;
621 	}
622 
623 	int priority;
624 	uint32 allocationFlags;
625 	if (addressSpace == VMAddressSpace::Kernel()) {
626 		priority = VM_PRIORITY_SYSTEM;
627 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
628 			| HEAP_DONT_LOCK_KERNEL_SPACE;
629 	} else {
630 		priority = VM_PRIORITY_USER;
631 		allocationFlags = 0;
632 	}
633 
634 	VMCache* cache = vm_area_get_locked_cache(area);
635 	VMCacheChainLocker cacheChainLocker(cache);
636 	cacheChainLocker.LockAllSourceCaches();
637 
638 	// Cut the end only?
639 	if (areaLast <= lastAddress) {
640 		size_t oldSize = area->Size();
641 		size_t newSize = address - area->Base();
642 
643 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
644 			allocationFlags);
645 		if (error != B_OK)
646 			return error;
647 
648 		// unmap pages
649 		unmap_pages(area, address, oldSize - newSize);
650 
651 		// If no one else uses the area's cache, we can resize it, too.
652 		if (cache->areas == area && area->cache_next == NULL
653 			&& cache->consumers.IsEmpty()
654 			&& cache->type == CACHE_TYPE_RAM) {
655 			// Since VMCache::Resize() can temporarily drop the lock, we must
656 			// unlock all lower caches to prevent locking order inversion.
657 			cacheChainLocker.Unlock(cache);
658 			cache->Resize(cache->virtual_base + newSize, priority);
659 			cache->ReleaseRefAndUnlock();
660 		}
661 
662 		return B_OK;
663 	}
664 
665 	// Cut the beginning only?
666 	if (area->Base() >= address) {
667 		addr_t oldBase = area->Base();
668 		addr_t newBase = lastAddress + 1;
669 		size_t newSize = areaLast - lastAddress;
670 
671 		// unmap pages
672 		unmap_pages(area, oldBase, newBase - oldBase);
673 
674 		// resize the area
675 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
676 			allocationFlags);
677 		if (error != B_OK)
678 			return error;
679 
680 		// TODO: If no one else uses the area's cache, we should resize it, too!
681 
682 		area->cache_offset += newBase - oldBase;
683 
684 		return B_OK;
685 	}
686 
687 	// The tough part -- cut a piece out of the middle of the area.
688 	// We do that by shrinking the area to the beginning section and creating a
689 	// new area for the end section.
690 
691 	addr_t firstNewSize = address - area->Base();
692 	addr_t secondBase = lastAddress + 1;
693 	addr_t secondSize = areaLast - lastAddress;
694 
695 	// unmap pages
696 	unmap_pages(area, address, area->Size() - firstNewSize);
697 
698 	// resize the area
699 	addr_t oldSize = area->Size();
700 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
701 		allocationFlags);
702 	if (error != B_OK)
703 		return error;
704 
705 	// TODO: If no one else uses the area's cache, we might want to create a
706 	// new cache for the second area, transfer the concerned pages from the
707 	// first cache to it and resize the first cache.
708 
709 	// map the second area
710 	virtual_address_restrictions addressRestrictions = {};
711 	addressRestrictions.address = (void*)secondBase;
712 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
713 	VMArea* secondArea;
714 	error = map_backing_store(addressSpace, cache,
715 		area->cache_offset + (secondBase - area->Base()), area->name,
716 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
717 		&addressRestrictions, kernel, &secondArea, NULL);
718 	if (error != B_OK) {
719 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
720 		return error;
721 	}
722 
723 	// We need a cache reference for the new area.
724 	cache->AcquireRefLocked();
725 
726 	if (_secondArea != NULL)
727 		*_secondArea = secondArea;
728 
729 	return B_OK;
730 }
731 
732 
733 /*!	Deletes all areas in the given address range.
734 	The address space must be write-locked.
735 	The caller must ensure that no part of the given range is wired.
736 */
737 static status_t
738 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
739 	bool kernel)
740 {
741 	size = PAGE_ALIGN(size);
742 	addr_t lastAddress = address + (size - 1);
743 
744 	// Check whether the caller is allowed to modify the concerned areas.
745 	if (!kernel) {
746 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
747 				VMArea* area = it.Next();) {
748 			addr_t areaLast = area->Base() + (area->Size() - 1);
749 			if (area->Base() < lastAddress && address < areaLast) {
750 				if ((area->protection & B_KERNEL_AREA) != 0)
751 					return B_NOT_ALLOWED;
752 			}
753 		}
754 	}
755 
756 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
757 			VMArea* area = it.Next();) {
758 		addr_t areaLast = area->Base() + (area->Size() - 1);
759 		if (area->Base() < lastAddress && address < areaLast) {
760 			status_t error = cut_area(addressSpace, area, address,
761 				lastAddress, NULL, kernel);
762 			if (error != B_OK)
763 				return error;
764 				// Failing after already messing with areas is ugly, but we
765 				// can't do anything about it.
766 		}
767 	}
768 
769 	return B_OK;
770 }
771 
772 
773 /*! You need to hold the lock of the cache and the write lock of the address
774 	space when calling this function.
775 	Note that in case of error your cache will be temporarily unlocked.
776 	If \a addressSpec is \c B_EXACT_ADDRESS and the
777 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
778 	that no part of the specified address range (base \c *_virtualAddress, size
779 	\a size) is wired.
780 */
781 static status_t
782 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
783 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
784 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
785 	bool kernel, VMArea** _area, void** _virtualAddress)
786 {
787 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%Lx, "
788 		"size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName "
789 		"'%s'\n", addressSpace, cache, addressRestrictions->address, offset,
790 		size, addressRestrictions->address_specification, wiring, protection,
791 		_area, areaName));
792 	cache->AssertLocked();
793 
794 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
795 		| HEAP_DONT_LOCK_KERNEL_SPACE;
796 	int priority;
797 	if (addressSpace != VMAddressSpace::Kernel()) {
798 		priority = VM_PRIORITY_USER;
799 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
800 		priority = VM_PRIORITY_VIP;
801 		allocationFlags |= HEAP_PRIORITY_VIP;
802 	} else
803 		priority = VM_PRIORITY_SYSTEM;
804 
805 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
806 		allocationFlags);
807 	if (area == NULL)
808 		return B_NO_MEMORY;
809 
810 	status_t status;
811 
812 	// if this is a private map, we need to create a new cache
813 	// to handle the private copies of pages as they are written to
814 	VMCache* sourceCache = cache;
815 	if (mapping == REGION_PRIVATE_MAP) {
816 		VMCache* newCache;
817 
818 		// create an anonymous cache
819 		bool isStack = (protection & B_STACK_AREA) != 0;
820 		status = VMCacheFactory::CreateAnonymousCache(newCache,
821 			isStack || (protection & B_OVERCOMMITTING_AREA) != 0, 0,
822 			isStack ? USER_STACK_GUARD_PAGES : 0, true, VM_PRIORITY_USER);
823 		if (status != B_OK)
824 			goto err1;
825 
826 		newCache->Lock();
827 		newCache->temporary = 1;
828 		newCache->virtual_base = offset;
829 		newCache->virtual_end = offset + size;
830 
831 		cache->AddConsumer(newCache);
832 
833 		cache = newCache;
834 	}
835 
836 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
837 		status = cache->SetMinimalCommitment(size, priority);
838 		if (status != B_OK)
839 			goto err2;
840 	}
841 
842 	// check to see if this address space has entered DELETE state
843 	if (addressSpace->IsBeingDeleted()) {
844 		// okay, someone is trying to delete this address space now, so we can't
845 		// insert the area, so back out
846 		status = B_BAD_TEAM_ID;
847 		goto err2;
848 	}
849 
850 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
851 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
852 		status = unmap_address_range(addressSpace,
853 			(addr_t)addressRestrictions->address, size, kernel);
854 		if (status != B_OK)
855 			goto err2;
856 	}
857 
858 	status = addressSpace->InsertArea(area, size, addressRestrictions,
859 		allocationFlags, _virtualAddress);
860 	if (status != B_OK) {
861 		// TODO: wait and try again once this is working in the backend
862 #if 0
863 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
864 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
865 				0, 0);
866 		}
867 #endif
868 		goto err2;
869 	}
870 
871 	// attach the cache to the area
872 	area->cache = cache;
873 	area->cache_offset = offset;
874 
875 	// point the cache back to the area
876 	cache->InsertAreaLocked(area);
877 	if (mapping == REGION_PRIVATE_MAP)
878 		cache->Unlock();
879 
880 	// insert the area in the global area hash table
881 	VMAreaHash::Insert(area);
882 
883 	// grab a ref to the address space (the area holds this)
884 	addressSpace->Get();
885 
886 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
887 //		cache, sourceCache, areaName, area);
888 
889 	*_area = area;
890 	return B_OK;
891 
892 err2:
893 	if (mapping == REGION_PRIVATE_MAP) {
894 		// We created this cache, so we must delete it again. Note that we
895 		// need to temporarily unlock the source cache or we'll otherwise
896 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
897 		sourceCache->Unlock();
898 		cache->ReleaseRefAndUnlock();
899 		sourceCache->Lock();
900 	}
901 err1:
902 	addressSpace->DeleteArea(area, allocationFlags);
903 	return status;
904 }
905 
906 
907 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
908 	  locker1, locker2).
909 */
910 template<typename LockerType1, typename LockerType2>
911 static inline bool
912 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
913 {
914 	area->cache->AssertLocked();
915 
916 	VMAreaUnwiredWaiter waiter;
917 	if (!area->AddWaiterIfWired(&waiter))
918 		return false;
919 
920 	// unlock everything and wait
921 	if (locker1 != NULL)
922 		locker1->Unlock();
923 	if (locker2 != NULL)
924 		locker2->Unlock();
925 
926 	waiter.waitEntry.Wait();
927 
928 	return true;
929 }
930 
931 
932 /*!	Checks whether the given area has any wired ranges intersecting with the
933 	specified range and waits, if so.
934 
935 	When it has to wait, the function calls \c Unlock() on both \a locker1
936 	and \a locker2, if given.
937 	The area's top cache must be locked and must be unlocked as a side effect
938 	of calling \c Unlock() on either \a locker1 or \a locker2.
939 
940 	If the function does not have to wait it does not modify or unlock any
941 	object.
942 
943 	\param area The area to be checked.
944 	\param base The base address of the range to check.
945 	\param size The size of the address range to check.
946 	\param locker1 An object to be unlocked before starting to wait (may
947 		be \c NULL).
948 	\param locker2 An object to be unlocked before starting to wait (may
949 		be \c NULL).
950 	\return \c true, if the function had to wait, \c false otherwise.
951 */
952 template<typename LockerType1, typename LockerType2>
953 static inline bool
954 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
955 	LockerType1* locker1, LockerType2* locker2)
956 {
957 	area->cache->AssertLocked();
958 
959 	VMAreaUnwiredWaiter waiter;
960 	if (!area->AddWaiterIfWired(&waiter, base, size))
961 		return false;
962 
963 	// unlock everything and wait
964 	if (locker1 != NULL)
965 		locker1->Unlock();
966 	if (locker2 != NULL)
967 		locker2->Unlock();
968 
969 	waiter.waitEntry.Wait();
970 
971 	return true;
972 }
973 
974 
975 /*!	Checks whether the given address space has any wired ranges intersecting
976 	with the specified range and waits, if so.
977 
978 	Similar to wait_if_area_range_is_wired(), with the following differences:
979 	- All areas intersecting with the range are checked (respectively all until
980 	  one is found that contains a wired range intersecting with the given
981 	  range).
982 	- The given address space must at least be read-locked and must be unlocked
983 	  when \c Unlock() is called on \a locker.
984 	- None of the areas' caches are allowed to be locked.
985 */
986 template<typename LockerType>
987 static inline bool
988 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
989 	size_t size, LockerType* locker)
990 {
991 	addr_t end = base + size - 1;
992 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
993 			VMArea* area = it.Next();) {
994 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
995 		if (area->Base() > end)
996 			return false;
997 
998 		if (base >= area->Base() + area->Size() - 1)
999 			continue;
1000 
1001 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1002 
1003 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1004 			return true;
1005 	}
1006 
1007 	return false;
1008 }
1009 
1010 
1011 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1012 	It must be called in a situation where the kernel address space may be
1013 	locked.
1014 */
1015 status_t
1016 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1017 {
1018 	AddressSpaceReadLocker locker;
1019 	VMArea* area;
1020 	status_t status = locker.SetFromArea(id, area);
1021 	if (status != B_OK)
1022 		return status;
1023 
1024 	if (area->page_protections == NULL) {
1025 		status = allocate_area_page_protections(area);
1026 		if (status != B_OK)
1027 			return status;
1028 	}
1029 
1030 	*cookie = (void*)area;
1031 	return B_OK;
1032 }
1033 
1034 
1035 /*!	This is a debug helper function that can only be used in very specific
1036 	situations.
1037 	Sets protection for the given address range to the protection specified.
1038 	If \a protection is 0 then the involved pages will be marked non-present
1039 	in the translation map to cause a fault on access. The pages aren't
1040 	actually unmapped however so that they can be marked present again with
1041 	additional calls to this function. For this to work the area must be
1042 	fully locked in memory so that the pages aren't otherwise touched.
1043 	This function does not lock the kernel address space and needs to be
1044 	supplied with a \a cookie retrieved from a successful call to
1045 	vm_prepare_kernel_area_debug_protection().
1046 */
1047 status_t
1048 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1049 	uint32 protection)
1050 {
1051 	// check address range
1052 	addr_t address = (addr_t)_address;
1053 	size = PAGE_ALIGN(size);
1054 
1055 	if ((address % B_PAGE_SIZE) != 0
1056 		|| (addr_t)address + size < (addr_t)address
1057 		|| !IS_KERNEL_ADDRESS(address)
1058 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1059 		return B_BAD_VALUE;
1060 	}
1061 
1062 	// Translate the kernel protection to user protection as we only store that.
1063 	if ((protection & B_KERNEL_READ_AREA) != 0)
1064 		protection |= B_READ_AREA;
1065 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1066 		protection |= B_WRITE_AREA;
1067 
1068 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1069 	VMTranslationMap* map = addressSpace->TranslationMap();
1070 	VMArea* area = (VMArea*)cookie;
1071 
1072 	addr_t offset = address - area->Base();
1073 	if (area->Size() - offset < size) {
1074 		panic("protect range not fully within supplied area");
1075 		return B_BAD_VALUE;
1076 	}
1077 
1078 	if (area->page_protections == NULL) {
1079 		panic("area has no page protections");
1080 		return B_BAD_VALUE;
1081 	}
1082 
1083 	// Invalidate the mapping entries so any access to them will fault or
1084 	// restore the mapping entries unchanged so that lookup will succeed again.
1085 	map->Lock();
1086 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1087 	map->Unlock();
1088 
1089 	// And set the proper page protections so that the fault case will actually
1090 	// fail and not simply try to map a new page.
1091 	for (addr_t pageAddress = address; pageAddress < address + size;
1092 			pageAddress += B_PAGE_SIZE) {
1093 		set_area_page_protection(area, pageAddress, protection);
1094 	}
1095 
1096 	return B_OK;
1097 }
1098 
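// A minimal (hypothetical) usage sketch for the two debug protection helpers
// above -- prepare a cookie once, then toggle protection on page-aligned
// subranges of the fully locked area:
//
//	void* cookie;
//	if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
//		// make the page fault on any access
//		vm_set_kernel_area_debug_protection(cookie, pageAddress, B_PAGE_SIZE, 0);
//		// ... later make it accessible again
//		vm_set_kernel_area_debug_protection(cookie, pageAddress, B_PAGE_SIZE,
//			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	}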
1099 
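/*!	Blocks the given address range in the kernel address space by creating a
	wired area backed by an empty anonymous cache over it, so that nothing
	else can be mapped there. Returns the new area's ID on success, an error
	code otherwise.
*/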
1100 status_t
1101 vm_block_address_range(const char* name, void* address, addr_t size)
1102 {
1103 	if (!arch_vm_supports_protection(0))
1104 		return B_NOT_SUPPORTED;
1105 
1106 	AddressSpaceWriteLocker locker;
1107 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1108 	if (status != B_OK)
1109 		return status;
1110 
1111 	VMAddressSpace* addressSpace = locker.AddressSpace();
1112 
1113 	// create an anonymous cache
1114 	VMCache* cache;
1115 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1116 		VM_PRIORITY_SYSTEM);
1117 	if (status != B_OK)
1118 		return status;
1119 
1120 	cache->temporary = 1;
1121 	cache->virtual_end = size;
1122 	cache->Lock();
1123 
1124 	VMArea* area;
1125 	virtual_address_restrictions addressRestrictions = {};
1126 	addressRestrictions.address = address;
1127 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1128 	status = map_backing_store(addressSpace, cache, 0, name, size,
1129 		B_ALREADY_WIRED, B_ALREADY_WIRED, REGION_NO_PRIVATE_MAP, 0,
1130 		&addressRestrictions, true, &area, NULL);
1131 	if (status != B_OK) {
1132 		cache->ReleaseRefAndUnlock();
1133 		return status;
1134 	}
1135 
1136 	cache->Unlock();
1137 	area->cache_type = CACHE_TYPE_RAM;
1138 	return area->id;
1139 }
1140 
1141 
1142 status_t
1143 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1144 {
1145 	AddressSpaceWriteLocker locker(team);
1146 	if (!locker.IsLocked())
1147 		return B_BAD_TEAM_ID;
1148 
1149 	VMAddressSpace* addressSpace = locker.AddressSpace();
1150 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1151 		addressSpace == VMAddressSpace::Kernel()
1152 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1153 }
1154 
1155 
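/*!	Reserves an address range in the given team's address space without
	creating an area in it; see vm_unreserve_address_range() for releasing the
	reservation again.
*/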
1156 status_t
1157 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1158 	addr_t size, uint32 flags)
1159 {
1160 	if (size == 0)
1161 		return B_BAD_VALUE;
1162 
1163 	AddressSpaceWriteLocker locker(team);
1164 	if (!locker.IsLocked())
1165 		return B_BAD_TEAM_ID;
1166 
1167 	virtual_address_restrictions addressRestrictions = {};
1168 	addressRestrictions.address = *_address;
1169 	addressRestrictions.address_specification = addressSpec;
1170 	VMAddressSpace* addressSpace = locker.AddressSpace();
1171 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1172 		addressSpace == VMAddressSpace::Kernel()
1173 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1174 		_address);
1175 }
1176 
1177 
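/*!	Creates an area backed by an anonymous (RAM) cache. Depending on \a wiring
	the pages are either allocated and mapped up front (B_FULL_LOCK,
	B_CONTIGUOUS and friends), expected to be mapped already (B_ALREADY_WIRED,
	boot time only), or faulted in on demand (B_NO_LOCK, B_LAZY_LOCK). Memory
	and page reservations are made before the address space is locked to
	reduce the chance of failure.
*/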
1178 area_id
1179 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1180 	uint32 wiring, uint32 protection, uint32 flags,
1181 	const virtual_address_restrictions* virtualAddressRestrictions,
1182 	const physical_address_restrictions* physicalAddressRestrictions,
1183 	bool kernel, void** _address)
1184 {
1185 	VMArea* area;
1186 	VMCache* cache;
1187 	vm_page* page = NULL;
1188 	bool isStack = (protection & B_STACK_AREA) != 0;
1189 	page_num_t guardPages;
1190 	bool canOvercommit = false;
1191 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1192 		? VM_PAGE_ALLOC_CLEAR : 0;
1193 
1194 	TRACE(("create_anonymous_area [%ld] %s: size 0x%lx\n", team, name, size));
1195 
1196 	size = PAGE_ALIGN(size);
1197 
1198 	if (size == 0)
1199 		return B_BAD_VALUE;
1200 	if (!arch_vm_supports_protection(protection))
1201 		return B_NOT_SUPPORTED;
1202 
1203 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1204 		canOvercommit = true;
1205 
1206 #ifdef DEBUG_KERNEL_STACKS
1207 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1208 		isStack = true;
1209 #endif
1210 
1211 	// check parameters
1212 	switch (virtualAddressRestrictions->address_specification) {
1213 		case B_ANY_ADDRESS:
1214 		case B_EXACT_ADDRESS:
1215 		case B_BASE_ADDRESS:
1216 		case B_ANY_KERNEL_ADDRESS:
1217 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1218 			break;
1219 
1220 		default:
1221 			return B_BAD_VALUE;
1222 	}
1223 
1224 	// If low or high physical address restrictions are given, we force
1225 	// B_CONTIGUOUS wiring, since only then we'll use
1226 	// vm_page_allocate_page_run() which deals with those restrictions.
1227 	if (physicalAddressRestrictions->low_address != 0
1228 		|| physicalAddressRestrictions->high_address != 0) {
1229 		wiring = B_CONTIGUOUS;
1230 	}
1231 
1232 	physical_address_restrictions stackPhysicalRestrictions;
1233 	bool doReserveMemory = false;
1234 	switch (wiring) {
1235 		case B_NO_LOCK:
1236 			break;
1237 		case B_FULL_LOCK:
1238 		case B_LAZY_LOCK:
1239 		case B_CONTIGUOUS:
1240 			doReserveMemory = true;
1241 			break;
1242 		case B_ALREADY_WIRED:
1243 			break;
1244 		case B_LOMEM:
1245 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1246 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1247 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1248 			wiring = B_CONTIGUOUS;
1249 			doReserveMemory = true;
1250 			break;
1251 		case B_32_BIT_FULL_LOCK:
1252 			if (B_HAIKU_PHYSICAL_BITS <= 32
1253 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1254 				wiring = B_FULL_LOCK;
1255 				doReserveMemory = true;
1256 				break;
1257 			}
1258 			// TODO: We don't really support this mode efficiently. Just fall
1259 			// through for now ...
1260 		case B_32_BIT_CONTIGUOUS:
1261 			#if B_HAIKU_PHYSICAL_BITS > 32
1262 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1263 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1264 					stackPhysicalRestrictions.high_address
1265 						= (phys_addr_t)1 << 32;
1266 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1267 				}
1268 			#endif
1269 			wiring = B_CONTIGUOUS;
1270 			doReserveMemory = true;
1271 			break;
1272 		default:
1273 			return B_BAD_VALUE;
1274 	}
1275 
1276 	// Optimization: For a single-page contiguous allocation without low/high
1277 	// memory restriction B_FULL_LOCK wiring suffices.
1278 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1279 		&& physicalAddressRestrictions->low_address == 0
1280 		&& physicalAddressRestrictions->high_address == 0) {
1281 		wiring = B_FULL_LOCK;
1282 	}
1283 
1284 	// For full lock or contiguous areas we're also going to map the pages and
1285 	// thus need to reserve pages for the mapping backend upfront.
1286 	addr_t reservedMapPages = 0;
1287 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1288 		AddressSpaceWriteLocker locker;
1289 		status_t status = locker.SetTo(team);
1290 		if (status != B_OK)
1291 			return status;
1292 
1293 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1294 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1295 	}
1296 
1297 	int priority;
1298 	if (team != VMAddressSpace::KernelID())
1299 		priority = VM_PRIORITY_USER;
1300 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1301 		priority = VM_PRIORITY_VIP;
1302 	else
1303 		priority = VM_PRIORITY_SYSTEM;
1304 
1305 	// Reserve memory before acquiring the address space lock. This reduces the
1306 	// chances of failure, since while holding the write lock to the address
1307 	// space (if it is the kernel address space that is), the low memory handler
1308 	// won't be able to free anything for us.
1309 	addr_t reservedMemory = 0;
1310 	if (doReserveMemory) {
1311 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1312 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1313 			return B_NO_MEMORY;
1314 		reservedMemory = size;
1315 		// TODO: We don't reserve the memory for the pages for the page
1316 		// directories/tables. We actually need to do so, since we currently
1317 		// don't reclaim them (and probably can't reclaim all of them anyway).
1318 		// Thus there are actually fewer physical pages than there should be,
1319 		// which can get the VM into trouble in low memory situations.
1320 	}
1321 
1322 	AddressSpaceWriteLocker locker;
1323 	VMAddressSpace* addressSpace;
1324 	status_t status;
1325 
1326 	// For full lock areas reserve the pages before locking the address
1327 	// space. E.g. block caches can't release their memory while we hold the
1328 	// address space lock.
1329 	page_num_t reservedPages = reservedMapPages;
1330 	if (wiring == B_FULL_LOCK)
1331 		reservedPages += size / B_PAGE_SIZE;
1332 
1333 	vm_page_reservation reservation;
1334 	if (reservedPages > 0) {
1335 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1336 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1337 					priority)) {
1338 				reservedPages = 0;
1339 				status = B_WOULD_BLOCK;
1340 				goto err0;
1341 			}
1342 		} else
1343 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1344 	}
1345 
1346 	if (wiring == B_CONTIGUOUS) {
1347 		// we try to allocate the page run here upfront as this may easily
1348 		// fail for obvious reasons
1349 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1350 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1351 		if (page == NULL) {
1352 			status = B_NO_MEMORY;
1353 			goto err0;
1354 		}
1355 	}
1356 
1357 	// Lock the address space and, if B_EXACT_ADDRESS and
1358 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1359 	// is not wired.
1360 	do {
1361 		status = locker.SetTo(team);
1362 		if (status != B_OK)
1363 			goto err1;
1364 
1365 		addressSpace = locker.AddressSpace();
1366 	} while (virtualAddressRestrictions->address_specification
1367 			== B_EXACT_ADDRESS
1368 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1369 		&& wait_if_address_range_is_wired(addressSpace,
1370 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1371 
1372 	// create an anonymous cache
1373 	// if it's a stack, make sure that at least two pages are available
1374 	guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0
1375 		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
1376 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1377 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1378 		wiring == B_NO_LOCK, priority);
1379 	if (status != B_OK)
1380 		goto err1;
1381 
1382 	cache->temporary = 1;
1383 	cache->virtual_end = size;
1384 	cache->committed_size = reservedMemory;
1385 		// TODO: This should be done via a method.
1386 	reservedMemory = 0;
1387 
1388 	cache->Lock();
1389 
1390 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1391 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1392 		kernel, &area, _address);
1393 
1394 	if (status != B_OK) {
1395 		cache->ReleaseRefAndUnlock();
1396 		goto err1;
1397 	}
1398 
1399 	locker.DegradeToReadLock();
1400 
1401 	switch (wiring) {
1402 		case B_NO_LOCK:
1403 		case B_LAZY_LOCK:
1404 			// do nothing - the pages are mapped in as needed
1405 			break;
1406 
1407 		case B_FULL_LOCK:
1408 		{
1409 			// Allocate and map all pages for this area
1410 
1411 			off_t offset = 0;
1412 			for (addr_t address = area->Base();
1413 					address < area->Base() + (area->Size() - 1);
1414 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1415 #ifdef DEBUG_KERNEL_STACKS
1416 #	ifdef STACK_GROWS_DOWNWARDS
1417 				if (isStack && address < area->Base()
1418 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1419 #	else
1420 				if (isStack && address >= area->Base() + area->Size()
1421 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1422 #	endif
1423 					continue;
1424 #endif
1425 				vm_page* page = vm_page_allocate_page(&reservation,
1426 					PAGE_STATE_WIRED | pageAllocFlags);
1427 				cache->InsertPage(page, offset);
1428 				map_page(area, page, address, protection, &reservation);
1429 
1430 				DEBUG_PAGE_ACCESS_END(page);
1431 			}
1432 
1433 			break;
1434 		}
1435 
1436 		case B_ALREADY_WIRED:
1437 		{
1438 			// The pages should already be mapped. This is only really useful
1439 			// during boot time. Find the appropriate vm_page objects and stick
1440 			// them in the cache object.
1441 			VMTranslationMap* map = addressSpace->TranslationMap();
1442 			off_t offset = 0;
1443 
1444 			if (!gKernelStartup)
1445 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1446 
1447 			map->Lock();
1448 
1449 			for (addr_t virtualAddress = area->Base();
1450 					virtualAddress < area->Base() + (area->Size() - 1);
1451 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1452 				phys_addr_t physicalAddress;
1453 				uint32 flags;
1454 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1455 				if (status < B_OK) {
1456 					panic("looking up mapping failed for va 0x%lx\n",
1457 						virtualAddress);
1458 				}
1459 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1460 				if (page == NULL) {
1461 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1462 						"\n", physicalAddress);
1463 				}
1464 
1465 				DEBUG_PAGE_ACCESS_START(page);
1466 
1467 				cache->InsertPage(page, offset);
1468 				increment_page_wired_count(page);
1469 				vm_page_set_state(page, PAGE_STATE_WIRED);
1470 				page->busy = false;
1471 
1472 				DEBUG_PAGE_ACCESS_END(page);
1473 			}
1474 
1475 			map->Unlock();
1476 			break;
1477 		}
1478 
1479 		case B_CONTIGUOUS:
1480 		{
1481 			// We have already allocated our contiguous page run, so we can now
1482 			// just map them in the address space
1483 			VMTranslationMap* map = addressSpace->TranslationMap();
1484 			phys_addr_t physicalAddress
1485 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1486 			addr_t virtualAddress = area->Base();
1487 			off_t offset = 0;
1488 
1489 			map->Lock();
1490 
1491 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1492 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1493 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1494 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1495 				if (page == NULL)
1496 					panic("couldn't lookup physical page just allocated\n");
1497 
1498 				status = map->Map(virtualAddress, physicalAddress, protection,
1499 					area->MemoryType(), &reservation);
1500 				if (status < B_OK)
1501 					panic("couldn't map physical page in page run\n");
1502 
1503 				cache->InsertPage(page, offset);
1504 				increment_page_wired_count(page);
1505 
1506 				DEBUG_PAGE_ACCESS_END(page);
1507 			}
1508 
1509 			map->Unlock();
1510 			break;
1511 		}
1512 
1513 		default:
1514 			break;
1515 	}
1516 
1517 	cache->Unlock();
1518 
1519 	if (reservedPages > 0)
1520 		vm_page_unreserve_pages(&reservation);
1521 
1522 	TRACE(("vm_create_anonymous_area: done\n"));
1523 
1524 	area->cache_type = CACHE_TYPE_RAM;
1525 	return area->id;
1526 
1527 err1:
1528 	if (wiring == B_CONTIGUOUS) {
1529 		// we had reserved the area space upfront...
1530 		phys_addr_t pageNumber = page->physical_page_number;
1531 		int32 i;
1532 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1533 			page = vm_lookup_page(pageNumber);
1534 			if (page == NULL)
1535 				panic("couldn't lookup physical page just allocated\n");
1536 
1537 			vm_page_set_state(page, PAGE_STATE_FREE);
1538 		}
1539 	}
1540 
1541 err0:
1542 	if (reservedPages > 0)
1543 		vm_page_unreserve_pages(&reservation);
1544 	if (reservedMemory > 0)
1545 		vm_unreserve_memory(reservedMemory);
1546 
1547 	return status;
1548 }
1549 
1550 
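/*!	Maps the given physical address range into the team's address space,
	backed by a device cache. The physical address is rounded down to a page
	boundary and the returned *_address is adjusted by the same offset. The
	memory type may be ORed into \a addressSpec (B_MTR_* flags); uncached is
	used if none is given.
*/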
1551 area_id
1552 vm_map_physical_memory(team_id team, const char* name, void** _address,
1553 	uint32 addressSpec, addr_t size, uint32 protection,
1554 	phys_addr_t physicalAddress, bool alreadyWired)
1555 {
1556 	VMArea* area;
1557 	VMCache* cache;
1558 	addr_t mapOffset;
1559 
1560 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
1561 		"spec = %ld, size = %lu, protection = %ld, phys = %#" B_PRIxPHYSADDR
1562 		")\n", team, name, *_address, addressSpec, size, protection,
1563 		physicalAddress));
1564 
1565 	if (!arch_vm_supports_protection(protection))
1566 		return B_NOT_SUPPORTED;
1567 
1568 	AddressSpaceWriteLocker locker(team);
1569 	if (!locker.IsLocked())
1570 		return B_BAD_TEAM_ID;
1571 
1572 	// if the physical address is not page aligned,
1573 	// move the actual area down to align it on a page boundary
1574 	mapOffset = physicalAddress % B_PAGE_SIZE;
1575 	size += mapOffset;
1576 	physicalAddress -= mapOffset;
1577 
1578 	size = PAGE_ALIGN(size);
1579 
1580 	// create a device cache
1581 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1582 	if (status != B_OK)
1583 		return status;
1584 
1585 	cache->virtual_end = size;
1586 
1587 	cache->Lock();
1588 
1589 	virtual_address_restrictions addressRestrictions = {};
1590 	addressRestrictions.address = *_address;
1591 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1592 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1593 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1594 		true, &area, _address);
1595 
1596 	if (status < B_OK)
1597 		cache->ReleaseRefLocked();
1598 
1599 	cache->Unlock();
1600 
1601 	if (status == B_OK) {
1602 		// set requested memory type -- use uncached, if not given
1603 		uint32 memoryType = addressSpec & B_MTR_MASK;
1604 		if (memoryType == 0)
1605 			memoryType = B_MTR_UC;
1606 
1607 		area->SetMemoryType(memoryType);
1608 
1609 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1610 		if (status != B_OK)
1611 			delete_area(locker.AddressSpace(), area, false);
1612 	}
1613 
1614 	if (status != B_OK)
1615 		return status;
1616 
1617 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1618 
1619 	if (alreadyWired) {
1620 		// The area is already mapped, but possibly not with the right
1621 		// memory type.
1622 		map->Lock();
1623 		map->ProtectArea(area, area->protection);
1624 		map->Unlock();
1625 	} else {
1626 		// Map the area completely.
1627 
1628 		// reserve pages needed for the mapping
1629 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1630 			area->Base() + (size - 1));
1631 		vm_page_reservation reservation;
1632 		vm_page_reserve_pages(&reservation, reservePages,
1633 			team == VMAddressSpace::KernelID()
1634 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1635 
1636 		map->Lock();
1637 
1638 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1639 			map->Map(area->Base() + offset, physicalAddress + offset,
1640 				protection, area->MemoryType(), &reservation);
1641 		}
1642 
1643 		map->Unlock();
1644 
1645 		vm_page_unreserve_pages(&reservation);
1646 	}
1647 
1648 	// modify the pointer returned to be offset back into the new area
1649 	// the same way the passed-in physical address was offset
1650 	*_address = (void*)((addr_t)*_address + mapOffset);
1651 
1652 	area->cache_type = CACHE_TYPE_DEVICE;
1653 	return area->id;
1654 }
1655 
1656 
1657 /*!	Don't use!
1658 	TODO: This function was introduced to map physical page vecs to
1659 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1660 	use a device cache and does not track vm_page::wired_count!
1661 */
1662 area_id
1663 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1664 	uint32 addressSpec, addr_t* _size, uint32 protection,
1665 	struct generic_io_vec* vecs, uint32 vecCount)
1666 {
1667 	TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, "
1668 		"spec = %ld, _size = %p, protection = %ld, vecs = %p, "
1669 		"vecCount = %ld)\n", team, name, *_address, addressSpec, _size,
1670 		protection, vecs, vecCount));
1671 
1672 	if (!arch_vm_supports_protection(protection)
1673 		|| (addressSpec & B_MTR_MASK) != 0) {
1674 		return B_NOT_SUPPORTED;
1675 	}
1676 
1677 	AddressSpaceWriteLocker locker(team);
1678 	if (!locker.IsLocked())
1679 		return B_BAD_TEAM_ID;
1680 
1681 	if (vecCount == 0)
1682 		return B_BAD_VALUE;
1683 
1684 	addr_t size = 0;
1685 	for (uint32 i = 0; i < vecCount; i++) {
1686 		if (vecs[i].base % B_PAGE_SIZE != 0
1687 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1688 			return B_BAD_VALUE;
1689 		}
1690 
1691 		size += vecs[i].length;
1692 	}
1693 
1694 	// create a device cache
1695 	VMCache* cache;
1696 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1697 	if (result != B_OK)
1698 		return result;
1699 
1700 	cache->virtual_end = size;
1701 
1702 	cache->Lock();
1703 
1704 	VMArea* area;
1705 	virtual_address_restrictions addressRestrictions = {};
1706 	addressRestrictions.address = *_address;
1707 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1708 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1709 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1710 		&addressRestrictions, true, &area, _address);
1711 
1712 	if (result != B_OK)
1713 		cache->ReleaseRefLocked();
1714 
1715 	cache->Unlock();
1716 
1717 	if (result != B_OK)
1718 		return result;
1719 
1720 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1721 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1722 		area->Base() + (size - 1));
1723 
1724 	vm_page_reservation reservation;
1725 	vm_page_reserve_pages(&reservation, reservePages,
1726 			team == VMAddressSpace::KernelID()
1727 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1728 	map->Lock();
1729 
1730 	uint32 vecIndex = 0;
1731 	size_t vecOffset = 0;
1732 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1733 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1734 			vecOffset = 0;
1735 			vecIndex++;
1736 		}
1737 
1738 		if (vecIndex >= vecCount)
1739 			break;
1740 
1741 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1742 			protection, area->MemoryType(), &reservation);
1743 
1744 		vecOffset += B_PAGE_SIZE;
1745 	}
1746 
1747 	map->Unlock();
1748 	vm_page_unreserve_pages(&reservation);
1749 
1750 	if (_size != NULL)
1751 		*_size = size;
1752 
1753 	area->cache_type = CACHE_TYPE_DEVICE;
1754 	return area->id;
1755 }
1756 
1757 
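/*!	Creates an area backed by a null cache, i.e. an area that merely reserves
	a range of address space and is not backed by actual memory.
*/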
1758 area_id
1759 vm_create_null_area(team_id team, const char* name, void** address,
1760 	uint32 addressSpec, addr_t size, uint32 flags)
1761 {
1762 	size = PAGE_ALIGN(size);
1763 
1764 	// Lock the address space and, if B_EXACT_ADDRESS and
1765 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1766 	// is not wired.
1767 	AddressSpaceWriteLocker locker;
1768 	do {
1769 		if (locker.SetTo(team) != B_OK)
1770 			return B_BAD_TEAM_ID;
1771 	} while (addressSpec == B_EXACT_ADDRESS
1772 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1773 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1774 			(addr_t)*address, size, &locker));
1775 
1776 	// create a null cache
1777 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1778 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1779 	VMCache* cache;
1780 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1781 	if (status != B_OK)
1782 		return status;
1783 
1784 	cache->temporary = 1;
1785 	cache->virtual_end = size;
1786 
1787 	cache->Lock();
1788 
1789 	VMArea* area;
1790 	virtual_address_restrictions addressRestrictions = {};
1791 	addressRestrictions.address = *address;
1792 	addressRestrictions.address_specification = addressSpec;
1793 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1794 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1795 		&addressRestrictions, true, &area, address);
1796 
1797 	if (status < B_OK) {
1798 		cache->ReleaseRefAndUnlock();
1799 		return status;
1800 	}
1801 
1802 	cache->Unlock();
1803 
1804 	area->cache_type = CACHE_TYPE_NULL;
1805 	return area->id;
1806 }
1807 
1808 
1809 /*!	Creates the vnode cache for the specified \a vnode.
1810 	The vnode has to be marked busy when calling this function.
1811 */
1812 status_t
1813 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1814 {
1815 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1816 }
1817 
1818 
1819 /*!	\a cache must be locked. The area's address space must be read-locked.
1820 */
1821 static void
1822 pre_map_area_pages(VMArea* area, VMCache* cache,
1823 	vm_page_reservation* reservation)
1824 {
1825 	addr_t baseAddress = area->Base();
1826 	addr_t cacheOffset = area->cache_offset;
1827 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1828 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1829 
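	// Iterate over the cache's pages within the area's range and map those
	// that are already resident.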
1830 	for (VMCachePagesTree::Iterator it
1831 				= cache->pages.GetIterator(firstPage, true, true);
1832 			vm_page* page = it.Next();) {
1833 		if (page->cache_offset >= endPage)
1834 			break;
1835 
1836 		// skip busy and inactive pages
1837 		if (page->busy || page->usage_count == 0)
1838 			continue;
1839 
1840 		DEBUG_PAGE_ACCESS_START(page);
1841 		map_page(area, page,
1842 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1843 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1844 		DEBUG_PAGE_ACCESS_END(page);
1845 	}
1846 }
1847 
1848 
1849 /*!	Will map the file specified by \a fd to an area in memory.
1850 	The file will be mirrored beginning at the specified \a offset. The
1851 	\a offset and \a size arguments have to be page aligned.
1852 */
1853 static area_id
1854 _vm_map_file(team_id team, const char* name, void** _address,
1855 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1856 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1857 {
1858 	// TODO: for binary files, we want to make sure that they get a
1859 	//	snapshot of the file at mapping time, i.e. later changes should not
1860 	//	make it into the mapped copy -- this will need quite some changes
1861 	//	to be done in a nice way
1862 	TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n",
1863 		fd, offset, size, mapping));
1864 
1865 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1866 	size = PAGE_ALIGN(size);
1867 
1868 	if (mapping == REGION_NO_PRIVATE_MAP)
1869 		protection |= B_SHARED_AREA;
1870 	if (addressSpec != B_EXACT_ADDRESS)
1871 		unmapAddressRange = false;
1872 
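	// A negative file descriptor denotes an anonymous mapping: create an
	// anonymous area instead of mapping a file.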
1873 	if (fd < 0) {
1874 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1875 		virtual_address_restrictions virtualRestrictions = {};
1876 		virtualRestrictions.address = *_address;
1877 		virtualRestrictions.address_specification = addressSpec;
1878 		physical_address_restrictions physicalRestrictions = {};
1879 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1880 			flags, &virtualRestrictions, &physicalRestrictions, kernel,
1881 			_address);
1882 	}
1883 
1884 	// get the open flags of the FD
1885 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1886 	if (descriptor == NULL)
1887 		return EBADF;
1888 	int32 openMode = descriptor->open_mode;
1889 	put_fd(descriptor);
1890 
1891 	// The FD must be open for reading in any case. For a shared mapping with
1892 	// write access, the FD must additionally be open for writing.
1893 	if ((openMode & O_ACCMODE) == O_WRONLY
1894 		|| (mapping == REGION_NO_PRIVATE_MAP
1895 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1896 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1897 		return EACCES;
1898 	}
1899 
1900 	// get the vnode for the object; this also grabs a ref to it
1901 	struct vnode* vnode = NULL;
1902 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1903 	if (status < B_OK)
1904 		return status;
1905 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1906 
1907 	// If we're going to pre-map pages, we need to reserve the pages needed by
1908 	// the mapping backend upfront.
1909 	page_num_t reservedPreMapPages = 0;
1910 	vm_page_reservation reservation;
1911 	if ((protection & B_READ_AREA) != 0) {
1912 		AddressSpaceWriteLocker locker;
1913 		status = locker.SetTo(team);
1914 		if (status != B_OK)
1915 			return status;
1916 
1917 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1918 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1919 
1920 		locker.Unlock();
1921 
1922 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1923 			team == VMAddressSpace::KernelID()
1924 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1925 	}
1926 
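	// Ensure the pre-map page reservation is released again on all return
	// paths.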
1927 	struct PageUnreserver {
1928 		PageUnreserver(vm_page_reservation* reservation)
1929 			:
1930 			fReservation(reservation)
1931 		{
1932 		}
1933 
1934 		~PageUnreserver()
1935 		{
1936 			if (fReservation != NULL)
1937 				vm_page_unreserve_pages(fReservation);
1938 		}
1939 
1940 		vm_page_reservation* fReservation;
1941 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1942 
1943 	// Lock the address space and, if the specified address range shall be
1944 	// unmapped, ensure it is not wired.
1945 	AddressSpaceWriteLocker locker;
1946 	do {
1947 		if (locker.SetTo(team) != B_OK)
1948 			return B_BAD_TEAM_ID;
1949 	} while (unmapAddressRange
1950 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1951 			(addr_t)*_address, size, &locker));
1952 
1953 	// TODO: this only works for file systems that use the file cache
1954 	VMCache* cache;
1955 	status = vfs_get_vnode_cache(vnode, &cache, false);
1956 	if (status < B_OK)
1957 		return status;
1958 
1959 	cache->Lock();
1960 
1961 	VMArea* area;
1962 	virtual_address_restrictions addressRestrictions = {};
1963 	addressRestrictions.address = *_address;
1964 	addressRestrictions.address_specification = addressSpec;
1965 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1966 		0, protection, mapping,
1967 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1968 		&addressRestrictions, kernel, &area, _address);
1969 
1970 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1971 		// map_backing_store() cannot know we no longer need the ref
1972 		cache->ReleaseRefLocked();
1973 	}
1974 
1975 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1976 		pre_map_area_pages(area, cache, &reservation);
1977 
1978 	cache->Unlock();
1979 
1980 	if (status == B_OK) {
1981 		// TODO: this probably deserves a smarter solution, ie. don't always
1982 		// prefetch stuff, and also, probably don't trigger it at this place.
1983 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1984 			// prefetches at max 10 MB starting from "offset"
1985 	}
1986 
1987 	if (status != B_OK)
1988 		return status;
1989 
1990 	area->cache_type = CACHE_TYPE_VNODE;
1991 	return area->id;
1992 }
1993 
1994 
1995 area_id
1996 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
1997 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
1998 	int fd, off_t offset)
1999 {
2000 	if (!arch_vm_supports_protection(protection))
2001 		return B_NOT_SUPPORTED;
2002 
2003 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2004 		mapping, unmapAddressRange, fd, offset, true);
2005 }
2006 
2007 
2008 VMCache*
2009 vm_area_get_locked_cache(VMArea* area)
2010 {
2011 	rw_lock_read_lock(&sAreaCacheLock);
2012 
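	// Retry until we have locked the cache that is (still) the area's cache:
	// switching from the read lock to the cache lock can race with the area
	// being moved to another cache or the cache being deleted.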
2013 	while (true) {
2014 		VMCache* cache = area->cache;
2015 
2016 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2017 			// cache has been deleted
2018 			rw_lock_read_lock(&sAreaCacheLock);
2019 			continue;
2020 		}
2021 
2022 		rw_lock_read_lock(&sAreaCacheLock);
2023 
2024 		if (cache == area->cache) {
2025 			cache->AcquireRefLocked();
2026 			rw_lock_read_unlock(&sAreaCacheLock);
2027 			return cache;
2028 		}
2029 
2030 		// the cache changed in the meantime
2031 		cache->Unlock();
2032 	}
2033 }
2034 
2035 
2036 void
2037 vm_area_put_locked_cache(VMCache* cache)
2038 {
2039 	cache->ReleaseRefAndUnlock();
2040 }
2041 
2042 
2043 area_id
2044 vm_clone_area(team_id team, const char* name, void** address,
2045 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2046 	bool kernel)
2047 {
2048 	VMArea* newArea = NULL;
2049 	VMArea* sourceArea;
2050 
2051 	// Check whether the source area exists and is cloneable. If so, mark it
2052 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2053 	{
2054 		AddressSpaceWriteLocker locker;
2055 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2056 		if (status != B_OK)
2057 			return status;
2058 
2059 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2060 			return B_NOT_ALLOWED;
2061 
2062 		sourceArea->protection |= B_SHARED_AREA;
2063 		protection |= B_SHARED_AREA;
2064 	}
2065 
2066 	// Now lock both address spaces and actually do the cloning.
2067 
2068 	MultiAddressSpaceLocker locker;
2069 	VMAddressSpace* sourceAddressSpace;
2070 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2071 	if (status != B_OK)
2072 		return status;
2073 
2074 	VMAddressSpace* targetAddressSpace;
2075 	status = locker.AddTeam(team, true, &targetAddressSpace);
2076 	if (status != B_OK)
2077 		return status;
2078 
2079 	status = locker.Lock();
2080 	if (status != B_OK)
2081 		return status;
2082 
2083 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2084 	if (sourceArea == NULL)
2085 		return B_BAD_VALUE;
2086 
2087 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2088 		return B_NOT_ALLOWED;
2089 
2090 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2091 
2092 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2093 	//	have been adapted. Maybe it should be part of the kernel settings,
2094 	//	anyway (so that old drivers can always work).
2095 #if 0
2096 	if (sourceArea->aspace == VMAddressSpace::Kernel()
2097 		&& addressSpace != VMAddressSpace::Kernel()
2098 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2099 		// kernel areas must not be cloned in userland, unless explicitly
2100 		// declared user-cloneable upon construction
2101 		status = B_NOT_ALLOWED;
2102 	} else
2103 #endif
2104 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2105 		status = B_NOT_ALLOWED;
2106 	else {
2107 		virtual_address_restrictions addressRestrictions = {};
2108 		addressRestrictions.address = *address;
2109 		addressRestrictions.address_specification = addressSpec;
2110 		status = map_backing_store(targetAddressSpace, cache,
2111 			sourceArea->cache_offset, name, sourceArea->Size(),
2112 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2113 			kernel, &newArea, address);
2114 	}
2115 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2116 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2117 		// to create a new cache, and has therefore already acquired a reference
2118 		// to the source cache - but otherwise it has no idea that we need
2119 		// one.
2120 		cache->AcquireRefLocked();
2121 	}
2122 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2123 		// we need to map in everything at this point
2124 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2125 			// we don't have actual pages to map but a physical area
2126 			VMTranslationMap* map
2127 				= sourceArea->address_space->TranslationMap();
2128 			map->Lock();
2129 
2130 			phys_addr_t physicalAddress;
2131 			uint32 oldProtection;
2132 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2133 
2134 			map->Unlock();
2135 
2136 			map = targetAddressSpace->TranslationMap();
2137 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2138 				newArea->Base() + (newArea->Size() - 1));
2139 
2140 			vm_page_reservation reservation;
2141 			vm_page_reserve_pages(&reservation, reservePages,
2142 				targetAddressSpace == VMAddressSpace::Kernel()
2143 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2144 			map->Lock();
2145 
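			// The device area is physically contiguous, so simply map the new
			// area's pages to the corresponding physical offsets.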
2146 			for (addr_t offset = 0; offset < newArea->Size();
2147 					offset += B_PAGE_SIZE) {
2148 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2149 					protection, newArea->MemoryType(), &reservation);
2150 			}
2151 
2152 			map->Unlock();
2153 			vm_page_unreserve_pages(&reservation);
2154 		} else {
2155 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2156 			size_t reservePages = map->MaxPagesNeededToMap(
2157 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2158 			vm_page_reservation reservation;
2159 			vm_page_reserve_pages(&reservation, reservePages,
2160 				targetAddressSpace == VMAddressSpace::Kernel()
2161 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2162 
2163 			// map in all pages from source
2164 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2165 					vm_page* page  = it.Next();) {
2166 				if (!page->busy) {
2167 					DEBUG_PAGE_ACCESS_START(page);
2168 					map_page(newArea, page,
2169 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2170 							- newArea->cache_offset),
2171 						protection, &reservation);
2172 					DEBUG_PAGE_ACCESS_END(page);
2173 				}
2174 			}
2175 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2176 			// ensuring that!
2177 
2178 			vm_page_unreserve_pages(&reservation);
2179 		}
2180 	}
2181 	if (status == B_OK)
2182 		newArea->cache_type = sourceArea->cache_type;
2183 
2184 	vm_area_put_locked_cache(cache);
2185 
2186 	if (status < B_OK)
2187 		return status;
2188 
2189 	return newArea->id;
2190 }
2191 
2192 
2193 /*!	Deletes the specified area of the given address space.
2194 
2195 	The address space must be write-locked.
2196 	The caller must ensure that the area does not have any wired ranges.
2197 
2198 	\param addressSpace The address space containing the area.
2199 	\param area The area to be deleted.
2200 	\param deletingAddressSpace \c true, if the address space is in the process
2201 		of being deleted.
2202 */
2203 static void
2204 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2205 	bool deletingAddressSpace)
2206 {
2207 	ASSERT(!area->IsWired());
2208 
2209 	VMAreaHash::Remove(area);
2210 
2211 	// At this point the area is removed from the global hash table, but
2212 	// still exists in the area list.
2213 
2214 	// Unmap the virtual address space the area occupied.
2215 	{
2216 		// We need to lock the complete cache chain.
2217 		VMCache* topCache = vm_area_get_locked_cache(area);
2218 		VMCacheChainLocker cacheChainLocker(topCache);
2219 		cacheChainLocker.LockAllSourceCaches();
2220 
2221 		// If the area's top cache is a temporary cache and the area is the only
2222 		// one referencing it (besides us currently holding a second reference),
2223 		// the unmapping code doesn't need to care about preserving the accessed
2224 		// and dirty flags of the top cache page mappings.
2225 		bool ignoreTopCachePageFlags
2226 			= topCache->temporary && topCache->RefCount() == 2;
2227 
2228 		area->address_space->TranslationMap()->UnmapArea(area,
2229 			deletingAddressSpace, ignoreTopCachePageFlags);
2230 	}
2231 
2232 	if (!area->cache->temporary)
2233 		area->cache->WriteModified();
2234 
2235 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2236 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2237 
2238 	arch_vm_unset_memory_type(area);
2239 	addressSpace->RemoveArea(area, allocationFlags);
2240 	addressSpace->Put();
2241 
2242 	area->cache->RemoveArea(area);
2243 	area->cache->ReleaseRef();
2244 
2245 	addressSpace->DeleteArea(area, allocationFlags);
2246 }
2247 
2248 
2249 status_t
2250 vm_delete_area(team_id team, area_id id, bool kernel)
2251 {
2252 	TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id));
2253 
2254 	// lock the address space and make sure the area isn't wired
2255 	AddressSpaceWriteLocker locker;
2256 	VMArea* area;
2257 	AreaCacheLocker cacheLocker;
2258 
2259 	do {
2260 		status_t status = locker.SetFromArea(team, id, area);
2261 		if (status != B_OK)
2262 			return status;
2263 
2264 		cacheLocker.SetTo(area);
2265 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2266 
2267 	cacheLocker.Unlock();
2268 
2269 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2270 		return B_NOT_ALLOWED;
2271 
2272 	delete_area(locker.AddressSpace(), area, false);
2273 	return B_OK;
2274 }
2275 
2276 
2277 /*!	Creates a new cache on top of given cache, moves all areas from
2278 	the old cache to the new one, and changes the protection of all affected
2279 	areas' pages to read-only. If requested, wired pages are moved up to the
2280 	new cache and copies are added to the old cache in their place.
2281 	Preconditions:
2282 	- The given cache must be locked.
2283 	- All of the cache's areas' address spaces must be read locked.
2284 	- Either the cache must not have any wired ranges or a page reservation for
2285 	  all wired pages must be provided, so they can be copied.
2286 
2287 	\param lowerCache The cache on top of which a new cache shall be created.
2288 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2289 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2290 		has wired pages. The wired pages are copied in this case.
2291 */
2292 static status_t
2293 vm_copy_on_write_area(VMCache* lowerCache,
2294 	vm_page_reservation* wiredPagesReservation)
2295 {
2296 	VMCache* upperCache;
2297 
2298 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2299 
2300 	// We need to separate the cache from its areas. The cache goes one level
2301 	// deeper and we create a new cache in between.
2302 
2303 	// create an anonymous cache
2304 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2305 		0, dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2306 		VM_PRIORITY_USER);
2307 	if (status != B_OK)
2308 		return status;
2309 
2310 	upperCache->Lock();
2311 
2312 	upperCache->temporary = 1;
2313 	upperCache->virtual_base = lowerCache->virtual_base;
2314 	upperCache->virtual_end = lowerCache->virtual_end;
2315 
2316 	// transfer the lower cache areas to the upper cache
2317 	rw_lock_write_lock(&sAreaCacheLock);
2318 	upperCache->TransferAreas(lowerCache);
2319 	rw_lock_write_unlock(&sAreaCacheLock);
2320 
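	// make the lower cache the source of the new upper cache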
2321 	lowerCache->AddConsumer(upperCache);
2322 
2323 	// We now need to remap all pages from all of the cache's areas read-only,
2324 	// so that a copy will be created on next write access. If there are wired
2325 	// pages, we keep their protection, move them to the upper cache and create
2326 	// copies for the lower cache.
2327 	if (wiredPagesReservation != NULL) {
2328 		// We need to handle wired pages -- iterate through the cache's pages.
2329 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2330 				vm_page* page = it.Next();) {
2331 			if (page->WiredCount() > 0) {
2332 				// allocate a new page and copy the wired one
2333 				vm_page* copiedPage = vm_page_allocate_page(
2334 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2335 
2336 				vm_memcpy_physical_page(
2337 					copiedPage->physical_page_number * B_PAGE_SIZE,
2338 					page->physical_page_number * B_PAGE_SIZE);
2339 
2340 				// move the wired page to the upper cache (note: removing is OK
2341 				// with the SplayTree iterator) and insert the copy
2342 				upperCache->MovePage(page);
2343 				lowerCache->InsertPage(copiedPage,
2344 					page->cache_offset * B_PAGE_SIZE);
2345 
2346 				DEBUG_PAGE_ACCESS_END(copiedPage);
2347 			} else {
2348 				// Change the protection of this page in all areas.
2349 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2350 						tempArea = tempArea->cache_next) {
2351 					// The area must be readable in the same way it was
2352 					// previously writable.
2353 					uint32 protection = B_KERNEL_READ_AREA;
2354 					if ((tempArea->protection & B_READ_AREA) != 0)
2355 						protection |= B_READ_AREA;
2356 
2357 					VMTranslationMap* map
2358 						= tempArea->address_space->TranslationMap();
2359 					map->Lock();
2360 					map->ProtectPage(tempArea,
2361 						virtual_page_address(tempArea, page), protection);
2362 					map->Unlock();
2363 				}
2364 			}
2365 		}
2366 	} else {
2367 		ASSERT(lowerCache->WiredPagesCount() == 0);
2368 
2369 		// just change the protection of all areas
2370 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2371 				tempArea = tempArea->cache_next) {
2372 			// The area must be readable in the same way it was previously
2373 			// writable.
2374 			uint32 protection = B_KERNEL_READ_AREA;
2375 			if ((tempArea->protection & B_READ_AREA) != 0)
2376 				protection |= B_READ_AREA;
2377 
2378 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2379 			map->Lock();
2380 			map->ProtectArea(tempArea, protection);
2381 			map->Unlock();
2382 		}
2383 	}
2384 
2385 	vm_area_put_locked_cache(upperCache);
2386 
2387 	return B_OK;
2388 }
2389 
2390 
2391 area_id
2392 vm_copy_area(team_id team, const char* name, void** _address,
2393 	uint32 addressSpec, uint32 protection, area_id sourceID)
2394 {
2395 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2396 
2397 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2398 		// set the same protection for the kernel as for userland
2399 		protection |= B_KERNEL_READ_AREA;
2400 		if (writableCopy)
2401 			protection |= B_KERNEL_WRITE_AREA;
2402 	}
2403 
2404 	// Do the locking: target address space, all address spaces associated with
2405 	// the source cache, and the cache itself.
2406 	MultiAddressSpaceLocker locker;
2407 	VMAddressSpace* targetAddressSpace;
2408 	VMCache* cache;
2409 	VMArea* source;
2410 	AreaCacheLocker cacheLocker;
2411 	status_t status;
2412 	bool sharedArea;
2413 
2414 	page_num_t wiredPages = 0;
2415 	vm_page_reservation wiredPagesReservation;
2416 
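	// Copying wired pages requires a page reservation, which cannot be done
	// while holding the locks. Hence we may have to unlock, reserve, and then
	// retry in case the wired page count changed in the meantime.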
2417 	bool restart;
2418 	do {
2419 		restart = false;
2420 
2421 		locker.Unset();
2422 		status = locker.AddTeam(team, true, &targetAddressSpace);
2423 		if (status == B_OK) {
2424 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2425 				&cache);
2426 		}
2427 		if (status != B_OK)
2428 			return status;
2429 
2430 		cacheLocker.SetTo(cache, true);	// already locked
2431 
2432 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2433 
2434 		page_num_t oldWiredPages = wiredPages;
2435 		wiredPages = 0;
2436 
2437 		// If the source area isn't shared, count the number of wired pages in
2438 		// the cache and reserve as many pages.
2439 		if (!sharedArea) {
2440 			wiredPages = cache->WiredPagesCount();
2441 
2442 			if (wiredPages > oldWiredPages) {
2443 				cacheLocker.Unlock();
2444 				locker.Unlock();
2445 
2446 				if (oldWiredPages > 0)
2447 					vm_page_unreserve_pages(&wiredPagesReservation);
2448 
2449 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2450 					VM_PRIORITY_USER);
2451 
2452 				restart = true;
2453 			}
2454 		} else if (oldWiredPages > 0)
2455 			vm_page_unreserve_pages(&wiredPagesReservation);
2456 	} while (restart);
2457 
2458 	// unreserve pages later
2459 	struct PagesUnreserver {
2460 		PagesUnreserver(vm_page_reservation* reservation)
2461 			:
2462 			fReservation(reservation)
2463 		{
2464 		}
2465 
2466 		~PagesUnreserver()
2467 		{
2468 			if (fReservation != NULL)
2469 				vm_page_unreserve_pages(fReservation);
2470 		}
2471 
2472 	private:
2473 		vm_page_reservation*	fReservation;
2474 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2475 
2476 	if (addressSpec == B_CLONE_ADDRESS) {
2477 		addressSpec = B_EXACT_ADDRESS;
2478 		*_address = (void*)source->Base();
2479 	}
2480 
2481 	// First, create a cache on top of the source area, or use the existing
2482 	// one if this is a shared area.
2483 
2484 	VMArea* target;
2485 	virtual_address_restrictions addressRestrictions = {};
2486 	addressRestrictions.address = *_address;
2487 	addressRestrictions.address_specification = addressSpec;
2488 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2489 		name, source->Size(), source->wiring, protection,
2490 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2491 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2492 		&addressRestrictions, true, &target, _address);
2493 	if (status < B_OK)
2494 		return status;
2495 
2496 	if (sharedArea) {
2497 		// The new area uses the old area's cache, but map_backing_store()
2498 		// hasn't acquired a ref. So we have to do that now.
2499 		cache->AcquireRefLocked();
2500 	}
2501 
2502 	// If the source area is writable, we need to move it one layer up as well
2503 
2504 	if (!sharedArea) {
2505 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2506 			// TODO: do something more useful if this fails!
2507 			if (vm_copy_on_write_area(cache,
2508 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2509 				panic("vm_copy_on_write_area() failed!\n");
2510 			}
2511 		}
2512 	}
2513 
2514 	// we return the ID of the newly created area
2515 	return target->id;
2516 }
2517 
2518 
2519 static status_t
2520 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2521 	bool kernel)
2522 {
2523 	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = "
2524 		"%#lx)\n", team, areaID, newProtection));
2525 
2526 	if (!arch_vm_supports_protection(newProtection))
2527 		return B_NOT_SUPPORTED;
2528 
2529 	bool becomesWritable
2530 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2531 
2532 	// lock address spaces and cache
2533 	MultiAddressSpaceLocker locker;
2534 	VMCache* cache;
2535 	VMArea* area;
2536 	status_t status;
2537 	AreaCacheLocker cacheLocker;
2538 	bool isWritable;
2539 
2540 	bool restart;
2541 	do {
2542 		restart = false;
2543 
2544 		locker.Unset();
2545 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2546 		if (status != B_OK)
2547 			return status;
2548 
2549 		cacheLocker.SetTo(cache, true);	// already locked
2550 
2551 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2552 			return B_NOT_ALLOWED;
2553 
2554 		if (area->protection == newProtection)
2555 			return B_OK;
2556 
2557 		if (team != VMAddressSpace::KernelID()
2558 			&& area->address_space->ID() != team) {
2559 			// unless you're the kernel, you are only allowed to set
2560 			// the protection of your own areas
2561 			return B_NOT_ALLOWED;
2562 		}
2563 
2564 		isWritable
2565 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2566 
2567 		// Make sure the area (or, if we're going to call
2568 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2569 		// wired ranges.
2570 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2571 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2572 					otherArea = otherArea->cache_next) {
2573 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2574 					restart = true;
2575 					break;
2576 				}
2577 			}
2578 		} else {
2579 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2580 				restart = true;
2581 		}
2582 	} while (restart);
2583 
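	// changePageProtection: whether the existing page mappings still need to
	// be re-protected below; changeTopCachePagesOnly: restrict that remapping
	// to the pages present in the top cache.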
2584 	bool changePageProtection = true;
2585 	bool changeTopCachePagesOnly = false;
2586 
2587 	if (isWritable && !becomesWritable) {
2588 		// writable -> !writable
2589 
2590 		if (cache->source != NULL && cache->temporary) {
2591 			if (cache->CountWritableAreas(area) == 0) {
2592 				// Since this cache now relies on the pages in its source cache,
2593 				// we can change the cache's commitment to account only for the
2594 				// pages that are actually in this cache.
2595 
2596 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2597 					team == VMAddressSpace::KernelID()
2598 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2599 
2600 				// TODO: we may be able to join with our source cache, if
2601 				// count == 0
2602 			}
2603 		}
2604 
2605 		// If only the writability changes, we can just remap the pages of the
2606 		// top cache, since the pages of lower caches are mapped read-only
2607 		// anyway. That's only advantageous if the number of pages in the cache
2608 		// is significantly smaller than the number of pages in the area,
2609 		// though.
2610 		if (newProtection
2611 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2612 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2613 			changeTopCachePagesOnly = true;
2614 		}
2615 	} else if (!isWritable && becomesWritable) {
2616 		// !writable -> writable
2617 
2618 		if (!cache->consumers.IsEmpty()) {
2619 			// There are consumers -- we have to insert a new cache. Fortunately
2620 			// vm_copy_on_write_area() does everything that's needed.
2621 			changePageProtection = false;
2622 			status = vm_copy_on_write_area(cache, NULL);
2623 		} else {
2624 			// No consumers, so we don't need to insert a new one.
2625 			if (cache->source != NULL && cache->temporary) {
2626 				// the cache's commitment must contain all possible pages
2627 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2628 					team == VMAddressSpace::KernelID()
2629 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2630 			}
2631 
2632 			if (status == B_OK && cache->source != NULL) {
2633 				// There's a source cache, hence we can't just change all pages'
2634 				// protection or we might allow writing into pages belonging to
2635 				// a lower cache.
2636 				changeTopCachePagesOnly = true;
2637 			}
2638 		}
2639 	} else {
2640 		// we don't have anything special to do in all other cases
2641 	}
2642 
2643 	if (status == B_OK) {
2644 		// remap existing pages in this cache
2645 		if (changePageProtection) {
2646 			VMTranslationMap* map = area->address_space->TranslationMap();
2647 			map->Lock();
2648 
2649 			if (changeTopCachePagesOnly) {
2650 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2651 				page_num_t lastPageOffset
2652 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2653 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2654 						vm_page* page = it.Next();) {
2655 					if (page->cache_offset >= firstPageOffset
2656 						&& page->cache_offset <= lastPageOffset) {
2657 						addr_t address = virtual_page_address(area, page);
2658 						map->ProtectPage(area, address, newProtection);
2659 					}
2660 				}
2661 			} else
2662 				map->ProtectArea(area, newProtection);
2663 
2664 			map->Unlock();
2665 		}
2666 
2667 		area->protection = newProtection;
2668 	}
2669 
2670 	return status;
2671 }
2672 
2673 
2674 status_t
2675 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2676 {
2677 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2678 	if (addressSpace == NULL)
2679 		return B_BAD_TEAM_ID;
2680 
2681 	VMTranslationMap* map = addressSpace->TranslationMap();
2682 
2683 	map->Lock();
2684 	uint32 dummyFlags;
2685 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2686 	map->Unlock();
2687 
2688 	addressSpace->Put();
2689 	return status;
2690 }
2691 
2692 
2693 /*!	The page's cache must be locked.
2694 */
2695 bool
2696 vm_test_map_modification(vm_page* page)
2697 {
2698 	if (page->modified)
2699 		return true;
2700 
2701 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2702 	vm_page_mapping* mapping;
2703 	while ((mapping = iterator.Next()) != NULL) {
2704 		VMArea* area = mapping->area;
2705 		VMTranslationMap* map = area->address_space->TranslationMap();
2706 
2707 		phys_addr_t physicalAddress;
2708 		uint32 flags;
2709 		map->Lock();
2710 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2711 		map->Unlock();
2712 
2713 		if ((flags & PAGE_MODIFIED) != 0)
2714 			return true;
2715 	}
2716 
2717 	return false;
2718 }
2719 
2720 
2721 /*!	The page's cache must be locked.
2722 */
2723 void
2724 vm_clear_map_flags(vm_page* page, uint32 flags)
2725 {
2726 	if ((flags & PAGE_ACCESSED) != 0)
2727 		page->accessed = false;
2728 	if ((flags & PAGE_MODIFIED) != 0)
2729 		page->modified = false;
2730 
2731 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2732 	vm_page_mapping* mapping;
2733 	while ((mapping = iterator.Next()) != NULL) {
2734 		VMArea* area = mapping->area;
2735 		VMTranslationMap* map = area->address_space->TranslationMap();
2736 
2737 		map->Lock();
2738 		map->ClearFlags(virtual_page_address(area, page), flags);
2739 		map->Unlock();
2740 	}
2741 }
2742 
2743 
2744 /*!	Removes all mappings from a page.
2745 	After you've called this function, the page is unmapped from memory and
2746 	the page's \c accessed and \c modified flags have been updated according
2747 	to the state of the mappings.
2748 	The page's cache must be locked.
2749 */
2750 void
2751 vm_remove_all_page_mappings(vm_page* page)
2752 {
2753 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2754 		VMArea* area = mapping->area;
2755 		VMTranslationMap* map = area->address_space->TranslationMap();
2756 		addr_t address = virtual_page_address(area, page);
2757 		map->UnmapPage(area, address, false);
2758 	}
2759 }
2760 
2761 
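/*!	Clears the accessed and modified flags of all of the page's mappings and
	transfers the modified state to the page itself.
	The page's cache must be locked.
	\return The number of accessed flags that were set, including the page's
		own \c accessed flag.
*/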
2762 int32
2763 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2764 {
2765 	int32 count = 0;
2766 
2767 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2768 	vm_page_mapping* mapping;
2769 	while ((mapping = iterator.Next()) != NULL) {
2770 		VMArea* area = mapping->area;
2771 		VMTranslationMap* map = area->address_space->TranslationMap();
2772 
2773 		bool modified;
2774 		if (map->ClearAccessedAndModified(area,
2775 				virtual_page_address(area, page), false, modified)) {
2776 			count++;
2777 		}
2778 
2779 		page->modified |= modified;
2780 	}
2781 
2783 	if (page->accessed) {
2784 		count++;
2785 		page->accessed = false;
2786 	}
2787 
2788 	return count;
2789 }
2790 
2791 
2792 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2793 	mappings.
2794 	The function iterates through the page mappings and removes them until
2795 	encountering one that has been accessed. From then on it will continue to
2796 	iterate, but only clear the accessed flag of the mapping. The page's
2797 	\c modified bit will be updated accordingly, the \c accessed bit will be
2798 	cleared.
2799 	\return The number of mapping accessed bits encountered, including the
2800 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2801 		of the page have been removed.
2802 */
2803 int32
2804 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2805 {
2806 	ASSERT(page->WiredCount() == 0);
2807 
2808 	if (page->accessed)
2809 		return vm_clear_page_mapping_accessed_flags(page);
2810 
2811 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2812 		VMArea* area = mapping->area;
2813 		VMTranslationMap* map = area->address_space->TranslationMap();
2814 		addr_t address = virtual_page_address(area, page);
2815 		bool modified = false;
2816 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2817 			page->accessed = true;
2818 			page->modified |= modified;
2819 			return vm_clear_page_mapping_accessed_flags(page);
2820 		}
2821 		page->modified |= modified;
2822 	}
2823 
2824 	return 0;
2825 }
2826 
2827 
2828 static int
2829 display_mem(int argc, char** argv)
2830 {
2831 	bool physical = false;
2832 	addr_t copyAddress;
2833 	int32 displayWidth;
2834 	int32 itemSize;
2835 	int32 num = -1;
2836 	addr_t address;
2837 	int i = 1, j;
2838 
2839 	if (argc > 1 && argv[1][0] == '-') {
2840 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2841 			physical = true;
2842 			i++;
2843 		} else
2844 			i = 99;
2845 	}
2846 
2847 	if (argc < i + 1 || argc > i + 2) {
2848 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2849 			"\tdl - 8 bytes\n"
2850 			"\tdw - 4 bytes\n"
2851 			"\tds - 2 bytes\n"
2852 			"\tdb - 1 byte\n"
2853 			"\tstring - a whole string\n"
2854 			"  -p or --physical only allows memory from a single page to be "
2855 			"displayed.\n");
2856 		return 0;
2857 	}
2858 
2859 	address = parse_expression(argv[i]);
2860 
2861 	if (argc > i + 1)
2862 		num = parse_expression(argv[i + 1]);
2863 
2864 	// build the format string
2865 	if (strcmp(argv[0], "db") == 0) {
2866 		itemSize = 1;
2867 		displayWidth = 16;
2868 	} else if (strcmp(argv[0], "ds") == 0) {
2869 		itemSize = 2;
2870 		displayWidth = 8;
2871 	} else if (strcmp(argv[0], "dw") == 0) {
2872 		itemSize = 4;
2873 		displayWidth = 4;
2874 	} else if (strcmp(argv[0], "dl") == 0) {
2875 		itemSize = 8;
2876 		displayWidth = 2;
2877 	} else if (strcmp(argv[0], "string") == 0) {
2878 		itemSize = 1;
2879 		displayWidth = -1;
2880 	} else {
2881 		kprintf("display_mem called in an invalid way!\n");
2882 		return 0;
2883 	}
2884 
2885 	if (num <= 0)
2886 		num = displayWidth;
2887 
2888 	void* physicalPageHandle = NULL;
2889 
2890 	if (physical) {
2891 		int32 offset = address & (B_PAGE_SIZE - 1);
2892 		if (num * itemSize + offset > B_PAGE_SIZE) {
2893 			num = (B_PAGE_SIZE - offset) / itemSize;
2894 			kprintf("NOTE: number of bytes has been cut to page size\n");
2895 		}
2896 
2897 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2898 
2899 		if (vm_get_physical_page_debug(address, &copyAddress,
2900 				&physicalPageHandle) != B_OK) {
2901 			kprintf("getting the hardware page failed.");
2902 			return 0;
2903 		}
2904 
2905 		address += offset;
2906 		copyAddress += offset;
2907 	} else
2908 		copyAddress = address;
2909 
2910 	if (!strcmp(argv[0], "string")) {
2911 		kprintf("%p \"", (char*)copyAddress);
2912 
2913 		// string mode
2914 		for (i = 0; true; i++) {
2915 			char c;
2916 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2917 					!= B_OK
2918 				|| c == '\0') {
2919 				break;
2920 			}
2921 
2922 			if (c == '\n')
2923 				kprintf("\\n");
2924 			else if (c == '\t')
2925 				kprintf("\\t");
2926 			else {
2927 				if (!isprint(c))
2928 					c = '.';
2929 
2930 				kprintf("%c", c);
2931 			}
2932 		}
2933 
2934 		kprintf("\"\n");
2935 	} else {
2936 		// number mode
2937 		for (i = 0; i < num; i++) {
2938 			uint32 value;
2939 
2940 			if ((i % displayWidth) == 0) {
2941 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2942 				if (i != 0)
2943 					kprintf("\n");
2944 
2945 				kprintf("[0x%lx]  ", address + i * itemSize);
2946 
2947 				for (j = 0; j < displayed; j++) {
2948 					char c;
2949 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2950 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2951 						displayed = j;
2952 						break;
2953 					}
2954 					if (!isprint(c))
2955 						c = '.';
2956 
2957 					kprintf("%c", c);
2958 				}
2959 				if (num > displayWidth) {
2960 					// make sure the spacing in the last line is correct
2961 					for (j = displayed; j < displayWidth * itemSize; j++)
2962 						kprintf(" ");
2963 				}
2964 				kprintf("  ");
2965 			}
2966 
2967 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2968 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2969 				kprintf("read fault");
2970 				break;
2971 			}
2972 
2973 			switch (itemSize) {
2974 				case 1:
2975 					kprintf(" %02x", *(uint8*)&value);
2976 					break;
2977 				case 2:
2978 					kprintf(" %04x", *(uint16*)&value);
2979 					break;
2980 				case 4:
2981 					kprintf(" %08lx", *(uint32*)&value);
2982 					break;
2983 				case 8:
2984 					kprintf(" %016Lx", *(uint64*)&value);
2985 					break;
2986 			}
2987 		}
2988 
2989 		kprintf("\n");
2990 	}
2991 
2992 	if (physical) {
2993 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
2994 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
2995 	}
2996 	return 0;
2997 }
2998 
2999 
3000 static void
3001 dump_cache_tree_recursively(VMCache* cache, int level,
3002 	VMCache* highlightCache)
3003 {
3004 	// print this cache
3005 	for (int i = 0; i < level; i++)
3006 		kprintf("  ");
3007 	if (cache == highlightCache)
3008 		kprintf("%p <--\n", cache);
3009 	else
3010 		kprintf("%p\n", cache);
3011 
3012 	// recursively print its consumers
3013 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3014 			VMCache* consumer = it.Next();) {
3015 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3016 	}
3017 }
3018 
3019 
3020 static int
3021 dump_cache_tree(int argc, char** argv)
3022 {
3023 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3024 		kprintf("usage: %s <address>\n", argv[0]);
3025 		return 0;
3026 	}
3027 
3028 	addr_t address = parse_expression(argv[1]);
3029 	if (address == 0)
3030 		return 0;
3031 
3032 	VMCache* cache = (VMCache*)address;
3033 	VMCache* root = cache;
3034 
3035 	// find the root cache (the transitive source)
3036 	while (root->source != NULL)
3037 		root = root->source;
3038 
3039 	dump_cache_tree_recursively(root, 0, cache);
3040 
3041 	return 0;
3042 }
3043 
3044 
3045 const char*
3046 vm_cache_type_to_string(int32 type)
3047 {
3048 	switch (type) {
3049 		case CACHE_TYPE_RAM:
3050 			return "RAM";
3051 		case CACHE_TYPE_DEVICE:
3052 			return "device";
3053 		case CACHE_TYPE_VNODE:
3054 			return "vnode";
3055 		case CACHE_TYPE_NULL:
3056 			return "null";
3057 
3058 		default:
3059 			return "unknown";
3060 	}
3061 }
3062 
3063 
3064 #if DEBUG_CACHE_LIST
3065 
3066 static void
3067 update_cache_info_recursively(VMCache* cache, cache_info& info)
3068 {
3069 	info.page_count += cache->page_count;
3070 	if (cache->type == CACHE_TYPE_RAM)
3071 		info.committed += cache->committed_size;
3072 
3073 	// recurse
3074 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3075 			VMCache* consumer = it.Next();) {
3076 		update_cache_info_recursively(consumer, info);
3077 	}
3078 }
3079 
3080 
3081 static int
3082 cache_info_compare_page_count(const void* _a, const void* _b)
3083 {
3084 	const cache_info* a = (const cache_info*)_a;
3085 	const cache_info* b = (const cache_info*)_b;
3086 	if (a->page_count == b->page_count)
3087 		return 0;
3088 	return a->page_count < b->page_count ? 1 : -1;
3089 }
3090 
3091 
3092 static int
3093 cache_info_compare_committed(const void* _a, const void* _b)
3094 {
3095 	const cache_info* a = (const cache_info*)_a;
3096 	const cache_info* b = (const cache_info*)_b;
3097 	if (a->committed == b->committed)
3098 		return 0;
3099 	return a->committed < b->committed ? 1 : -1;
3100 }
3101 
3102 
3103 static void
3104 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3105 {
3106 	for (int i = 0; i < level; i++)
3107 		kprintf("  ");
3108 
3109 	kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache,
3110 		vm_cache_type_to_string(cache->type), cache->virtual_base,
3111 		cache->virtual_end, cache->page_count);
3112 
3113 	if (level == 0)
3114 		kprintf("/%lu", info.page_count);
3115 
3116 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3117 		kprintf(", committed: %lld", cache->committed_size);
3118 
3119 		if (level == 0)
3120 			kprintf("/%lu", info.committed);
3121 	}
3122 
3123 	// areas
3124 	if (cache->areas != NULL) {
3125 		VMArea* area = cache->areas;
3126 		kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name,
3127 			area->address_space->ID());
3128 
3129 		while (area->cache_next != NULL) {
3130 			area = area->cache_next;
3131 			kprintf(", %ld", area->id);
3132 		}
3133 	}
3134 
3135 	kputs("\n");
3136 
3137 	// recurse
3138 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3139 			VMCache* consumer = it.Next();) {
3140 		dump_caches_recursively(consumer, info, level + 1);
3141 	}
3142 }
3143 
3144 
3145 static int
3146 dump_caches(int argc, char** argv)
3147 {
3148 	if (sCacheInfoTable == NULL) {
3149 		kprintf("No cache info table!\n");
3150 		return 0;
3151 	}
3152 
3153 	bool sortByPageCount = true;
3154 
3155 	for (int32 i = 1; i < argc; i++) {
3156 		if (strcmp(argv[i], "-c") == 0) {
3157 			sortByPageCount = false;
3158 		} else {
3159 			print_debugger_command_usage(argv[0]);
3160 			return 0;
3161 		}
3162 	}
3163 
3164 	uint32 totalCount = 0;
3165 	uint32 rootCount = 0;
3166 	off_t totalCommitted = 0;
3167 	page_num_t totalPages = 0;
3168 
3169 	VMCache* cache = gDebugCacheList;
3170 	while (cache) {
3171 		totalCount++;
3172 		if (cache->source == NULL) {
3173 			cache_info stackInfo;
3174 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3175 				? sCacheInfoTable[rootCount] : stackInfo;
3176 			rootCount++;
3177 			info.cache = cache;
3178 			info.page_count = 0;
3179 			info.committed = 0;
3180 			update_cache_info_recursively(cache, info);
3181 			totalCommitted += info.committed;
3182 			totalPages += info.page_count;
3183 		}
3184 
3185 		cache = cache->debug_next;
3186 	}
3187 
3188 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3189 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3190 			sortByPageCount
3191 				? &cache_info_compare_page_count
3192 				: &cache_info_compare_committed);
3193 	}
3194 
3195 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3196 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3197 	kprintf("%lu caches (%lu root caches), sorted by %s per cache "
3198 		"tree...\n\n", totalCount, rootCount,
3199 		sortByPageCount ? "page count" : "committed size");
3200 
3201 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3202 		for (uint32 i = 0; i < rootCount; i++) {
3203 			cache_info& info = sCacheInfoTable[i];
3204 			dump_caches_recursively(info.cache, info, 0);
3205 		}
3206 	} else
3207 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3208 
3209 	return 0;
3210 }
3211 
3212 #endif	// DEBUG_CACHE_LIST
3213 
3214 
3215 static int
3216 dump_cache(int argc, char** argv)
3217 {
3218 	VMCache* cache;
3219 	bool showPages = false;
3220 	int i = 1;
3221 
3222 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3223 		kprintf("usage: %s [-ps] <address>\n"
3224 			"  if -p is specified, all pages are shown, if -s is used\n"
3225 			"  only the cache info is shown respectively.\n", argv[0]);
3226 		return 0;
3227 	}
3228 	while (argv[i][0] == '-') {
3229 		char* arg = argv[i] + 1;
3230 		while (arg[0]) {
3231 			if (arg[0] == 'p')
3232 				showPages = true;
3233 			arg++;
3234 		}
3235 		i++;
3236 	}
3237 	if (argv[i] == NULL) {
3238 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3239 		return 0;
3240 	}
3241 
3242 	addr_t address = parse_expression(argv[i]);
3243 	if (address == 0)
3244 		return 0;
3245 
3246 	cache = (VMCache*)address;
3247 
3248 	cache->Dump(showPages);
3249 
3250 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3251 
3252 	return 0;
3253 }
3254 
3255 
3256 static void
3257 dump_area_struct(VMArea* area, bool mappings)
3258 {
3259 	kprintf("AREA: %p\n", area);
3260 	kprintf("name:\t\t'%s'\n", area->name);
3261 	kprintf("owner:\t\t0x%lx\n", area->address_space->ID());
3262 	kprintf("id:\t\t0x%lx\n", area->id);
3263 	kprintf("base:\t\t0x%lx\n", area->Base());
3264 	kprintf("size:\t\t0x%lx\n", area->Size());
3265 	kprintf("protection:\t0x%lx\n", area->protection);
3266 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3267 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3268 	kprintf("cache:\t\t%p\n", area->cache);
3269 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3270 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
3271 	kprintf("cache_next:\t%p\n", area->cache_next);
3272 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3273 
3274 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3275 	if (mappings) {
3276 		kprintf("page mappings:\n");
3277 		while (iterator.HasNext()) {
3278 			vm_page_mapping* mapping = iterator.Next();
3279 			kprintf("  %p", mapping->page);
3280 		}
3281 		kprintf("\n");
3282 	} else {
3283 		uint32 count = 0;
3284 		while (iterator.Next() != NULL) {
3285 			count++;
3286 		}
3287 		kprintf("page mappings:\t%lu\n", count);
3288 	}
3289 }
3290 
3291 
3292 static int
3293 dump_area(int argc, char** argv)
3294 {
3295 	bool mappings = false;
3296 	bool found = false;
3297 	int32 index = 1;
3298 	VMArea* area;
3299 	addr_t num;
3300 
3301 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3302 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3303 			"All areas matching either id/address/name are listed. You can\n"
3304 			"force to check only a specific item by prefixing the specifier\n"
3305 			"with the id/contains/address/name keywords.\n"
3306 			"-m shows the area's mappings as well.\n");
3307 		return 0;
3308 	}
3309 
3310 	if (!strcmp(argv[1], "-m")) {
3311 		mappings = true;
3312 		index++;
3313 	}
3314 
3315 	int32 mode = 0xf;
3316 	if (!strcmp(argv[index], "id"))
3317 		mode = 1;
3318 	else if (!strcmp(argv[index], "contains"))
3319 		mode = 2;
3320 	else if (!strcmp(argv[index], "name"))
3321 		mode = 4;
3322 	else if (!strcmp(argv[index], "address"))
3323 		mode = 0;
3324 	if (mode != 0xf)
3325 		index++;
3326 
3327 	if (index >= argc) {
3328 		kprintf("No area specifier given.\n");
3329 		return 0;
3330 	}
3331 
3332 	num = parse_expression(argv[index]);
3333 
3334 	if (mode == 0) {
3335 		dump_area_struct((struct VMArea*)num, mappings);
3336 	} else {
3337 		// walk through the area list, looking for the arguments as a name
3338 
3339 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3340 		while ((area = it.Next()) != NULL) {
3341 			if (((mode & 4) != 0 && area->name != NULL
3342 					&& !strcmp(argv[index], area->name))
3343 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3344 					|| (((mode & 2) != 0 && area->Base() <= num
3345 						&& area->Base() + area->Size() > num))))) {
3346 				dump_area_struct(area, mappings);
3347 				found = true;
3348 			}
3349 		}
3350 
3351 		if (!found)
3352 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3353 	}
3354 
3355 	return 0;
3356 }
3357 
3358 
3359 static int
3360 dump_area_list(int argc, char** argv)
3361 {
3362 	VMArea* area;
3363 	const char* name = NULL;
3364 	int32 id = 0;
3365 
3366 	if (argc > 1) {
3367 		id = parse_expression(argv[1]);
3368 		if (id == 0)
3369 			name = argv[1];
3370 	}
3371 
3372 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
3373 
3374 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3375 	while ((area = it.Next()) != NULL) {
3376 		if ((id != 0 && area->address_space->ID() != id)
3377 			|| (name != NULL && strstr(area->name, name) == NULL))
3378 			continue;
3379 
3380 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id,
3381 			(void*)area->Base(), (void*)area->Size(), area->protection,
3382 			area->wiring, area->name);
3383 	}
3384 	return 0;
3385 }
3386 
3387 
3388 static int
3389 dump_available_memory(int argc, char** argv)
3390 {
3391 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3392 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3393 	return 0;
3394 }
3395 
3396 
3397 /*!	Deletes all areas and reserved regions in the given address space.
3398 
3399 	The caller must ensure that none of the areas has any wired ranges.
3400 
3401 	\param addressSpace The address space.
3402 	\param deletingAddressSpace \c true, if the address space is in the process
3403 		of being deleted.
3404 */
3405 void
3406 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3407 {
3408 	TRACE(("vm_delete_areas: called on address space 0x%lx\n",
3409 		addressSpace->ID()));
3410 
3411 	addressSpace->WriteLock();
3412 
3413 	// remove all reserved areas in this address space
3414 	addressSpace->UnreserveAllAddressRanges(0);
3415 
3416 	// delete all the areas in this address space
3417 	while (VMArea* area = addressSpace->FirstArea()) {
3418 		ASSERT(!area->IsWired());
3419 		delete_area(addressSpace, area, deletingAddressSpace);
3420 	}
3421 
3422 	addressSpace->WriteUnlock();
3423 }
3424 
3425 
3426 static area_id
3427 vm_area_for(addr_t address, bool kernel)
3428 {
3429 	team_id team;
3430 	if (IS_USER_ADDRESS(address)) {
3431 		// we try the user team address space, if any
3432 		team = VMAddressSpace::CurrentID();
3433 		if (team < 0)
3434 			return team;
3435 	} else
3436 		team = VMAddressSpace::KernelID();
3437 
3438 	AddressSpaceReadLocker locker(team);
3439 	if (!locker.IsLocked())
3440 		return B_BAD_TEAM_ID;
3441 
3442 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3443 	if (area != NULL) {
3444 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3445 			return B_ERROR;
3446 
3447 		return area->id;
3448 	}
3449 
3450 	return B_ERROR;
3451 }
3452 
3453 
3454 /*!	Frees physical pages that were used during the boot process.
3455 	\a end is inclusive.
3456 */
3457 static void
3458 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3459 {
3460 	// free all physical pages in the specified range
3461 
3462 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3463 		phys_addr_t physicalAddress;
3464 		uint32 flags;
3465 
3466 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3467 			&& (flags & PAGE_PRESENT) != 0) {
3468 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3469 			if (page != NULL && page->State() != PAGE_STATE_FREE
3470 					 && page->State() != PAGE_STATE_CLEAR
3471 					 && page->State() != PAGE_STATE_UNUSED) {
3472 				DEBUG_PAGE_ACCESS_START(page);
3473 				vm_page_set_state(page, PAGE_STATE_FREE);
3474 			}
3475 		}
3476 	}
3477 
3478 	// unmap the memory
3479 	map->Unmap(start, end);
3480 }
3481 
3482 
3483 void
3484 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3485 {
3486 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3487 	addr_t end = start + (size - 1);
3488 	addr_t lastEnd = start;
3489 
3490 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3491 		(void*)start, (void*)end));
3492 
3493 	// The areas are sorted in virtual address space order, so
3494 	// we just have to find the holes between them that fall
3495 	// into the range we should dispose of
3496 
3497 	map->Lock();
3498 
3499 	for (VMAddressSpace::AreaIterator it
3500 				= VMAddressSpace::Kernel()->GetAreaIterator();
3501 			VMArea* area = it.Next();) {
3502 		addr_t areaStart = area->Base();
3503 		addr_t areaEnd = areaStart + (area->Size() - 1);
3504 
3505 		if (areaEnd < start)
3506 			continue;
3507 
3508 		if (areaStart > end) {
3509 			// we are done, the area is already beyond what we have to free
3510 			break;
3511 		}
3512 
3513 		if (areaStart > lastEnd) {
3514 			// this is something we can free
3515 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3516 				(void*)areaStart));
3517 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3518 		}
3519 
3520 		if (areaEnd >= end) {
3521 			lastEnd = areaEnd;
3522 				// no +1 to prevent potential overflow
3523 			break;
3524 		}
3525 
3526 		lastEnd = areaEnd + 1;
3527 	}
3528 
3529 	if (lastEnd < end) {
3530 		// we can also get rid of some space at the end of the area
3531 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3532 			(void*)end));
3533 		unmap_and_free_physical_pages(map, lastEnd, end);
3534 	}
3535 
3536 	map->Unlock();
3537 }
3538 
3539 
3540 static void
3541 create_preloaded_image_areas(struct preloaded_image* image)
3542 {
3543 	char name[B_OS_NAME_LENGTH];
3544 	void* address;
3545 	int32 length;
3546 
3547 	// use file name to create a good area name
3548 	char* fileName = strrchr(image->name, '/');
3549 	if (fileName == NULL)
3550 		fileName = image->name;
3551 	else
3552 		fileName++;
3553 
3554 	length = strlen(fileName);
3555 	// make sure there is enough space for the suffix
3556 	if (length > 25)
3557 		length = 25;
3558 
3559 	memcpy(name, fileName, length);
3560 	strcpy(name + length, "_text");
3561 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3562 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3563 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3564 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3565 		// this will later be remapped read-only/executable by the
3566 		// ELF initialization code
3567 
3568 	strcpy(name + length, "_data");
3569 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3570 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3571 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3572 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3573 }
3574 
3575 
3576 /*!	Frees all areas previously created for the kernel arguments, as described
3577 	by the kernel_args structure. Any boot loader resources contained in those
3578 	arguments must not be accessed anymore past this point.
3579 */
3580 void
3581 vm_free_kernel_args(kernel_args* args)
3582 {
3583 	uint32 i;
3584 
3585 	TRACE(("vm_free_kernel_args()\n"));
3586 
3587 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3588 		area_id area = area_for((void*)args->kernel_args_range[i].start);
3589 		if (area >= B_OK)
3590 			delete_area(area);
3591 	}
3592 }
3593 
3594 
3595 static void
3596 allocate_kernel_args(kernel_args* args)
3597 {
3598 	TRACE(("allocate_kernel_args()\n"));
3599 
3600 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3601 		void* address = (void*)args->kernel_args_range[i].start;
3602 
3603 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3604 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3605 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3606 	}
3607 }
3608 
3609 
3610 static void
3611 unreserve_boot_loader_ranges(kernel_args* args)
3612 {
3613 	TRACE(("unreserve_boot_loader_ranges()\n"));
3614 
3615 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3616 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3617 			(void*)args->virtual_allocated_range[i].start,
3618 			args->virtual_allocated_range[i].size);
3619 	}
3620 }
3621 
3622 
3623 static void
3624 reserve_boot_loader_ranges(kernel_args* args)
3625 {
3626 	TRACE(("reserve_boot_loader_ranges()\n"));
3627 
3628 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3629 		void* address = (void*)args->virtual_allocated_range[i].start;
3630 
3631 		// If the address is not a kernel address, we just skip it. The
3632 		// architecture specific code has to deal with it.
3633 		if (!IS_KERNEL_ADDRESS(address)) {
3634 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
3635 				address, args->virtual_allocated_range[i].size);
3636 			continue;
3637 		}
3638 
3639 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3640 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3641 		if (status < B_OK)
3642 			panic("could not reserve boot loader ranges\n");
3643 	}
3644 }
3645 
3646 
3647 static addr_t
3648 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3649 {
3650 	size = PAGE_ALIGN(size);
3651 
3652 	// find a slot in the virtual allocation addr range
3653 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3654 		// check to see if the space between this one and the last is big enough
3655 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3656 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3657 			+ args->virtual_allocated_range[i - 1].size;
3658 
3659 		addr_t base = alignment > 0
3660 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3661 
3662 		if (base >= KERNEL_BASE && base < rangeStart
3663 				&& rangeStart - base >= size) {
3664 			args->virtual_allocated_range[i - 1].size
3665 				+= base + size - previousRangeEnd;
3666 			return base;
3667 		}
3668 	}
3669 
3670 	// We didn't find one between the allocation ranges. That's OK;
3671 	// see if there's a gap after the last one.
3672 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3673 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3674 		+ args->virtual_allocated_range[lastEntryIndex].size;
3675 	addr_t base = alignment > 0
3676 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3677 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3678 		args->virtual_allocated_range[lastEntryIndex].size
3679 			+= base + size - lastRangeEnd;
3680 		return base;
3681 	}
3682 
3683 	// see if there's a gap before the first one
3684 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3685 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3686 		base = rangeStart - size;
3687 		if (alignment > 0)
3688 			base = ROUNDDOWN(base, alignment);
3689 
3690 		if (base >= KERNEL_BASE) {
3691 			args->virtual_allocated_range[0].start = base;
3692 			args->virtual_allocated_range[0].size += rangeStart - base;
3693 			return base;
3694 		}
3695 	}
3696 
3697 	return 0;
3698 }
3699 
3700 
3701 static bool
3702 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3703 {
3704 	// TODO: horrible brute-force method of determining if the page can be
3705 	// allocated
3706 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3707 		if (address >= args->physical_memory_range[i].start
3708 			&& address < args->physical_memory_range[i].start
3709 				+ args->physical_memory_range[i].size)
3710 			return true;
3711 	}
3712 	return false;
3713 }
3714 
3715 
3716 page_num_t
3717 vm_allocate_early_physical_page(kernel_args* args)
3718 {
3719 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3720 		phys_addr_t nextPage;
3721 
3722 		nextPage = args->physical_allocated_range[i].start
3723 			+ args->physical_allocated_range[i].size;
3724 		// see if the page after this allocated paddr run can be allocated
3725 		if (i + 1 < args->num_physical_allocated_ranges
3726 			&& args->physical_allocated_range[i + 1].size != 0) {
3727 			// see if the next page will collide with the next allocated range
3728 			if (nextPage >= args->physical_allocated_range[i+1].start)
3729 				continue;
3730 		}
3731 		// see if the next physical page fits in the memory block
3732 		if (is_page_in_physical_memory_range(args, nextPage)) {
3733 			// we got one!
3734 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3735 			return nextPage / B_PAGE_SIZE;
3736 		}
3737 	}
3738 
3739 	return 0;
3740 		// could not allocate a block
3741 }
3742 
3743 
3744 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3745 	allocate some pages before the VM is completely up.
3746 */
3747 addr_t
3748 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3749 	uint32 attributes, addr_t alignment)
3750 {
3751 	if (physicalSize > virtualSize)
3752 		physicalSize = virtualSize;
3753 
3754 	// find the vaddr to allocate at
3755 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3756 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3757 
3758 	// map the pages
3759 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3760 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3761 		if (physicalAddress == 0)
3762 			panic("error allocating early page!\n");
3763 
3764 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3765 
3766 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3767 			physicalAddress * B_PAGE_SIZE, attributes,
3768 			&vm_allocate_early_physical_page);
3769 	}
3770 
3771 	return virtualBase;
3772 }
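

/*	Example (illustrative sketch only, not part of the original source): an
	early-boot consumer could grab a page-aligned scratch buffer like this,
	while the VM is still being brought up:

		addr_t buffer = vm_allocate_early(args, B_PAGE_SIZE, B_PAGE_SIZE,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
		if (buffer == 0)
			panic("out of early virtual address space!");
*/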
3773 
3774 
3775 /*!	The main entrance point to initialize the VM. */
3776 status_t
3777 vm_init(kernel_args* args)
3778 {
3779 	struct preloaded_image* image;
3780 	void* address;
3781 	status_t err = 0;
3782 	uint32 i;
3783 
3784 	TRACE(("vm_init: entry\n"));
3785 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3786 	err = arch_vm_init(args);
3787 
3788 	// initialize some globals
3789 	vm_page_init_num_pages(args);
3790 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3791 
3792 	slab_init(args);
3793 
3794 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3795 	size_t heapSize = INITIAL_HEAP_SIZE;
3796 	// try to accommodate low memory systems
3797 	while (heapSize > sAvailableMemory / 8)
3798 		heapSize /= 2;
3799 	if (heapSize < 1024 * 1024)
3800 		panic("vm_init: go buy some RAM please.");
3801 
3802 	// map in the new heap and initialize it
3803 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3804 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3805 	TRACE(("heap at 0x%lx\n", heapBase));
3806 	heap_init(heapBase, heapSize);
3807 #endif
3808 
3809 	// initialize the free page list and physical page mapper
3810 	vm_page_init(args);
3811 
3812 	// initialize the cache allocators
3813 	vm_cache_init(args);
3814 
3815 	{
3816 		status_t error = VMAreaHash::Init();
3817 		if (error != B_OK)
3818 			panic("vm_init: error initializing area hash table\n");
3819 	}
3820 
3821 	VMAddressSpace::Init();
3822 	reserve_boot_loader_ranges(args);
3823 
3824 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3825 	heap_init_post_area();
3826 #endif
3827 
3828 	// Do any further initialization that the architecture dependent layers may
3829 	// need now
3830 	arch_vm_translation_map_init_post_area(args);
3831 	arch_vm_init_post_area(args);
3832 	vm_page_init_post_area(args);
3833 	slab_init_post_area();
3834 
3835 	// allocate areas to represent stuff that already exists
3836 
3837 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3838 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
3839 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
3840 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3841 #endif
3842 
3843 	allocate_kernel_args(args);
3844 
3845 	create_preloaded_image_areas(&args->kernel_image);
3846 
3847 	// allocate areas for preloaded images
3848 	for (image = args->preloaded_images; image != NULL; image = image->next)
3849 		create_preloaded_image_areas(image);
3850 
3851 	// allocate kernel stacks
3852 	for (i = 0; i < args->num_cpus; i++) {
3853 		char name[64];
3854 
3855 		sprintf(name, "idle thread %lu kstack", i + 1);
3856 		address = (void*)args->cpu_kstack[i].start;
3857 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
3858 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3859 	}
3860 
3861 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
3862 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
3863 
3864 #if PARANOID_KERNEL_MALLOC
3865 	vm_block_address_range("uninitialized heap memory",
3866 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3867 #endif
3868 #if PARANOID_KERNEL_FREE
3869 	vm_block_address_range("freed heap memory",
3870 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3871 #endif
3872 
3873 	// create the object cache for the page mappings
3874 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
3875 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
3876 		NULL, NULL);
3877 	if (gPageMappingsObjectCache == NULL)
3878 		panic("failed to create page mappings object cache");
3879 
3880 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
3881 
3882 #if DEBUG_CACHE_LIST
3883 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
3884 		virtual_address_restrictions virtualRestrictions = {};
3885 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
3886 		physical_address_restrictions physicalRestrictions = {};
3887 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
3888 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
3889 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
3890 			CREATE_AREA_DONT_WAIT, &virtualRestrictions, &physicalRestrictions,
3891 			(void**)&sCacheInfoTable);
3892 	}
3893 #endif	// DEBUG_CACHE_LIST
3894 
3895 	// add some debugger commands
3896 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
3897 	add_debugger_command("area", &dump_area,
3898 		"Dump info about a particular area");
3899 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
3900 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
3901 #if DEBUG_CACHE_LIST
3902 	if (sCacheInfoTable != NULL) {
3903 		add_debugger_command_etc("caches", &dump_caches,
3904 			"List all VMCache trees",
3905 			"[ \"-c\" ]\n"
3906 			"All cache trees are listed sorted in decreasing order by number "
3907 				"of\n"
3908 			"used pages or, if \"-c\" is specified, by size of committed "
3909 				"memory.\n",
3910 			0);
3911 	}
3912 #endif
3913 	add_debugger_command("avail", &dump_available_memory,
3914 		"Dump available memory");
3915 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
3916 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
3917 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
3918 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
3919 	add_debugger_command("string", &display_mem, "dump strings");
3920 
3921 	TRACE(("vm_init: exit\n"));
3922 
3923 	vm_cache_init_post_heap();
3924 
3925 	return err;
3926 }
3927 
3928 
3929 status_t
3930 vm_init_post_sem(kernel_args* args)
3931 {
3932 	// This frees all unused boot loader resources and makes their space
3933 	// available again
3934 	arch_vm_init_end(args);
3935 	unreserve_boot_loader_ranges(args);
3936 
3937 	// Fill in all of the semaphores that were not allocated before;
3938 	// since we're still single threaded and only the kernel address space
3939 	// exists, it isn't that hard to find all of the ones we need to create.
3940 
3941 	arch_vm_translation_map_init_post_sem(args);
3942 
3943 	slab_init_post_sem();
3944 
3945 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3946 	heap_init_post_sem();
3947 #endif
3948 
3949 	return B_OK;
3950 }
3951 
3952 
3953 status_t
3954 vm_init_post_thread(kernel_args* args)
3955 {
3956 	vm_page_init_post_thread(args);
3957 	slab_init_post_thread();
3958 	return heap_init_post_thread();
3959 }
3960 
3961 
3962 status_t
3963 vm_init_post_modules(kernel_args* args)
3964 {
3965 	return arch_vm_init_post_modules(args);
3966 }
3967 
3968 
3969 void
3970 permit_page_faults(void)
3971 {
3972 	Thread* thread = thread_get_current_thread();
3973 	if (thread != NULL)
3974 		atomic_add(&thread->page_faults_allowed, 1);
3975 }
3976 
3977 
3978 void
3979 forbid_page_faults(void)
3980 {
3981 	Thread* thread = thread_get_current_thread();
3982 	if (thread != NULL)
3983 		atomic_add(&thread->page_faults_allowed, -1);
3984 }
3985 
3986 
3987 status_t
3988 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
3989 	addr_t* newIP)
3990 {
3991 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
3992 		faultAddress));
3993 
3994 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
3995 
3996 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
3997 	VMAddressSpace* addressSpace = NULL;
3998 
3999 	status_t status = B_OK;
4000 	*newIP = 0;
4001 	atomic_add((int32*)&sPageFaults, 1);
4002 
4003 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4004 		addressSpace = VMAddressSpace::GetKernel();
4005 	} else if (IS_USER_ADDRESS(pageAddress)) {
4006 		addressSpace = VMAddressSpace::GetCurrent();
4007 		if (addressSpace == NULL) {
4008 			if (!isUser) {
4009 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4010 					"memory!\n");
4011 				status = B_BAD_ADDRESS;
4012 				TPF(PageFaultError(-1,
4013 					VMPageFaultTracing
4014 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4015 			} else {
4016 				// XXX weird state.
4017 				panic("vm_page_fault: non kernel thread accessing user memory "
4018 					"that doesn't exist!\n");
4019 				status = B_BAD_ADDRESS;
4020 			}
4021 		}
4022 	} else {
4023 		// The hit was probably in the 64k DMZ between kernel and user space;
4024 		// this keeps a user space thread from passing a buffer that crosses
4025 		// into kernel space.
4026 		status = B_BAD_ADDRESS;
4027 		TPF(PageFaultError(-1,
4028 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4029 	}
4030 
4031 	if (status == B_OK) {
4032 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser,
4033 			NULL);
4034 	}
4035 
4036 	if (status < B_OK) {
4037 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4038 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
4039 			strerror(status), address, faultAddress, isWrite, isUser,
4040 			thread_get_current_thread_id());
4041 		if (!isUser) {
4042 			Thread* thread = thread_get_current_thread();
4043 			if (thread != NULL && thread->fault_handler != 0) {
4044 				// this will cause the arch dependent page fault handler to
4045 				// modify the IP on the interrupt frame or whatever to return
4046 				// to this address
4047 				*newIP = thread->fault_handler;
4048 			} else {
4049 				// unhandled page fault in the kernel
4050 				panic("vm_page_fault: unhandled page fault in kernel space at "
4051 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4052 			}
4053 		} else {
4054 #if 1
4055 			addressSpace->ReadLock();
4056 
4057 			// TODO: remove me once we have proper userland debugging support
4058 			// (and tools)
4059 			VMArea* area = addressSpace->LookupArea(faultAddress);
4060 
4061 			Thread* thread = thread_get_current_thread();
4062 			dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) "
4063 				"tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n",
4064 				thread->name, thread->id, thread->team->Name(),
4065 				thread->team->id, isWrite ? "write" : "read", address,
4066 				faultAddress, area ? area->name : "???",
4067 				faultAddress - (area ? area->Base() : 0x0));
4068 
4069 			// We can print a stack trace of the userland thread here.
4070 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4071 // fault and someone is already waiting for a write lock on the same address
4072 // space. This thread will then try to acquire the lock again and will
4073 // be queued after the writer.
4074 #	if 0
4075 			if (area) {
4076 				struct stack_frame {
4077 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4078 						struct stack_frame*	previous;
4079 						void*				return_address;
4080 					#else
4081 						// ...
4082 					#warning writeme
4083 					#endif
4084 				} frame;
4085 #		ifdef __INTEL__
4086 				struct iframe* iframe = i386_get_user_iframe();
4087 				if (iframe == NULL)
4088 					panic("iframe is NULL!");
4089 
4090 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4091 					sizeof(struct stack_frame));
4092 #		elif defined(__POWERPC__)
4093 				struct iframe* iframe = ppc_get_user_iframe();
4094 				if (iframe == NULL)
4095 					panic("iframe is NULL!");
4096 
4097 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4098 					sizeof(struct stack_frame));
4099 #		else
4100 #			warning "vm_page_fault() stack trace won't work"
4101 				status = B_ERROR;
4102 #		endif
4103 
4104 				dprintf("stack trace:\n");
4105 				int32 maxFrames = 50;
4106 				while (status == B_OK && --maxFrames >= 0
4107 						&& frame.return_address != NULL) {
4108 					dprintf("  %p", frame.return_address);
4109 					area = addressSpace->LookupArea(
4110 						(addr_t)frame.return_address);
4111 					if (area) {
4112 						dprintf(" (%s + %#lx)", area->name,
4113 							(addr_t)frame.return_address - area->Base());
4114 					}
4115 					dprintf("\n");
4116 
4117 					status = user_memcpy(&frame, frame.previous,
4118 						sizeof(struct stack_frame));
4119 				}
4120 			}
4121 #	endif	// 0 (stack trace)
4122 
4123 			addressSpace->ReadUnlock();
4124 #endif
4125 
4126 			// TODO: the fault_callback is a temporary solution for vm86
4127 			if (thread->fault_callback == NULL
4128 				|| thread->fault_callback(address, faultAddress, isWrite)) {
4129 				// If the thread has a signal handler for SIGSEGV, we simply
4130 				// send it the signal. Otherwise we notify the user debugger
4131 				// first.
4132 				struct sigaction action;
4133 				if ((sigaction(SIGSEGV, NULL, &action) == 0
4134 						&& action.sa_handler != SIG_DFL
4135 						&& action.sa_handler != SIG_IGN)
4136 					|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4137 						SIGSEGV)) {
4138 					Signal signal(SIGSEGV,
4139 						status == B_PERMISSION_DENIED
4140 							? SEGV_ACCERR : SEGV_MAPERR,
4141 						EFAULT, thread->team->id);
4142 					signal.SetAddress((void*)address);
4143 					send_signal_to_thread(thread, signal, 0);
4144 				}
4145 			}
4146 		}
4147 	}
4148 
4149 	if (addressSpace != NULL)
4150 		addressSpace->Put();
4151 
4152 	return B_HANDLED_INTERRUPT;
4153 }
4154 
4155 
4156 struct PageFaultContext {
4157 	AddressSpaceReadLocker	addressSpaceLocker;
4158 	VMCacheChainLocker		cacheChainLocker;
4159 
4160 	VMTranslationMap*		map;
4161 	VMCache*				topCache;
4162 	off_t					cacheOffset;
4163 	vm_page_reservation		reservation;
4164 	bool					isWrite;
4165 
4166 	// return values
4167 	vm_page*				page;
4168 	bool					restart;
4169 
4170 
4171 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4172 		:
4173 		addressSpaceLocker(addressSpace, true),
4174 		map(addressSpace->TranslationMap()),
4175 		isWrite(isWrite)
4176 	{
4177 	}
4178 
4179 	~PageFaultContext()
4180 	{
4181 		UnlockAll();
4182 		vm_page_unreserve_pages(&reservation);
4183 	}
4184 
4185 	void Prepare(VMCache* topCache, off_t cacheOffset)
4186 	{
4187 		this->topCache = topCache;
4188 		this->cacheOffset = cacheOffset;
4189 		page = NULL;
4190 		restart = false;
4191 
4192 		cacheChainLocker.SetTo(topCache);
4193 	}
4194 
4195 	void UnlockAll(VMCache* exceptCache = NULL)
4196 	{
4197 		topCache = NULL;
4198 		addressSpaceLocker.Unlock();
4199 		cacheChainLocker.Unlock(exceptCache);
4200 	}
4201 };
4202 
4203 
4204 /*!	Gets the page that should be mapped into the area.
4205 	Returns an error code other than \c B_OK, if the page couldn't be found or
4206 	paged in. The locking state of the address space and the caches is undefined
4207 	in that case.
4208 	Returns \c B_OK with \c context.restart set to \c true, if the function
4209 	had to unlock the address space and all caches and is supposed to be called
4210 	again.
4211 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4212 	found. It is returned in \c context.page. The address space will still be
4213 	locked as well as all caches starting from the top cache to at least the
4214 	cache the page lives in.
4215 */
4216 static status_t
4217 fault_get_page(PageFaultContext& context)
4218 {
4219 	VMCache* cache = context.topCache;
4220 	VMCache* lastCache = NULL;
4221 	vm_page* page = NULL;
4222 
4223 	while (cache != NULL) {
4224 		// We already hold the lock of the cache at this point.
4225 
4226 		lastCache = cache;
4227 
4228 		page = cache->LookupPage(context.cacheOffset);
4229 		if (page != NULL && page->busy) {
4230 			// page must be busy -- wait for it to become unbusy
4231 			context.UnlockAll(cache);
4232 			cache->ReleaseRefLocked();
4233 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4234 
4235 			// restart the whole process
4236 			context.restart = true;
4237 			return B_OK;
4238 		}
4239 
4240 		if (page != NULL)
4241 			break;
4242 
4243 		// The current cache does not contain the page we're looking for.
4244 
4245 		// see if the backing store has it
4246 		if (cache->HasPage(context.cacheOffset)) {
4247 			// insert a fresh page and mark it busy -- we're going to read it in
4248 			page = vm_page_allocate_page(&context.reservation,
4249 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4250 			cache->InsertPage(page, context.cacheOffset);
4251 
4252 			// We need to unlock all caches and the address space while reading
4253 			// the page in. Keep a reference to the cache around.
4254 			cache->AcquireRefLocked();
4255 			context.UnlockAll();
4256 
4257 			// read the page in
4258 			generic_io_vec vec;
4259 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4260 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4261 
4262 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4263 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4264 
4265 			cache->Lock();
4266 
4267 			if (status < B_OK) {
4268 				// on error remove and free the page
4269 				dprintf("reading page from cache %p returned: %s!\n",
4270 					cache, strerror(status));
4271 
4272 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4273 				cache->RemovePage(page);
4274 				vm_page_set_state(page, PAGE_STATE_FREE);
4275 
4276 				cache->ReleaseRefAndUnlock();
4277 				return status;
4278 			}
4279 
4280 			// mark the page unbusy again
4281 			cache->MarkPageUnbusy(page);
4282 
4283 			DEBUG_PAGE_ACCESS_END(page);
4284 
4285 			// Since we needed to unlock everything temporarily, the area
4286 			// situation might have changed. So we need to restart the whole
4287 			// process.
4288 			cache->ReleaseRefAndUnlock();
4289 			context.restart = true;
4290 			return B_OK;
4291 		}
4292 
4293 		cache = context.cacheChainLocker.LockSourceCache();
4294 	}
4295 
4296 	if (page == NULL) {
4297 		// There was no adequate page; determine the cache for a clean one.
4298 		// Read-only pages go into the deepest cache; only the topmost cache
4299 		// may have direct write access.
4300 		cache = context.isWrite ? context.topCache : lastCache;
4301 
4302 		// allocate a clean page
4303 		page = vm_page_allocate_page(&context.reservation,
4304 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4305 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4306 			page->physical_page_number));
4307 
4308 		// insert the new page into our cache
4309 		cache->InsertPage(page, context.cacheOffset);
4310 	} else if (page->Cache() != context.topCache && context.isWrite) {
4311 		// We have a page that has the data we want, but in the wrong cache
4312 		// object so we need to copy it and stick it into the top cache.
4313 		vm_page* sourcePage = page;
4314 
4315 		// TODO: If memory is low, it might be a good idea to steal the page
4316 		// from our source cache -- if possible, that is.
4317 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4318 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4319 
4320 		// To not needlessly kill concurrency we unlock all caches but the top
4321 		// one while copying the page. Lacking another mechanism to ensure that
4322 		// the source page doesn't disappear, we mark it busy.
4323 		sourcePage->busy = true;
4324 		context.cacheChainLocker.UnlockKeepRefs(true);
4325 
4326 		// copy the page
4327 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4328 			sourcePage->physical_page_number * B_PAGE_SIZE);
4329 
4330 		context.cacheChainLocker.RelockCaches(true);
4331 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4332 
4333 		// insert the new page into our cache
4334 		context.topCache->InsertPage(page, context.cacheOffset);
4335 	} else
4336 		DEBUG_PAGE_ACCESS_START(page);
4337 
4338 	context.page = page;
4339 	return B_OK;
4340 }
4341 
4342 
4343 /*!	Makes sure the address in the given address space is mapped.
4344 
4345 	\param addressSpace The address space.
4346 	\param originalAddress The address. Doesn't need to be page aligned.
4347 	\param isWrite If \c true the address shall be write-accessible.
4348 	\param isUser If \c true the access is requested by a userland team.
4349 	\param wirePage On success, if non \c NULL, the wired count of the page
4350 		mapped at the given address is incremented and the page is returned
4351 		via this parameter.
4352 	\param wiredRange If given, this wiredRange is ignored when checking whether
4353 		an already mapped page at the virtual address can be unmapped.
4354 	\return \c B_OK on success, another error code otherwise.
4355 */
4356 static status_t
4357 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4358 	bool isWrite, bool isUser, vm_page** wirePage, VMAreaWiredRange* wiredRange)
4359 {
4360 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
4361 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
4362 
4363 	PageFaultContext context(addressSpace, isWrite);
4364 
4365 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4366 	status_t status = B_OK;
4367 
4368 	addressSpace->IncrementFaultCount();
4369 
4370 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4371 	// the pages upfront makes sure we don't have any cache locked, so that the
4372 	// page daemon/thief can do their job without problems.
4373 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4374 		originalAddress);
4375 	context.addressSpaceLocker.Unlock();
4376 	vm_page_reserve_pages(&context.reservation, reservePages,
4377 		addressSpace == VMAddressSpace::Kernel()
4378 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4379 
4380 	while (true) {
4381 		context.addressSpaceLocker.Lock();
4382 
4383 		// get the area the fault was in
4384 		VMArea* area = addressSpace->LookupArea(address);
4385 		if (area == NULL) {
4386 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4387 				"space\n", originalAddress);
4388 			TPF(PageFaultError(-1,
4389 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4390 			status = B_BAD_ADDRESS;
4391 			break;
4392 		}
4393 
4394 		// check permissions
4395 		uint32 protection = get_area_page_protection(area, address);
4396 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4397 			dprintf("user access on kernel area 0x%lx at %p\n", area->id,
4398 				(void*)originalAddress);
4399 			TPF(PageFaultError(area->id,
4400 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4401 			status = B_PERMISSION_DENIED;
4402 			break;
4403 		}
4404 		if (isWrite && (protection
4405 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4406 			dprintf("write access attempted on write-protected area 0x%lx at"
4407 				" %p\n", area->id, (void*)originalAddress);
4408 			TPF(PageFaultError(area->id,
4409 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4410 			status = B_PERMISSION_DENIED;
4411 			break;
4412 		} else if (!isWrite && (protection
4413 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4414 			dprintf("read access attempted on read-protected area 0x%lx at"
4415 				" %p\n", area->id, (void*)originalAddress);
4416 			TPF(PageFaultError(area->id,
4417 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4418 			status = B_PERMISSION_DENIED;
4419 			break;
4420 		}
4421 
4422 		// We have the area, it was a valid access, so let's try to resolve the
4423 		// page fault now.
4424 		// At first, the top most cache from the area is investigated.
4425 
4426 		context.Prepare(vm_area_get_locked_cache(area),
4427 			address - area->Base() + area->cache_offset);
4428 
4429 		// See if this cache has a fault handler -- this will do all the work
4430 		// for us.
4431 		{
4432 			// Note, since the page fault is resolved with interrupts enabled,
4433 			// the fault handler could be called more than once for the same
4434 			// reason -- the store must take this into account.
4435 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4436 			if (status != B_BAD_HANDLER)
4437 				break;
4438 		}
4439 
4440 		// The top most cache has no fault handler, so let's see if the cache or
4441 		// its sources already have the page we're searching for (we're going
4442 		// from top to bottom).
4443 		status = fault_get_page(context);
4444 		if (status != B_OK) {
4445 			TPF(PageFaultError(area->id, status));
4446 			break;
4447 		}
4448 
4449 		if (context.restart)
4450 			continue;
4451 
4452 		// All went fine, all there is left to do is to map the page into the
4453 		// address space.
4454 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4455 			context.page));
4456 
4457 		// If the page doesn't reside in the area's cache, we need to make sure
4458 		// it's mapped in read-only, so that we cannot overwrite someone else's
4459 		// data (copy-on-write)
4460 		uint32 newProtection = protection;
4461 		if (context.page->Cache() != context.topCache && !isWrite)
4462 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4463 
4464 		bool unmapPage = false;
4465 		bool mapPage = true;
4466 
4467 		// check whether there's already a page mapped at the address
4468 		context.map->Lock();
4469 
4470 		phys_addr_t physicalAddress;
4471 		uint32 flags;
4472 		vm_page* mappedPage = NULL;
4473 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4474 			&& (flags & PAGE_PRESENT) != 0
4475 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4476 				!= NULL) {
4477 			// Yep there's already a page. If it's ours, we can simply adjust
4478 			// its protection. Otherwise we have to unmap it.
4479 			if (mappedPage == context.page) {
4480 				context.map->ProtectPage(area, address, newProtection);
4481 					// Note: We assume that ProtectPage() is atomic (i.e.
4482 					// the page isn't temporarily unmapped), otherwise we'd have
4483 					// to make sure it isn't wired.
4484 				mapPage = false;
4485 			} else
4486 				unmapPage = true;
4487 		}
4488 
4489 		context.map->Unlock();
4490 
4491 		if (unmapPage) {
4492 			// If the page is wired, we can't unmap it. Wait until it is unwired
4493 			// again and restart.
4494 			VMAreaUnwiredWaiter waiter;
4495 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4496 					wiredRange)) {
4497 				// unlock everything and wait
4498 				context.UnlockAll();
4499 				waiter.waitEntry.Wait();
4500 				continue;
4501 			}
4502 
4503 			// Note: The mapped page is a page of a lower cache. We are
4504 			// guaranteed to have that cache locked, our new page is a copy of
4505 			// that page, and the page is not busy. The logic for that guarantee
4506 			// is as follows: Since the page is mapped, it must live in the top
4507 			// cache (ruled out above) or any of its lower caches, and there is
4508 			// (was before the new page was inserted) no other page in any
4509 			// cache between the top cache and the page's cache (otherwise that
4510 			// would be mapped instead). That in turn means that our algorithm
4511 			// must have found it and therefore it cannot be busy either.
4512 			DEBUG_PAGE_ACCESS_START(mappedPage);
4513 			unmap_page(area, address);
4514 			DEBUG_PAGE_ACCESS_END(mappedPage);
4515 		}
4516 
4517 		if (mapPage) {
4518 			if (map_page(area, context.page, address, newProtection,
4519 					&context.reservation) != B_OK) {
4520 				// Mapping can only fail when the page mapping object couldn't
4521 				// be allocated. Apart from the missing mapping everything is
4522 				// fine, though. If this was a regular page fault, we'll simply
4523 				// leave and probably fault again. To make sure we'll have more
4524 				// luck then, we ensure that the minimum object reserve is
4525 				// available.
4526 				DEBUG_PAGE_ACCESS_END(context.page);
4527 
4528 				context.UnlockAll();
4529 
4530 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4531 						!= B_OK) {
4532 					// Apparently the situation is serious. Let's get ourselves
4533 					// killed.
4534 					status = B_NO_MEMORY;
4535 				} else if (wirePage != NULL) {
4536 					// The caller expects us to wire the page. Since
4537 					// object_cache_reserve() succeeded, we should now be able
4538 					// to allocate a mapping structure. Restart.
4539 					continue;
4540 				}
4541 
4542 				break;
4543 			}
4544 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4545 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4546 
4547 		// also wire the page, if requested
4548 		if (wirePage != NULL && status == B_OK) {
4549 			increment_page_wired_count(context.page);
4550 			*wirePage = context.page;
4551 		}
4552 
4553 		DEBUG_PAGE_ACCESS_END(context.page);
4554 
4555 		break;
4556 	}
4557 
4558 	return status;
4559 }
4560 
4561 
4562 status_t
4563 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4564 {
4565 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4566 }
4567 
4568 status_t
4569 vm_put_physical_page(addr_t vaddr, void* handle)
4570 {
4571 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4572 }
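

/*	Example (illustrative sketch only): temporarily mapping a physical page to
	access it, releasing the mapping afterwards. "physicalAddress" is a
	placeholder for the page-aligned address of interest.

		addr_t virtualAddress;
		void* handle;
		if (vm_get_physical_page(physicalAddress, &virtualAddress,
				&handle) == B_OK) {
			// ... access the page via virtualAddress ...
			vm_put_physical_page(virtualAddress, handle);
		}
*/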
4573 
4574 
4575 status_t
4576 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4577 	void** _handle)
4578 {
4579 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4580 }
4581 
4582 status_t
4583 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4584 {
4585 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4586 }
4587 
4588 
4589 status_t
4590 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4591 {
4592 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4593 }
4594 
4595 status_t
4596 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4597 {
4598 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4599 }
4600 
4601 
4602 void
4603 vm_get_info(system_memory_info* info)
4604 {
4605 	swap_get_info(info);
4606 
4607 	info->max_memory = vm_page_num_pages() * B_PAGE_SIZE;
4608 	info->page_faults = sPageFaults;
4609 
4610 	MutexLocker locker(sAvailableMemoryLock);
4611 	info->free_memory = sAvailableMemory;
4612 	info->needed_memory = sNeededMemory;
4613 }
4614 
4615 
4616 uint32
4617 vm_num_page_faults(void)
4618 {
4619 	return sPageFaults;
4620 }
4621 
4622 
4623 off_t
4624 vm_available_memory(void)
4625 {
4626 	MutexLocker locker(sAvailableMemoryLock);
4627 	return sAvailableMemory;
4628 }
4629 
4630 
4631 off_t
4632 vm_available_not_needed_memory(void)
4633 {
4634 	MutexLocker locker(sAvailableMemoryLock);
4635 	return sAvailableMemory - sNeededMemory;
4636 }
4637 
4638 
4639 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4640 	debugger.
4641 */
4642 off_t
4643 vm_available_not_needed_memory_debug(void)
4644 {
4645 	return sAvailableMemory - sNeededMemory;
4646 }
4647 
4648 
4649 size_t
4650 vm_kernel_address_space_left(void)
4651 {
4652 	return VMAddressSpace::Kernel()->FreeSpace();
4653 }
4654 
4655 
4656 void
4657 vm_unreserve_memory(size_t amount)
4658 {
4659 	mutex_lock(&sAvailableMemoryLock);
4660 
4661 	sAvailableMemory += amount;
4662 
4663 	mutex_unlock(&sAvailableMemoryLock);
4664 }
4665 
4666 
4667 status_t
4668 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4669 {
4670 	size_t reserve = kMemoryReserveForPriority[priority];
4671 
4672 	MutexLocker locker(sAvailableMemoryLock);
4673 
4674 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4675 
4676 	if (sAvailableMemory >= amount + reserve) {
4677 		sAvailableMemory -= amount;
4678 		return B_OK;
4679 	}
4680 
4681 	if (timeout <= 0)
4682 		return B_NO_MEMORY;
4683 
4684 	// turn timeout into an absolute timeout
4685 	timeout += system_time();
4686 
4687 	// loop until we've got the memory or the timeout occurs
4688 	do {
4689 		sNeededMemory += amount;
4690 
4691 		// call the low resource manager
4692 		locker.Unlock();
4693 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4694 			B_ABSOLUTE_TIMEOUT, timeout);
4695 		locker.Lock();
4696 
4697 		sNeededMemory -= amount;
4698 
4699 		if (sAvailableMemory >= amount + reserve) {
4700 			sAvailableMemory -= amount;
4701 			return B_OK;
4702 		}
4703 	} while (timeout > system_time());
4704 
4705 	return B_NO_MEMORY;
4706 }
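

/*	Example (illustrative sketch only): pairing vm_try_reserve_memory() with
	vm_unreserve_memory(). The one second timeout and the error handling are
	assumptions of this sketch, not prescribed by the implementation above.

		if (vm_try_reserve_memory(size, VM_PRIORITY_USER, 1000000) != B_OK)
			return B_NO_MEMORY;
		// ... commit the memory; if that fails later, give the
		// reservation back:
		vm_unreserve_memory(size);
*/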
4707 
4708 
4709 status_t
4710 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4711 {
4712 	// NOTE: The caller is responsible for synchronizing calls to this function!
4713 
4714 	AddressSpaceReadLocker locker;
4715 	VMArea* area;
4716 	status_t status = locker.SetFromArea(id, area);
4717 	if (status != B_OK)
4718 		return status;
4719 
4720 	// nothing to do, if the type doesn't change
4721 	uint32 oldType = area->MemoryType();
4722 	if (type == oldType)
4723 		return B_OK;
4724 
4725 	// set the memory type of the area and the mapped pages
4726 	VMTranslationMap* map = area->address_space->TranslationMap();
4727 	map->Lock();
4728 	area->SetMemoryType(type);
4729 	map->ProtectArea(area, area->protection);
4730 	map->Unlock();
4731 
4732 	// set the physical memory type
4733 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4734 	if (error != B_OK) {
4735 		// reset the memory type of the area and the mapped pages
4736 		map->Lock();
4737 		area->SetMemoryType(oldType);
4738 		map->ProtectArea(area, area->protection);
4739 		map->Unlock();
4740 		return error;
4741 	}
4742 
4743 	return B_OK;
4744 
4745 }
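

/*	Example (illustrative sketch only, assuming the B_MTR_WC memory type
	constant from OS.h): a graphics driver could mark its mapped frame buffer
	area write-combined. The caller is responsible for serializing such calls.

		status_t status = vm_set_area_memory_type(frameBufferArea,
			frameBufferPhysicalBase, B_MTR_WC);
		if (status != B_OK)
			dprintf("setting memory type failed: %s\n", strerror(status));
*/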
4746 
4747 
4748 /*!	This function enforces some protection properties:
4749 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4750 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4751 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4752 	   and B_KERNEL_WRITE_AREA.
4753 */
4754 static void
4755 fix_protection(uint32* protection)
4756 {
4757 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4758 		if ((*protection & B_USER_PROTECTION) == 0
4759 			|| (*protection & B_WRITE_AREA) != 0)
4760 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4761 		else
4762 			*protection |= B_KERNEL_READ_AREA;
4763 	}
4764 }
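

/*	Examples (illustrative only) of what fix_protection() yields:
	B_READ_AREA | B_WRITE_AREA -> kernel read/write added as well,
	B_READ_AREA alone -> only B_KERNEL_READ_AREA added,
	no protection at all -> B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA.
*/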
4765 
4766 
4767 static void
4768 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4769 {
4770 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4771 	info->area = area->id;
4772 	info->address = (void*)area->Base();
4773 	info->size = area->Size();
4774 	info->protection = area->protection;
4775 	info->lock = B_FULL_LOCK;
4776 	info->team = area->address_space->ID();
4777 	info->copy_count = 0;
4778 	info->in_count = 0;
4779 	info->out_count = 0;
4780 		// TODO: retrieve real values here!
4781 
4782 	VMCache* cache = vm_area_get_locked_cache(area);
4783 
4784 	// Note, this is a simplification; the cache could be larger than this area
4785 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4786 
4787 	vm_area_put_locked_cache(cache);
4788 }
4789 
4790 
4791 static status_t
4792 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4793 {
4794 	// is newSize a multiple of B_PAGE_SIZE?
4795 	if (newSize & (B_PAGE_SIZE - 1))
4796 		return B_BAD_VALUE;
4797 
4798 	// lock all affected address spaces and the cache
4799 	VMArea* area;
4800 	VMCache* cache;
4801 
4802 	MultiAddressSpaceLocker locker;
4803 	AreaCacheLocker cacheLocker;
4804 
4805 	status_t status;
4806 	size_t oldSize;
4807 	bool anyKernelArea;
4808 	bool restart;
4809 
4810 	do {
4811 		anyKernelArea = false;
4812 		restart = false;
4813 
4814 		locker.Unset();
4815 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4816 		if (status != B_OK)
4817 			return status;
4818 		cacheLocker.SetTo(cache, true);	// already locked
4819 
4820 		// enforce restrictions
4821 		if (!kernel) {
4822 			if ((area->protection & B_KERNEL_AREA) != 0)
4823 				return B_NOT_ALLOWED;
4824 			// TODO: Enforce all restrictions (team, etc.)!
4825 		}
4826 
4827 		oldSize = area->Size();
4828 		if (newSize == oldSize)
4829 			return B_OK;
4830 
4831 		if (cache->type != CACHE_TYPE_RAM)
4832 			return B_NOT_ALLOWED;
4833 
4834 		if (oldSize < newSize) {
4835 			// We need to check if all areas of this cache can be resized.
4836 			for (VMArea* current = cache->areas; current != NULL;
4837 					current = current->cache_next) {
4838 				if (!current->address_space->CanResizeArea(current, newSize))
4839 					return B_ERROR;
4840 				anyKernelArea
4841 					|= current->address_space == VMAddressSpace::Kernel();
4842 			}
4843 		} else {
4844 			// We're shrinking the areas, so we must make sure the affected
4845 			// ranges are not wired.
4846 			for (VMArea* current = cache->areas; current != NULL;
4847 					current = current->cache_next) {
4848 				anyKernelArea
4849 					|= current->address_space == VMAddressSpace::Kernel();
4850 
4851 				if (wait_if_area_range_is_wired(current,
4852 						current->Base() + newSize, oldSize - newSize, &locker,
4853 						&cacheLocker)) {
4854 					restart = true;
4855 					break;
4856 				}
4857 			}
4858 		}
4859 	} while (restart);
4860 
4861 	// Okay, looks good so far, so let's do it
4862 
4863 	int priority = kernel && anyKernelArea
4864 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
4865 	uint32 allocationFlags = kernel && anyKernelArea
4866 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
4867 
4868 	if (oldSize < newSize) {
4869 		// Growing the cache can fail, so we do it first.
4870 		status = cache->Resize(cache->virtual_base + newSize, priority);
4871 		if (status != B_OK)
4872 			return status;
4873 	}
4874 
4875 	for (VMArea* current = cache->areas; current != NULL;
4876 			current = current->cache_next) {
4877 		status = current->address_space->ResizeArea(current, newSize,
4878 			allocationFlags);
4879 		if (status != B_OK)
4880 			break;
4881 
4882 		// We also need to unmap all pages beyond the new size, if the area has
4883 		// shrunk
4884 		if (newSize < oldSize) {
4885 			VMCacheChainLocker cacheChainLocker(cache);
4886 			cacheChainLocker.LockAllSourceCaches();
4887 
4888 			unmap_pages(current, current->Base() + newSize,
4889 				oldSize - newSize);
4890 
4891 			cacheChainLocker.Unlock(cache);
4892 		}
4893 	}
4894 
4895 	if (status == B_OK) {
4896 		// Shrink or grow individual page protections if in use.
4897 		if (area->page_protections != NULL) {
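			// Note: page protections are stored as 4 bits per page, i.e. two
			// pages per byte -- hence the (pageCount + 1) / 2 sizing below.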
4898 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
4899 			uint8* newProtections
4900 				= (uint8*)realloc(area->page_protections, bytes);
4901 			if (newProtections == NULL)
4902 				status = B_NO_MEMORY;
4903 			else {
4904 				area->page_protections = newProtections;
4905 
4906 				if (oldSize < newSize) {
4907 					// init the additional page protections to that of the area
4908 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
4909 					uint32 areaProtection = area->protection
4910 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
4911 					memset(area->page_protections + offset,
4912 						areaProtection | (areaProtection << 4), bytes - offset);
4913 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
4914 						uint8& entry = area->page_protections[offset - 1];
4915 						entry = (entry & 0x0f) | (areaProtection << 4);
4916 					}
4917 				}
4918 			}
4919 		}
4920 	}
4921 
4922 	// shrinking the cache can't fail, so we do it now
4923 	if (status == B_OK && newSize < oldSize)
4924 		status = cache->Resize(cache->virtual_base + newSize, priority);
4925 
4926 	if (status != B_OK) {
4927 		// Something failed -- resize the areas back to their original size.
4928 		// This can fail, too, in which case we're seriously screwed.
4929 		for (VMArea* current = cache->areas; current != NULL;
4930 				current = current->cache_next) {
4931 			if (current->address_space->ResizeArea(current, oldSize,
4932 					allocationFlags) != B_OK) {
4933 				panic("vm_resize_area(): Failed and unable to restore "
4934 					"original state.");
4935 			}
4936 		}
4937 
4938 		cache->Resize(cache->virtual_base + oldSize, priority);
4939 	}
4940 
4941 	// TODO: we must honour the lock restrictions of this area
4942 	return status;
4943 }
4944 
4945 
4946 status_t
4947 vm_memset_physical(phys_addr_t address, int value, size_t length)
4948 {
4949 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
4950 }
4951 
4952 
4953 status_t
4954 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
4955 {
4956 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
4957 }
4958 
4959 
4960 status_t
4961 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
4962 	bool user)
4963 {
4964 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
4965 }
4966 
4967 
4968 void
4969 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
4970 {
4971 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
4972 }
4973 
4974 
4975 /*!	Copies a range of memory directly from/to a page that might not be mapped
4976 	at the moment.
4977 
4978 	For \a unsafeMemory the current mapping (if any) is ignored. The function
4979 	walks through the respective area's cache chain to find the physical page
4980 	and copies from/to it directly.
4981 	The memory range starting at \a unsafeMemory with a length of \a size bytes
4982 	must not cross a page boundary.
4983 
4984 	\param teamID The team ID identifying the address space \a unsafeMemory is
4985 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
4986 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
4987 		is passed, the address space of the thread returned by
4988 		debug_get_debugged_thread() is used.
4989 	\param unsafeMemory The start of the unsafe memory range to be copied
4990 		from/to.
4991 	\param buffer A safely accessible kernel buffer to be copied from/to.
4992 	\param size The number of bytes to be copied.
4993 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
4994 		\a unsafeMemory, the other way around otherwise.
4995 */
4996 status_t
4997 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
4998 	size_t size, bool copyToUnsafe)
4999 {
5000 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5001 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5002 		return B_BAD_VALUE;
5003 	}
5004 
5005 	// get the address space for the debugged thread
5006 	VMAddressSpace* addressSpace;
5007 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5008 		addressSpace = VMAddressSpace::Kernel();
5009 	} else if (teamID == B_CURRENT_TEAM) {
5010 		Thread* thread = debug_get_debugged_thread();
5011 		if (thread == NULL || thread->team == NULL)
5012 			return B_BAD_ADDRESS;
5013 
5014 		addressSpace = thread->team->address_space;
5015 	} else
5016 		addressSpace = VMAddressSpace::DebugGet(teamID);
5017 
5018 	if (addressSpace == NULL)
5019 		return B_BAD_ADDRESS;
5020 
5021 	// get the area
5022 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5023 	if (area == NULL)
5024 		return B_BAD_ADDRESS;
5025 
5026 	// search the page
5027 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5028 		+ area->cache_offset;
5029 	VMCache* cache = area->cache;
5030 	vm_page* page = NULL;
5031 	while (cache != NULL) {
5032 		page = cache->DebugLookupPage(cacheOffset);
5033 		if (page != NULL)
5034 			break;
5035 
5036 		// Page not found in this cache -- if it is paged out, we must not try
5037 		// to get it from lower caches.
5038 		if (cache->DebugHasPage(cacheOffset))
5039 			break;
5040 
5041 		cache = cache->source;
5042 	}
5043 
5044 	if (page == NULL)
5045 		return B_UNSUPPORTED;
5046 
5047 	// copy from/to physical memory
5048 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5049 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5050 
5051 	if (copyToUnsafe) {
5052 		if (page->Cache() != area->cache)
5053 			return B_UNSUPPORTED;
5054 
5055 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5056 	}
5057 
5058 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5059 }
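

/*	Example (illustrative sketch only): reading a 32 bit value from the
	debugged thread's address space while in the kernel debugger.
	"unsafeAddress" is a placeholder for the address being inspected.

		uint32 value;
		if (vm_debug_copy_page_memory(B_CURRENT_TEAM, unsafeAddress, &value,
				sizeof(value), false) == B_OK)
			kprintf("value: %#" B_PRIx32 "\n", value);
*/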
5060 
5061 
5062 //	#pragma mark - kernel public API
5063 
5064 
5065 status_t
5066 user_memcpy(void* to, const void* from, size_t size)
5067 {
5068 	// don't allow address overflows
5069 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5070 		return B_BAD_ADDRESS;
5071 
5072 	if (arch_cpu_user_memcpy(to, from, size,
5073 			&thread_get_current_thread()->fault_handler) < B_OK)
5074 		return B_BAD_ADDRESS;
5075 
5076 	return B_OK;
5077 }
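

/*	Example (illustrative sketch only): a syscall copying a small argument
	structure from userland. "userArgs" and "some_args" are placeholders.

		some_args args;
		if (!IS_USER_ADDRESS(userArgs)
			|| user_memcpy(&args, userArgs, sizeof(args)) != B_OK)
			return B_BAD_ADDRESS;
*/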
5078 
5079 
5080 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5081 	the string in \a to, NULL-terminating the result.
5082 
5083 	\param to Pointer to the destination C-string.
5084 	\param from Pointer to the source C-string.
5085 	\param size Size in bytes of the string buffer pointed to by \a to.
5086 
5087 	\return strlen(\a from).
5088 */
5089 ssize_t
5090 user_strlcpy(char* to, const char* from, size_t size)
5091 {
5092 	if (to == NULL && size != 0)
5093 		return B_BAD_VALUE;
5094 	if (from == NULL)
5095 		return B_BAD_ADDRESS;
5096 
5097 	// limit size to avoid address overflows
5098 	size_t maxSize = std::min(size,
5099 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5100 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5101 		// the source address might still overflow.
5102 
5103 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize,
5104 		&thread_get_current_thread()->fault_handler);
5105 
5106 	// If we hit the address overflow boundary, fail.
5107 	if (result >= 0 && (size_t)result >= maxSize && maxSize < size)
5108 		return B_BAD_ADDRESS;
5109 
5110 	return result;
5111 }
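

/*	Example (illustrative sketch only): copying a user supplied path into a
	kernel buffer and detecting truncation via the strlen()-style return
	value. "userPath" is a placeholder.

		char path[B_PATH_NAME_LENGTH];
		ssize_t length = user_strlcpy(path, userPath, sizeof(path));
		if (length < 0)
			return B_BAD_ADDRESS;
		if ((size_t)length >= sizeof(path))
			return B_NAME_TOO_LONG;
*/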
5112 
5113 
5114 status_t
5115 user_memset(void* s, char c, size_t count)
5116 {
5117 	// don't allow address overflows
5118 	if ((addr_t)s + count < (addr_t)s)
5119 		return B_BAD_ADDRESS;
5120 
5121 	if (arch_cpu_user_memset(s, c, count,
5122 			&thread_get_current_thread()->fault_handler) < B_OK)
5123 		return B_BAD_ADDRESS;
5124 
5125 	return B_OK;
5126 }
5127 
5128 
5129 /*!	Wires a single page at the given address.
5130 
5131 	\param team The team whose address space the address belongs to. Supports
5132 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5133 		parameter is ignored.
5134 	\param address The virtual address to wire down. Does not need to
5135 		be page aligned.
5136 	\param writable If \c true the page shall be writable.
5137 	\param info On success the info is filled in, among other things
5138 		containing the physical address the given virtual one translates to.
5139 	\return \c B_OK, when the page could be wired, another error code otherwise.
5140 */
5141 status_t
5142 vm_wire_page(team_id team, addr_t address, bool writable,
5143 	VMPageWiringInfo* info)
5144 {
5145 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5146 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5147 
5148 	// compute the page protection that is required
5149 	bool isUser = IS_USER_ADDRESS(address);
5150 	uint32 requiredProtection = PAGE_PRESENT
5151 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5152 	if (writable)
5153 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5154 
5155 	// get and read lock the address space
5156 	VMAddressSpace* addressSpace = NULL;
5157 	if (isUser) {
5158 		if (team == B_CURRENT_TEAM)
5159 			addressSpace = VMAddressSpace::GetCurrent();
5160 		else
5161 			addressSpace = VMAddressSpace::Get(team);
5162 	} else
5163 		addressSpace = VMAddressSpace::GetKernel();
5164 	if (addressSpace == NULL)
5165 		return B_ERROR;
5166 
5167 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5168 
5169 	VMTranslationMap* map = addressSpace->TranslationMap();
5170 	status_t error = B_OK;
5171 
5172 	// get the area
5173 	VMArea* area = addressSpace->LookupArea(pageAddress);
5174 	if (area == NULL) {
5175 		addressSpace->Put();
5176 		return B_BAD_ADDRESS;
5177 	}
5178 
5179 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5180 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5181 
5182 	// mark the area range wired
5183 	area->Wire(&info->range);
5184 
5185 	// Lock the area's cache chain and the translation map. Needed to look
5186 	// up the page and play with its wired count.
5187 	cacheChainLocker.LockAllSourceCaches();
5188 	map->Lock();
5189 
5190 	phys_addr_t physicalAddress;
5191 	uint32 flags;
5192 	vm_page* page;
5193 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5194 		&& (flags & requiredProtection) == requiredProtection
5195 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5196 			!= NULL) {
5197 		// Already mapped with the correct permissions -- just increment
5198 		// the page's wired count.
5199 		increment_page_wired_count(page);
5200 
5201 		map->Unlock();
5202 		cacheChainLocker.Unlock();
5203 		addressSpaceLocker.Unlock();
5204 	} else {
5205 		// Let vm_soft_fault() map the page for us, if possible. We need
5206 		// to fully unlock to avoid deadlocks. Since we have already
5207 		// wired the area itself, nothing disturbing will happen with it
5208 		// in the meantime.
5209 		map->Unlock();
5210 		cacheChainLocker.Unlock();
5211 		addressSpaceLocker.Unlock();
5212 
5213 		error = vm_soft_fault(addressSpace, pageAddress, writable, isUser,
5214 			&page, &info->range);
5215 
5216 		if (error != B_OK) {
5217 			// The page could not be mapped -- clean up.
5218 			VMCache* cache = vm_area_get_locked_cache(area);
5219 			area->Unwire(&info->range);
5220 			cache->ReleaseRefAndUnlock();
5221 			addressSpace->Put();
5222 			return error;
5223 		}
5224 	}
5225 
5226 	info->physicalAddress
5227 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5228 			+ address % B_PAGE_SIZE;
5229 	info->page = page;
5230 
5231 	return B_OK;
5232 }
5233 
5234 
5235 /*!	Unwires a single page previously wired via vm_wire_page().
5236 
5237 	\param info The same object passed to vm_wire_page() before.
5238 */
5239 void
5240 vm_unwire_page(VMPageWiringInfo* info)
5241 {
5242 	// lock the address space
5243 	VMArea* area = info->range.area;
5244 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5245 		// takes over our reference
5246 
5247 	// lock the top cache
5248 	VMCache* cache = vm_area_get_locked_cache(area);
5249 	VMCacheChainLocker cacheChainLocker(cache);
5250 
5251 	if (info->page->Cache() != cache) {
5252 		// The page is not in the top cache, so we lock the whole cache chain
5253 		// before touching the page's wired count.
5254 		cacheChainLocker.LockAllSourceCaches();
5255 	}
5256 
5257 	decrement_page_wired_count(info->page);
5258 
5259 	// remove the wired range from the area
5260 	area->Unwire(&info->range);
5261 
5262 	cacheChainLocker.Unlock();
5263 }
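
#if 0
//	Illustrative sketch (editor addition, not part of the original source):
//	a caller pinning a single userland page with vm_wire_page() to obtain its
//	physical address, balancing it with vm_unwire_page(). What is done with
//	the physical address in between is left hypothetical.
static status_t
example_pin_user_page(addr_t userAddress, phys_addr_t* _physicalAddress)
{
	VMPageWiringInfo wiringInfo;

	// wire the page writable in the current team's address space
	status_t error = vm_wire_page(B_CURRENT_TEAM, userAddress, true,
		&wiringInfo);
	if (error != B_OK)
		return error;

	// The physical address (page address plus in-page offset) stays valid
	// until vm_unwire_page() is called.
	*_physicalAddress = wiringInfo.physicalAddress;

	// ... hand the address to a (hypothetical) device and wait ...

	vm_unwire_page(&wiringInfo);
	return B_OK;
}
#endif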
5264 
5265 
5266 /*!	Wires down the given address range in the specified team's address space.
5267 
5268 	If successful the function
5269 	- acquires a reference to the specified team's address space,
5270 	- adds respective wired ranges to all areas that intersect with the given
5271 	  address range,
5272 	- makes sure all pages in the given address range are mapped with the
5273 	  requested access permissions and increments their wired count.
5274 
5275 	It fails when \a team doesn't specify a valid address space, when any part
5276 	of the specified address range is not covered by areas, when the concerned
5277 	areas don't allow mapping with the requested permissions, or when mapping
5278 	fails for another reason.
5279 
5280 	When successful, the call must be balanced by an unlock_memory_etc() call
5281 	with the exact same parameters.
5282 
5283 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
5284 		is supported.
5285 	\param address The start of the address range to be wired.
5286 	\param numBytes The size of the address range to be wired.
5287 	\param flags Flags. Currently only \c B_READ_DEVICE is defined. If it is
5288 		set, the range only needs to be wired with read access; otherwise it
5289 		is wired writable (cf. the computation of \c writable below).
5290 	\return \c B_OK on success, another error code otherwise.
5291 */
5292 status_t
5293 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5294 {
5295 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5296 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5297 
5298 	// compute the page protection that is required
5299 	bool isUser = IS_USER_ADDRESS(address);
5300 	bool writable = (flags & B_READ_DEVICE) == 0;
5301 	uint32 requiredProtection = PAGE_PRESENT
5302 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5303 	if (writable)
5304 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5305 
5306 	uint32 mallocFlags = isUser
5307 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5308 
5309 	// get and read lock the address space
5310 	VMAddressSpace* addressSpace = NULL;
5311 	if (isUser) {
5312 		if (team == B_CURRENT_TEAM)
5313 			addressSpace = VMAddressSpace::GetCurrent();
5314 		else
5315 			addressSpace = VMAddressSpace::Get(team);
5316 	} else
5317 		addressSpace = VMAddressSpace::GetKernel();
5318 	if (addressSpace == NULL)
5319 		return B_ERROR;
5320 
5321 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5322 
5323 	VMTranslationMap* map = addressSpace->TranslationMap();
5324 	status_t error = B_OK;
5325 
5326 	// iterate through all concerned areas
5327 	addr_t nextAddress = lockBaseAddress;
5328 	while (nextAddress != lockEndAddress) {
5329 		// get the next area
5330 		VMArea* area = addressSpace->LookupArea(nextAddress);
5331 		if (area == NULL) {
5332 			error = B_BAD_ADDRESS;
5333 			break;
5334 		}
5335 
5336 		addr_t areaStart = nextAddress;
5337 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5338 
5339 		// allocate the wired range (do that before locking the cache to avoid
5340 		// deadlocks)
5341 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5342 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5343 		if (range == NULL) {
5344 			error = B_NO_MEMORY;
5345 			break;
5346 		}
5347 
5348 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5349 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5350 
5351 		// mark the area range wired
5352 		area->Wire(range);
5353 
5354 		// Depending on the area cache type and the wiring, we may not need to
5355 		// look at the individual pages.
5356 		if (area->cache_type == CACHE_TYPE_NULL
5357 			|| area->cache_type == CACHE_TYPE_DEVICE
5358 			|| area->wiring == B_FULL_LOCK
5359 			|| area->wiring == B_CONTIGUOUS) {
5360 			nextAddress = areaEnd;
5361 			continue;
5362 		}
5363 
5364 		// Lock the area's cache chain and the translation map. Needed to look
5365 		// up pages and play with their wired count.
5366 		cacheChainLocker.LockAllSourceCaches();
5367 		map->Lock();
5368 
5369 		// iterate through the pages and wire them
5370 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5371 			phys_addr_t physicalAddress;
5372 			uint32 flags;
5373 
5374 			vm_page* page;
5375 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5376 				&& (flags & requiredProtection) == requiredProtection
5377 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5378 					!= NULL) {
5379 				// Already mapped with the correct permissions -- just increment
5380 				// the page's wired count.
5381 				increment_page_wired_count(page);
5382 			} else {
5383 				// Let vm_soft_fault() map the page for us, if possible. We need
5384 				// to fully unlock to avoid deadlocks. Since we have already
5385 				// wired the area itself, nothing disturbing will happen with it
5386 				// in the meantime.
5387 				map->Unlock();
5388 				cacheChainLocker.Unlock();
5389 				addressSpaceLocker.Unlock();
5390 
5391 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5392 					isUser, &page, range);
5393 
5394 				addressSpaceLocker.Lock();
5395 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5396 				cacheChainLocker.LockAllSourceCaches();
5397 				map->Lock();
5398 			}
5399 
5400 			if (error != B_OK)
5401 				break;
5402 		}
5403 
5404 		map->Unlock();
5405 
5406 		if (error == B_OK) {
5407 			cacheChainLocker.Unlock();
5408 		} else {
5409 			// An error occurred, so abort right here. If the current address
5410 			// is the first in this area, unwire the area, since we won't get
5411 			// to it when reverting what we've done so far.
5412 			if (nextAddress == areaStart) {
5413 				area->Unwire(range);
5414 				cacheChainLocker.Unlock();
5415 				range->~VMAreaWiredRange();
5416 				free_etc(range, mallocFlags);
5417 			} else
5418 				cacheChainLocker.Unlock();
5419 
5420 			break;
5421 		}
5422 	}
5423 
5424 	if (error != B_OK) {
5425 		// An error occurred, so unwire all that we've already wired. Note that
5426 		// even if not a single page was wired, unlock_memory_etc() is called
5427 		// to put the address space reference.
5428 		addressSpaceLocker.Unlock();
5429 		unlock_memory_etc(team, (void*)address, nextAddress - lockBaseAddress,
5430 			flags);
5431 	}
5432 
5433 	return error;
5434 }
5435 
5436 
5437 status_t
5438 lock_memory(void* address, size_t numBytes, uint32 flags)
5439 {
5440 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5441 }
5442 
5443 
5444 /*!	Unwires an address range previously wired with lock_memory_etc().
5445 
5446 	Note that a call to this function must balance a previous lock_memory_etc()
5447 	call with exactly the same parameters.
5448 */
5449 status_t
5450 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5451 {
5452 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5453 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5454 
5455 	// compute the page protection that is required
5456 	bool isUser = IS_USER_ADDRESS(address);
5457 	bool writable = (flags & B_READ_DEVICE) == 0;
5458 	uint32 requiredProtection = PAGE_PRESENT
5459 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5460 	if (writable)
5461 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5462 
5463 	uint32 mallocFlags = isUser
5464 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5465 
5466 	// get and read lock the address space
5467 	VMAddressSpace* addressSpace = NULL;
5468 	if (isUser) {
5469 		if (team == B_CURRENT_TEAM)
5470 			addressSpace = VMAddressSpace::GetCurrent();
5471 		else
5472 			addressSpace = VMAddressSpace::Get(team);
5473 	} else
5474 		addressSpace = VMAddressSpace::GetKernel();
5475 	if (addressSpace == NULL)
5476 		return B_ERROR;
5477 
5478 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5479 
5480 	VMTranslationMap* map = addressSpace->TranslationMap();
5481 	status_t error = B_OK;
5482 
5483 	// iterate through all concerned areas
5484 	addr_t nextAddress = lockBaseAddress;
5485 	while (nextAddress != lockEndAddress) {
5486 		// get the next area
5487 		VMArea* area = addressSpace->LookupArea(nextAddress);
5488 		if (area == NULL) {
5489 			error = B_BAD_ADDRESS;
5490 			break;
5491 		}
5492 
5493 		addr_t areaStart = nextAddress;
5494 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5495 
5496 		// Lock the area's top cache. This is a requirement for
5497 		// VMArea::Unwire().
5498 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5499 
5500 		// Depending on the area cache type and the wiring, we may not need to
5501 		// look at the individual pages.
5502 		if (area->cache_type == CACHE_TYPE_NULL
5503 			|| area->cache_type == CACHE_TYPE_DEVICE
5504 			|| area->wiring == B_FULL_LOCK
5505 			|| area->wiring == B_CONTIGUOUS) {
5506 			// unwire the range (to avoid deadlocks we delete the range after
5507 			// unlocking the cache)
5508 			nextAddress = areaEnd;
5509 			VMAreaWiredRange* range = area->Unwire(areaStart,
5510 				areaEnd - areaStart, writable);
5511 			cacheChainLocker.Unlock();
5512 			if (range != NULL) {
5513 				range->~VMAreaWiredRange();
5514 				free_etc(range, mallocFlags);
5515 			}
5516 			continue;
5517 		}
5518 
5519 		// Lock the area's cache chain and the translation map. Needed to look
5520 		// up pages and play with their wired count.
5521 		cacheChainLocker.LockAllSourceCaches();
5522 		map->Lock();
5523 
5524 		// iterate through the pages and unwire them
5525 			panic("get_memory_map(): Address is greater than 4 GB!");
5526 			phys_addr_t physicalAddress;
5527 			uint32 flags;
5528 
5529 			vm_page* page;
5530 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5531 				&& (flags & PAGE_PRESENT) != 0
5532 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5533 					!= NULL) {
5534 				// The page is still mapped -- just decrement the page's
5535 				// wired count.
5536 				decrement_page_wired_count(page);
5537 			} else {
5538 				panic("unlock_memory_etc(): Failed to unwire page: address "
5539 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5540 					nextAddress);
5541 				error = B_BAD_VALUE;
5542 				break;
5543 			}
5544 		}
5545 
5546 		map->Unlock();
5547 
5548 		// All pages are unwired. Remove the area's wired range as well (to
5549 		// avoid deadlocks we delete the range after unlocking the cache).
5550 		VMAreaWiredRange* range = area->Unwire(areaStart,
5551 			areaEnd - areaStart, writable);
5552 
5553 		cacheChainLocker.Unlock();
5554 
5555 		if (range != NULL) {
5556 			range->~VMAreaWiredRange();
5557 			free_etc(range, mallocFlags);
5558 		}
5559 
5560 		if (error != B_OK)
5561 			break;
5562 	}
5563 
5564 	// get rid of the address space reference
5565 	addressSpace->Put();
5566 
5567 	return error;
5568 }
5569 
5570 
5571 status_t
5572 unlock_memory(void* address, size_t numBytes, uint32 flags)
5573 {
5574 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5575 }
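
#if 0
//	Illustrative sketch (editor addition, not part of the original source):
//	wiring a user buffer for a device transfer and releasing it again with an
//	unlock_memory_etc() call that uses the exact same parameters, as required
//	by the documentation of lock_memory_etc() above. The buffer and the
//	transfer itself are hypothetical.
static status_t
example_wire_buffer_for_io(team_id team, void* buffer, size_t length,
	bool needsWriteAccess)
{
	// With flags == 0 the range is wired writable; passing B_READ_DEVICE
	// only requires read access (cf. the computation of "writable" in
	// lock_memory_etc()).
	uint32 flags = needsWriteAccess ? 0 : B_READ_DEVICE;

	status_t error = lock_memory_etc(team, buffer, length, flags);
	if (error != B_OK)
		return error;

	// ... perform the (hypothetical) transfer ...

	return unlock_memory_etc(team, buffer, length, flags);
}
#endif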
5576 
5577 
5578 /*!	Similar to get_memory_map(), but also allows specifying the address space
5579 	for the memory in question and has saner semantics.
5580 	Returns \c B_OK when the complete range could be translated, or
5581 	\c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5582 	case the actual number of entries is written to \c *_numEntries. Any other
5583 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5584 	in this case.
5585 */
5586 status_t
5587 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5588 	physical_entry* table, uint32* _numEntries)
5589 {
5590 	uint32 numEntries = *_numEntries;
5591 	*_numEntries = 0;
5592 
5593 	VMAddressSpace* addressSpace;
5594 	addr_t virtualAddress = (addr_t)address;
5595 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5596 	phys_addr_t physicalAddress;
5597 	status_t status = B_OK;
5598 	int32 index = -1;
5599 	addr_t offset = 0;
5600 	bool interrupts = are_interrupts_enabled();
5601 
5602 	TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team,
5603 		address, numBytes, numEntries));
5604 
5605 	if (numEntries == 0 || numBytes == 0)
5606 		return B_BAD_VALUE;
5607 
5608 	// in which address space is the address to be found?
5609 	if (IS_USER_ADDRESS(virtualAddress)) {
5610 		if (team == B_CURRENT_TEAM)
5611 			addressSpace = VMAddressSpace::GetCurrent();
5612 		else
5613 			addressSpace = VMAddressSpace::Get(team);
5614 	} else
5615 		addressSpace = VMAddressSpace::GetKernel();
5616 
5617 	if (addressSpace == NULL)
5618 		return B_ERROR;
5619 
5620 	VMTranslationMap* map = addressSpace->TranslationMap();
5621 
5622 	if (interrupts)
5623 		map->Lock();
5624 
5625 	while (offset < numBytes) {
5626 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5627 		uint32 flags;
5628 
5629 		if (interrupts) {
5630 			status = map->Query((addr_t)address + offset, &physicalAddress,
5631 				&flags);
5632 		} else {
5633 			status = map->QueryInterrupt((addr_t)address + offset,
5634 				&physicalAddress, &flags);
5635 		}
5636 		if (status < B_OK)
5637 			break;
5638 		if ((flags & PAGE_PRESENT) == 0) {
5639 			panic("get_memory_map() called on unmapped memory!");
5640 			return B_BAD_ADDRESS;
5641 		}
5642 
5643 		if (index < 0 && pageOffset > 0) {
5644 			physicalAddress += pageOffset;
5645 			if (bytes > B_PAGE_SIZE - pageOffset)
5646 				bytes = B_PAGE_SIZE - pageOffset;
5647 		}
5648 
5649 		// need to switch to the next physical_entry?
5650 		if (index < 0 || table[index].address
5651 				!= physicalAddress - table[index].size) {
5652 			if ((uint32)++index + 1 > numEntries) {
5653 				// table too small
5654 				break;
5655 			}
5656 			table[index].address = physicalAddress;
5657 			table[index].size = bytes;
5658 		} else {
5659 			// page still fits into the current entry
5660 			table[index].size += bytes;
5661 		}
5662 
5663 		offset += bytes;
5664 	}
5665 
5666 	if (interrupts)
5667 		map->Unlock();
5668 
5669 	if (status != B_OK)
5670 		return status;
5671 
5672 	if ((uint32)index + 1 > numEntries) {
5673 		*_numEntries = index;
5674 		return B_BUFFER_OVERFLOW;
5675 	}
5676 
5677 	*_numEntries = index + 1;
5678 	return B_OK;
5679 }
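
#if 0
//	Illustrative sketch (editor addition, not part of the original source):
//	translating a virtual buffer into physical runs with get_memory_map_etc().
//	On B_BUFFER_OVERFLOW the table was too small, but the entries that were
//	written (reported via the in/out entry count) are still usable.
static status_t
example_print_physical_runs(const void* buffer, size_t length)
{
	physical_entry table[8];
	uint32 entryCount = 8;

	status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, length, table,
		&entryCount);
	if (error != B_OK && error != B_BUFFER_OVERFLOW)
		return error;

	for (uint32 i = 0; i < entryCount; i++) {
		dprintf("run %" B_PRIu32 ": %#" B_PRIxPHYSADDR ", %" B_PRIu64
			" bytes\n", i, table[i].address, (uint64)table[i].size);
	}

	return error;
}
#endif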
5680 
5681 
5682 /*!	According to the BeBook, this function should always succeed.
5683 	This is no longer the case.
5684 */
5685 extern "C" int32
5686 __get_memory_map_haiku(const void* address, size_t numBytes,
5687 	physical_entry* table, int32 numEntries)
5688 {
5689 	uint32 entriesRead = numEntries;
5690 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5691 		table, &entriesRead);
5692 	if (error != B_OK)
5693 		return error;
5694 
5695 	// close the entry list
5696 
5697 	// if the table has only one entry, we silently accept the missing terminator
5698 	if (numEntries == 1)
5699 		return B_OK;
5700 
5701 	if (entriesRead + 1 > (uint32)numEntries)
5702 		return B_BUFFER_OVERFLOW;
5703 
5704 	table[entriesRead].address = 0;
5705 	table[entriesRead].size = 0;
5706 
5707 	return B_OK;
5708 }
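
#if 0
//	Illustrative sketch (editor addition, not part of the original source):
//	consuming a table filled by the get_memory_map() implementation above
//	(__get_memory_map_haiku()). As implemented, the list is closed with a 0/0
//	entry when there is room for it, so callers should check both the table
//	size and the terminator.
static void
example_walk_memory_map(const void* buffer, size_t length)
{
	physical_entry table[4];
	if (get_memory_map(buffer, length, table, 4) != B_OK)
		return;

	for (int32 i = 0; i < 4 && table[i].size != 0; i++) {
		dprintf("entry %" B_PRId32 ": %#" B_PRIxPHYSADDR " + %" B_PRIu64
			" bytes\n", i, table[i].address, (uint64)table[i].size);
	}
}
#endif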
5709 
5710 
5711 area_id
5712 area_for(void* address)
5713 {
5714 	return vm_area_for((addr_t)address, true);
5715 }
5716 
5717 
5718 area_id
5719 find_area(const char* name)
5720 {
5721 	return VMAreaHash::Find(name);
5722 }
5723 
5724 
5725 status_t
5726 _get_area_info(area_id id, area_info* info, size_t size)
5727 {
5728 	if (size != sizeof(area_info) || info == NULL)
5729 		return B_BAD_VALUE;
5730 
5731 	AddressSpaceReadLocker locker;
5732 	VMArea* area;
5733 	status_t status = locker.SetFromArea(id, area);
5734 	if (status != B_OK)
5735 		return status;
5736 
5737 	fill_area_info(area, info, size);
5738 	return B_OK;
5739 }
5740 
5741 
5742 status_t
5743 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size)
5744 {
5745 	addr_t nextBase = *(addr_t*)cookie;
5746 
5747 	// we're already through the list
5748 	if (nextBase == (addr_t)-1)
5749 		return B_ENTRY_NOT_FOUND;
5750 
5751 	if (team == B_CURRENT_TEAM)
5752 		team = team_get_current_team_id();
5753 
5754 	AddressSpaceReadLocker locker(team);
5755 	if (!locker.IsLocked())
5756 		return B_BAD_TEAM_ID;
5757 
5758 	VMArea* area;
5759 	for (VMAddressSpace::AreaIterator it
5760 				= locker.AddressSpace()->GetAreaIterator();
5761 			(area = it.Next()) != NULL;) {
5762 		if (area->Base() > nextBase)
5763 			break;
5764 	}
5765 
5766 	if (area == NULL) {
5767 		nextBase = (addr_t)-1;
5768 		return B_ENTRY_NOT_FOUND;
5769 	}
5770 
5771 	fill_area_info(area, info, size);
5772 	*cookie = (int32)(area->Base());
5773 		// TODO: Not 64 bit safe!
5774 
5775 	return B_OK;
5776 }
5777 
5778 
5779 status_t
5780 set_area_protection(area_id area, uint32 newProtection)
5781 {
5782 	fix_protection(&newProtection);
5783 
5784 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5785 		newProtection, true);
5786 }
5787 
5788 
5789 status_t
5790 resize_area(area_id areaID, size_t newSize)
5791 {
5792 	return vm_resize_area(areaID, newSize, true);
5793 }
5794 
5795 
5796 /*!	Transfers the specified area to a new team. The caller must be the owner
5797 	of the area.
5798 */
5799 area_id
5800 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5801 	bool kernel)
5802 {
5803 	area_info info;
5804 	status_t status = get_area_info(id, &info);
5805 	if (status != B_OK)
5806 		return status;
5807 
5808 	if (info.team != thread_get_current_thread()->team->id)
5809 		return B_PERMISSION_DENIED;
5810 
5811 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5812 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5813 	if (clonedArea < 0)
5814 		return clonedArea;
5815 
5816 	status = vm_delete_area(info.team, id, kernel);
5817 	if (status != B_OK) {
5818 		vm_delete_area(target, clonedArea, kernel);
5819 		return status;
5820 	}
5821 
5822 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5823 
5824 	return clonedArea;
5825 }
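
#if 0
//	Illustrative sketch (editor addition, not part of the original source):
//	handing an area we own over to another team via transfer_area(). Note
//	that the area gets a new ID in the target team; the original ID becomes
//	invalid. The target team is assumed to be known to the caller.
static area_id
example_give_area_to_team(area_id ourArea, team_id target)
{
	void* address = NULL;
	return transfer_area(ourArea, &address, B_ANY_ADDRESS, target, true);
}
#endif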
5826 
5827 
5828 extern "C" area_id
5829 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
5830 	size_t numBytes, uint32 addressSpec, uint32 protection,
5831 	void** _virtualAddress)
5832 {
5833 	if (!arch_vm_supports_protection(protection))
5834 		return B_NOT_SUPPORTED;
5835 
5836 	fix_protection(&protection);
5837 
5838 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
5839 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
5840 		false);
5841 }
5842 
5843 
5844 area_id
5845 clone_area(const char* name, void** _address, uint32 addressSpec,
5846 	uint32 protection, area_id source)
5847 {
5848 	if ((protection & B_KERNEL_PROTECTION) == 0)
5849 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5850 
5851 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
5852 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
5853 }
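
#if 0
//	Illustrative sketch (editor addition, not part of the original source):
//	cloning an existing area into the kernel address space by name, using
//	find_area() and clone_area() from above. The area name is hypothetical.
//	Passing only B_KERNEL_READ_AREA yields a read-only kernel mapping; with
//	no kernel protection bits at all, clone_area() would add both read and
//	write itself (see above).
static area_id
example_clone_named_area(void** _address)
{
	area_id source = find_area("some shared area");
	if (source < 0)
		return source;

	return clone_area("example clone", _address, B_ANY_KERNEL_ADDRESS,
		B_KERNEL_READ_AREA, source);
}
#endif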
5854 
5855 
5856 area_id
5857 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock,
5858 	uint32 protection, uint32 flags,
5859 	const virtual_address_restrictions* virtualAddressRestrictions,
5860 	const physical_address_restrictions* physicalAddressRestrictions,
5861 	void** _address)
5862 {
5863 	fix_protection(&protection);
5864 
5865 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
5866 		virtualAddressRestrictions, physicalAddressRestrictions, true,
5867 		_address);
5868 }
5869 
5870 
5871 extern "C" area_id
5872 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
5873 	size_t size, uint32 lock, uint32 protection)
5874 {
5875 	fix_protection(&protection);
5876 
5877 	virtual_address_restrictions virtualRestrictions = {};
5878 	virtualRestrictions.address = *_address;
5879 	virtualRestrictions.address_specification = addressSpec;
5880 	physical_address_restrictions physicalRestrictions = {};
5881 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
5882 		lock, protection, 0, &virtualRestrictions, &physicalRestrictions, true,
5883 		_address);
5884 }
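
#if 0
//	Illustrative sketch (editor addition, not part of the original source):
//	creating a wired kernel buffer via create_area_etc(), following the
//	restrictions-struct pattern of __create_area_haiku() above. The area name
//	and size are hypothetical; B_32_BIT_CONTIGUOUS requests physically
//	contiguous memory below 4 GB (cf. __create_area_beos() further down).
static area_id
example_create_contiguous_buffer(void** _address)
{
	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
	physical_address_restrictions physicalRestrictions = {};

	return create_area_etc(VMAddressSpace::KernelID(), "example buffer",
		B_PAGE_SIZE * 4, B_32_BIT_CONTIGUOUS,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, &virtualRestrictions,
		&physicalRestrictions, _address);
}
#endif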
5885 
5886 
5887 status_t
5888 delete_area(area_id area)
5889 {
5890 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
5891 }
5892 
5893 
5894 //	#pragma mark - Userland syscalls
5895 
5896 
5897 status_t
5898 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
5899 	addr_t size)
5900 {
5901 	// filter out some unavailable values (for userland)
5902 	switch (addressSpec) {
5903 		case B_ANY_KERNEL_ADDRESS:
5904 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5905 			return B_BAD_VALUE;
5906 	}
5907 
5908 	addr_t address;
5909 
5910 	if (!IS_USER_ADDRESS(userAddress)
5911 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
5912 		return B_BAD_ADDRESS;
5913 
5914 	status_t status = vm_reserve_address_range(
5915 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
5916 		RESERVED_AVOID_BASE);
5917 	if (status != B_OK)
5918 		return status;
5919 
5920 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
5921 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5922 			(void*)address, size);
5923 		return B_BAD_ADDRESS;
5924 	}
5925 
5926 	return B_OK;
5927 }
5928 
5929 
5930 status_t
5931 _user_unreserve_address_range(addr_t address, addr_t size)
5932 {
5933 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5934 		(void*)address, size);
5935 }
5936 
5937 
5938 area_id
5939 _user_area_for(void* address)
5940 {
5941 	return vm_area_for((addr_t)address, false);
5942 }
5943 
5944 
5945 area_id
5946 _user_find_area(const char* userName)
5947 {
5948 	char name[B_OS_NAME_LENGTH];
5949 
5950 	if (!IS_USER_ADDRESS(userName)
5951 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5952 		return B_BAD_ADDRESS;
5953 
5954 	return find_area(name);
5955 }
5956 
5957 
5958 status_t
5959 _user_get_area_info(area_id area, area_info* userInfo)
5960 {
5961 	if (!IS_USER_ADDRESS(userInfo))
5962 		return B_BAD_ADDRESS;
5963 
5964 	area_info info;
5965 	status_t status = get_area_info(area, &info);
5966 	if (status < B_OK)
5967 		return status;
5968 
5969 	// TODO: do we want to prevent userland from seeing kernel protections?
5970 	//info.protection &= B_USER_PROTECTION;
5971 
5972 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5973 		return B_BAD_ADDRESS;
5974 
5975 	return status;
5976 }
5977 
5978 
5979 status_t
5980 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo)
5981 {
5982 	int32 cookie;
5983 
5984 	if (!IS_USER_ADDRESS(userCookie)
5985 		|| !IS_USER_ADDRESS(userInfo)
5986 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
5987 		return B_BAD_ADDRESS;
5988 
5989 	area_info info;
5990 	status_t status = _get_next_area_info(team, &cookie, &info,
5991 		sizeof(area_info));
5992 	if (status != B_OK)
5993 		return status;
5994 
5995 	//info.protection &= B_USER_PROTECTION;
5996 
5997 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
5998 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5999 		return B_BAD_ADDRESS;
6000 
6001 	return status;
6002 }
6003 
6004 
6005 status_t
6006 _user_set_area_protection(area_id area, uint32 newProtection)
6007 {
6008 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6009 		return B_BAD_VALUE;
6010 
6011 	fix_protection(&newProtection);
6012 
6013 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6014 		newProtection, false);
6015 }
6016 
6017 
6018 status_t
6019 _user_resize_area(area_id area, size_t newSize)
6020 {
6021 	// TODO: Since we restrict deleting of areas to those owned by the team,
6022 	// we should also do that for resizing (check other functions, too).
6023 	return vm_resize_area(area, newSize, false);
6024 }
6025 
6026 
6027 area_id
6028 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6029 	team_id target)
6030 {
6031 	// filter out some unavailable values (for userland)
6032 	switch (addressSpec) {
6033 		case B_ANY_KERNEL_ADDRESS:
6034 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6035 			return B_BAD_VALUE;
6036 	}
6037 
6038 	void* address;
6039 	if (!IS_USER_ADDRESS(userAddress)
6040 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6041 		return B_BAD_ADDRESS;
6042 
6043 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6044 	if (newArea < B_OK)
6045 		return newArea;
6046 
6047 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6048 		return B_BAD_ADDRESS;
6049 
6050 	return newArea;
6051 }
6052 
6053 
6054 area_id
6055 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6056 	uint32 protection, area_id sourceArea)
6057 {
6058 	char name[B_OS_NAME_LENGTH];
6059 	void* address;
6060 
6061 	// filter out some unavailable values (for userland)
6062 	switch (addressSpec) {
6063 		case B_ANY_KERNEL_ADDRESS:
6064 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6065 			return B_BAD_VALUE;
6066 	}
6067 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6068 		return B_BAD_VALUE;
6069 
6070 	if (!IS_USER_ADDRESS(userName)
6071 		|| !IS_USER_ADDRESS(userAddress)
6072 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6073 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6074 		return B_BAD_ADDRESS;
6075 
6076 	fix_protection(&protection);
6077 
6078 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6079 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6080 		false);
6081 	if (clonedArea < B_OK)
6082 		return clonedArea;
6083 
6084 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6085 		delete_area(clonedArea);
6086 		return B_BAD_ADDRESS;
6087 	}
6088 
6089 	return clonedArea;
6090 }
6091 
6092 
6093 area_id
6094 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6095 	size_t size, uint32 lock, uint32 protection)
6096 {
6097 	char name[B_OS_NAME_LENGTH];
6098 	void* address;
6099 
6100 	// filter out some unavailable values (for userland)
6101 	switch (addressSpec) {
6102 		case B_ANY_KERNEL_ADDRESS:
6103 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6104 			return B_BAD_VALUE;
6105 	}
6106 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6107 		return B_BAD_VALUE;
6108 
6109 	if (!IS_USER_ADDRESS(userName)
6110 		|| !IS_USER_ADDRESS(userAddress)
6111 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6112 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6113 		return B_BAD_ADDRESS;
6114 
6115 	if (addressSpec == B_EXACT_ADDRESS
6116 		&& IS_KERNEL_ADDRESS(address))
6117 		return B_BAD_VALUE;
6118 
6119 	fix_protection(&protection);
6120 
6121 	virtual_address_restrictions virtualRestrictions = {};
6122 	virtualRestrictions.address = address;
6123 	virtualRestrictions.address_specification = addressSpec;
6124 	physical_address_restrictions physicalRestrictions = {};
6125 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6126 		size, lock, protection, 0, &virtualRestrictions, &physicalRestrictions,
6127 		false, &address);
6128 
6129 	if (area >= B_OK
6130 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6131 		delete_area(area);
6132 		return B_BAD_ADDRESS;
6133 	}
6134 
6135 	return area;
6136 }
6137 
6138 
6139 status_t
6140 _user_delete_area(area_id area)
6141 {
6142 	// Unlike the BeOS implementation, you can now only delete areas
6143 	// that you have created yourself from userland.
6144 	// The documentation for delete_area() explicitly states that this
6145 	// would be restricted in the future, and so it now is.
6146 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6147 }
6148 
6149 
6150 // TODO: create a BeOS style call for this!
6151 
6152 area_id
6153 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6154 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6155 	int fd, off_t offset)
6156 {
6157 	char name[B_OS_NAME_LENGTH];
6158 	void* address;
6159 	area_id area;
6160 
6161 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6162 		return B_BAD_VALUE;
6163 
6164 	fix_protection(&protection);
6165 
6166 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6167 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6168 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6169 		return B_BAD_ADDRESS;
6170 
6171 	if (addressSpec == B_EXACT_ADDRESS) {
6172 		if ((addr_t)address + size < (addr_t)address
6173 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6174 			return B_BAD_VALUE;
6175 		}
6176 		if (!IS_USER_ADDRESS(address)
6177 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6178 			return B_BAD_ADDRESS;
6179 		}
6180 	}
6181 
6182 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6183 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6184 		false);
6185 	if (area < B_OK)
6186 		return area;
6187 
6188 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6189 		return B_BAD_ADDRESS;
6190 
6191 	return area;
6192 }
6193 
6194 
6195 status_t
6196 _user_unmap_memory(void* _address, size_t size)
6197 {
6198 	addr_t address = (addr_t)_address;
6199 
6200 	// check params
6201 	if (size == 0 || (addr_t)address + size < (addr_t)address
6202 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6203 		return B_BAD_VALUE;
6204 	}
6205 
6206 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6207 		return B_BAD_ADDRESS;
6208 
6209 	// Write lock the address space and ensure the address range is not wired.
6210 	AddressSpaceWriteLocker locker;
6211 	do {
6212 		status_t status = locker.SetTo(team_get_current_team_id());
6213 		if (status != B_OK)
6214 			return status;
6215 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6216 			size, &locker));
6217 
6218 	// unmap
6219 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6220 }
6221 
6222 
6223 status_t
6224 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6225 {
6226 	// check address range
6227 	addr_t address = (addr_t)_address;
6228 	size = PAGE_ALIGN(size);
6229 
6230 	if ((address % B_PAGE_SIZE) != 0)
6231 		return B_BAD_VALUE;
6232 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6233 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6234 		// weird error code required by POSIX
6235 		return ENOMEM;
6236 	}
6237 
6238 	// extend and check protection
6239 	if ((protection & ~B_USER_PROTECTION) != 0)
6240 		return B_BAD_VALUE;
6241 
6242 	fix_protection(&protection);
6243 
6244 	// We need to write lock the address space, since we're going to play with
6245 	// the areas. Also make sure that none of the areas is wired and that we're
6246 	// actually allowed to change the protection.
6247 	AddressSpaceWriteLocker locker;
6248 
6249 	bool restart;
6250 	do {
6251 		restart = false;
6252 
6253 		status_t status = locker.SetTo(team_get_current_team_id());
6254 		if (status != B_OK)
6255 			return status;
6256 
6257 		// First round: Check whether the whole range is covered by areas and
6258 		// whether we are allowed to modify them.
6259 		addr_t currentAddress = address;
6260 		size_t sizeLeft = size;
6261 		while (sizeLeft > 0) {
6262 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6263 			if (area == NULL)
6264 				return B_NO_MEMORY;
6265 
6266 			if ((area->protection & B_KERNEL_AREA) != 0)
6267 				return B_NOT_ALLOWED;
6268 
6269 			// TODO: For (shared) mapped files we should check whether the new
6270 			// protections are compatible with the file permissions. We don't
6271 			// have a way to do that yet, though.
6272 
6273 			addr_t offset = currentAddress - area->Base();
6274 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6275 
6276 			AreaCacheLocker cacheLocker(area);
6277 
6278 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6279 					&locker, &cacheLocker)) {
6280 				restart = true;
6281 				break;
6282 			}
6283 
6284 			cacheLocker.Unlock();
6285 
6286 			currentAddress += rangeSize;
6287 			sizeLeft -= rangeSize;
6288 		}
6289 	} while (restart);
6290 
6291 	// Second round: If the protections differ from those of the area, create a
6292 	// page protection array and re-map mapped pages.
6293 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6294 	addr_t currentAddress = address;
6295 	size_t sizeLeft = size;
6296 	while (sizeLeft > 0) {
6297 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6298 		if (area == NULL)
6299 			return B_NO_MEMORY;
6300 
6301 		addr_t offset = currentAddress - area->Base();
6302 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6303 
6304 		currentAddress += rangeSize;
6305 		sizeLeft -= rangeSize;
6306 
6307 		if (area->page_protections == NULL) {
6308 			if (area->protection == protection)
6309 				continue;
6310 
6311 			status_t status = allocate_area_page_protections(area);
6312 			if (status != B_OK)
6313 				return status;
6314 		}
6315 
6316 		// We need to lock the complete cache chain, since we potentially unmap
6317 		// pages of lower caches.
6318 		VMCache* topCache = vm_area_get_locked_cache(area);
6319 		VMCacheChainLocker cacheChainLocker(topCache);
6320 		cacheChainLocker.LockAllSourceCaches();
6321 
6322 		for (addr_t pageAddress = area->Base() + offset;
6323 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6324 			map->Lock();
6325 
6326 			set_area_page_protection(area, pageAddress, protection);
6327 
6328 			phys_addr_t physicalAddress;
6329 			uint32 flags;
6330 
6331 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6332 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6333 				map->Unlock();
6334 				continue;
6335 			}
6336 
6337 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6338 			if (page == NULL) {
6339 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6340 					"\n", area, physicalAddress);
6341 				map->Unlock();
6342 				return B_ERROR;
6343 			}
6344 
6345 			// If the page is not in the topmost cache and write access is
6346 			// requested, we have to unmap it. Otherwise we can re-map it with
6347 			// the new protection.
6348 			bool unmapPage = page->Cache() != topCache
6349 				&& (protection & B_WRITE_AREA) != 0;
6350 
6351 			if (!unmapPage)
6352 				map->ProtectPage(area, pageAddress, protection);
6353 
6354 			map->Unlock();
6355 
6356 			if (unmapPage) {
6357 				DEBUG_PAGE_ACCESS_START(page);
6358 				unmap_page(area, pageAddress);
6359 				DEBUG_PAGE_ACCESS_END(page);
6360 			}
6361 		}
6362 	}
6363 
6364 	return B_OK;
6365 }
6366 
6367 
6368 status_t
6369 _user_sync_memory(void* _address, size_t size, uint32 flags)
6370 {
6371 	addr_t address = (addr_t)_address;
6372 	size = PAGE_ALIGN(size);
6373 
6374 	// check params
6375 	if ((address % B_PAGE_SIZE) != 0)
6376 		return B_BAD_VALUE;
6377 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6378 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6379 		// weird error code required by POSIX
6380 		return ENOMEM;
6381 	}
6382 
6383 	bool writeSync = (flags & MS_SYNC) != 0;
6384 	bool writeAsync = (flags & MS_ASYNC) != 0;
6385 	if (writeSync && writeAsync)
6386 		return B_BAD_VALUE;
6387 
6388 	if (size == 0 || (!writeSync && !writeAsync))
6389 		return B_OK;
6390 
6391 	// iterate through the range and sync all concerned areas
6392 	while (size > 0) {
6393 		// read lock the address space
6394 		AddressSpaceReadLocker locker;
6395 		status_t error = locker.SetTo(team_get_current_team_id());
6396 		if (error != B_OK)
6397 			return error;
6398 
6399 		// get the first area
6400 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6401 		if (area == NULL)
6402 			return B_NO_MEMORY;
6403 
6404 		uint32 offset = address - area->Base();
6405 		size_t rangeSize = min_c(area->Size() - offset, size);
6406 		offset += area->cache_offset;
6407 
6408 		// lock the cache
6409 		AreaCacheLocker cacheLocker(area);
6410 		if (!cacheLocker)
6411 			return B_BAD_VALUE;
6412 		VMCache* cache = area->cache;
6413 
6414 		locker.Unlock();
6415 
6416 		uint32 firstPage = offset >> PAGE_SHIFT;
6417 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6418 
6419 		// write the pages
6420 		if (cache->type == CACHE_TYPE_VNODE) {
6421 			if (writeSync) {
6422 				// synchronous
6423 				error = vm_page_write_modified_page_range(cache, firstPage,
6424 					endPage);
6425 				if (error != B_OK)
6426 					return error;
6427 			} else {
6428 				// asynchronous
6429 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6430 				// TODO: This is probably not quite what is supposed to happen.
6431 				// Especially when a lot has to be written, it might take ages
6432 				// until it really hits the disk.
6433 			}
6434 		}
6435 
6436 		address += rangeSize;
6437 		size -= rangeSize;
6438 	}
6439 
6440 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
6441 	// synchronize multiple mappings of the same file. In our VM they never get
6442 	// out of sync, though, so we don't have to do anything.
6443 
6444 	return B_OK;
6445 }
6446 
6447 
6448 status_t
6449 _user_memory_advice(void* address, size_t size, uint32 advice)
6450 {
6451 	// TODO: Implement!
6452 	return B_OK;
6453 }
6454 
6455 
6456 status_t
6457 _user_get_memory_properties(team_id teamID, const void* address,
6458 	uint32* _protected, uint32* _lock)
6459 {
6460 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6461 		return B_BAD_ADDRESS;
6462 
6463 	AddressSpaceReadLocker locker;
6464 	status_t error = locker.SetTo(teamID);
6465 	if (error != B_OK)
6466 		return error;
6467 
6468 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6469 	if (area == NULL)
6470 		return B_NO_MEMORY;
6471 
6472 
6473 	uint32 protection = area->protection;
6474 	if (area->page_protections != NULL)
6475 		protection = get_area_page_protection(area, (addr_t)address);
6476 
6477 	uint32 wiring = area->wiring;
6478 
6479 	locker.Unlock();
6480 
6481 	error = user_memcpy(_protected, &protection, sizeof(protection));
6482 	if (error != B_OK)
6483 		return error;
6484 
6485 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6486 
6487 	return error;
6488 }
6489 
6490 
6491 // #pragma mark -- compatibility
6492 
6493 
6494 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6495 
6496 
6497 struct physical_entry_beos {
6498 	uint32	address;
6499 	uint32	size;
6500 };
6501 
6502 
6503 /*!	The physical_entry structure has changed. We need to translate it to the
6504 	old one.
6505 */
6506 extern "C" int32
6507 __get_memory_map_beos(const void* _address, size_t numBytes,
6508 	physical_entry_beos* table, int32 numEntries)
6509 {
6510 	if (numEntries <= 0)
6511 		return B_BAD_VALUE;
6512 
6513 	const uint8* address = (const uint8*)_address;
6514 
6515 	int32 count = 0;
6516 	while (numBytes > 0 && count < numEntries) {
6517 		physical_entry entry;
6518 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6519 		if (result < 0) {
6520 			if (result != B_BUFFER_OVERFLOW)
6521 				return result;
6522 		}
6523 
6524 		if (entry.address >= (phys_addr_t)1 << 32) {
6525 			panic("get_memory_map(): Address is greater 4 GB!");
6526 			return B_ERROR;
6527 		}
6528 
6529 		table[count].address = entry.address;
6530 		table[count++].size = entry.size;
6531 
6532 		address += entry.size;
6533 		numBytes -= entry.size;
6534 	}
6535 
6536 	// null-terminate the table, if possible
6537 	if (count < numEntries) {
6538 		table[count].address = 0;
6539 		table[count].size = 0;
6540 	}
6541 
6542 	return B_OK;
6543 }
6544 
6545 
6546 /*!	The type of the \a physicalAddress parameter has changed from void* to
6547 	phys_addr_t.
6548 */
6549 extern "C" area_id
6550 __map_physical_memory_beos(const char* name, void* physicalAddress,
6551 	size_t numBytes, uint32 addressSpec, uint32 protection,
6552 	void** _virtualAddress)
6553 {
6554 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6555 		addressSpec, protection, _virtualAddress);
6556 }
6557 
6558 
6559 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6560 	we meddle with the \a lock parameter to force 32 bit.
6561 */
6562 extern "C" area_id
6563 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6564 	size_t size, uint32 lock, uint32 protection)
6565 {
6566 	switch (lock) {
6567 		case B_NO_LOCK:
6568 			break;
6569 		case B_FULL_LOCK:
6570 		case B_LAZY_LOCK:
6571 			lock = B_32_BIT_FULL_LOCK;
6572 			break;
6573 		case B_CONTIGUOUS:
6574 			lock = B_32_BIT_CONTIGUOUS;
6575 			break;
6576 	}
6577 
6578 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6579 		protection);
6580 }
6581 
6582 
6583 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6584 	"BASE");
6585 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6586 	"map_physical_memory@", "BASE");
6587 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6588 	"BASE");
6589 
6590 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6591 	"get_memory_map@@", "1_ALPHA3");
6592 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6593 	"map_physical_memory@@", "1_ALPHA3");
6594 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6595 	"1_ALPHA3");
6596 
6597 
6598 #else
6599 
6600 
6601 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6602 	"get_memory_map@@", "BASE");
6603 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6604 	"map_physical_memory@@", "BASE");
6605 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6606 	"BASE");
6607 
6608 
6609 #endif	// defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6610