xref: /haiku/src/system/kernel/vm/vm.cpp (revision fccd8899fcb583bfb73c5c26c9fcd714b963959b)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <boot/elf.h>
31 #include <boot/stage2.h>
32 #include <condition_variable.h>
33 #include <console.h>
34 #include <debug.h>
35 #include <file_cache.h>
36 #include <fs/fd.h>
37 #include <heap.h>
38 #include <kernel.h>
39 #include <int.h>
40 #include <lock.h>
41 #include <low_resource_manager.h>
42 #include <slab/Slab.h>
43 #include <smp.h>
44 #include <system_info.h>
45 #include <thread.h>
46 #include <team.h>
47 #include <tracing.h>
48 #include <util/AutoLock.h>
49 #include <util/khash.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
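
// The TRACE()/FTRACE() macros above take a double-parenthesized argument
// list so that a whole printf-style argument list can be forwarded to
// dprintf() without variadic macros: TRACE(x) expands to "dprintf x".
// Minimal usage sketch (illustrative only; the variables are made up):
#if 0
	TRACE(("mapping area %p, size %lu\n", area, size));
	FTRACE(("page fault at %#lx\n", faultAddress));
#endif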
74 
75 
76 class AreaCacheLocking {
77 public:
78 	inline bool Lock(VMCache* lockable)
79 	{
80 		return false;
81 	}
82 
83 	inline void Unlock(VMCache* lockable)
84 	{
85 		vm_area_put_locked_cache(lockable);
86 	}
87 };
88 
89 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
90 public:
91 	inline AreaCacheLocker(VMCache* cache = NULL)
92 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
93 	{
94 	}
95 
96 	inline AreaCacheLocker(VMArea* area)
97 		: AutoLocker<VMCache, AreaCacheLocking>()
98 	{
99 		SetTo(area);
100 	}
101 
102 	inline void SetTo(VMCache* cache, bool alreadyLocked)
103 	{
104 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
105 	}
106 
107 	inline void SetTo(VMArea* area)
108 	{
109 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
110 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
111 	}
112 };
113 
114 
115 class VMCacheChainLocker {
116 public:
117 	VMCacheChainLocker()
118 		:
119 		fTopCache(NULL),
120 		fBottomCache(NULL)
121 	{
122 	}
123 
124 	VMCacheChainLocker(VMCache* topCache)
125 		:
126 		fTopCache(topCache),
127 		fBottomCache(topCache)
128 	{
129 	}
130 
131 	~VMCacheChainLocker()
132 	{
133 		Unlock();
134 	}
135 
136 	void SetTo(VMCache* topCache)
137 	{
138 		fTopCache = topCache;
139 		fBottomCache = topCache;
140 
141 		if (topCache != NULL)
142 			topCache->SetUserData(NULL);
143 	}
144 
145 	VMCache* LockSourceCache()
146 	{
147 		if (fBottomCache == NULL || fBottomCache->source == NULL)
148 			return NULL;
149 
150 		VMCache* previousCache = fBottomCache;
151 
152 		fBottomCache = fBottomCache->source;
153 		fBottomCache->Lock();
154 		fBottomCache->AcquireRefLocked();
155 		fBottomCache->SetUserData(previousCache);
156 
157 		return fBottomCache;
158 	}
159 
160 	void LockAllSourceCaches()
161 	{
162 		while (LockSourceCache() != NULL) {
163 		}
164 	}
165 
166 	void Unlock(VMCache* exceptCache = NULL)
167 	{
168 		if (fTopCache == NULL)
169 			return;
170 
171 		// Unlock caches in source -> consumer direction. This is important to
172 		// avoid double-locking and a reversal of locking order in case a cache
173 		// is eligible for merging.
174 		VMCache* cache = fBottomCache;
175 		while (cache != NULL) {
176 			VMCache* nextCache = (VMCache*)cache->UserData();
177 			if (cache != exceptCache)
178 				cache->ReleaseRefAndUnlock(cache != fTopCache);
179 
180 			if (cache == fTopCache)
181 				break;
182 
183 			cache = nextCache;
184 		}
185 
186 		fTopCache = NULL;
187 		fBottomCache = NULL;
188 	}
189 
190 	void UnlockKeepRefs(bool keepTopCacheLocked)
191 	{
192 		if (fTopCache == NULL)
193 			return;
194 
195 		VMCache* nextCache = fBottomCache;
196 		VMCache* cache = NULL;
197 
198 		while (keepTopCacheLocked
199 				? nextCache != fTopCache : cache != fTopCache) {
200 			cache = nextCache;
201 			nextCache = (VMCache*)cache->UserData();
202 			cache->Unlock(cache != fTopCache);
203 		}
204 	}
205 
206 	void RelockCaches(bool topCacheLocked)
207 	{
208 		if (fTopCache == NULL)
209 			return;
210 
211 		VMCache* nextCache = fTopCache;
212 		VMCache* cache = NULL;
213 		if (topCacheLocked) {
214 			cache = nextCache;
215 			nextCache = cache->source;
216 		}
217 
218 		while (cache != fBottomCache && nextCache != NULL) {
219 			VMCache* consumer = cache;
220 			cache = nextCache;
221 			nextCache = cache->source;
222 			cache->Lock();
223 			cache->SetUserData(consumer);
224 		}
225 	}
226 
227 private:
228 	VMCache*	fTopCache;
229 	VMCache*	fBottomCache;
230 };
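
// Typical use of VMCacheChainLocker, mirroring cut_area() below: lock the
// area's (top) cache, hand it to the locker, then lock the entire source
// chain; the destructor unlocks everything again. Sketch (illustrative only;
// "area" stands for some VMArea*):
#if 0
	VMCache* cache = vm_area_get_locked_cache(area);
	VMCacheChainLocker cacheChainLocker(cache);
	cacheChainLocker.LockAllSourceCaches();
	// ... operate on the fully locked cache chain ...
	// the caches are unlocked and their references released when
	// cacheChainLocker goes out of scope
#endif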
231 
232 
233 // The memory reserve an allocation of a certain priority must not touch.
234 static const size_t kMemoryReserveForPriority[] = {
235 	VM_MEMORY_RESERVE_USER,		// user
236 	VM_MEMORY_RESERVE_SYSTEM,	// system
237 	0							// VIP
238 };
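
// The table is presumably indexed by allocation priority (the per-entry
// comments suggest VM_PRIORITY_USER, VM_PRIORITY_SYSTEM, VM_PRIORITY_VIP in
// that order). A sketch of how such a reserve is consulted -- the real check
// lives in vm_try_reserve_memory(), which is not part of this excerpt:
#if 0
	size_t reserve = kMemoryReserveForPriority[priority];
	bool mayAllocate = sAvailableMemory - (off_t)size >= (off_t)reserve;
#endif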
239 
240 
241 ObjectCache* gPageMappingsObjectCache;
242 
243 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
244 
245 static off_t sAvailableMemory;
246 static off_t sNeededMemory;
247 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
248 static uint32 sPageFaults;
249 
250 static VMPhysicalPageMapper* sPhysicalPageMapper;
251 
252 #if DEBUG_CACHE_LIST
253 
254 struct cache_info {
255 	VMCache*	cache;
256 	addr_t		page_count;
257 	addr_t		committed;
258 };
259 
260 static const int kCacheInfoTableCount = 100 * 1024;
261 static cache_info* sCacheInfoTable;
262 
263 #endif	// DEBUG_CACHE_LIST
264 
265 
266 // function declarations
267 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
268 	bool addressSpaceCleanup);
269 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
270 	bool isWrite, bool isUser, vm_page** wirePage,
271 	VMAreaWiredRange* wiredRange = NULL);
272 static status_t map_backing_store(VMAddressSpace* addressSpace,
273 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
274 	int protection, int mapping, uint32 flags,
275 	const virtual_address_restrictions* addressRestrictions, bool kernel,
276 	VMArea** _area, void** _virtualAddress);
277 
278 
279 //	#pragma mark -
280 
281 
282 #if VM_PAGE_FAULT_TRACING
283 
284 namespace VMPageFaultTracing {
285 
286 class PageFaultStart : public AbstractTraceEntry {
287 public:
288 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
289 		:
290 		fAddress(address),
291 		fPC(pc),
292 		fWrite(write),
293 		fUser(user)
294 	{
295 		Initialized();
296 	}
297 
298 	virtual void AddDump(TraceOutput& out)
299 	{
300 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
301 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
302 	}
303 
304 private:
305 	addr_t	fAddress;
306 	addr_t	fPC;
307 	bool	fWrite;
308 	bool	fUser;
309 };
310 
311 
312 // page fault errors
313 enum {
314 	PAGE_FAULT_ERROR_NO_AREA		= 0,
315 	PAGE_FAULT_ERROR_KERNEL_ONLY,
316 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
317 	PAGE_FAULT_ERROR_READ_PROTECTED,
318 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
319 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
320 };
321 
322 
323 class PageFaultError : public AbstractTraceEntry {
324 public:
325 	PageFaultError(area_id area, status_t error)
326 		:
327 		fArea(area),
328 		fError(error)
329 	{
330 		Initialized();
331 	}
332 
333 	virtual void AddDump(TraceOutput& out)
334 	{
335 		switch (fError) {
336 			case PAGE_FAULT_ERROR_NO_AREA:
337 				out.Print("page fault error: no area");
338 				break;
339 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
340 				out.Print("page fault error: area: %ld, kernel only", fArea);
341 				break;
342 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
343 				out.Print("page fault error: area: %ld, write protected",
344 					fArea);
345 				break;
346 			case PAGE_FAULT_ERROR_READ_PROTECTED:
347 				out.Print("page fault error: area: %ld, read protected", fArea);
348 				break;
349 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
350 				out.Print("page fault error: kernel touching bad user memory");
351 				break;
352 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
353 				out.Print("page fault error: no address space");
354 				break;
355 			default:
356 				out.Print("page fault error: area: %ld, error: %s", fArea,
357 					strerror(fError));
358 				break;
359 		}
360 	}
361 
362 private:
363 	area_id		fArea;
364 	status_t	fError;
365 };
366 
367 
368 class PageFaultDone : public AbstractTraceEntry {
369 public:
370 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
371 			vm_page* page)
372 		:
373 		fArea(area),
374 		fTopCache(topCache),
375 		fCache(cache),
376 		fPage(page)
377 	{
378 		Initialized();
379 	}
380 
381 	virtual void AddDump(TraceOutput& out)
382 	{
383 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
384 			"page: %p", fArea, fTopCache, fCache, fPage);
385 	}
386 
387 private:
388 	area_id		fArea;
389 	VMCache*	fTopCache;
390 	VMCache*	fCache;
391 	vm_page*	fPage;
392 };
393 
394 }	// namespace VMPageFaultTracing
395 
396 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
397 #else
398 #	define TPF(x) ;
399 #endif	// VM_PAGE_FAULT_TRACING
400 
401 
402 //	#pragma mark -
403 
404 
405 /*!	The page's cache must be locked.
406 */
407 static inline void
408 increment_page_wired_count(vm_page* page)
409 {
410 	if (!page->IsMapped())
411 		atomic_add(&gMappedPagesCount, 1);
412 	page->IncrementWiredCount();
413 }
414 
415 
416 /*!	The page's cache must be locked.
417 */
418 static inline void
419 decrement_page_wired_count(vm_page* page)
420 {
421 	page->DecrementWiredCount();
422 	if (!page->IsMapped())
423 		atomic_add(&gMappedPagesCount, -1);
424 }
425 
426 
427 static inline addr_t
428 virtual_page_address(VMArea* area, vm_page* page)
429 {
430 	return area->Base()
431 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
432 }
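
// Worked example for the formula above (numbers made up, B_PAGE_SIZE = 4096):
// in an area based at 0x80000000 that maps its cache starting at byte offset
// 0x2000 (area->cache_offset), the cache page with cache_offset 5 (byte
// offset 5 << PAGE_SHIFT = 0x5000) lives at the virtual address
// 0x80000000 + (0x5000 - 0x2000) = 0x80003000.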
433 
434 
435 //! You need to have the address space locked when calling this function
436 static VMArea*
437 lookup_area(VMAddressSpace* addressSpace, area_id id)
438 {
439 	VMAreaHash::ReadLock();
440 
441 	VMArea* area = VMAreaHash::LookupLocked(id);
442 	if (area != NULL && area->address_space != addressSpace)
443 		area = NULL;
444 
445 	VMAreaHash::ReadUnlock();
446 
447 	return area;
448 }
449 
450 
451 static status_t
452 allocate_area_page_protections(VMArea* area)
453 {
454 	// In the page protections we store only the three user protections,
455 	// so we use 4 bits per page.
456 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
457 	area->page_protections = (uint8*)malloc_etc(bytes,
458 		HEAP_DONT_LOCK_KERNEL_SPACE);
459 	if (area->page_protections == NULL)
460 		return B_NO_MEMORY;
461 
462 	// init the page protections for all pages to that of the area
463 	uint32 areaProtection = area->protection
464 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
465 	memset(area->page_protections, areaProtection | (areaProtection << 4),
466 		bytes);
467 	return B_OK;
468 }
469 
470 
471 static inline void
472 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
473 {
474 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
475 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
476 	uint8& entry = area->page_protections[pageIndex / 2];
477 	if (pageIndex % 2 == 0)
478 		entry = (entry & 0xf0) | protection;
479 	else
480 		entry = (entry & 0x0f) | (protection << 4);
481 }
482 
483 
484 static inline uint32
485 get_area_page_protection(VMArea* area, addr_t pageAddress)
486 {
487 	if (area->page_protections == NULL)
488 		return area->protection;
489 
490 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
491 	uint32 protection = area->page_protections[pageIndex / 2];
492 	if (pageIndex % 2 == 0)
493 		protection &= 0x0f;
494 	else
495 		protection >>= 4;
496 
497 	// If this is a kernel area we translate the user flags to kernel flags.
498 	if (area->address_space == VMAddressSpace::Kernel()) {
499 		uint32 kernelProtection = 0;
500 		if ((protection & B_READ_AREA) != 0)
501 			kernelProtection |= B_KERNEL_READ_AREA;
502 		if ((protection & B_WRITE_AREA) != 0)
503 			kernelProtection |= B_KERNEL_WRITE_AREA;
504 
505 		return kernelProtection;
506 	}
507 
508 	return protection | B_KERNEL_READ_AREA
509 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
510 }
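
// The three helpers above pack two per-page protection values into each byte:
// even page indices use the low nibble, odd ones the high nibble. A
// self-contained sketch of the same packing scheme (plain C++; the names are
// made up and this is not part of the kernel build):
#if 0
static inline void
set_nibble(uint8* table, uint32 pageIndex, uint8 protection)
{
	uint8& entry = table[pageIndex / 2];
	if (pageIndex % 2 == 0)
		entry = (entry & 0xf0) | (protection & 0x0f);
	else
		entry = (entry & 0x0f) | ((protection & 0x0f) << 4);
}

static inline uint8
get_nibble(const uint8* table, uint32 pageIndex)
{
	uint8 entry = table[pageIndex / 2];
	return pageIndex % 2 == 0 ? (uint8)(entry & 0x0f) : (uint8)(entry >> 4);
}
#endif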
511 
512 
513 /*!	The caller must have reserved as many pages as the translation map
514 	implementation might need to map this page.
515 	The page's cache must be locked.
516 */
517 static status_t
518 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
519 	vm_page_reservation* reservation)
520 {
521 	VMTranslationMap* map = area->address_space->TranslationMap();
522 
523 	bool wasMapped = page->IsMapped();
524 
525 	if (area->wiring == B_NO_LOCK) {
526 		DEBUG_PAGE_ACCESS_CHECK(page);
527 
528 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
529 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
530 			gPageMappingsObjectCache,
531 			CACHE_DONT_WAIT_FOR_MEMORY
532 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
533 		if (mapping == NULL)
534 			return B_NO_MEMORY;
535 
536 		mapping->page = page;
537 		mapping->area = area;
538 
539 		map->Lock();
540 
541 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
542 			area->MemoryType(), reservation);
543 
544 		// insert mapping into lists
545 		if (!page->IsMapped())
546 			atomic_add(&gMappedPagesCount, 1);
547 
548 		page->mappings.Add(mapping);
549 		area->mappings.Add(mapping);
550 
551 		map->Unlock();
552 	} else {
553 		DEBUG_PAGE_ACCESS_CHECK(page);
554 
555 		map->Lock();
556 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
557 			area->MemoryType(), reservation);
558 		map->Unlock();
559 
560 		increment_page_wired_count(page);
561 	}
562 
563 	if (!wasMapped) {
564 		// The page is mapped now, so it must not remain in the cached queue.
565 		// It also makes sense to move it from the inactive to the active queue, since
566 		// otherwise the page daemon wouldn't come to keep track of it (in idle
567 		// mode) -- if the page isn't touched, it will be deactivated after a
568 		// full iteration through the queue at the latest.
569 		if (page->State() == PAGE_STATE_CACHED
570 				|| page->State() == PAGE_STATE_INACTIVE) {
571 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
572 		}
573 	}
574 
575 	return B_OK;
576 }
577 
578 
579 /*!	If \a preserveModified is \c true, the caller must hold the lock of the
580 	page's cache.
581 */
582 static inline bool
583 unmap_page(VMArea* area, addr_t virtualAddress)
584 {
585 	return area->address_space->TranslationMap()->UnmapPage(area,
586 		virtualAddress, true);
587 }
588 
589 
590 /*!	If \a preserveModified is \c true, the caller must hold the lock of all
591 	mapped pages' caches.
592 */
593 static inline void
594 unmap_pages(VMArea* area, addr_t base, size_t size)
595 {
596 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
597 }
598 
599 
600 /*!	Cuts a piece out of an area. If the given cut range covers the complete
601 	area, it is deleted. If it covers the beginning or the end, the area is
602 	resized accordingly. If the range covers some part in the middle of the
603 	area, it is split in two; in this case the second area is returned via
604 	\a _secondArea (the variable is left untouched in the other cases).
605 	The address space must be write locked.
606 	The caller must ensure that no part of the given range is wired.
607 */
608 static status_t
609 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
610 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
611 {
612 	// Does the cut range intersect with the area at all?
613 	addr_t areaLast = area->Base() + (area->Size() - 1);
614 	if (area->Base() > lastAddress || areaLast < address)
615 		return B_OK;
616 
617 	// Is the area fully covered?
618 	if (area->Base() >= address && areaLast <= lastAddress) {
619 		delete_area(addressSpace, area, false);
620 		return B_OK;
621 	}
622 
623 	int priority;
624 	uint32 allocationFlags;
625 	if (addressSpace == VMAddressSpace::Kernel()) {
626 		priority = VM_PRIORITY_SYSTEM;
627 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
628 			| HEAP_DONT_LOCK_KERNEL_SPACE;
629 	} else {
630 		priority = VM_PRIORITY_USER;
631 		allocationFlags = 0;
632 	}
633 
634 	VMCache* cache = vm_area_get_locked_cache(area);
635 	VMCacheChainLocker cacheChainLocker(cache);
636 	cacheChainLocker.LockAllSourceCaches();
637 
638 	// Cut the end only?
639 	if (areaLast <= lastAddress) {
640 		size_t oldSize = area->Size();
641 		size_t newSize = address - area->Base();
642 
643 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
644 			allocationFlags);
645 		if (error != B_OK)
646 			return error;
647 
648 		// unmap pages
649 		unmap_pages(area, address, oldSize - newSize);
650 
651 		// If no one else uses the area's cache, we can resize it, too.
652 		if (cache->areas == area && area->cache_next == NULL
653 			&& cache->consumers.IsEmpty()
654 			&& cache->type == CACHE_TYPE_RAM) {
655 			// Since VMCache::Resize() can temporarily drop the lock, we must
656 			// unlock all lower caches to prevent locking order inversion.
657 			cacheChainLocker.Unlock(cache);
658 			cache->Resize(cache->virtual_base + newSize, priority);
659 			cache->ReleaseRefAndUnlock();
660 		}
661 
662 		return B_OK;
663 	}
664 
665 	// Cut the beginning only?
666 	if (area->Base() >= address) {
667 		addr_t oldBase = area->Base();
668 		addr_t newBase = lastAddress + 1;
669 		size_t newSize = areaLast - lastAddress;
670 
671 		// unmap pages
672 		unmap_pages(area, oldBase, newBase - oldBase);
673 
674 		// resize the area
675 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
676 			allocationFlags);
677 		if (error != B_OK)
678 			return error;
679 
680 		// TODO: If no one else uses the area's cache, we should resize it, too!
681 
682 		area->cache_offset += newBase - oldBase;
683 
684 		return B_OK;
685 	}
686 
687 	// The tough part -- cut a piece out of the middle of the area.
688 	// We do that by shrinking the area to the beginning section and creating a
689 	// new area for the end section.
690 
691 	addr_t firstNewSize = address - area->Base();
692 	addr_t secondBase = lastAddress + 1;
693 	addr_t secondSize = areaLast - lastAddress;
694 
695 	// unmap pages
696 	unmap_pages(area, address, area->Size() - firstNewSize);
697 
698 	// resize the area
699 	addr_t oldSize = area->Size();
700 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
701 		allocationFlags);
702 	if (error != B_OK)
703 		return error;
704 
705 	// TODO: If no one else uses the area's cache, we might want to create a
706 	// new cache for the second area, transfer the concerned pages from the
707 	// first cache to it and resize the first cache.
708 
709 	// map the second area
710 	virtual_address_restrictions addressRestrictions = {};
711 	addressRestrictions.address = (void*)secondBase;
712 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
713 	VMArea* secondArea;
714 	error = map_backing_store(addressSpace, cache,
715 		area->cache_offset + (secondBase - area->Base()), area->name,
716 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
717 		&addressRestrictions, kernel, &secondArea, NULL);
718 	if (error != B_OK) {
719 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
720 		return error;
721 	}
722 
723 	// We need a cache reference for the new area.
724 	cache->AcquireRefLocked();
725 
726 	if (_secondArea != NULL)
727 		*_secondArea = secondArea;
728 
729 	return B_OK;
730 }
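
// Worked example for the middle-cut case above (numbers made up): cutting
// [0x3000, 0x4fff] out of an area spanning [0x1000, 0x8fff] yields
// firstNewSize = 0x3000 - 0x1000 = 0x2000, i.e. the original area shrinks to
// [0x1000, 0x2fff], and a second area with secondBase = 0x5000 and
// secondSize = 0x8fff - 0x4fff = 0x4000 covering [0x5000, 0x8fff] is created
// on the same cache; everything from 0x3000 upwards is unmapped beforehand.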
731 
732 
733 /*!	Deletes all areas in the given address range.
734 	The address space must be write-locked.
735 	The caller must ensure that no part of the given range is wired.
736 */
737 static status_t
738 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
739 	bool kernel)
740 {
741 	size = PAGE_ALIGN(size);
742 	addr_t lastAddress = address + (size - 1);
743 
744 	// Check whether the caller is allowed to modify the concerned areas.
745 	if (!kernel) {
746 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
747 				VMArea* area = it.Next();) {
748 			addr_t areaLast = area->Base() + (area->Size() - 1);
749 			if (area->Base() < lastAddress && address < areaLast) {
750 				if ((area->protection & B_KERNEL_AREA) != 0)
751 					return B_NOT_ALLOWED;
752 			}
753 		}
754 	}
755 
756 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
757 			VMArea* area = it.Next();) {
758 		addr_t areaLast = area->Base() + (area->Size() - 1);
759 		if (area->Base() < lastAddress && address < areaLast) {
760 			status_t error = cut_area(addressSpace, area, address,
761 				lastAddress, NULL, kernel);
762 			if (error != B_OK)
763 				return error;
764 				// Failing after already messing with areas is ugly, but we
765 				// can't do anything about it.
766 		}
767 	}
768 
769 	return B_OK;
770 }
771 
772 
773 /*! You need to hold the lock of the cache and the write lock of the address
774 	space when calling this function.
775 	Note that in case of error your cache will be temporarily unlocked.
776 	If \a addressSpec is \c B_EXACT_ADDRESS and the
777 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
778 	that no part of the specified address range (base \c *_virtualAddress, size
779 	\a size) is wired.
780 */
781 static status_t
782 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
783 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
784 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
785 	bool kernel, VMArea** _area, void** _virtualAddress)
786 {
787 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%Lx, "
788 		"size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName "
789 		"'%s'\n", addressSpace, cache, addressRestrictions->address, offset,
790 		size, addressRestrictions->address_specification, wiring, protection,
791 		_area, areaName));
792 	cache->AssertLocked();
793 
794 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
795 		| HEAP_DONT_LOCK_KERNEL_SPACE;
796 	int priority;
797 	if (addressSpace != VMAddressSpace::Kernel()) {
798 		priority = VM_PRIORITY_USER;
799 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
800 		priority = VM_PRIORITY_VIP;
801 		allocationFlags |= HEAP_PRIORITY_VIP;
802 	} else
803 		priority = VM_PRIORITY_SYSTEM;
804 
805 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
806 		allocationFlags);
807 	if (area == NULL)
808 		return B_NO_MEMORY;
809 
810 	status_t status;
811 
812 	// if this is a private map, we need to create a new cache
813 	// to handle the private copies of pages as they are written to
814 	VMCache* sourceCache = cache;
815 	if (mapping == REGION_PRIVATE_MAP) {
816 		VMCache* newCache;
817 
818 		// create an anonymous cache
819 		bool isStack = (protection & B_STACK_AREA) != 0;
820 		status = VMCacheFactory::CreateAnonymousCache(newCache,
821 			isStack || (protection & B_OVERCOMMITTING_AREA) != 0, 0,
822 			isStack ? USER_STACK_GUARD_PAGES : 0, true, VM_PRIORITY_USER);
823 		if (status != B_OK)
824 			goto err1;
825 
826 		newCache->Lock();
827 		newCache->temporary = 1;
828 		newCache->virtual_base = offset;
829 		newCache->virtual_end = offset + size;
830 
831 		cache->AddConsumer(newCache);
832 
833 		cache = newCache;
834 	}
835 
836 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
837 		status = cache->SetMinimalCommitment(size, priority);
838 		if (status != B_OK)
839 			goto err2;
840 	}
841 
842 	// check to see if this address space has entered DELETE state
843 	if (addressSpace->IsBeingDeleted()) {
844 		// okay, someone is trying to delete this address space now, so we can't
845 		// insert the area -- back out
846 		status = B_BAD_TEAM_ID;
847 		goto err2;
848 	}
849 
850 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
851 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
852 		status = unmap_address_range(addressSpace,
853 			(addr_t)addressRestrictions->address, size, kernel);
854 		if (status != B_OK)
855 			goto err2;
856 	}
857 
858 	status = addressSpace->InsertArea(area, size, addressRestrictions,
859 		allocationFlags, _virtualAddress);
860 	if (status != B_OK) {
861 		// TODO: wait and try again once this is working in the backend
862 #if 0
863 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
864 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
865 				0, 0);
866 		}
867 #endif
868 		goto err2;
869 	}
870 
871 	// attach the cache to the area
872 	area->cache = cache;
873 	area->cache_offset = offset;
874 
875 	// point the cache back to the area
876 	cache->InsertAreaLocked(area);
877 	if (mapping == REGION_PRIVATE_MAP)
878 		cache->Unlock();
879 
880 	// insert the area in the global area hash table
881 	VMAreaHash::Insert(area);
882 
883 	// grab a ref to the address space (the area holds this)
884 	addressSpace->Get();
885 
886 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
887 //		cache, sourceCache, areaName, area);
888 
889 	*_area = area;
890 	return B_OK;
891 
892 err2:
893 	if (mapping == REGION_PRIVATE_MAP) {
894 		// We created this cache, so we must delete it again. Note, that we
895 		// need to temporarily unlock the source cache or we'll otherwise
896 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
897 		sourceCache->Unlock();
898 		cache->ReleaseRefAndUnlock();
899 		sourceCache->Lock();
900 	}
901 err1:
902 	addressSpace->DeleteArea(area, allocationFlags);
903 	return status;
904 }
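
// Callers follow the pattern visible in vm_block_address_range() and the
// other area creation functions below: create and lock a cache, call
// map_backing_store() with the address space write-locked, and drop the
// cache reference again if it fails. Sketch (illustrative only):
#if 0
	cache->Lock();
	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
		protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, kernel,
		&area, _address);
	if (status != B_OK) {
		cache->ReleaseRefAndUnlock();
		return status;
	}
	cache->Unlock();
#endif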
905 
906 
907 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
908 	  locker1, locker2).
909 */
910 template<typename LockerType1, typename LockerType2>
911 static inline bool
912 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
913 {
914 	area->cache->AssertLocked();
915 
916 	VMAreaUnwiredWaiter waiter;
917 	if (!area->AddWaiterIfWired(&waiter))
918 		return false;
919 
920 	// unlock everything and wait
921 	if (locker1 != NULL)
922 		locker1->Unlock();
923 	if (locker2 != NULL)
924 		locker2->Unlock();
925 
926 	waiter.waitEntry.Wait();
927 
928 	return true;
929 }
930 
931 
932 /*!	Checks whether the given area has any wired ranges intersecting with the
933 	specified range and waits, if so.
934 
935 	When it has to wait, the function calls \c Unlock() on both \a locker1
936 	and \a locker2, if given.
937 	The area's top cache must be locked and must be unlocked as a side effect
938 	of calling \c Unlock() on either \a locker1 or \a locker2.
939 
940 	If the function does not have to wait it does not modify or unlock any
941 	object.
942 
943 	\param area The area to be checked.
944 	\param base The base address of the range to check.
945 	\param size The size of the address range to check.
946 	\param locker1 An object to be unlocked before starting to wait (may
947 		be \c NULL).
948 	\param locker2 An object to be unlocked before starting to wait (may
949 		be \c NULL).
950 	\return \c true, if the function had to wait, \c false otherwise.
951 */
952 template<typename LockerType1, typename LockerType2>
953 static inline bool
954 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
955 	LockerType1* locker1, LockerType2* locker2)
956 {
957 	area->cache->AssertLocked();
958 
959 	VMAreaUnwiredWaiter waiter;
960 	if (!area->AddWaiterIfWired(&waiter, base, size))
961 		return false;
962 
963 	// unlock everything and wait
964 	if (locker1 != NULL)
965 		locker1->Unlock();
966 	if (locker2 != NULL)
967 		locker2->Unlock();
968 
969 	waiter.waitEntry.Wait();
970 
971 	return true;
972 }
973 
974 
975 /*!	Checks whether the given address space has any wired ranges intersecting
976 	with the specified range and waits, if so.
977 
978 	Similar to wait_if_area_range_is_wired(), with the following differences:
979 	- All areas intersecting with the range are checked (or rather, all until
980 	  one is found that contains a wired range intersecting with the given
981 	  range).
982 	- The given address space must at least be read-locked and must be unlocked
983 	  when \c Unlock() is called on \a locker.
984 	- None of the areas' caches are allowed to be locked.
985 */
986 template<typename LockerType>
987 static inline bool
988 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
989 	size_t size, LockerType* locker)
990 {
991 	addr_t end = base + size - 1;
992 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
993 			VMArea* area = it.Next();) {
994 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
995 		if (area->Base() > end)
996 			return false;
997 
998 		if (base >= area->Base() + area->Size() - 1)
999 			continue;
1000 
1001 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1002 
1003 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1004 			return true;
1005 	}
1006 
1007 	return false;
1008 }
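
// These helpers return true only after everything has been unlocked and the
// wait is over, so callers simply retry from scratch. The retry loop used by
// vm_create_anonymous_area() and vm_create_null_area() below, as a sketch:
#if 0
	AddressSpaceWriteLocker locker;
	do {
		if (locker.SetTo(team) != B_OK)
			return B_BAD_TEAM_ID;
	} while (wait_if_address_range_is_wired(locker.AddressSpace(),
		(addr_t)*address, size, &locker));
#endif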
1009 
1010 
1011 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1012 	It must be called in a situation where the kernel address space may be
1013 	locked.
1014 */
1015 status_t
1016 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1017 {
1018 	AddressSpaceReadLocker locker;
1019 	VMArea* area;
1020 	status_t status = locker.SetFromArea(id, area);
1021 	if (status != B_OK)
1022 		return status;
1023 
1024 	if (area->page_protections == NULL) {
1025 		status = allocate_area_page_protections(area);
1026 		if (status != B_OK)
1027 			return status;
1028 	}
1029 
1030 	*cookie = (void*)area;
1031 	return B_OK;
1032 }
1033 
1034 
1035 /*!	This is a debug helper function that can only be used in very specific
1036 	use cases.
1037 	Sets protection for the given address range to the protection specified.
1038 	If \a protection is 0 then the involved pages will be marked non-present
1039 	in the translation map to cause a fault on access. The pages aren't
1040 	actually unmapped however so that they can be marked present again with
1041 	additional calls to this function. For this to work the area must be
1042 	fully locked in memory so that the pages aren't otherwise touched.
1043 	This function does not lock the kernel address space and needs to be
1044 	supplied with a \a cookie retrieved from a successful call to
1045 	vm_prepare_kernel_area_debug_protection().
1046 */
1047 status_t
1048 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1049 	uint32 protection)
1050 {
1051 	// check address range
1052 	addr_t address = (addr_t)_address;
1053 	size = PAGE_ALIGN(size);
1054 
1055 	if ((address % B_PAGE_SIZE) != 0
1056 		|| (addr_t)address + size < (addr_t)address
1057 		|| !IS_KERNEL_ADDRESS(address)
1058 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1059 		return B_BAD_VALUE;
1060 	}
1061 
1062 	// Translate the kernel protection to user protection as we only store that.
1063 	if ((protection & B_KERNEL_READ_AREA) != 0)
1064 		protection |= B_READ_AREA;
1065 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1066 		protection |= B_WRITE_AREA;
1067 
1068 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1069 	VMTranslationMap* map = addressSpace->TranslationMap();
1070 	VMArea* area = (VMArea*)cookie;
1071 
1072 	addr_t offset = address - area->Base();
1073 	if (area->Size() - offset < size) {
1074 		panic("protect range not fully within supplied area");
1075 		return B_BAD_VALUE;
1076 	}
1077 
1078 	if (area->page_protections == NULL) {
1079 		panic("area has no page protections");
1080 		return B_BAD_VALUE;
1081 	}
1082 
1083 	// Invalidate the mapping entries so any access to them will fault, or
1084 	// restore the mapping entries unchanged so that lookup will succeed again.
1085 	map->Lock();
1086 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1087 	map->Unlock();
1088 
1089 	// And set the proper page protections so that the fault case will actually
1090 	// fail and not simply try to map a new page.
1091 	for (addr_t pageAddress = address; pageAddress < address + size;
1092 			pageAddress += B_PAGE_SIZE) {
1093 		set_area_page_protection(area, pageAddress, protection);
1094 	}
1095 
1096 	return B_OK;
1097 }
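
// Sketch of the intended call sequence (illustrative only; "id", "address"
// and "size" are placeholders): prepare a cookie while the kernel address
// space may still be locked, then toggle the protection of page-aligned
// subranges of the fully locked area.
#if 0
	void* cookie;
	if (vm_prepare_kernel_area_debug_protection(id, &cookie) == B_OK) {
		// make the range fault on any access ...
		vm_set_kernel_area_debug_protection(cookie, address, size, 0);
		// ... and later make it accessible again
		vm_set_kernel_area_debug_protection(cookie, address, size,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	}
#endif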
1098 
1099 
1100 status_t
1101 vm_block_address_range(const char* name, void* address, addr_t size)
1102 {
1103 	if (!arch_vm_supports_protection(0))
1104 		return B_NOT_SUPPORTED;
1105 
1106 	AddressSpaceWriteLocker locker;
1107 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1108 	if (status != B_OK)
1109 		return status;
1110 
1111 	VMAddressSpace* addressSpace = locker.AddressSpace();
1112 
1113 	// create an anonymous cache
1114 	VMCache* cache;
1115 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1116 		VM_PRIORITY_SYSTEM);
1117 	if (status != B_OK)
1118 		return status;
1119 
1120 	cache->temporary = 1;
1121 	cache->virtual_end = size;
1122 	cache->Lock();
1123 
1124 	VMArea* area;
1125 	virtual_address_restrictions addressRestrictions = {};
1126 	addressRestrictions.address = address;
1127 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1128 	status = map_backing_store(addressSpace, cache, 0, name, size,
1129 		B_ALREADY_WIRED, B_ALREADY_WIRED, REGION_NO_PRIVATE_MAP, 0,
1130 		&addressRestrictions, true, &area, NULL);
1131 	if (status != B_OK) {
1132 		cache->ReleaseRefAndUnlock();
1133 		return status;
1134 	}
1135 
1136 	cache->Unlock();
1137 	area->cache_type = CACHE_TYPE_RAM;
1138 	return area->id;
1139 }
1140 
1141 
1142 status_t
1143 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1144 {
1145 	AddressSpaceWriteLocker locker(team);
1146 	if (!locker.IsLocked())
1147 		return B_BAD_TEAM_ID;
1148 
1149 	VMAddressSpace* addressSpace = locker.AddressSpace();
1150 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1151 		addressSpace == VMAddressSpace::Kernel()
1152 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1153 }
1154 
1155 
1156 status_t
1157 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1158 	addr_t size, uint32 flags)
1159 {
1160 	if (size == 0)
1161 		return B_BAD_VALUE;
1162 
1163 	AddressSpaceWriteLocker locker(team);
1164 	if (!locker.IsLocked())
1165 		return B_BAD_TEAM_ID;
1166 
1167 	virtual_address_restrictions addressRestrictions = {};
1168 	addressRestrictions.address = *_address;
1169 	addressRestrictions.address_specification = addressSpec;
1170 	VMAddressSpace* addressSpace = locker.AddressSpace();
1171 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1172 		addressSpace == VMAddressSpace::Kernel()
1173 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1174 		_address);
1175 }
1176 
1177 
1178 area_id
1179 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1180 	uint32 wiring, uint32 protection, uint32 flags,
1181 	const virtual_address_restrictions* virtualAddressRestrictions,
1182 	const physical_address_restrictions* physicalAddressRestrictions,
1183 	bool kernel, void** _address)
1184 {
1185 	VMArea* area;
1186 	VMCache* cache;
1187 	vm_page* page = NULL;
1188 	bool isStack = (protection & B_STACK_AREA) != 0;
1189 	page_num_t guardPages;
1190 	bool canOvercommit = false;
1191 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1192 		? VM_PAGE_ALLOC_CLEAR : 0;
1193 
1194 	TRACE(("create_anonymous_area [%ld] %s: size 0x%lx\n", team, name, size));
1195 
1196 	size = PAGE_ALIGN(size);
1197 
1198 	if (size == 0)
1199 		return B_BAD_VALUE;
1200 	if (!arch_vm_supports_protection(protection))
1201 		return B_NOT_SUPPORTED;
1202 
1203 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1204 		canOvercommit = true;
1205 
1206 #ifdef DEBUG_KERNEL_STACKS
1207 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1208 		isStack = true;
1209 #endif
1210 
1211 	// check parameters
1212 	switch (virtualAddressRestrictions->address_specification) {
1213 		case B_ANY_ADDRESS:
1214 		case B_EXACT_ADDRESS:
1215 		case B_BASE_ADDRESS:
1216 		case B_ANY_KERNEL_ADDRESS:
1217 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1218 			break;
1219 
1220 		default:
1221 			return B_BAD_VALUE;
1222 	}
1223 
1224 	// If low or high physical address restrictions are given, we force
1225 	// B_CONTIGUOUS wiring, since only then will we use
1226 	// vm_page_allocate_page_run(), which deals with those restrictions.
1227 	if (physicalAddressRestrictions->low_address != 0
1228 		|| physicalAddressRestrictions->high_address != 0) {
1229 		wiring = B_CONTIGUOUS;
1230 	}
1231 
1232 	physical_address_restrictions stackPhysicalRestrictions;
1233 	bool doReserveMemory = false;
1234 	switch (wiring) {
1235 		case B_NO_LOCK:
1236 			break;
1237 		case B_FULL_LOCK:
1238 		case B_LAZY_LOCK:
1239 		case B_CONTIGUOUS:
1240 			doReserveMemory = true;
1241 			break;
1242 		case B_ALREADY_WIRED:
1243 			break;
1244 		case B_LOMEM:
1245 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1246 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1247 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1248 			wiring = B_CONTIGUOUS;
1249 			doReserveMemory = true;
1250 			break;
1251 		case B_32_BIT_FULL_LOCK:
1252 			if (B_HAIKU_PHYSICAL_BITS <= 32
1253 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1254 				wiring = B_FULL_LOCK;
1255 				doReserveMemory = true;
1256 				break;
1257 			}
1258 			// TODO: We don't really support this mode efficiently. Just fall
1259 			// through for now ...
1260 		case B_32_BIT_CONTIGUOUS:
1261 			#if B_HAIKU_PHYSICAL_BITS > 32
1262 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1263 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1264 					stackPhysicalRestrictions.high_address
1265 						= (phys_addr_t)1 << 32;
1266 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1267 				}
1268 			#endif
1269 			wiring = B_CONTIGUOUS;
1270 			doReserveMemory = true;
1271 			break;
1272 		default:
1273 			return B_BAD_VALUE;
1274 	}
1275 
1276 	// Optimization: For a single-page contiguous allocation without low/high
1277 	// memory restrictions, B_FULL_LOCK wiring suffices.
1278 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1279 		&& physicalAddressRestrictions->low_address == 0
1280 		&& physicalAddressRestrictions->high_address == 0) {
1281 		wiring = B_FULL_LOCK;
1282 	}
1283 
1284 	// For full lock or contiguous areas we're also going to map the pages and
1285 	// thus need to reserve pages for the mapping backend upfront.
1286 	addr_t reservedMapPages = 0;
1287 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1288 		AddressSpaceWriteLocker locker;
1289 		status_t status = locker.SetTo(team);
1290 		if (status != B_OK)
1291 			return status;
1292 
1293 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1294 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1295 	}
1296 
1297 	int priority;
1298 	if (team != VMAddressSpace::KernelID())
1299 		priority = VM_PRIORITY_USER;
1300 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1301 		priority = VM_PRIORITY_VIP;
1302 	else
1303 		priority = VM_PRIORITY_SYSTEM;
1304 
1305 	// Reserve memory before acquiring the address space lock. This reduces the
1306 	// chances of failure, since while holding the write lock to the address
1307 	// space (if it is the kernel address space that is), the low memory handler
1308 	// won't be able to free anything for us.
1309 	addr_t reservedMemory = 0;
1310 	if (doReserveMemory) {
1311 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1312 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1313 			return B_NO_MEMORY;
1314 		reservedMemory = size;
1315 		// TODO: We don't reserve the memory for the pages for the page
1316 		// directories/tables. We actually need to, since we currently don't
1317 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1318 		// there are actually fewer physical pages than there should be, which
1319 		// can get the VM into trouble in low memory situations.
1320 	}
1321 
1322 	AddressSpaceWriteLocker locker;
1323 	VMAddressSpace* addressSpace;
1324 	status_t status;
1325 
1326 	// For full lock areas reserve the pages before locking the address
1327 	// space. E.g. block caches can't release their memory while we hold the
1328 	// address space lock.
1329 	page_num_t reservedPages = reservedMapPages;
1330 	if (wiring == B_FULL_LOCK)
1331 		reservedPages += size / B_PAGE_SIZE;
1332 
1333 	vm_page_reservation reservation;
1334 	if (reservedPages > 0) {
1335 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1336 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1337 					priority)) {
1338 				reservedPages = 0;
1339 				status = B_WOULD_BLOCK;
1340 				goto err0;
1341 			}
1342 		} else
1343 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1344 	}
1345 
1346 	if (wiring == B_CONTIGUOUS) {
1347 		// we try to allocate the page run here upfront as this may easily
1348 		// fail for obvious reasons
1349 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1350 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1351 		if (page == NULL) {
1352 			status = B_NO_MEMORY;
1353 			goto err0;
1354 		}
1355 	}
1356 
1357 	// Lock the address space and, if B_EXACT_ADDRESS and
1358 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1359 	// is not wired.
1360 	do {
1361 		status = locker.SetTo(team);
1362 		if (status != B_OK)
1363 			goto err1;
1364 
1365 		addressSpace = locker.AddressSpace();
1366 	} while (virtualAddressRestrictions->address_specification
1367 			== B_EXACT_ADDRESS
1368 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1369 		&& wait_if_address_range_is_wired(addressSpace,
1370 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1371 
1372 	// create an anonymous cache
1373 	// if it's a stack, make sure that at least two pages are available
1374 	guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0
1375 		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
1376 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1377 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1378 		wiring == B_NO_LOCK, priority);
1379 	if (status != B_OK)
1380 		goto err1;
1381 
1382 	cache->temporary = 1;
1383 	cache->virtual_end = size;
1384 	cache->committed_size = reservedMemory;
1385 		// TODO: This should be done via a method.
1386 	reservedMemory = 0;
1387 
1388 	cache->Lock();
1389 
1390 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1391 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1392 		kernel, &area, _address);
1393 
1394 	if (status != B_OK) {
1395 		cache->ReleaseRefAndUnlock();
1396 		goto err1;
1397 	}
1398 
1399 	locker.DegradeToReadLock();
1400 
1401 	switch (wiring) {
1402 		case B_NO_LOCK:
1403 		case B_LAZY_LOCK:
1404 			// do nothing - the pages are mapped in as needed
1405 			break;
1406 
1407 		case B_FULL_LOCK:
1408 		{
1409 			// Allocate and map all pages for this area
1410 
1411 			off_t offset = 0;
1412 			for (addr_t address = area->Base();
1413 					address < area->Base() + (area->Size() - 1);
1414 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1415 #ifdef DEBUG_KERNEL_STACKS
1416 #	ifdef STACK_GROWS_DOWNWARDS
1417 				if (isStack && address < area->Base()
1418 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1419 #	else
1420 				if (isStack && address >= area->Base() + area->Size()
1421 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1422 #	endif
1423 					continue;
1424 #endif
1425 				vm_page* page = vm_page_allocate_page(&reservation,
1426 					PAGE_STATE_WIRED | pageAllocFlags);
1427 				cache->InsertPage(page, offset);
1428 				map_page(area, page, address, protection, &reservation);
1429 
1430 				DEBUG_PAGE_ACCESS_END(page);
1431 			}
1432 
1433 			break;
1434 		}
1435 
1436 		case B_ALREADY_WIRED:
1437 		{
1438 			// The pages should already be mapped. This is only really useful
1439 			// during boot time. Find the appropriate vm_page objects and stick
1440 			// them in the cache object.
1441 			VMTranslationMap* map = addressSpace->TranslationMap();
1442 			off_t offset = 0;
1443 
1444 			if (!gKernelStartup)
1445 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1446 
1447 			map->Lock();
1448 
1449 			for (addr_t virtualAddress = area->Base();
1450 					virtualAddress < area->Base() + (area->Size() - 1);
1451 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1452 				phys_addr_t physicalAddress;
1453 				uint32 flags;
1454 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1455 				if (status < B_OK) {
1456 					panic("looking up mapping failed for va 0x%lx\n",
1457 						virtualAddress);
1458 				}
1459 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1460 				if (page == NULL) {
1461 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1462 						"\n", physicalAddress);
1463 				}
1464 
1465 				DEBUG_PAGE_ACCESS_START(page);
1466 
1467 				cache->InsertPage(page, offset);
1468 				increment_page_wired_count(page);
1469 				vm_page_set_state(page, PAGE_STATE_WIRED);
1470 				page->busy = false;
1471 
1472 				DEBUG_PAGE_ACCESS_END(page);
1473 			}
1474 
1475 			map->Unlock();
1476 			break;
1477 		}
1478 
1479 		case B_CONTIGUOUS:
1480 		{
1481 			// We have already allocated our contiguous page run, so we can now
1482 			// just map them in the address space
1483 			VMTranslationMap* map = addressSpace->TranslationMap();
1484 			phys_addr_t physicalAddress
1485 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1486 			addr_t virtualAddress = area->Base();
1487 			off_t offset = 0;
1488 
1489 			map->Lock();
1490 
1491 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1492 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1493 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1494 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1495 				if (page == NULL)
1496 					panic("couldn't lookup physical page just allocated\n");
1497 
1498 				status = map->Map(virtualAddress, physicalAddress, protection,
1499 					area->MemoryType(), &reservation);
1500 				if (status < B_OK)
1501 					panic("couldn't map physical page in page run\n");
1502 
1503 				cache->InsertPage(page, offset);
1504 				increment_page_wired_count(page);
1505 
1506 				DEBUG_PAGE_ACCESS_END(page);
1507 			}
1508 
1509 			map->Unlock();
1510 			break;
1511 		}
1512 
1513 		default:
1514 			break;
1515 	}
1516 
1517 	cache->Unlock();
1518 
1519 	if (reservedPages > 0)
1520 		vm_page_unreserve_pages(&reservation);
1521 
1522 	TRACE(("vm_create_anonymous_area: done\n"));
1523 
1524 	area->cache_type = CACHE_TYPE_RAM;
1525 	return area->id;
1526 
1527 err1:
1528 	if (wiring == B_CONTIGUOUS) {
1529 		// we had allocated the contiguous page run upfront...
1530 		phys_addr_t pageNumber = page->physical_page_number;
1531 		int32 i;
1532 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1533 			page = vm_lookup_page(pageNumber);
1534 			if (page == NULL)
1535 				panic("couldn't lookup physical page just allocated\n");
1536 
1537 			vm_page_set_state(page, PAGE_STATE_FREE);
1538 		}
1539 	}
1540 
1541 err0:
1542 	if (reservedPages > 0)
1543 		vm_page_unreserve_pages(&reservation);
1544 	if (reservedMemory > 0)
1545 		vm_unreserve_memory(reservedMemory);
1546 
1547 	return status;
1548 }
1549 
1550 
1551 area_id
1552 vm_map_physical_memory(team_id team, const char* name, void** _address,
1553 	uint32 addressSpec, addr_t size, uint32 protection,
1554 	phys_addr_t physicalAddress, bool alreadyWired)
1555 {
1556 	VMArea* area;
1557 	VMCache* cache;
1558 	addr_t mapOffset;
1559 
1560 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
1561 		"spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team,
1562 		name, *_address, addressSpec, size, protection, physicalAddress));
1563 
1564 	if (!arch_vm_supports_protection(protection))
1565 		return B_NOT_SUPPORTED;
1566 
1567 	AddressSpaceWriteLocker locker(team);
1568 	if (!locker.IsLocked())
1569 		return B_BAD_TEAM_ID;
1570 
1571 	// if the physical address is not page aligned,
1572 	// move the mapping down to start on a page boundary
1573 	mapOffset = physicalAddress % B_PAGE_SIZE;
1574 	size += mapOffset;
1575 	physicalAddress -= mapOffset;
1576 
1577 	size = PAGE_ALIGN(size);
1578 
1579 	// create a device cache
1580 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1581 	if (status != B_OK)
1582 		return status;
1583 
1584 	cache->virtual_end = size;
1585 
1586 	cache->Lock();
1587 
1588 	virtual_address_restrictions addressRestrictions = {};
1589 	addressRestrictions.address = *_address;
1590 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1591 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1592 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1593 		true, &area, _address);
1594 
1595 	if (status < B_OK)
1596 		cache->ReleaseRefLocked();
1597 
1598 	cache->Unlock();
1599 
1600 	if (status == B_OK) {
1601 		// set requested memory type -- use uncached, if not given
1602 		uint32 memoryType = addressSpec & B_MTR_MASK;
1603 		if (memoryType == 0)
1604 			memoryType = B_MTR_UC;
1605 
1606 		area->SetMemoryType(memoryType);
1607 
1608 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1609 		if (status != B_OK)
1610 			delete_area(locker.AddressSpace(), area, false);
1611 	}
1612 
1613 	if (status != B_OK)
1614 		return status;
1615 
1616 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1617 
1618 	if (alreadyWired) {
1619 		// The area is already mapped, but possibly not with the right
1620 		// memory type.
1621 		map->Lock();
1622 		map->ProtectArea(area, area->protection);
1623 		map->Unlock();
1624 	} else {
1625 		// Map the area completely.
1626 
1627 		// reserve pages needed for the mapping
1628 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1629 			area->Base() + (size - 1));
1630 		vm_page_reservation reservation;
1631 		vm_page_reserve_pages(&reservation, reservePages,
1632 			team == VMAddressSpace::KernelID()
1633 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1634 
1635 		map->Lock();
1636 
1637 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1638 			map->Map(area->Base() + offset, physicalAddress + offset,
1639 				protection, area->MemoryType(), &reservation);
1640 		}
1641 
1642 		map->Unlock();
1643 
1644 		vm_page_unreserve_pages(&reservation);
1645 	}
1646 
1647 	// modify the pointer returned to be offset back into the new area
1648 	// the same way the physical address in was offset
1649 	*_address = (void*)((addr_t)*_address + mapOffset);
1650 
1651 	area->cache_type = CACHE_TYPE_DEVICE;
1652 	return area->id;
1653 }
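
// Worked example for the alignment logic above (B_PAGE_SIZE = 4096, numbers
// made up): requesting 0x100 bytes at physical address 0xdead1010 gives
// mapOffset = 0x10, so the area actually maps physical 0xdead1000 with size
// PAGE_ALIGN(0x100 + 0x10) = 0x1000, and the returned *_address is moved up
// by mapOffset so that it points at the byte backing 0xdead1010.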
1654 
1655 
1656 /*!	Don't use!
1657 	TODO: This function was introduced to map physical page vecs to
1658 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1659 	use a device cache and does not track vm_page::wired_count!
1660 */
1661 area_id
1662 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1663 	uint32 addressSpec, addr_t* _size, uint32 protection,
1664 	struct generic_io_vec* vecs, uint32 vecCount)
1665 {
1666 	TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, "
1667 		"spec = %ld, _size = %p, protection = %ld, vecs = %p, "
1668 		"vecCount = %ld)\n", team, name, *_address, addressSpec, _size,
1669 		protection, vecs, vecCount));
1670 
1671 	if (!arch_vm_supports_protection(protection)
1672 		|| (addressSpec & B_MTR_MASK) != 0) {
1673 		return B_NOT_SUPPORTED;
1674 	}
1675 
1676 	AddressSpaceWriteLocker locker(team);
1677 	if (!locker.IsLocked())
1678 		return B_BAD_TEAM_ID;
1679 
1680 	if (vecCount == 0)
1681 		return B_BAD_VALUE;
1682 
1683 	addr_t size = 0;
1684 	for (uint32 i = 0; i < vecCount; i++) {
1685 		if (vecs[i].base % B_PAGE_SIZE != 0
1686 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1687 			return B_BAD_VALUE;
1688 		}
1689 
1690 		size += vecs[i].length;
1691 	}
1692 
1693 	// create a device cache
1694 	VMCache* cache;
1695 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1696 	if (result != B_OK)
1697 		return result;
1698 
1699 	cache->virtual_end = size;
1700 
1701 	cache->Lock();
1702 
1703 	VMArea* area;
1704 	virtual_address_restrictions addressRestrictions = {};
1705 	addressRestrictions.address = *_address;
1706 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1707 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1708 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1709 		&addressRestrictions, true, &area, _address);
1710 
1711 	if (result != B_OK)
1712 		cache->ReleaseRefLocked();
1713 
1714 	cache->Unlock();
1715 
1716 	if (result != B_OK)
1717 		return result;
1718 
1719 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1720 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1721 		area->Base() + (size - 1));
1722 
1723 	vm_page_reservation reservation;
1724 	vm_page_reserve_pages(&reservation, reservePages,
1725 			team == VMAddressSpace::KernelID()
1726 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1727 	map->Lock();
1728 
1729 	uint32 vecIndex = 0;
1730 	size_t vecOffset = 0;
1731 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1732 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1733 			vecOffset = 0;
1734 			vecIndex++;
1735 		}
1736 
1737 		if (vecIndex >= vecCount)
1738 			break;
1739 
1740 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1741 			protection, area->MemoryType(), &reservation);
1742 
1743 		vecOffset += B_PAGE_SIZE;
1744 	}
1745 
1746 	map->Unlock();
1747 	vm_page_unreserve_pages(&reservation);
1748 
1749 	if (_size != NULL)
1750 		*_size = size;
1751 
1752 	area->cache_type = CACHE_TYPE_DEVICE;
1753 	return area->id;
1754 }
1755 
1756 
1757 area_id
1758 vm_create_null_area(team_id team, const char* name, void** address,
1759 	uint32 addressSpec, addr_t size, uint32 flags)
1760 {
1761 	size = PAGE_ALIGN(size);
1762 
1763 	// Lock the address space and, if B_EXACT_ADDRESS and
1764 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1765 	// is not wired.
1766 	AddressSpaceWriteLocker locker;
1767 	do {
1768 		if (locker.SetTo(team) != B_OK)
1769 			return B_BAD_TEAM_ID;
1770 	} while (addressSpec == B_EXACT_ADDRESS
1771 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1772 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1773 			(addr_t)*address, size, &locker));
1774 
1775 	// create a null cache
1776 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1777 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1778 	VMCache* cache;
1779 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1780 	if (status != B_OK)
1781 		return status;
1782 
1783 	cache->temporary = 1;
1784 	cache->virtual_end = size;
1785 
1786 	cache->Lock();
1787 
1788 	VMArea* area;
1789 	virtual_address_restrictions addressRestrictions = {};
1790 	addressRestrictions.address = *address;
1791 	addressRestrictions.address_specification = addressSpec;
1792 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1793 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1794 		&addressRestrictions, true, &area, address);
1795 
1796 	if (status < B_OK) {
1797 		cache->ReleaseRefAndUnlock();
1798 		return status;
1799 	}
1800 
1801 	cache->Unlock();
1802 
1803 	area->cache_type = CACHE_TYPE_NULL;
1804 	return area->id;
1805 }
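
// A minimal usage sketch for vm_create_null_area(): reserving a chunk of
// kernel address space that is backed by nothing and only readable by the
// kernel. The helper name, area name, and size are made up for illustration.
#if 0
static area_id
example_reserve_address_range()
{
	void* address = NULL;
	area_id area = vm_create_null_area(VMAddressSpace::KernelID(),
		"example reserved range", &address, B_ANY_KERNEL_ADDRESS,
		16 * B_PAGE_SIZE, 0);
	if (area < 0)
		dprintf("reserving the range failed: %s\n", strerror(area));
	return area;
}
#endif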
1806 
1807 
1808 /*!	Creates the vnode cache for the specified \a vnode.
1809 	The vnode has to be marked busy when calling this function.
1810 */
1811 status_t
1812 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1813 {
1814 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1815 }
1816 
1817 
1818 /*!	\a cache must be locked. The area's address space must be read-locked.
1819 */
1820 static void
1821 pre_map_area_pages(VMArea* area, VMCache* cache,
1822 	vm_page_reservation* reservation)
1823 {
1824 	addr_t baseAddress = area->Base();
1825 	addr_t cacheOffset = area->cache_offset;
1826 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1827 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1828 
1829 	for (VMCachePagesTree::Iterator it
1830 				= cache->pages.GetIterator(firstPage, true, true);
1831 			vm_page* page = it.Next();) {
1832 		if (page->cache_offset >= endPage)
1833 			break;
1834 
1835 		// skip busy and inactive pages
1836 		if (page->busy || page->usage_count == 0)
1837 			continue;
1838 
1839 		DEBUG_PAGE_ACCESS_START(page);
1840 		map_page(area, page,
1841 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1842 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1843 		DEBUG_PAGE_ACCESS_END(page);
1844 	}
1845 }
1846 
1847 
1848 /*!	Will map the file specified by \a fd to an area in memory.
1849 	The file will be mirrored beginning at the specified \a offset. The
1850 	\a offset and \a size arguments have to be page aligned.
1851 */
1852 static area_id
1853 _vm_map_file(team_id team, const char* name, void** _address,
1854 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1855 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1856 {
1857 	// TODO: for binary files, we want to make sure that they get a
1858 	//	consistent copy of the file as of mapping time, i.e. later changes
1859 	//	should not make it into the mapped copy -- this will need quite some
1860 	//	changes to be done in a nice way
1861 	TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n",
1862 		fd, offset, size, mapping));
1863 
1864 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1865 	size = PAGE_ALIGN(size);
1866 
1867 	if (mapping == REGION_NO_PRIVATE_MAP)
1868 		protection |= B_SHARED_AREA;
1869 	if (addressSpec != B_EXACT_ADDRESS)
1870 		unmapAddressRange = false;
1871 
1872 	if (fd < 0) {
1873 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1874 		virtual_address_restrictions virtualRestrictions = {};
1875 		virtualRestrictions.address = *_address;
1876 		virtualRestrictions.address_specification = addressSpec;
1877 		physical_address_restrictions physicalRestrictions = {};
1878 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1879 			flags, &virtualRestrictions, &physicalRestrictions, kernel,
1880 			_address);
1881 	}
1882 
1883 	// get the open flags of the FD
1884 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1885 	if (descriptor == NULL)
1886 		return EBADF;
1887 	int32 openMode = descriptor->open_mode;
1888 	put_fd(descriptor);
1889 
1890 	// The FD must be open for reading in any case. For a shared mapping with
1891 	// write access, the FD must additionally be open for writing.
1892 	if ((openMode & O_ACCMODE) == O_WRONLY
1893 		|| (mapping == REGION_NO_PRIVATE_MAP
1894 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1895 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1896 		return EACCES;
1897 	}
1898 
1899 	// get the vnode for the object, this also grabs a ref to it
1900 	struct vnode* vnode = NULL;
1901 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1902 	if (status < B_OK)
1903 		return status;
1904 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1905 
1906 	// If we're going to pre-map pages, we need to reserve the pages needed by
1907 	// the mapping backend upfront.
1908 	page_num_t reservedPreMapPages = 0;
1909 	vm_page_reservation reservation;
1910 	if ((protection & B_READ_AREA) != 0) {
1911 		AddressSpaceWriteLocker locker;
1912 		status = locker.SetTo(team);
1913 		if (status != B_OK)
1914 			return status;
1915 
1916 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1917 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1918 
1919 		locker.Unlock();
1920 
1921 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1922 			team == VMAddressSpace::KernelID()
1923 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1924 	}
1925 
1926 	struct PageUnreserver {
1927 		PageUnreserver(vm_page_reservation* reservation)
1928 			:
1929 			fReservation(reservation)
1930 		{
1931 		}
1932 
1933 		~PageUnreserver()
1934 		{
1935 			if (fReservation != NULL)
1936 				vm_page_unreserve_pages(fReservation);
1937 		}
1938 
1939 		vm_page_reservation* fReservation;
1940 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1941 
1942 	// Lock the address space and, if the specified address range shall be
1943 	// unmapped, ensure it is not wired.
1944 	AddressSpaceWriteLocker locker;
1945 	do {
1946 		if (locker.SetTo(team) != B_OK)
1947 			return B_BAD_TEAM_ID;
1948 	} while (unmapAddressRange
1949 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1950 			(addr_t)*_address, size, &locker));
1951 
1952 	// TODO: this only works for file systems that use the file cache
1953 	VMCache* cache;
1954 	status = vfs_get_vnode_cache(vnode, &cache, false);
1955 	if (status < B_OK)
1956 		return status;
1957 
1958 	cache->Lock();
1959 
1960 	VMArea* area;
1961 	virtual_address_restrictions addressRestrictions = {};
1962 	addressRestrictions.address = *_address;
1963 	addressRestrictions.address_specification = addressSpec;
1964 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1965 		0, protection, mapping,
1966 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1967 		&addressRestrictions, kernel, &area, _address);
1968 
1969 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1970 		// map_backing_store() cannot know we no longer need the ref
1971 		cache->ReleaseRefLocked();
1972 	}
1973 
1974 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1975 		pre_map_area_pages(area, cache, &reservation);
1976 
1977 	cache->Unlock();
1978 
1979 	if (status == B_OK) {
1980 		// TODO: this probably deserves a smarter solution, ie. don't always
1981 		// prefetch stuff, and also, probably don't trigger it at this place.
1982 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1983 			// prefetches at max 10 MB starting from "offset"
1984 	}
1985 
1986 	if (status != B_OK)
1987 		return status;
1988 
1989 	area->cache_type = CACHE_TYPE_VNODE;
1990 	return area->id;
1991 }
1992 
1993 
1994 area_id
1995 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
1996 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
1997 	int fd, off_t offset)
1998 {
1999 	if (!arch_vm_supports_protection(protection))
2000 		return B_NOT_SUPPORTED;
2001 
2002 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2003 		mapping, unmapAddressRange, fd, offset, true);
2004 }
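
// A minimal usage sketch for vm_map_file(): mapping an already opened file
// read-only into the kernel team. The helper name is made up, and the fd and
// size are assumed to come from the caller.
#if 0
static area_id
example_map_file_read_only(int fd, size_t fileSize)
{
	void* address = NULL;
	return vm_map_file(VMAddressSpace::KernelID(), "example mapped file",
		&address, B_ANY_KERNEL_ADDRESS, fileSize, B_KERNEL_READ_AREA,
		REGION_NO_PRIVATE_MAP, false, fd, 0);
}
#endif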
2005 
2006 
2007 VMCache*
2008 vm_area_get_locked_cache(VMArea* area)
2009 {
2010 	rw_lock_read_lock(&sAreaCacheLock);
2011 
2012 	while (true) {
2013 		VMCache* cache = area->cache;
2014 
2015 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2016 			// cache has been deleted
2017 			rw_lock_read_lock(&sAreaCacheLock);
2018 			continue;
2019 		}
2020 
2021 		rw_lock_read_lock(&sAreaCacheLock);
2022 
2023 		if (cache == area->cache) {
2024 			cache->AcquireRefLocked();
2025 			rw_lock_read_unlock(&sAreaCacheLock);
2026 			return cache;
2027 		}
2028 
2029 		// the cache changed in the meantime
2030 		cache->Unlock();
2031 	}
2032 }
2033 
2034 
2035 void
2036 vm_area_put_locked_cache(VMCache* cache)
2037 {
2038 	cache->ReleaseRefAndUnlock();
2039 }
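
// The two functions above form a get/put pair: vm_area_get_locked_cache()
// returns the area's cache locked and with an extra reference, which the
// caller hands back via vm_area_put_locked_cache(). A minimal sketch (the
// helper name is made up for illustration):
#if 0
static void
example_with_locked_cache(VMArea* area)
{
	VMCache* cache = vm_area_get_locked_cache(area);
	// ... work with the locked cache ...
	vm_area_put_locked_cache(cache);
		// releases the reference acquired above and unlocks the cache
}
#endif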
2040 
2041 
2042 area_id
2043 vm_clone_area(team_id team, const char* name, void** address,
2044 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2045 	bool kernel)
2046 {
2047 	VMArea* newArea = NULL;
2048 	VMArea* sourceArea;
2049 
2050 	// Check whether the source area exists and is cloneable. If so, mark it
2051 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2052 	{
2053 		AddressSpaceWriteLocker locker;
2054 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2055 		if (status != B_OK)
2056 			return status;
2057 
2058 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2059 			return B_NOT_ALLOWED;
2060 
2061 		sourceArea->protection |= B_SHARED_AREA;
2062 		protection |= B_SHARED_AREA;
2063 	}
2064 
2065 	// Now lock both address spaces and actually do the cloning.
2066 
2067 	MultiAddressSpaceLocker locker;
2068 	VMAddressSpace* sourceAddressSpace;
2069 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2070 	if (status != B_OK)
2071 		return status;
2072 
2073 	VMAddressSpace* targetAddressSpace;
2074 	status = locker.AddTeam(team, true, &targetAddressSpace);
2075 	if (status != B_OK)
2076 		return status;
2077 
2078 	status = locker.Lock();
2079 	if (status != B_OK)
2080 		return status;
2081 
2082 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2083 	if (sourceArea == NULL)
2084 		return B_BAD_VALUE;
2085 
2086 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2087 		return B_NOT_ALLOWED;
2088 
2089 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2090 
2091 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2092 	//	have been adapted. Maybe it should be part of the kernel settings,
2093 	//	anyway (so that old drivers can always work).
2094 #if 0
2095 	if (sourceArea->aspace == VMAddressSpace::Kernel()
2096 		&& addressSpace != VMAddressSpace::Kernel()
2097 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2098 		// kernel areas must not be cloned in userland, unless explicitly
2099 		// declared user-cloneable upon construction
2100 		status = B_NOT_ALLOWED;
2101 	} else
2102 #endif
2103 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2104 		status = B_NOT_ALLOWED;
2105 	else {
2106 		virtual_address_restrictions addressRestrictions = {};
2107 		addressRestrictions.address = *address;
2108 		addressRestrictions.address_specification = addressSpec;
2109 		status = map_backing_store(targetAddressSpace, cache,
2110 			sourceArea->cache_offset, name, sourceArea->Size(),
2111 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2112 			kernel, &newArea, address);
2113 	}
2114 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2115 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2116 		// to create a new cache, and has therefore already acquired a reference
2117 		// to the source cache - but otherwise it has no idea that we need
2118 		// one.
2119 		cache->AcquireRefLocked();
2120 	}
2121 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2122 		// we need to map in everything at this point
2123 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2124 			// we don't have actual pages to map but a physical area
2125 			VMTranslationMap* map
2126 				= sourceArea->address_space->TranslationMap();
2127 			map->Lock();
2128 
2129 			phys_addr_t physicalAddress;
2130 			uint32 oldProtection;
2131 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2132 
2133 			map->Unlock();
2134 
2135 			map = targetAddressSpace->TranslationMap();
2136 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2137 				newArea->Base() + (newArea->Size() - 1));
2138 
2139 			vm_page_reservation reservation;
2140 			vm_page_reserve_pages(&reservation, reservePages,
2141 				targetAddressSpace == VMAddressSpace::Kernel()
2142 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2143 			map->Lock();
2144 
2145 			for (addr_t offset = 0; offset < newArea->Size();
2146 					offset += B_PAGE_SIZE) {
2147 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2148 					protection, newArea->MemoryType(), &reservation);
2149 			}
2150 
2151 			map->Unlock();
2152 			vm_page_unreserve_pages(&reservation);
2153 		} else {
2154 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2155 			size_t reservePages = map->MaxPagesNeededToMap(
2156 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2157 			vm_page_reservation reservation;
2158 			vm_page_reserve_pages(&reservation, reservePages,
2159 				targetAddressSpace == VMAddressSpace::Kernel()
2160 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2161 
2162 			// map in all pages from source
2163 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2164 					vm_page* page  = it.Next();) {
2165 				if (!page->busy) {
2166 					DEBUG_PAGE_ACCESS_START(page);
2167 					map_page(newArea, page,
2168 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2169 							- newArea->cache_offset),
2170 						protection, &reservation);
2171 					DEBUG_PAGE_ACCESS_END(page);
2172 				}
2173 			}
2174 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2175 			// ensuring that!
2176 
2177 			vm_page_unreserve_pages(&reservation);
2178 		}
2179 	}
2180 	if (status == B_OK)
2181 		newArea->cache_type = sourceArea->cache_type;
2182 
2183 	vm_area_put_locked_cache(cache);
2184 
2185 	if (status < B_OK)
2186 		return status;
2187 
2188 	return newArea->id;
2189 }
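
// A minimal usage sketch for vm_clone_area(): cloning an existing area into
// the kernel address space as a shared mapping. The helper name is made up,
// and the source area ID is assumed to come from the caller.
#if 0
static area_id
example_clone_into_kernel(area_id sourceArea)
{
	void* address = NULL;
	return vm_clone_area(VMAddressSpace::KernelID(), "example clone",
		&address, B_ANY_KERNEL_ADDRESS,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, REGION_NO_PRIVATE_MAP,
		sourceArea, true);
}
#endif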
2190 
2191 
2192 /*!	Deletes the specified area of the given address space.
2193 
2194 	The address space must be write-locked.
2195 	The caller must ensure that the area does not have any wired ranges.
2196 
2197 	\param addressSpace The address space containing the area.
2198 	\param area The area to be deleted.
2199 	\param deletingAddressSpace \c true, if the address space is in the process
2200 		of being deleted.
2201 */
2202 static void
2203 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2204 	bool deletingAddressSpace)
2205 {
2206 	ASSERT(!area->IsWired());
2207 
2208 	VMAreaHash::Remove(area);
2209 
2210 	// At this point the area is removed from the global hash table, but
2211 	// still exists in the area list.
2212 
2213 	// Unmap the virtual address space the area occupied.
2214 	{
2215 		// We need to lock the complete cache chain.
2216 		VMCache* topCache = vm_area_get_locked_cache(area);
2217 		VMCacheChainLocker cacheChainLocker(topCache);
2218 		cacheChainLocker.LockAllSourceCaches();
2219 
2220 		// If the area's top cache is a temporary cache and the area is the only
2221 		// one referencing it (besides us currently holding a second reference),
2222 		// the unmapping code doesn't need to care about preserving the accessed
2223 		// and dirty flags of the top cache page mappings.
2224 		bool ignoreTopCachePageFlags
2225 			= topCache->temporary && topCache->RefCount() == 2;
2226 
2227 		area->address_space->TranslationMap()->UnmapArea(area,
2228 			deletingAddressSpace, ignoreTopCachePageFlags);
2229 	}
2230 
2231 	if (!area->cache->temporary)
2232 		area->cache->WriteModified();
2233 
2234 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2235 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2236 
2237 	arch_vm_unset_memory_type(area);
2238 	addressSpace->RemoveArea(area, allocationFlags);
2239 	addressSpace->Put();
2240 
2241 	area->cache->RemoveArea(area);
2242 	area->cache->ReleaseRef();
2243 
2244 	addressSpace->DeleteArea(area, allocationFlags);
2245 }
2246 
2247 
2248 status_t
2249 vm_delete_area(team_id team, area_id id, bool kernel)
2250 {
2251 	TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id));
2252 
2253 	// lock the address space and make sure the area isn't wired
2254 	AddressSpaceWriteLocker locker;
2255 	VMArea* area;
2256 	AreaCacheLocker cacheLocker;
2257 
2258 	do {
2259 		status_t status = locker.SetFromArea(team, id, area);
2260 		if (status != B_OK)
2261 			return status;
2262 
2263 		cacheLocker.SetTo(area);
2264 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2265 
2266 	cacheLocker.Unlock();
2267 
2268 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2269 		return B_NOT_ALLOWED;
2270 
2271 	delete_area(locker.AddressSpace(), area, false);
2272 	return B_OK;
2273 }
2274 
2275 
2276 /*!	Creates a new cache on top of the given cache, moves all areas from
2277 	the old cache to the new one, and changes the protection of all affected
2278 	areas' pages to read-only. If requested, wired pages are moved up to the
2279 	new cache and copies are added to the old cache in their place.
2280 	Preconditions:
2281 	- The given cache must be locked.
2282 	- All of the cache's areas' address spaces must be read locked.
2283 	- Either the cache must not have any wired ranges or a page reservation for
2284 	  all wired pages must be provided, so they can be copied.
2285 
2286 	\param lowerCache The cache on top of which a new cache shall be created.
2287 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2288 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2289 		has wired pages. The wired pages are copied in this case.
2290 */
2291 static status_t
2292 vm_copy_on_write_area(VMCache* lowerCache,
2293 	vm_page_reservation* wiredPagesReservation)
2294 {
2295 	VMCache* upperCache;
2296 
2297 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2298 
2299 	// We need to separate the cache from its areas. The cache goes one level
2300 	// deeper and we create a new cache in between.
2301 
2302 	// create an anonymous cache
2303 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2304 		0, dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2305 		VM_PRIORITY_USER);
2306 	if (status != B_OK)
2307 		return status;
2308 
2309 	upperCache->Lock();
2310 
2311 	upperCache->temporary = 1;
2312 	upperCache->virtual_base = lowerCache->virtual_base;
2313 	upperCache->virtual_end = lowerCache->virtual_end;
2314 
2315 	// transfer the lower cache areas to the upper cache
2316 	rw_lock_write_lock(&sAreaCacheLock);
2317 	upperCache->TransferAreas(lowerCache);
2318 	rw_lock_write_unlock(&sAreaCacheLock);
2319 
2320 	lowerCache->AddConsumer(upperCache);
2321 
2322 	// We now need to remap all pages from all of the cache's areas read-only,
2323 	// so that a copy will be created on next write access. If there are wired
2324 	// pages, we keep their protection, move them to the upper cache and create
2325 	// copies for the lower cache.
2326 	if (wiredPagesReservation != NULL) {
2327 		// We need to handle wired pages -- iterate through the cache's pages.
2328 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2329 				vm_page* page = it.Next();) {
2330 			if (page->WiredCount() > 0) {
2331 				// allocate a new page and copy the wired one
2332 				vm_page* copiedPage = vm_page_allocate_page(
2333 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2334 
2335 				vm_memcpy_physical_page(
2336 					copiedPage->physical_page_number * B_PAGE_SIZE,
2337 					page->physical_page_number * B_PAGE_SIZE);
2338 
2339 				// move the wired page to the upper cache (note: removing is OK
2340 				// with the SplayTree iterator) and insert the copy
2341 				upperCache->MovePage(page);
2342 				lowerCache->InsertPage(copiedPage,
2343 					page->cache_offset * B_PAGE_SIZE);
2344 
2345 				DEBUG_PAGE_ACCESS_END(copiedPage);
2346 			} else {
2347 				// Change the protection of this page in all areas.
2348 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2349 						tempArea = tempArea->cache_next) {
2350 					// The area must remain readable to the same extent it was
2351 					// readable before; only write access is revoked here.
2352 					uint32 protection = B_KERNEL_READ_AREA;
2353 					if ((tempArea->protection & B_READ_AREA) != 0)
2354 						protection |= B_READ_AREA;
2355 
2356 					VMTranslationMap* map
2357 						= tempArea->address_space->TranslationMap();
2358 					map->Lock();
2359 					map->ProtectPage(tempArea,
2360 						virtual_page_address(tempArea, page), protection);
2361 					map->Unlock();
2362 				}
2363 			}
2364 		}
2365 	} else {
2366 		ASSERT(lowerCache->WiredPagesCount() == 0);
2367 
2368 		// just change the protection of all areas
2369 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2370 				tempArea = tempArea->cache_next) {
2371 			// The area must remain readable to the same extent it was readable
2372 			// before; only write access is revoked here.
2373 			uint32 protection = B_KERNEL_READ_AREA;
2374 			if ((tempArea->protection & B_READ_AREA) != 0)
2375 				protection |= B_READ_AREA;
2376 
2377 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2378 			map->Lock();
2379 			map->ProtectArea(tempArea, protection);
2380 			map->Unlock();
2381 		}
2382 	}
2383 
2384 	vm_area_put_locked_cache(upperCache);
2385 
2386 	return B_OK;
2387 }
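
// For illustration, the transformation vm_copy_on_write_area() performs on
// the cache chain, assuming a single area A on cache C with source S:
//
//     before:  A -> C -> S
//     after:   A -> upperCache (temporary, initially empty) -> C -> S
//
// All of A's pages are remapped read-only (wired pages are handled specially,
// see above), so the first write fault copies the touched page into
// upperCache while C keeps the original.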
2388 
2389 
2390 area_id
2391 vm_copy_area(team_id team, const char* name, void** _address,
2392 	uint32 addressSpec, uint32 protection, area_id sourceID)
2393 {
2394 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2395 
2396 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2397 		// set the same protection for the kernel as for userland
2398 		protection |= B_KERNEL_READ_AREA;
2399 		if (writableCopy)
2400 			protection |= B_KERNEL_WRITE_AREA;
2401 	}
2402 
2403 	// Do the locking: target address space, all address spaces associated with
2404 	// the source cache, and the cache itself.
2405 	MultiAddressSpaceLocker locker;
2406 	VMAddressSpace* targetAddressSpace;
2407 	VMCache* cache;
2408 	VMArea* source;
2409 	AreaCacheLocker cacheLocker;
2410 	status_t status;
2411 	bool sharedArea;
2412 
2413 	page_num_t wiredPages = 0;
2414 	vm_page_reservation wiredPagesReservation;
2415 
2416 	bool restart;
2417 	do {
2418 		restart = false;
2419 
2420 		locker.Unset();
2421 		status = locker.AddTeam(team, true, &targetAddressSpace);
2422 		if (status == B_OK) {
2423 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2424 				&cache);
2425 		}
2426 		if (status != B_OK)
2427 			return status;
2428 
2429 		cacheLocker.SetTo(cache, true);	// already locked
2430 
2431 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2432 
2433 		page_num_t oldWiredPages = wiredPages;
2434 		wiredPages = 0;
2435 
2436 		// If the source area isn't shared, count the number of wired pages in
2437 		// the cache and reserve as many pages.
2438 		if (!sharedArea) {
2439 			wiredPages = cache->WiredPagesCount();
2440 
2441 			if (wiredPages > oldWiredPages) {
2442 				cacheLocker.Unlock();
2443 				locker.Unlock();
2444 
2445 				if (oldWiredPages > 0)
2446 					vm_page_unreserve_pages(&wiredPagesReservation);
2447 
2448 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2449 					VM_PRIORITY_USER);
2450 
2451 				restart = true;
2452 			}
2453 		} else if (oldWiredPages > 0)
2454 			vm_page_unreserve_pages(&wiredPagesReservation);
2455 	} while (restart);
2456 
2457 	// unreserve pages later
2458 	struct PagesUnreserver {
2459 		PagesUnreserver(vm_page_reservation* reservation)
2460 			:
2461 			fReservation(reservation)
2462 		{
2463 		}
2464 
2465 		~PagesUnreserver()
2466 		{
2467 			if (fReservation != NULL)
2468 				vm_page_unreserve_pages(fReservation);
2469 		}
2470 
2471 	private:
2472 		vm_page_reservation*	fReservation;
2473 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2474 
2475 	if (addressSpec == B_CLONE_ADDRESS) {
2476 		addressSpec = B_EXACT_ADDRESS;
2477 		*_address = (void*)source->Base();
2478 	}
2479 
2480 	// First, create a cache on top of the source area, or use the existing
2481 	// one, if this is a shared area.
2482 
2483 	VMArea* target;
2484 	virtual_address_restrictions addressRestrictions = {};
2485 	addressRestrictions.address = *_address;
2486 	addressRestrictions.address_specification = addressSpec;
2487 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2488 		name, source->Size(), source->wiring, protection,
2489 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2490 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2491 		&addressRestrictions, true, &target, _address);
2492 	if (status < B_OK)
2493 		return status;
2494 
2495 	if (sharedArea) {
2496 		// The new area uses the old area's cache, but map_backing_store()
2497 		// hasn't acquired a ref. So we have to do that now.
2498 		cache->AcquireRefLocked();
2499 	}
2500 
2501 	// If the source area is writable, we need to move it one layer up as well
2502 
2503 	if (!sharedArea) {
2504 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2505 			// TODO: do something more useful if this fails!
2506 			if (vm_copy_on_write_area(cache,
2507 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2508 				panic("vm_copy_on_write_area() failed!\n");
2509 			}
2510 		}
2511 	}
2512 
2513 	// we return the ID of the newly created area
2514 	return target->id;
2515 }
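
// A minimal usage sketch for vm_copy_area(): creating a private copy of an
// existing area in the current team at the same base address. The helper name
// is made up, and the source area ID is assumed to come from the caller.
#if 0
static area_id
example_copy_area(area_id sourceArea)
{
	void* address = NULL;
	return vm_copy_area(VMAddressSpace::CurrentID(), "example copy",
		&address, B_CLONE_ADDRESS, B_READ_AREA | B_WRITE_AREA, sourceArea);
			// the matching kernel protection bits are added automatically
}
#endif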
2516 
2517 
2518 static status_t
2519 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2520 	bool kernel)
2521 {
2522 	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = "
2523 		"%#lx)\n", team, areaID, newProtection));
2524 
2525 	if (!arch_vm_supports_protection(newProtection))
2526 		return B_NOT_SUPPORTED;
2527 
2528 	bool becomesWritable
2529 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2530 
2531 	// lock address spaces and cache
2532 	MultiAddressSpaceLocker locker;
2533 	VMCache* cache;
2534 	VMArea* area;
2535 	status_t status;
2536 	AreaCacheLocker cacheLocker;
2537 	bool isWritable;
2538 
2539 	bool restart;
2540 	do {
2541 		restart = false;
2542 
2543 		locker.Unset();
2544 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2545 		if (status != B_OK)
2546 			return status;
2547 
2548 		cacheLocker.SetTo(cache, true);	// already locked
2549 
2550 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2551 			return B_NOT_ALLOWED;
2552 
2553 		if (area->protection == newProtection)
2554 			return B_OK;
2555 
2556 		if (team != VMAddressSpace::KernelID()
2557 			&& area->address_space->ID() != team) {
2558 			// unless you're the kernel, you are only allowed to set
2559 			// the protection of your own areas
2560 			return B_NOT_ALLOWED;
2561 		}
2562 
2563 		isWritable
2564 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2565 
2566 		// Make sure the area (or, if we're going to call
2567 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2568 		// wired ranges.
2569 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2570 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2571 					otherArea = otherArea->cache_next) {
2572 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2573 					restart = true;
2574 					break;
2575 				}
2576 			}
2577 		} else {
2578 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2579 				restart = true;
2580 		}
2581 	} while (restart);
2582 
2583 	bool changePageProtection = true;
2584 	bool changeTopCachePagesOnly = false;
2585 
2586 	if (isWritable && !becomesWritable) {
2587 		// writable -> !writable
2588 
2589 		if (cache->source != NULL && cache->temporary) {
2590 			if (cache->CountWritableAreas(area) == 0) {
2591 				// Since this cache is now backed by the pages in its source cache,
2592 				// we can change the cache's commitment to take only those pages
2593 				// into account that really are in this cache.
2594 
2595 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2596 					team == VMAddressSpace::KernelID()
2597 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2598 
2599 				// TODO: we may be able to join with our source cache, if
2600 				// count == 0
2601 			}
2602 		}
2603 
2604 		// If only the writability changes, we can just remap the pages of the
2605 		// top cache, since the pages of lower caches are mapped read-only
2606 		// anyway. That's only advantageous if the number of pages in the cache
2607 		// is significantly smaller than the number of pages in the area,
2608 		// though.
2609 		if (newProtection
2610 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2611 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2612 			changeTopCachePagesOnly = true;
2613 		}
2614 	} else if (!isWritable && becomesWritable) {
2615 		// !writable -> writable
2616 
2617 		if (!cache->consumers.IsEmpty()) {
2618 			// There are consumers -- we have to insert a new cache. Fortunately
2619 			// vm_copy_on_write_area() does everything that's needed.
2620 			changePageProtection = false;
2621 			status = vm_copy_on_write_area(cache, NULL);
2622 		} else {
2623 			// No consumers, so we don't need to insert a new one.
2624 			if (cache->source != NULL && cache->temporary) {
2625 				// the cache's commitment must contain all possible pages
2626 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2627 					team == VMAddressSpace::KernelID()
2628 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2629 			}
2630 
2631 			if (status == B_OK && cache->source != NULL) {
2632 				// There's a source cache, hence we can't just change all pages'
2633 				// protection or we might allow writing into pages belonging to
2634 				// a lower cache.
2635 				changeTopCachePagesOnly = true;
2636 			}
2637 		}
2638 	} else {
2639 		// we don't have anything special to do in all other cases
2640 	}
2641 
2642 	if (status == B_OK) {
2643 		// remap existing pages in this cache
2644 		if (changePageProtection) {
2645 			VMTranslationMap* map = area->address_space->TranslationMap();
2646 			map->Lock();
2647 
2648 			if (changeTopCachePagesOnly) {
2649 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2650 				page_num_t lastPageOffset
2651 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2652 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2653 						vm_page* page = it.Next();) {
2654 					if (page->cache_offset >= firstPageOffset
2655 						&& page->cache_offset < lastPageOffset) {
2656 						addr_t address = virtual_page_address(area, page);
2657 						map->ProtectPage(area, address, newProtection);
2658 					}
2659 				}
2660 			} else
2661 				map->ProtectArea(area, newProtection);
2662 
2663 			map->Unlock();
2664 		}
2665 
2666 		area->protection = newProtection;
2667 	}
2668 
2669 	return status;
2670 }
2671 
2672 
2673 status_t
2674 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2675 {
2676 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2677 	if (addressSpace == NULL)
2678 		return B_BAD_TEAM_ID;
2679 
2680 	VMTranslationMap* map = addressSpace->TranslationMap();
2681 
2682 	map->Lock();
2683 	uint32 dummyFlags;
2684 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2685 	map->Unlock();
2686 
2687 	addressSpace->Put();
2688 	return status;
2689 }
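
// A minimal usage sketch for vm_get_page_mapping(): translating a kernel
// virtual address to its physical address. The helper name is made up for
// illustration.
#if 0
static void
example_print_physical_address(addr_t virtualAddress)
{
	phys_addr_t physicalAddress;
	if (vm_get_page_mapping(VMAddressSpace::KernelID(), virtualAddress,
			&physicalAddress) == B_OK) {
		dprintf("%p -> %#" B_PRIxPHYSADDR "\n", (void*)virtualAddress,
			physicalAddress);
	}
}
#endif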
2690 
2691 
2692 /*!	The page's cache must be locked.
2693 */
2694 bool
2695 vm_test_map_modification(vm_page* page)
2696 {
2697 	if (page->modified)
2698 		return true;
2699 
2700 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2701 	vm_page_mapping* mapping;
2702 	while ((mapping = iterator.Next()) != NULL) {
2703 		VMArea* area = mapping->area;
2704 		VMTranslationMap* map = area->address_space->TranslationMap();
2705 
2706 		phys_addr_t physicalAddress;
2707 		uint32 flags;
2708 		map->Lock();
2709 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2710 		map->Unlock();
2711 
2712 		if ((flags & PAGE_MODIFIED) != 0)
2713 			return true;
2714 	}
2715 
2716 	return false;
2717 }
2718 
2719 
2720 /*!	The page's cache must be locked.
2721 */
2722 void
2723 vm_clear_map_flags(vm_page* page, uint32 flags)
2724 {
2725 	if ((flags & PAGE_ACCESSED) != 0)
2726 		page->accessed = false;
2727 	if ((flags & PAGE_MODIFIED) != 0)
2728 		page->modified = false;
2729 
2730 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2731 	vm_page_mapping* mapping;
2732 	while ((mapping = iterator.Next()) != NULL) {
2733 		VMArea* area = mapping->area;
2734 		VMTranslationMap* map = area->address_space->TranslationMap();
2735 
2736 		map->Lock();
2737 		map->ClearFlags(virtual_page_address(area, page), flags);
2738 		map->Unlock();
2739 	}
2740 }
2741 
2742 
2743 /*!	Removes all mappings from a page.
2744 	After you've called this function, the page is unmapped from memory and
2745 	the page's \c accessed and \c modified flags have been updated according
2746 	to the state of the mappings.
2747 	The page's cache must be locked.
2748 */
2749 void
2750 vm_remove_all_page_mappings(vm_page* page)
2751 {
2752 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2753 		VMArea* area = mapping->area;
2754 		VMTranslationMap* map = area->address_space->TranslationMap();
2755 		addr_t address = virtual_page_address(area, page);
2756 		map->UnmapPage(area, address, false);
2757 	}
2758 }
2759 
2760 
2761 int32
2762 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2763 {
2764 	int32 count = 0;
2765 
2766 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2767 	vm_page_mapping* mapping;
2768 	while ((mapping = iterator.Next()) != NULL) {
2769 		VMArea* area = mapping->area;
2770 		VMTranslationMap* map = area->address_space->TranslationMap();
2771 
2772 		bool modified;
2773 		if (map->ClearAccessedAndModified(area,
2774 				virtual_page_address(area, page), false, modified)) {
2775 			count++;
2776 		}
2777 
2778 		page->modified |= modified;
2779 	}
2780 
2781 
2782 	if (page->accessed) {
2783 		count++;
2784 		page->accessed = false;
2785 	}
2786 
2787 	return count;
2788 }
2789 
2790 
2791 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2792 	mappings.
2793 	The function iterates through the page mappings and removes them until
2794 	encountering one that has been accessed. From then on it will continue to
2795 	iterate, but only clear the accessed flag of the mapping. The page's
2796 	\c modified bit will be updated accordingly, the \c accessed bit will be
2797 	cleared.
2798 	\return The number of mapping accessed bits encountered, including the
2799 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2800 		of the page have been removed.
2801 */
2802 int32
2803 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2804 {
2805 	ASSERT(page->WiredCount() == 0);
2806 
2807 	if (page->accessed)
2808 		return vm_clear_page_mapping_accessed_flags(page);
2809 
2810 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2811 		VMArea* area = mapping->area;
2812 		VMTranslationMap* map = area->address_space->TranslationMap();
2813 		addr_t address = virtual_page_address(area, page);
2814 		bool modified = false;
2815 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2816 			page->accessed = true;
2817 			page->modified |= modified;
2818 			return vm_clear_page_mapping_accessed_flags(page);
2819 		}
2820 		page->modified |= modified;
2821 	}
2822 
2823 	return 0;
2824 }
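
// A minimal sketch of how a caller (e.g. page-daemon-style aging code) might
// use the return value above: 0 means the page was unmapped everywhere
// without having been accessed and is a reclaim candidate. The helper name is
// made up; the page's cache must be locked and the page must not be wired, as
// required above.
#if 0
static bool
example_page_is_reclaim_candidate(vm_page* page)
{
	return vm_remove_all_page_mappings_if_unaccessed(page) == 0;
}
#endif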
2825 
2826 
2827 static int
2828 display_mem(int argc, char** argv)
2829 {
2830 	bool physical = false;
2831 	addr_t copyAddress;
2832 	int32 displayWidth;
2833 	int32 itemSize;
2834 	int32 num = -1;
2835 	addr_t address;
2836 	int i = 1, j;
2837 
2838 	if (argc > 1 && argv[1][0] == '-') {
2839 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2840 			physical = true;
2841 			i++;
2842 		} else
2843 			i = 99;
2844 	}
2845 
2846 	if (argc < i + 1 || argc > i + 2) {
2847 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2848 			"\tdl - 8 bytes\n"
2849 			"\tdw - 4 bytes\n"
2850 			"\tds - 2 bytes\n"
2851 			"\tdb - 1 byte\n"
2852 			"\tstring - a whole string\n"
2853 			"  -p or --physical only allows memory from a single page to be "
2854 			"displayed.\n");
2855 		return 0;
2856 	}
2857 
2858 	address = parse_expression(argv[i]);
2859 
2860 	if (argc > i + 1)
2861 		num = parse_expression(argv[i + 1]);
2862 
2863 	// build the format string
2864 	if (strcmp(argv[0], "db") == 0) {
2865 		itemSize = 1;
2866 		displayWidth = 16;
2867 	} else if (strcmp(argv[0], "ds") == 0) {
2868 		itemSize = 2;
2869 		displayWidth = 8;
2870 	} else if (strcmp(argv[0], "dw") == 0) {
2871 		itemSize = 4;
2872 		displayWidth = 4;
2873 	} else if (strcmp(argv[0], "dl") == 0) {
2874 		itemSize = 8;
2875 		displayWidth = 2;
2876 	} else if (strcmp(argv[0], "string") == 0) {
2877 		itemSize = 1;
2878 		displayWidth = -1;
2879 	} else {
2880 		kprintf("display_mem called in an invalid way!\n");
2881 		return 0;
2882 	}
2883 
2884 	if (num <= 0)
2885 		num = displayWidth;
2886 
2887 	void* physicalPageHandle = NULL;
2888 
2889 	if (physical) {
2890 		int32 offset = address & (B_PAGE_SIZE - 1);
2891 		if (num * itemSize + offset > B_PAGE_SIZE) {
2892 			num = (B_PAGE_SIZE - offset) / itemSize;
2893 			kprintf("NOTE: number of bytes has been cut to page size\n");
2894 		}
2895 
2896 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2897 
2898 		if (vm_get_physical_page_debug(address, &copyAddress,
2899 				&physicalPageHandle) != B_OK) {
2900 			kprintf("getting the hardware page failed.\n");
2901 			return 0;
2902 		}
2903 
2904 		address += offset;
2905 		copyAddress += offset;
2906 	} else
2907 		copyAddress = address;
2908 
2909 	if (!strcmp(argv[0], "string")) {
2910 		kprintf("%p \"", (char*)copyAddress);
2911 
2912 		// string mode
2913 		for (i = 0; true; i++) {
2914 			char c;
2915 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2916 					!= B_OK
2917 				|| c == '\0') {
2918 				break;
2919 			}
2920 
2921 			if (c == '\n')
2922 				kprintf("\\n");
2923 			else if (c == '\t')
2924 				kprintf("\\t");
2925 			else {
2926 				if (!isprint(c))
2927 					c = '.';
2928 
2929 				kprintf("%c", c);
2930 			}
2931 		}
2932 
2933 		kprintf("\"\n");
2934 	} else {
2935 		// number mode
2936 		for (i = 0; i < num; i++) {
2937 			uint32 value;
2938 
2939 			if ((i % displayWidth) == 0) {
2940 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2941 				if (i != 0)
2942 					kprintf("\n");
2943 
2944 				kprintf("[0x%lx]  ", address + i * itemSize);
2945 
2946 				for (j = 0; j < displayed; j++) {
2947 					char c;
2948 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2949 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2950 						displayed = j;
2951 						break;
2952 					}
2953 					if (!isprint(c))
2954 						c = '.';
2955 
2956 					kprintf("%c", c);
2957 				}
2958 				if (num > displayWidth) {
2959 					// make sure the spacing in the last line is correct
2960 					for (j = displayed; j < displayWidth * itemSize; j++)
2961 						kprintf(" ");
2962 				}
2963 				kprintf("  ");
2964 			}
2965 
2966 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2967 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2968 				kprintf("read fault");
2969 				break;
2970 			}
2971 
2972 			switch (itemSize) {
2973 				case 1:
2974 					kprintf(" %02x", *(uint8*)&value);
2975 					break;
2976 				case 2:
2977 					kprintf(" %04x", *(uint16*)&value);
2978 					break;
2979 				case 4:
2980 					kprintf(" %08lx", *(uint32*)&value);
2981 					break;
2982 				case 8:
2983 					kprintf(" %016Lx", *(uint64*)&value);
2984 					break;
2985 			}
2986 		}
2987 
2988 		kprintf("\n");
2989 	}
2990 
2991 	if (physical) {
2992 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
2993 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
2994 	}
2995 	return 0;
2996 }
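
// For illustration, typical invocations of the command above at the kernel
// debugger prompt (the addresses are made up):
//
//     kdebug> dw 0x80120000 8       dump eight 4-byte words
//     kdebug> db -p 0x9f000 16      dump 16 bytes from a physical page
//     kdebug> string 0x80154020     print a NUL-terminated string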
2997 
2998 
2999 static void
3000 dump_cache_tree_recursively(VMCache* cache, int level,
3001 	VMCache* highlightCache)
3002 {
3003 	// print this cache
3004 	for (int i = 0; i < level; i++)
3005 		kprintf("  ");
3006 	if (cache == highlightCache)
3007 		kprintf("%p <--\n", cache);
3008 	else
3009 		kprintf("%p\n", cache);
3010 
3011 	// recursively print its consumers
3012 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3013 			VMCache* consumer = it.Next();) {
3014 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3015 	}
3016 }
3017 
3018 
3019 static int
3020 dump_cache_tree(int argc, char** argv)
3021 {
3022 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3023 		kprintf("usage: %s <address>\n", argv[0]);
3024 		return 0;
3025 	}
3026 
3027 	addr_t address = parse_expression(argv[1]);
3028 	if (address == 0)
3029 		return 0;
3030 
3031 	VMCache* cache = (VMCache*)address;
3032 	VMCache* root = cache;
3033 
3034 	// find the root cache (the transitive source)
3035 	while (root->source != NULL)
3036 		root = root->source;
3037 
3038 	dump_cache_tree_recursively(root, 0, cache);
3039 
3040 	return 0;
3041 }
3042 
3043 
3044 const char*
3045 vm_cache_type_to_string(int32 type)
3046 {
3047 	switch (type) {
3048 		case CACHE_TYPE_RAM:
3049 			return "RAM";
3050 		case CACHE_TYPE_DEVICE:
3051 			return "device";
3052 		case CACHE_TYPE_VNODE:
3053 			return "vnode";
3054 		case CACHE_TYPE_NULL:
3055 			return "null";
3056 
3057 		default:
3058 			return "unknown";
3059 	}
3060 }
3061 
3062 
3063 #if DEBUG_CACHE_LIST
3064 
3065 static void
3066 update_cache_info_recursively(VMCache* cache, cache_info& info)
3067 {
3068 	info.page_count += cache->page_count;
3069 	if (cache->type == CACHE_TYPE_RAM)
3070 		info.committed += cache->committed_size;
3071 
3072 	// recurse
3073 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3074 			VMCache* consumer = it.Next();) {
3075 		update_cache_info_recursively(consumer, info);
3076 	}
3077 }
3078 
3079 
3080 static int
3081 cache_info_compare_page_count(const void* _a, const void* _b)
3082 {
3083 	const cache_info* a = (const cache_info*)_a;
3084 	const cache_info* b = (const cache_info*)_b;
3085 	if (a->page_count == b->page_count)
3086 		return 0;
3087 	return a->page_count < b->page_count ? 1 : -1;
3088 }
3089 
3090 
3091 static int
3092 cache_info_compare_committed(const void* _a, const void* _b)
3093 {
3094 	const cache_info* a = (const cache_info*)_a;
3095 	const cache_info* b = (const cache_info*)_b;
3096 	if (a->committed == b->committed)
3097 		return 0;
3098 	return a->committed < b->committed ? 1 : -1;
3099 }
3100 
3101 
3102 static void
3103 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3104 {
3105 	for (int i = 0; i < level; i++)
3106 		kprintf("  ");
3107 
3108 	kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache,
3109 		vm_cache_type_to_string(cache->type), cache->virtual_base,
3110 		cache->virtual_end, cache->page_count);
3111 
3112 	if (level == 0)
3113 		kprintf("/%lu", info.page_count);
3114 
3115 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3116 		kprintf(", committed: %lld", cache->committed_size);
3117 
3118 		if (level == 0)
3119 			kprintf("/%lu", info.committed);
3120 	}
3121 
3122 	// areas
3123 	if (cache->areas != NULL) {
3124 		VMArea* area = cache->areas;
3125 		kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name,
3126 			area->address_space->ID());
3127 
3128 		while (area->cache_next != NULL) {
3129 			area = area->cache_next;
3130 			kprintf(", %ld", area->id);
3131 		}
3132 	}
3133 
3134 	kputs("\n");
3135 
3136 	// recurse
3137 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3138 			VMCache* consumer = it.Next();) {
3139 		dump_caches_recursively(consumer, info, level + 1);
3140 	}
3141 }
3142 
3143 
3144 static int
3145 dump_caches(int argc, char** argv)
3146 {
3147 	if (sCacheInfoTable == NULL) {
3148 		kprintf("No cache info table!\n");
3149 		return 0;
3150 	}
3151 
3152 	bool sortByPageCount = true;
3153 
3154 	for (int32 i = 1; i < argc; i++) {
3155 		if (strcmp(argv[i], "-c") == 0) {
3156 			sortByPageCount = false;
3157 		} else {
3158 			print_debugger_command_usage(argv[0]);
3159 			return 0;
3160 		}
3161 	}
3162 
3163 	uint32 totalCount = 0;
3164 	uint32 rootCount = 0;
3165 	off_t totalCommitted = 0;
3166 	page_num_t totalPages = 0;
3167 
3168 	VMCache* cache = gDebugCacheList;
3169 	while (cache) {
3170 		totalCount++;
3171 		if (cache->source == NULL) {
3172 			cache_info stackInfo;
3173 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3174 				? sCacheInfoTable[rootCount] : stackInfo;
3175 			rootCount++;
3176 			info.cache = cache;
3177 			info.page_count = 0;
3178 			info.committed = 0;
3179 			update_cache_info_recursively(cache, info);
3180 			totalCommitted += info.committed;
3181 			totalPages += info.page_count;
3182 		}
3183 
3184 		cache = cache->debug_next;
3185 	}
3186 
3187 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3188 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3189 			sortByPageCount
3190 				? &cache_info_compare_page_count
3191 				: &cache_info_compare_committed);
3192 	}
3193 
3194 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3195 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3196 	kprintf("%lu caches (%lu root caches), sorted by %s per cache "
3197 		"tree...\n\n", totalCount, rootCount,
3198 		sortByPageCount ? "page count" : "committed size");
3199 
3200 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3201 		for (uint32 i = 0; i < rootCount; i++) {
3202 			cache_info& info = sCacheInfoTable[i];
3203 			dump_caches_recursively(info.cache, info, 0);
3204 		}
3205 	} else
3206 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3207 
3208 	return 0;
3209 }
3210 
3211 #endif	// DEBUG_CACHE_LIST
3212 
3213 
3214 static int
3215 dump_cache(int argc, char** argv)
3216 {
3217 	VMCache* cache;
3218 	bool showPages = false;
3219 	int i = 1;
3220 
3221 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3222 		kprintf("usage: %s [-ps] <address>\n"
3223 			"  if -p is specified, all pages are shown; if -s is used,\n"
3224 			"  only the cache info is shown.\n", argv[0]);
3225 		return 0;
3226 	}
3227 	while (argv[i][0] == '-') {
3228 		char* arg = argv[i] + 1;
3229 		while (arg[0]) {
3230 			if (arg[0] == 'p')
3231 				showPages = true;
3232 			arg++;
3233 		}
3234 		i++;
3235 	}
3236 	if (argv[i] == NULL) {
3237 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3238 		return 0;
3239 	}
3240 
3241 	addr_t address = parse_expression(argv[i]);
3242 	if (address == 0)
3243 		return 0;
3244 
3245 	cache = (VMCache*)address;
3246 
3247 	cache->Dump(showPages);
3248 
3249 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3250 
3251 	return 0;
3252 }
3253 
3254 
3255 static void
3256 dump_area_struct(VMArea* area, bool mappings)
3257 {
3258 	kprintf("AREA: %p\n", area);
3259 	kprintf("name:\t\t'%s'\n", area->name);
3260 	kprintf("owner:\t\t0x%lx\n", area->address_space->ID());
3261 	kprintf("id:\t\t0x%lx\n", area->id);
3262 	kprintf("base:\t\t0x%lx\n", area->Base());
3263 	kprintf("size:\t\t0x%lx\n", area->Size());
3264 	kprintf("protection:\t0x%lx\n", area->protection);
3265 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3266 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3267 	kprintf("cache:\t\t%p\n", area->cache);
3268 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3269 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
3270 	kprintf("cache_next:\t%p\n", area->cache_next);
3271 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3272 
3273 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3274 	if (mappings) {
3275 		kprintf("page mappings:\n");
3276 		while (iterator.HasNext()) {
3277 			vm_page_mapping* mapping = iterator.Next();
3278 			kprintf("  %p", mapping->page);
3279 		}
3280 		kprintf("\n");
3281 	} else {
3282 		uint32 count = 0;
3283 		while (iterator.Next() != NULL) {
3284 			count++;
3285 		}
3286 		kprintf("page mappings:\t%lu\n", count);
3287 	}
3288 }
3289 
3290 
3291 static int
3292 dump_area(int argc, char** argv)
3293 {
3294 	bool mappings = false;
3295 	bool found = false;
3296 	int32 index = 1;
3297 	VMArea* area;
3298 	addr_t num;
3299 
3300 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3301 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3302 			"All areas matching either id/address/name are listed. You can\n"
3303 			"restrict the check to a specific attribute by prefixing the\n"
3304 			"specifier with one of the id/contains/address/name keywords.\n"
3305 			"-m shows the area's mappings as well.\n");
3306 		return 0;
3307 	}
3308 
3309 	if (!strcmp(argv[1], "-m")) {
3310 		mappings = true;
3311 		index++;
3312 	}
3313 
3314 	int32 mode = 0xf;
3315 	if (!strcmp(argv[index], "id"))
3316 		mode = 1;
3317 	else if (!strcmp(argv[index], "contains"))
3318 		mode = 2;
3319 	else if (!strcmp(argv[index], "name"))
3320 		mode = 4;
3321 	else if (!strcmp(argv[index], "address"))
3322 		mode = 0;
3323 	if (mode != 0xf)
3324 		index++;
3325 
3326 	if (index >= argc) {
3327 		kprintf("No area specifier given.\n");
3328 		return 0;
3329 	}
3330 
3331 	num = parse_expression(argv[index]);
3332 
3333 	if (mode == 0) {
3334 		dump_area_struct((struct VMArea*)num, mappings);
3335 	} else {
3336 		// walk through the area list, looking for the arguments as a name
3337 
3338 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3339 		while ((area = it.Next()) != NULL) {
3340 			if (((mode & 4) != 0 && area->name != NULL
3341 					&& !strcmp(argv[index], area->name))
3342 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3343 					|| (((mode & 2) != 0 && area->Base() <= num
3344 						&& area->Base() + area->Size() > num))))) {
3345 				dump_area_struct(area, mappings);
3346 				found = true;
3347 			}
3348 		}
3349 
3350 		if (!found)
3351 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3352 	}
3353 
3354 	return 0;
3355 }
3356 
3357 
3358 static int
3359 dump_area_list(int argc, char** argv)
3360 {
3361 	VMArea* area;
3362 	const char* name = NULL;
3363 	int32 id = 0;
3364 
3365 	if (argc > 1) {
3366 		id = parse_expression(argv[1]);
3367 		if (id == 0)
3368 			name = argv[1];
3369 	}
3370 
3371 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
3372 
3373 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3374 	while ((area = it.Next()) != NULL) {
3375 		if ((id != 0 && area->address_space->ID() != id)
3376 			|| (name != NULL && strstr(area->name, name) == NULL))
3377 			continue;
3378 
3379 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id,
3380 			(void*)area->Base(), (void*)area->Size(), area->protection,
3381 			area->wiring, area->name);
3382 	}
3383 	return 0;
3384 }
3385 
3386 
3387 static int
3388 dump_available_memory(int argc, char** argv)
3389 {
3390 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3391 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3392 	return 0;
3393 }
3394 
3395 
3396 /*!	Deletes all areas and reserved regions in the given address space.
3397 
3398 	The caller must ensure that none of the areas has any wired ranges.
3399 
3400 	\param addressSpace The address space.
3401 	\param deletingAddressSpace \c true, if the address space is in the process
3402 		of being deleted.
3403 */
3404 void
3405 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3406 {
3407 	TRACE(("vm_delete_areas: called on address space 0x%lx\n",
3408 		addressSpace->ID()));
3409 
3410 	addressSpace->WriteLock();
3411 
3412 	// remove all reserved areas in this address space
3413 	addressSpace->UnreserveAllAddressRanges(0);
3414 
3415 	// delete all the areas in this address space
3416 	while (VMArea* area = addressSpace->FirstArea()) {
3417 		ASSERT(!area->IsWired());
3418 		delete_area(addressSpace, area, deletingAddressSpace);
3419 	}
3420 
3421 	addressSpace->WriteUnlock();
3422 }
3423 
3424 
3425 static area_id
3426 vm_area_for(addr_t address, bool kernel)
3427 {
3428 	team_id team;
3429 	if (IS_USER_ADDRESS(address)) {
3430 		// we try the user team address space, if any
3431 		team = VMAddressSpace::CurrentID();
3432 		if (team < 0)
3433 			return team;
3434 	} else
3435 		team = VMAddressSpace::KernelID();
3436 
3437 	AddressSpaceReadLocker locker(team);
3438 	if (!locker.IsLocked())
3439 		return B_BAD_TEAM_ID;
3440 
3441 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3442 	if (area != NULL) {
3443 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3444 			return B_ERROR;
3445 
3446 		return area->id;
3447 	}
3448 
3449 	return B_ERROR;
3450 }
3451 
3452 
3453 /*!	Frees physical pages that were used during the boot process.
3454 	\a end is inclusive.
3455 */
3456 static void
3457 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3458 {
3459 	// free all physical pages in the specified range
3460 
3461 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3462 		phys_addr_t physicalAddress;
3463 		uint32 flags;
3464 
3465 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3466 			&& (flags & PAGE_PRESENT) != 0) {
3467 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3468 			if (page != NULL && page->State() != PAGE_STATE_FREE
3469 					 && page->State() != PAGE_STATE_CLEAR
3470 					 && page->State() != PAGE_STATE_UNUSED) {
3471 				DEBUG_PAGE_ACCESS_START(page);
3472 				vm_page_set_state(page, PAGE_STATE_FREE);
3473 			}
3474 		}
3475 	}
3476 
3477 	// unmap the memory
3478 	map->Unmap(start, end);
3479 }
3480 
3481 
3482 void
3483 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3484 {
3485 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3486 	addr_t end = start + (size - 1);
3487 	addr_t lastEnd = start;
3488 
3489 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3490 		(void*)start, (void*)end));
3491 
3492 	// The areas are sorted in virtual address space order, so
3493 	// we just have to find the holes between them that fall
3494 	// into the range we should dispose of
3495 
3496 	map->Lock();
3497 
3498 	for (VMAddressSpace::AreaIterator it
3499 				= VMAddressSpace::Kernel()->GetAreaIterator();
3500 			VMArea* area = it.Next();) {
3501 		addr_t areaStart = area->Base();
3502 		addr_t areaEnd = areaStart + (area->Size() - 1);
3503 
3504 		if (areaEnd < start)
3505 			continue;
3506 
3507 		if (areaStart > end) {
3508 			// we are done, the area is already beyond what we have to free
3509 			break;
3510 		}
3511 
3512 		if (areaStart > lastEnd) {
3513 			// this is something we can free
3514 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3515 				(void*)areaStart));
3516 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3517 		}
3518 
3519 		if (areaEnd >= end) {
3520 			lastEnd = areaEnd;
3521 				// no +1 to prevent potential overflow
3522 			break;
3523 		}
3524 
3525 		lastEnd = areaEnd + 1;
3526 	}
3527 
3528 	if (lastEnd < end) {
3529 		// we can also get rid of some space at the end of the area
3530 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3531 			(void*)end));
3532 		unmap_and_free_physical_pages(map, lastEnd, end);
3533 	}
3534 
3535 	map->Unlock();
3536 }
3537 
3538 
3539 static void
3540 create_preloaded_image_areas(struct preloaded_image* image)
3541 {
3542 	char name[B_OS_NAME_LENGTH];
3543 	void* address;
3544 	int32 length;
3545 
3546 	// use file name to create a good area name
3547 	char* fileName = strrchr(image->name, '/');
3548 	if (fileName == NULL)
3549 		fileName = image->name;
3550 	else
3551 		fileName++;
3552 
3553 	length = strlen(fileName);
3554 	// make sure there is enough space for the suffix
3555 	if (length > 25)
3556 		length = 25;
3557 
3558 	memcpy(name, fileName, length);
3559 	strcpy(name + length, "_text");
3560 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3561 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3562 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3563 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3564 		// this will later be remapped read-only/executable by the
3565 		// ELF initialization code
3566 
3567 	strcpy(name + length, "_data");
3568 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3569 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3570 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3571 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3572 }
3573 
3574 
3575 /*!	Frees all kernel arguments areas that were previously created from the
3576 	kernel_args structure. Any boot loader resources contained in those
3577 	arguments must not be accessed anymore past this point.
3578 */
3579 void
3580 vm_free_kernel_args(kernel_args* args)
3581 {
3582 	uint32 i;
3583 
3584 	TRACE(("vm_free_kernel_args()\n"));
3585 
3586 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3587 		area_id area = area_for((void*)args->kernel_args_range[i].start);
3588 		if (area >= B_OK)
3589 			delete_area(area);
3590 	}
3591 }
3592 
3593 
3594 static void
3595 allocate_kernel_args(kernel_args* args)
3596 {
3597 	TRACE(("allocate_kernel_args()\n"));
3598 
3599 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3600 		void* address = (void*)args->kernel_args_range[i].start;
3601 
3602 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3603 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3604 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3605 	}
3606 }
3607 
3608 
3609 static void
3610 unreserve_boot_loader_ranges(kernel_args* args)
3611 {
3612 	TRACE(("unreserve_boot_loader_ranges()\n"));
3613 
3614 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3615 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3616 			(void*)args->virtual_allocated_range[i].start,
3617 			args->virtual_allocated_range[i].size);
3618 	}
3619 }
3620 
3621 
3622 static void
3623 reserve_boot_loader_ranges(kernel_args* args)
3624 {
3625 	TRACE(("reserve_boot_loader_ranges()\n"));
3626 
3627 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3628 		void* address = (void*)args->virtual_allocated_range[i].start;
3629 
3630 		// If the address is not a kernel address, we just skip it. The
3631 		// architecture specific code has to deal with it.
3632 		if (!IS_KERNEL_ADDRESS(address)) {
3633 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
3634 				address, args->virtual_allocated_range[i].size);
3635 			continue;
3636 		}
3637 
3638 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3639 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3640 		if (status < B_OK)
3641 			panic("could not reserve boot loader ranges\n");
3642 	}
3643 }
3644 
3645 
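/*!	Finds a gap in the kernel_args' virtual allocated ranges that is large
	enough for \a size bytes (optionally aligned to \a alignment), extends the
	adjacent range to cover the allocation, and returns its base address.
	Returns 0 if no suitable gap could be found.
*/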
3646 static addr_t
3647 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3648 {
3649 	size = PAGE_ALIGN(size);
3650 
3651 	// find a slot in the virtual allocation addr range
3652 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3653 		// check to see if the space between this one and the last is big enough
3654 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3655 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3656 			+ args->virtual_allocated_range[i - 1].size;
3657 
3658 		addr_t base = alignment > 0
3659 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3660 
3661 		if (base >= KERNEL_BASE && base < rangeStart
3662 				&& rangeStart - base >= size) {
3663 			args->virtual_allocated_range[i - 1].size
3664 				+= base + size - previousRangeEnd;
3665 			return base;
3666 		}
3667 	}
3668 
3669 	// We didn't find a gap between the allocation ranges. That's OK --
3670 	// see if there's a gap after the last one.
3671 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3672 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3673 		+ args->virtual_allocated_range[lastEntryIndex].size;
3674 	addr_t base = alignment > 0
3675 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3676 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3677 		args->virtual_allocated_range[lastEntryIndex].size
3678 			+= base + size - lastRangeEnd;
3679 		return base;
3680 	}
3681 
3682 	// see if there's a gap before the first one
3683 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3684 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3685 		base = rangeStart - size;
3686 		if (alignment > 0)
3687 			base = ROUNDDOWN(base, alignment);
3688 
3689 		if (base >= KERNEL_BASE) {
3690 			args->virtual_allocated_range[0].start = base;
3691 			args->virtual_allocated_range[0].size += rangeStart - base;
3692 			return base;
3693 		}
3694 	}
3695 
3696 	return 0;
3697 }
3698 
3699 
3700 static bool
3701 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3702 {
3703 	// TODO: horrible brute-force method of determining if the page can be
3704 	// allocated
3705 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3706 		if (address >= args->physical_memory_range[i].start
3707 			&& address < args->physical_memory_range[i].start
3708 				+ args->physical_memory_range[i].size)
3709 			return true;
3710 	}
3711 	return false;
3712 }
3713 
3714 
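/*!	Allocates one physical page by appending it to one of the kernel_args'
	already allocated physical ranges. The candidate page must not collide with
	the following allocated range and must lie within a physical memory range.
	Returns the page number, or 0 if no page could be allocated.
*/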
3715 page_num_t
3716 vm_allocate_early_physical_page(kernel_args* args)
3717 {
3718 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3719 		phys_addr_t nextPage;
3720 
3721 		nextPage = args->physical_allocated_range[i].start
3722 			+ args->physical_allocated_range[i].size;
3723 		// see if the page after the next allocated paddr run can be allocated
3724 		if (i + 1 < args->num_physical_allocated_ranges
3725 			&& args->physical_allocated_range[i + 1].size != 0) {
3726 			// see if the next page will collide with the next allocated range
3727 			if (nextPage >= args->physical_allocated_range[i+1].start)
3728 				continue;
3729 		}
3730 		// see if the next physical page fits in the memory block
3731 		if (is_page_in_physical_memory_range(args, nextPage)) {
3732 			// we got one!
3733 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3734 			return nextPage / B_PAGE_SIZE;
3735 		}
3736 	}
3737 
3738 	return 0;
3739 		// could not allocate a block
3740 }
3741 
3742 
3743 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3744 	allocate some pages before the VM is completely up.
3745 */
3746 addr_t
3747 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3748 	uint32 attributes, addr_t alignment)
3749 {
3750 	if (physicalSize > virtualSize)
3751 		physicalSize = virtualSize;
3752 
3753 	// find the vaddr to allocate at
3754 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3755 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3756 
3757 	// map the pages
3758 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3759 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3760 		if (physicalAddress == 0)
3761 			panic("error allocating early page!\n");
3762 
3763 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3764 
3765 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3766 			physicalAddress * B_PAGE_SIZE, attributes,
3767 			&vm_allocate_early_physical_page);
3768 	}
3769 
3770 	return virtualBase;
3771 }
3772 
3773 
3774 /*!	The main entrance point to initialize the VM. */
3775 status_t
3776 vm_init(kernel_args* args)
3777 {
3778 	struct preloaded_image* image;
3779 	void* address;
3780 	status_t err = 0;
3781 	uint32 i;
3782 
3783 	TRACE(("vm_init: entry\n"));
3784 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3785 	err = arch_vm_init(args);
3786 
3787 	// initialize some globals
3788 	vm_page_init_num_pages(args);
3789 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3790 
3791 	slab_init(args);
3792 
3793 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3794 	size_t heapSize = INITIAL_HEAP_SIZE;
3795 	// try to accommodate low memory systems
3796 	while (heapSize > sAvailableMemory / 8)
3797 		heapSize /= 2;
3798 	if (heapSize < 1024 * 1024)
3799 		panic("vm_init: go buy some RAM please.");
3800 
3801 	// map in the new heap and initialize it
3802 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3803 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3804 	TRACE(("heap at 0x%lx\n", heapBase));
3805 	heap_init(heapBase, heapSize);
3806 #endif
3807 
3808 	// initialize the free page list and physical page mapper
3809 	vm_page_init(args);
3810 
3811 	// initialize the cache allocators
3812 	vm_cache_init(args);
3813 
3814 	{
3815 		status_t error = VMAreaHash::Init();
3816 		if (error != B_OK)
3817 			panic("vm_init: error initializing area hash table\n");
3818 	}
3819 
3820 	VMAddressSpace::Init();
3821 	reserve_boot_loader_ranges(args);
3822 
3823 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3824 	heap_init_post_area();
3825 #endif
3826 
3827 	// Do any further initialization that the architecture dependent layers may
3828 	// need now
3829 	arch_vm_translation_map_init_post_area(args);
3830 	arch_vm_init_post_area(args);
3831 	vm_page_init_post_area(args);
3832 	slab_init_post_area();
3833 
3834 	// allocate areas to represent stuff that already exists
3835 
3836 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3837 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
3838 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
3839 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3840 #endif
3841 
3842 	allocate_kernel_args(args);
3843 
3844 	create_preloaded_image_areas(&args->kernel_image);
3845 
3846 	// allocate areas for preloaded images
3847 	for (image = args->preloaded_images; image != NULL; image = image->next)
3848 		create_preloaded_image_areas(image);
3849 
3850 	// allocate kernel stacks
3851 	for (i = 0; i < args->num_cpus; i++) {
3852 		char name[64];
3853 
3854 		sprintf(name, "idle thread %lu kstack", i + 1);
3855 		address = (void*)args->cpu_kstack[i].start;
3856 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
3857 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3858 	}
3859 
3860 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
3861 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
3862 
3863 #if PARANOID_KERNEL_MALLOC
3864 	vm_block_address_range("uninitialized heap memory",
3865 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3866 #endif
3867 #if PARANOID_KERNEL_FREE
3868 	vm_block_address_range("freed heap memory",
3869 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3870 #endif
3871 
3872 	// create the object cache for the page mappings
3873 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
3874 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
3875 		NULL, NULL);
3876 	if (gPageMappingsObjectCache == NULL)
3877 		panic("failed to create page mappings object cache");
3878 
3879 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
3880 
3881 #if DEBUG_CACHE_LIST
3882 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
3883 		virtual_address_restrictions virtualRestrictions = {};
3884 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
3885 		physical_address_restrictions physicalRestrictions = {};
3886 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
3887 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
3888 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
3889 			CREATE_AREA_DONT_WAIT, &virtualRestrictions, &physicalRestrictions,
3890 			(void**)&sCacheInfoTable);
3891 	}
3892 #endif	// DEBUG_CACHE_LIST
3893 
3894 	// add some debugger commands
3895 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
3896 	add_debugger_command("area", &dump_area,
3897 		"Dump info about a particular area");
3898 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
3899 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
3900 #if DEBUG_CACHE_LIST
3901 	if (sCacheInfoTable != NULL) {
3902 		add_debugger_command_etc("caches", &dump_caches,
3903 			"List all VMCache trees",
3904 			"[ \"-c\" ]\n"
3905 			"All cache trees are listed sorted in decreasing order by number "
3906 				"of\n"
3907 			"used pages or, if \"-c\" is specified, by size of committed "
3908 				"memory.\n",
3909 			0);
3910 	}
3911 #endif
3912 	add_debugger_command("avail", &dump_available_memory,
3913 		"Dump available memory");
3914 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
3915 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
3916 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
3917 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
3918 	add_debugger_command("string", &display_mem, "dump strings");
3919 
3920 	TRACE(("vm_init: exit\n"));
3921 
3922 	vm_cache_init_post_heap();
3923 
3924 	return err;
3925 }
3926 
3927 
3928 status_t
3929 vm_init_post_sem(kernel_args* args)
3930 {
3931 	// This frees all unused boot loader resources and makes their space available
3932 	// again
3933 	arch_vm_init_end(args);
3934 	unreserve_boot_loader_ranges(args);
3935 
3936 	// Fill in all of the semaphores that were not allocated before.
3937 	// Since we're still single threaded and only the kernel address space
3938 	// exists, it isn't that hard to find all of the ones we need to create.
3939 
3940 	arch_vm_translation_map_init_post_sem(args);
3941 
3942 	slab_init_post_sem();
3943 
3944 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3945 	heap_init_post_sem();
3946 #endif
3947 
3948 	return B_OK;
3949 }
3950 
3951 
3952 status_t
3953 vm_init_post_thread(kernel_args* args)
3954 {
3955 	vm_page_init_post_thread(args);
3956 	slab_init_post_thread();
3957 	return heap_init_post_thread();
3958 }
3959 
3960 
3961 status_t
3962 vm_init_post_modules(kernel_args* args)
3963 {
3964 	return arch_vm_init_post_modules(args);
3965 }
3966 
3967 
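/*!	Increments the current thread's page_faults_allowed counter. Calls must be
	balanced by forbid_page_faults().
*/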
3968 void
3969 permit_page_faults(void)
3970 {
3971 	Thread* thread = thread_get_current_thread();
3972 	if (thread != NULL)
3973 		atomic_add(&thread->page_faults_allowed, 1);
3974 }
3975 
3976 
3977 void
3978 forbid_page_faults(void)
3979 {
3980 	Thread* thread = thread_get_current_thread();
3981 	if (thread != NULL)
3982 		atomic_add(&thread->page_faults_allowed, -1);
3983 }
3984 
3985 
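/*!	Handles a page fault at \a address (with the faulting instruction at
	\a faultAddress) by resolving it via vm_soft_fault(). If that fails, a
	kernel fault is redirected to the thread's fault handler via \a newIP (or
	the kernel panics if there is none), while a userland fault is reported to
	the user debugger and/or answered with a SIGSEGV signal.
*/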
3986 status_t
3987 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
3988 	addr_t* newIP)
3989 {
3990 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
3991 		faultAddress));
3992 
3993 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
3994 
3995 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
3996 	VMAddressSpace* addressSpace = NULL;
3997 
3998 	status_t status = B_OK;
3999 	*newIP = 0;
4000 	atomic_add((int32*)&sPageFaults, 1);
4001 
4002 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4003 		addressSpace = VMAddressSpace::GetKernel();
4004 	} else if (IS_USER_ADDRESS(pageAddress)) {
4005 		addressSpace = VMAddressSpace::GetCurrent();
4006 		if (addressSpace == NULL) {
4007 			if (!isUser) {
4008 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4009 					"memory!\n");
4010 				status = B_BAD_ADDRESS;
4011 				TPF(PageFaultError(-1,
4012 					VMPageFaultTracing
4013 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4014 			} else {
4015 				// XXX weird state.
4016 				panic("vm_page_fault: non kernel thread accessing user memory "
4017 					"that doesn't exist!\n");
4018 				status = B_BAD_ADDRESS;
4019 			}
4020 		}
4021 	} else {
4022 		// The hit was probably in the 64k DMZ between kernel and user space;
4023 		// this keeps a user space thread from passing a buffer that crosses
4024 		// into kernel space
4025 		status = B_BAD_ADDRESS;
4026 		TPF(PageFaultError(-1,
4027 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4028 	}
4029 
4030 	if (status == B_OK) {
4031 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser,
4032 			NULL);
4033 	}
4034 
4035 	if (status < B_OK) {
4036 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4037 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
4038 			strerror(status), address, faultAddress, isWrite, isUser,
4039 			thread_get_current_thread_id());
4040 		if (!isUser) {
4041 			Thread* thread = thread_get_current_thread();
4042 			if (thread != NULL && thread->fault_handler != 0) {
4043 				// this will cause the arch dependent page fault handler to
4044 				// modify the IP on the interrupt frame or whatever to return
4045 				// to this address
4046 				*newIP = thread->fault_handler;
4047 			} else {
4048 				// unhandled page fault in the kernel
4049 				panic("vm_page_fault: unhandled page fault in kernel space at "
4050 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4051 			}
4052 		} else {
4053 #if 1
4054 			addressSpace->ReadLock();
4055 
4056 			// TODO: remove me once we have proper userland debugging support
4057 			// (and tools)
4058 			VMArea* area = addressSpace->LookupArea(faultAddress);
4059 
4060 			Thread* thread = thread_get_current_thread();
4061 			dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) "
4062 				"tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n",
4063 				thread->name, thread->id, thread->team->Name(),
4064 				thread->team->id, isWrite ? "write" : "read", address,
4065 				faultAddress, area ? area->name : "???",
4066 				faultAddress - (area ? area->Base() : 0x0));
4067 
4068 			// We can print a stack trace of the userland thread here.
4069 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4070 // fault and someone is already waiting for a write lock on the same address
4071 // space. This thread will then try to acquire the lock again and will
4072 // be queued after the writer.
4073 #	if 0
4074 			if (area) {
4075 				struct stack_frame {
4076 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4077 						struct stack_frame*	previous;
4078 						void*				return_address;
4079 					#else
4080 						// ...
4081 					#warning writeme
4082 					#endif
4083 				} frame;
4084 #		ifdef __INTEL__
4085 				struct iframe* iframe = i386_get_user_iframe();
4086 				if (iframe == NULL)
4087 					panic("iframe is NULL!");
4088 
4089 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4090 					sizeof(struct stack_frame));
4091 #		elif defined(__POWERPC__)
4092 				struct iframe* iframe = ppc_get_user_iframe();
4093 				if (iframe == NULL)
4094 					panic("iframe is NULL!");
4095 
4096 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4097 					sizeof(struct stack_frame));
4098 #		else
4099 #			warning "vm_page_fault() stack trace won't work"
4100 				status = B_ERROR;
4101 #		endif
4102 
4103 				dprintf("stack trace:\n");
4104 				int32 maxFrames = 50;
4105 				while (status == B_OK && --maxFrames >= 0
4106 						&& frame.return_address != NULL) {
4107 					dprintf("  %p", frame.return_address);
4108 					area = addressSpace->LookupArea(
4109 						(addr_t)frame.return_address);
4110 					if (area) {
4111 						dprintf(" (%s + %#lx)", area->name,
4112 							(addr_t)frame.return_address - area->Base());
4113 					}
4114 					dprintf("\n");
4115 
4116 					status = user_memcpy(&frame, frame.previous,
4117 						sizeof(struct stack_frame));
4118 				}
4119 			}
4120 #	endif	// 0 (stack trace)
4121 
4122 			addressSpace->ReadUnlock();
4123 #endif
4124 
4125 			// TODO: the fault_callback is a temporary solution for vm86
4126 			if (thread->fault_callback == NULL
4127 				|| thread->fault_callback(address, faultAddress, isWrite)) {
4128 				// If the thread has a signal handler for SIGSEGV, we simply
4129 				// send it the signal. Otherwise we notify the user debugger
4130 				// first.
4131 				struct sigaction action;
4132 				if ((sigaction(SIGSEGV, NULL, &action) == 0
4133 						&& action.sa_handler != SIG_DFL
4134 						&& action.sa_handler != SIG_IGN)
4135 					|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4136 						SIGSEGV)) {
4137 					Signal signal(SIGSEGV,
4138 						status == B_PERMISSION_DENIED
4139 							? SEGV_ACCERR : SEGV_MAPERR,
4140 						EFAULT, thread->team->id);
4141 					signal.SetAddress((void*)address);
4142 					send_signal_to_thread(thread, signal, 0);
4143 				}
4144 			}
4145 		}
4146 	}
4147 
4148 	if (addressSpace != NULL)
4149 		addressSpace->Put();
4150 
4151 	return B_HANDLED_INTERRUPT;
4152 }
4153 
4154 
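/*!	Bundles the state needed while resolving a soft fault: the address space
	and cache chain lockers, the translation map, the top cache and the offset
	the fault maps to, and the page reservation. The destructor releases all
	locks and returns the reserved pages.
*/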
4155 struct PageFaultContext {
4156 	AddressSpaceReadLocker	addressSpaceLocker;
4157 	VMCacheChainLocker		cacheChainLocker;
4158 
4159 	VMTranslationMap*		map;
4160 	VMCache*				topCache;
4161 	off_t					cacheOffset;
4162 	vm_page_reservation		reservation;
4163 	bool					isWrite;
4164 
4165 	// return values
4166 	vm_page*				page;
4167 	bool					restart;
4168 
4169 
4170 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4171 		:
4172 		addressSpaceLocker(addressSpace, true),
4173 		map(addressSpace->TranslationMap()),
4174 		isWrite(isWrite)
4175 	{
4176 	}
4177 
4178 	~PageFaultContext()
4179 	{
4180 		UnlockAll();
4181 		vm_page_unreserve_pages(&reservation);
4182 	}
4183 
4184 	void Prepare(VMCache* topCache, off_t cacheOffset)
4185 	{
4186 		this->topCache = topCache;
4187 		this->cacheOffset = cacheOffset;
4188 		page = NULL;
4189 		restart = false;
4190 
4191 		cacheChainLocker.SetTo(topCache);
4192 	}
4193 
4194 	void UnlockAll(VMCache* exceptCache = NULL)
4195 	{
4196 		topCache = NULL;
4197 		addressSpaceLocker.Unlock();
4198 		cacheChainLocker.Unlock(exceptCache);
4199 	}
4200 };
4201 
4202 
4203 /*!	Gets the page that should be mapped into the area.
4204 	Returns an error code other than \c B_OK, if the page couldn't be found or
4205 	paged in. The locking state of the address space and the caches is undefined
4206 	in that case.
4207 	Returns \c B_OK with \c context.restart set to \c true, if the function
4208 	had to unlock the address space and all caches and is supposed to be called
4209 	again.
4210 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4211 	found. It is returned in \c context.page. The address space will still be
4212 	locked as well as all caches starting from the top cache to at least the
4213 	cache the page lives in.
4214 */
4215 static status_t
4216 fault_get_page(PageFaultContext& context)
4217 {
4218 	VMCache* cache = context.topCache;
4219 	VMCache* lastCache = NULL;
4220 	vm_page* page = NULL;
4221 
4222 	while (cache != NULL) {
4223 		// We already hold the lock of the cache at this point.
4224 
4225 		lastCache = cache;
4226 
4227 		page = cache->LookupPage(context.cacheOffset);
4228 		if (page != NULL && page->busy) {
4229 			// page must be busy -- wait for it to become unbusy
4230 			context.UnlockAll(cache);
4231 			cache->ReleaseRefLocked();
4232 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4233 
4234 			// restart the whole process
4235 			context.restart = true;
4236 			return B_OK;
4237 		}
4238 
4239 		if (page != NULL)
4240 			break;
4241 
4242 		// The current cache does not contain the page we're looking for.
4243 
4244 		// see if the backing store has it
4245 		if (cache->HasPage(context.cacheOffset)) {
4246 			// insert a fresh page and mark it busy -- we're going to read it in
4247 			page = vm_page_allocate_page(&context.reservation,
4248 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4249 			cache->InsertPage(page, context.cacheOffset);
4250 
4251 			// We need to unlock all caches and the address space while reading
4252 			// the page in. Keep a reference to the cache around.
4253 			cache->AcquireRefLocked();
4254 			context.UnlockAll();
4255 
4256 			// read the page in
4257 			generic_io_vec vec;
4258 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4259 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4260 
4261 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4262 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4263 
4264 			cache->Lock();
4265 
4266 			if (status < B_OK) {
4267 				// on error remove and free the page
4268 				dprintf("reading page from cache %p returned: %s!\n",
4269 					cache, strerror(status));
4270 
4271 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4272 				cache->RemovePage(page);
4273 				vm_page_set_state(page, PAGE_STATE_FREE);
4274 
4275 				cache->ReleaseRefAndUnlock();
4276 				return status;
4277 			}
4278 
4279 			// mark the page unbusy again
4280 			cache->MarkPageUnbusy(page);
4281 
4282 			DEBUG_PAGE_ACCESS_END(page);
4283 
4284 			// Since we needed to unlock everything temporarily, the area
4285 			// situation might have changed. So we need to restart the whole
4286 			// process.
4287 			cache->ReleaseRefAndUnlock();
4288 			context.restart = true;
4289 			return B_OK;
4290 		}
4291 
4292 		cache = context.cacheChainLocker.LockSourceCache();
4293 	}
4294 
4295 	if (page == NULL) {
4296 		// There was no adequate page; determine the cache for a clean one.
4297 		// Read-only pages go into the deepest cache; only the top most cache
4298 		// may have direct write access.
4299 		cache = context.isWrite ? context.topCache : lastCache;
4300 
4301 		// allocate a clean page
4302 		page = vm_page_allocate_page(&context.reservation,
4303 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4304 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n",
4305 			page->physical_page_number));
4306 
4307 		// insert the new page into our cache
4308 		cache->InsertPage(page, context.cacheOffset);
4309 	} else if (page->Cache() != context.topCache && context.isWrite) {
4310 		// We have a page that has the data we want, but in the wrong cache
4311 		// object so we need to copy it and stick it into the top cache.
4312 		vm_page* sourcePage = page;
4313 
4314 		// TODO: If memory is low, it might be a good idea to steal the page
4315 		// from our source cache -- if possible, that is.
4316 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4317 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4318 
4319 		// To not needlessly kill concurrency we unlock all caches but the top
4320 		// one while copying the page. Lacking another mechanism to ensure that
4321 		// the source page doesn't disappear, we mark it busy.
4322 		sourcePage->busy = true;
4323 		context.cacheChainLocker.UnlockKeepRefs(true);
4324 
4325 		// copy the page
4326 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4327 			sourcePage->physical_page_number * B_PAGE_SIZE);
4328 
4329 		context.cacheChainLocker.RelockCaches(true);
4330 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4331 
4332 		// insert the new page into our cache
4333 		context.topCache->InsertPage(page, context.cacheOffset);
4334 	} else
4335 		DEBUG_PAGE_ACCESS_START(page);
4336 
4337 	context.page = page;
4338 	return B_OK;
4339 }
4340 
4341 
4342 /*!	Makes sure the address in the given address space is mapped.
4343 
4344 	\param addressSpace The address space.
4345 	\param originalAddress The address. Doesn't need to be page aligned.
4346 	\param isWrite If \c true the address shall be write-accessible.
4347 	\param isUser If \c true the access is requested by a userland team.
4348 	\param wirePage On success, if non \c NULL, the wired count of the page
4349 		mapped at the given address is incremented and the page is returned
4350 		via this parameter.
4351 	\param wiredRange If given, this wiredRange is ignored when checking whether
4352 		an already mapped page at the virtual address can be unmapped.
4353 	\return \c B_OK on success, another error code otherwise.
4354 */
4355 static status_t
4356 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4357 	bool isWrite, bool isUser, vm_page** wirePage, VMAreaWiredRange* wiredRange)
4358 {
4359 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
4360 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
4361 
4362 	PageFaultContext context(addressSpace, isWrite);
4363 
4364 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4365 	status_t status = B_OK;
4366 
4367 	addressSpace->IncrementFaultCount();
4368 
4369 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4370 	// the pages upfront makes sure we don't have any cache locked, so that the
4371 	// page daemon/thief can do their job without problems.
4372 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4373 		originalAddress);
4374 	context.addressSpaceLocker.Unlock();
4375 	vm_page_reserve_pages(&context.reservation, reservePages,
4376 		addressSpace == VMAddressSpace::Kernel()
4377 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4378 
4379 	while (true) {
4380 		context.addressSpaceLocker.Lock();
4381 
4382 		// get the area the fault was in
4383 		VMArea* area = addressSpace->LookupArea(address);
4384 		if (area == NULL) {
4385 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4386 				"space\n", originalAddress);
4387 			TPF(PageFaultError(-1,
4388 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4389 			status = B_BAD_ADDRESS;
4390 			break;
4391 		}
4392 
4393 		// check permissions
4394 		uint32 protection = get_area_page_protection(area, address);
4395 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4396 			dprintf("user access on kernel area 0x%lx at %p\n", area->id,
4397 				(void*)originalAddress);
4398 			TPF(PageFaultError(area->id,
4399 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4400 			status = B_PERMISSION_DENIED;
4401 			break;
4402 		}
4403 		if (isWrite && (protection
4404 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4405 			dprintf("write access attempted on write-protected area 0x%lx at"
4406 				" %p\n", area->id, (void*)originalAddress);
4407 			TPF(PageFaultError(area->id,
4408 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4409 			status = B_PERMISSION_DENIED;
4410 			break;
4411 		} else if (!isWrite && (protection
4412 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4413 			dprintf("read access attempted on read-protected area 0x%lx at"
4414 				" %p\n", area->id, (void*)originalAddress);
4415 			TPF(PageFaultError(area->id,
4416 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4417 			status = B_PERMISSION_DENIED;
4418 			break;
4419 		}
4420 
4421 		// We have the area, it was a valid access, so let's try to resolve the
4422 		// page fault now.
4423 		// At first, the top most cache from the area is investigated.
4424 
4425 		context.Prepare(vm_area_get_locked_cache(area),
4426 			address - area->Base() + area->cache_offset);
4427 
4428 		// See if this cache has a fault handler -- this will do all the work
4429 		// for us.
4430 		{
4431 			// Note, since the page fault is resolved with interrupts enabled,
4432 			// the fault handler could be called more than once for the same
4433 			// reason -- the store must take this into account.
4434 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4435 			if (status != B_BAD_HANDLER)
4436 				break;
4437 		}
4438 
4439 		// The top most cache has no fault handler, so let's see if the cache or
4440 		// its sources already have the page we're searching for (we're going
4441 		// from top to bottom).
4442 		status = fault_get_page(context);
4443 		if (status != B_OK) {
4444 			TPF(PageFaultError(area->id, status));
4445 			break;
4446 		}
4447 
4448 		if (context.restart)
4449 			continue;
4450 
4451 		// All went fine, all there is left to do is to map the page into the
4452 		// address space.
4453 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4454 			context.page));
4455 
4456 		// If the page doesn't reside in the area's cache, we need to make sure
4457 		// it's mapped in read-only, so that we cannot overwrite someone else's
4458 		// data (copy-on-write)
4459 		uint32 newProtection = protection;
4460 		if (context.page->Cache() != context.topCache && !isWrite)
4461 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4462 
4463 		bool unmapPage = false;
4464 		bool mapPage = true;
4465 
4466 		// check whether there's already a page mapped at the address
4467 		context.map->Lock();
4468 
4469 		phys_addr_t physicalAddress;
4470 		uint32 flags;
4471 		vm_page* mappedPage = NULL;
4472 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4473 			&& (flags & PAGE_PRESENT) != 0
4474 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4475 				!= NULL) {
4476 			// Yep there's already a page. If it's ours, we can simply adjust
4477 			// its protection. Otherwise we have to unmap it.
4478 			if (mappedPage == context.page) {
4479 				context.map->ProtectPage(area, address, newProtection);
4480 					// Note: We assume that ProtectPage() is atomic (i.e.
4481 					// the page isn't temporarily unmapped), otherwise we'd have
4482 					// to make sure it isn't wired.
4483 				mapPage = false;
4484 			} else
4485 				unmapPage = true;
4486 		}
4487 
4488 		context.map->Unlock();
4489 
4490 		if (unmapPage) {
4491 			// If the page is wired, we can't unmap it. Wait until it is unwired
4492 			// again and restart.
4493 			VMAreaUnwiredWaiter waiter;
4494 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4495 					wiredRange)) {
4496 				// unlock everything and wait
4497 				context.UnlockAll();
4498 				waiter.waitEntry.Wait();
4499 				continue;
4500 			}
4501 
4502 			// Note: The mapped page is a page of a lower cache. We are
4503 			// guaranteed to have that cache locked, our new page is a copy of
4504 			// that page, and the page is not busy. The logic for that guarantee
4505 			// is as follows: Since the page is mapped, it must live in the top
4506 			// cache (ruled out above) or any of its lower caches, and there is
4507 			// (was before the new page was inserted) no other page in any
4508 			// cache between the top cache and the page's cache (otherwise that
4509 			// would be mapped instead). That in turn means that our algorithm
4510 			// must have found it and therefore it cannot be busy either.
4511 			DEBUG_PAGE_ACCESS_START(mappedPage);
4512 			unmap_page(area, address);
4513 			DEBUG_PAGE_ACCESS_END(mappedPage);
4514 		}
4515 
4516 		if (mapPage) {
4517 			if (map_page(area, context.page, address, newProtection,
4518 					&context.reservation) != B_OK) {
4519 				// Mapping can only fail, when the page mapping object couldn't
4520 				// be allocated. Save for the missing mapping everything is
4521 				// fine, though. If this was a regular page fault, we'll simply
4522 				// leave and probably fault again. To make sure we'll have more
4523 				// luck then, we ensure that the minimum object reserve is
4524 				// available.
4525 				DEBUG_PAGE_ACCESS_END(context.page);
4526 
4527 				context.UnlockAll();
4528 
4529 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4530 						!= B_OK) {
4531 					// Apparently the situation is serious. Let's get ourselves
4532 					// killed.
4533 					status = B_NO_MEMORY;
4534 				} else if (wirePage != NULL) {
4535 					// The caller expects us to wire the page. Since
4536 					// object_cache_reserve() succeeded, we should now be able
4537 					// to allocate a mapping structure. Restart.
4538 					continue;
4539 				}
4540 
4541 				break;
4542 			}
4543 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4544 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4545 
4546 		// also wire the page, if requested
4547 		if (wirePage != NULL && status == B_OK) {
4548 			increment_page_wired_count(context.page);
4549 			*wirePage = context.page;
4550 		}
4551 
4552 		DEBUG_PAGE_ACCESS_END(context.page);
4553 
4554 		break;
4555 	}
4556 
4557 	return status;
4558 }
4559 
4560 
4561 status_t
4562 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4563 {
4564 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4565 }
4566 
4567 status_t
4568 vm_put_physical_page(addr_t vaddr, void* handle)
4569 {
4570 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4571 }
4572 
4573 
4574 status_t
4575 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4576 	void** _handle)
4577 {
4578 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4579 }
4580 
4581 status_t
4582 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4583 {
4584 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4585 }
4586 
4587 
4588 status_t
4589 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4590 {
4591 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4592 }
4593 
4594 status_t
4595 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4596 {
4597 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4598 }
4599 
4600 
4601 void
4602 vm_get_info(system_memory_info* info)
4603 {
4604 	swap_get_info(info);
4605 
4606 	info->max_memory = vm_page_num_pages() * B_PAGE_SIZE;
4607 	info->page_faults = sPageFaults;
4608 
4609 	MutexLocker locker(sAvailableMemoryLock);
4610 	info->free_memory = sAvailableMemory;
4611 	info->needed_memory = sNeededMemory;
4612 }
4613 
4614 
4615 uint32
4616 vm_num_page_faults(void)
4617 {
4618 	return sPageFaults;
4619 }
4620 
4621 
4622 off_t
4623 vm_available_memory(void)
4624 {
4625 	MutexLocker locker(sAvailableMemoryLock);
4626 	return sAvailableMemory;
4627 }
4628 
4629 
4630 off_t
4631 vm_available_not_needed_memory(void)
4632 {
4633 	MutexLocker locker(sAvailableMemoryLock);
4634 	return sAvailableMemory - sNeededMemory;
4635 }
4636 
4637 
4638 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4639 	debugger.
4640 */
4641 off_t
4642 vm_available_not_needed_memory_debug(void)
4643 {
4644 	return sAvailableMemory - sNeededMemory;
4645 }
4646 
4647 
4648 size_t
4649 vm_kernel_address_space_left(void)
4650 {
4651 	return VMAddressSpace::Kernel()->FreeSpace();
4652 }
4653 
4654 
4655 void
4656 vm_unreserve_memory(size_t amount)
4657 {
4658 	mutex_lock(&sAvailableMemoryLock);
4659 
4660 	sAvailableMemory += amount;
4661 
4662 	mutex_unlock(&sAvailableMemoryLock);
4663 }
4664 
4665 
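/*!	Tries to reserve \a amount bytes of memory while leaving the memory reserve
	for the given \a priority untouched. If not enough memory is available, the
	low resource manager is notified and the attempt is repeated until the
	(relative) \a timeout has expired. Returns \c B_OK on success,
	\c B_NO_MEMORY otherwise.
*/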
4666 status_t
4667 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4668 {
4669 	size_t reserve = kMemoryReserveForPriority[priority];
4670 
4671 	MutexLocker locker(sAvailableMemoryLock);
4672 
4673 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4674 
4675 	if (sAvailableMemory >= amount + reserve) {
4676 		sAvailableMemory -= amount;
4677 		return B_OK;
4678 	}
4679 
4680 	if (timeout <= 0)
4681 		return B_NO_MEMORY;
4682 
4683 	// turn timeout into an absolute timeout
4684 	timeout += system_time();
4685 
4686 	// loop until we've got the memory or the timeout occurs
4687 	do {
4688 		sNeededMemory += amount;
4689 
4690 		// call the low resource manager
4691 		locker.Unlock();
4692 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4693 			B_ABSOLUTE_TIMEOUT, timeout);
4694 		locker.Lock();
4695 
4696 		sNeededMemory -= amount;
4697 
4698 		if (sAvailableMemory >= amount + reserve) {
4699 			sAvailableMemory -= amount;
4700 			return B_OK;
4701 		}
4702 	} while (timeout > system_time());
4703 
4704 	return B_NO_MEMORY;
4705 }
4706 
4707 
4708 status_t
4709 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4710 {
4711 	// NOTE: The caller is responsible for synchronizing calls to this function!
4712 
4713 	AddressSpaceReadLocker locker;
4714 	VMArea* area;
4715 	status_t status = locker.SetFromArea(id, area);
4716 	if (status != B_OK)
4717 		return status;
4718 
4719 	// nothing to do, if the type doesn't change
4720 	uint32 oldType = area->MemoryType();
4721 	if (type == oldType)
4722 		return B_OK;
4723 
4724 	// set the memory type of the area and the mapped pages
4725 	VMTranslationMap* map = area->address_space->TranslationMap();
4726 	map->Lock();
4727 	area->SetMemoryType(type);
4728 	map->ProtectArea(area, area->protection);
4729 	map->Unlock();
4730 
4731 	// set the physical memory type
4732 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4733 	if (error != B_OK) {
4734 		// reset the memory type of the area and the mapped pages
4735 		map->Lock();
4736 		area->SetMemoryType(oldType);
4737 		map->ProtectArea(area, area->protection);
4738 		map->Unlock();
4739 		return error;
4740 	}
4741 
4742 	return B_OK;
4743 
4744 }
4745 
4746 
4747 /*!	This function enforces some protection properties:
4748 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4749 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4750 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4751 	   and B_KERNEL_WRITE_AREA.
4752 */
4753 static void
4754 fix_protection(uint32* protection)
4755 {
4756 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4757 		if ((*protection & B_USER_PROTECTION) == 0
4758 			|| (*protection & B_WRITE_AREA) != 0)
4759 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4760 		else
4761 			*protection |= B_KERNEL_READ_AREA;
4762 	}
4763 }
4764 
4765 
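/*!	Fills in the given area_info structure with the properties of \a area,
	locking the area's cache to compute the (approximate) RAM size.
*/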
4766 static void
4767 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4768 {
4769 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4770 	info->area = area->id;
4771 	info->address = (void*)area->Base();
4772 	info->size = area->Size();
4773 	info->protection = area->protection;
4774 	info->lock = B_FULL_LOCK;
4775 	info->team = area->address_space->ID();
4776 	info->copy_count = 0;
4777 	info->in_count = 0;
4778 	info->out_count = 0;
4779 		// TODO: retrieve real values here!
4780 
4781 	VMCache* cache = vm_area_get_locked_cache(area);
4782 
4783 	// Note, this is a simplification; the cache could be larger than this area
4784 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4785 
4786 	vm_area_put_locked_cache(cache);
4787 }
4788 
4789 
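/*!	Resizes the area with the given ID to \a newSize; all other areas attached
	to the same cache are resized as well. When growing, the cache is resized
	first, since that step can fail; when shrinking, the now unused pages are
	unmapped and the cache is shrunk only after the areas have been resized.
*/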
4790 static status_t
4791 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4792 {
4793 	// is newSize a multiple of B_PAGE_SIZE?
4794 	if (newSize & (B_PAGE_SIZE - 1))
4795 		return B_BAD_VALUE;
4796 
4797 	// lock all affected address spaces and the cache
4798 	VMArea* area;
4799 	VMCache* cache;
4800 
4801 	MultiAddressSpaceLocker locker;
4802 	AreaCacheLocker cacheLocker;
4803 
4804 	status_t status;
4805 	size_t oldSize;
4806 	bool anyKernelArea;
4807 	bool restart;
4808 
4809 	do {
4810 		anyKernelArea = false;
4811 		restart = false;
4812 
4813 		locker.Unset();
4814 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4815 		if (status != B_OK)
4816 			return status;
4817 		cacheLocker.SetTo(cache, true);	// already locked
4818 
4819 		// enforce restrictions
4820 		if (!kernel) {
4821 			if ((area->protection & B_KERNEL_AREA) != 0)
4822 				return B_NOT_ALLOWED;
4823 			// TODO: Enforce all restrictions (team, etc.)!
4824 		}
4825 
4826 		oldSize = area->Size();
4827 		if (newSize == oldSize)
4828 			return B_OK;
4829 
4830 		if (cache->type != CACHE_TYPE_RAM)
4831 			return B_NOT_ALLOWED;
4832 
4833 		if (oldSize < newSize) {
4834 			// We need to check if all areas of this cache can be resized.
4835 			for (VMArea* current = cache->areas; current != NULL;
4836 					current = current->cache_next) {
4837 				if (!current->address_space->CanResizeArea(current, newSize))
4838 					return B_ERROR;
4839 				anyKernelArea
4840 					|= current->address_space == VMAddressSpace::Kernel();
4841 			}
4842 		} else {
4843 			// We're shrinking the areas, so we must make sure the affected
4844 			// ranges are not wired.
4845 			for (VMArea* current = cache->areas; current != NULL;
4846 					current = current->cache_next) {
4847 				anyKernelArea
4848 					|= current->address_space == VMAddressSpace::Kernel();
4849 
4850 				if (wait_if_area_range_is_wired(current,
4851 						current->Base() + newSize, oldSize - newSize, &locker,
4852 						&cacheLocker)) {
4853 					restart = true;
4854 					break;
4855 				}
4856 			}
4857 		}
4858 	} while (restart);
4859 
4860 	// Okay, looks good so far, so let's do it
4861 
4862 	int priority = kernel && anyKernelArea
4863 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
4864 	uint32 allocationFlags = kernel && anyKernelArea
4865 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
4866 
4867 	if (oldSize < newSize) {
4868 		// Growing the cache can fail, so we do it first.
4869 		status = cache->Resize(cache->virtual_base + newSize, priority);
4870 		if (status != B_OK)
4871 			return status;
4872 	}
4873 
4874 	for (VMArea* current = cache->areas; current != NULL;
4875 			current = current->cache_next) {
4876 		status = current->address_space->ResizeArea(current, newSize,
4877 			allocationFlags);
4878 		if (status != B_OK)
4879 			break;
4880 
4881 		// We also need to unmap all pages beyond the new size, if the area has
4882 		// shrunk
4883 		if (newSize < oldSize) {
4884 			VMCacheChainLocker cacheChainLocker(cache);
4885 			cacheChainLocker.LockAllSourceCaches();
4886 
4887 			unmap_pages(current, current->Base() + newSize,
4888 				oldSize - newSize);
4889 
4890 			cacheChainLocker.Unlock(cache);
4891 		}
4892 	}
4893 
4894 	if (status == B_OK) {
4895 		// Shrink or grow individual page protections if in use.
4896 		if (area->page_protections != NULL) {
4897 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
4898 			uint8* newProtections
4899 				= (uint8*)realloc(area->page_protections, bytes);
4900 			if (newProtections == NULL)
4901 				status = B_NO_MEMORY;
4902 			else {
4903 				area->page_protections = newProtections;
4904 
4905 				if (oldSize < newSize) {
4906 					// init the additional page protections to that of the area
4907 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
4908 					uint32 areaProtection = area->protection
4909 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
4910 					memset(area->page_protections + offset,
4911 						areaProtection | (areaProtection << 4), bytes - offset);
4912 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
4913 						uint8& entry = area->page_protections[offset - 1];
4914 						entry = (entry & 0x0f) | (areaProtection << 4);
4915 					}
4916 				}
4917 			}
4918 		}
4919 	}
4920 
4921 	// shrinking the cache can't fail, so we do it now
4922 	if (status == B_OK && newSize < oldSize)
4923 		status = cache->Resize(cache->virtual_base + newSize, priority);
4924 
4925 	if (status != B_OK) {
4926 		// Something failed -- resize the areas back to their original size.
4927 		// This can fail, too, in which case we're seriously screwed.
4928 		for (VMArea* current = cache->areas; current != NULL;
4929 				current = current->cache_next) {
4930 			if (current->address_space->ResizeArea(current, oldSize,
4931 					allocationFlags) != B_OK) {
4932 				panic("vm_resize_area(): Failed and unable to restore "
4933 					"original state.");
4934 			}
4935 		}
4936 
4937 		cache->Resize(cache->virtual_base + oldSize, priority);
4938 	}
4939 
4940 	// TODO: we must honour the lock restrictions of this area
4941 	return status;
4942 }
4943 
4944 
4945 status_t
4946 vm_memset_physical(phys_addr_t address, int value, size_t length)
4947 {
4948 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
4949 }
4950 
4951 
4952 status_t
4953 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
4954 {
4955 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
4956 }
4957 
4958 
4959 status_t
4960 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
4961 	bool user)
4962 {
4963 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
4964 }
4965 
4966 
4967 void
4968 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
4969 {
4970 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
4971 }
4972 
4973 
4974 /*!	Copies a range of memory directly from/to a page that might not be mapped
4975 	at the moment.
4976 
4977 	For \a unsafeMemory the current mapping (if any) is ignored. The function
4978 	walks through the respective area's cache chain to find the physical page
4979 	and copies from/to it directly.
4980 	The memory range starting at \a unsafeMemory with a length of \a size bytes
4981 	must not cross a page boundary.
4982 
4983 	\param teamID The team ID identifying the address space \a unsafeMemory is
4984 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
4985 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
4986 		is passed, the address space of the thread returned by
4987 		debug_get_debugged_thread() is used.
4988 	\param unsafeMemory The start of the unsafe memory range to be copied
4989 		from/to.
4990 	\param buffer A safely accessible kernel buffer to be copied from/to.
4991 	\param size The number of bytes to be copied.
4992 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
4993 		\a unsafeMemory, the other way around otherwise.
4994 */
4995 status_t
4996 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
4997 	size_t size, bool copyToUnsafe)
4998 {
4999 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5000 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5001 		return B_BAD_VALUE;
5002 	}
5003 
5004 	// get the address space for the debugged thread
5005 	VMAddressSpace* addressSpace;
5006 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5007 		addressSpace = VMAddressSpace::Kernel();
5008 	} else if (teamID == B_CURRENT_TEAM) {
5009 		Thread* thread = debug_get_debugged_thread();
5010 		if (thread == NULL || thread->team == NULL)
5011 			return B_BAD_ADDRESS;
5012 
5013 		addressSpace = thread->team->address_space;
5014 	} else
5015 		addressSpace = VMAddressSpace::DebugGet(teamID);
5016 
5017 	if (addressSpace == NULL)
5018 		return B_BAD_ADDRESS;
5019 
5020 	// get the area
5021 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5022 	if (area == NULL)
5023 		return B_BAD_ADDRESS;
5024 
5025 	// search the page
5026 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5027 		+ area->cache_offset;
5028 	VMCache* cache = area->cache;
5029 	vm_page* page = NULL;
5030 	while (cache != NULL) {
5031 		page = cache->DebugLookupPage(cacheOffset);
5032 		if (page != NULL)
5033 			break;
5034 
5035 		// Page not found in this cache -- if it is paged out, we must not try
5036 		// to get it from lower caches.
5037 		if (cache->DebugHasPage(cacheOffset))
5038 			break;
5039 
5040 		cache = cache->source;
5041 	}
5042 
5043 	if (page == NULL)
5044 		return B_UNSUPPORTED;
5045 
5046 	// copy from/to physical memory
5047 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5048 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5049 
5050 	if (copyToUnsafe) {
5051 		if (page->Cache() != area->cache)
5052 			return B_UNSUPPORTED;
5053 
5054 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5055 	}
5056 
5057 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5058 }
5059 
5060 
5061 //	#pragma mark - kernel public API
5062 
5063 
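/*!	Copies \a size bytes from \a from to \a to, using the current thread's
	fault handler to safely handle faults on userland addresses. Returns
	\c B_OK, or \c B_BAD_ADDRESS if the copy faulted or either range would wrap
	around the end of the address space.
*/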
5064 status_t
5065 user_memcpy(void* to, const void* from, size_t size)
5066 {
5067 	// don't allow address overflows
5068 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5069 		return B_BAD_ADDRESS;
5070 
5071 	if (arch_cpu_user_memcpy(to, from, size,
5072 			&thread_get_current_thread()->fault_handler) < B_OK)
5073 		return B_BAD_ADDRESS;
5074 
5075 	return B_OK;
5076 }
5077 
5078 
5079 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5080 	the string in \a to, NULL-terminating the result.
5081 
5082 	\param to Pointer to the destination C-string.
5083 	\param from Pointer to the source C-string.
5084 	\param size Size in bytes of the string buffer pointed to by \a to.
5085 
5086 	\return strlen(\a from).
5087 */
5088 ssize_t
5089 user_strlcpy(char* to, const char* from, size_t size)
5090 {
5091 	if (to == NULL && size != 0)
5092 		return B_BAD_VALUE;
5093 	if (from == NULL)
5094 		return B_BAD_ADDRESS;
5095 
5096 	// limit size to avoid address overflows
5097 	size_t maxSize = std::min(size,
5098 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5099 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5100 		// the source address might still overflow.
5101 
5102 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize,
5103 		&thread_get_current_thread()->fault_handler);
5104 
5105 	// If we hit the address overflow boundary, fail.
5106 	if (result >= 0 && (size_t)result >= maxSize && maxSize < size)
5107 		return B_BAD_ADDRESS;
5108 
5109 	return result;
5110 }
5111 
5112 
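/*!	Sets \a count bytes at \a s to the value \a c, using the current thread's
	fault handler to safely handle faults on userland addresses. Returns
	\c B_OK, or \c B_BAD_ADDRESS if the write faulted or the range would wrap
	around the end of the address space.
*/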
5113 status_t
5114 user_memset(void* s, char c, size_t count)
5115 {
5116 	// don't allow address overflows
5117 	if ((addr_t)s + count < (addr_t)s)
5118 		return B_BAD_ADDRESS;
5119 
5120 	if (arch_cpu_user_memset(s, c, count,
5121 			&thread_get_current_thread()->fault_handler) < B_OK)
5122 		return B_BAD_ADDRESS;
5123 
5124 	return B_OK;
5125 }
5126 
5127 
5128 /*!	Wires a single page at the given address.
5129 
5130 	\param team The team whose address space the address belongs to. Supports
5131 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5132 		parameter is ignored.
5133 	\param address The virtual address to wire down. Does not need to
5134 		be page aligned.
5135 	\param writable If \c true the page shall be writable.
5136 	\param info On success the info is filled in, among other things
5137 		containing the physical address the given virtual one translates to.
5138 	\return \c B_OK, when the page could be wired, another error code otherwise.
5139 */
5140 status_t
5141 vm_wire_page(team_id team, addr_t address, bool writable,
5142 	VMPageWiringInfo* info)
5143 {
5144 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5145 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5146 
5147 	// compute the page protection that is required
5148 	bool isUser = IS_USER_ADDRESS(address);
5149 	uint32 requiredProtection = PAGE_PRESENT
5150 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5151 	if (writable)
5152 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5153 
5154 	// get and read lock the address space
5155 	VMAddressSpace* addressSpace = NULL;
5156 	if (isUser) {
5157 		if (team == B_CURRENT_TEAM)
5158 			addressSpace = VMAddressSpace::GetCurrent();
5159 		else
5160 			addressSpace = VMAddressSpace::Get(team);
5161 	} else
5162 		addressSpace = VMAddressSpace::GetKernel();
5163 	if (addressSpace == NULL)
5164 		return B_ERROR;
5165 
5166 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5167 
5168 	VMTranslationMap* map = addressSpace->TranslationMap();
5169 	status_t error = B_OK;
5170 
5171 	// get the area
5172 	VMArea* area = addressSpace->LookupArea(pageAddress);
5173 	if (area == NULL) {
5174 		addressSpace->Put();
5175 		return B_BAD_ADDRESS;
5176 	}
5177 
5178 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5179 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5180 
5181 	// mark the area range wired
5182 	area->Wire(&info->range);
5183 
5184 	// Lock the area's cache chain and the translation map. Needed to look
5185 	// up the page and play with its wired count.
5186 	cacheChainLocker.LockAllSourceCaches();
5187 	map->Lock();
5188 
5189 	phys_addr_t physicalAddress;
5190 	uint32 flags;
5191 	vm_page* page;
5192 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5193 		&& (flags & requiredProtection) == requiredProtection
5194 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5195 			!= NULL) {
5196 		// Already mapped with the correct permissions -- just increment
5197 		// the page's wired count.
5198 		increment_page_wired_count(page);
5199 
5200 		map->Unlock();
5201 		cacheChainLocker.Unlock();
5202 		addressSpaceLocker.Unlock();
5203 	} else {
5204 		// Let vm_soft_fault() map the page for us, if possible. We need
5205 		// to fully unlock to avoid deadlocks. Since we have already
5206 		// wired the area itself, nothing disturbing will happen with it
5207 		// in the meantime.
5208 		map->Unlock();
5209 		cacheChainLocker.Unlock();
5210 		addressSpaceLocker.Unlock();
5211 
5212 		error = vm_soft_fault(addressSpace, pageAddress, writable, isUser,
5213 			&page, &info->range);
5214 
5215 		if (error != B_OK) {
5216 			// The page could not be mapped -- clean up.
5217 			VMCache* cache = vm_area_get_locked_cache(area);
5218 			area->Unwire(&info->range);
5219 			cache->ReleaseRefAndUnlock();
5220 			addressSpace->Put();
5221 			return error;
5222 		}
5223 	}
5224 
5225 	info->physicalAddress
5226 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5227 			+ address % B_PAGE_SIZE;
5228 	info->page = page;
5229 
5230 	return B_OK;
5231 }
5232 
5233 
5234 /*!	Unwires a single page previously wired via vm_wire_page().
5235 
5236 	\param info The same object passed to vm_wire_page() before.
5237 */
5238 void
5239 vm_unwire_page(VMPageWiringInfo* info)
5240 {
5241 	// lock the address space
5242 	VMArea* area = info->range.area;
5243 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5244 		// takes over our reference
5245 
5246 	// lock the top cache
5247 	VMCache* cache = vm_area_get_locked_cache(area);
5248 	VMCacheChainLocker cacheChainLocker(cache);
5249 
5250 	if (info->page->Cache() != cache) {
5251 		// The page is not in the top cache, so we lock the whole cache chain
5252 		// before touching the page's wired count.
5253 		cacheChainLocker.LockAllSourceCaches();
5254 	}
5255 
5256 	decrement_page_wired_count(info->page);
5257 
5258 	// remove the wired range from the area
5259 	area->Unwire(&info->range);
5260 
5261 	cacheChainLocker.Unlock();
5262 }
5263 
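
/*	Illustrative sketch: how the two functions above might be paired to briefly
	pin a single user page for I/O. The helper name and its parameters are
	hypothetical.

		static status_t
		example_wire_single_user_page(team_id team, addr_t userAddress)
		{
			VMPageWiringInfo info;
			status_t error = vm_wire_page(team, userAddress, true, &info);
			if (error != B_OK)
				return error;

			// info.physicalAddress now names the wired physical page; it
			// would typically be handed to an I/O request here. The page
			// stays wired until the matching vm_unwire_page().

			vm_unwire_page(&info);
			return B_OK;
		}
*/
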
5264 
5265 /*!	Wires down the given address range in the specified team's address space.
5266 
5267 	If successful the function
5268 	- acquires a reference to the specified team's address space,
5269 	- adds respective wired ranges to all areas that intersect with the given
5270 	  address range,
5271 	- makes sure all pages in the given address range are mapped with the
5272 	  requested access permissions and increments their wired count.
5273 
5274 	It fails when \a team doesn't specify a valid address space, when any part
5275 	of the specified address range is not covered by areas, when the concerned
5276 	areas don't allow mapping with the requested permissions, or when mapping
5277 	fails for another reason.
5278 
5279 	When successful, the call must be balanced by an unlock_memory_etc() call
5280 	with the exact same parameters.
5281 
5282 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
5283 		is supported.
5284 	\param address The start of the address range to be wired.
5285 	\param numBytes The size of the address range to be wired.
5286 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5287 		requests that the range must be wired writable ("read from device
5288 		into memory").
5289 	\return \c B_OK on success, another error code otherwise.
5290 */
5291 status_t
5292 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5293 {
5294 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5295 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5296 
5297 	// compute the page protection that is required
5298 	bool isUser = IS_USER_ADDRESS(address);
5299 	bool writable = (flags & B_READ_DEVICE) == 0;
5300 	uint32 requiredProtection = PAGE_PRESENT
5301 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5302 	if (writable)
5303 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5304 
5305 	uint32 mallocFlags = isUser
5306 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5307 
5308 	// get and read lock the address space
5309 	VMAddressSpace* addressSpace = NULL;
5310 	if (isUser) {
5311 		if (team == B_CURRENT_TEAM)
5312 			addressSpace = VMAddressSpace::GetCurrent();
5313 		else
5314 			addressSpace = VMAddressSpace::Get(team);
5315 	} else
5316 		addressSpace = VMAddressSpace::GetKernel();
5317 	if (addressSpace == NULL)
5318 		return B_ERROR;
5319 
5320 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5321 
5322 	VMTranslationMap* map = addressSpace->TranslationMap();
5323 	status_t error = B_OK;
5324 
5325 	// iterate through all concerned areas
5326 	addr_t nextAddress = lockBaseAddress;
5327 	while (nextAddress != lockEndAddress) {
5328 		// get the next area
5329 		VMArea* area = addressSpace->LookupArea(nextAddress);
5330 		if (area == NULL) {
5331 			error = B_BAD_ADDRESS;
5332 			break;
5333 		}
5334 
5335 		addr_t areaStart = nextAddress;
5336 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5337 
5338 		// allocate the wired range (do that before locking the cache to avoid
5339 		// deadlocks)
5340 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5341 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5342 		if (range == NULL) {
5343 			error = B_NO_MEMORY;
5344 			break;
5345 		}
5346 
5347 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5348 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5349 
5350 		// mark the area range wired
5351 		area->Wire(range);
5352 
5353 		// Depending on the area cache type and the wiring, we may not need to
5354 		// look at the individual pages.
5355 		if (area->cache_type == CACHE_TYPE_NULL
5356 			|| area->cache_type == CACHE_TYPE_DEVICE
5357 			|| area->wiring == B_FULL_LOCK
5358 			|| area->wiring == B_CONTIGUOUS) {
5359 			nextAddress = areaEnd;
5360 			continue;
5361 		}
5362 
5363 		// Lock the area's cache chain and the translation map. Needed to look
5364 		// up pages and play with their wired count.
5365 		cacheChainLocker.LockAllSourceCaches();
5366 		map->Lock();
5367 
5368 		// iterate through the pages and wire them
5369 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5370 			phys_addr_t physicalAddress;
5371 			uint32 flags;
5372 
5373 			vm_page* page;
5374 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5375 				&& (flags & requiredProtection) == requiredProtection
5376 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5377 					!= NULL) {
5378 				// Already mapped with the correct permissions -- just increment
5379 				// the page's wired count.
5380 				increment_page_wired_count(page);
5381 			} else {
5382 				// Let vm_soft_fault() map the page for us, if possible. We need
5383 				// to fully unlock to avoid deadlocks. Since we have already
5384 				// wired the area itself, nothing disturbing will happen with it
5385 				// in the meantime.
5386 				map->Unlock();
5387 				cacheChainLocker.Unlock();
5388 				addressSpaceLocker.Unlock();
5389 
5390 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5391 					isUser, &page, range);
5392 
5393 				addressSpaceLocker.Lock();
5394 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5395 				cacheChainLocker.LockAllSourceCaches();
5396 				map->Lock();
5397 			}
5398 
5399 			if (error != B_OK)
5400 				break;
5401 		}
5402 
5403 		map->Unlock();
5404 
5405 		if (error == B_OK) {
5406 			cacheChainLocker.Unlock();
5407 		} else {
5408 			// An error occurred, so abort right here. If the current address
5409 			// is the first in this area, unwire the area, since we won't get
5410 			// to it when reverting what we've done so far.
5411 			if (nextAddress == areaStart) {
5412 				area->Unwire(range);
5413 				cacheChainLocker.Unlock();
5414 				range->~VMAreaWiredRange();
5415 				free_etc(range, mallocFlags);
5416 			} else
5417 				cacheChainLocker.Unlock();
5418 
5419 			break;
5420 		}
5421 	}
5422 
5423 	if (error != B_OK) {
5424 		// An error occurred, so unwire all that we've already wired. Note that
5425 		// even if not a single page was wired, unlock_memory_etc() is called
5426 		// to put the address space reference.
5427 		addressSpaceLocker.Unlock();
5428 		unlock_memory_etc(team, (void*)address, nextAddress - lockBaseAddress,
5429 			flags);
5430 	}
5431 
5432 	return error;
5433 }
5434 
5435 
5436 status_t
5437 lock_memory(void* address, size_t numBytes, uint32 flags)
5438 {
5439 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5440 }
5441 
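
/*	Illustrative sketch: the typical pairing of lock_memory_etc() and
	unlock_memory_etc() around a transfer in which a device writes into a user
	buffer (hence B_READ_DEVICE). The helper name and the transfer itself are
	hypothetical.

		static status_t
		example_read_from_device_into(void* userBuffer, size_t length)
		{
			status_t error = lock_memory_etc(B_CURRENT_TEAM, userBuffer,
				length, B_READ_DEVICE);
			if (error != B_OK)
				return error;

			// ... program the device and wait for the transfer ...

			// must use the exact same parameters as the lock call
			return unlock_memory_etc(B_CURRENT_TEAM, userBuffer, length,
				B_READ_DEVICE);
		}
*/
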
5442 
5443 /*!	Unwires an address range previously wired with lock_memory_etc().
5444 
5445 	Note that a call to this function must balance a previous lock_memory_etc()
5446 	call with exactly the same parameters.
5447 */
5448 status_t
5449 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5450 {
5451 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5452 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5453 
5454 	// compute the page protection that is required
5455 	bool isUser = IS_USER_ADDRESS(address);
5456 	bool writable = (flags & B_READ_DEVICE) == 0;
5457 	uint32 requiredProtection = PAGE_PRESENT
5458 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5459 	if (writable)
5460 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5461 
5462 	uint32 mallocFlags = isUser
5463 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5464 
5465 	// get and read lock the address space
5466 	VMAddressSpace* addressSpace = NULL;
5467 	if (isUser) {
5468 		if (team == B_CURRENT_TEAM)
5469 			addressSpace = VMAddressSpace::GetCurrent();
5470 		else
5471 			addressSpace = VMAddressSpace::Get(team);
5472 	} else
5473 		addressSpace = VMAddressSpace::GetKernel();
5474 	if (addressSpace == NULL)
5475 		return B_ERROR;
5476 
5477 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5478 
5479 	VMTranslationMap* map = addressSpace->TranslationMap();
5480 	status_t error = B_OK;
5481 
5482 	// iterate through all concerned areas
5483 	addr_t nextAddress = lockBaseAddress;
5484 	while (nextAddress != lockEndAddress) {
5485 		// get the next area
5486 		VMArea* area = addressSpace->LookupArea(nextAddress);
5487 		if (area == NULL) {
5488 			error = B_BAD_ADDRESS;
5489 			break;
5490 		}
5491 
5492 		addr_t areaStart = nextAddress;
5493 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5494 
5495 		// Lock the area's top cache. This is a requirement for
5496 		// VMArea::Unwire().
5497 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5498 
5499 		// Depending on the area cache type and the wiring, we may not need to
5500 		// look at the individual pages.
5501 		if (area->cache_type == CACHE_TYPE_NULL
5502 			|| area->cache_type == CACHE_TYPE_DEVICE
5503 			|| area->wiring == B_FULL_LOCK
5504 			|| area->wiring == B_CONTIGUOUS) {
5505 			// unwire the range (to avoid deadlocks we delete the range after
5506 			// unlocking the cache)
5507 			nextAddress = areaEnd;
5508 			VMAreaWiredRange* range = area->Unwire(areaStart,
5509 				areaEnd - areaStart, writable);
5510 			cacheChainLocker.Unlock();
5511 			if (range != NULL) {
5512 				range->~VMAreaWiredRange();
5513 				free_etc(range, mallocFlags);
5514 			}
5515 			continue;
5516 		}
5517 
5518 		// Lock the area's cache chain and the translation map. Needed to look
5519 		// up pages and play with their wired count.
5520 		cacheChainLocker.LockAllSourceCaches();
5521 		map->Lock();
5522 
5523 		// iterate through the pages and unwire them
5524 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5525 			phys_addr_t physicalAddress;
5526 			uint32 flags;
5527 
5528 			vm_page* page;
5529 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5530 				&& (flags & PAGE_PRESENT) != 0
5531 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5532 					!= NULL) {
5533 				// The page is still mapped -- just decrement its wired
5534 				// count.
5535 				decrement_page_wired_count(page);
5536 			} else {
5537 				panic("unlock_memory_etc(): Failed to unwire page: address "
5538 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5539 					nextAddress);
5540 				error = B_BAD_VALUE;
5541 				break;
5542 			}
5543 		}
5544 
5545 		map->Unlock();
5546 
5547 		// All pages are unwired. Remove the area's wired range as well (to
5548 		// avoid deadlocks we delete the range after unlocking the cache).
5549 		VMAreaWiredRange* range = area->Unwire(areaStart,
5550 			areaEnd - areaStart, writable);
5551 
5552 		cacheChainLocker.Unlock();
5553 
5554 		if (range != NULL) {
5555 			range->~VMAreaWiredRange();
5556 			free_etc(range, mallocFlags);
5557 		}
5558 
5559 		if (error != B_OK)
5560 			break;
5561 	}
5562 
5563 	// get rid of the address space reference
5564 	addressSpace->Put();
5565 
5566 	return error;
5567 }
5568 
5569 
5570 status_t
5571 unlock_memory(void* address, size_t numBytes, uint32 flags)
5572 {
5573 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5574 }
5575 
5576 
5577 /*!	Similar to get_memory_map(), but also allows specifying the address space
5578 	for the memory in question and has saner semantics.
5579 	Returns \c B_OK when the complete range could be translated, or
5580 	\c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5581 	case the actual number of entries is written to \c *_numEntries. Any other
5582 	error code indicates complete failure; \c *_numEntries will be set to \c 0
5583 	in this case.
5584 */
5585 status_t
5586 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5587 	physical_entry* table, uint32* _numEntries)
5588 {
5589 	uint32 numEntries = *_numEntries;
5590 	*_numEntries = 0;
5591 
5592 	VMAddressSpace* addressSpace;
5593 	addr_t virtualAddress = (addr_t)address;
5594 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5595 	phys_addr_t physicalAddress;
5596 	status_t status = B_OK;
5597 	int32 index = -1;
5598 	addr_t offset = 0;
5599 	bool interrupts = are_interrupts_enabled();
5600 
5601 	TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team,
5602 		address, numBytes, numEntries));
5603 
5604 	if (numEntries == 0 || numBytes == 0)
5605 		return B_BAD_VALUE;
5606 
5607 	// in which address space is the address to be found?
5608 	if (IS_USER_ADDRESS(virtualAddress)) {
5609 		if (team == B_CURRENT_TEAM)
5610 			addressSpace = VMAddressSpace::GetCurrent();
5611 		else
5612 			addressSpace = VMAddressSpace::Get(team);
5613 	} else
5614 		addressSpace = VMAddressSpace::GetKernel();
5615 
5616 	if (addressSpace == NULL)
5617 		return B_ERROR;
5618 
5619 	VMTranslationMap* map = addressSpace->TranslationMap();
5620 
5621 	if (interrupts)
5622 		map->Lock();
5623 
5624 	while (offset < numBytes) {
5625 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5626 		uint32 flags;
5627 
5628 		if (interrupts) {
5629 			status = map->Query((addr_t)address + offset, &physicalAddress,
5630 				&flags);
5631 		} else {
5632 			status = map->QueryInterrupt((addr_t)address + offset,
5633 				&physicalAddress, &flags);
5634 		}
5635 		if (status < B_OK)
5636 			break;
5637 		if ((flags & PAGE_PRESENT) == 0) {
5638 			panic("get_memory_map() called on unmapped memory!");
5639 			return B_BAD_ADDRESS;
5640 		}
5641 
5642 		if (index < 0 && pageOffset > 0) {
5643 			physicalAddress += pageOffset;
5644 			if (bytes > B_PAGE_SIZE - pageOffset)
5645 				bytes = B_PAGE_SIZE - pageOffset;
5646 		}
5647 
5648 		// need to switch to the next physical_entry?
5649 		if (index < 0 || table[index].address
5650 				!= physicalAddress - table[index].size) {
5651 			if ((uint32)++index + 1 > numEntries) {
5652 				// table too small
5653 				break;
5654 			}
5655 			table[index].address = physicalAddress;
5656 			table[index].size = bytes;
5657 		} else {
5658 			// the page fits into the current entry
5659 			table[index].size += bytes;
5660 		}
5661 
5662 		offset += bytes;
5663 	}
5664 
5665 	if (interrupts)
5666 		map->Unlock();
5667 
5668 	if (status != B_OK)
5669 		return status;
5670 
5671 	if ((uint32)index + 1 > numEntries) {
5672 		*_numEntries = index;
5673 		return B_BUFFER_OVERFLOW;
5674 	}
5675 
5676 	*_numEntries = index + 1;
5677 	return B_OK;
5678 }
5679 
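
/*	Illustrative sketch: translating a (locked) buffer into a physical run
	list with get_memory_map_etc(). The table size of 8 and the helper name
	are arbitrary choices for the example.

		static status_t
		example_dump_physical_runs(team_id team, const void* buffer,
			size_t length)
		{
			physical_entry entries[8];
			uint32 count = 8;
			status_t error = get_memory_map_etc(team, buffer, length,
				entries, &count);
			if (error != B_OK && error != B_BUFFER_OVERFLOW)
				return error;

			// "count" now holds the number of valid entries; with
			// B_BUFFER_OVERFLOW only a prefix of the range was translated.
			for (uint32 i = 0; i < count; i++) {
				dprintf("run %" B_PRIu32 ": %#" B_PRIx64 ", %" B_PRIu64
					" bytes\n", i, (uint64)entries[i].address,
					(uint64)entries[i].size);
			}
			return error;
		}
*/
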
5680 
5681 /*!	According to the BeBook, this function should always succeed.
5682 	This is no longer the case.
5683 */
5684 extern "C" int32
5685 __get_memory_map_haiku(const void* address, size_t numBytes,
5686 	physical_entry* table, int32 numEntries)
5687 {
5688 	uint32 entriesRead = numEntries;
5689 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5690 		table, &entriesRead);
5691 	if (error != B_OK)
5692 		return error;
5693 
5694 	// close the entry list
5695 
5696 	// if it's only one entry, we will silently accept the missing ending
5697 	if (numEntries == 1)
5698 		return B_OK;
5699 
5700 	if (entriesRead + 1 > (uint32)numEntries)
5701 		return B_BUFFER_OVERFLOW;
5702 
5703 	table[entriesRead].address = 0;
5704 	table[entriesRead].size = 0;
5705 
5706 	return B_OK;
5707 }
5708 
5709 
5710 area_id
5711 area_for(void* address)
5712 {
5713 	return vm_area_for((addr_t)address, true);
5714 }
5715 
5716 
5717 area_id
5718 find_area(const char* name)
5719 {
5720 	return VMAreaHash::Find(name);
5721 }
5722 
5723 
5724 status_t
5725 _get_area_info(area_id id, area_info* info, size_t size)
5726 {
5727 	if (size != sizeof(area_info) || info == NULL)
5728 		return B_BAD_VALUE;
5729 
5730 	AddressSpaceReadLocker locker;
5731 	VMArea* area;
5732 	status_t status = locker.SetFromArea(id, area);
5733 	if (status != B_OK)
5734 		return status;
5735 
5736 	fill_area_info(area, info, size);
5737 	return B_OK;
5738 }
5739 
5740 
5741 status_t
5742 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size)
5743 {
5744 	addr_t nextBase = *(addr_t*)cookie;
5745 
5746 	// we're already through the list
5747 	if (nextBase == (addr_t)-1)
5748 		return B_ENTRY_NOT_FOUND;
5749 
5750 	if (team == B_CURRENT_TEAM)
5751 		team = team_get_current_team_id();
5752 
5753 	AddressSpaceReadLocker locker(team);
5754 	if (!locker.IsLocked())
5755 		return B_BAD_TEAM_ID;
5756 
5757 	VMArea* area;
5758 	for (VMAddressSpace::AreaIterator it
5759 				= locker.AddressSpace()->GetAreaIterator();
5760 			(area = it.Next()) != NULL;) {
5761 		if (area->Base() > nextBase)
5762 			break;
5763 	}
5764 
5765 	if (area == NULL) {
5766 		nextBase = (addr_t)-1;
5767 		return B_ENTRY_NOT_FOUND;
5768 	}
5769 
5770 	fill_area_info(area, info, size);
5771 	*cookie = (int32)(area->Base());
5772 		// TODO: Not 64 bit safe!
5773 
5774 	return B_OK;
5775 }
5776 
5777 
5778 status_t
5779 set_area_protection(area_id area, uint32 newProtection)
5780 {
5781 	fix_protection(&newProtection);
5782 
5783 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5784 		newProtection, true);
5785 }
5786 
5787 
5788 status_t
5789 resize_area(area_id areaID, size_t newSize)
5790 {
5791 	return vm_resize_area(areaID, newSize, true);
5792 }
5793 
5794 
5795 /*!	Transfers the specified area to a new team. The caller must be the owner
5796 	of the area.
5797 */
5798 area_id
5799 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5800 	bool kernel)
5801 {
5802 	area_info info;
5803 	status_t status = get_area_info(id, &info);
5804 	if (status != B_OK)
5805 		return status;
5806 
5807 	if (info.team != thread_get_current_thread()->team->id)
5808 		return B_PERMISSION_DENIED;
5809 
5810 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5811 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5812 	if (clonedArea < 0)
5813 		return clonedArea;
5814 
5815 	status = vm_delete_area(info.team, id, kernel);
5816 	if (status != B_OK) {
5817 		vm_delete_area(target, clonedArea, kernel);
5818 		return status;
5819 	}
5820 
5821 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5822 
5823 	return clonedArea;
5824 }
5825 
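
/*	Illustrative sketch: handing an area over to another team, e.g. a server
	passing a freshly created area on to its client. The variables sharedArea
	and clientTeam are hypothetical.

		void* clientAddress = NULL;
		area_id transferred = transfer_area(sharedArea, &clientAddress,
			B_ANY_ADDRESS, clientTeam, true);
		if (transferred < 0)
			return transferred;
		// The original area is deleted; "transferred" is its clone, owned by
		// clientTeam and mapped at clientAddress in that team's address
		// space.
*/
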
5826 
5827 extern "C" area_id
5828 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
5829 	size_t numBytes, uint32 addressSpec, uint32 protection,
5830 	void** _virtualAddress)
5831 {
5832 	if (!arch_vm_supports_protection(protection))
5833 		return B_NOT_SUPPORTED;
5834 
5835 	fix_protection(&protection);
5836 
5837 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
5838 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
5839 		false);
5840 }
5841 
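
/*	Illustrative sketch: how a driver might map a device's register block via
	the public map_physical_memory() call that resolves to the function above.
	The physical base address and area name are made up.

		static area_id
		example_map_device_registers(void** _registers)
		{
			return map_physical_memory("example device registers",
				(phys_addr_t)0xfe000000, B_PAGE_SIZE, B_ANY_KERNEL_ADDRESS,
				B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, _registers);
		}
*/
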
5842 
5843 area_id
5844 clone_area(const char* name, void** _address, uint32 addressSpec,
5845 	uint32 protection, area_id source)
5846 {
5847 	if ((protection & B_KERNEL_PROTECTION) == 0)
5848 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5849 
5850 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
5851 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
5852 }
5853 
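
/*	Illustrative sketch: cloning an existing area (identified by an area ID
	obtained elsewhere, here the hypothetical frameBufferArea) into kernel
	space for read/write access.

		void* baseAddress;
		area_id clone = clone_area("frame buffer clone", &baseAddress,
			B_ANY_KERNEL_ADDRESS, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
			frameBufferArea);
		if (clone < 0)
			return clone;
*/
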
5854 
5855 area_id
5856 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock,
5857 	uint32 protection, uint32 flags,
5858 	const virtual_address_restrictions* virtualAddressRestrictions,
5859 	const physical_address_restrictions* physicalAddressRestrictions,
5860 	void** _address)
5861 {
5862 	fix_protection(&protection);
5863 
5864 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
5865 		virtualAddressRestrictions, physicalAddressRestrictions, true,
5866 		_address);
5867 }
5868 
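
/*	Illustrative sketch: using create_area_etc() to allocate a physically
	contiguous buffer whose pages all lie below 16 MB, e.g. for a legacy DMA
	engine. The area name and size are arbitrary, and the use of high_address
	as an exclusive upper bound for the pages' physical addresses is the
	example's own assumption.

		virtual_address_restrictions virtualRestrictions = {};
		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;

		physical_address_restrictions physicalRestrictions = {};
		physicalRestrictions.high_address = 16 * 1024 * 1024;

		void* buffer;
		area_id area = create_area_etc(B_SYSTEM_TEAM, "example dma buffer",
			16 * B_PAGE_SIZE, B_CONTIGUOUS,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0,
			&virtualRestrictions, &physicalRestrictions, &buffer);
*/
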
5869 
5870 extern "C" area_id
5871 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
5872 	size_t size, uint32 lock, uint32 protection)
5873 {
5874 	fix_protection(&protection);
5875 
5876 	virtual_address_restrictions virtualRestrictions = {};
5877 	virtualRestrictions.address = *_address;
5878 	virtualRestrictions.address_specification = addressSpec;
5879 	physical_address_restrictions physicalRestrictions = {};
5880 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
5881 		lock, protection, 0, &virtualRestrictions, &physicalRestrictions, true,
5882 		_address);
5883 }
5884 
5885 
5886 status_t
5887 delete_area(area_id area)
5888 {
5889 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
5890 }
5891 
5892 
5893 //	#pragma mark - Userland syscalls
5894 
5895 
5896 status_t
5897 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
5898 	addr_t size)
5899 {
5900 	// filter out some unavailable values (for userland)
5901 	switch (addressSpec) {
5902 		case B_ANY_KERNEL_ADDRESS:
5903 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5904 			return B_BAD_VALUE;
5905 	}
5906 
5907 	addr_t address;
5908 
5909 	if (!IS_USER_ADDRESS(userAddress)
5910 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
5911 		return B_BAD_ADDRESS;
5912 
5913 	status_t status = vm_reserve_address_range(
5914 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
5915 		RESERVED_AVOID_BASE);
5916 	if (status != B_OK)
5917 		return status;
5918 
5919 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
5920 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5921 			(void*)address, size);
5922 		return B_BAD_ADDRESS;
5923 	}
5924 
5925 	return B_OK;
5926 }
5927 
5928 
5929 status_t
5930 _user_unreserve_address_range(addr_t address, addr_t size)
5931 {
5932 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5933 		(void*)address, size);
5934 }
5935 
5936 
5937 area_id
5938 _user_area_for(void* address)
5939 {
5940 	return vm_area_for((addr_t)address, false);
5941 }
5942 
5943 
5944 area_id
5945 _user_find_area(const char* userName)
5946 {
5947 	char name[B_OS_NAME_LENGTH];
5948 
5949 	if (!IS_USER_ADDRESS(userName)
5950 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5951 		return B_BAD_ADDRESS;
5952 
5953 	return find_area(name);
5954 }
5955 
5956 
5957 status_t
5958 _user_get_area_info(area_id area, area_info* userInfo)
5959 {
5960 	if (!IS_USER_ADDRESS(userInfo))
5961 		return B_BAD_ADDRESS;
5962 
5963 	area_info info;
5964 	status_t status = get_area_info(area, &info);
5965 	if (status < B_OK)
5966 		return status;
5967 
5968 	// TODO: do we want to prevent userland from seeing kernel protections?
5969 	//info.protection &= B_USER_PROTECTION;
5970 
5971 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5972 		return B_BAD_ADDRESS;
5973 
5974 	return status;
5975 }
5976 
5977 
5978 status_t
5979 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo)
5980 {
5981 	int32 cookie;
5982 
5983 	if (!IS_USER_ADDRESS(userCookie)
5984 		|| !IS_USER_ADDRESS(userInfo)
5985 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
5986 		return B_BAD_ADDRESS;
5987 
5988 	area_info info;
5989 	status_t status = _get_next_area_info(team, &cookie, &info,
5990 		sizeof(area_info));
5991 	if (status != B_OK)
5992 		return status;
5993 
5994 	//info.protection &= B_USER_PROTECTION;
5995 
5996 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
5997 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5998 		return B_BAD_ADDRESS;
5999 
6000 	return status;
6001 }
6002 
6003 
6004 status_t
6005 _user_set_area_protection(area_id area, uint32 newProtection)
6006 {
6007 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6008 		return B_BAD_VALUE;
6009 
6010 	fix_protection(&newProtection);
6011 
6012 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6013 		newProtection, false);
6014 }
6015 
6016 
6017 status_t
6018 _user_resize_area(area_id area, size_t newSize)
6019 {
6020 	// TODO: Since we restrict deleting of areas to those owned by the team,
6021 	// we should also do that for resizing (check other functions, too).
6022 	return vm_resize_area(area, newSize, false);
6023 }
6024 
6025 
6026 area_id
6027 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6028 	team_id target)
6029 {
6030 	// filter out some unavailable values (for userland)
6031 	switch (addressSpec) {
6032 		case B_ANY_KERNEL_ADDRESS:
6033 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6034 			return B_BAD_VALUE;
6035 	}
6036 
6037 	void* address;
6038 	if (!IS_USER_ADDRESS(userAddress)
6039 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6040 		return B_BAD_ADDRESS;
6041 
6042 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6043 	if (newArea < B_OK)
6044 		return newArea;
6045 
6046 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6047 		return B_BAD_ADDRESS;
6048 
6049 	return newArea;
6050 }
6051 
6052 
6053 area_id
6054 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6055 	uint32 protection, area_id sourceArea)
6056 {
6057 	char name[B_OS_NAME_LENGTH];
6058 	void* address;
6059 
6060 	// filter out some unavailable values (for userland)
6061 	switch (addressSpec) {
6062 		case B_ANY_KERNEL_ADDRESS:
6063 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6064 			return B_BAD_VALUE;
6065 	}
6066 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6067 		return B_BAD_VALUE;
6068 
6069 	if (!IS_USER_ADDRESS(userName)
6070 		|| !IS_USER_ADDRESS(userAddress)
6071 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6072 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6073 		return B_BAD_ADDRESS;
6074 
6075 	fix_protection(&protection);
6076 
6077 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6078 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6079 		false);
6080 	if (clonedArea < B_OK)
6081 		return clonedArea;
6082 
6083 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6084 		delete_area(clonedArea);
6085 		return B_BAD_ADDRESS;
6086 	}
6087 
6088 	return clonedArea;
6089 }
6090 
6091 
6092 area_id
6093 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6094 	size_t size, uint32 lock, uint32 protection)
6095 {
6096 	char name[B_OS_NAME_LENGTH];
6097 	void* address;
6098 
6099 	// filter out some unavailable values (for userland)
6100 	switch (addressSpec) {
6101 		case B_ANY_KERNEL_ADDRESS:
6102 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6103 			return B_BAD_VALUE;
6104 	}
6105 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6106 		return B_BAD_VALUE;
6107 
6108 	if (!IS_USER_ADDRESS(userName)
6109 		|| !IS_USER_ADDRESS(userAddress)
6110 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6111 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6112 		return B_BAD_ADDRESS;
6113 
6114 	if (addressSpec == B_EXACT_ADDRESS
6115 		&& IS_KERNEL_ADDRESS(address))
6116 		return B_BAD_VALUE;
6117 
6118 	fix_protection(&protection);
6119 
6120 	virtual_address_restrictions virtualRestrictions = {};
6121 	virtualRestrictions.address = address;
6122 	virtualRestrictions.address_specification = addressSpec;
6123 	physical_address_restrictions physicalRestrictions = {};
6124 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6125 		size, lock, protection, 0, &virtualRestrictions, &physicalRestrictions,
6126 		false, &address);
6127 
6128 	if (area >= B_OK
6129 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6130 		delete_area(area);
6131 		return B_BAD_ADDRESS;
6132 	}
6133 
6134 	return area;
6135 }
6136 
6137 
6138 status_t
6139 _user_delete_area(area_id area)
6140 {
6141 	// Unlike the BeOS implementation, you can now only delete areas
6142 	// that you have created yourself from userland.
6143 	// The documentation for delete_area() explicitly states that this
6144 	// will be restricted in the future, and so it will.
6145 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6146 }
6147 
6148 
6149 // TODO: create a BeOS style call for this!
6150 
6151 area_id
6152 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6153 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6154 	int fd, off_t offset)
6155 {
6156 	char name[B_OS_NAME_LENGTH];
6157 	void* address;
6158 	area_id area;
6159 
6160 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6161 		return B_BAD_VALUE;
6162 
6163 	fix_protection(&protection);
6164 
6165 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6166 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6167 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6168 		return B_BAD_ADDRESS;
6169 
6170 	if (addressSpec == B_EXACT_ADDRESS) {
6171 		if ((addr_t)address + size < (addr_t)address
6172 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6173 			return B_BAD_VALUE;
6174 		}
6175 		if (!IS_USER_ADDRESS(address)
6176 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6177 			return B_BAD_ADDRESS;
6178 		}
6179 	}
6180 
6181 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6182 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6183 		false);
6184 	if (area < B_OK)
6185 		return area;
6186 
6187 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6188 		return B_BAD_ADDRESS;
6189 
6190 	return area;
6191 }
6192 
6193 
6194 status_t
6195 _user_unmap_memory(void* _address, size_t size)
6196 {
6197 	addr_t address = (addr_t)_address;
6198 
6199 	// check params
6200 	if (size == 0 || (addr_t)address + size < (addr_t)address
6201 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6202 		return B_BAD_VALUE;
6203 	}
6204 
6205 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6206 		return B_BAD_ADDRESS;
6207 
6208 	// Write lock the address space and ensure the address range is not wired.
6209 	AddressSpaceWriteLocker locker;
6210 	do {
6211 		status_t status = locker.SetTo(team_get_current_team_id());
6212 		if (status != B_OK)
6213 			return status;
6214 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6215 			size, &locker));
6216 
6217 	// unmap
6218 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6219 }
6220 
6221 
6222 status_t
6223 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6224 {
6225 	// check address range
6226 	addr_t address = (addr_t)_address;
6227 	size = PAGE_ALIGN(size);
6228 
6229 	if ((address % B_PAGE_SIZE) != 0)
6230 		return B_BAD_VALUE;
6231 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6232 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6233 		// weird error code required by POSIX
6234 		return ENOMEM;
6235 	}
6236 
6237 	// extend and check protection
6238 	if ((protection & ~B_USER_PROTECTION) != 0)
6239 		return B_BAD_VALUE;
6240 
6241 	fix_protection(&protection);
6242 
6243 	// We need to write lock the address space, since we're going to play with
6244 	// the areas. Also make sure that none of the areas is wired and that we're
6245 	// actually allowed to change the protection.
6246 	AddressSpaceWriteLocker locker;
6247 
6248 	bool restart;
6249 	do {
6250 		restart = false;
6251 
6252 		status_t status = locker.SetTo(team_get_current_team_id());
6253 		if (status != B_OK)
6254 			return status;
6255 
6256 		// First round: Check whether the whole range is covered by areas and we
6257 		// are allowed to modify them.
6258 		addr_t currentAddress = address;
6259 		size_t sizeLeft = size;
6260 		while (sizeLeft > 0) {
6261 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6262 			if (area == NULL)
6263 				return B_NO_MEMORY;
6264 
6265 			if ((area->protection & B_KERNEL_AREA) != 0)
6266 				return B_NOT_ALLOWED;
6267 
6268 			// TODO: For (shared) mapped files we should check whether the new
6269 			// protections are compatible with the file permissions. We don't
6270 			// have a way to do that yet, though.
6271 
6272 			addr_t offset = currentAddress - area->Base();
6273 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6274 
6275 			AreaCacheLocker cacheLocker(area);
6276 
6277 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6278 					&locker, &cacheLocker)) {
6279 				restart = true;
6280 				break;
6281 			}
6282 
6283 			cacheLocker.Unlock();
6284 
6285 			currentAddress += rangeSize;
6286 			sizeLeft -= rangeSize;
6287 		}
6288 	} while (restart);
6289 
6290 	// Second round: If the protections differ from those of the area, create a
6291 	// page protection array and re-map mapped pages.
6292 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6293 	addr_t currentAddress = address;
6294 	size_t sizeLeft = size;
6295 	while (sizeLeft > 0) {
6296 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6297 		if (area == NULL)
6298 			return B_NO_MEMORY;
6299 
6300 		addr_t offset = currentAddress - area->Base();
6301 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6302 
6303 		currentAddress += rangeSize;
6304 		sizeLeft -= rangeSize;
6305 
6306 		if (area->page_protections == NULL) {
6307 			if (area->protection == protection)
6308 				continue;
6309 
6310 			status_t status = allocate_area_page_protections(area);
6311 			if (status != B_OK)
6312 				return status;
6313 		}
6314 
6315 		// We need to lock the complete cache chain, since we potentially unmap
6316 		// pages of lower caches.
6317 		VMCache* topCache = vm_area_get_locked_cache(area);
6318 		VMCacheChainLocker cacheChainLocker(topCache);
6319 		cacheChainLocker.LockAllSourceCaches();
6320 
6321 		for (addr_t pageAddress = area->Base() + offset;
6322 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6323 			map->Lock();
6324 
6325 			set_area_page_protection(area, pageAddress, protection);
6326 
6327 			phys_addr_t physicalAddress;
6328 			uint32 flags;
6329 
6330 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6331 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6332 				map->Unlock();
6333 				continue;
6334 			}
6335 
6336 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6337 			if (page == NULL) {
6338 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6339 					"\n", area, physicalAddress);
6340 				map->Unlock();
6341 				return B_ERROR;
6342 			}
6343 
6344 			// If the page is not in the topmost cache and write access is
6345 			// requested, we have to unmap it. Otherwise we can re-map it with
6346 			// the new protection.
6347 			bool unmapPage = page->Cache() != topCache
6348 				&& (protection & B_WRITE_AREA) != 0;
6349 
6350 			if (!unmapPage)
6351 				map->ProtectPage(area, pageAddress, protection);
6352 
6353 			map->Unlock();
6354 
6355 			if (unmapPage) {
6356 				DEBUG_PAGE_ACCESS_START(page);
6357 				unmap_page(area, pageAddress);
6358 				DEBUG_PAGE_ACCESS_END(page);
6359 			}
6360 		}
6361 	}
6362 
6363 	return B_OK;
6364 }
6365 
6366 
6367 status_t
6368 _user_sync_memory(void* _address, size_t size, uint32 flags)
6369 {
6370 	addr_t address = (addr_t)_address;
6371 	size = PAGE_ALIGN(size);
6372 
6373 	// check params
6374 	if ((address % B_PAGE_SIZE) != 0)
6375 		return B_BAD_VALUE;
6376 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6377 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6378 		// weird error code required by POSIX
6379 		return ENOMEM;
6380 	}
6381 
6382 	bool writeSync = (flags & MS_SYNC) != 0;
6383 	bool writeAsync = (flags & MS_ASYNC) != 0;
6384 	if (writeSync && writeAsync)
6385 		return B_BAD_VALUE;
6386 
6387 	if (size == 0 || (!writeSync && !writeAsync))
6388 		return B_OK;
6389 
6390 	// iterate through the range and sync all concerned areas
6391 	while (size > 0) {
6392 		// read lock the address space
6393 		AddressSpaceReadLocker locker;
6394 		status_t error = locker.SetTo(team_get_current_team_id());
6395 		if (error != B_OK)
6396 			return error;
6397 
6398 		// get the first area
6399 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6400 		if (area == NULL)
6401 			return B_NO_MEMORY;
6402 
6403 		uint32 offset = address - area->Base();
6404 		size_t rangeSize = min_c(area->Size() - offset, size);
6405 		offset += area->cache_offset;
6406 
6407 		// lock the cache
6408 		AreaCacheLocker cacheLocker(area);
6409 		if (!cacheLocker)
6410 			return B_BAD_VALUE;
6411 		VMCache* cache = area->cache;
6412 
6413 		locker.Unlock();
6414 
6415 		uint32 firstPage = offset >> PAGE_SHIFT;
6416 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6417 
6418 		// write the pages
6419 		if (cache->type == CACHE_TYPE_VNODE) {
6420 			if (writeSync) {
6421 				// synchronous
6422 				error = vm_page_write_modified_page_range(cache, firstPage,
6423 					endPage);
6424 				if (error != B_OK)
6425 					return error;
6426 			} else {
6427 				// asynchronous
6428 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6429 				// TODO: This is probably not quite what is supposed to happen.
6430 				// Especially when a lot has to be written, it might take ages
6431 				// until it really hits the disk.
6432 			}
6433 		}
6434 
6435 		address += rangeSize;
6436 		size -= rangeSize;
6437 	}
6438 
6439 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6440 	// synchronize multiple mappings of the same file. In our VM they never get
6441 	// out of sync, though, so we don't have to do anything.
6442 
6443 	return B_OK;
6444 }
6445 
6446 
6447 status_t
6448 _user_memory_advice(void* address, size_t size, uint32 advice)
6449 {
6450 	// TODO: Implement!
6451 	return B_OK;
6452 }
6453 
6454 
6455 status_t
6456 _user_get_memory_properties(team_id teamID, const void* address,
6457 	uint32* _protected, uint32* _lock)
6458 {
6459 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6460 		return B_BAD_ADDRESS;
6461 
6462 	AddressSpaceReadLocker locker;
6463 	status_t error = locker.SetTo(teamID);
6464 	if (error != B_OK)
6465 		return error;
6466 
6467 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6468 	if (area == NULL)
6469 		return B_NO_MEMORY;
6470 
6471 
6472 	uint32 protection = area->protection;
6473 	if (area->page_protections != NULL)
6474 		protection = get_area_page_protection(area, (addr_t)address);
6475 
6476 	uint32 wiring = area->wiring;
6477 
6478 	locker.Unlock();
6479 
6480 	error = user_memcpy(_protected, &protection, sizeof(protection));
6481 	if (error != B_OK)
6482 		return error;
6483 
6484 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6485 
6486 	return error;
6487 }
6488 
6489 
6490 // #pragma mark -- compatibility
6491 
6492 
6493 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6494 
6495 
6496 struct physical_entry_beos {
6497 	uint32	address;
6498 	uint32	size;
6499 };
6500 
6501 
6502 /*!	The physical_entry structure has changed. We need to translate it to the
6503 	old one.
6504 */
6505 extern "C" int32
6506 __get_memory_map_beos(const void* _address, size_t numBytes,
6507 	physical_entry_beos* table, int32 numEntries)
6508 {
6509 	if (numEntries <= 0)
6510 		return B_BAD_VALUE;
6511 
6512 	const uint8* address = (const uint8*)_address;
6513 
6514 	int32 count = 0;
6515 	while (numBytes > 0 && count < numEntries) {
6516 		physical_entry entry;
6517 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6518 		if (result < 0) {
6519 			if (result != B_BUFFER_OVERFLOW)
6520 				return result;
6521 		}
6522 
6523 		if (entry.address >= (phys_addr_t)1 << 32) {
6524 			panic("get_memory_map(): Address is greater than 4 GB!");
6525 			return B_ERROR;
6526 		}
6527 
6528 		table[count].address = entry.address;
6529 		table[count++].size = entry.size;
6530 
6531 		address += entry.size;
6532 		numBytes -= entry.size;
6533 	}
6534 
6535 	// null-terminate the table, if possible
6536 	if (count < numEntries) {
6537 		table[count].address = 0;
6538 		table[count].size = 0;
6539 	}
6540 
6541 	return B_OK;
6542 }
6543 
6544 
6545 /*!	The type of the \a physicalAddress parameter has changed from void* to
6546 	phys_addr_t.
6547 */
6548 extern "C" area_id
6549 __map_physical_memory_beos(const char* name, void* physicalAddress,
6550 	size_t numBytes, uint32 addressSpec, uint32 protection,
6551 	void** _virtualAddress)
6552 {
6553 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6554 		addressSpec, protection, _virtualAddress);
6555 }
6556 
6557 
6558 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6559 	we meddle with the \a lock parameter to force 32 bit.
6560 */
6561 extern "C" area_id
6562 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6563 	size_t size, uint32 lock, uint32 protection)
6564 {
6565 	switch (lock) {
6566 		case B_NO_LOCK:
6567 			break;
6568 		case B_FULL_LOCK:
6569 		case B_LAZY_LOCK:
6570 			lock = B_32_BIT_FULL_LOCK;
6571 			break;
6572 		case B_CONTIGUOUS:
6573 			lock = B_32_BIT_CONTIGUOUS;
6574 			break;
6575 	}
6576 
6577 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6578 		protection);
6579 }
6580 
6581 
6582 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6583 	"BASE");
6584 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6585 	"map_physical_memory@", "BASE");
6586 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6587 	"BASE");
6588 
6589 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6590 	"get_memory_map@@", "1_ALPHA3");
6591 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6592 	"map_physical_memory@@", "1_ALPHA3");
6593 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6594 	"1_ALPHA3");
6595 
6596 
6597 #else
6598 
6599 
6600 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6601 	"get_memory_map@@", "BASE");
6602 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6603 	"map_physical_memory@@", "BASE");
6604 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6605 	"BASE");
6606 
6607 
6608 #endif	// defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6609