xref: /haiku/src/system/kernel/vm/vm.cpp (revision 7f4d1af49dd1d67ecbf1d934eddd92cae7c4c558)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <boot/elf.h>
31 #include <boot/stage2.h>
32 #include <condition_variable.h>
33 #include <console.h>
34 #include <debug.h>
35 #include <file_cache.h>
36 #include <fs/fd.h>
37 #include <heap.h>
38 #include <kernel.h>
39 #include <int.h>
40 #include <lock.h>
41 #include <low_resource_manager.h>
42 #include <slab/Slab.h>
43 #include <smp.h>
44 #include <system_info.h>
45 #include <thread.h>
46 #include <team.h>
47 #include <tracing.h>
48 #include <util/AutoLock.h>
49 #include <util/khash.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 class AreaCacheLocking {
77 public:
78 	inline bool Lock(VMCache* lockable)
79 	{
80 		return false;
81 	}
82 
83 	inline void Unlock(VMCache* lockable)
84 	{
85 		vm_area_put_locked_cache(lockable);
86 	}
87 };
88 
89 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
90 public:
91 	inline AreaCacheLocker(VMCache* cache = NULL)
92 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
93 	{
94 	}
95 
96 	inline AreaCacheLocker(VMArea* area)
97 		: AutoLocker<VMCache, AreaCacheLocking>()
98 	{
99 		SetTo(area);
100 	}
101 
102 	inline void SetTo(VMCache* cache, bool alreadyLocked)
103 	{
104 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
105 	}
106 
107 	inline void SetTo(VMArea* area)
108 	{
109 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
110 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
111 	}
112 };
113 
114 
115 class VMCacheChainLocker {
116 public:
117 	VMCacheChainLocker()
118 		:
119 		fTopCache(NULL),
120 		fBottomCache(NULL)
121 	{
122 	}
123 
124 	VMCacheChainLocker(VMCache* topCache)
125 		:
126 		fTopCache(topCache),
127 		fBottomCache(topCache)
128 	{
129 	}
130 
131 	~VMCacheChainLocker()
132 	{
133 		Unlock();
134 	}
135 
136 	void SetTo(VMCache* topCache)
137 	{
138 		fTopCache = topCache;
139 		fBottomCache = topCache;
140 
141 		if (topCache != NULL)
142 			topCache->SetUserData(NULL);
143 	}
144 
145 	VMCache* LockSourceCache()
146 	{
147 		if (fBottomCache == NULL || fBottomCache->source == NULL)
148 			return NULL;
149 
150 		VMCache* previousCache = fBottomCache;
151 
152 		fBottomCache = fBottomCache->source;
153 		fBottomCache->Lock();
154 		fBottomCache->AcquireRefLocked();
155 		fBottomCache->SetUserData(previousCache);
156 
157 		return fBottomCache;
158 	}
159 
160 	void LockAllSourceCaches()
161 	{
162 		while (LockSourceCache() != NULL) {
163 		}
164 	}
165 
166 	void Unlock(VMCache* exceptCache = NULL)
167 	{
168 		if (fTopCache == NULL)
169 			return;
170 
171 		// Unlock caches in source -> consumer direction. This is important to
172 		// avoid double-locking and a reversal of locking order in case a cache
173 		// is eligable for merging.
174 		VMCache* cache = fBottomCache;
175 		while (cache != NULL) {
176 			VMCache* nextCache = (VMCache*)cache->UserData();
177 			if (cache != exceptCache)
178 				cache->ReleaseRefAndUnlock(cache != fTopCache);
179 
180 			if (cache == fTopCache)
181 				break;
182 
183 			cache = nextCache;
184 		}
185 
186 		fTopCache = NULL;
187 		fBottomCache = NULL;
188 	}
189 
190 	void UnlockKeepRefs(bool keepTopCacheLocked)
191 	{
192 		if (fTopCache == NULL)
193 			return;
194 
195 		VMCache* nextCache = fBottomCache;
196 		VMCache* cache = NULL;
197 
198 		while (keepTopCacheLocked
199 				? nextCache != fTopCache : cache != fTopCache) {
200 			cache = nextCache;
201 			nextCache = (VMCache*)cache->UserData();
202 			cache->Unlock(cache != fTopCache);
203 		}
204 	}
205 
206 	void RelockCaches(bool topCacheLocked)
207 	{
208 		if (fTopCache == NULL)
209 			return;
210 
211 		VMCache* nextCache = fTopCache;
212 		VMCache* cache = NULL;
213 		if (topCacheLocked) {
214 			cache = nextCache;
215 			nextCache = cache->source;
216 		}
217 
218 		while (cache != fBottomCache && nextCache != NULL) {
219 			VMCache* consumer = cache;
220 			cache = nextCache;
221 			nextCache = cache->source;
222 			cache->Lock();
223 			cache->SetUserData(consumer);
224 		}
225 	}
226 
227 private:
228 	VMCache*	fTopCache;
229 	VMCache*	fBottomCache;
230 };
231 
232 
233 // The memory reserve an allocation of the certain priority must not touch.
234 static const size_t kMemoryReserveForPriority[] = {
235 	VM_MEMORY_RESERVE_USER,		// user
236 	VM_MEMORY_RESERVE_SYSTEM,	// system
237 	0							// VIP
238 };
239 
240 
241 ObjectCache* gPageMappingsObjectCache;
242 
243 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
244 
245 static off_t sAvailableMemory;
246 static off_t sNeededMemory;
247 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
248 static uint32 sPageFaults;
249 
250 static VMPhysicalPageMapper* sPhysicalPageMapper;
251 
252 #if DEBUG_CACHE_LIST
253 
254 struct cache_info {
255 	VMCache*	cache;
256 	addr_t		page_count;
257 	addr_t		committed;
258 };
259 
260 static const int kCacheInfoTableCount = 100 * 1024;
261 static cache_info* sCacheInfoTable;
262 
263 #endif	// DEBUG_CACHE_LIST
264 
265 
266 // function declarations
267 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
268 	bool addressSpaceCleanup);
269 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
270 	bool isWrite, bool isUser, vm_page** wirePage,
271 	VMAreaWiredRange* wiredRange = NULL);
272 static status_t map_backing_store(VMAddressSpace* addressSpace,
273 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
274 	int protection, int mapping, uint32 flags,
275 	const virtual_address_restrictions* addressRestrictions, bool kernel,
276 	VMArea** _area, void** _virtualAddress);
277 
278 
279 //	#pragma mark -
280 
281 
282 #if VM_PAGE_FAULT_TRACING
283 
284 namespace VMPageFaultTracing {
285 
286 class PageFaultStart : public AbstractTraceEntry {
287 public:
288 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
289 		:
290 		fAddress(address),
291 		fPC(pc),
292 		fWrite(write),
293 		fUser(user)
294 	{
295 		Initialized();
296 	}
297 
298 	virtual void AddDump(TraceOutput& out)
299 	{
300 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
301 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
302 	}
303 
304 private:
305 	addr_t	fAddress;
306 	addr_t	fPC;
307 	bool	fWrite;
308 	bool	fUser;
309 };
310 
311 
312 // page fault errors
313 enum {
314 	PAGE_FAULT_ERROR_NO_AREA		= 0,
315 	PAGE_FAULT_ERROR_KERNEL_ONLY,
316 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
317 	PAGE_FAULT_ERROR_READ_PROTECTED,
318 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
319 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
320 };
321 
322 
323 class PageFaultError : public AbstractTraceEntry {
324 public:
325 	PageFaultError(area_id area, status_t error)
326 		:
327 		fArea(area),
328 		fError(error)
329 	{
330 		Initialized();
331 	}
332 
333 	virtual void AddDump(TraceOutput& out)
334 	{
335 		switch (fError) {
336 			case PAGE_FAULT_ERROR_NO_AREA:
337 				out.Print("page fault error: no area");
338 				break;
339 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
340 				out.Print("page fault error: area: %ld, kernel only", fArea);
341 				break;
342 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
343 				out.Print("page fault error: area: %ld, write protected",
344 					fArea);
345 				break;
346 			case PAGE_FAULT_ERROR_READ_PROTECTED:
347 				out.Print("page fault error: area: %ld, read protected", fArea);
348 				break;
349 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
350 				out.Print("page fault error: kernel touching bad user memory");
351 				break;
352 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
353 				out.Print("page fault error: no address space");
354 				break;
355 			default:
356 				out.Print("page fault error: area: %ld, error: %s", fArea,
357 					strerror(fError));
358 				break;
359 		}
360 	}
361 
362 private:
363 	area_id		fArea;
364 	status_t	fError;
365 };
366 
367 
368 class PageFaultDone : public AbstractTraceEntry {
369 public:
370 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
371 			vm_page* page)
372 		:
373 		fArea(area),
374 		fTopCache(topCache),
375 		fCache(cache),
376 		fPage(page)
377 	{
378 		Initialized();
379 	}
380 
381 	virtual void AddDump(TraceOutput& out)
382 	{
383 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
384 			"page: %p", fArea, fTopCache, fCache, fPage);
385 	}
386 
387 private:
388 	area_id		fArea;
389 	VMCache*	fTopCache;
390 	VMCache*	fCache;
391 	vm_page*	fPage;
392 };
393 
394 }	// namespace VMPageFaultTracing
395 
396 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
397 #else
398 #	define TPF(x) ;
399 #endif	// VM_PAGE_FAULT_TRACING
400 
401 
402 //	#pragma mark -
403 
404 
405 /*!	The page's cache must be locked.
406 */
407 static inline void
408 increment_page_wired_count(vm_page* page)
409 {
410 	if (!page->IsMapped())
411 		atomic_add(&gMappedPagesCount, 1);
412 	page->IncrementWiredCount();
413 }
414 
415 
416 /*!	The page's cache must be locked.
417 */
418 static inline void
419 decrement_page_wired_count(vm_page* page)
420 {
421 	page->DecrementWiredCount();
422 	if (!page->IsMapped())
423 		atomic_add(&gMappedPagesCount, -1);
424 }
425 
426 
427 static inline addr_t
428 virtual_page_address(VMArea* area, vm_page* page)
429 {
430 	return area->Base()
431 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
432 }
433 
434 
435 //! You need to have the address space locked when calling this function
436 static VMArea*
437 lookup_area(VMAddressSpace* addressSpace, area_id id)
438 {
439 	VMAreaHash::ReadLock();
440 
441 	VMArea* area = VMAreaHash::LookupLocked(id);
442 	if (area != NULL && area->address_space != addressSpace)
443 		area = NULL;
444 
445 	VMAreaHash::ReadUnlock();
446 
447 	return area;
448 }
449 
450 
451 static status_t
452 allocate_area_page_protections(VMArea* area)
453 {
454 	// In the page protections we store only the three user protections,
455 	// so we use 4 bits per page.
456 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
457 	area->page_protections = (uint8*)malloc_etc(bytes,
458 		HEAP_DONT_LOCK_KERNEL_SPACE);
459 	if (area->page_protections == NULL)
460 		return B_NO_MEMORY;
461 
462 	// init the page protections for all pages to that of the area
463 	uint32 areaProtection = area->protection
464 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
465 	memset(area->page_protections, areaProtection | (areaProtection << 4),
466 		bytes);
467 	return B_OK;
468 }
469 
470 
471 static inline void
472 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
473 {
474 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
475 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
476 	uint8& entry = area->page_protections[pageIndex / 2];
477 	if (pageIndex % 2 == 0)
478 		entry = (entry & 0xf0) | protection;
479 	else
480 		entry = (entry & 0x0f) | (protection << 4);
481 }
482 
483 
484 static inline uint32
485 get_area_page_protection(VMArea* area, addr_t pageAddress)
486 {
487 	if (area->page_protections == NULL)
488 		return area->protection;
489 
490 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
491 	uint32 protection = area->page_protections[pageIndex / 2];
492 	if (pageIndex % 2 == 0)
493 		protection &= 0x0f;
494 	else
495 		protection >>= 4;
496 
497 	// If this is a kernel area we translate the user flags to kernel flags.
498 	if (area->address_space == VMAddressSpace::Kernel()) {
499 		uint32 kernelProtection = 0;
500 		if ((protection & B_READ_AREA) != 0)
501 			kernelProtection |= B_KERNEL_READ_AREA;
502 		if ((protection & B_WRITE_AREA) != 0)
503 			kernelProtection |= B_KERNEL_WRITE_AREA;
504 
505 		return kernelProtection;
506 	}
507 
508 	return protection | B_KERNEL_READ_AREA
509 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
510 }
511 
512 
513 /*!	The caller must have reserved enough pages the translation map
514 	implementation might need to map this page.
515 	The page's cache must be locked.
516 */
517 static status_t
518 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
519 	vm_page_reservation* reservation)
520 {
521 	VMTranslationMap* map = area->address_space->TranslationMap();
522 
523 	bool wasMapped = page->IsMapped();
524 
525 	if (area->wiring == B_NO_LOCK) {
526 		DEBUG_PAGE_ACCESS_CHECK(page);
527 
528 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
529 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
530 			gPageMappingsObjectCache,
531 			CACHE_DONT_WAIT_FOR_MEMORY
532 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
533 		if (mapping == NULL)
534 			return B_NO_MEMORY;
535 
536 		mapping->page = page;
537 		mapping->area = area;
538 
539 		map->Lock();
540 
541 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
542 			area->MemoryType(), reservation);
543 
544 		// insert mapping into lists
545 		if (!page->IsMapped())
546 			atomic_add(&gMappedPagesCount, 1);
547 
548 		page->mappings.Add(mapping);
549 		area->mappings.Add(mapping);
550 
551 		map->Unlock();
552 	} else {
553 		DEBUG_PAGE_ACCESS_CHECK(page);
554 
555 		map->Lock();
556 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
557 			area->MemoryType(), reservation);
558 		map->Unlock();
559 
560 		increment_page_wired_count(page);
561 	}
562 
563 	if (!wasMapped) {
564 		// The page is mapped now, so we must not remain in the cached queue.
565 		// It also makes sense to move it from the inactive to the active, since
566 		// otherwise the page daemon wouldn't come to keep track of it (in idle
567 		// mode) -- if the page isn't touched, it will be deactivated after a
568 		// full iteration through the queue at the latest.
569 		if (page->State() == PAGE_STATE_CACHED
570 				|| page->State() == PAGE_STATE_INACTIVE) {
571 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
572 		}
573 	}
574 
575 	return B_OK;
576 }
577 
578 
579 /*!	If \a preserveModified is \c true, the caller must hold the lock of the
580 	page's cache.
581 */
582 static inline bool
583 unmap_page(VMArea* area, addr_t virtualAddress)
584 {
585 	return area->address_space->TranslationMap()->UnmapPage(area,
586 		virtualAddress, true);
587 }
588 
589 
590 /*!	If \a preserveModified is \c true, the caller must hold the lock of all
591 	mapped pages' caches.
592 */
593 static inline void
594 unmap_pages(VMArea* area, addr_t base, size_t size)
595 {
596 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
597 }
598 
599 
600 /*!	Cuts a piece out of an area. If the given cut range covers the complete
601 	area, it is deleted. If it covers the beginning or the end, the area is
602 	resized accordingly. If the range covers some part in the middle of the
603 	area, it is split in two; in this case the second area is returned via
604 	\a _secondArea (the variable is left untouched in the other cases).
605 	The address space must be write locked.
606 	The caller must ensure that no part of the given range is wired.
607 */
608 static status_t
609 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
610 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
611 {
612 	// Does the cut range intersect with the area at all?
613 	addr_t areaLast = area->Base() + (area->Size() - 1);
614 	if (area->Base() > lastAddress || areaLast < address)
615 		return B_OK;
616 
617 	// Is the area fully covered?
618 	if (area->Base() >= address && areaLast <= lastAddress) {
619 		delete_area(addressSpace, area, false);
620 		return B_OK;
621 	}
622 
623 	int priority;
624 	uint32 allocationFlags;
625 	if (addressSpace == VMAddressSpace::Kernel()) {
626 		priority = VM_PRIORITY_SYSTEM;
627 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
628 			| HEAP_DONT_LOCK_KERNEL_SPACE;
629 	} else {
630 		priority = VM_PRIORITY_USER;
631 		allocationFlags = 0;
632 	}
633 
634 	VMCache* cache = vm_area_get_locked_cache(area);
635 	VMCacheChainLocker cacheChainLocker(cache);
636 	cacheChainLocker.LockAllSourceCaches();
637 
638 	// Cut the end only?
639 	if (areaLast <= lastAddress) {
640 		size_t oldSize = area->Size();
641 		size_t newSize = address - area->Base();
642 
643 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
644 			allocationFlags);
645 		if (error != B_OK)
646 			return error;
647 
648 		// unmap pages
649 		unmap_pages(area, address, oldSize - newSize);
650 
651 		// If no one else uses the area's cache, we can resize it, too.
652 		if (cache->areas == area && area->cache_next == NULL
653 			&& cache->consumers.IsEmpty()
654 			&& cache->type == CACHE_TYPE_RAM) {
655 			// Since VMCache::Resize() can temporarily drop the lock, we must
656 			// unlock all lower caches to prevent locking order inversion.
657 			cacheChainLocker.Unlock(cache);
658 			cache->Resize(cache->virtual_base + newSize, priority);
659 			cache->ReleaseRefAndUnlock();
660 		}
661 
662 		return B_OK;
663 	}
664 
665 	// Cut the beginning only?
666 	if (area->Base() >= address) {
667 		addr_t oldBase = area->Base();
668 		addr_t newBase = lastAddress + 1;
669 		size_t newSize = areaLast - lastAddress;
670 
671 		// unmap pages
672 		unmap_pages(area, oldBase, newBase - oldBase);
673 
674 		// resize the area
675 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
676 			allocationFlags);
677 		if (error != B_OK)
678 			return error;
679 
680 		// TODO: If no one else uses the area's cache, we should resize it, too!
681 
682 		area->cache_offset += newBase - oldBase;
683 
684 		return B_OK;
685 	}
686 
687 	// The tough part -- cut a piece out of the middle of the area.
688 	// We do that by shrinking the area to the begin section and creating a
689 	// new area for the end section.
690 
691 	addr_t firstNewSize = address - area->Base();
692 	addr_t secondBase = lastAddress + 1;
693 	addr_t secondSize = areaLast - lastAddress;
694 
695 	// unmap pages
696 	unmap_pages(area, address, area->Size() - firstNewSize);
697 
698 	// resize the area
699 	addr_t oldSize = area->Size();
700 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
701 		allocationFlags);
702 	if (error != B_OK)
703 		return error;
704 
705 	// TODO: If no one else uses the area's cache, we might want to create a
706 	// new cache for the second area, transfer the concerned pages from the
707 	// first cache to it and resize the first cache.
708 
709 	// map the second area
710 	virtual_address_restrictions addressRestrictions = {};
711 	addressRestrictions.address = (void*)secondBase;
712 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
713 	VMArea* secondArea;
714 	error = map_backing_store(addressSpace, cache,
715 		area->cache_offset + (secondBase - area->Base()), area->name,
716 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
717 		&addressRestrictions, kernel, &secondArea, NULL);
718 	if (error != B_OK) {
719 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
720 		return error;
721 	}
722 
723 	// We need a cache reference for the new area.
724 	cache->AcquireRefLocked();
725 
726 	if (_secondArea != NULL)
727 		*_secondArea = secondArea;
728 
729 	return B_OK;
730 }
731 
732 
733 /*!	Deletes all areas in the given address range.
734 	The address space must be write-locked.
735 	The caller must ensure that no part of the given range is wired.
736 */
737 static status_t
738 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
739 	bool kernel)
740 {
741 	size = PAGE_ALIGN(size);
742 	addr_t lastAddress = address + (size - 1);
743 
744 	// Check, whether the caller is allowed to modify the concerned areas.
745 	if (!kernel) {
746 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
747 				VMArea* area = it.Next();) {
748 			addr_t areaLast = area->Base() + (area->Size() - 1);
749 			if (area->Base() < lastAddress && address < areaLast) {
750 				if ((area->protection & B_KERNEL_AREA) != 0)
751 					return B_NOT_ALLOWED;
752 			}
753 		}
754 	}
755 
756 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
757 			VMArea* area = it.Next();) {
758 		addr_t areaLast = area->Base() + (area->Size() - 1);
759 		if (area->Base() < lastAddress && address < areaLast) {
760 			status_t error = cut_area(addressSpace, area, address,
761 				lastAddress, NULL, kernel);
762 			if (error != B_OK)
763 				return error;
764 				// Failing after already messing with areas is ugly, but we
765 				// can't do anything about it.
766 		}
767 	}
768 
769 	return B_OK;
770 }
771 
772 
773 /*! You need to hold the lock of the cache and the write lock of the address
774 	space when calling this function.
775 	Note, that in case of error your cache will be temporarily unlocked.
776 	If \a addressSpec is \c B_EXACT_ADDRESS and the
777 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
778 	that no part of the specified address range (base \c *_virtualAddress, size
779 	\a size) is wired.
780 */
781 static status_t
782 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
783 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
784 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
785 	bool kernel, VMArea** _area, void** _virtualAddress)
786 {
787 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
788 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
789 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
790 		addressRestrictions->address, offset, size,
791 		addressRestrictions->address_specification, wiring, protection,
792 		_area, areaName));
793 	cache->AssertLocked();
794 
795 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
796 		| HEAP_DONT_LOCK_KERNEL_SPACE;
797 	int priority;
798 	if (addressSpace != VMAddressSpace::Kernel()) {
799 		priority = VM_PRIORITY_USER;
800 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
801 		priority = VM_PRIORITY_VIP;
802 		allocationFlags |= HEAP_PRIORITY_VIP;
803 	} else
804 		priority = VM_PRIORITY_SYSTEM;
805 
806 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
807 		allocationFlags);
808 	if (area == NULL)
809 		return B_NO_MEMORY;
810 
811 	status_t status;
812 
813 	// if this is a private map, we need to create a new cache
814 	// to handle the private copies of pages as they are written to
815 	VMCache* sourceCache = cache;
816 	if (mapping == REGION_PRIVATE_MAP) {
817 		VMCache* newCache;
818 
819 		// create an anonymous cache
820 		status = VMCacheFactory::CreateAnonymousCache(newCache,
821 			(protection & B_STACK_AREA) != 0
822 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
823 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
824 		if (status != B_OK)
825 			goto err1;
826 
827 		newCache->Lock();
828 		newCache->temporary = 1;
829 		newCache->virtual_base = offset;
830 		newCache->virtual_end = offset + size;
831 
832 		cache->AddConsumer(newCache);
833 
834 		cache = newCache;
835 	}
836 
837 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
838 		status = cache->SetMinimalCommitment(size, priority);
839 		if (status != B_OK)
840 			goto err2;
841 	}
842 
843 	// check to see if this address space has entered DELETE state
844 	if (addressSpace->IsBeingDeleted()) {
845 		// okay, someone is trying to delete this address space now, so we can't
846 		// insert the area, so back out
847 		status = B_BAD_TEAM_ID;
848 		goto err2;
849 	}
850 
851 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
852 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
853 		status = unmap_address_range(addressSpace,
854 			(addr_t)addressRestrictions->address, size, kernel);
855 		if (status != B_OK)
856 			goto err2;
857 	}
858 
859 	status = addressSpace->InsertArea(area, size, addressRestrictions,
860 		allocationFlags, _virtualAddress);
861 	if (status != B_OK) {
862 		// TODO: wait and try again once this is working in the backend
863 #if 0
864 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
865 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
866 				0, 0);
867 		}
868 #endif
869 		goto err2;
870 	}
871 
872 	// attach the cache to the area
873 	area->cache = cache;
874 	area->cache_offset = offset;
875 
876 	// point the cache back to the area
877 	cache->InsertAreaLocked(area);
878 	if (mapping == REGION_PRIVATE_MAP)
879 		cache->Unlock();
880 
881 	// insert the area in the global area hash table
882 	VMAreaHash::Insert(area);
883 
884 	// grab a ref to the address space (the area holds this)
885 	addressSpace->Get();
886 
887 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
888 //		cache, sourceCache, areaName, area);
889 
890 	*_area = area;
891 	return B_OK;
892 
893 err2:
894 	if (mapping == REGION_PRIVATE_MAP) {
895 		// We created this cache, so we must delete it again. Note, that we
896 		// need to temporarily unlock the source cache or we'll otherwise
897 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
898 		sourceCache->Unlock();
899 		cache->ReleaseRefAndUnlock();
900 		sourceCache->Lock();
901 	}
902 err1:
903 	addressSpace->DeleteArea(area, allocationFlags);
904 	return status;
905 }
906 
907 
908 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
909 	  locker1, locker2).
910 */
911 template<typename LockerType1, typename LockerType2>
912 static inline bool
913 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
914 {
915 	area->cache->AssertLocked();
916 
917 	VMAreaUnwiredWaiter waiter;
918 	if (!area->AddWaiterIfWired(&waiter))
919 		return false;
920 
921 	// unlock everything and wait
922 	if (locker1 != NULL)
923 		locker1->Unlock();
924 	if (locker2 != NULL)
925 		locker2->Unlock();
926 
927 	waiter.waitEntry.Wait();
928 
929 	return true;
930 }
931 
932 
933 /*!	Checks whether the given area has any wired ranges intersecting with the
934 	specified range and waits, if so.
935 
936 	When it has to wait, the function calls \c Unlock() on both \a locker1
937 	and \a locker2, if given.
938 	The area's top cache must be locked and must be unlocked as a side effect
939 	of calling \c Unlock() on either \a locker1 or \a locker2.
940 
941 	If the function does not have to wait it does not modify or unlock any
942 	object.
943 
944 	\param area The area to be checked.
945 	\param base The base address of the range to check.
946 	\param size The size of the address range to check.
947 	\param locker1 An object to be unlocked when before starting to wait (may
948 		be \c NULL).
949 	\param locker2 An object to be unlocked when before starting to wait (may
950 		be \c NULL).
951 	\return \c true, if the function had to wait, \c false otherwise.
952 */
953 template<typename LockerType1, typename LockerType2>
954 static inline bool
955 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
956 	LockerType1* locker1, LockerType2* locker2)
957 {
958 	area->cache->AssertLocked();
959 
960 	VMAreaUnwiredWaiter waiter;
961 	if (!area->AddWaiterIfWired(&waiter, base, size))
962 		return false;
963 
964 	// unlock everything and wait
965 	if (locker1 != NULL)
966 		locker1->Unlock();
967 	if (locker2 != NULL)
968 		locker2->Unlock();
969 
970 	waiter.waitEntry.Wait();
971 
972 	return true;
973 }
974 
975 
976 /*!	Checks whether the given address space has any wired ranges intersecting
977 	with the specified range and waits, if so.
978 
979 	Similar to wait_if_area_range_is_wired(), with the following differences:
980 	- All areas intersecting with the range are checked (respectively all until
981 	  one is found that contains a wired range intersecting with the given
982 	  range).
983 	- The given address space must at least be read-locked and must be unlocked
984 	  when \c Unlock() is called on \a locker.
985 	- None of the areas' caches are allowed to be locked.
986 */
987 template<typename LockerType>
988 static inline bool
989 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
990 	size_t size, LockerType* locker)
991 {
992 	addr_t end = base + size - 1;
993 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
994 			VMArea* area = it.Next();) {
995 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
996 		if (area->Base() > end)
997 			return false;
998 
999 		if (base >= area->Base() + area->Size() - 1)
1000 			continue;
1001 
1002 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1003 
1004 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1005 			return true;
1006 	}
1007 
1008 	return false;
1009 }
1010 
1011 
1012 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1013 	It must be called in a situation where the kernel address space may be
1014 	locked.
1015 */
1016 status_t
1017 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1018 {
1019 	AddressSpaceReadLocker locker;
1020 	VMArea* area;
1021 	status_t status = locker.SetFromArea(id, area);
1022 	if (status != B_OK)
1023 		return status;
1024 
1025 	if (area->page_protections == NULL) {
1026 		status = allocate_area_page_protections(area);
1027 		if (status != B_OK)
1028 			return status;
1029 	}
1030 
1031 	*cookie = (void*)area;
1032 	return B_OK;
1033 }
1034 
1035 
1036 /*!	This is a debug helper function that can only be used with very specific
1037 	use cases.
1038 	Sets protection for the given address range to the protection specified.
1039 	If \a protection is 0 then the involved pages will be marked non-present
1040 	in the translation map to cause a fault on access. The pages aren't
1041 	actually unmapped however so that they can be marked present again with
1042 	additional calls to this function. For this to work the area must be
1043 	fully locked in memory so that the pages aren't otherwise touched.
1044 	This function does not lock the kernel address space and needs to be
1045 	supplied with a \a cookie retrieved from a successful call to
1046 	vm_prepare_kernel_area_debug_protection().
1047 */
1048 status_t
1049 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1050 	uint32 protection)
1051 {
1052 	// check address range
1053 	addr_t address = (addr_t)_address;
1054 	size = PAGE_ALIGN(size);
1055 
1056 	if ((address % B_PAGE_SIZE) != 0
1057 		|| (addr_t)address + size < (addr_t)address
1058 		|| !IS_KERNEL_ADDRESS(address)
1059 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1060 		return B_BAD_VALUE;
1061 	}
1062 
1063 	// Translate the kernel protection to user protection as we only store that.
1064 	if ((protection & B_KERNEL_READ_AREA) != 0)
1065 		protection |= B_READ_AREA;
1066 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1067 		protection |= B_WRITE_AREA;
1068 
1069 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1070 	VMTranslationMap* map = addressSpace->TranslationMap();
1071 	VMArea* area = (VMArea*)cookie;
1072 
1073 	addr_t offset = address - area->Base();
1074 	if (area->Size() - offset < size) {
1075 		panic("protect range not fully within supplied area");
1076 		return B_BAD_VALUE;
1077 	}
1078 
1079 	if (area->page_protections == NULL) {
1080 		panic("area has no page protections");
1081 		return B_BAD_VALUE;
1082 	}
1083 
1084 	// Invalidate the mapping entries so any access to them will fault or
1085 	// restore the mapping entries unchanged so that lookup will success again.
1086 	map->Lock();
1087 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1088 	map->Unlock();
1089 
1090 	// And set the proper page protections so that the fault case will actually
1091 	// fail and not simply try to map a new page.
1092 	for (addr_t pageAddress = address; pageAddress < address + size;
1093 			pageAddress += B_PAGE_SIZE) {
1094 		set_area_page_protection(area, pageAddress, protection);
1095 	}
1096 
1097 	return B_OK;
1098 }
1099 
1100 
1101 status_t
1102 vm_block_address_range(const char* name, void* address, addr_t size)
1103 {
1104 	if (!arch_vm_supports_protection(0))
1105 		return B_NOT_SUPPORTED;
1106 
1107 	AddressSpaceWriteLocker locker;
1108 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1109 	if (status != B_OK)
1110 		return status;
1111 
1112 	VMAddressSpace* addressSpace = locker.AddressSpace();
1113 
1114 	// create an anonymous cache
1115 	VMCache* cache;
1116 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1117 		VM_PRIORITY_SYSTEM);
1118 	if (status != B_OK)
1119 		return status;
1120 
1121 	cache->temporary = 1;
1122 	cache->virtual_end = size;
1123 	cache->Lock();
1124 
1125 	VMArea* area;
1126 	virtual_address_restrictions addressRestrictions = {};
1127 	addressRestrictions.address = address;
1128 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1129 	status = map_backing_store(addressSpace, cache, 0, name, size,
1130 		B_ALREADY_WIRED, B_ALREADY_WIRED, REGION_NO_PRIVATE_MAP, 0,
1131 		&addressRestrictions, true, &area, NULL);
1132 	if (status != B_OK) {
1133 		cache->ReleaseRefAndUnlock();
1134 		return status;
1135 	}
1136 
1137 	cache->Unlock();
1138 	area->cache_type = CACHE_TYPE_RAM;
1139 	return area->id;
1140 }
1141 
1142 
1143 status_t
1144 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1145 {
1146 	AddressSpaceWriteLocker locker(team);
1147 	if (!locker.IsLocked())
1148 		return B_BAD_TEAM_ID;
1149 
1150 	VMAddressSpace* addressSpace = locker.AddressSpace();
1151 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1152 		addressSpace == VMAddressSpace::Kernel()
1153 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1154 }
1155 
1156 
1157 status_t
1158 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1159 	addr_t size, uint32 flags)
1160 {
1161 	if (size == 0)
1162 		return B_BAD_VALUE;
1163 
1164 	AddressSpaceWriteLocker locker(team);
1165 	if (!locker.IsLocked())
1166 		return B_BAD_TEAM_ID;
1167 
1168 	virtual_address_restrictions addressRestrictions = {};
1169 	addressRestrictions.address = *_address;
1170 	addressRestrictions.address_specification = addressSpec;
1171 	VMAddressSpace* addressSpace = locker.AddressSpace();
1172 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1173 		addressSpace == VMAddressSpace::Kernel()
1174 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1175 		_address);
1176 }
1177 
1178 
1179 area_id
1180 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1181 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1182 	const virtual_address_restrictions* virtualAddressRestrictions,
1183 	const physical_address_restrictions* physicalAddressRestrictions,
1184 	bool kernel, void** _address)
1185 {
1186 	VMArea* area;
1187 	VMCache* cache;
1188 	vm_page* page = NULL;
1189 	bool isStack = (protection & B_STACK_AREA) != 0;
1190 	page_num_t guardPages;
1191 	bool canOvercommit = false;
1192 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1193 		? VM_PAGE_ALLOC_CLEAR : 0;
1194 
1195 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1196 		team, name, size));
1197 
1198 	size = PAGE_ALIGN(size);
1199 	guardSize = PAGE_ALIGN(guardSize);
1200 	guardPages = guardSize / B_PAGE_SIZE;
1201 
1202 	if (size == 0 || size < guardSize)
1203 		return B_BAD_VALUE;
1204 	if (!arch_vm_supports_protection(protection))
1205 		return B_NOT_SUPPORTED;
1206 
1207 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1208 		canOvercommit = true;
1209 
1210 #ifdef DEBUG_KERNEL_STACKS
1211 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1212 		isStack = true;
1213 #endif
1214 
1215 	// check parameters
1216 	switch (virtualAddressRestrictions->address_specification) {
1217 		case B_ANY_ADDRESS:
1218 		case B_EXACT_ADDRESS:
1219 		case B_BASE_ADDRESS:
1220 		case B_ANY_KERNEL_ADDRESS:
1221 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1222 			break;
1223 
1224 		default:
1225 			return B_BAD_VALUE;
1226 	}
1227 
1228 	// If low or high physical address restrictions are given, we force
1229 	// B_CONTIGUOUS wiring, since only then we'll use
1230 	// vm_page_allocate_page_run() which deals with those restrictions.
1231 	if (physicalAddressRestrictions->low_address != 0
1232 		|| physicalAddressRestrictions->high_address != 0) {
1233 		wiring = B_CONTIGUOUS;
1234 	}
1235 
1236 	physical_address_restrictions stackPhysicalRestrictions;
1237 	bool doReserveMemory = false;
1238 	switch (wiring) {
1239 		case B_NO_LOCK:
1240 			break;
1241 		case B_FULL_LOCK:
1242 		case B_LAZY_LOCK:
1243 		case B_CONTIGUOUS:
1244 			doReserveMemory = true;
1245 			break;
1246 		case B_ALREADY_WIRED:
1247 			break;
1248 		case B_LOMEM:
1249 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1250 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1251 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1252 			wiring = B_CONTIGUOUS;
1253 			doReserveMemory = true;
1254 			break;
1255 		case B_32_BIT_FULL_LOCK:
1256 			if (B_HAIKU_PHYSICAL_BITS <= 32
1257 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1258 				wiring = B_FULL_LOCK;
1259 				doReserveMemory = true;
1260 				break;
1261 			}
1262 			// TODO: We don't really support this mode efficiently. Just fall
1263 			// through for now ...
1264 		case B_32_BIT_CONTIGUOUS:
1265 			#if B_HAIKU_PHYSICAL_BITS > 32
1266 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1267 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1268 					stackPhysicalRestrictions.high_address
1269 						= (phys_addr_t)1 << 32;
1270 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1271 				}
1272 			#endif
1273 			wiring = B_CONTIGUOUS;
1274 			doReserveMemory = true;
1275 			break;
1276 		default:
1277 			return B_BAD_VALUE;
1278 	}
1279 
1280 	// Optimization: For a single-page contiguous allocation without low/high
1281 	// memory restriction B_FULL_LOCK wiring suffices.
1282 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1283 		&& physicalAddressRestrictions->low_address == 0
1284 		&& physicalAddressRestrictions->high_address == 0) {
1285 		wiring = B_FULL_LOCK;
1286 	}
1287 
1288 	// For full lock or contiguous areas we're also going to map the pages and
1289 	// thus need to reserve pages for the mapping backend upfront.
1290 	addr_t reservedMapPages = 0;
1291 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1292 		AddressSpaceWriteLocker locker;
1293 		status_t status = locker.SetTo(team);
1294 		if (status != B_OK)
1295 			return status;
1296 
1297 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1298 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1299 	}
1300 
1301 	int priority;
1302 	if (team != VMAddressSpace::KernelID())
1303 		priority = VM_PRIORITY_USER;
1304 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1305 		priority = VM_PRIORITY_VIP;
1306 	else
1307 		priority = VM_PRIORITY_SYSTEM;
1308 
1309 	// Reserve memory before acquiring the address space lock. This reduces the
1310 	// chances of failure, since while holding the write lock to the address
1311 	// space (if it is the kernel address space that is), the low memory handler
1312 	// won't be able to free anything for us.
1313 	addr_t reservedMemory = 0;
1314 	if (doReserveMemory) {
1315 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1316 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1317 			return B_NO_MEMORY;
1318 		reservedMemory = size;
1319 		// TODO: We don't reserve the memory for the pages for the page
1320 		// directories/tables. We actually need to do since we currently don't
1321 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1322 		// there are actually less physical pages than there should be, which
1323 		// can get the VM into trouble in low memory situations.
1324 	}
1325 
1326 	AddressSpaceWriteLocker locker;
1327 	VMAddressSpace* addressSpace;
1328 	status_t status;
1329 
1330 	// For full lock areas reserve the pages before locking the address
1331 	// space. E.g. block caches can't release their memory while we hold the
1332 	// address space lock.
1333 	page_num_t reservedPages = reservedMapPages;
1334 	if (wiring == B_FULL_LOCK)
1335 		reservedPages += size / B_PAGE_SIZE;
1336 
1337 	vm_page_reservation reservation;
1338 	if (reservedPages > 0) {
1339 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1340 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1341 					priority)) {
1342 				reservedPages = 0;
1343 				status = B_WOULD_BLOCK;
1344 				goto err0;
1345 			}
1346 		} else
1347 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1348 	}
1349 
1350 	if (wiring == B_CONTIGUOUS) {
1351 		// we try to allocate the page run here upfront as this may easily
1352 		// fail for obvious reasons
1353 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1354 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1355 		if (page == NULL) {
1356 			status = B_NO_MEMORY;
1357 			goto err0;
1358 		}
1359 	}
1360 
1361 	// Lock the address space and, if B_EXACT_ADDRESS and
1362 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1363 	// is not wired.
1364 	do {
1365 		status = locker.SetTo(team);
1366 		if (status != B_OK)
1367 			goto err1;
1368 
1369 		addressSpace = locker.AddressSpace();
1370 	} while (virtualAddressRestrictions->address_specification
1371 			== B_EXACT_ADDRESS
1372 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1373 		&& wait_if_address_range_is_wired(addressSpace,
1374 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1375 
1376 	// create an anonymous cache
1377 	// if it's a stack, make sure that two pages are available at least
1378 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1379 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1380 		wiring == B_NO_LOCK, priority);
1381 	if (status != B_OK)
1382 		goto err1;
1383 
1384 	cache->temporary = 1;
1385 	cache->virtual_end = size;
1386 	cache->committed_size = reservedMemory;
1387 		// TODO: This should be done via a method.
1388 	reservedMemory = 0;
1389 
1390 	cache->Lock();
1391 
1392 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1393 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1394 		kernel, &area, _address);
1395 
1396 	if (status != B_OK) {
1397 		cache->ReleaseRefAndUnlock();
1398 		goto err1;
1399 	}
1400 
1401 	locker.DegradeToReadLock();
1402 
1403 	switch (wiring) {
1404 		case B_NO_LOCK:
1405 		case B_LAZY_LOCK:
1406 			// do nothing - the pages are mapped in as needed
1407 			break;
1408 
1409 		case B_FULL_LOCK:
1410 		{
1411 			// Allocate and map all pages for this area
1412 
1413 			off_t offset = 0;
1414 			for (addr_t address = area->Base();
1415 					address < area->Base() + (area->Size() - 1);
1416 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1417 #ifdef DEBUG_KERNEL_STACKS
1418 #	ifdef STACK_GROWS_DOWNWARDS
1419 				if (isStack && address < area->Base()
1420 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1421 #	else
1422 				if (isStack && address >= area->Base() + area->Size()
1423 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1424 #	endif
1425 					continue;
1426 #endif
1427 				vm_page* page = vm_page_allocate_page(&reservation,
1428 					PAGE_STATE_WIRED | pageAllocFlags);
1429 				cache->InsertPage(page, offset);
1430 				map_page(area, page, address, protection, &reservation);
1431 
1432 				DEBUG_PAGE_ACCESS_END(page);
1433 			}
1434 
1435 			break;
1436 		}
1437 
1438 		case B_ALREADY_WIRED:
1439 		{
1440 			// The pages should already be mapped. This is only really useful
1441 			// during boot time. Find the appropriate vm_page objects and stick
1442 			// them in the cache object.
1443 			VMTranslationMap* map = addressSpace->TranslationMap();
1444 			off_t offset = 0;
1445 
1446 			if (!gKernelStartup)
1447 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1448 
1449 			map->Lock();
1450 
1451 			for (addr_t virtualAddress = area->Base();
1452 					virtualAddress < area->Base() + (area->Size() - 1);
1453 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1454 				phys_addr_t physicalAddress;
1455 				uint32 flags;
1456 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1457 				if (status < B_OK) {
1458 					panic("looking up mapping failed for va 0x%lx\n",
1459 						virtualAddress);
1460 				}
1461 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1462 				if (page == NULL) {
1463 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1464 						"\n", physicalAddress);
1465 				}
1466 
1467 				DEBUG_PAGE_ACCESS_START(page);
1468 
1469 				cache->InsertPage(page, offset);
1470 				increment_page_wired_count(page);
1471 				vm_page_set_state(page, PAGE_STATE_WIRED);
1472 				page->busy = false;
1473 
1474 				DEBUG_PAGE_ACCESS_END(page);
1475 			}
1476 
1477 			map->Unlock();
1478 			break;
1479 		}
1480 
1481 		case B_CONTIGUOUS:
1482 		{
1483 			// We have already allocated our continuous pages run, so we can now
1484 			// just map them in the address space
1485 			VMTranslationMap* map = addressSpace->TranslationMap();
1486 			phys_addr_t physicalAddress
1487 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1488 			addr_t virtualAddress = area->Base();
1489 			off_t offset = 0;
1490 
1491 			map->Lock();
1492 
1493 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1494 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1495 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1496 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1497 				if (page == NULL)
1498 					panic("couldn't lookup physical page just allocated\n");
1499 
1500 				status = map->Map(virtualAddress, physicalAddress, protection,
1501 					area->MemoryType(), &reservation);
1502 				if (status < B_OK)
1503 					panic("couldn't map physical page in page run\n");
1504 
1505 				cache->InsertPage(page, offset);
1506 				increment_page_wired_count(page);
1507 
1508 				DEBUG_PAGE_ACCESS_END(page);
1509 			}
1510 
1511 			map->Unlock();
1512 			break;
1513 		}
1514 
1515 		default:
1516 			break;
1517 	}
1518 
1519 	cache->Unlock();
1520 
1521 	if (reservedPages > 0)
1522 		vm_page_unreserve_pages(&reservation);
1523 
1524 	TRACE(("vm_create_anonymous_area: done\n"));
1525 
1526 	area->cache_type = CACHE_TYPE_RAM;
1527 	return area->id;
1528 
1529 err1:
1530 	if (wiring == B_CONTIGUOUS) {
1531 		// we had reserved the area space upfront...
1532 		phys_addr_t pageNumber = page->physical_page_number;
1533 		int32 i;
1534 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1535 			page = vm_lookup_page(pageNumber);
1536 			if (page == NULL)
1537 				panic("couldn't lookup physical page just allocated\n");
1538 
1539 			vm_page_set_state(page, PAGE_STATE_FREE);
1540 		}
1541 	}
1542 
1543 err0:
1544 	if (reservedPages > 0)
1545 		vm_page_unreserve_pages(&reservation);
1546 	if (reservedMemory > 0)
1547 		vm_unreserve_memory(reservedMemory);
1548 
1549 	return status;
1550 }
1551 
1552 
1553 area_id
1554 vm_map_physical_memory(team_id team, const char* name, void** _address,
1555 	uint32 addressSpec, addr_t size, uint32 protection,
1556 	phys_addr_t physicalAddress, bool alreadyWired)
1557 {
1558 	VMArea* area;
1559 	VMCache* cache;
1560 	addr_t mapOffset;
1561 
1562 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1563 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1564 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1565 		addressSpec, size, protection, physicalAddress));
1566 
1567 	if (!arch_vm_supports_protection(protection))
1568 		return B_NOT_SUPPORTED;
1569 
1570 	AddressSpaceWriteLocker locker(team);
1571 	if (!locker.IsLocked())
1572 		return B_BAD_TEAM_ID;
1573 
1574 	// if the physical address is somewhat inside a page,
1575 	// move the actual area down to align on a page boundary
1576 	mapOffset = physicalAddress % B_PAGE_SIZE;
1577 	size += mapOffset;
1578 	physicalAddress -= mapOffset;
1579 
1580 	size = PAGE_ALIGN(size);
1581 
1582 	// create a device cache
1583 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1584 	if (status != B_OK)
1585 		return status;
1586 
1587 	cache->virtual_end = size;
1588 
1589 	cache->Lock();
1590 
1591 	virtual_address_restrictions addressRestrictions = {};
1592 	addressRestrictions.address = *_address;
1593 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1594 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1595 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1596 		true, &area, _address);
1597 
1598 	if (status < B_OK)
1599 		cache->ReleaseRefLocked();
1600 
1601 	cache->Unlock();
1602 
1603 	if (status == B_OK) {
1604 		// set requested memory type -- use uncached, if not given
1605 		uint32 memoryType = addressSpec & B_MTR_MASK;
1606 		if (memoryType == 0)
1607 			memoryType = B_MTR_UC;
1608 
1609 		area->SetMemoryType(memoryType);
1610 
1611 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1612 		if (status != B_OK)
1613 			delete_area(locker.AddressSpace(), area, false);
1614 	}
1615 
1616 	if (status != B_OK)
1617 		return status;
1618 
1619 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1620 
1621 	if (alreadyWired) {
1622 		// The area is already mapped, but possibly not with the right
1623 		// memory type.
1624 		map->Lock();
1625 		map->ProtectArea(area, area->protection);
1626 		map->Unlock();
1627 	} else {
1628 		// Map the area completely.
1629 
1630 		// reserve pages needed for the mapping
1631 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1632 			area->Base() + (size - 1));
1633 		vm_page_reservation reservation;
1634 		vm_page_reserve_pages(&reservation, reservePages,
1635 			team == VMAddressSpace::KernelID()
1636 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1637 
1638 		map->Lock();
1639 
1640 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1641 			map->Map(area->Base() + offset, physicalAddress + offset,
1642 				protection, area->MemoryType(), &reservation);
1643 		}
1644 
1645 		map->Unlock();
1646 
1647 		vm_page_unreserve_pages(&reservation);
1648 	}
1649 
1650 	// modify the pointer returned to be offset back into the new area
1651 	// the same way the physical address in was offset
1652 	*_address = (void*)((addr_t)*_address + mapOffset);
1653 
1654 	area->cache_type = CACHE_TYPE_DEVICE;
1655 	return area->id;
1656 }
1657 
1658 
1659 /*!	Don't use!
1660 	TODO: This function was introduced to map physical page vecs to
1661 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1662 	use a device cache and does not track vm_page::wired_count!
1663 */
1664 area_id
1665 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1666 	uint32 addressSpec, addr_t* _size, uint32 protection,
1667 	struct generic_io_vec* vecs, uint32 vecCount)
1668 {
1669 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1670 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1671 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1672 		addressSpec, _size, protection, vecs, vecCount));
1673 
1674 	if (!arch_vm_supports_protection(protection)
1675 		|| (addressSpec & B_MTR_MASK) != 0) {
1676 		return B_NOT_SUPPORTED;
1677 	}
1678 
1679 	AddressSpaceWriteLocker locker(team);
1680 	if (!locker.IsLocked())
1681 		return B_BAD_TEAM_ID;
1682 
1683 	if (vecCount == 0)
1684 		return B_BAD_VALUE;
1685 
1686 	addr_t size = 0;
1687 	for (uint32 i = 0; i < vecCount; i++) {
1688 		if (vecs[i].base % B_PAGE_SIZE != 0
1689 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1690 			return B_BAD_VALUE;
1691 		}
1692 
1693 		size += vecs[i].length;
1694 	}
1695 
1696 	// create a device cache
1697 	VMCache* cache;
1698 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1699 	if (result != B_OK)
1700 		return result;
1701 
1702 	cache->virtual_end = size;
1703 
1704 	cache->Lock();
1705 
1706 	VMArea* area;
1707 	virtual_address_restrictions addressRestrictions = {};
1708 	addressRestrictions.address = *_address;
1709 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1710 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1711 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1712 		&addressRestrictions, true, &area, _address);
1713 
1714 	if (result != B_OK)
1715 		cache->ReleaseRefLocked();
1716 
1717 	cache->Unlock();
1718 
1719 	if (result != B_OK)
1720 		return result;
1721 
1722 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1723 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1724 		area->Base() + (size - 1));
1725 
1726 	vm_page_reservation reservation;
1727 	vm_page_reserve_pages(&reservation, reservePages,
1728 			team == VMAddressSpace::KernelID()
1729 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1730 	map->Lock();
1731 
1732 	uint32 vecIndex = 0;
1733 	size_t vecOffset = 0;
1734 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1735 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1736 			vecOffset = 0;
1737 			vecIndex++;
1738 		}
1739 
1740 		if (vecIndex >= vecCount)
1741 			break;
1742 
1743 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1744 			protection, area->MemoryType(), &reservation);
1745 
1746 		vecOffset += B_PAGE_SIZE;
1747 	}
1748 
1749 	map->Unlock();
1750 	vm_page_unreserve_pages(&reservation);
1751 
1752 	if (_size != NULL)
1753 		*_size = size;
1754 
1755 	area->cache_type = CACHE_TYPE_DEVICE;
1756 	return area->id;
1757 }
1758 
1759 
1760 area_id
1761 vm_create_null_area(team_id team, const char* name, void** address,
1762 	uint32 addressSpec, addr_t size, uint32 flags)
1763 {
1764 	size = PAGE_ALIGN(size);
1765 
1766 	// Lock the address space and, if B_EXACT_ADDRESS and
1767 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1768 	// is not wired.
1769 	AddressSpaceWriteLocker locker;
1770 	do {
1771 		if (locker.SetTo(team) != B_OK)
1772 			return B_BAD_TEAM_ID;
1773 	} while (addressSpec == B_EXACT_ADDRESS
1774 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1775 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1776 			(addr_t)*address, size, &locker));
1777 
1778 	// create a null cache
1779 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1780 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1781 	VMCache* cache;
1782 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1783 	if (status != B_OK)
1784 		return status;
1785 
1786 	cache->temporary = 1;
1787 	cache->virtual_end = size;
1788 
1789 	cache->Lock();
1790 
1791 	VMArea* area;
1792 	virtual_address_restrictions addressRestrictions = {};
1793 	addressRestrictions.address = *address;
1794 	addressRestrictions.address_specification = addressSpec;
1795 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1796 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1797 		&addressRestrictions, true, &area, address);
1798 
1799 	if (status < B_OK) {
1800 		cache->ReleaseRefAndUnlock();
1801 		return status;
1802 	}
1803 
1804 	cache->Unlock();
1805 
1806 	area->cache_type = CACHE_TYPE_NULL;
1807 	return area->id;
1808 }
1809 
1810 
1811 /*!	Creates the vnode cache for the specified \a vnode.
1812 	The vnode has to be marked busy when calling this function.
1813 */
1814 status_t
1815 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1816 {
1817 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1818 }
1819 
1820 
1821 /*!	\a cache must be locked. The area's address space must be read-locked.
1822 */
1823 static void
1824 pre_map_area_pages(VMArea* area, VMCache* cache,
1825 	vm_page_reservation* reservation)
1826 {
1827 	addr_t baseAddress = area->Base();
1828 	addr_t cacheOffset = area->cache_offset;
1829 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1830 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1831 
1832 	for (VMCachePagesTree::Iterator it
1833 				= cache->pages.GetIterator(firstPage, true, true);
1834 			vm_page* page = it.Next();) {
1835 		if (page->cache_offset >= endPage)
1836 			break;
1837 
1838 		// skip busy and inactive pages
1839 		if (page->busy || page->usage_count == 0)
1840 			continue;
1841 
1842 		DEBUG_PAGE_ACCESS_START(page);
1843 		map_page(area, page,
1844 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1845 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1846 		DEBUG_PAGE_ACCESS_END(page);
1847 	}
1848 }
1849 
1850 
1851 /*!	Will map the file specified by \a fd to an area in memory.
1852 	The file will be mirrored beginning at the specified \a offset. The
1853 	\a offset and \a size arguments have to be page aligned.
1854 */
1855 static area_id
1856 _vm_map_file(team_id team, const char* name, void** _address,
1857 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1858 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1859 {
1860 	// TODO: for binary files, we want to make sure that they get the
1861 	//	copy of a file at a given time, ie. later changes should not
1862 	//	make it into the mapped copy -- this will need quite some changes
1863 	//	to be done in a nice way
1864 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1865 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1866 
1867 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1868 	size = PAGE_ALIGN(size);
1869 
1870 	if (mapping == REGION_NO_PRIVATE_MAP)
1871 		protection |= B_SHARED_AREA;
1872 	if (addressSpec != B_EXACT_ADDRESS)
1873 		unmapAddressRange = false;
1874 
1875 	if (fd < 0) {
1876 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1877 		virtual_address_restrictions virtualRestrictions = {};
1878 		virtualRestrictions.address = *_address;
1879 		virtualRestrictions.address_specification = addressSpec;
1880 		physical_address_restrictions physicalRestrictions = {};
1881 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1882 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1883 			_address);
1884 	}
1885 
1886 	// get the open flags of the FD
1887 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1888 	if (descriptor == NULL)
1889 		return EBADF;
1890 	int32 openMode = descriptor->open_mode;
1891 	put_fd(descriptor);
1892 
1893 	// The FD must open for reading at any rate. For shared mapping with write
1894 	// access, additionally the FD must be open for writing.
1895 	if ((openMode & O_ACCMODE) == O_WRONLY
1896 		|| (mapping == REGION_NO_PRIVATE_MAP
1897 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1898 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1899 		return EACCES;
1900 	}
1901 
1902 	// get the vnode for the object, this also grabs a ref to it
1903 	struct vnode* vnode = NULL;
1904 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1905 	if (status < B_OK)
1906 		return status;
1907 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1908 
1909 	// If we're going to pre-map pages, we need to reserve the pages needed by
1910 	// the mapping backend upfront.
1911 	page_num_t reservedPreMapPages = 0;
1912 	vm_page_reservation reservation;
1913 	if ((protection & B_READ_AREA) != 0) {
1914 		AddressSpaceWriteLocker locker;
1915 		status = locker.SetTo(team);
1916 		if (status != B_OK)
1917 			return status;
1918 
1919 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1920 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1921 
1922 		locker.Unlock();
1923 
1924 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1925 			team == VMAddressSpace::KernelID()
1926 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1927 	}
1928 
1929 	struct PageUnreserver {
1930 		PageUnreserver(vm_page_reservation* reservation)
1931 			:
1932 			fReservation(reservation)
1933 		{
1934 		}
1935 
1936 		~PageUnreserver()
1937 		{
1938 			if (fReservation != NULL)
1939 				vm_page_unreserve_pages(fReservation);
1940 		}
1941 
1942 		vm_page_reservation* fReservation;
1943 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1944 
1945 	// Lock the address space and, if the specified address range shall be
1946 	// unmapped, ensure it is not wired.
1947 	AddressSpaceWriteLocker locker;
1948 	do {
1949 		if (locker.SetTo(team) != B_OK)
1950 			return B_BAD_TEAM_ID;
1951 	} while (unmapAddressRange
1952 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1953 			(addr_t)*_address, size, &locker));
1954 
1955 	// TODO: this only works for file systems that use the file cache
1956 	VMCache* cache;
1957 	status = vfs_get_vnode_cache(vnode, &cache, false);
1958 	if (status < B_OK)
1959 		return status;
1960 
1961 	cache->Lock();
1962 
1963 	VMArea* area;
1964 	virtual_address_restrictions addressRestrictions = {};
1965 	addressRestrictions.address = *_address;
1966 	addressRestrictions.address_specification = addressSpec;
1967 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1968 		0, protection, mapping,
1969 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1970 		&addressRestrictions, kernel, &area, _address);
1971 
1972 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1973 		// map_backing_store() cannot know we no longer need the ref
1974 		cache->ReleaseRefLocked();
1975 	}
1976 
1977 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1978 		pre_map_area_pages(area, cache, &reservation);
1979 
1980 	cache->Unlock();
1981 
1982 	if (status == B_OK) {
1983 		// TODO: this probably deserves a smarter solution, ie. don't always
1984 		// prefetch stuff, and also, probably don't trigger it at this place.
1985 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1986 			// prefetches at max 10 MB starting from "offset"
1987 	}
1988 
1989 	if (status != B_OK)
1990 		return status;
1991 
1992 	area->cache_type = CACHE_TYPE_VNODE;
1993 	return area->id;
1994 }
1995 
1996 
1997 area_id
1998 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
1999 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2000 	int fd, off_t offset)
2001 {
2002 	if (!arch_vm_supports_protection(protection))
2003 		return B_NOT_SUPPORTED;
2004 
2005 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2006 		mapping, unmapAddressRange, fd, offset, true);
2007 }
2008 
2009 
2010 VMCache*
2011 vm_area_get_locked_cache(VMArea* area)
2012 {
2013 	rw_lock_read_lock(&sAreaCacheLock);
2014 
2015 	while (true) {
2016 		VMCache* cache = area->cache;
2017 
2018 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2019 			// cache has been deleted
2020 			rw_lock_read_lock(&sAreaCacheLock);
2021 			continue;
2022 		}
2023 
2024 		rw_lock_read_lock(&sAreaCacheLock);
2025 
2026 		if (cache == area->cache) {
2027 			cache->AcquireRefLocked();
2028 			rw_lock_read_unlock(&sAreaCacheLock);
2029 			return cache;
2030 		}
2031 
2032 		// the cache changed in the meantime
2033 		cache->Unlock();
2034 	}
2035 }
2036 
2037 
2038 void
2039 vm_area_put_locked_cache(VMCache* cache)
2040 {
2041 	cache->ReleaseRefAndUnlock();
2042 }
2043 
2044 
2045 area_id
2046 vm_clone_area(team_id team, const char* name, void** address,
2047 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2048 	bool kernel)
2049 {
2050 	VMArea* newArea = NULL;
2051 	VMArea* sourceArea;
2052 
2053 	// Check whether the source area exists and is cloneable. If so, mark it
2054 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2055 	{
2056 		AddressSpaceWriteLocker locker;
2057 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2058 		if (status != B_OK)
2059 			return status;
2060 
2061 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2062 			return B_NOT_ALLOWED;
2063 
2064 		sourceArea->protection |= B_SHARED_AREA;
2065 		protection |= B_SHARED_AREA;
2066 	}
2067 
2068 	// Now lock both address spaces and actually do the cloning.
2069 
2070 	MultiAddressSpaceLocker locker;
2071 	VMAddressSpace* sourceAddressSpace;
2072 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2073 	if (status != B_OK)
2074 		return status;
2075 
2076 	VMAddressSpace* targetAddressSpace;
2077 	status = locker.AddTeam(team, true, &targetAddressSpace);
2078 	if (status != B_OK)
2079 		return status;
2080 
2081 	status = locker.Lock();
2082 	if (status != B_OK)
2083 		return status;
2084 
2085 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2086 	if (sourceArea == NULL)
2087 		return B_BAD_VALUE;
2088 
2089 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2090 		return B_NOT_ALLOWED;
2091 
2092 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2093 
2094 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2095 	//	have been adapted. Maybe it should be part of the kernel settings,
2096 	//	anyway (so that old drivers can always work).
2097 #if 0
2098 	if (sourceArea->aspace == VMAddressSpace::Kernel()
2099 		&& addressSpace != VMAddressSpace::Kernel()
2100 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2101 		// kernel areas must not be cloned in userland, unless explicitly
2102 		// declared user-cloneable upon construction
2103 		status = B_NOT_ALLOWED;
2104 	} else
2105 #endif
2106 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2107 		status = B_NOT_ALLOWED;
2108 	else {
2109 		virtual_address_restrictions addressRestrictions = {};
2110 		addressRestrictions.address = *address;
2111 		addressRestrictions.address_specification = addressSpec;
2112 		status = map_backing_store(targetAddressSpace, cache,
2113 			sourceArea->cache_offset, name, sourceArea->Size(),
2114 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2115 			kernel, &newArea, address);
2116 	}
2117 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2118 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2119 		// to create a new cache, and has therefore already acquired a reference
2120 		// to the source cache - but otherwise it has no idea that we need
2121 		// one.
2122 		cache->AcquireRefLocked();
2123 	}
2124 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2125 		// we need to map in everything at this point
2126 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2127 			// we don't have actual pages to map but a physical area
2128 			VMTranslationMap* map
2129 				= sourceArea->address_space->TranslationMap();
2130 			map->Lock();
2131 
2132 			phys_addr_t physicalAddress;
2133 			uint32 oldProtection;
2134 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2135 
2136 			map->Unlock();
2137 
2138 			map = targetAddressSpace->TranslationMap();
2139 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2140 				newArea->Base() + (newArea->Size() - 1));
2141 
2142 			vm_page_reservation reservation;
2143 			vm_page_reserve_pages(&reservation, reservePages,
2144 				targetAddressSpace == VMAddressSpace::Kernel()
2145 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2146 			map->Lock();
2147 
2148 			for (addr_t offset = 0; offset < newArea->Size();
2149 					offset += B_PAGE_SIZE) {
2150 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2151 					protection, newArea->MemoryType(), &reservation);
2152 			}
2153 
2154 			map->Unlock();
2155 			vm_page_unreserve_pages(&reservation);
2156 		} else {
2157 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2158 			size_t reservePages = map->MaxPagesNeededToMap(
2159 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2160 			vm_page_reservation reservation;
2161 			vm_page_reserve_pages(&reservation, reservePages,
2162 				targetAddressSpace == VMAddressSpace::Kernel()
2163 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2164 
2165 			// map in all pages from source
2166 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2167 					vm_page* page  = it.Next();) {
2168 				if (!page->busy) {
2169 					DEBUG_PAGE_ACCESS_START(page);
2170 					map_page(newArea, page,
2171 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2172 							- newArea->cache_offset),
2173 						protection, &reservation);
2174 					DEBUG_PAGE_ACCESS_END(page);
2175 				}
2176 			}
2177 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2178 			// ensuring that!
2179 
2180 			vm_page_unreserve_pages(&reservation);
2181 		}
2182 	}
2183 	if (status == B_OK)
2184 		newArea->cache_type = sourceArea->cache_type;
2185 
2186 	vm_area_put_locked_cache(cache);
2187 
2188 	if (status < B_OK)
2189 		return status;
2190 
2191 	return newArea->id;
2192 }
2193 
2194 
2195 /*!	Deletes the specified area of the given address space.
2196 
2197 	The address space must be write-locked.
2198 	The caller must ensure that the area does not have any wired ranges.
2199 
2200 	\param addressSpace The address space containing the area.
2201 	\param area The area to be deleted.
2202 	\param deletingAddressSpace \c true, if the address space is in the process
2203 		of being deleted.
2204 */
2205 static void
2206 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2207 	bool deletingAddressSpace)
2208 {
2209 	ASSERT(!area->IsWired());
2210 
2211 	VMAreaHash::Remove(area);
2212 
2213 	// At this point the area is removed from the global hash table, but
2214 	// still exists in the area list.
2215 
2216 	// Unmap the virtual address space the area occupied.
2217 	{
2218 		// We need to lock the complete cache chain.
2219 		VMCache* topCache = vm_area_get_locked_cache(area);
2220 		VMCacheChainLocker cacheChainLocker(topCache);
2221 		cacheChainLocker.LockAllSourceCaches();
2222 
2223 		// If the area's top cache is a temporary cache and the area is the only
2224 		// one referencing it (besides us currently holding a second reference),
2225 		// the unmapping code doesn't need to care about preserving the accessed
2226 		// and dirty flags of the top cache page mappings.
2227 		bool ignoreTopCachePageFlags
2228 			= topCache->temporary && topCache->RefCount() == 2;
2229 
2230 		area->address_space->TranslationMap()->UnmapArea(area,
2231 			deletingAddressSpace, ignoreTopCachePageFlags);
2232 	}
2233 
2234 	if (!area->cache->temporary)
2235 		area->cache->WriteModified();
2236 
2237 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2238 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2239 
2240 	arch_vm_unset_memory_type(area);
2241 	addressSpace->RemoveArea(area, allocationFlags);
2242 	addressSpace->Put();
2243 
2244 	area->cache->RemoveArea(area);
2245 	area->cache->ReleaseRef();
2246 
2247 	addressSpace->DeleteArea(area, allocationFlags);
2248 }
2249 
2250 
2251 status_t
2252 vm_delete_area(team_id team, area_id id, bool kernel)
2253 {
2254 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2255 		team, id));
2256 
2257 	// lock the address space and make sure the area isn't wired
2258 	AddressSpaceWriteLocker locker;
2259 	VMArea* area;
2260 	AreaCacheLocker cacheLocker;
2261 
2262 	do {
2263 		status_t status = locker.SetFromArea(team, id, area);
2264 		if (status != B_OK)
2265 			return status;
2266 
2267 		cacheLocker.SetTo(area);
2268 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2269 
2270 	cacheLocker.Unlock();
2271 
2272 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2273 		return B_NOT_ALLOWED;
2274 
2275 	delete_area(locker.AddressSpace(), area, false);
2276 	return B_OK;
2277 }
2278 
2279 
2280 /*!	Creates a new cache on top of given cache, moves all areas from
2281 	the old cache to the new one, and changes the protection of all affected
2282 	areas' pages to read-only. If requested, wired pages are moved up to the
2283 	new cache and copies are added to the old cache in their place.
2284 	Preconditions:
2285 	- The given cache must be locked.
2286 	- All of the cache's areas' address spaces must be read locked.
2287 	- Either the cache must not have any wired ranges or a page reservation for
2288 	  all wired pages must be provided, so they can be copied.
2289 
2290 	\param lowerCache The cache on top of which a new cache shall be created.
2291 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2292 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2293 		has wired page. The wired pages are copied in this case.
2294 */
2295 static status_t
2296 vm_copy_on_write_area(VMCache* lowerCache,
2297 	vm_page_reservation* wiredPagesReservation)
2298 {
2299 	VMCache* upperCache;
2300 
2301 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2302 
2303 	// We need to separate the cache from its areas. The cache goes one level
2304 	// deeper and we create a new cache inbetween.
2305 
2306 	// create an anonymous cache
2307 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2308 		lowerCache->GuardSize() / B_PAGE_SIZE,
2309 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2310 		VM_PRIORITY_USER);
2311 	if (status != B_OK)
2312 		return status;
2313 
2314 	upperCache->Lock();
2315 
2316 	upperCache->temporary = 1;
2317 	upperCache->virtual_base = lowerCache->virtual_base;
2318 	upperCache->virtual_end = lowerCache->virtual_end;
2319 
2320 	// transfer the lower cache areas to the upper cache
2321 	rw_lock_write_lock(&sAreaCacheLock);
2322 	upperCache->TransferAreas(lowerCache);
2323 	rw_lock_write_unlock(&sAreaCacheLock);
2324 
2325 	lowerCache->AddConsumer(upperCache);
2326 
2327 	// We now need to remap all pages from all of the cache's areas read-only,
2328 	// so that a copy will be created on next write access. If there are wired
2329 	// pages, we keep their protection, move them to the upper cache and create
2330 	// copies for the lower cache.
2331 	if (wiredPagesReservation != NULL) {
2332 		// We need to handle wired pages -- iterate through the cache's pages.
2333 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2334 				vm_page* page = it.Next();) {
2335 			if (page->WiredCount() > 0) {
2336 				// allocate a new page and copy the wired one
2337 				vm_page* copiedPage = vm_page_allocate_page(
2338 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2339 
2340 				vm_memcpy_physical_page(
2341 					copiedPage->physical_page_number * B_PAGE_SIZE,
2342 					page->physical_page_number * B_PAGE_SIZE);
2343 
2344 				// move the wired page to the upper cache (note: removing is OK
2345 				// with the SplayTree iterator) and insert the copy
2346 				upperCache->MovePage(page);
2347 				lowerCache->InsertPage(copiedPage,
2348 					page->cache_offset * B_PAGE_SIZE);
2349 
2350 				DEBUG_PAGE_ACCESS_END(copiedPage);
2351 			} else {
2352 				// Change the protection of this page in all areas.
2353 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2354 						tempArea = tempArea->cache_next) {
2355 					// The area must be readable in the same way it was
2356 					// previously writable.
2357 					uint32 protection = B_KERNEL_READ_AREA;
2358 					if ((tempArea->protection & B_READ_AREA) != 0)
2359 						protection |= B_READ_AREA;
2360 
2361 					VMTranslationMap* map
2362 						= tempArea->address_space->TranslationMap();
2363 					map->Lock();
2364 					map->ProtectPage(tempArea,
2365 						virtual_page_address(tempArea, page), protection);
2366 					map->Unlock();
2367 				}
2368 			}
2369 		}
2370 	} else {
2371 		ASSERT(lowerCache->WiredPagesCount() == 0);
2372 
2373 		// just change the protection of all areas
2374 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2375 				tempArea = tempArea->cache_next) {
2376 			// The area must be readable in the same way it was previously
2377 			// writable.
2378 			uint32 protection = B_KERNEL_READ_AREA;
2379 			if ((tempArea->protection & B_READ_AREA) != 0)
2380 				protection |= B_READ_AREA;
2381 
2382 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2383 			map->Lock();
2384 			map->ProtectArea(tempArea, protection);
2385 			map->Unlock();
2386 		}
2387 	}
2388 
2389 	vm_area_put_locked_cache(upperCache);
2390 
2391 	return B_OK;
2392 }
2393 
2394 
2395 area_id
2396 vm_copy_area(team_id team, const char* name, void** _address,
2397 	uint32 addressSpec, uint32 protection, area_id sourceID)
2398 {
2399 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2400 
2401 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2402 		// set the same protection for the kernel as for userland
2403 		protection |= B_KERNEL_READ_AREA;
2404 		if (writableCopy)
2405 			protection |= B_KERNEL_WRITE_AREA;
2406 	}
2407 
2408 	// Do the locking: target address space, all address spaces associated with
2409 	// the source cache, and the cache itself.
2410 	MultiAddressSpaceLocker locker;
2411 	VMAddressSpace* targetAddressSpace;
2412 	VMCache* cache;
2413 	VMArea* source;
2414 	AreaCacheLocker cacheLocker;
2415 	status_t status;
2416 	bool sharedArea;
2417 
2418 	page_num_t wiredPages = 0;
2419 	vm_page_reservation wiredPagesReservation;
2420 
2421 	bool restart;
2422 	do {
2423 		restart = false;
2424 
2425 		locker.Unset();
2426 		status = locker.AddTeam(team, true, &targetAddressSpace);
2427 		if (status == B_OK) {
2428 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2429 				&cache);
2430 		}
2431 		if (status != B_OK)
2432 			return status;
2433 
2434 		cacheLocker.SetTo(cache, true);	// already locked
2435 
2436 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2437 
2438 		page_num_t oldWiredPages = wiredPages;
2439 		wiredPages = 0;
2440 
2441 		// If the source area isn't shared, count the number of wired pages in
2442 		// the cache and reserve as many pages.
2443 		if (!sharedArea) {
2444 			wiredPages = cache->WiredPagesCount();
2445 
2446 			if (wiredPages > oldWiredPages) {
2447 				cacheLocker.Unlock();
2448 				locker.Unlock();
2449 
2450 				if (oldWiredPages > 0)
2451 					vm_page_unreserve_pages(&wiredPagesReservation);
2452 
2453 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2454 					VM_PRIORITY_USER);
2455 
2456 				restart = true;
2457 			}
2458 		} else if (oldWiredPages > 0)
2459 			vm_page_unreserve_pages(&wiredPagesReservation);
2460 	} while (restart);
2461 
2462 	// unreserve pages later
2463 	struct PagesUnreserver {
2464 		PagesUnreserver(vm_page_reservation* reservation)
2465 			:
2466 			fReservation(reservation)
2467 		{
2468 		}
2469 
2470 		~PagesUnreserver()
2471 		{
2472 			if (fReservation != NULL)
2473 				vm_page_unreserve_pages(fReservation);
2474 		}
2475 
2476 	private:
2477 		vm_page_reservation*	fReservation;
2478 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2479 
2480 	if (addressSpec == B_CLONE_ADDRESS) {
2481 		addressSpec = B_EXACT_ADDRESS;
2482 		*_address = (void*)source->Base();
2483 	}
2484 
2485 	// First, create a cache on top of the source area, respectively use the
2486 	// existing one, if this is a shared area.
2487 
2488 	VMArea* target;
2489 	virtual_address_restrictions addressRestrictions = {};
2490 	addressRestrictions.address = *_address;
2491 	addressRestrictions.address_specification = addressSpec;
2492 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2493 		name, source->Size(), source->wiring, protection,
2494 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2495 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2496 		&addressRestrictions, true, &target, _address);
2497 	if (status < B_OK)
2498 		return status;
2499 
2500 	if (sharedArea) {
2501 		// The new area uses the old area's cache, but map_backing_store()
2502 		// hasn't acquired a ref. So we have to do that now.
2503 		cache->AcquireRefLocked();
2504 	}
2505 
2506 	// If the source area is writable, we need to move it one layer up as well
2507 
2508 	if (!sharedArea) {
2509 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2510 			// TODO: do something more useful if this fails!
2511 			if (vm_copy_on_write_area(cache,
2512 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2513 				panic("vm_copy_on_write_area() failed!\n");
2514 			}
2515 		}
2516 	}
2517 
2518 	// we return the ID of the newly created area
2519 	return target->id;
2520 }
2521 
2522 
2523 static status_t
2524 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2525 	bool kernel)
2526 {
2527 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2528 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2529 
2530 	if (!arch_vm_supports_protection(newProtection))
2531 		return B_NOT_SUPPORTED;
2532 
2533 	bool becomesWritable
2534 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2535 
2536 	// lock address spaces and cache
2537 	MultiAddressSpaceLocker locker;
2538 	VMCache* cache;
2539 	VMArea* area;
2540 	status_t status;
2541 	AreaCacheLocker cacheLocker;
2542 	bool isWritable;
2543 
2544 	bool restart;
2545 	do {
2546 		restart = false;
2547 
2548 		locker.Unset();
2549 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2550 		if (status != B_OK)
2551 			return status;
2552 
2553 		cacheLocker.SetTo(cache, true);	// already locked
2554 
2555 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2556 			return B_NOT_ALLOWED;
2557 
2558 		if (area->protection == newProtection)
2559 			return B_OK;
2560 
2561 		if (team != VMAddressSpace::KernelID()
2562 			&& area->address_space->ID() != team) {
2563 			// unless you're the kernel, you are only allowed to set
2564 			// the protection of your own areas
2565 			return B_NOT_ALLOWED;
2566 		}
2567 
2568 		isWritable
2569 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2570 
2571 		// Make sure the area (respectively, if we're going to call
2572 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2573 		// wired ranges.
2574 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2575 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2576 					otherArea = otherArea->cache_next) {
2577 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2578 					restart = true;
2579 					break;
2580 				}
2581 			}
2582 		} else {
2583 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2584 				restart = true;
2585 		}
2586 	} while (restart);
2587 
2588 	bool changePageProtection = true;
2589 	bool changeTopCachePagesOnly = false;
2590 
2591 	if (isWritable && !becomesWritable) {
2592 		// writable -> !writable
2593 
2594 		if (cache->source != NULL && cache->temporary) {
2595 			if (cache->CountWritableAreas(area) == 0) {
2596 				// Since this cache now lives from the pages in its source cache,
2597 				// we can change the cache's commitment to take only those pages
2598 				// into account that really are in this cache.
2599 
2600 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2601 					team == VMAddressSpace::KernelID()
2602 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2603 
2604 				// TODO: we may be able to join with our source cache, if
2605 				// count == 0
2606 			}
2607 		}
2608 
2609 		// If only the writability changes, we can just remap the pages of the
2610 		// top cache, since the pages of lower caches are mapped read-only
2611 		// anyway. That's advantageous only, if the number of pages in the cache
2612 		// is significantly smaller than the number of pages in the area,
2613 		// though.
2614 		if (newProtection
2615 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2616 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2617 			changeTopCachePagesOnly = true;
2618 		}
2619 	} else if (!isWritable && becomesWritable) {
2620 		// !writable -> writable
2621 
2622 		if (!cache->consumers.IsEmpty()) {
2623 			// There are consumers -- we have to insert a new cache. Fortunately
2624 			// vm_copy_on_write_area() does everything that's needed.
2625 			changePageProtection = false;
2626 			status = vm_copy_on_write_area(cache, NULL);
2627 		} else {
2628 			// No consumers, so we don't need to insert a new one.
2629 			if (cache->source != NULL && cache->temporary) {
2630 				// the cache's commitment must contain all possible pages
2631 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2632 					team == VMAddressSpace::KernelID()
2633 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2634 			}
2635 
2636 			if (status == B_OK && cache->source != NULL) {
2637 				// There's a source cache, hence we can't just change all pages'
2638 				// protection or we might allow writing into pages belonging to
2639 				// a lower cache.
2640 				changeTopCachePagesOnly = true;
2641 			}
2642 		}
2643 	} else {
2644 		// we don't have anything special to do in all other cases
2645 	}
2646 
2647 	if (status == B_OK) {
2648 		// remap existing pages in this cache
2649 		if (changePageProtection) {
2650 			VMTranslationMap* map = area->address_space->TranslationMap();
2651 			map->Lock();
2652 
2653 			if (changeTopCachePagesOnly) {
2654 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2655 				page_num_t lastPageOffset
2656 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2657 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2658 						vm_page* page = it.Next();) {
2659 					if (page->cache_offset >= firstPageOffset
2660 						&& page->cache_offset <= lastPageOffset) {
2661 						addr_t address = virtual_page_address(area, page);
2662 						map->ProtectPage(area, address, newProtection);
2663 					}
2664 				}
2665 			} else
2666 				map->ProtectArea(area, newProtection);
2667 
2668 			map->Unlock();
2669 		}
2670 
2671 		area->protection = newProtection;
2672 	}
2673 
2674 	return status;
2675 }
2676 
2677 
2678 status_t
2679 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2680 {
2681 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2682 	if (addressSpace == NULL)
2683 		return B_BAD_TEAM_ID;
2684 
2685 	VMTranslationMap* map = addressSpace->TranslationMap();
2686 
2687 	map->Lock();
2688 	uint32 dummyFlags;
2689 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2690 	map->Unlock();
2691 
2692 	addressSpace->Put();
2693 	return status;
2694 }
2695 
2696 
2697 /*!	The page's cache must be locked.
2698 */
2699 bool
2700 vm_test_map_modification(vm_page* page)
2701 {
2702 	if (page->modified)
2703 		return true;
2704 
2705 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2706 	vm_page_mapping* mapping;
2707 	while ((mapping = iterator.Next()) != NULL) {
2708 		VMArea* area = mapping->area;
2709 		VMTranslationMap* map = area->address_space->TranslationMap();
2710 
2711 		phys_addr_t physicalAddress;
2712 		uint32 flags;
2713 		map->Lock();
2714 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2715 		map->Unlock();
2716 
2717 		if ((flags & PAGE_MODIFIED) != 0)
2718 			return true;
2719 	}
2720 
2721 	return false;
2722 }
2723 
2724 
2725 /*!	The page's cache must be locked.
2726 */
2727 void
2728 vm_clear_map_flags(vm_page* page, uint32 flags)
2729 {
2730 	if ((flags & PAGE_ACCESSED) != 0)
2731 		page->accessed = false;
2732 	if ((flags & PAGE_MODIFIED) != 0)
2733 		page->modified = false;
2734 
2735 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2736 	vm_page_mapping* mapping;
2737 	while ((mapping = iterator.Next()) != NULL) {
2738 		VMArea* area = mapping->area;
2739 		VMTranslationMap* map = area->address_space->TranslationMap();
2740 
2741 		map->Lock();
2742 		map->ClearFlags(virtual_page_address(area, page), flags);
2743 		map->Unlock();
2744 	}
2745 }
2746 
2747 
2748 /*!	Removes all mappings from a page.
2749 	After you've called this function, the page is unmapped from memory and
2750 	the page's \c accessed and \c modified flags have been updated according
2751 	to the state of the mappings.
2752 	The page's cache must be locked.
2753 */
2754 void
2755 vm_remove_all_page_mappings(vm_page* page)
2756 {
2757 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2758 		VMArea* area = mapping->area;
2759 		VMTranslationMap* map = area->address_space->TranslationMap();
2760 		addr_t address = virtual_page_address(area, page);
2761 		map->UnmapPage(area, address, false);
2762 	}
2763 }
2764 
2765 
2766 int32
2767 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2768 {
2769 	int32 count = 0;
2770 
2771 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2772 	vm_page_mapping* mapping;
2773 	while ((mapping = iterator.Next()) != NULL) {
2774 		VMArea* area = mapping->area;
2775 		VMTranslationMap* map = area->address_space->TranslationMap();
2776 
2777 		bool modified;
2778 		if (map->ClearAccessedAndModified(area,
2779 				virtual_page_address(area, page), false, modified)) {
2780 			count++;
2781 		}
2782 
2783 		page->modified |= modified;
2784 	}
2785 
2786 
2787 	if (page->accessed) {
2788 		count++;
2789 		page->accessed = false;
2790 	}
2791 
2792 	return count;
2793 }
2794 
2795 
2796 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2797 	mappings.
2798 	The function iterates through the page mappings and removes them until
2799 	encountering one that has been accessed. From then on it will continue to
2800 	iterate, but only clear the accessed flag of the mapping. The page's
2801 	\c modified bit will be updated accordingly, the \c accessed bit will be
2802 	cleared.
2803 	\return The number of mapping accessed bits encountered, including the
2804 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2805 		of the page have been removed.
2806 */
2807 int32
2808 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2809 {
2810 	ASSERT(page->WiredCount() == 0);
2811 
2812 	if (page->accessed)
2813 		return vm_clear_page_mapping_accessed_flags(page);
2814 
2815 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2816 		VMArea* area = mapping->area;
2817 		VMTranslationMap* map = area->address_space->TranslationMap();
2818 		addr_t address = virtual_page_address(area, page);
2819 		bool modified = false;
2820 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2821 			page->accessed = true;
2822 			page->modified |= modified;
2823 			return vm_clear_page_mapping_accessed_flags(page);
2824 		}
2825 		page->modified |= modified;
2826 	}
2827 
2828 	return 0;
2829 }
2830 
2831 
2832 static int
2833 display_mem(int argc, char** argv)
2834 {
2835 	bool physical = false;
2836 	addr_t copyAddress;
2837 	int32 displayWidth;
2838 	int32 itemSize;
2839 	int32 num = -1;
2840 	addr_t address;
2841 	int i = 1, j;
2842 
2843 	if (argc > 1 && argv[1][0] == '-') {
2844 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2845 			physical = true;
2846 			i++;
2847 		} else
2848 			i = 99;
2849 	}
2850 
2851 	if (argc < i + 1 || argc > i + 2) {
2852 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2853 			"\tdl - 8 bytes\n"
2854 			"\tdw - 4 bytes\n"
2855 			"\tds - 2 bytes\n"
2856 			"\tdb - 1 byte\n"
2857 			"\tstring - a whole string\n"
2858 			"  -p or --physical only allows memory from a single page to be "
2859 			"displayed.\n");
2860 		return 0;
2861 	}
2862 
2863 	address = parse_expression(argv[i]);
2864 
2865 	if (argc > i + 1)
2866 		num = parse_expression(argv[i + 1]);
2867 
2868 	// build the format string
2869 	if (strcmp(argv[0], "db") == 0) {
2870 		itemSize = 1;
2871 		displayWidth = 16;
2872 	} else if (strcmp(argv[0], "ds") == 0) {
2873 		itemSize = 2;
2874 		displayWidth = 8;
2875 	} else if (strcmp(argv[0], "dw") == 0) {
2876 		itemSize = 4;
2877 		displayWidth = 4;
2878 	} else if (strcmp(argv[0], "dl") == 0) {
2879 		itemSize = 8;
2880 		displayWidth = 2;
2881 	} else if (strcmp(argv[0], "string") == 0) {
2882 		itemSize = 1;
2883 		displayWidth = -1;
2884 	} else {
2885 		kprintf("display_mem called in an invalid way!\n");
2886 		return 0;
2887 	}
2888 
2889 	if (num <= 0)
2890 		num = displayWidth;
2891 
2892 	void* physicalPageHandle = NULL;
2893 
2894 	if (physical) {
2895 		int32 offset = address & (B_PAGE_SIZE - 1);
2896 		if (num * itemSize + offset > B_PAGE_SIZE) {
2897 			num = (B_PAGE_SIZE - offset) / itemSize;
2898 			kprintf("NOTE: number of bytes has been cut to page size\n");
2899 		}
2900 
2901 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2902 
2903 		if (vm_get_physical_page_debug(address, &copyAddress,
2904 				&physicalPageHandle) != B_OK) {
2905 			kprintf("getting the hardware page failed.");
2906 			return 0;
2907 		}
2908 
2909 		address += offset;
2910 		copyAddress += offset;
2911 	} else
2912 		copyAddress = address;
2913 
2914 	if (!strcmp(argv[0], "string")) {
2915 		kprintf("%p \"", (char*)copyAddress);
2916 
2917 		// string mode
2918 		for (i = 0; true; i++) {
2919 			char c;
2920 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2921 					!= B_OK
2922 				|| c == '\0') {
2923 				break;
2924 			}
2925 
2926 			if (c == '\n')
2927 				kprintf("\\n");
2928 			else if (c == '\t')
2929 				kprintf("\\t");
2930 			else {
2931 				if (!isprint(c))
2932 					c = '.';
2933 
2934 				kprintf("%c", c);
2935 			}
2936 		}
2937 
2938 		kprintf("\"\n");
2939 	} else {
2940 		// number mode
2941 		for (i = 0; i < num; i++) {
2942 			uint32 value;
2943 
2944 			if ((i % displayWidth) == 0) {
2945 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2946 				if (i != 0)
2947 					kprintf("\n");
2948 
2949 				kprintf("[0x%lx]  ", address + i * itemSize);
2950 
2951 				for (j = 0; j < displayed; j++) {
2952 					char c;
2953 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2954 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2955 						displayed = j;
2956 						break;
2957 					}
2958 					if (!isprint(c))
2959 						c = '.';
2960 
2961 					kprintf("%c", c);
2962 				}
2963 				if (num > displayWidth) {
2964 					// make sure the spacing in the last line is correct
2965 					for (j = displayed; j < displayWidth * itemSize; j++)
2966 						kprintf(" ");
2967 				}
2968 				kprintf("  ");
2969 			}
2970 
2971 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2972 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2973 				kprintf("read fault");
2974 				break;
2975 			}
2976 
2977 			switch (itemSize) {
2978 				case 1:
2979 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
2980 					break;
2981 				case 2:
2982 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
2983 					break;
2984 				case 4:
2985 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
2986 					break;
2987 				case 8:
2988 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
2989 					break;
2990 			}
2991 		}
2992 
2993 		kprintf("\n");
2994 	}
2995 
2996 	if (physical) {
2997 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
2998 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
2999 	}
3000 	return 0;
3001 }
3002 
3003 
3004 static void
3005 dump_cache_tree_recursively(VMCache* cache, int level,
3006 	VMCache* highlightCache)
3007 {
3008 	// print this cache
3009 	for (int i = 0; i < level; i++)
3010 		kprintf("  ");
3011 	if (cache == highlightCache)
3012 		kprintf("%p <--\n", cache);
3013 	else
3014 		kprintf("%p\n", cache);
3015 
3016 	// recursively print its consumers
3017 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3018 			VMCache* consumer = it.Next();) {
3019 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3020 	}
3021 }
3022 
3023 
3024 static int
3025 dump_cache_tree(int argc, char** argv)
3026 {
3027 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3028 		kprintf("usage: %s <address>\n", argv[0]);
3029 		return 0;
3030 	}
3031 
3032 	addr_t address = parse_expression(argv[1]);
3033 	if (address == 0)
3034 		return 0;
3035 
3036 	VMCache* cache = (VMCache*)address;
3037 	VMCache* root = cache;
3038 
3039 	// find the root cache (the transitive source)
3040 	while (root->source != NULL)
3041 		root = root->source;
3042 
3043 	dump_cache_tree_recursively(root, 0, cache);
3044 
3045 	return 0;
3046 }
3047 
3048 
3049 const char*
3050 vm_cache_type_to_string(int32 type)
3051 {
3052 	switch (type) {
3053 		case CACHE_TYPE_RAM:
3054 			return "RAM";
3055 		case CACHE_TYPE_DEVICE:
3056 			return "device";
3057 		case CACHE_TYPE_VNODE:
3058 			return "vnode";
3059 		case CACHE_TYPE_NULL:
3060 			return "null";
3061 
3062 		default:
3063 			return "unknown";
3064 	}
3065 }
3066 
3067 
3068 #if DEBUG_CACHE_LIST
3069 
3070 static void
3071 update_cache_info_recursively(VMCache* cache, cache_info& info)
3072 {
3073 	info.page_count += cache->page_count;
3074 	if (cache->type == CACHE_TYPE_RAM)
3075 		info.committed += cache->committed_size;
3076 
3077 	// recurse
3078 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3079 			VMCache* consumer = it.Next();) {
3080 		update_cache_info_recursively(consumer, info);
3081 	}
3082 }
3083 
3084 
3085 static int
3086 cache_info_compare_page_count(const void* _a, const void* _b)
3087 {
3088 	const cache_info* a = (const cache_info*)_a;
3089 	const cache_info* b = (const cache_info*)_b;
3090 	if (a->page_count == b->page_count)
3091 		return 0;
3092 	return a->page_count < b->page_count ? 1 : -1;
3093 }
3094 
3095 
3096 static int
3097 cache_info_compare_committed(const void* _a, const void* _b)
3098 {
3099 	const cache_info* a = (const cache_info*)_a;
3100 	const cache_info* b = (const cache_info*)_b;
3101 	if (a->committed == b->committed)
3102 		return 0;
3103 	return a->committed < b->committed ? 1 : -1;
3104 }
3105 
3106 
3107 static void
3108 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3109 {
3110 	for (int i = 0; i < level; i++)
3111 		kprintf("  ");
3112 
3113 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3114 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3115 		cache->virtual_base, cache->virtual_end, cache->page_count);
3116 
3117 	if (level == 0)
3118 		kprintf("/%lu", info.page_count);
3119 
3120 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3121 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3122 
3123 		if (level == 0)
3124 			kprintf("/%lu", info.committed);
3125 	}
3126 
3127 	// areas
3128 	if (cache->areas != NULL) {
3129 		VMArea* area = cache->areas;
3130 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3131 			area->name, area->address_space->ID());
3132 
3133 		while (area->cache_next != NULL) {
3134 			area = area->cache_next;
3135 			kprintf(", %" B_PRId32, area->id);
3136 		}
3137 	}
3138 
3139 	kputs("\n");
3140 
3141 	// recurse
3142 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3143 			VMCache* consumer = it.Next();) {
3144 		dump_caches_recursively(consumer, info, level + 1);
3145 	}
3146 }
3147 
3148 
3149 static int
3150 dump_caches(int argc, char** argv)
3151 {
3152 	if (sCacheInfoTable == NULL) {
3153 		kprintf("No cache info table!\n");
3154 		return 0;
3155 	}
3156 
3157 	bool sortByPageCount = true;
3158 
3159 	for (int32 i = 1; i < argc; i++) {
3160 		if (strcmp(argv[i], "-c") == 0) {
3161 			sortByPageCount = false;
3162 		} else {
3163 			print_debugger_command_usage(argv[0]);
3164 			return 0;
3165 		}
3166 	}
3167 
3168 	uint32 totalCount = 0;
3169 	uint32 rootCount = 0;
3170 	off_t totalCommitted = 0;
3171 	page_num_t totalPages = 0;
3172 
3173 	VMCache* cache = gDebugCacheList;
3174 	while (cache) {
3175 		totalCount++;
3176 		if (cache->source == NULL) {
3177 			cache_info stackInfo;
3178 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3179 				? sCacheInfoTable[rootCount] : stackInfo;
3180 			rootCount++;
3181 			info.cache = cache;
3182 			info.page_count = 0;
3183 			info.committed = 0;
3184 			update_cache_info_recursively(cache, info);
3185 			totalCommitted += info.committed;
3186 			totalPages += info.page_count;
3187 		}
3188 
3189 		cache = cache->debug_next;
3190 	}
3191 
3192 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3193 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3194 			sortByPageCount
3195 				? &cache_info_compare_page_count
3196 				: &cache_info_compare_committed);
3197 	}
3198 
3199 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3200 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3201 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3202 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3203 			"page count" : "committed size");
3204 
3205 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3206 		for (uint32 i = 0; i < rootCount; i++) {
3207 			cache_info& info = sCacheInfoTable[i];
3208 			dump_caches_recursively(info.cache, info, 0);
3209 		}
3210 	} else
3211 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3212 
3213 	return 0;
3214 }
3215 
3216 #endif	// DEBUG_CACHE_LIST
3217 
3218 
3219 static int
3220 dump_cache(int argc, char** argv)
3221 {
3222 	VMCache* cache;
3223 	bool showPages = false;
3224 	int i = 1;
3225 
3226 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3227 		kprintf("usage: %s [-ps] <address>\n"
3228 			"  if -p is specified, all pages are shown, if -s is used\n"
3229 			"  only the cache info is shown respectively.\n", argv[0]);
3230 		return 0;
3231 	}
3232 	while (argv[i][0] == '-') {
3233 		char* arg = argv[i] + 1;
3234 		while (arg[0]) {
3235 			if (arg[0] == 'p')
3236 				showPages = true;
3237 			arg++;
3238 		}
3239 		i++;
3240 	}
3241 	if (argv[i] == NULL) {
3242 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3243 		return 0;
3244 	}
3245 
3246 	addr_t address = parse_expression(argv[i]);
3247 	if (address == 0)
3248 		return 0;
3249 
3250 	cache = (VMCache*)address;
3251 
3252 	cache->Dump(showPages);
3253 
3254 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3255 
3256 	return 0;
3257 }
3258 
3259 
3260 static void
3261 dump_area_struct(VMArea* area, bool mappings)
3262 {
3263 	kprintf("AREA: %p\n", area);
3264 	kprintf("name:\t\t'%s'\n", area->name);
3265 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3266 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3267 	kprintf("base:\t\t0x%lx\n", area->Base());
3268 	kprintf("size:\t\t0x%lx\n", area->Size());
3269 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3270 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3271 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3272 	kprintf("cache:\t\t%p\n", area->cache);
3273 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3274 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3275 	kprintf("cache_next:\t%p\n", area->cache_next);
3276 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3277 
3278 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3279 	if (mappings) {
3280 		kprintf("page mappings:\n");
3281 		while (iterator.HasNext()) {
3282 			vm_page_mapping* mapping = iterator.Next();
3283 			kprintf("  %p", mapping->page);
3284 		}
3285 		kprintf("\n");
3286 	} else {
3287 		uint32 count = 0;
3288 		while (iterator.Next() != NULL) {
3289 			count++;
3290 		}
3291 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3292 	}
3293 }
3294 
3295 
3296 static int
3297 dump_area(int argc, char** argv)
3298 {
3299 	bool mappings = false;
3300 	bool found = false;
3301 	int32 index = 1;
3302 	VMArea* area;
3303 	addr_t num;
3304 
3305 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3306 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3307 			"All areas matching either id/address/name are listed. You can\n"
3308 			"force to check only a specific item by prefixing the specifier\n"
3309 			"with the id/contains/address/name keywords.\n"
3310 			"-m shows the area's mappings as well.\n");
3311 		return 0;
3312 	}
3313 
3314 	if (!strcmp(argv[1], "-m")) {
3315 		mappings = true;
3316 		index++;
3317 	}
3318 
3319 	int32 mode = 0xf;
3320 	if (!strcmp(argv[index], "id"))
3321 		mode = 1;
3322 	else if (!strcmp(argv[index], "contains"))
3323 		mode = 2;
3324 	else if (!strcmp(argv[index], "name"))
3325 		mode = 4;
3326 	else if (!strcmp(argv[index], "address"))
3327 		mode = 0;
3328 	if (mode != 0xf)
3329 		index++;
3330 
3331 	if (index >= argc) {
3332 		kprintf("No area specifier given.\n");
3333 		return 0;
3334 	}
3335 
3336 	num = parse_expression(argv[index]);
3337 
3338 	if (mode == 0) {
3339 		dump_area_struct((struct VMArea*)num, mappings);
3340 	} else {
3341 		// walk through the area list, looking for the arguments as a name
3342 
3343 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3344 		while ((area = it.Next()) != NULL) {
3345 			if (((mode & 4) != 0 && area->name != NULL
3346 					&& !strcmp(argv[index], area->name))
3347 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3348 					|| (((mode & 2) != 0 && area->Base() <= num
3349 						&& area->Base() + area->Size() > num))))) {
3350 				dump_area_struct(area, mappings);
3351 				found = true;
3352 			}
3353 		}
3354 
3355 		if (!found)
3356 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3357 	}
3358 
3359 	return 0;
3360 }
3361 
3362 
3363 static int
3364 dump_area_list(int argc, char** argv)
3365 {
3366 	VMArea* area;
3367 	const char* name = NULL;
3368 	int32 id = 0;
3369 
3370 	if (argc > 1) {
3371 		id = parse_expression(argv[1]);
3372 		if (id == 0)
3373 			name = argv[1];
3374 	}
3375 
3376 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3377 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3378 		B_PRINTF_POINTER_WIDTH, "size");
3379 
3380 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3381 	while ((area = it.Next()) != NULL) {
3382 		if ((id != 0 && area->address_space->ID() != id)
3383 			|| (name != NULL && strstr(area->name, name) == NULL))
3384 			continue;
3385 
3386 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3387 			area->id, (void*)area->Base(), (void*)area->Size(),
3388 			area->protection, area->wiring, area->name);
3389 	}
3390 	return 0;
3391 }
3392 
3393 
3394 static int
3395 dump_available_memory(int argc, char** argv)
3396 {
3397 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3398 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3399 	return 0;
3400 }
3401 
3402 
3403 /*!	Deletes all areas and reserved regions in the given address space.
3404 
3405 	The caller must ensure that none of the areas has any wired ranges.
3406 
3407 	\param addressSpace The address space.
3408 	\param deletingAddressSpace \c true, if the address space is in the process
3409 		of being deleted.
3410 */
3411 void
3412 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3413 {
3414 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3415 		addressSpace->ID()));
3416 
3417 	addressSpace->WriteLock();
3418 
3419 	// remove all reserved areas in this address space
3420 	addressSpace->UnreserveAllAddressRanges(0);
3421 
3422 	// delete all the areas in this address space
3423 	while (VMArea* area = addressSpace->FirstArea()) {
3424 		ASSERT(!area->IsWired());
3425 		delete_area(addressSpace, area, deletingAddressSpace);
3426 	}
3427 
3428 	addressSpace->WriteUnlock();
3429 }
3430 
3431 
3432 static area_id
3433 vm_area_for(addr_t address, bool kernel)
3434 {
3435 	team_id team;
3436 	if (IS_USER_ADDRESS(address)) {
3437 		// we try the user team address space, if any
3438 		team = VMAddressSpace::CurrentID();
3439 		if (team < 0)
3440 			return team;
3441 	} else
3442 		team = VMAddressSpace::KernelID();
3443 
3444 	AddressSpaceReadLocker locker(team);
3445 	if (!locker.IsLocked())
3446 		return B_BAD_TEAM_ID;
3447 
3448 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3449 	if (area != NULL) {
3450 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3451 			return B_ERROR;
3452 
3453 		return area->id;
3454 	}
3455 
3456 	return B_ERROR;
3457 }
3458 
3459 
3460 /*!	Frees physical pages that were used during the boot process.
3461 	\a end is inclusive.
3462 */
3463 static void
3464 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3465 {
3466 	// free all physical pages in the specified range
3467 
3468 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3469 		phys_addr_t physicalAddress;
3470 		uint32 flags;
3471 
3472 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3473 			&& (flags & PAGE_PRESENT) != 0) {
3474 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3475 			if (page != NULL && page->State() != PAGE_STATE_FREE
3476 					 && page->State() != PAGE_STATE_CLEAR
3477 					 && page->State() != PAGE_STATE_UNUSED) {
3478 				DEBUG_PAGE_ACCESS_START(page);
3479 				vm_page_set_state(page, PAGE_STATE_FREE);
3480 			}
3481 		}
3482 	}
3483 
3484 	// unmap the memory
3485 	map->Unmap(start, end);
3486 }
3487 
3488 
3489 void
3490 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3491 {
3492 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3493 	addr_t end = start + (size - 1);
3494 	addr_t lastEnd = start;
3495 
3496 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3497 		(void*)start, (void*)end));
3498 
3499 	// The areas are sorted in virtual address space order, so
3500 	// we just have to find the holes between them that fall
3501 	// into the area we should dispose
3502 
3503 	map->Lock();
3504 
3505 	for (VMAddressSpace::AreaIterator it
3506 				= VMAddressSpace::Kernel()->GetAreaIterator();
3507 			VMArea* area = it.Next();) {
3508 		addr_t areaStart = area->Base();
3509 		addr_t areaEnd = areaStart + (area->Size() - 1);
3510 
3511 		if (areaEnd < start)
3512 			continue;
3513 
3514 		if (areaStart > end) {
3515 			// we are done, the area is already beyond of what we have to free
3516 			break;
3517 		}
3518 
3519 		if (areaStart > lastEnd) {
3520 			// this is something we can free
3521 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3522 				(void*)areaStart));
3523 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3524 		}
3525 
3526 		if (areaEnd >= end) {
3527 			lastEnd = areaEnd;
3528 				// no +1 to prevent potential overflow
3529 			break;
3530 		}
3531 
3532 		lastEnd = areaEnd + 1;
3533 	}
3534 
3535 	if (lastEnd < end) {
3536 		// we can also get rid of some space at the end of the area
3537 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3538 			(void*)end));
3539 		unmap_and_free_physical_pages(map, lastEnd, end);
3540 	}
3541 
3542 	map->Unlock();
3543 }
3544 
3545 
3546 static void
3547 create_preloaded_image_areas(struct preloaded_image* _image)
3548 {
3549 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3550 	char name[B_OS_NAME_LENGTH];
3551 	void* address;
3552 	int32 length;
3553 
3554 	// use file name to create a good area name
3555 	char* fileName = strrchr(image->name, '/');
3556 	if (fileName == NULL)
3557 		fileName = image->name;
3558 	else
3559 		fileName++;
3560 
3561 	length = strlen(fileName);
3562 	// make sure there is enough space for the suffix
3563 	if (length > 25)
3564 		length = 25;
3565 
3566 	memcpy(name, fileName, length);
3567 	strcpy(name + length, "_text");
3568 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3569 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3570 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3571 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3572 		// this will later be remapped read-only/executable by the
3573 		// ELF initialization code
3574 
3575 	strcpy(name + length, "_data");
3576 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3577 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3578 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3579 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3580 }
3581 
3582 
3583 /*!	Frees all previously kernel arguments areas from the kernel_args structure.
3584 	Any boot loader resources contained in that arguments must not be accessed
3585 	anymore past this point.
3586 */
3587 void
3588 vm_free_kernel_args(kernel_args* args)
3589 {
3590 	uint32 i;
3591 
3592 	TRACE(("vm_free_kernel_args()\n"));
3593 
3594 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3595 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3596 		if (area >= B_OK)
3597 			delete_area(area);
3598 	}
3599 }
3600 
3601 
3602 static void
3603 allocate_kernel_args(kernel_args* args)
3604 {
3605 	TRACE(("allocate_kernel_args()\n"));
3606 
3607 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3608 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3609 
3610 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3611 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3612 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3613 	}
3614 }
3615 
3616 
3617 static void
3618 unreserve_boot_loader_ranges(kernel_args* args)
3619 {
3620 	TRACE(("unreserve_boot_loader_ranges()\n"));
3621 
3622 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3623 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3624 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3625 			args->virtual_allocated_range[i].size);
3626 	}
3627 }
3628 
3629 
3630 static void
3631 reserve_boot_loader_ranges(kernel_args* args)
3632 {
3633 	TRACE(("reserve_boot_loader_ranges()\n"));
3634 
3635 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3636 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3637 
3638 		// If the address is no kernel address, we just skip it. The
3639 		// architecture specific code has to deal with it.
3640 		if (!IS_KERNEL_ADDRESS(address)) {
3641 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3642 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3643 			continue;
3644 		}
3645 
3646 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3647 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3648 		if (status < B_OK)
3649 			panic("could not reserve boot loader ranges\n");
3650 	}
3651 }
3652 
3653 
3654 static addr_t
3655 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3656 {
3657 	size = PAGE_ALIGN(size);
3658 
3659 	// find a slot in the virtual allocation addr range
3660 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3661 		// check to see if the space between this one and the last is big enough
3662 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3663 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3664 			+ args->virtual_allocated_range[i - 1].size;
3665 
3666 		addr_t base = alignment > 0
3667 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3668 
3669 		if (base >= KERNEL_BASE && base < rangeStart
3670 				&& rangeStart - base >= size) {
3671 			args->virtual_allocated_range[i - 1].size
3672 				+= base + size - previousRangeEnd;
3673 			return base;
3674 		}
3675 	}
3676 
3677 	// we hadn't found one between allocation ranges. this is ok.
3678 	// see if there's a gap after the last one
3679 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3680 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3681 		+ args->virtual_allocated_range[lastEntryIndex].size;
3682 	addr_t base = alignment > 0
3683 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3684 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3685 		args->virtual_allocated_range[lastEntryIndex].size
3686 			+= base + size - lastRangeEnd;
3687 		return base;
3688 	}
3689 
3690 	// see if there's a gap before the first one
3691 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3692 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3693 		base = rangeStart - size;
3694 		if (alignment > 0)
3695 			base = ROUNDDOWN(base, alignment);
3696 
3697 		if (base >= KERNEL_BASE) {
3698 			args->virtual_allocated_range[0].start = base;
3699 			args->virtual_allocated_range[0].size += rangeStart - base;
3700 			return base;
3701 		}
3702 	}
3703 
3704 	return 0;
3705 }
3706 
3707 
3708 static bool
3709 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3710 {
3711 	// TODO: horrible brute-force method of determining if the page can be
3712 	// allocated
3713 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3714 		if (address >= args->physical_memory_range[i].start
3715 			&& address < args->physical_memory_range[i].start
3716 				+ args->physical_memory_range[i].size)
3717 			return true;
3718 	}
3719 	return false;
3720 }
3721 
3722 
3723 page_num_t
3724 vm_allocate_early_physical_page(kernel_args* args)
3725 {
3726 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3727 		phys_addr_t nextPage;
3728 
3729 		nextPage = args->physical_allocated_range[i].start
3730 			+ args->physical_allocated_range[i].size;
3731 		// see if the page after the next allocated paddr run can be allocated
3732 		if (i + 1 < args->num_physical_allocated_ranges
3733 			&& args->physical_allocated_range[i + 1].size != 0) {
3734 			// see if the next page will collide with the next allocated range
3735 			if (nextPage >= args->physical_allocated_range[i+1].start)
3736 				continue;
3737 		}
3738 		// see if the next physical page fits in the memory block
3739 		if (is_page_in_physical_memory_range(args, nextPage)) {
3740 			// we got one!
3741 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3742 			return nextPage / B_PAGE_SIZE;
3743 		}
3744 	}
3745 
3746 	return 0;
3747 		// could not allocate a block
3748 }
3749 
3750 
3751 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3752 	allocate some pages before the VM is completely up.
3753 */
3754 addr_t
3755 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3756 	uint32 attributes, addr_t alignment)
3757 {
3758 	if (physicalSize > virtualSize)
3759 		physicalSize = virtualSize;
3760 
3761 	// find the vaddr to allocate at
3762 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3763 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3764 
3765 	// map the pages
3766 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3767 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3768 		if (physicalAddress == 0)
3769 			panic("error allocating early page!\n");
3770 
3771 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3772 
3773 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3774 			physicalAddress * B_PAGE_SIZE, attributes,
3775 			&vm_allocate_early_physical_page);
3776 	}
3777 
3778 	return virtualBase;
3779 }
3780 
3781 
3782 /*!	The main entrance point to initialize the VM. */
3783 status_t
3784 vm_init(kernel_args* args)
3785 {
3786 	struct preloaded_image* image;
3787 	void* address;
3788 	status_t err = 0;
3789 	uint32 i;
3790 
3791 	TRACE(("vm_init: entry\n"));
3792 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3793 	err = arch_vm_init(args);
3794 
3795 	// initialize some globals
3796 	vm_page_init_num_pages(args);
3797 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3798 
3799 	slab_init(args);
3800 
3801 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3802 	size_t heapSize = INITIAL_HEAP_SIZE;
3803 	// try to accomodate low memory systems
3804 	while (heapSize > sAvailableMemory / 8)
3805 		heapSize /= 2;
3806 	if (heapSize < 1024 * 1024)
3807 		panic("vm_init: go buy some RAM please.");
3808 
3809 	// map in the new heap and initialize it
3810 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3811 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3812 	TRACE(("heap at 0x%lx\n", heapBase));
3813 	heap_init(heapBase, heapSize);
3814 #endif
3815 
3816 	// initialize the free page list and physical page mapper
3817 	vm_page_init(args);
3818 
3819 	// initialize the cache allocators
3820 	vm_cache_init(args);
3821 
3822 	{
3823 		status_t error = VMAreaHash::Init();
3824 		if (error != B_OK)
3825 			panic("vm_init: error initializing area hash table\n");
3826 	}
3827 
3828 	VMAddressSpace::Init();
3829 	reserve_boot_loader_ranges(args);
3830 
3831 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3832 	heap_init_post_area();
3833 #endif
3834 
3835 	// Do any further initialization that the architecture dependant layers may
3836 	// need now
3837 	arch_vm_translation_map_init_post_area(args);
3838 	arch_vm_init_post_area(args);
3839 	vm_page_init_post_area(args);
3840 	slab_init_post_area();
3841 
3842 	// allocate areas to represent stuff that already exists
3843 
3844 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3845 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
3846 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
3847 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3848 #endif
3849 
3850 	allocate_kernel_args(args);
3851 
3852 	create_preloaded_image_areas(args->kernel_image);
3853 
3854 	// allocate areas for preloaded images
3855 	for (image = args->preloaded_images; image != NULL; image = image->next)
3856 		create_preloaded_image_areas(image);
3857 
3858 	// allocate kernel stacks
3859 	for (i = 0; i < args->num_cpus; i++) {
3860 		char name[64];
3861 
3862 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
3863 		address = (void*)args->cpu_kstack[i].start;
3864 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
3865 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3866 	}
3867 
3868 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
3869 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
3870 
3871 #if PARANOID_KERNEL_MALLOC
3872 	vm_block_address_range("uninitialized heap memory",
3873 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3874 #endif
3875 #if PARANOID_KERNEL_FREE
3876 	vm_block_address_range("freed heap memory",
3877 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3878 #endif
3879 
3880 	// create the object cache for the page mappings
3881 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
3882 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
3883 		NULL, NULL);
3884 	if (gPageMappingsObjectCache == NULL)
3885 		panic("failed to create page mappings object cache");
3886 
3887 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
3888 
3889 #if DEBUG_CACHE_LIST
3890 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
3891 		virtual_address_restrictions virtualRestrictions = {};
3892 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
3893 		physical_address_restrictions physicalRestrictions = {};
3894 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
3895 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
3896 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
3897 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
3898 			&physicalRestrictions, (void**)&sCacheInfoTable);
3899 	}
3900 #endif	// DEBUG_CACHE_LIST
3901 
3902 	// add some debugger commands
3903 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
3904 	add_debugger_command("area", &dump_area,
3905 		"Dump info about a particular area");
3906 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
3907 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
3908 #if DEBUG_CACHE_LIST
3909 	if (sCacheInfoTable != NULL) {
3910 		add_debugger_command_etc("caches", &dump_caches,
3911 			"List all VMCache trees",
3912 			"[ \"-c\" ]\n"
3913 			"All cache trees are listed sorted in decreasing order by number "
3914 				"of\n"
3915 			"used pages or, if \"-c\" is specified, by size of committed "
3916 				"memory.\n",
3917 			0);
3918 	}
3919 #endif
3920 	add_debugger_command("avail", &dump_available_memory,
3921 		"Dump available memory");
3922 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
3923 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
3924 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
3925 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
3926 	add_debugger_command("string", &display_mem, "dump strings");
3927 
3928 	TRACE(("vm_init: exit\n"));
3929 
3930 	vm_cache_init_post_heap();
3931 
3932 	return err;
3933 }
3934 
3935 
3936 status_t
3937 vm_init_post_sem(kernel_args* args)
3938 {
3939 	// This frees all unused boot loader resources and makes its space available
3940 	// again
3941 	arch_vm_init_end(args);
3942 	unreserve_boot_loader_ranges(args);
3943 
3944 	// fill in all of the semaphores that were not allocated before
3945 	// since we're still single threaded and only the kernel address space
3946 	// exists, it isn't that hard to find all of the ones we need to create
3947 
3948 	arch_vm_translation_map_init_post_sem(args);
3949 
3950 	slab_init_post_sem();
3951 
3952 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3953 	heap_init_post_sem();
3954 #endif
3955 
3956 	return B_OK;
3957 }
3958 
3959 
3960 status_t
3961 vm_init_post_thread(kernel_args* args)
3962 {
3963 	vm_page_init_post_thread(args);
3964 	slab_init_post_thread();
3965 	return heap_init_post_thread();
3966 }
3967 
3968 
3969 status_t
3970 vm_init_post_modules(kernel_args* args)
3971 {
3972 	return arch_vm_init_post_modules(args);
3973 }
3974 
3975 
3976 void
3977 permit_page_faults(void)
3978 {
3979 	Thread* thread = thread_get_current_thread();
3980 	if (thread != NULL)
3981 		atomic_add(&thread->page_faults_allowed, 1);
3982 }
3983 
3984 
3985 void
3986 forbid_page_faults(void)
3987 {
3988 	Thread* thread = thread_get_current_thread();
3989 	if (thread != NULL)
3990 		atomic_add(&thread->page_faults_allowed, -1);
3991 }
3992 
3993 
3994 status_t
3995 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
3996 	addr_t* newIP)
3997 {
3998 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
3999 		faultAddress));
4000 
4001 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4002 
4003 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4004 	VMAddressSpace* addressSpace = NULL;
4005 
4006 	status_t status = B_OK;
4007 	*newIP = 0;
4008 	atomic_add((int32*)&sPageFaults, 1);
4009 
4010 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4011 		addressSpace = VMAddressSpace::GetKernel();
4012 	} else if (IS_USER_ADDRESS(pageAddress)) {
4013 		addressSpace = VMAddressSpace::GetCurrent();
4014 		if (addressSpace == NULL) {
4015 			if (!isUser) {
4016 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4017 					"memory!\n");
4018 				status = B_BAD_ADDRESS;
4019 				TPF(PageFaultError(-1,
4020 					VMPageFaultTracing
4021 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4022 			} else {
4023 				// XXX weird state.
4024 				panic("vm_page_fault: non kernel thread accessing user memory "
4025 					"that doesn't exist!\n");
4026 				status = B_BAD_ADDRESS;
4027 			}
4028 		}
4029 	} else {
4030 		// the hit was probably in the 64k DMZ between kernel and user space
4031 		// this keeps a user space thread from passing a buffer that crosses
4032 		// into kernel space
4033 		status = B_BAD_ADDRESS;
4034 		TPF(PageFaultError(-1,
4035 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4036 	}
4037 
4038 	if (status == B_OK) {
4039 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser,
4040 			NULL);
4041 	}
4042 
4043 	if (status < B_OK) {
4044 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4045 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4046 			strerror(status), address, faultAddress, isWrite, isUser,
4047 			thread_get_current_thread_id());
4048 		if (!isUser) {
4049 			Thread* thread = thread_get_current_thread();
4050 			if (thread != NULL && thread->fault_handler != 0) {
4051 				// this will cause the arch dependant page fault handler to
4052 				// modify the IP on the interrupt frame or whatever to return
4053 				// to this address
4054 				*newIP = thread->fault_handler;
4055 			} else {
4056 				// unhandled page fault in the kernel
4057 				panic("vm_page_fault: unhandled page fault in kernel space at "
4058 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4059 			}
4060 		} else {
4061 #if 1
4062 			// TODO: remove me once we have proper userland debugging support
4063 			// (and tools)
4064 			VMArea* area = NULL;
4065 			if (addressSpace != NULL) {
4066 				addressSpace->ReadLock();
4067 				area = addressSpace->LookupArea(faultAddress);
4068 			}
4069 
4070 			Thread* thread = thread_get_current_thread();
4071 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4072 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4073 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4074 				thread->team->Name(), thread->team->id,
4075 				isWrite ? "write" : "read", address, faultAddress,
4076 				area ? area->name : "???", faultAddress - (area ?
4077 					area->Base() : 0x0));
4078 
4079 			// We can print a stack trace of the userland thread here.
4080 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4081 // fault and someone is already waiting for a write lock on the same address
4082 // space. This thread will then try to acquire the lock again and will
4083 // be queued after the writer.
4084 #	if 0
4085 			if (area) {
4086 				struct stack_frame {
4087 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4088 						struct stack_frame*	previous;
4089 						void*				return_address;
4090 					#else
4091 						// ...
4092 					#warning writeme
4093 					#endif
4094 				} frame;
4095 #		ifdef __INTEL__
4096 				struct iframe* iframe = x86_get_user_iframe();
4097 				if (iframe == NULL)
4098 					panic("iframe is NULL!");
4099 
4100 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4101 					sizeof(struct stack_frame));
4102 #		elif defined(__POWERPC__)
4103 				struct iframe* iframe = ppc_get_user_iframe();
4104 				if (iframe == NULL)
4105 					panic("iframe is NULL!");
4106 
4107 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4108 					sizeof(struct stack_frame));
4109 #		else
4110 #			warning "vm_page_fault() stack trace won't work"
4111 				status = B_ERROR;
4112 #		endif
4113 
4114 				dprintf("stack trace:\n");
4115 				int32 maxFrames = 50;
4116 				while (status == B_OK && --maxFrames >= 0
4117 						&& frame.return_address != NULL) {
4118 					dprintf("  %p", frame.return_address);
4119 					area = addressSpace->LookupArea(
4120 						(addr_t)frame.return_address);
4121 					if (area) {
4122 						dprintf(" (%s + %#lx)", area->name,
4123 							(addr_t)frame.return_address - area->Base());
4124 					}
4125 					dprintf("\n");
4126 
4127 					status = user_memcpy(&frame, frame.previous,
4128 						sizeof(struct stack_frame));
4129 				}
4130 			}
4131 #	endif	// 0 (stack trace)
4132 
4133 			if (addressSpace != NULL)
4134 				addressSpace->ReadUnlock();
4135 #endif
4136 
4137 			// If the thread has a signal handler for SIGSEGV, we simply
4138 			// send it the signal. Otherwise we notify the user debugger
4139 			// first.
4140 			struct sigaction action;
4141 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4142 					&& action.sa_handler != SIG_DFL
4143 					&& action.sa_handler != SIG_IGN)
4144 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4145 					SIGSEGV)) {
4146 				Signal signal(SIGSEGV,
4147 					status == B_PERMISSION_DENIED
4148 						? SEGV_ACCERR : SEGV_MAPERR,
4149 					EFAULT, thread->team->id);
4150 				signal.SetAddress((void*)address);
4151 				send_signal_to_thread(thread, signal, 0);
4152 			}
4153 		}
4154 	}
4155 
4156 	if (addressSpace != NULL)
4157 		addressSpace->Put();
4158 
4159 	return B_HANDLED_INTERRUPT;
4160 }
4161 
4162 
4163 struct PageFaultContext {
4164 	AddressSpaceReadLocker	addressSpaceLocker;
4165 	VMCacheChainLocker		cacheChainLocker;
4166 
4167 	VMTranslationMap*		map;
4168 	VMCache*				topCache;
4169 	off_t					cacheOffset;
4170 	vm_page_reservation		reservation;
4171 	bool					isWrite;
4172 
4173 	// return values
4174 	vm_page*				page;
4175 	bool					restart;
4176 
4177 
4178 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4179 		:
4180 		addressSpaceLocker(addressSpace, true),
4181 		map(addressSpace->TranslationMap()),
4182 		isWrite(isWrite)
4183 	{
4184 	}
4185 
4186 	~PageFaultContext()
4187 	{
4188 		UnlockAll();
4189 		vm_page_unreserve_pages(&reservation);
4190 	}
4191 
4192 	void Prepare(VMCache* topCache, off_t cacheOffset)
4193 	{
4194 		this->topCache = topCache;
4195 		this->cacheOffset = cacheOffset;
4196 		page = NULL;
4197 		restart = false;
4198 
4199 		cacheChainLocker.SetTo(topCache);
4200 	}
4201 
4202 	void UnlockAll(VMCache* exceptCache = NULL)
4203 	{
4204 		topCache = NULL;
4205 		addressSpaceLocker.Unlock();
4206 		cacheChainLocker.Unlock(exceptCache);
4207 	}
4208 };
4209 
4210 
4211 /*!	Gets the page that should be mapped into the area.
4212 	Returns an error code other than \c B_OK, if the page couldn't be found or
4213 	paged in. The locking state of the address space and the caches is undefined
4214 	in that case.
4215 	Returns \c B_OK with \c context.restart set to \c true, if the functions
4216 	had to unlock the address space and all caches and is supposed to be called
4217 	again.
4218 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4219 	found. It is returned in \c context.page. The address space will still be
4220 	locked as well as all caches starting from the top cache to at least the
4221 	cache the page lives in.
4222 */
4223 static status_t
4224 fault_get_page(PageFaultContext& context)
4225 {
4226 	VMCache* cache = context.topCache;
4227 	VMCache* lastCache = NULL;
4228 	vm_page* page = NULL;
4229 
4230 	while (cache != NULL) {
4231 		// We already hold the lock of the cache at this point.
4232 
4233 		lastCache = cache;
4234 
4235 		page = cache->LookupPage(context.cacheOffset);
4236 		if (page != NULL && page->busy) {
4237 			// page must be busy -- wait for it to become unbusy
4238 			context.UnlockAll(cache);
4239 			cache->ReleaseRefLocked();
4240 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4241 
4242 			// restart the whole process
4243 			context.restart = true;
4244 			return B_OK;
4245 		}
4246 
4247 		if (page != NULL)
4248 			break;
4249 
4250 		// The current cache does not contain the page we're looking for.
4251 
4252 		// see if the backing store has it
4253 		if (cache->HasPage(context.cacheOffset)) {
4254 			// insert a fresh page and mark it busy -- we're going to read it in
4255 			page = vm_page_allocate_page(&context.reservation,
4256 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4257 			cache->InsertPage(page, context.cacheOffset);
4258 
4259 			// We need to unlock all caches and the address space while reading
4260 			// the page in. Keep a reference to the cache around.
4261 			cache->AcquireRefLocked();
4262 			context.UnlockAll();
4263 
4264 			// read the page in
4265 			generic_io_vec vec;
4266 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4267 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4268 
4269 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4270 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4271 
4272 			cache->Lock();
4273 
4274 			if (status < B_OK) {
4275 				// on error remove and free the page
4276 				dprintf("reading page from cache %p returned: %s!\n",
4277 					cache, strerror(status));
4278 
4279 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4280 				cache->RemovePage(page);
4281 				vm_page_set_state(page, PAGE_STATE_FREE);
4282 
4283 				cache->ReleaseRefAndUnlock();
4284 				return status;
4285 			}
4286 
4287 			// mark the page unbusy again
4288 			cache->MarkPageUnbusy(page);
4289 
4290 			DEBUG_PAGE_ACCESS_END(page);
4291 
4292 			// Since we needed to unlock everything temporarily, the area
4293 			// situation might have changed. So we need to restart the whole
4294 			// process.
4295 			cache->ReleaseRefAndUnlock();
4296 			context.restart = true;
4297 			return B_OK;
4298 		}
4299 
4300 		cache = context.cacheChainLocker.LockSourceCache();
4301 	}
4302 
4303 	if (page == NULL) {
4304 		// There was no adequate page, determine the cache for a clean one.
4305 		// Read-only pages come in the deepest cache, only the top most cache
4306 		// may have direct write access.
4307 		cache = context.isWrite ? context.topCache : lastCache;
4308 
4309 		// allocate a clean page
4310 		page = vm_page_allocate_page(&context.reservation,
4311 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4312 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4313 			page->physical_page_number));
4314 
4315 		// insert the new page into our cache
4316 		cache->InsertPage(page, context.cacheOffset);
4317 	} else if (page->Cache() != context.topCache && context.isWrite) {
4318 		// We have a page that has the data we want, but in the wrong cache
4319 		// object so we need to copy it and stick it into the top cache.
4320 		vm_page* sourcePage = page;
4321 
4322 		// TODO: If memory is low, it might be a good idea to steal the page
4323 		// from our source cache -- if possible, that is.
4324 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4325 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4326 
4327 		// To not needlessly kill concurrency we unlock all caches but the top
4328 		// one while copying the page. Lacking another mechanism to ensure that
4329 		// the source page doesn't disappear, we mark it busy.
4330 		sourcePage->busy = true;
4331 		context.cacheChainLocker.UnlockKeepRefs(true);
4332 
4333 		// copy the page
4334 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4335 			sourcePage->physical_page_number * B_PAGE_SIZE);
4336 
4337 		context.cacheChainLocker.RelockCaches(true);
4338 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4339 
4340 		// insert the new page into our cache
4341 		context.topCache->InsertPage(page, context.cacheOffset);
4342 	} else
4343 		DEBUG_PAGE_ACCESS_START(page);
4344 
4345 	context.page = page;
4346 	return B_OK;
4347 }
4348 
4349 
4350 /*!	Makes sure the address in the given address space is mapped.
4351 
4352 	\param addressSpace The address space.
4353 	\param originalAddress The address. Doesn't need to be page aligned.
4354 	\param isWrite If \c true the address shall be write-accessible.
4355 	\param isUser If \c true the access is requested by a userland team.
4356 	\param wirePage On success, if non \c NULL, the wired count of the page
4357 		mapped at the given address is incremented and the page is returned
4358 		via this parameter.
4359 	\param wiredRange If given, this wiredRange is ignored when checking whether
4360 		an already mapped page at the virtual address can be unmapped.
4361 	\return \c B_OK on success, another error code otherwise.
4362 */
4363 static status_t
4364 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4365 	bool isWrite, bool isUser, vm_page** wirePage, VMAreaWiredRange* wiredRange)
4366 {
4367 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4368 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4369 		originalAddress, isWrite, isUser));
4370 
4371 	PageFaultContext context(addressSpace, isWrite);
4372 
4373 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4374 	status_t status = B_OK;
4375 
4376 	addressSpace->IncrementFaultCount();
4377 
4378 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4379 	// the pages upfront makes sure we don't have any cache locked, so that the
4380 	// page daemon/thief can do their job without problems.
4381 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4382 		originalAddress);
4383 	context.addressSpaceLocker.Unlock();
4384 	vm_page_reserve_pages(&context.reservation, reservePages,
4385 		addressSpace == VMAddressSpace::Kernel()
4386 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4387 
4388 	while (true) {
4389 		context.addressSpaceLocker.Lock();
4390 
4391 		// get the area the fault was in
4392 		VMArea* area = addressSpace->LookupArea(address);
4393 		if (area == NULL) {
4394 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4395 				"space\n", originalAddress);
4396 			TPF(PageFaultError(-1,
4397 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4398 			status = B_BAD_ADDRESS;
4399 			break;
4400 		}
4401 
4402 		// check permissions
4403 		uint32 protection = get_area_page_protection(area, address);
4404 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4405 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4406 				area->id, (void*)originalAddress);
4407 			TPF(PageFaultError(area->id,
4408 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4409 			status = B_PERMISSION_DENIED;
4410 			break;
4411 		}
4412 		if (isWrite && (protection
4413 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4414 			dprintf("write access attempted on write-protected area 0x%"
4415 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4416 			TPF(PageFaultError(area->id,
4417 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4418 			status = B_PERMISSION_DENIED;
4419 			break;
4420 		} else if (!isWrite && (protection
4421 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4422 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4423 				" at %p\n", area->id, (void*)originalAddress);
4424 			TPF(PageFaultError(area->id,
4425 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4426 			status = B_PERMISSION_DENIED;
4427 			break;
4428 		}
4429 
4430 		// We have the area, it was a valid access, so let's try to resolve the
4431 		// page fault now.
4432 		// At first, the top most cache from the area is investigated.
4433 
4434 		context.Prepare(vm_area_get_locked_cache(area),
4435 			address - area->Base() + area->cache_offset);
4436 
4437 		// See if this cache has a fault handler -- this will do all the work
4438 		// for us.
4439 		{
4440 			// Note, since the page fault is resolved with interrupts enabled,
4441 			// the fault handler could be called more than once for the same
4442 			// reason -- the store must take this into account.
4443 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4444 			if (status != B_BAD_HANDLER)
4445 				break;
4446 		}
4447 
4448 		// The top most cache has no fault handler, so let's see if the cache or
4449 		// its sources already have the page we're searching for (we're going
4450 		// from top to bottom).
4451 		status = fault_get_page(context);
4452 		if (status != B_OK) {
4453 			TPF(PageFaultError(area->id, status));
4454 			break;
4455 		}
4456 
4457 		if (context.restart)
4458 			continue;
4459 
4460 		// All went fine, all there is left to do is to map the page into the
4461 		// address space.
4462 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4463 			context.page));
4464 
4465 		// If the page doesn't reside in the area's cache, we need to make sure
4466 		// it's mapped in read-only, so that we cannot overwrite someone else's
4467 		// data (copy-on-write)
4468 		uint32 newProtection = protection;
4469 		if (context.page->Cache() != context.topCache && !isWrite)
4470 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4471 
4472 		bool unmapPage = false;
4473 		bool mapPage = true;
4474 
4475 		// check whether there's already a page mapped at the address
4476 		context.map->Lock();
4477 
4478 		phys_addr_t physicalAddress;
4479 		uint32 flags;
4480 		vm_page* mappedPage = NULL;
4481 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4482 			&& (flags & PAGE_PRESENT) != 0
4483 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4484 				!= NULL) {
4485 			// Yep there's already a page. If it's ours, we can simply adjust
4486 			// its protection. Otherwise we have to unmap it.
4487 			if (mappedPage == context.page) {
4488 				context.map->ProtectPage(area, address, newProtection);
4489 					// Note: We assume that ProtectPage() is atomic (i.e.
4490 					// the page isn't temporarily unmapped), otherwise we'd have
4491 					// to make sure it isn't wired.
4492 				mapPage = false;
4493 			} else
4494 				unmapPage = true;
4495 		}
4496 
4497 		context.map->Unlock();
4498 
4499 		if (unmapPage) {
4500 			// If the page is wired, we can't unmap it. Wait until it is unwired
4501 			// again and restart.
4502 			VMAreaUnwiredWaiter waiter;
4503 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4504 					wiredRange)) {
4505 				// unlock everything and wait
4506 				context.UnlockAll();
4507 				waiter.waitEntry.Wait();
4508 				continue;
4509 			}
4510 
4511 			// Note: The mapped page is a page of a lower cache. We are
4512 			// guaranteed to have that cached locked, our new page is a copy of
4513 			// that page, and the page is not busy. The logic for that guarantee
4514 			// is as follows: Since the page is mapped, it must live in the top
4515 			// cache (ruled out above) or any of its lower caches, and there is
4516 			// (was before the new page was inserted) no other page in any
4517 			// cache between the top cache and the page's cache (otherwise that
4518 			// would be mapped instead). That in turn means that our algorithm
4519 			// must have found it and therefore it cannot be busy either.
4520 			DEBUG_PAGE_ACCESS_START(mappedPage);
4521 			unmap_page(area, address);
4522 			DEBUG_PAGE_ACCESS_END(mappedPage);
4523 		}
4524 
4525 		if (mapPage) {
4526 			if (map_page(area, context.page, address, newProtection,
4527 					&context.reservation) != B_OK) {
4528 				// Mapping can only fail, when the page mapping object couldn't
4529 				// be allocated. Save for the missing mapping everything is
4530 				// fine, though. If this was a regular page fault, we'll simply
4531 				// leave and probably fault again. To make sure we'll have more
4532 				// luck then, we ensure that the minimum object reserve is
4533 				// available.
4534 				DEBUG_PAGE_ACCESS_END(context.page);
4535 
4536 				context.UnlockAll();
4537 
4538 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4539 						!= B_OK) {
4540 					// Apparently the situation is serious. Let's get ourselves
4541 					// killed.
4542 					status = B_NO_MEMORY;
4543 				} else if (wirePage != NULL) {
4544 					// The caller expects us to wire the page. Since
4545 					// object_cache_reserve() succeeded, we should now be able
4546 					// to allocate a mapping structure. Restart.
4547 					continue;
4548 				}
4549 
4550 				break;
4551 			}
4552 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4553 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4554 
4555 		// also wire the page, if requested
4556 		if (wirePage != NULL && status == B_OK) {
4557 			increment_page_wired_count(context.page);
4558 			*wirePage = context.page;
4559 		}
4560 
4561 		DEBUG_PAGE_ACCESS_END(context.page);
4562 
4563 		break;
4564 	}
4565 
4566 	return status;
4567 }
4568 
4569 
4570 status_t
4571 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4572 {
4573 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4574 }
4575 
4576 status_t
4577 vm_put_physical_page(addr_t vaddr, void* handle)
4578 {
4579 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4580 }
4581 
4582 
4583 status_t
4584 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4585 	void** _handle)
4586 {
4587 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4588 }
4589 
4590 status_t
4591 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4592 {
4593 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4594 }
4595 
4596 
4597 status_t
4598 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4599 {
4600 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4601 }
4602 
4603 status_t
4604 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4605 {
4606 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4607 }
4608 
4609 
4610 void
4611 vm_get_info(system_memory_info* info)
4612 {
4613 	swap_get_info(info);
4614 
4615 	info->max_memory = vm_page_num_pages() * B_PAGE_SIZE;
4616 	info->page_faults = sPageFaults;
4617 
4618 	MutexLocker locker(sAvailableMemoryLock);
4619 	info->free_memory = sAvailableMemory;
4620 	info->needed_memory = sNeededMemory;
4621 }
4622 
4623 
4624 uint32
4625 vm_num_page_faults(void)
4626 {
4627 	return sPageFaults;
4628 }
4629 
4630 
4631 off_t
4632 vm_available_memory(void)
4633 {
4634 	MutexLocker locker(sAvailableMemoryLock);
4635 	return sAvailableMemory;
4636 }
4637 
4638 
4639 off_t
4640 vm_available_not_needed_memory(void)
4641 {
4642 	MutexLocker locker(sAvailableMemoryLock);
4643 	return sAvailableMemory - sNeededMemory;
4644 }
4645 
4646 
4647 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4648 	debugger.
4649 */
4650 off_t
4651 vm_available_not_needed_memory_debug(void)
4652 {
4653 	return sAvailableMemory - sNeededMemory;
4654 }
4655 
4656 
4657 size_t
4658 vm_kernel_address_space_left(void)
4659 {
4660 	return VMAddressSpace::Kernel()->FreeSpace();
4661 }
4662 
4663 
4664 void
4665 vm_unreserve_memory(size_t amount)
4666 {
4667 	mutex_lock(&sAvailableMemoryLock);
4668 
4669 	sAvailableMemory += amount;
4670 
4671 	mutex_unlock(&sAvailableMemoryLock);
4672 }
4673 
4674 
4675 status_t
4676 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4677 {
4678 	size_t reserve = kMemoryReserveForPriority[priority];
4679 
4680 	MutexLocker locker(sAvailableMemoryLock);
4681 
4682 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4683 
4684 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4685 		sAvailableMemory -= amount;
4686 		return B_OK;
4687 	}
4688 
4689 	if (timeout <= 0)
4690 		return B_NO_MEMORY;
4691 
4692 	// turn timeout into an absolute timeout
4693 	timeout += system_time();
4694 
4695 	// loop until we've got the memory or the timeout occurs
4696 	do {
4697 		sNeededMemory += amount;
4698 
4699 		// call the low resource manager
4700 		locker.Unlock();
4701 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4702 			B_ABSOLUTE_TIMEOUT, timeout);
4703 		locker.Lock();
4704 
4705 		sNeededMemory -= amount;
4706 
4707 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4708 			sAvailableMemory -= amount;
4709 			return B_OK;
4710 		}
4711 	} while (timeout > system_time());
4712 
4713 	return B_NO_MEMORY;
4714 }
4715 
4716 
4717 status_t
4718 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4719 {
4720 	// NOTE: The caller is responsible for synchronizing calls to this function!
4721 
4722 	AddressSpaceReadLocker locker;
4723 	VMArea* area;
4724 	status_t status = locker.SetFromArea(id, area);
4725 	if (status != B_OK)
4726 		return status;
4727 
4728 	// nothing to do, if the type doesn't change
4729 	uint32 oldType = area->MemoryType();
4730 	if (type == oldType)
4731 		return B_OK;
4732 
4733 	// set the memory type of the area and the mapped pages
4734 	VMTranslationMap* map = area->address_space->TranslationMap();
4735 	map->Lock();
4736 	area->SetMemoryType(type);
4737 	map->ProtectArea(area, area->protection);
4738 	map->Unlock();
4739 
4740 	// set the physical memory type
4741 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4742 	if (error != B_OK) {
4743 		// reset the memory type of the area and the mapped pages
4744 		map->Lock();
4745 		area->SetMemoryType(oldType);
4746 		map->ProtectArea(area, area->protection);
4747 		map->Unlock();
4748 		return error;
4749 	}
4750 
4751 	return B_OK;
4752 
4753 }
4754 
4755 
4756 /*!	This function enforces some protection properties:
4757 	 - if B_WRITE_AREA is set, B_WRITE_KERNEL_AREA is set as well
4758 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4759 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4760 	   and B_KERNEL_WRITE_AREA.
4761 */
4762 static void
4763 fix_protection(uint32* protection)
4764 {
4765 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4766 		if ((*protection & B_USER_PROTECTION) == 0
4767 			|| (*protection & B_WRITE_AREA) != 0)
4768 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4769 		else
4770 			*protection |= B_KERNEL_READ_AREA;
4771 	}
4772 }
4773 
4774 
4775 static void
4776 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4777 {
4778 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4779 	info->area = area->id;
4780 	info->address = (void*)area->Base();
4781 	info->size = area->Size();
4782 	info->protection = area->protection;
4783 	info->lock = B_FULL_LOCK;
4784 	info->team = area->address_space->ID();
4785 	info->copy_count = 0;
4786 	info->in_count = 0;
4787 	info->out_count = 0;
4788 		// TODO: retrieve real values here!
4789 
4790 	VMCache* cache = vm_area_get_locked_cache(area);
4791 
4792 	// Note, this is a simplification; the cache could be larger than this area
4793 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4794 
4795 	vm_area_put_locked_cache(cache);
4796 }
4797 
4798 
4799 static status_t
4800 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4801 {
4802 	// is newSize a multiple of B_PAGE_SIZE?
4803 	if (newSize & (B_PAGE_SIZE - 1))
4804 		return B_BAD_VALUE;
4805 
4806 	// lock all affected address spaces and the cache
4807 	VMArea* area;
4808 	VMCache* cache;
4809 
4810 	MultiAddressSpaceLocker locker;
4811 	AreaCacheLocker cacheLocker;
4812 
4813 	status_t status;
4814 	size_t oldSize;
4815 	bool anyKernelArea;
4816 	bool restart;
4817 
4818 	do {
4819 		anyKernelArea = false;
4820 		restart = false;
4821 
4822 		locker.Unset();
4823 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4824 		if (status != B_OK)
4825 			return status;
4826 		cacheLocker.SetTo(cache, true);	// already locked
4827 
4828 		// enforce restrictions
4829 		if (!kernel) {
4830 			if ((area->protection & B_KERNEL_AREA) != 0)
4831 				return B_NOT_ALLOWED;
4832 			// TODO: Enforce all restrictions (team, etc.)!
4833 		}
4834 
4835 		oldSize = area->Size();
4836 		if (newSize == oldSize)
4837 			return B_OK;
4838 
4839 		if (cache->type != CACHE_TYPE_RAM)
4840 			return B_NOT_ALLOWED;
4841 
4842 		if (oldSize < newSize) {
4843 			// We need to check if all areas of this cache can be resized.
4844 			for (VMArea* current = cache->areas; current != NULL;
4845 					current = current->cache_next) {
4846 				if (!current->address_space->CanResizeArea(current, newSize))
4847 					return B_ERROR;
4848 				anyKernelArea
4849 					|= current->address_space == VMAddressSpace::Kernel();
4850 			}
4851 		} else {
4852 			// We're shrinking the areas, so we must make sure the affected
4853 			// ranges are not wired.
4854 			for (VMArea* current = cache->areas; current != NULL;
4855 					current = current->cache_next) {
4856 				anyKernelArea
4857 					|= current->address_space == VMAddressSpace::Kernel();
4858 
4859 				if (wait_if_area_range_is_wired(current,
4860 						current->Base() + newSize, oldSize - newSize, &locker,
4861 						&cacheLocker)) {
4862 					restart = true;
4863 					break;
4864 				}
4865 			}
4866 		}
4867 	} while (restart);
4868 
4869 	// Okay, looks good so far, so let's do it
4870 
4871 	int priority = kernel && anyKernelArea
4872 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
4873 	uint32 allocationFlags = kernel && anyKernelArea
4874 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
4875 
4876 	if (oldSize < newSize) {
4877 		// Growing the cache can fail, so we do it first.
4878 		status = cache->Resize(cache->virtual_base + newSize, priority);
4879 		if (status != B_OK)
4880 			return status;
4881 	}
4882 
4883 	for (VMArea* current = cache->areas; current != NULL;
4884 			current = current->cache_next) {
4885 		status = current->address_space->ResizeArea(current, newSize,
4886 			allocationFlags);
4887 		if (status != B_OK)
4888 			break;
4889 
4890 		// We also need to unmap all pages beyond the new size, if the area has
4891 		// shrunk
4892 		if (newSize < oldSize) {
4893 			VMCacheChainLocker cacheChainLocker(cache);
4894 			cacheChainLocker.LockAllSourceCaches();
4895 
4896 			unmap_pages(current, current->Base() + newSize,
4897 				oldSize - newSize);
4898 
4899 			cacheChainLocker.Unlock(cache);
4900 		}
4901 	}
4902 
4903 	if (status == B_OK) {
4904 		// Shrink or grow individual page protections if in use.
4905 		if (area->page_protections != NULL) {
4906 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
4907 			uint8* newProtections
4908 				= (uint8*)realloc(area->page_protections, bytes);
4909 			if (newProtections == NULL)
4910 				status = B_NO_MEMORY;
4911 			else {
4912 				area->page_protections = newProtections;
4913 
4914 				if (oldSize < newSize) {
4915 					// init the additional page protections to that of the area
4916 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
4917 					uint32 areaProtection = area->protection
4918 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
4919 					memset(area->page_protections + offset,
4920 						areaProtection | (areaProtection << 4), bytes - offset);
4921 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
4922 						uint8& entry = area->page_protections[offset - 1];
4923 						entry = (entry & 0x0f) | (areaProtection << 4);
4924 					}
4925 				}
4926 			}
4927 		}
4928 	}
4929 
4930 	// shrinking the cache can't fail, so we do it now
4931 	if (status == B_OK && newSize < oldSize)
4932 		status = cache->Resize(cache->virtual_base + newSize, priority);
4933 
4934 	if (status != B_OK) {
4935 		// Something failed -- resize the areas back to their original size.
4936 		// This can fail, too, in which case we're seriously screwed.
4937 		for (VMArea* current = cache->areas; current != NULL;
4938 				current = current->cache_next) {
4939 			if (current->address_space->ResizeArea(current, oldSize,
4940 					allocationFlags) != B_OK) {
4941 				panic("vm_resize_area(): Failed and not being able to restore "
4942 					"original state.");
4943 			}
4944 		}
4945 
4946 		cache->Resize(cache->virtual_base + oldSize, priority);
4947 	}
4948 
4949 	// TODO: we must honour the lock restrictions of this area
4950 	return status;
4951 }
4952 
4953 
4954 status_t
4955 vm_memset_physical(phys_addr_t address, int value, size_t length)
4956 {
4957 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
4958 }
4959 
4960 
4961 status_t
4962 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
4963 {
4964 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
4965 }
4966 
4967 
4968 status_t
4969 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
4970 	bool user)
4971 {
4972 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
4973 }
4974 
4975 
4976 void
4977 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
4978 {
4979 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
4980 }
4981 
4982 
4983 /*!	Copies a range of memory directly from/to a page that might not be mapped
4984 	at the moment.
4985 
4986 	For \a unsafeMemory the current mapping (if any is ignored). The function
4987 	walks through the respective area's cache chain to find the physical page
4988 	and copies from/to it directly.
4989 	The memory range starting at \a unsafeMemory with a length of \a size bytes
4990 	must not cross a page boundary.
4991 
4992 	\param teamID The team ID identifying the address space \a unsafeMemory is
4993 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
4994 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
4995 		is passed, the address space of the thread returned by
4996 		debug_get_debugged_thread() is used.
4997 	\param unsafeMemory The start of the unsafe memory range to be copied
4998 		from/to.
4999 	\param buffer A safely accessible kernel buffer to be copied from/to.
5000 	\param size The number of bytes to be copied.
5001 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5002 		\a unsafeMemory, the other way around otherwise.
5003 */
5004 status_t
5005 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5006 	size_t size, bool copyToUnsafe)
5007 {
5008 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5009 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5010 		return B_BAD_VALUE;
5011 	}
5012 
5013 	// get the address space for the debugged thread
5014 	VMAddressSpace* addressSpace;
5015 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5016 		addressSpace = VMAddressSpace::Kernel();
5017 	} else if (teamID == B_CURRENT_TEAM) {
5018 		Thread* thread = debug_get_debugged_thread();
5019 		if (thread == NULL || thread->team == NULL)
5020 			return B_BAD_ADDRESS;
5021 
5022 		addressSpace = thread->team->address_space;
5023 	} else
5024 		addressSpace = VMAddressSpace::DebugGet(teamID);
5025 
5026 	if (addressSpace == NULL)
5027 		return B_BAD_ADDRESS;
5028 
5029 	// get the area
5030 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5031 	if (area == NULL)
5032 		return B_BAD_ADDRESS;
5033 
5034 	// search the page
5035 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5036 		+ area->cache_offset;
5037 	VMCache* cache = area->cache;
5038 	vm_page* page = NULL;
5039 	while (cache != NULL) {
5040 		page = cache->DebugLookupPage(cacheOffset);
5041 		if (page != NULL)
5042 			break;
5043 
5044 		// Page not found in this cache -- if it is paged out, we must not try
5045 		// to get it from lower caches.
5046 		if (cache->DebugHasPage(cacheOffset))
5047 			break;
5048 
5049 		cache = cache->source;
5050 	}
5051 
5052 	if (page == NULL)
5053 		return B_UNSUPPORTED;
5054 
5055 	// copy from/to physical memory
5056 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5057 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5058 
5059 	if (copyToUnsafe) {
5060 		if (page->Cache() != area->cache)
5061 			return B_UNSUPPORTED;
5062 
5063 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5064 	}
5065 
5066 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5067 }
5068 
5069 
5070 //	#pragma mark - kernel public API
5071 
5072 
5073 status_t
5074 user_memcpy(void* to, const void* from, size_t size)
5075 {
5076 	// don't allow address overflows
5077 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5078 		return B_BAD_ADDRESS;
5079 
5080 	if (arch_cpu_user_memcpy(to, from, size,
5081 			&thread_get_current_thread()->fault_handler) < B_OK)
5082 		return B_BAD_ADDRESS;
5083 
5084 	return B_OK;
5085 }
5086 
5087 
5088 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5089 	the string in \a to, NULL-terminating the result.
5090 
5091 	\param to Pointer to the destination C-string.
5092 	\param from Pointer to the source C-string.
5093 	\param size Size in bytes of the string buffer pointed to by \a to.
5094 
5095 	\return strlen(\a from).
5096 */
5097 ssize_t
5098 user_strlcpy(char* to, const char* from, size_t size)
5099 {
5100 	if (to == NULL && size != 0)
5101 		return B_BAD_VALUE;
5102 	if (from == NULL)
5103 		return B_BAD_ADDRESS;
5104 
5105 	// limit size to avoid address overflows
5106 	size_t maxSize = std::min(size,
5107 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5108 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5109 		// the source address might still overflow.
5110 
5111 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize,
5112 		&thread_get_current_thread()->fault_handler);
5113 
5114 	// If we hit the address overflow boundary, fail.
5115 	if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5116 			&& maxSize < size)) {
5117 		return B_BAD_ADDRESS;
5118 	}
5119 
5120 	return result;
5121 }
5122 
5123 
5124 status_t
5125 user_memset(void* s, char c, size_t count)
5126 {
5127 	// don't allow address overflows
5128 	if ((addr_t)s + count < (addr_t)s)
5129 		return B_BAD_ADDRESS;
5130 
5131 	if (arch_cpu_user_memset(s, c, count,
5132 			&thread_get_current_thread()->fault_handler) < B_OK)
5133 		return B_BAD_ADDRESS;
5134 
5135 	return B_OK;
5136 }
5137 
5138 
5139 /*!	Wires a single page at the given address.
5140 
5141 	\param team The team whose address space the address belongs to. Supports
5142 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5143 		parameter is ignored.
5144 	\param address address The virtual address to wire down. Does not need to
5145 		be page aligned.
5146 	\param writable If \c true the page shall be writable.
5147 	\param info On success the info is filled in, among other things
5148 		containing the physical address the given virtual one translates to.
5149 	\return \c B_OK, when the page could be wired, another error code otherwise.
5150 */
5151 status_t
5152 vm_wire_page(team_id team, addr_t address, bool writable,
5153 	VMPageWiringInfo* info)
5154 {
5155 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5156 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5157 
5158 	// compute the page protection that is required
5159 	bool isUser = IS_USER_ADDRESS(address);
5160 	uint32 requiredProtection = PAGE_PRESENT
5161 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5162 	if (writable)
5163 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5164 
5165 	// get and read lock the address space
5166 	VMAddressSpace* addressSpace = NULL;
5167 	if (isUser) {
5168 		if (team == B_CURRENT_TEAM)
5169 			addressSpace = VMAddressSpace::GetCurrent();
5170 		else
5171 			addressSpace = VMAddressSpace::Get(team);
5172 	} else
5173 		addressSpace = VMAddressSpace::GetKernel();
5174 	if (addressSpace == NULL)
5175 		return B_ERROR;
5176 
5177 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5178 
5179 	VMTranslationMap* map = addressSpace->TranslationMap();
5180 	status_t error = B_OK;
5181 
5182 	// get the area
5183 	VMArea* area = addressSpace->LookupArea(pageAddress);
5184 	if (area == NULL) {
5185 		addressSpace->Put();
5186 		return B_BAD_ADDRESS;
5187 	}
5188 
5189 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5190 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5191 
5192 	// mark the area range wired
5193 	area->Wire(&info->range);
5194 
5195 	// Lock the area's cache chain and the translation map. Needed to look
5196 	// up the page and play with its wired count.
5197 	cacheChainLocker.LockAllSourceCaches();
5198 	map->Lock();
5199 
5200 	phys_addr_t physicalAddress;
5201 	uint32 flags;
5202 	vm_page* page;
5203 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5204 		&& (flags & requiredProtection) == requiredProtection
5205 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5206 			!= NULL) {
5207 		// Already mapped with the correct permissions -- just increment
5208 		// the page's wired count.
5209 		increment_page_wired_count(page);
5210 
5211 		map->Unlock();
5212 		cacheChainLocker.Unlock();
5213 		addressSpaceLocker.Unlock();
5214 	} else {
5215 		// Let vm_soft_fault() map the page for us, if possible. We need
5216 		// to fully unlock to avoid deadlocks. Since we have already
5217 		// wired the area itself, nothing disturbing will happen with it
5218 		// in the meantime.
5219 		map->Unlock();
5220 		cacheChainLocker.Unlock();
5221 		addressSpaceLocker.Unlock();
5222 
5223 		error = vm_soft_fault(addressSpace, pageAddress, writable, isUser,
5224 			&page, &info->range);
5225 
5226 		if (error != B_OK) {
5227 			// The page could not be mapped -- clean up.
5228 			VMCache* cache = vm_area_get_locked_cache(area);
5229 			area->Unwire(&info->range);
5230 			cache->ReleaseRefAndUnlock();
5231 			addressSpace->Put();
5232 			return error;
5233 		}
5234 	}
5235 
5236 	info->physicalAddress
5237 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5238 			+ address % B_PAGE_SIZE;
5239 	info->page = page;
5240 
5241 	return B_OK;
5242 }
5243 
5244 
5245 /*!	Unwires a single page previously wired via vm_wire_page().
5246 
5247 	\param info The same object passed to vm_wire_page() before.
5248 */
5249 void
5250 vm_unwire_page(VMPageWiringInfo* info)
5251 {
5252 	// lock the address space
5253 	VMArea* area = info->range.area;
5254 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5255 		// takes over our reference
5256 
5257 	// lock the top cache
5258 	VMCache* cache = vm_area_get_locked_cache(area);
5259 	VMCacheChainLocker cacheChainLocker(cache);
5260 
5261 	if (info->page->Cache() != cache) {
5262 		// The page is not in the top cache, so we lock the whole cache chain
5263 		// before touching the page's wired count.
5264 		cacheChainLocker.LockAllSourceCaches();
5265 	}
5266 
5267 	decrement_page_wired_count(info->page);
5268 
5269 	// remove the wired range from the range
5270 	area->Unwire(&info->range);
5271 
5272 	cacheChainLocker.Unlock();
5273 }
5274 
5275 
5276 /*!	Wires down the given address range in the specified team's address space.
5277 
5278 	If successful the function
5279 	- acquires a reference to the specified team's address space,
5280 	- adds respective wired ranges to all areas that intersect with the given
5281 	  address range,
5282 	- makes sure all pages in the given address range are mapped with the
5283 	  requested access permissions and increments their wired count.
5284 
5285 	It fails, when \a team doesn't specify a valid address space, when any part
5286 	of the specified address range is not covered by areas, when the concerned
5287 	areas don't allow mapping with the requested permissions, or when mapping
5288 	failed for another reason.
5289 
5290 	When successful the call must be balanced by a unlock_memory_etc() call with
5291 	the exact same parameters.
5292 
5293 	\param team Identifies the address (via team ID). \c B_CURRENT_TEAM is
5294 		supported.
5295 	\param address The start of the address range to be wired.
5296 	\param numBytes The size of the address range to be wired.
5297 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5298 		requests that the range must be wired writable ("read from device
5299 		into memory").
5300 	\return \c B_OK on success, another error code otherwise.
5301 */
5302 status_t
5303 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5304 {
5305 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5306 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5307 
5308 	// compute the page protection that is required
5309 	bool isUser = IS_USER_ADDRESS(address);
5310 	bool writable = (flags & B_READ_DEVICE) == 0;
5311 	uint32 requiredProtection = PAGE_PRESENT
5312 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5313 	if (writable)
5314 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5315 
5316 	uint32 mallocFlags = isUser
5317 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5318 
5319 	// get and read lock the address space
5320 	VMAddressSpace* addressSpace = NULL;
5321 	if (isUser) {
5322 		if (team == B_CURRENT_TEAM)
5323 			addressSpace = VMAddressSpace::GetCurrent();
5324 		else
5325 			addressSpace = VMAddressSpace::Get(team);
5326 	} else
5327 		addressSpace = VMAddressSpace::GetKernel();
5328 	if (addressSpace == NULL)
5329 		return B_ERROR;
5330 
5331 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5332 
5333 	VMTranslationMap* map = addressSpace->TranslationMap();
5334 	status_t error = B_OK;
5335 
5336 	// iterate through all concerned areas
5337 	addr_t nextAddress = lockBaseAddress;
5338 	while (nextAddress != lockEndAddress) {
5339 		// get the next area
5340 		VMArea* area = addressSpace->LookupArea(nextAddress);
5341 		if (area == NULL) {
5342 			error = B_BAD_ADDRESS;
5343 			break;
5344 		}
5345 
5346 		addr_t areaStart = nextAddress;
5347 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5348 
5349 		// allocate the wired range (do that before locking the cache to avoid
5350 		// deadlocks)
5351 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5352 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5353 		if (range == NULL) {
5354 			error = B_NO_MEMORY;
5355 			break;
5356 		}
5357 
5358 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5359 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5360 
5361 		// mark the area range wired
5362 		area->Wire(range);
5363 
5364 		// Depending on the area cache type and the wiring, we may not need to
5365 		// look at the individual pages.
5366 		if (area->cache_type == CACHE_TYPE_NULL
5367 			|| area->cache_type == CACHE_TYPE_DEVICE
5368 			|| area->wiring == B_FULL_LOCK
5369 			|| area->wiring == B_CONTIGUOUS) {
5370 			nextAddress = areaEnd;
5371 			continue;
5372 		}
5373 
5374 		// Lock the area's cache chain and the translation map. Needed to look
5375 		// up pages and play with their wired count.
5376 		cacheChainLocker.LockAllSourceCaches();
5377 		map->Lock();
5378 
5379 		// iterate through the pages and wire them
5380 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5381 			phys_addr_t physicalAddress;
5382 			uint32 flags;
5383 
5384 			vm_page* page;
5385 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5386 				&& (flags & requiredProtection) == requiredProtection
5387 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5388 					!= NULL) {
5389 				// Already mapped with the correct permissions -- just increment
5390 				// the page's wired count.
5391 				increment_page_wired_count(page);
5392 			} else {
5393 				// Let vm_soft_fault() map the page for us, if possible. We need
5394 				// to fully unlock to avoid deadlocks. Since we have already
5395 				// wired the area itself, nothing disturbing will happen with it
5396 				// in the meantime.
5397 				map->Unlock();
5398 				cacheChainLocker.Unlock();
5399 				addressSpaceLocker.Unlock();
5400 
5401 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5402 					isUser, &page, range);
5403 
5404 				addressSpaceLocker.Lock();
5405 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5406 				cacheChainLocker.LockAllSourceCaches();
5407 				map->Lock();
5408 			}
5409 
5410 			if (error != B_OK)
5411 				break;
5412 		}
5413 
5414 		map->Unlock();
5415 
5416 		if (error == B_OK) {
5417 			cacheChainLocker.Unlock();
5418 		} else {
5419 			// An error occurred, so abort right here. If the current address
5420 			// is the first in this area, unwire the area, since we won't get
5421 			// to it when reverting what we've done so far.
5422 			if (nextAddress == areaStart) {
5423 				area->Unwire(range);
5424 				cacheChainLocker.Unlock();
5425 				range->~VMAreaWiredRange();
5426 				free_etc(range, mallocFlags);
5427 			} else
5428 				cacheChainLocker.Unlock();
5429 
5430 			break;
5431 		}
5432 	}
5433 
5434 	if (error != B_OK) {
5435 		// An error occurred, so unwire all that we've already wired. Note that
5436 		// even if not a single page was wired, unlock_memory_etc() is called
5437 		// to put the address space reference.
5438 		addressSpaceLocker.Unlock();
5439 		unlock_memory_etc(team, (void*)address, nextAddress - lockBaseAddress,
5440 			flags);
5441 	}
5442 
5443 	return error;
5444 }
5445 
5446 
5447 status_t
5448 lock_memory(void* address, size_t numBytes, uint32 flags)
5449 {
5450 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5451 }
5452 
5453 
5454 /*!	Unwires an address range previously wired with lock_memory_etc().
5455 
5456 	Note that a call to this function must balance a previous lock_memory_etc()
5457 	call with exactly the same parameters.
5458 */
5459 status_t
5460 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5461 {
5462 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5463 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5464 
5465 	// compute the page protection that is required
5466 	bool isUser = IS_USER_ADDRESS(address);
5467 	bool writable = (flags & B_READ_DEVICE) == 0;
5468 	uint32 requiredProtection = PAGE_PRESENT
5469 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5470 	if (writable)
5471 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5472 
5473 	uint32 mallocFlags = isUser
5474 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5475 
5476 	// get and read lock the address space
5477 	VMAddressSpace* addressSpace = NULL;
5478 	if (isUser) {
5479 		if (team == B_CURRENT_TEAM)
5480 			addressSpace = VMAddressSpace::GetCurrent();
5481 		else
5482 			addressSpace = VMAddressSpace::Get(team);
5483 	} else
5484 		addressSpace = VMAddressSpace::GetKernel();
5485 	if (addressSpace == NULL)
5486 		return B_ERROR;
5487 
5488 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5489 
5490 	VMTranslationMap* map = addressSpace->TranslationMap();
5491 	status_t error = B_OK;
5492 
5493 	// iterate through all concerned areas
5494 	addr_t nextAddress = lockBaseAddress;
5495 	while (nextAddress != lockEndAddress) {
5496 		// get the next area
5497 		VMArea* area = addressSpace->LookupArea(nextAddress);
5498 		if (area == NULL) {
5499 			error = B_BAD_ADDRESS;
5500 			break;
5501 		}
5502 
5503 		addr_t areaStart = nextAddress;
5504 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5505 
5506 		// Lock the area's top cache. This is a requirement for
5507 		// VMArea::Unwire().
5508 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5509 
5510 		// Depending on the area cache type and the wiring, we may not need to
5511 		// look at the individual pages.
5512 		if (area->cache_type == CACHE_TYPE_NULL
5513 			|| area->cache_type == CACHE_TYPE_DEVICE
5514 			|| area->wiring == B_FULL_LOCK
5515 			|| area->wiring == B_CONTIGUOUS) {
5516 			// unwire the range (to avoid deadlocks we delete the range after
5517 			// unlocking the cache)
5518 			nextAddress = areaEnd;
5519 			VMAreaWiredRange* range = area->Unwire(areaStart,
5520 				areaEnd - areaStart, writable);
5521 			cacheChainLocker.Unlock();
5522 			if (range != NULL) {
5523 				range->~VMAreaWiredRange();
5524 				free_etc(range, mallocFlags);
5525 			}
5526 			continue;
5527 		}
5528 
5529 		// Lock the area's cache chain and the translation map. Needed to look
5530 		// up pages and play with their wired count.
5531 		cacheChainLocker.LockAllSourceCaches();
5532 		map->Lock();
5533 
5534 		// iterate through the pages and unwire them
5535 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5536 			phys_addr_t physicalAddress;
5537 			uint32 flags;
5538 
5539 			vm_page* page;
5540 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5541 				&& (flags & PAGE_PRESENT) != 0
5542 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5543 					!= NULL) {
5544 				// Already mapped with the correct permissions -- just increment
5545 				// the page's wired count.
5546 				decrement_page_wired_count(page);
5547 			} else {
5548 				panic("unlock_memory_etc(): Failed to unwire page: address "
5549 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5550 					nextAddress);
5551 				error = B_BAD_VALUE;
5552 				break;
5553 			}
5554 		}
5555 
5556 		map->Unlock();
5557 
5558 		// All pages are unwired. Remove the area's wired range as well (to
5559 		// avoid deadlocks we delete the range after unlocking the cache).
5560 		VMAreaWiredRange* range = area->Unwire(areaStart,
5561 			areaEnd - areaStart, writable);
5562 
5563 		cacheChainLocker.Unlock();
5564 
5565 		if (range != NULL) {
5566 			range->~VMAreaWiredRange();
5567 			free_etc(range, mallocFlags);
5568 		}
5569 
5570 		if (error != B_OK)
5571 			break;
5572 	}
5573 
5574 	// get rid of the address space reference
5575 	addressSpace->Put();
5576 
5577 	return error;
5578 }
5579 
5580 
5581 status_t
5582 unlock_memory(void* address, size_t numBytes, uint32 flags)
5583 {
5584 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5585 }
5586 
5587 
5588 /*!	Similar to get_memory_map(), but also allows to specify the address space
5589 	for the memory in question and has a saner semantics.
5590 	Returns \c B_OK when the complete range could be translated or
5591 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5592 	case the actual number of entries is written to \c *_numEntries. Any other
5593 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5594 	in this case.
5595 */
5596 status_t
5597 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5598 	physical_entry* table, uint32* _numEntries)
5599 {
5600 	uint32 numEntries = *_numEntries;
5601 	*_numEntries = 0;
5602 
5603 	VMAddressSpace* addressSpace;
5604 	addr_t virtualAddress = (addr_t)address;
5605 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5606 	phys_addr_t physicalAddress;
5607 	status_t status = B_OK;
5608 	int32 index = -1;
5609 	addr_t offset = 0;
5610 	bool interrupts = are_interrupts_enabled();
5611 
5612 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5613 		"entries)\n", team, address, numBytes, numEntries));
5614 
5615 	if (numEntries == 0 || numBytes == 0)
5616 		return B_BAD_VALUE;
5617 
5618 	// in which address space is the address to be found?
5619 	if (IS_USER_ADDRESS(virtualAddress)) {
5620 		if (team == B_CURRENT_TEAM)
5621 			addressSpace = VMAddressSpace::GetCurrent();
5622 		else
5623 			addressSpace = VMAddressSpace::Get(team);
5624 	} else
5625 		addressSpace = VMAddressSpace::GetKernel();
5626 
5627 	if (addressSpace == NULL)
5628 		return B_ERROR;
5629 
5630 	VMTranslationMap* map = addressSpace->TranslationMap();
5631 
5632 	if (interrupts)
5633 		map->Lock();
5634 
5635 	while (offset < numBytes) {
5636 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5637 		uint32 flags;
5638 
5639 		if (interrupts) {
5640 			status = map->Query((addr_t)address + offset, &physicalAddress,
5641 				&flags);
5642 		} else {
5643 			status = map->QueryInterrupt((addr_t)address + offset,
5644 				&physicalAddress, &flags);
5645 		}
5646 		if (status < B_OK)
5647 			break;
5648 		if ((flags & PAGE_PRESENT) == 0) {
5649 			panic("get_memory_map() called on unmapped memory!");
5650 			return B_BAD_ADDRESS;
5651 		}
5652 
5653 		if (index < 0 && pageOffset > 0) {
5654 			physicalAddress += pageOffset;
5655 			if (bytes > B_PAGE_SIZE - pageOffset)
5656 				bytes = B_PAGE_SIZE - pageOffset;
5657 		}
5658 
5659 		// need to switch to the next physical_entry?
5660 		if (index < 0 || table[index].address
5661 				!= physicalAddress - table[index].size) {
5662 			if ((uint32)++index + 1 > numEntries) {
5663 				// table to small
5664 				break;
5665 			}
5666 			table[index].address = physicalAddress;
5667 			table[index].size = bytes;
5668 		} else {
5669 			// page does fit in current entry
5670 			table[index].size += bytes;
5671 		}
5672 
5673 		offset += bytes;
5674 	}
5675 
5676 	if (interrupts)
5677 		map->Unlock();
5678 
5679 	if (status != B_OK)
5680 		return status;
5681 
5682 	if ((uint32)index + 1 > numEntries) {
5683 		*_numEntries = index;
5684 		return B_BUFFER_OVERFLOW;
5685 	}
5686 
5687 	*_numEntries = index + 1;
5688 	return B_OK;
5689 }
5690 
5691 
5692 /*!	According to the BeBook, this function should always succeed.
5693 	This is no longer the case.
5694 */
5695 extern "C" int32
5696 __get_memory_map_haiku(const void* address, size_t numBytes,
5697 	physical_entry* table, int32 numEntries)
5698 {
5699 	uint32 entriesRead = numEntries;
5700 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5701 		table, &entriesRead);
5702 	if (error != B_OK)
5703 		return error;
5704 
5705 	// close the entry list
5706 
5707 	// if it's only one entry, we will silently accept the missing ending
5708 	if (numEntries == 1)
5709 		return B_OK;
5710 
5711 	if (entriesRead + 1 > (uint32)numEntries)
5712 		return B_BUFFER_OVERFLOW;
5713 
5714 	table[entriesRead].address = 0;
5715 	table[entriesRead].size = 0;
5716 
5717 	return B_OK;
5718 }
5719 
5720 
5721 area_id
5722 area_for(void* address)
5723 {
5724 	return vm_area_for((addr_t)address, true);
5725 }
5726 
5727 
5728 area_id
5729 find_area(const char* name)
5730 {
5731 	return VMAreaHash::Find(name);
5732 }
5733 
5734 
5735 status_t
5736 _get_area_info(area_id id, area_info* info, size_t size)
5737 {
5738 	if (size != sizeof(area_info) || info == NULL)
5739 		return B_BAD_VALUE;
5740 
5741 	AddressSpaceReadLocker locker;
5742 	VMArea* area;
5743 	status_t status = locker.SetFromArea(id, area);
5744 	if (status != B_OK)
5745 		return status;
5746 
5747 	fill_area_info(area, info, size);
5748 	return B_OK;
5749 }
5750 
5751 
5752 status_t
5753 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5754 {
5755 	addr_t nextBase = *(addr_t*)cookie;
5756 
5757 	// we're already through the list
5758 	if (nextBase == (addr_t)-1)
5759 		return B_ENTRY_NOT_FOUND;
5760 
5761 	if (team == B_CURRENT_TEAM)
5762 		team = team_get_current_team_id();
5763 
5764 	AddressSpaceReadLocker locker(team);
5765 	if (!locker.IsLocked())
5766 		return B_BAD_TEAM_ID;
5767 
5768 	VMArea* area;
5769 	for (VMAddressSpace::AreaIterator it
5770 				= locker.AddressSpace()->GetAreaIterator();
5771 			(area = it.Next()) != NULL;) {
5772 		if (area->Base() > nextBase)
5773 			break;
5774 	}
5775 
5776 	if (area == NULL) {
5777 		nextBase = (addr_t)-1;
5778 		return B_ENTRY_NOT_FOUND;
5779 	}
5780 
5781 	fill_area_info(area, info, size);
5782 	*cookie = (ssize_t)(area->Base());
5783 
5784 	return B_OK;
5785 }
5786 
5787 
5788 status_t
5789 set_area_protection(area_id area, uint32 newProtection)
5790 {
5791 	fix_protection(&newProtection);
5792 
5793 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5794 		newProtection, true);
5795 }
5796 
5797 
5798 status_t
5799 resize_area(area_id areaID, size_t newSize)
5800 {
5801 	return vm_resize_area(areaID, newSize, true);
5802 }
5803 
5804 
5805 /*!	Transfers the specified area to a new team. The caller must be the owner
5806 	of the area.
5807 */
5808 area_id
5809 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5810 	bool kernel)
5811 {
5812 	area_info info;
5813 	status_t status = get_area_info(id, &info);
5814 	if (status != B_OK)
5815 		return status;
5816 
5817 	if (info.team != thread_get_current_thread()->team->id)
5818 		return B_PERMISSION_DENIED;
5819 
5820 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5821 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5822 	if (clonedArea < 0)
5823 		return clonedArea;
5824 
5825 	status = vm_delete_area(info.team, id, kernel);
5826 	if (status != B_OK) {
5827 		vm_delete_area(target, clonedArea, kernel);
5828 		return status;
5829 	}
5830 
5831 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5832 
5833 	return clonedArea;
5834 }
5835 
5836 
5837 extern "C" area_id
5838 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
5839 	size_t numBytes, uint32 addressSpec, uint32 protection,
5840 	void** _virtualAddress)
5841 {
5842 	if (!arch_vm_supports_protection(protection))
5843 		return B_NOT_SUPPORTED;
5844 
5845 	fix_protection(&protection);
5846 
5847 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
5848 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
5849 		false);
5850 }
5851 
5852 
5853 area_id
5854 clone_area(const char* name, void** _address, uint32 addressSpec,
5855 	uint32 protection, area_id source)
5856 {
5857 	if ((protection & B_KERNEL_PROTECTION) == 0)
5858 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5859 
5860 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
5861 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
5862 }
5863 
5864 
5865 area_id
5866 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock,
5867 	uint32 protection, uint32 flags, uint32 guardSize,
5868 	const virtual_address_restrictions* virtualAddressRestrictions,
5869 	const physical_address_restrictions* physicalAddressRestrictions,
5870 	void** _address)
5871 {
5872 	fix_protection(&protection);
5873 
5874 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
5875 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
5876 		true, _address);
5877 }
5878 
5879 
5880 extern "C" area_id
5881 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
5882 	size_t size, uint32 lock, uint32 protection)
5883 {
5884 	fix_protection(&protection);
5885 
5886 	virtual_address_restrictions virtualRestrictions = {};
5887 	virtualRestrictions.address = *_address;
5888 	virtualRestrictions.address_specification = addressSpec;
5889 	physical_address_restrictions physicalRestrictions = {};
5890 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
5891 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
5892 		true, _address);
5893 }
5894 
5895 
5896 status_t
5897 delete_area(area_id area)
5898 {
5899 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
5900 }
5901 
5902 
5903 //	#pragma mark - Userland syscalls
5904 
5905 
5906 status_t
5907 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
5908 	addr_t size)
5909 {
5910 	// filter out some unavailable values (for userland)
5911 	switch (addressSpec) {
5912 		case B_ANY_KERNEL_ADDRESS:
5913 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5914 			return B_BAD_VALUE;
5915 	}
5916 
5917 	addr_t address;
5918 
5919 	if (!IS_USER_ADDRESS(userAddress)
5920 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
5921 		return B_BAD_ADDRESS;
5922 
5923 	status_t status = vm_reserve_address_range(
5924 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
5925 		RESERVED_AVOID_BASE);
5926 	if (status != B_OK)
5927 		return status;
5928 
5929 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
5930 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5931 			(void*)address, size);
5932 		return B_BAD_ADDRESS;
5933 	}
5934 
5935 	return B_OK;
5936 }
5937 
5938 
5939 status_t
5940 _user_unreserve_address_range(addr_t address, addr_t size)
5941 {
5942 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5943 		(void*)address, size);
5944 }
5945 
5946 
5947 area_id
5948 _user_area_for(void* address)
5949 {
5950 	return vm_area_for((addr_t)address, false);
5951 }
5952 
5953 
5954 area_id
5955 _user_find_area(const char* userName)
5956 {
5957 	char name[B_OS_NAME_LENGTH];
5958 
5959 	if (!IS_USER_ADDRESS(userName)
5960 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5961 		return B_BAD_ADDRESS;
5962 
5963 	return find_area(name);
5964 }
5965 
5966 
5967 status_t
5968 _user_get_area_info(area_id area, area_info* userInfo)
5969 {
5970 	if (!IS_USER_ADDRESS(userInfo))
5971 		return B_BAD_ADDRESS;
5972 
5973 	area_info info;
5974 	status_t status = get_area_info(area, &info);
5975 	if (status < B_OK)
5976 		return status;
5977 
5978 	// TODO: do we want to prevent userland from seeing kernel protections?
5979 	//info.protection &= B_USER_PROTECTION;
5980 
5981 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5982 		return B_BAD_ADDRESS;
5983 
5984 	return status;
5985 }
5986 
5987 
5988 status_t
5989 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
5990 {
5991 	ssize_t cookie;
5992 
5993 	if (!IS_USER_ADDRESS(userCookie)
5994 		|| !IS_USER_ADDRESS(userInfo)
5995 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
5996 		return B_BAD_ADDRESS;
5997 
5998 	area_info info;
5999 	status_t status = _get_next_area_info(team, &cookie, &info,
6000 		sizeof(area_info));
6001 	if (status != B_OK)
6002 		return status;
6003 
6004 	//info.protection &= B_USER_PROTECTION;
6005 
6006 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6007 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6008 		return B_BAD_ADDRESS;
6009 
6010 	return status;
6011 }
6012 
6013 
6014 status_t
6015 _user_set_area_protection(area_id area, uint32 newProtection)
6016 {
6017 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6018 		return B_BAD_VALUE;
6019 
6020 	fix_protection(&newProtection);
6021 
6022 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6023 		newProtection, false);
6024 }
6025 
6026 
6027 status_t
6028 _user_resize_area(area_id area, size_t newSize)
6029 {
6030 	// TODO: Since we restrict deleting of areas to those owned by the team,
6031 	// we should also do that for resizing (check other functions, too).
6032 	return vm_resize_area(area, newSize, false);
6033 }
6034 
6035 
6036 area_id
6037 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6038 	team_id target)
6039 {
6040 	// filter out some unavailable values (for userland)
6041 	switch (addressSpec) {
6042 		case B_ANY_KERNEL_ADDRESS:
6043 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6044 			return B_BAD_VALUE;
6045 	}
6046 
6047 	void* address;
6048 	if (!IS_USER_ADDRESS(userAddress)
6049 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6050 		return B_BAD_ADDRESS;
6051 
6052 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6053 	if (newArea < B_OK)
6054 		return newArea;
6055 
6056 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6057 		return B_BAD_ADDRESS;
6058 
6059 	return newArea;
6060 }
6061 
6062 
6063 area_id
6064 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6065 	uint32 protection, area_id sourceArea)
6066 {
6067 	char name[B_OS_NAME_LENGTH];
6068 	void* address;
6069 
6070 	// filter out some unavailable values (for userland)
6071 	switch (addressSpec) {
6072 		case B_ANY_KERNEL_ADDRESS:
6073 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6074 			return B_BAD_VALUE;
6075 	}
6076 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6077 		return B_BAD_VALUE;
6078 
6079 	if (!IS_USER_ADDRESS(userName)
6080 		|| !IS_USER_ADDRESS(userAddress)
6081 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6082 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6083 		return B_BAD_ADDRESS;
6084 
6085 	fix_protection(&protection);
6086 
6087 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6088 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6089 		false);
6090 	if (clonedArea < B_OK)
6091 		return clonedArea;
6092 
6093 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6094 		delete_area(clonedArea);
6095 		return B_BAD_ADDRESS;
6096 	}
6097 
6098 	return clonedArea;
6099 }
6100 
6101 
6102 area_id
6103 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6104 	size_t size, uint32 lock, uint32 protection)
6105 {
6106 	char name[B_OS_NAME_LENGTH];
6107 	void* address;
6108 
6109 	// filter out some unavailable values (for userland)
6110 	switch (addressSpec) {
6111 		case B_ANY_KERNEL_ADDRESS:
6112 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6113 			return B_BAD_VALUE;
6114 	}
6115 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6116 		return B_BAD_VALUE;
6117 
6118 	if (!IS_USER_ADDRESS(userName)
6119 		|| !IS_USER_ADDRESS(userAddress)
6120 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6121 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6122 		return B_BAD_ADDRESS;
6123 
6124 	if (addressSpec == B_EXACT_ADDRESS
6125 		&& IS_KERNEL_ADDRESS(address))
6126 		return B_BAD_VALUE;
6127 
6128 	fix_protection(&protection);
6129 
6130 	virtual_address_restrictions virtualRestrictions = {};
6131 	virtualRestrictions.address = address;
6132 	virtualRestrictions.address_specification = addressSpec;
6133 	physical_address_restrictions physicalRestrictions = {};
6134 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6135 		size, lock, protection, 0, 0, &virtualRestrictions,
6136 		&physicalRestrictions, false, &address);
6137 
6138 	if (area >= B_OK
6139 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6140 		delete_area(area);
6141 		return B_BAD_ADDRESS;
6142 	}
6143 
6144 	return area;
6145 }
6146 
6147 
6148 status_t
6149 _user_delete_area(area_id area)
6150 {
6151 	// Unlike the BeOS implementation, you can now only delete areas
6152 	// that you have created yourself from userland.
6153 	// The documentation to delete_area() explicitly states that this
6154 	// will be restricted in the future, and so it will.
6155 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6156 }
6157 
6158 
6159 // TODO: create a BeOS style call for this!
6160 
6161 area_id
6162 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6163 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6164 	int fd, off_t offset)
6165 {
6166 	char name[B_OS_NAME_LENGTH];
6167 	void* address;
6168 	area_id area;
6169 
6170 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6171 		return B_BAD_VALUE;
6172 
6173 	fix_protection(&protection);
6174 
6175 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6176 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6177 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6178 		return B_BAD_ADDRESS;
6179 
6180 	if (addressSpec == B_EXACT_ADDRESS) {
6181 		if ((addr_t)address + size < (addr_t)address
6182 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6183 			return B_BAD_VALUE;
6184 		}
6185 		if (!IS_USER_ADDRESS(address)
6186 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6187 			return B_BAD_ADDRESS;
6188 		}
6189 	}
6190 
6191 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6192 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6193 		false);
6194 	if (area < B_OK)
6195 		return area;
6196 
6197 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6198 		return B_BAD_ADDRESS;
6199 
6200 	return area;
6201 }
6202 
6203 
6204 status_t
6205 _user_unmap_memory(void* _address, size_t size)
6206 {
6207 	addr_t address = (addr_t)_address;
6208 
6209 	// check params
6210 	if (size == 0 || (addr_t)address + size < (addr_t)address
6211 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6212 		return B_BAD_VALUE;
6213 	}
6214 
6215 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6216 		return B_BAD_ADDRESS;
6217 
6218 	// Write lock the address space and ensure the address range is not wired.
6219 	AddressSpaceWriteLocker locker;
6220 	do {
6221 		status_t status = locker.SetTo(team_get_current_team_id());
6222 		if (status != B_OK)
6223 			return status;
6224 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6225 			size, &locker));
6226 
6227 	// unmap
6228 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6229 }
6230 
6231 
6232 status_t
6233 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6234 {
6235 	// check address range
6236 	addr_t address = (addr_t)_address;
6237 	size = PAGE_ALIGN(size);
6238 
6239 	if ((address % B_PAGE_SIZE) != 0)
6240 		return B_BAD_VALUE;
6241 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6242 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6243 		// weird error code required by POSIX
6244 		return ENOMEM;
6245 	}
6246 
6247 	// extend and check protection
6248 	if ((protection & ~B_USER_PROTECTION) != 0)
6249 		return B_BAD_VALUE;
6250 
6251 	fix_protection(&protection);
6252 
6253 	// We need to write lock the address space, since we're going to play with
6254 	// the areas. Also make sure that none of the areas is wired and that we're
6255 	// actually allowed to change the protection.
6256 	AddressSpaceWriteLocker locker;
6257 
6258 	bool restart;
6259 	do {
6260 		restart = false;
6261 
6262 		status_t status = locker.SetTo(team_get_current_team_id());
6263 		if (status != B_OK)
6264 			return status;
6265 
6266 		// First round: Check whether the whole range is covered by areas and we
6267 		// are allowed to modify them.
6268 		addr_t currentAddress = address;
6269 		size_t sizeLeft = size;
6270 		while (sizeLeft > 0) {
6271 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6272 			if (area == NULL)
6273 				return B_NO_MEMORY;
6274 
6275 			if ((area->protection & B_KERNEL_AREA) != 0)
6276 				return B_NOT_ALLOWED;
6277 
6278 			// TODO: For (shared) mapped files we should check whether the new
6279 			// protections are compatible with the file permissions. We don't
6280 			// have a way to do that yet, though.
6281 
6282 			addr_t offset = currentAddress - area->Base();
6283 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6284 
6285 			AreaCacheLocker cacheLocker(area);
6286 
6287 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6288 					&locker, &cacheLocker)) {
6289 				restart = true;
6290 				break;
6291 			}
6292 
6293 			cacheLocker.Unlock();
6294 
6295 			currentAddress += rangeSize;
6296 			sizeLeft -= rangeSize;
6297 		}
6298 	} while (restart);
6299 
6300 	// Second round: If the protections differ from that of the area, create a
6301 	// page protection array and re-map mapped pages.
6302 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6303 	addr_t currentAddress = address;
6304 	size_t sizeLeft = size;
6305 	while (sizeLeft > 0) {
6306 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6307 		if (area == NULL)
6308 			return B_NO_MEMORY;
6309 
6310 		addr_t offset = currentAddress - area->Base();
6311 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6312 
6313 		currentAddress += rangeSize;
6314 		sizeLeft -= rangeSize;
6315 
6316 		if (area->page_protections == NULL) {
6317 			if (area->protection == protection)
6318 				continue;
6319 
6320 			status_t status = allocate_area_page_protections(area);
6321 			if (status != B_OK)
6322 				return status;
6323 		}
6324 
6325 		// We need to lock the complete cache chain, since we potentially unmap
6326 		// pages of lower caches.
6327 		VMCache* topCache = vm_area_get_locked_cache(area);
6328 		VMCacheChainLocker cacheChainLocker(topCache);
6329 		cacheChainLocker.LockAllSourceCaches();
6330 
6331 		for (addr_t pageAddress = area->Base() + offset;
6332 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6333 			map->Lock();
6334 
6335 			set_area_page_protection(area, pageAddress, protection);
6336 
6337 			phys_addr_t physicalAddress;
6338 			uint32 flags;
6339 
6340 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6341 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6342 				map->Unlock();
6343 				continue;
6344 			}
6345 
6346 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6347 			if (page == NULL) {
6348 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6349 					"\n", area, physicalAddress);
6350 				map->Unlock();
6351 				return B_ERROR;
6352 			}
6353 
6354 			// If the page is not in the topmost cache and write access is
6355 			// requested, we have to unmap it. Otherwise we can re-map it with
6356 			// the new protection.
6357 			bool unmapPage = page->Cache() != topCache
6358 				&& (protection & B_WRITE_AREA) != 0;
6359 
6360 			if (!unmapPage)
6361 				map->ProtectPage(area, pageAddress, protection);
6362 
6363 			map->Unlock();
6364 
6365 			if (unmapPage) {
6366 				DEBUG_PAGE_ACCESS_START(page);
6367 				unmap_page(area, pageAddress);
6368 				DEBUG_PAGE_ACCESS_END(page);
6369 			}
6370 		}
6371 	}
6372 
6373 	return B_OK;
6374 }
6375 
6376 
6377 status_t
6378 _user_sync_memory(void* _address, size_t size, uint32 flags)
6379 {
6380 	addr_t address = (addr_t)_address;
6381 	size = PAGE_ALIGN(size);
6382 
6383 	// check params
6384 	if ((address % B_PAGE_SIZE) != 0)
6385 		return B_BAD_VALUE;
6386 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6387 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6388 		// weird error code required by POSIX
6389 		return ENOMEM;
6390 	}
6391 
6392 	bool writeSync = (flags & MS_SYNC) != 0;
6393 	bool writeAsync = (flags & MS_ASYNC) != 0;
6394 	if (writeSync && writeAsync)
6395 		return B_BAD_VALUE;
6396 
6397 	if (size == 0 || (!writeSync && !writeAsync))
6398 		return B_OK;
6399 
6400 	// iterate through the range and sync all concerned areas
6401 	while (size > 0) {
6402 		// read lock the address space
6403 		AddressSpaceReadLocker locker;
6404 		status_t error = locker.SetTo(team_get_current_team_id());
6405 		if (error != B_OK)
6406 			return error;
6407 
6408 		// get the first area
6409 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6410 		if (area == NULL)
6411 			return B_NO_MEMORY;
6412 
6413 		uint32 offset = address - area->Base();
6414 		size_t rangeSize = min_c(area->Size() - offset, size);
6415 		offset += area->cache_offset;
6416 
6417 		// lock the cache
6418 		AreaCacheLocker cacheLocker(area);
6419 		if (!cacheLocker)
6420 			return B_BAD_VALUE;
6421 		VMCache* cache = area->cache;
6422 
6423 		locker.Unlock();
6424 
6425 		uint32 firstPage = offset >> PAGE_SHIFT;
6426 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6427 
6428 		// write the pages
6429 		if (cache->type == CACHE_TYPE_VNODE) {
6430 			if (writeSync) {
6431 				// synchronous
6432 				error = vm_page_write_modified_page_range(cache, firstPage,
6433 					endPage);
6434 				if (error != B_OK)
6435 					return error;
6436 			} else {
6437 				// asynchronous
6438 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6439 				// TODO: This is probably not quite what is supposed to happen.
6440 				// Especially when a lot has to be written, it might take ages
6441 				// until it really hits the disk.
6442 			}
6443 		}
6444 
6445 		address += rangeSize;
6446 		size -= rangeSize;
6447 	}
6448 
6449 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6450 	// synchronize multiple mappings of the same file. In our VM they never get
6451 	// out of sync, though, so we don't have to do anything.
6452 
6453 	return B_OK;
6454 }
6455 
6456 
6457 status_t
6458 _user_memory_advice(void* address, size_t size, uint32 advice)
6459 {
6460 	// TODO: Implement!
6461 	return B_OK;
6462 }
6463 
6464 
6465 status_t
6466 _user_get_memory_properties(team_id teamID, const void* address,
6467 	uint32* _protected, uint32* _lock)
6468 {
6469 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6470 		return B_BAD_ADDRESS;
6471 
6472 	AddressSpaceReadLocker locker;
6473 	status_t error = locker.SetTo(teamID);
6474 	if (error != B_OK)
6475 		return error;
6476 
6477 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6478 	if (area == NULL)
6479 		return B_NO_MEMORY;
6480 
6481 
6482 	uint32 protection = area->protection;
6483 	if (area->page_protections != NULL)
6484 		protection = get_area_page_protection(area, (addr_t)address);
6485 
6486 	uint32 wiring = area->wiring;
6487 
6488 	locker.Unlock();
6489 
6490 	error = user_memcpy(_protected, &protection, sizeof(protection));
6491 	if (error != B_OK)
6492 		return error;
6493 
6494 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6495 
6496 	return error;
6497 }
6498 
6499 
6500 // #pragma mark -- compatibility
6501 
6502 
6503 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6504 
6505 
6506 struct physical_entry_beos {
6507 	uint32	address;
6508 	uint32	size;
6509 };
6510 
6511 
6512 /*!	The physical_entry structure has changed. We need to translate it to the
6513 	old one.
6514 */
6515 extern "C" int32
6516 __get_memory_map_beos(const void* _address, size_t numBytes,
6517 	physical_entry_beos* table, int32 numEntries)
6518 {
6519 	if (numEntries <= 0)
6520 		return B_BAD_VALUE;
6521 
6522 	const uint8* address = (const uint8*)_address;
6523 
6524 	int32 count = 0;
6525 	while (numBytes > 0 && count < numEntries) {
6526 		physical_entry entry;
6527 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6528 		if (result < 0) {
6529 			if (result != B_BUFFER_OVERFLOW)
6530 				return result;
6531 		}
6532 
6533 		if (entry.address >= (phys_addr_t)1 << 32) {
6534 			panic("get_memory_map(): Address is greater 4 GB!");
6535 			return B_ERROR;
6536 		}
6537 
6538 		table[count].address = entry.address;
6539 		table[count++].size = entry.size;
6540 
6541 		address += entry.size;
6542 		numBytes -= entry.size;
6543 	}
6544 
6545 	// null-terminate the table, if possible
6546 	if (count < numEntries) {
6547 		table[count].address = 0;
6548 		table[count].size = 0;
6549 	}
6550 
6551 	return B_OK;
6552 }
6553 
6554 
6555 /*!	The type of the \a physicalAddress parameter has changed from void* to
6556 	phys_addr_t.
6557 */
6558 extern "C" area_id
6559 __map_physical_memory_beos(const char* name, void* physicalAddress,
6560 	size_t numBytes, uint32 addressSpec, uint32 protection,
6561 	void** _virtualAddress)
6562 {
6563 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6564 		addressSpec, protection, _virtualAddress);
6565 }
6566 
6567 
6568 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6569 	we meddle with the \a lock parameter to force 32 bit.
6570 */
6571 extern "C" area_id
6572 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6573 	size_t size, uint32 lock, uint32 protection)
6574 {
6575 	switch (lock) {
6576 		case B_NO_LOCK:
6577 			break;
6578 		case B_FULL_LOCK:
6579 		case B_LAZY_LOCK:
6580 			lock = B_32_BIT_FULL_LOCK;
6581 			break;
6582 		case B_CONTIGUOUS:
6583 			lock = B_32_BIT_CONTIGUOUS;
6584 			break;
6585 	}
6586 
6587 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6588 		protection);
6589 }
6590 
6591 
6592 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6593 	"BASE");
6594 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6595 	"map_physical_memory@", "BASE");
6596 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6597 	"BASE");
6598 
6599 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6600 	"get_memory_map@@", "1_ALPHA3");
6601 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6602 	"map_physical_memory@@", "1_ALPHA3");
6603 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6604 	"1_ALPHA3");
6605 
6606 
6607 #else
6608 
6609 
6610 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6611 	"get_memory_map@@", "BASE");
6612 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6613 	"map_physical_memory@@", "BASE");
6614 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6615 	"BASE");
6616 
6617 
6618 #endif	// defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6619