xref: /haiku/src/system/kernel/vm/vm.cpp (revision 0ce4c23d22fae64d10e5575687490fbdf8ee52b8)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <boot/elf.h>
31 #include <boot/stage2.h>
32 #include <condition_variable.h>
33 #include <console.h>
34 #include <debug.h>
35 #include <file_cache.h>
36 #include <fs/fd.h>
37 #include <heap.h>
38 #include <kernel.h>
39 #include <int.h>
40 #include <lock.h>
41 #include <low_resource_manager.h>
42 #include <slab/Slab.h>
43 #include <smp.h>
44 #include <system_info.h>
45 #include <thread.h>
46 #include <team.h>
47 #include <tracing.h>
48 #include <util/AutoLock.h>
49 #include <util/khash.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 class AreaCacheLocking {
77 public:
78 	inline bool Lock(VMCache* lockable)
79 	{
80 		return false;
81 	}
82 
83 	inline void Unlock(VMCache* lockable)
84 	{
85 		vm_area_put_locked_cache(lockable);
86 	}
87 };
88 
89 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
90 public:
91 	inline AreaCacheLocker(VMCache* cache = NULL)
92 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
93 	{
94 	}
95 
96 	inline AreaCacheLocker(VMArea* area)
97 		: AutoLocker<VMCache, AreaCacheLocking>()
98 	{
99 		SetTo(area);
100 	}
101 
102 	inline void SetTo(VMCache* cache, bool alreadyLocked)
103 	{
104 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
105 	}
106 
107 	inline void SetTo(VMArea* area)
108 	{
109 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
110 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
111 	}
112 };
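

// Illustrative sketch (the helper below is hypothetical, not part of this
// file): an AreaCacheLocker constructed from a VMArea* locks the area's cache
// via vm_area_get_locked_cache() and releases it again via
// vm_area_put_locked_cache() when it goes out of scope.
#if 0
static void
example_with_area_cache_locked(VMArea* area)
{
	AreaCacheLocker cacheLocker(area);
		// the area's cache is now locked and referenced

	// ... work on the cache here ...
}	// the cache is unlocked and its reference released automatically
#endif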
113 
114 
115 class VMCacheChainLocker {
116 public:
117 	VMCacheChainLocker()
118 		:
119 		fTopCache(NULL),
120 		fBottomCache(NULL)
121 	{
122 	}
123 
124 	VMCacheChainLocker(VMCache* topCache)
125 		:
126 		fTopCache(topCache),
127 		fBottomCache(topCache)
128 	{
129 	}
130 
131 	~VMCacheChainLocker()
132 	{
133 		Unlock();
134 	}
135 
136 	void SetTo(VMCache* topCache)
137 	{
138 		fTopCache = topCache;
139 		fBottomCache = topCache;
140 
141 		if (topCache != NULL)
142 			topCache->SetUserData(NULL);
143 	}
144 
145 	VMCache* LockSourceCache()
146 	{
147 		if (fBottomCache == NULL || fBottomCache->source == NULL)
148 			return NULL;
149 
150 		VMCache* previousCache = fBottomCache;
151 
152 		fBottomCache = fBottomCache->source;
153 		fBottomCache->Lock();
154 		fBottomCache->AcquireRefLocked();
155 		fBottomCache->SetUserData(previousCache);
156 
157 		return fBottomCache;
158 	}
159 
160 	void LockAllSourceCaches()
161 	{
162 		while (LockSourceCache() != NULL) {
163 		}
164 	}
165 
166 	void Unlock(VMCache* exceptCache = NULL)
167 	{
168 		if (fTopCache == NULL)
169 			return;
170 
171 		// Unlock caches in source -> consumer direction. This is important to
172 		// avoid double-locking and a reversal of locking order in case a cache
173 		// is eligible for merging.
174 		VMCache* cache = fBottomCache;
175 		while (cache != NULL) {
176 			VMCache* nextCache = (VMCache*)cache->UserData();
177 			if (cache != exceptCache)
178 				cache->ReleaseRefAndUnlock(cache != fTopCache);
179 
180 			if (cache == fTopCache)
181 				break;
182 
183 			cache = nextCache;
184 		}
185 
186 		fTopCache = NULL;
187 		fBottomCache = NULL;
188 	}
189 
190 	void UnlockKeepRefs(bool keepTopCacheLocked)
191 	{
192 		if (fTopCache == NULL)
193 			return;
194 
195 		VMCache* nextCache = fBottomCache;
196 		VMCache* cache = NULL;
197 
198 		while (keepTopCacheLocked
199 				? nextCache != fTopCache : cache != fTopCache) {
200 			cache = nextCache;
201 			nextCache = (VMCache*)cache->UserData();
202 			cache->Unlock(cache != fTopCache);
203 		}
204 	}
205 
206 	void RelockCaches(bool topCacheLocked)
207 	{
208 		if (fTopCache == NULL)
209 			return;
210 
211 		VMCache* nextCache = fTopCache;
212 		VMCache* cache = NULL;
213 		if (topCacheLocked) {
214 			cache = nextCache;
215 			nextCache = cache->source;
216 		}
217 
218 		while (cache != fBottomCache && nextCache != NULL) {
219 			VMCache* consumer = cache;
220 			cache = nextCache;
221 			nextCache = cache->source;
222 			cache->Lock();
223 			cache->SetUserData(consumer);
224 		}
225 	}
226 
227 private:
228 	VMCache*	fTopCache;
229 	VMCache*	fBottomCache;
230 };
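

// Illustrative sketch (hypothetical helper): the typical use of
// VMCacheChainLocker -- lock an area's top cache, lock all source caches
// below it, and let the destructor unlock the whole chain in source ->
// consumer order. Compare cut_area() below for a real user.
#if 0
static void
example_walk_cache_chain(VMArea* area)
{
	VMCache* topCache = vm_area_get_locked_cache(area);
	VMCacheChainLocker cacheChainLocker(topCache);
	cacheChainLocker.LockAllSourceCaches();

	// ... all caches in the chain are locked here ...

	// ~VMCacheChainLocker() calls Unlock(), which releases the references
	// and locks from the bottom (source) cache up to the top cache.
}
#endif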
231 
232 
233 // The memory reserve that an allocation of a given priority must not touch.
234 static const size_t kMemoryReserveForPriority[] = {
235 	VM_MEMORY_RESERVE_USER,		// user
236 	VM_MEMORY_RESERVE_SYSTEM,	// system
237 	0							// VIP
238 };
239 
240 
241 ObjectCache* gPageMappingsObjectCache;
242 
243 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
244 
245 static off_t sAvailableMemory;
246 static off_t sNeededMemory;
247 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
248 static uint32 sPageFaults;
249 
250 static VMPhysicalPageMapper* sPhysicalPageMapper;
251 
252 #if DEBUG_CACHE_LIST
253 
254 struct cache_info {
255 	VMCache*	cache;
256 	addr_t		page_count;
257 	addr_t		committed;
258 };
259 
260 static const int kCacheInfoTableCount = 100 * 1024;
261 static cache_info* sCacheInfoTable;
262 
263 #endif	// DEBUG_CACHE_LIST
264 
265 
266 // function declarations
267 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
268 	bool addressSpaceCleanup);
269 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
270 	bool isWrite, bool isUser, vm_page** wirePage,
271 	VMAreaWiredRange* wiredRange = NULL);
272 static status_t map_backing_store(VMAddressSpace* addressSpace,
273 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
274 	int protection, int mapping, uint32 flags,
275 	const virtual_address_restrictions* addressRestrictions, bool kernel,
276 	VMArea** _area, void** _virtualAddress);
277 
278 
279 //	#pragma mark -
280 
281 
282 #if VM_PAGE_FAULT_TRACING
283 
284 namespace VMPageFaultTracing {
285 
286 class PageFaultStart : public AbstractTraceEntry {
287 public:
288 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
289 		:
290 		fAddress(address),
291 		fPC(pc),
292 		fWrite(write),
293 		fUser(user)
294 	{
295 		Initialized();
296 	}
297 
298 	virtual void AddDump(TraceOutput& out)
299 	{
300 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
301 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
302 	}
303 
304 private:
305 	addr_t	fAddress;
306 	addr_t	fPC;
307 	bool	fWrite;
308 	bool	fUser;
309 };
310 
311 
312 // page fault errors
313 enum {
314 	PAGE_FAULT_ERROR_NO_AREA		= 0,
315 	PAGE_FAULT_ERROR_KERNEL_ONLY,
316 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
317 	PAGE_FAULT_ERROR_READ_PROTECTED,
318 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
319 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
320 };
321 
322 
323 class PageFaultError : public AbstractTraceEntry {
324 public:
325 	PageFaultError(area_id area, status_t error)
326 		:
327 		fArea(area),
328 		fError(error)
329 	{
330 		Initialized();
331 	}
332 
333 	virtual void AddDump(TraceOutput& out)
334 	{
335 		switch (fError) {
336 			case PAGE_FAULT_ERROR_NO_AREA:
337 				out.Print("page fault error: no area");
338 				break;
339 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
340 				out.Print("page fault error: area: %ld, kernel only", fArea);
341 				break;
342 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
343 				out.Print("page fault error: area: %ld, write protected",
344 					fArea);
345 				break;
346 			case PAGE_FAULT_ERROR_READ_PROTECTED:
347 				out.Print("page fault error: area: %ld, read protected", fArea);
348 				break;
349 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
350 				out.Print("page fault error: kernel touching bad user memory");
351 				break;
352 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
353 				out.Print("page fault error: no address space");
354 				break;
355 			default:
356 				out.Print("page fault error: area: %ld, error: %s", fArea,
357 					strerror(fError));
358 				break;
359 		}
360 	}
361 
362 private:
363 	area_id		fArea;
364 	status_t	fError;
365 };
366 
367 
368 class PageFaultDone : public AbstractTraceEntry {
369 public:
370 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
371 			vm_page* page)
372 		:
373 		fArea(area),
374 		fTopCache(topCache),
375 		fCache(cache),
376 		fPage(page)
377 	{
378 		Initialized();
379 	}
380 
381 	virtual void AddDump(TraceOutput& out)
382 	{
383 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
384 			"page: %p", fArea, fTopCache, fCache, fPage);
385 	}
386 
387 private:
388 	area_id		fArea;
389 	VMCache*	fTopCache;
390 	VMCache*	fCache;
391 	vm_page*	fPage;
392 };
393 
394 }	// namespace VMPageFaultTracing
395 
396 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
397 #else
398 #	define TPF(x) ;
399 #endif	// VM_PAGE_FAULT_TRACING
400 
401 
402 //	#pragma mark -
403 
404 
405 /*!	The page's cache must be locked.
406 */
407 static inline void
408 increment_page_wired_count(vm_page* page)
409 {
410 	if (!page->IsMapped())
411 		atomic_add(&gMappedPagesCount, 1);
412 	page->IncrementWiredCount();
413 }
414 
415 
416 /*!	The page's cache must be locked.
417 */
418 static inline void
419 decrement_page_wired_count(vm_page* page)
420 {
421 	page->DecrementWiredCount();
422 	if (!page->IsMapped())
423 		atomic_add(&gMappedPagesCount, -1);
424 }
425 
426 
427 static inline addr_t
428 virtual_page_address(VMArea* area, vm_page* page)
429 {
430 	return area->Base()
431 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
432 }
433 
434 
435 //! You need to have the address space locked when calling this function
436 static VMArea*
437 lookup_area(VMAddressSpace* addressSpace, area_id id)
438 {
439 	VMAreaHash::ReadLock();
440 
441 	VMArea* area = VMAreaHash::LookupLocked(id);
442 	if (area != NULL && area->address_space != addressSpace)
443 		area = NULL;
444 
445 	VMAreaHash::ReadUnlock();
446 
447 	return area;
448 }
449 
450 
451 static status_t
452 allocate_area_page_protections(VMArea* area)
453 {
454 	// In the page protections we store only the three user protections,
455 	// so we use 4 bits per page.
456 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
457 	area->page_protections = (uint8*)malloc_etc(bytes,
458 		HEAP_DONT_LOCK_KERNEL_SPACE);
459 	if (area->page_protections == NULL)
460 		return B_NO_MEMORY;
461 
462 	// init the page protections for all pages to that of the area
463 	uint32 areaProtection = area->protection
464 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
465 	memset(area->page_protections, areaProtection | (areaProtection << 4),
466 		bytes);
467 	return B_OK;
468 }
469 
470 
471 static inline void
472 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
473 {
474 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
475 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
476 	uint8& entry = area->page_protections[pageIndex / 2];
477 	if (pageIndex % 2 == 0)
478 		entry = (entry & 0xf0) | protection;
479 	else
480 		entry = (entry & 0x0f) | (protection << 4);
481 }
482 
483 
484 static inline uint32
485 get_area_page_protection(VMArea* area, addr_t pageAddress)
486 {
487 	if (area->page_protections == NULL)
488 		return area->protection;
489 
490 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
491 	uint32 protection = area->page_protections[pageIndex / 2];
492 	if (pageIndex % 2 == 0)
493 		protection &= 0x0f;
494 	else
495 		protection >>= 4;
496 
497 	// If this is a kernel area we translate the user flags to kernel flags.
498 	if (area->address_space == VMAddressSpace::Kernel()) {
499 		uint32 kernelProtection = 0;
500 		if ((protection & B_READ_AREA) != 0)
501 			kernelProtection |= B_KERNEL_READ_AREA;
502 		if ((protection & B_WRITE_AREA) != 0)
503 			kernelProtection |= B_KERNEL_WRITE_AREA;
504 
505 		return kernelProtection;
506 	}
507 
508 	return protection | B_KERNEL_READ_AREA
509 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
510 }
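

// Illustrative sketch (hypothetical helper): how the 4-bit-per-page packing
// used above can be walked. Even page indices occupy the low nibble of a
// byte, odd indices the high nibble.
#if 0
static void
example_dump_page_protections(VMArea* area)
{
	if (area->page_protections == NULL)
		return;

	size_t pageCount = area->Size() / B_PAGE_SIZE;
	for (size_t pageIndex = 0; pageIndex < pageCount; pageIndex++) {
		uint8 entry = area->page_protections[pageIndex / 2];
		uint32 protection
			= pageIndex % 2 == 0 ? (entry & 0x0f) : (entry >> 4);
		dprintf("page %lu: %s%s%s\n", (unsigned long)pageIndex,
			(protection & B_READ_AREA) != 0 ? "r" : "-",
			(protection & B_WRITE_AREA) != 0 ? "w" : "-",
			(protection & B_EXECUTE_AREA) != 0 ? "x" : "-");
	}
}
#endif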
511 
512 
513 /*!	The caller must have reserved enough pages that the translation map
514 	implementation might need to map this page.
515 	The page's cache must be locked.
516 */
517 static status_t
518 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
519 	vm_page_reservation* reservation)
520 {
521 	VMTranslationMap* map = area->address_space->TranslationMap();
522 
523 	bool wasMapped = page->IsMapped();
524 
525 	if (area->wiring == B_NO_LOCK) {
526 		DEBUG_PAGE_ACCESS_CHECK(page);
527 
528 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
529 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
530 			gPageMappingsObjectCache,
531 			CACHE_DONT_WAIT_FOR_MEMORY
532 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
533 		if (mapping == NULL)
534 			return B_NO_MEMORY;
535 
536 		mapping->page = page;
537 		mapping->area = area;
538 
539 		map->Lock();
540 
541 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
542 			area->MemoryType(), reservation);
543 
544 		// insert mapping into lists
545 		if (!page->IsMapped())
546 			atomic_add(&gMappedPagesCount, 1);
547 
548 		page->mappings.Add(mapping);
549 		area->mappings.Add(mapping);
550 
551 		map->Unlock();
552 	} else {
553 		DEBUG_PAGE_ACCESS_CHECK(page);
554 
555 		map->Lock();
556 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
557 			area->MemoryType(), reservation);
558 		map->Unlock();
559 
560 		increment_page_wired_count(page);
561 	}
562 
563 	if (!wasMapped) {
564 		// The page is mapped now, so it must not remain in the cached queue.
565 		// It also makes sense to move it from the inactive to the active
566 		// queue, since otherwise the page daemon wouldn't come to keep track
567 		// of it (in idle mode) -- if the page isn't touched, it will be
568 		// deactivated after a full iteration through the queue at the latest.
569 		if (page->State() == PAGE_STATE_CACHED
570 				|| page->State() == PAGE_STATE_INACTIVE) {
571 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
572 		}
573 	}
574 
575 	return B_OK;
576 }
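

// Illustrative sketch (hypothetical helper, modeled on the B_FULL_LOCK path
// in vm_create_anonymous_area() below): allocate a page, insert it into the
// area's cache and map it. Assumes the cache is locked, the address space is
// read-locked and \a reservation covers the page plus whatever the
// translation map needs (see MaxPagesNeededToMap()).
#if 0
static status_t
example_map_one_wired_page(VMArea* area, addr_t address, off_t cacheOffset,
	vm_page_reservation* reservation)
{
	vm_page* page = vm_page_allocate_page(reservation,
		PAGE_STATE_WIRED | VM_PAGE_ALLOC_CLEAR);
	area->cache->InsertPage(page, cacheOffset);

	status_t status = map_page(area, page, address,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, reservation);

	DEBUG_PAGE_ACCESS_END(page);
	return status;
}
#endif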
577 
578 
579 /*!	The page is unmapped with \c preserveModified set to \c true, so the
580 	caller must hold the lock of the page's cache.
581 */
582 static inline bool
583 unmap_page(VMArea* area, addr_t virtualAddress)
584 {
585 	return area->address_space->TranslationMap()->UnmapPage(area,
586 		virtualAddress, true);
587 }
588 
589 
590 /*!	The pages are unmapped with \c preserveModified set to \c true, so the
591 	caller must hold the locks of all mapped pages' caches.
592 */
593 static inline void
594 unmap_pages(VMArea* area, addr_t base, size_t size)
595 {
596 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
597 }
598 
599 
600 /*!	Cuts a piece out of an area. If the given cut range covers the complete
601 	area, it is deleted. If it covers the beginning or the end, the area is
602 	resized accordingly. If the range covers some part in the middle of the
603 	area, it is split in two; in this case the second area is returned via
604 	\a _secondArea (the variable is left untouched in the other cases).
605 	The address space must be write locked.
606 	The caller must ensure that no part of the given range is wired.
607 */
608 static status_t
609 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
610 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
611 {
612 	// Does the cut range intersect with the area at all?
613 	addr_t areaLast = area->Base() + (area->Size() - 1);
614 	if (area->Base() > lastAddress || areaLast < address)
615 		return B_OK;
616 
617 	// Is the area fully covered?
618 	if (area->Base() >= address && areaLast <= lastAddress) {
619 		delete_area(addressSpace, area, false);
620 		return B_OK;
621 	}
622 
623 	int priority;
624 	uint32 allocationFlags;
625 	if (addressSpace == VMAddressSpace::Kernel()) {
626 		priority = VM_PRIORITY_SYSTEM;
627 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
628 			| HEAP_DONT_LOCK_KERNEL_SPACE;
629 	} else {
630 		priority = VM_PRIORITY_USER;
631 		allocationFlags = 0;
632 	}
633 
634 	VMCache* cache = vm_area_get_locked_cache(area);
635 	VMCacheChainLocker cacheChainLocker(cache);
636 	cacheChainLocker.LockAllSourceCaches();
637 
638 	// Cut the end only?
639 	if (areaLast <= lastAddress) {
640 		size_t oldSize = area->Size();
641 		size_t newSize = address - area->Base();
642 
643 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
644 			allocationFlags);
645 		if (error != B_OK)
646 			return error;
647 
648 		// unmap pages
649 		unmap_pages(area, address, oldSize - newSize);
650 
651 		// If no one else uses the area's cache, we can resize it, too.
652 		if (cache->areas == area && area->cache_next == NULL
653 			&& cache->consumers.IsEmpty()
654 			&& cache->type == CACHE_TYPE_RAM) {
655 			// Since VMCache::Resize() can temporarily drop the lock, we must
656 			// unlock all lower caches to prevent locking order inversion.
657 			cacheChainLocker.Unlock(cache);
658 			cache->Resize(cache->virtual_base + newSize, priority);
659 			cache->ReleaseRefAndUnlock();
660 		}
661 
662 		return B_OK;
663 	}
664 
665 	// Cut the beginning only?
666 	if (area->Base() >= address) {
667 		addr_t oldBase = area->Base();
668 		addr_t newBase = lastAddress + 1;
669 		size_t newSize = areaLast - lastAddress;
670 
671 		// unmap pages
672 		unmap_pages(area, oldBase, newBase - oldBase);
673 
674 		// resize the area
675 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
676 			allocationFlags);
677 		if (error != B_OK)
678 			return error;
679 
680 		// TODO: If no one else uses the area's cache, we should resize it, too!
681 
682 		area->cache_offset += newBase - oldBase;
683 
684 		return B_OK;
685 	}
686 
687 	// The tough part -- cut a piece out of the middle of the area.
688 	// We do that by shrinking the area to the beginning section and creating
689 	// a new area for the end section.
690 
691 	addr_t firstNewSize = address - area->Base();
692 	addr_t secondBase = lastAddress + 1;
693 	addr_t secondSize = areaLast - lastAddress;
694 
695 	// unmap pages
696 	unmap_pages(area, address, area->Size() - firstNewSize);
697 
698 	// resize the area
699 	addr_t oldSize = area->Size();
700 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
701 		allocationFlags);
702 	if (error != B_OK)
703 		return error;
704 
705 	// TODO: If no one else uses the area's cache, we might want to create a
706 	// new cache for the second area, transfer the concerned pages from the
707 	// first cache to it and resize the first cache.
708 
709 	// map the second area
710 	virtual_address_restrictions addressRestrictions = {};
711 	addressRestrictions.address = (void*)secondBase;
712 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
713 	VMArea* secondArea;
714 	error = map_backing_store(addressSpace, cache,
715 		area->cache_offset + (secondBase - area->Base()), area->name,
716 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
717 		&addressRestrictions, kernel, &secondArea, NULL);
718 	if (error != B_OK) {
719 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
720 		return error;
721 	}
722 
723 	// We need a cache reference for the new area.
724 	cache->AcquireRefLocked();
725 
726 	if (_secondArea != NULL)
727 		*_secondArea = secondArea;
728 
729 	return B_OK;
730 }
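

// Illustrative sketch (hypothetical helper and addresses): cutting a range
// out of the middle of a 16-page area. Afterwards \a area covers the first
// four pages, \a secondArea the last eight, and the four pages in between no
// longer belong to any area.
#if 0
static void
example_cut_middle(VMAddressSpace* addressSpace, VMArea* area)
{
	// assumes a 16-page area, a write-locked address space and no wired
	// pages within the cut range (see the doc comment of cut_area())
	addr_t cutStart = area->Base() + 4 * B_PAGE_SIZE;
	addr_t cutLast = area->Base() + 8 * B_PAGE_SIZE - 1;

	VMArea* secondArea = NULL;
	status_t error = cut_area(addressSpace, area, cutStart, cutLast,
		&secondArea, true);
	if (error == B_OK) {
		// area now spans pages 0-3, secondArea spans pages 8-15
	}
}
#endif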
731 
732 
733 /*!	Deletes all areas in the given address range.
734 	The address space must be write-locked.
735 	The caller must ensure that no part of the given range is wired.
736 */
737 static status_t
738 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
739 	bool kernel)
740 {
741 	size = PAGE_ALIGN(size);
742 	addr_t lastAddress = address + (size - 1);
743 
744 	// Check whether the caller is allowed to modify the concerned areas.
745 	if (!kernel) {
746 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
747 				VMArea* area = it.Next();) {
748 			addr_t areaLast = area->Base() + (area->Size() - 1);
749 			if (area->Base() < lastAddress && address < areaLast) {
750 				if ((area->protection & B_KERNEL_AREA) != 0)
751 					return B_NOT_ALLOWED;
752 			}
753 		}
754 	}
755 
756 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
757 			VMArea* area = it.Next();) {
758 		addr_t areaLast = area->Base() + (area->Size() - 1);
759 		if (area->Base() < lastAddress && address < areaLast) {
760 			status_t error = cut_area(addressSpace, area, address,
761 				lastAddress, NULL, kernel);
762 			if (error != B_OK)
763 				return error;
764 				// Failing after already messing with areas is ugly, but we
765 				// can't do anything about it.
766 		}
767 	}
768 
769 	return B_OK;
770 }
771 
772 
773 /*! You need to hold the lock of the cache and the write lock of the address
774 	space when calling this function.
775 	Note that in case of error your cache will be temporarily unlocked.
776 	If \a addressSpec is \c B_EXACT_ADDRESS and the
777 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
778 	that no part of the specified address range (base \c *_virtualAddress, size
779 	\a size) is wired.
780 */
781 static status_t
782 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
783 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
784 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
785 	bool kernel, VMArea** _area, void** _virtualAddress)
786 {
787 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
788 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
789 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
790 		addressRestrictions->address, offset, size,
791 		addressRestrictions->address_specification, wiring, protection,
792 		_area, areaName));
793 	cache->AssertLocked();
794 
795 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
796 		| HEAP_DONT_LOCK_KERNEL_SPACE;
797 	int priority;
798 	if (addressSpace != VMAddressSpace::Kernel()) {
799 		priority = VM_PRIORITY_USER;
800 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
801 		priority = VM_PRIORITY_VIP;
802 		allocationFlags |= HEAP_PRIORITY_VIP;
803 	} else
804 		priority = VM_PRIORITY_SYSTEM;
805 
806 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
807 		allocationFlags);
808 	if (area == NULL)
809 		return B_NO_MEMORY;
810 
811 	status_t status;
812 
813 	// if this is a private map, we need to create a new cache
814 	// to handle the private copies of pages as they are written to
815 	VMCache* sourceCache = cache;
816 	if (mapping == REGION_PRIVATE_MAP) {
817 		VMCache* newCache;
818 
819 		// create an anonymous cache
820 		bool isStack = (protection & B_STACK_AREA) != 0;
821 		status = VMCacheFactory::CreateAnonymousCache(newCache,
822 			isStack || (protection & B_OVERCOMMITTING_AREA) != 0, 0,
823 			isStack ? USER_STACK_GUARD_PAGES : 0, true, VM_PRIORITY_USER);
824 		if (status != B_OK)
825 			goto err1;
826 
827 		newCache->Lock();
828 		newCache->temporary = 1;
829 		newCache->virtual_base = offset;
830 		newCache->virtual_end = offset + size;
831 
832 		cache->AddConsumer(newCache);
833 
834 		cache = newCache;
835 	}
836 
837 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
838 		status = cache->SetMinimalCommitment(size, priority);
839 		if (status != B_OK)
840 			goto err2;
841 	}
842 
843 	// check to see if this address space has entered DELETE state
844 	if (addressSpace->IsBeingDeleted()) {
845 		// okay, someone is trying to delete this address space now, so we
846 		// can't insert the area and must back out
847 		status = B_BAD_TEAM_ID;
848 		goto err2;
849 	}
850 
851 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
852 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
853 		status = unmap_address_range(addressSpace,
854 			(addr_t)addressRestrictions->address, size, kernel);
855 		if (status != B_OK)
856 			goto err2;
857 	}
858 
859 	status = addressSpace->InsertArea(area, size, addressRestrictions,
860 		allocationFlags, _virtualAddress);
861 	if (status != B_OK) {
862 		// TODO: wait and try again once this is working in the backend
863 #if 0
864 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
865 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
866 				0, 0);
867 		}
868 #endif
869 		goto err2;
870 	}
871 
872 	// attach the cache to the area
873 	area->cache = cache;
874 	area->cache_offset = offset;
875 
876 	// point the cache back to the area
877 	cache->InsertAreaLocked(area);
878 	if (mapping == REGION_PRIVATE_MAP)
879 		cache->Unlock();
880 
881 	// insert the area in the global area hash table
882 	VMAreaHash::Insert(area);
883 
884 	// grab a ref to the address space (the area holds this)
885 	addressSpace->Get();
886 
887 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
888 //		cache, sourceCache, areaName, area);
889 
890 	*_area = area;
891 	return B_OK;
892 
893 err2:
894 	if (mapping == REGION_PRIVATE_MAP) {
895 		// We created this cache, so we must delete it again. Note that we
896 		// need to temporarily unlock the source cache or we'll otherwise
897 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
898 		sourceCache->Unlock();
899 		cache->ReleaseRefAndUnlock();
900 		sourceCache->Lock();
901 	}
902 err1:
903 	addressSpace->DeleteArea(area, allocationFlags);
904 	return status;
905 }
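

// Illustrative sketch (hypothetical helper, condensed from
// vm_block_address_range() below): create an anonymous cache, lock it and
// back a new kernel area with it. The cache must be locked when
// map_backing_store() is called and the address space must be write-locked.
#if 0
static area_id
example_create_backed_area(VMAddressSpace* addressSpace, const char* name,
	void* address, addr_t size)
{
	VMCache* cache;
	status_t status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0,
		false, VM_PRIORITY_SYSTEM);
	if (status != B_OK)
		return status;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = address;
	addressRestrictions.address_specification = B_EXACT_ADDRESS;
	status = map_backing_store(addressSpace, cache, 0, name, size, B_NO_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, REGION_NO_PRIVATE_MAP, 0,
		&addressRestrictions, true, &area, NULL);
	if (status != B_OK) {
		cache->ReleaseRefAndUnlock();
		return status;
	}

	cache->Unlock();
	area->cache_type = CACHE_TYPE_RAM;
	return area->id;
}
#endif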
906 
907 
908 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
909 	  locker1, locker2).
910 */
911 template<typename LockerType1, typename LockerType2>
912 static inline bool
913 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
914 {
915 	area->cache->AssertLocked();
916 
917 	VMAreaUnwiredWaiter waiter;
918 	if (!area->AddWaiterIfWired(&waiter))
919 		return false;
920 
921 	// unlock everything and wait
922 	if (locker1 != NULL)
923 		locker1->Unlock();
924 	if (locker2 != NULL)
925 		locker2->Unlock();
926 
927 	waiter.waitEntry.Wait();
928 
929 	return true;
930 }
931 
932 
933 /*!	Checks whether the given area has any wired ranges intersecting with the
934 	specified range and waits, if so.
935 
936 	When it has to wait, the function calls \c Unlock() on both \a locker1
937 	and \a locker2, if given.
938 	The area's top cache must be locked and must be unlocked as a side effect
939 	of calling \c Unlock() on either \a locker1 or \a locker2.
940 
941 	If the function does not have to wait it does not modify or unlock any
942 	object.
943 
944 	\param area The area to be checked.
945 	\param base The base address of the range to check.
946 	\param size The size of the address range to check.
947 	\param locker1 An object to be unlocked before starting to wait (may
948 		be \c NULL).
949 	\param locker2 An object to be unlocked before starting to wait (may
950 		be \c NULL).
951 	\return \c true, if the function had to wait, \c false otherwise.
952 */
953 template<typename LockerType1, typename LockerType2>
954 static inline bool
955 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
956 	LockerType1* locker1, LockerType2* locker2)
957 {
958 	area->cache->AssertLocked();
959 
960 	VMAreaUnwiredWaiter waiter;
961 	if (!area->AddWaiterIfWired(&waiter, base, size))
962 		return false;
963 
964 	// unlock everything and wait
965 	if (locker1 != NULL)
966 		locker1->Unlock();
967 	if (locker2 != NULL)
968 		locker2->Unlock();
969 
970 	waiter.waitEntry.Wait();
971 
972 	return true;
973 }
974 
975 
976 /*!	Checks whether the given address space has any wired ranges intersecting
977 	with the specified range and waits, if so.
978 
979 	Similar to wait_if_area_range_is_wired(), with the following differences:
980 	- All areas intersecting with the range are checked (or rather, all until
981 	  one is found that contains a wired range intersecting with the given
982 	  range).
983 	- The given address space must at least be read-locked and must be unlocked
984 	  when \c Unlock() is called on \a locker.
985 	- None of the areas' caches are allowed to be locked.
986 */
987 template<typename LockerType>
988 static inline bool
989 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
990 	size_t size, LockerType* locker)
991 {
992 	addr_t end = base + size - 1;
993 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
994 			VMArea* area = it.Next();) {
995 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
996 		if (area->Base() > end)
997 			return false;
998 
999 		if (base >= area->Base() + area->Size() - 1)
1000 			continue;
1001 
1002 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1003 
1004 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1005 			return true;
1006 	}
1007 
1008 	return false;
1009 }
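

// Illustrative sketch: the usual retry pattern around the wait functions
// above. Since waiting drops the given locker, the caller re-locks and checks
// again until no intersecting wired range remains (compare
// vm_create_anonymous_area() and vm_create_null_area() below).
#if 0
static status_t
example_lock_space_with_range_unwired(team_id team, addr_t base, size_t size,
	AddressSpaceWriteLocker& locker)
{
	do {
		status_t status = locker.SetTo(team);
		if (status != B_OK)
			return status;
	} while (wait_if_address_range_is_wired(locker.AddressSpace(), base, size,
		&locker));

	return B_OK;
}
#endif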
1010 
1011 
1012 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1013 	It must be called in a situation where locking the kernel address space
1014 	is allowed.
1015 */
1016 status_t
1017 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1018 {
1019 	AddressSpaceReadLocker locker;
1020 	VMArea* area;
1021 	status_t status = locker.SetFromArea(id, area);
1022 	if (status != B_OK)
1023 		return status;
1024 
1025 	if (area->page_protections == NULL) {
1026 		status = allocate_area_page_protections(area);
1027 		if (status != B_OK)
1028 			return status;
1029 	}
1030 
1031 	*cookie = (void*)area;
1032 	return B_OK;
1033 }
1034 
1035 
1036 /*!	This is a debug helper function that can only be used for very specific
1037 	use cases.
1038 	Sets protection for the given address range to the protection specified.
1039 	If \a protection is 0 then the involved pages will be marked non-present
1040 	in the translation map to cause a fault on access. The pages aren't
1041 	actually unmapped however so that they can be marked present again with
1042 	additional calls to this function. For this to work the area must be
1043 	fully locked in memory so that the pages aren't otherwise touched.
1044 	This function does not lock the kernel address space and needs to be
1045 	supplied with a \a cookie retrieved from a successful call to
1046 	vm_prepare_kernel_area_debug_protection().
1047 */
1048 status_t
1049 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1050 	uint32 protection)
1051 {
1052 	// check address range
1053 	addr_t address = (addr_t)_address;
1054 	size = PAGE_ALIGN(size);
1055 
1056 	if ((address % B_PAGE_SIZE) != 0
1057 		|| (addr_t)address + size < (addr_t)address
1058 		|| !IS_KERNEL_ADDRESS(address)
1059 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1060 		return B_BAD_VALUE;
1061 	}
1062 
1063 	// Translate the kernel protection to user protection as we only store that.
1064 	if ((protection & B_KERNEL_READ_AREA) != 0)
1065 		protection |= B_READ_AREA;
1066 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1067 		protection |= B_WRITE_AREA;
1068 
1069 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1070 	VMTranslationMap* map = addressSpace->TranslationMap();
1071 	VMArea* area = (VMArea*)cookie;
1072 
1073 	addr_t offset = address - area->Base();
1074 	if (area->Size() - offset < size) {
1075 		panic("protect range not fully within supplied area");
1076 		return B_BAD_VALUE;
1077 	}
1078 
1079 	if (area->page_protections == NULL) {
1080 		panic("area has no page protections");
1081 		return B_BAD_VALUE;
1082 	}
1083 
1084 	// Invalidate the mapping entries so any access to them will fault or
1085 	// restore the mapping entries unchanged so that lookups succeed again.
1086 	map->Lock();
1087 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1088 	map->Unlock();
1089 
1090 	// And set the proper page protections so that the fault case will actually
1091 	// fail and not simply try to map a new page.
1092 	for (addr_t pageAddress = address; pageAddress < address + size;
1093 			pageAddress += B_PAGE_SIZE) {
1094 		set_area_page_protection(area, pageAddress, protection);
1095 	}
1096 
1097 	return B_OK;
1098 }
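

// Illustrative sketch (hypothetical area and address): prepare the cookie
// once, then toggle a page between "non-present" (protection 0) and
// readable/writable to catch stray accesses. The area is assumed to be fully
// locked in memory, as required by the function above.
#if 0
static void
example_debug_protect_page(area_id area, void* pageAddress)
{
	void* cookie;
	if (vm_prepare_kernel_area_debug_protection(area, &cookie) != B_OK)
		return;

	// any access to the page faults now
	vm_set_kernel_area_debug_protection(cookie, pageAddress, B_PAGE_SIZE, 0);

	// ... later: make the page accessible again ...
	vm_set_kernel_area_debug_protection(cookie, pageAddress, B_PAGE_SIZE,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
}
#endif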
1099 
1100 
1101 status_t
1102 vm_block_address_range(const char* name, void* address, addr_t size)
1103 {
1104 	if (!arch_vm_supports_protection(0))
1105 		return B_NOT_SUPPORTED;
1106 
1107 	AddressSpaceWriteLocker locker;
1108 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1109 	if (status != B_OK)
1110 		return status;
1111 
1112 	VMAddressSpace* addressSpace = locker.AddressSpace();
1113 
1114 	// create an anonymous cache
1115 	VMCache* cache;
1116 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1117 		VM_PRIORITY_SYSTEM);
1118 	if (status != B_OK)
1119 		return status;
1120 
1121 	cache->temporary = 1;
1122 	cache->virtual_end = size;
1123 	cache->Lock();
1124 
1125 	VMArea* area;
1126 	virtual_address_restrictions addressRestrictions = {};
1127 	addressRestrictions.address = address;
1128 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1129 	status = map_backing_store(addressSpace, cache, 0, name, size,
1130 		B_ALREADY_WIRED, B_ALREADY_WIRED, REGION_NO_PRIVATE_MAP, 0,
1131 		&addressRestrictions, true, &area, NULL);
1132 	if (status != B_OK) {
1133 		cache->ReleaseRefAndUnlock();
1134 		return status;
1135 	}
1136 
1137 	cache->Unlock();
1138 	area->cache_type = CACHE_TYPE_RAM;
1139 	return area->id;
1140 }
1141 
1142 
1143 status_t
1144 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1145 {
1146 	AddressSpaceWriteLocker locker(team);
1147 	if (!locker.IsLocked())
1148 		return B_BAD_TEAM_ID;
1149 
1150 	VMAddressSpace* addressSpace = locker.AddressSpace();
1151 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1152 		addressSpace == VMAddressSpace::Kernel()
1153 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1154 }
1155 
1156 
1157 status_t
1158 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1159 	addr_t size, uint32 flags)
1160 {
1161 	if (size == 0)
1162 		return B_BAD_VALUE;
1163 
1164 	AddressSpaceWriteLocker locker(team);
1165 	if (!locker.IsLocked())
1166 		return B_BAD_TEAM_ID;
1167 
1168 	virtual_address_restrictions addressRestrictions = {};
1169 	addressRestrictions.address = *_address;
1170 	addressRestrictions.address_specification = addressSpec;
1171 	VMAddressSpace* addressSpace = locker.AddressSpace();
1172 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1173 		addressSpace == VMAddressSpace::Kernel()
1174 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1175 		_address);
1176 }
1177 
1178 
1179 area_id
1180 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1181 	uint32 wiring, uint32 protection, uint32 flags,
1182 	const virtual_address_restrictions* virtualAddressRestrictions,
1183 	const physical_address_restrictions* physicalAddressRestrictions,
1184 	bool kernel, void** _address)
1185 {
1186 	VMArea* area;
1187 	VMCache* cache;
1188 	vm_page* page = NULL;
1189 	bool isStack = (protection & B_STACK_AREA) != 0;
1190 	page_num_t guardPages;
1191 	bool canOvercommit = false;
1192 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1193 		? VM_PAGE_ALLOC_CLEAR : 0;
1194 
1195 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1196 		team, name, size));
1197 
1198 	size = PAGE_ALIGN(size);
1199 
1200 	if (size == 0)
1201 		return B_BAD_VALUE;
1202 	if (!arch_vm_supports_protection(protection))
1203 		return B_NOT_SUPPORTED;
1204 
1205 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1206 		canOvercommit = true;
1207 
1208 #ifdef DEBUG_KERNEL_STACKS
1209 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1210 		isStack = true;
1211 #endif
1212 
1213 	// check parameters
1214 	switch (virtualAddressRestrictions->address_specification) {
1215 		case B_ANY_ADDRESS:
1216 		case B_EXACT_ADDRESS:
1217 		case B_BASE_ADDRESS:
1218 		case B_ANY_KERNEL_ADDRESS:
1219 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1220 			break;
1221 
1222 		default:
1223 			return B_BAD_VALUE;
1224 	}
1225 
1226 	// If low or high physical address restrictions are given, we force
1227 	// B_CONTIGUOUS wiring, since only then we'll use
1228 	// vm_page_allocate_page_run() which deals with those restrictions.
1229 	if (physicalAddressRestrictions->low_address != 0
1230 		|| physicalAddressRestrictions->high_address != 0) {
1231 		wiring = B_CONTIGUOUS;
1232 	}
1233 
1234 	physical_address_restrictions stackPhysicalRestrictions;
1235 	bool doReserveMemory = false;
1236 	switch (wiring) {
1237 		case B_NO_LOCK:
1238 			break;
1239 		case B_FULL_LOCK:
1240 		case B_LAZY_LOCK:
1241 		case B_CONTIGUOUS:
1242 			doReserveMemory = true;
1243 			break;
1244 		case B_ALREADY_WIRED:
1245 			break;
1246 		case B_LOMEM:
1247 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1248 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1249 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1250 			wiring = B_CONTIGUOUS;
1251 			doReserveMemory = true;
1252 			break;
1253 		case B_32_BIT_FULL_LOCK:
1254 			if (B_HAIKU_PHYSICAL_BITS <= 32
1255 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1256 				wiring = B_FULL_LOCK;
1257 				doReserveMemory = true;
1258 				break;
1259 			}
1260 			// TODO: We don't really support this mode efficiently. Just fall
1261 			// through for now ...
1262 		case B_32_BIT_CONTIGUOUS:
1263 			#if B_HAIKU_PHYSICAL_BITS > 32
1264 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1265 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1266 					stackPhysicalRestrictions.high_address
1267 						= (phys_addr_t)1 << 32;
1268 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1269 				}
1270 			#endif
1271 			wiring = B_CONTIGUOUS;
1272 			doReserveMemory = true;
1273 			break;
1274 		default:
1275 			return B_BAD_VALUE;
1276 	}
1277 
1278 	// Optimization: For a single-page contiguous allocation without low/high
1279 	// memory restriction B_FULL_LOCK wiring suffices.
1280 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1281 		&& physicalAddressRestrictions->low_address == 0
1282 		&& physicalAddressRestrictions->high_address == 0) {
1283 		wiring = B_FULL_LOCK;
1284 	}
1285 
1286 	// For full lock or contiguous areas we're also going to map the pages and
1287 	// thus need to reserve pages for the mapping backend upfront.
1288 	addr_t reservedMapPages = 0;
1289 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1290 		AddressSpaceWriteLocker locker;
1291 		status_t status = locker.SetTo(team);
1292 		if (status != B_OK)
1293 			return status;
1294 
1295 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1296 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1297 	}
1298 
1299 	int priority;
1300 	if (team != VMAddressSpace::KernelID())
1301 		priority = VM_PRIORITY_USER;
1302 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1303 		priority = VM_PRIORITY_VIP;
1304 	else
1305 		priority = VM_PRIORITY_SYSTEM;
1306 
1307 	// Reserve memory before acquiring the address space lock. This reduces the
1308 	// chances of failure, since while holding the write lock to the address
1309 	// space (if it is the kernel address space that is), the low memory handler
1310 	// won't be able to free anything for us.
1311 	addr_t reservedMemory = 0;
1312 	if (doReserveMemory) {
1313 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1314 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1315 			return B_NO_MEMORY;
1316 		reservedMemory = size;
1317 		// TODO: We don't reserve the memory for the pages for the page
1318 		// directories/tables. We actually need to, since we currently don't
1319 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1320 		// there are actually fewer physical pages than there should be, which
1321 		// can get the VM into trouble in low memory situations.
1322 	}
1323 
1324 	AddressSpaceWriteLocker locker;
1325 	VMAddressSpace* addressSpace;
1326 	status_t status;
1327 
1328 	// For full lock areas reserve the pages before locking the address
1329 	// space. E.g. block caches can't release their memory while we hold the
1330 	// address space lock.
1331 	page_num_t reservedPages = reservedMapPages;
1332 	if (wiring == B_FULL_LOCK)
1333 		reservedPages += size / B_PAGE_SIZE;
1334 
1335 	vm_page_reservation reservation;
1336 	if (reservedPages > 0) {
1337 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1338 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1339 					priority)) {
1340 				reservedPages = 0;
1341 				status = B_WOULD_BLOCK;
1342 				goto err0;
1343 			}
1344 		} else
1345 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1346 	}
1347 
1348 	if (wiring == B_CONTIGUOUS) {
1349 		// we try to allocate the page run here upfront as this may easily
1350 		// fail for obvious reasons
1351 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1352 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1353 		if (page == NULL) {
1354 			status = B_NO_MEMORY;
1355 			goto err0;
1356 		}
1357 	}
1358 
1359 	// Lock the address space and, if B_EXACT_ADDRESS and
1360 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1361 	// is not wired.
1362 	do {
1363 		status = locker.SetTo(team);
1364 		if (status != B_OK)
1365 			goto err1;
1366 
1367 		addressSpace = locker.AddressSpace();
1368 	} while (virtualAddressRestrictions->address_specification
1369 			== B_EXACT_ADDRESS
1370 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1371 		&& wait_if_address_range_is_wired(addressSpace,
1372 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1373 
1374 	// create an anonymous cache
1375 	// if it's a stack, make sure that two pages are available at least
1376 	// if it's a stack, make sure that at least two pages are available
1377 		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
1378 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1379 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1380 		wiring == B_NO_LOCK, priority);
1381 	if (status != B_OK)
1382 		goto err1;
1383 
1384 	cache->temporary = 1;
1385 	cache->virtual_end = size;
1386 	cache->committed_size = reservedMemory;
1387 		// TODO: This should be done via a method.
1388 	reservedMemory = 0;
1389 
1390 	cache->Lock();
1391 
1392 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1393 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1394 		kernel, &area, _address);
1395 
1396 	if (status != B_OK) {
1397 		cache->ReleaseRefAndUnlock();
1398 		goto err1;
1399 	}
1400 
1401 	locker.DegradeToReadLock();
1402 
1403 	switch (wiring) {
1404 		case B_NO_LOCK:
1405 		case B_LAZY_LOCK:
1406 			// do nothing - the pages are mapped in as needed
1407 			break;
1408 
1409 		case B_FULL_LOCK:
1410 		{
1411 			// Allocate and map all pages for this area
1412 
1413 			off_t offset = 0;
1414 			for (addr_t address = area->Base();
1415 					address < area->Base() + (area->Size() - 1);
1416 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1417 #ifdef DEBUG_KERNEL_STACKS
1418 #	ifdef STACK_GROWS_DOWNWARDS
1419 				if (isStack && address < area->Base()
1420 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1421 #	else
1422 				if (isStack && address >= area->Base() + area->Size()
1423 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1424 #	endif
1425 					continue;
1426 #endif
1427 				vm_page* page = vm_page_allocate_page(&reservation,
1428 					PAGE_STATE_WIRED | pageAllocFlags);
1429 				cache->InsertPage(page, offset);
1430 				map_page(area, page, address, protection, &reservation);
1431 
1432 				DEBUG_PAGE_ACCESS_END(page);
1433 			}
1434 
1435 			break;
1436 		}
1437 
1438 		case B_ALREADY_WIRED:
1439 		{
1440 			// The pages should already be mapped. This is only really useful
1441 			// during boot time. Find the appropriate vm_page objects and stick
1442 			// them in the cache object.
1443 			VMTranslationMap* map = addressSpace->TranslationMap();
1444 			off_t offset = 0;
1445 
1446 			if (!gKernelStartup)
1447 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1448 
1449 			map->Lock();
1450 
1451 			for (addr_t virtualAddress = area->Base();
1452 					virtualAddress < area->Base() + (area->Size() - 1);
1453 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1454 				phys_addr_t physicalAddress;
1455 				uint32 flags;
1456 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1457 				if (status < B_OK) {
1458 					panic("looking up mapping failed for va 0x%lx\n",
1459 						virtualAddress);
1460 				}
1461 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1462 				if (page == NULL) {
1463 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1464 						"\n", physicalAddress);
1465 				}
1466 
1467 				DEBUG_PAGE_ACCESS_START(page);
1468 
1469 				cache->InsertPage(page, offset);
1470 				increment_page_wired_count(page);
1471 				vm_page_set_state(page, PAGE_STATE_WIRED);
1472 				page->busy = false;
1473 
1474 				DEBUG_PAGE_ACCESS_END(page);
1475 			}
1476 
1477 			map->Unlock();
1478 			break;
1479 		}
1480 
1481 		case B_CONTIGUOUS:
1482 		{
1483 			// We have already allocated our contiguous page run, so we can now
1484 			// just map the pages into the address space
1485 			VMTranslationMap* map = addressSpace->TranslationMap();
1486 			phys_addr_t physicalAddress
1487 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1488 			addr_t virtualAddress = area->Base();
1489 			off_t offset = 0;
1490 
1491 			map->Lock();
1492 
1493 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1494 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1495 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1496 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1497 				if (page == NULL)
1498 					panic("couldn't lookup physical page just allocated\n");
1499 
1500 				status = map->Map(virtualAddress, physicalAddress, protection,
1501 					area->MemoryType(), &reservation);
1502 				if (status < B_OK)
1503 					panic("couldn't map physical page in page run\n");
1504 
1505 				cache->InsertPage(page, offset);
1506 				increment_page_wired_count(page);
1507 
1508 				DEBUG_PAGE_ACCESS_END(page);
1509 			}
1510 
1511 			map->Unlock();
1512 			break;
1513 		}
1514 
1515 		default:
1516 			break;
1517 	}
1518 
1519 	cache->Unlock();
1520 
1521 	if (reservedPages > 0)
1522 		vm_page_unreserve_pages(&reservation);
1523 
1524 	TRACE(("vm_create_anonymous_area: done\n"));
1525 
1526 	area->cache_type = CACHE_TYPE_RAM;
1527 	return area->id;
1528 
1529 err1:
1530 	if (wiring == B_CONTIGUOUS) {
1531 		// we had allocated our page run upfront, so we need to free those pages again
1532 		phys_addr_t pageNumber = page->physical_page_number;
1533 		int32 i;
1534 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1535 			page = vm_lookup_page(pageNumber);
1536 			if (page == NULL)
1537 				panic("couldn't lookup physical page just allocated\n");
1538 
1539 			vm_page_set_state(page, PAGE_STATE_FREE);
1540 		}
1541 	}
1542 
1543 err0:
1544 	if (reservedPages > 0)
1545 		vm_page_unreserve_pages(&reservation);
1546 	if (reservedMemory > 0)
1547 		vm_unreserve_memory(reservedMemory);
1548 
1549 	return status;
1550 }
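

// Illustrative sketch (hypothetical helper): creating a fully locked kernel
// area with no particular virtual address or physical placement
// requirements.
#if 0
static area_id
example_create_kernel_buffer(const char* name, addr_t size, void** _address)
{
	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
	physical_address_restrictions physicalRestrictions = {};

	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0,
		&virtualRestrictions, &physicalRestrictions, true, _address);
}
#endif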
1551 
1552 
1553 area_id
1554 vm_map_physical_memory(team_id team, const char* name, void** _address,
1555 	uint32 addressSpec, addr_t size, uint32 protection,
1556 	phys_addr_t physicalAddress, bool alreadyWired)
1557 {
1558 	VMArea* area;
1559 	VMCache* cache;
1560 	addr_t mapOffset;
1561 
1562 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1563 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1564 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1565 		addressSpec, size, protection, physicalAddress));
1566 
1567 	if (!arch_vm_supports_protection(protection))
1568 		return B_NOT_SUPPORTED;
1569 
1570 	AddressSpaceWriteLocker locker(team);
1571 	if (!locker.IsLocked())
1572 		return B_BAD_TEAM_ID;
1573 
1574 	// if the physical address is not page aligned,
1575 	// move the area down to align it on a page boundary
1576 	mapOffset = physicalAddress % B_PAGE_SIZE;
1577 	size += mapOffset;
1578 	physicalAddress -= mapOffset;
1579 
1580 	size = PAGE_ALIGN(size);
1581 
1582 	// create a device cache
1583 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1584 	if (status != B_OK)
1585 		return status;
1586 
1587 	cache->virtual_end = size;
1588 
1589 	cache->Lock();
1590 
1591 	virtual_address_restrictions addressRestrictions = {};
1592 	addressRestrictions.address = *_address;
1593 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1594 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1595 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1596 		true, &area, _address);
1597 
1598 	if (status < B_OK)
1599 		cache->ReleaseRefLocked();
1600 
1601 	cache->Unlock();
1602 
1603 	if (status == B_OK) {
1604 		// set requested memory type -- use uncached, if not given
1605 		uint32 memoryType = addressSpec & B_MTR_MASK;
1606 		if (memoryType == 0)
1607 			memoryType = B_MTR_UC;
1608 
1609 		area->SetMemoryType(memoryType);
1610 
1611 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1612 		if (status != B_OK)
1613 			delete_area(locker.AddressSpace(), area, false);
1614 	}
1615 
1616 	if (status != B_OK)
1617 		return status;
1618 
1619 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1620 
1621 	if (alreadyWired) {
1622 		// The area is already mapped, but possibly not with the right
1623 		// memory type.
1624 		map->Lock();
1625 		map->ProtectArea(area, area->protection);
1626 		map->Unlock();
1627 	} else {
1628 		// Map the area completely.
1629 
1630 		// reserve pages needed for the mapping
1631 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1632 			area->Base() + (size - 1));
1633 		vm_page_reservation reservation;
1634 		vm_page_reserve_pages(&reservation, reservePages,
1635 			team == VMAddressSpace::KernelID()
1636 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1637 
1638 		map->Lock();
1639 
1640 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1641 			map->Map(area->Base() + offset, physicalAddress + offset,
1642 				protection, area->MemoryType(), &reservation);
1643 		}
1644 
1645 		map->Unlock();
1646 
1647 		vm_page_unreserve_pages(&reservation);
1648 	}
1649 
1650 	// modify the returned pointer to be offset into the new area
1651 	// the same way the physical address passed in was offset
1652 	*_address = (void*)((addr_t)*_address + mapOffset);
1653 
1654 	area->cache_type = CACHE_TYPE_DEVICE;
1655 	return area->id;
1656 }
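

// Illustrative sketch (hypothetical device and addresses): mapping a physical
// register block into the kernel address space. A physical address that is
// not page aligned is handled by the function itself -- the returned virtual
// address is offset by the same amount.
#if 0
static area_id
example_map_device_registers(phys_addr_t physicalBase, addr_t size,
	void** _virtualBase)
{
	*_virtualBase = NULL;
	return vm_map_physical_memory(VMAddressSpace::KernelID(),
		"example device registers", _virtualBase, B_ANY_KERNEL_ADDRESS, size,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, physicalBase, false);
}
#endif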
1657 
1658 
1659 /*!	Don't use!
1660 	TODO: This function was introduced to map physical page vecs to
1661 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1662 	use a device cache and does not track vm_page::wired_count!
1663 */
1664 area_id
1665 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1666 	uint32 addressSpec, addr_t* _size, uint32 protection,
1667 	struct generic_io_vec* vecs, uint32 vecCount)
1668 {
1669 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1670 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1671 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1672 		addressSpec, _size, protection, vecs, vecCount));
1673 
1674 	if (!arch_vm_supports_protection(protection)
1675 		|| (addressSpec & B_MTR_MASK) != 0) {
1676 		return B_NOT_SUPPORTED;
1677 	}
1678 
1679 	AddressSpaceWriteLocker locker(team);
1680 	if (!locker.IsLocked())
1681 		return B_BAD_TEAM_ID;
1682 
1683 	if (vecCount == 0)
1684 		return B_BAD_VALUE;
1685 
1686 	addr_t size = 0;
1687 	for (uint32 i = 0; i < vecCount; i++) {
1688 		if (vecs[i].base % B_PAGE_SIZE != 0
1689 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1690 			return B_BAD_VALUE;
1691 		}
1692 
1693 		size += vecs[i].length;
1694 	}
1695 
1696 	// create a device cache
1697 	VMCache* cache;
1698 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1699 	if (result != B_OK)
1700 		return result;
1701 
1702 	cache->virtual_end = size;
1703 
1704 	cache->Lock();
1705 
1706 	VMArea* area;
1707 	virtual_address_restrictions addressRestrictions = {};
1708 	addressRestrictions.address = *_address;
1709 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1710 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1711 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1712 		&addressRestrictions, true, &area, _address);
1713 
1714 	if (result != B_OK)
1715 		cache->ReleaseRefLocked();
1716 
1717 	cache->Unlock();
1718 
1719 	if (result != B_OK)
1720 		return result;
1721 
1722 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1723 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1724 		area->Base() + (size - 1));
1725 
1726 	vm_page_reservation reservation;
1727 	vm_page_reserve_pages(&reservation, reservePages,
1728 			team == VMAddressSpace::KernelID()
1729 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1730 	map->Lock();
1731 
1732 	uint32 vecIndex = 0;
1733 	size_t vecOffset = 0;
1734 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1735 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1736 			vecOffset = 0;
1737 			vecIndex++;
1738 		}
1739 
1740 		if (vecIndex >= vecCount)
1741 			break;
1742 
1743 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1744 			protection, area->MemoryType(), &reservation);
1745 
1746 		vecOffset += B_PAGE_SIZE;
1747 	}
1748 
1749 	map->Unlock();
1750 	vm_page_unreserve_pages(&reservation);
1751 
1752 	if (_size != NULL)
1753 		*_size = size;
1754 
1755 	area->cache_type = CACHE_TYPE_DEVICE;
1756 	return area->id;
1757 }
1758 
1759 
1760 area_id
1761 vm_create_null_area(team_id team, const char* name, void** address,
1762 	uint32 addressSpec, addr_t size, uint32 flags)
1763 {
1764 	size = PAGE_ALIGN(size);
1765 
1766 	// Lock the address space and, if B_EXACT_ADDRESS and
1767 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1768 	// is not wired.
1769 	AddressSpaceWriteLocker locker;
1770 	do {
1771 		if (locker.SetTo(team) != B_OK)
1772 			return B_BAD_TEAM_ID;
1773 	} while (addressSpec == B_EXACT_ADDRESS
1774 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1775 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1776 			(addr_t)*address, size, &locker));
1777 
1778 	// create a null cache
1779 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1780 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1781 	VMCache* cache;
1782 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1783 	if (status != B_OK)
1784 		return status;
1785 
1786 	cache->temporary = 1;
1787 	cache->virtual_end = size;
1788 
1789 	cache->Lock();
1790 
1791 	VMArea* area;
1792 	virtual_address_restrictions addressRestrictions = {};
1793 	addressRestrictions.address = *address;
1794 	addressRestrictions.address_specification = addressSpec;
1795 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1796 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1797 		&addressRestrictions, true, &area, address);
1798 
1799 	if (status < B_OK) {
1800 		cache->ReleaseRefAndUnlock();
1801 		return status;
1802 	}
1803 
1804 	cache->Unlock();
1805 
1806 	area->cache_type = CACHE_TYPE_NULL;
1807 	return area->id;
1808 }
1809 
1810 
1811 /*!	Creates the vnode cache for the specified \a vnode.
1812 	The vnode has to be marked busy when calling this function.
1813 */
1814 status_t
1815 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1816 {
1817 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1818 }
1819 
1820 
1821 /*!	\a cache must be locked. The area's address space must be read-locked.
1822 */
1823 static void
1824 pre_map_area_pages(VMArea* area, VMCache* cache,
1825 	vm_page_reservation* reservation)
1826 {
1827 	addr_t baseAddress = area->Base();
1828 	addr_t cacheOffset = area->cache_offset;
1829 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1830 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1831 
1832 	for (VMCachePagesTree::Iterator it
1833 				= cache->pages.GetIterator(firstPage, true, true);
1834 			vm_page* page = it.Next();) {
1835 		if (page->cache_offset >= endPage)
1836 			break;
1837 
1838 		// skip busy and inactive pages
1839 		if (page->busy || page->usage_count == 0)
1840 			continue;
1841 
1842 		DEBUG_PAGE_ACCESS_START(page);
1843 		map_page(area, page,
1844 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1845 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1846 		DEBUG_PAGE_ACCESS_END(page);
1847 	}
1848 }
1849 
1850 
1851 /*!	Will map the file specified by \a fd to an area in memory.
1852 	The file will be mirrored beginning at the specified \a offset. The
1853 	\a offset and \a size arguments have to be page aligned.
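	If \a fd is negative, no file is mapped at all; an anonymous area is
	created instead.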
1854 */
1855 static area_id
1856 _vm_map_file(team_id team, const char* name, void** _address,
1857 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1858 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1859 {
1860 	// TODO: for binary files, we want to make sure that they get a
1861 	//	copy of the file as it is at mapping time, i.e. later changes
1862 	//	should not make it into the mapped copy -- doing this nicely
1863 	//	will need quite some changes
1864 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1865 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1866 
1867 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1868 	size = PAGE_ALIGN(size);
1869 
1870 	if (mapping == REGION_NO_PRIVATE_MAP)
1871 		protection |= B_SHARED_AREA;
1872 	if (addressSpec != B_EXACT_ADDRESS)
1873 		unmapAddressRange = false;
1874 
1875 	if (fd < 0) {
1876 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1877 		virtual_address_restrictions virtualRestrictions = {};
1878 		virtualRestrictions.address = *_address;
1879 		virtualRestrictions.address_specification = addressSpec;
1880 		physical_address_restrictions physicalRestrictions = {};
1881 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1882 			flags, &virtualRestrictions, &physicalRestrictions, kernel,
1883 			_address);
1884 	}
1885 
1886 	// get the open flags of the FD
1887 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1888 	if (descriptor == NULL)
1889 		return EBADF;
1890 	int32 openMode = descriptor->open_mode;
1891 	put_fd(descriptor);
1892 
1893 	// The FD must be open for reading in any case. For a shared mapping with
1894 	// write access, the FD additionally has to be open for writing.
1895 	if ((openMode & O_ACCMODE) == O_WRONLY
1896 		|| (mapping == REGION_NO_PRIVATE_MAP
1897 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1898 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1899 		return EACCES;
1900 	}
1901 
1902 	// get the vnode for the object, this also grabs a ref to it
1903 	struct vnode* vnode = NULL;
1904 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1905 	if (status < B_OK)
1906 		return status;
1907 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1908 
1909 	// If we're going to pre-map pages, we need to reserve the pages needed by
1910 	// the mapping backend upfront.
1911 	page_num_t reservedPreMapPages = 0;
1912 	vm_page_reservation reservation;
1913 	if ((protection & B_READ_AREA) != 0) {
1914 		AddressSpaceWriteLocker locker;
1915 		status = locker.SetTo(team);
1916 		if (status != B_OK)
1917 			return status;
1918 
1919 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1920 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1921 
1922 		locker.Unlock();
1923 
1924 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1925 			team == VMAddressSpace::KernelID()
1926 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1927 	}
1928 
1929 	struct PageUnreserver {
1930 		PageUnreserver(vm_page_reservation* reservation)
1931 			:
1932 			fReservation(reservation)
1933 		{
1934 		}
1935 
1936 		~PageUnreserver()
1937 		{
1938 			if (fReservation != NULL)
1939 				vm_page_unreserve_pages(fReservation);
1940 		}
1941 
1942 		vm_page_reservation* fReservation;
1943 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1944 
1945 	// Lock the address space and, if the specified address range shall be
1946 	// unmapped, ensure it is not wired.
1947 	AddressSpaceWriteLocker locker;
1948 	do {
1949 		if (locker.SetTo(team) != B_OK)
1950 			return B_BAD_TEAM_ID;
1951 	} while (unmapAddressRange
1952 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1953 			(addr_t)*_address, size, &locker));
1954 
1955 	// TODO: this only works for file systems that use the file cache
1956 	VMCache* cache;
1957 	status = vfs_get_vnode_cache(vnode, &cache, false);
1958 	if (status < B_OK)
1959 		return status;
1960 
1961 	cache->Lock();
1962 
1963 	VMArea* area;
1964 	virtual_address_restrictions addressRestrictions = {};
1965 	addressRestrictions.address = *_address;
1966 	addressRestrictions.address_specification = addressSpec;
1967 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1968 		0, protection, mapping,
1969 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1970 		&addressRestrictions, kernel, &area, _address);
1971 
1972 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1973 		// map_backing_store() cannot know we no longer need the ref
1974 		cache->ReleaseRefLocked();
1975 	}
1976 
1977 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1978 		pre_map_area_pages(area, cache, &reservation);
1979 
1980 	cache->Unlock();
1981 
1982 	if (status == B_OK) {
1983 		// TODO: this probably deserves a smarter solution, i.e. don't always
1984 		// prefetch, and probably don't trigger it here, either.
1985 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1986 			// prefetches at max 10 MB starting from "offset"
1987 	}
1988 
1989 	if (status != B_OK)
1990 		return status;
1991 
1992 	area->cache_type = CACHE_TYPE_VNODE;
1993 	return area->id;
1994 }
1995 
1996 
1997 area_id
1998 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
1999 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2000 	int fd, off_t offset)
2001 {
2002 	if (!arch_vm_supports_protection(protection))
2003 		return B_NOT_SUPPORTED;
2004 
2005 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2006 		mapping, unmapAddressRange, fd, offset, true);
2007 }
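// Illustrative use only (hypothetical values): map the first 64 KB of an
// already opened file descriptor read-only into the kernel address space.
//
//	void* address = NULL;
//	area_id area = vm_map_file(VMAddressSpace::KernelID(), "mapped file",
//		&address, B_ANY_ADDRESS, 64 * 1024, B_READ_AREA | B_KERNEL_READ_AREA,
//		REGION_PRIVATE_MAP, false, fd, 0);
//	if (area < 0)
//		;	// handle the error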
2008 
2009 
2010 VMCache*
2011 vm_area_get_locked_cache(VMArea* area)
2012 {
2013 	rw_lock_read_lock(&sAreaCacheLock);
2014 
2015 	while (true) {
2016 		VMCache* cache = area->cache;
2017 
2018 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2019 			// cache has been deleted
2020 			rw_lock_read_lock(&sAreaCacheLock);
2021 			continue;
2022 		}
2023 
2024 		rw_lock_read_lock(&sAreaCacheLock);
2025 
2026 		if (cache == area->cache) {
2027 			cache->AcquireRefLocked();
2028 			rw_lock_read_unlock(&sAreaCacheLock);
2029 			return cache;
2030 		}
2031 
2032 		// the cache changed in the meantime
2033 		cache->Unlock();
2034 	}
2035 }
2036 
2037 
2038 void
2039 vm_area_put_locked_cache(VMCache* cache)
2040 {
2041 	cache->ReleaseRefAndUnlock();
2042 }
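// vm_area_get_locked_cache() and vm_area_put_locked_cache() are used in
// pairs: the former returns the area's current cache locked and with an
// extra reference, the latter releases both again. A typical pattern (as
// used e.g. in vm_clone_area() below):
//
//	VMCache* cache = vm_area_get_locked_cache(area);
//	// ... work with the locked cache ...
//	vm_area_put_locked_cache(cache);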
2043 
2044 
2045 area_id
2046 vm_clone_area(team_id team, const char* name, void** address,
2047 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2048 	bool kernel)
2049 {
2050 	VMArea* newArea = NULL;
2051 	VMArea* sourceArea;
2052 
2053 	// Check whether the source area exists and is cloneable. If so, mark it
2054 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2055 	{
2056 		AddressSpaceWriteLocker locker;
2057 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2058 		if (status != B_OK)
2059 			return status;
2060 
2061 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2062 			return B_NOT_ALLOWED;
2063 
2064 		sourceArea->protection |= B_SHARED_AREA;
2065 		protection |= B_SHARED_AREA;
2066 	}
2067 
2068 	// Now lock both address spaces and actually do the cloning.
2069 
2070 	MultiAddressSpaceLocker locker;
2071 	VMAddressSpace* sourceAddressSpace;
2072 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2073 	if (status != B_OK)
2074 		return status;
2075 
2076 	VMAddressSpace* targetAddressSpace;
2077 	status = locker.AddTeam(team, true, &targetAddressSpace);
2078 	if (status != B_OK)
2079 		return status;
2080 
2081 	status = locker.Lock();
2082 	if (status != B_OK)
2083 		return status;
2084 
2085 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2086 	if (sourceArea == NULL)
2087 		return B_BAD_VALUE;
2088 
2089 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2090 		return B_NOT_ALLOWED;
2091 
2092 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2093 
2094 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2095 	//	have been adapted. Maybe it should be part of the kernel settings,
2096 	//	anyway (so that old drivers can always work).
2097 #if 0
2098 	if (sourceArea->aspace == VMAddressSpace::Kernel()
2099 		&& addressSpace != VMAddressSpace::Kernel()
2100 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2101 		// kernel areas must not be cloned in userland, unless explicitly
2102 		// declared user-cloneable upon construction
2103 		status = B_NOT_ALLOWED;
2104 	} else
2105 #endif
2106 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2107 		status = B_NOT_ALLOWED;
2108 	else {
2109 		virtual_address_restrictions addressRestrictions = {};
2110 		addressRestrictions.address = *address;
2111 		addressRestrictions.address_specification = addressSpec;
2112 		status = map_backing_store(targetAddressSpace, cache,
2113 			sourceArea->cache_offset, name, sourceArea->Size(),
2114 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2115 			kernel, &newArea, address);
2116 	}
2117 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2118 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2119 		// to create a new cache, and has therefore already acquired a reference
2120 		// to the source cache - but otherwise it has no idea that we need
2121 		// one.
2122 		cache->AcquireRefLocked();
2123 	}
2124 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2125 		// we need to map in everything at this point
2126 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2127 			// we don't have actual pages to map but a physical area
2128 			VMTranslationMap* map
2129 				= sourceArea->address_space->TranslationMap();
2130 			map->Lock();
2131 
2132 			phys_addr_t physicalAddress;
2133 			uint32 oldProtection;
2134 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2135 
2136 			map->Unlock();
2137 
2138 			map = targetAddressSpace->TranslationMap();
2139 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2140 				newArea->Base() + (newArea->Size() - 1));
2141 
2142 			vm_page_reservation reservation;
2143 			vm_page_reserve_pages(&reservation, reservePages,
2144 				targetAddressSpace == VMAddressSpace::Kernel()
2145 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2146 			map->Lock();
2147 
2148 			for (addr_t offset = 0; offset < newArea->Size();
2149 					offset += B_PAGE_SIZE) {
2150 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2151 					protection, newArea->MemoryType(), &reservation);
2152 			}
2153 
2154 			map->Unlock();
2155 			vm_page_unreserve_pages(&reservation);
2156 		} else {
2157 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2158 			size_t reservePages = map->MaxPagesNeededToMap(
2159 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2160 			vm_page_reservation reservation;
2161 			vm_page_reserve_pages(&reservation, reservePages,
2162 				targetAddressSpace == VMAddressSpace::Kernel()
2163 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2164 
2165 			// map in all pages from source
2166 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2167 					vm_page* page = it.Next();) {
2168 				if (!page->busy) {
2169 					DEBUG_PAGE_ACCESS_START(page);
2170 					map_page(newArea, page,
2171 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2172 							- newArea->cache_offset),
2173 						protection, &reservation);
2174 					DEBUG_PAGE_ACCESS_END(page);
2175 				}
2176 			}
2177 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2178 			// ensuring that!
2179 
2180 			vm_page_unreserve_pages(&reservation);
2181 		}
2182 	}
2183 	if (status == B_OK)
2184 		newArea->cache_type = sourceArea->cache_type;
2185 
2186 	vm_area_put_locked_cache(cache);
2187 
2188 	if (status < B_OK)
2189 		return status;
2190 
2191 	return newArea->id;
2192 }
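// Illustrative use only (hypothetical values): clone an existing area, e.g.
// a frame buffer area set up by a driver, into the kernel address space with
// the same backing store.
//
//	void* address = NULL;
//	area_id clone = vm_clone_area(VMAddressSpace::KernelID(), "fb clone",
//		&address, B_ANY_ADDRESS, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
//		REGION_NO_PRIVATE_MAP, sourceArea, true);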
2193 
2194 
2195 /*!	Deletes the specified area of the given address space.
2196 
2197 	The address space must be write-locked.
2198 	The caller must ensure that the area does not have any wired ranges.
2199 
2200 	\param addressSpace The address space containing the area.
2201 	\param area The area to be deleted.
2202 	\param deletingAddressSpace \c true, if the address space is in the process
2203 		of being deleted.
2204 */
2205 static void
2206 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2207 	bool deletingAddressSpace)
2208 {
2209 	ASSERT(!area->IsWired());
2210 
2211 	VMAreaHash::Remove(area);
2212 
2213 	// At this point the area is removed from the global hash table, but
2214 	// still exists in the area list.
2215 
2216 	// Unmap the virtual address space the area occupied.
2217 	{
2218 		// We need to lock the complete cache chain.
2219 		VMCache* topCache = vm_area_get_locked_cache(area);
2220 		VMCacheChainLocker cacheChainLocker(topCache);
2221 		cacheChainLocker.LockAllSourceCaches();
2222 
2223 		// If the area's top cache is a temporary cache and the area is the only
2224 		// one referencing it (besides us currently holding a second reference),
2225 		// the unmapping code doesn't need to care about preserving the accessed
2226 		// and dirty flags of the top cache page mappings.
2227 		bool ignoreTopCachePageFlags
2228 			= topCache->temporary && topCache->RefCount() == 2;
2229 
2230 		area->address_space->TranslationMap()->UnmapArea(area,
2231 			deletingAddressSpace, ignoreTopCachePageFlags);
2232 	}
2233 
2234 	if (!area->cache->temporary)
2235 		area->cache->WriteModified();
2236 
2237 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2238 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2239 
2240 	arch_vm_unset_memory_type(area);
2241 	addressSpace->RemoveArea(area, allocationFlags);
2242 	addressSpace->Put();
2243 
2244 	area->cache->RemoveArea(area);
2245 	area->cache->ReleaseRef();
2246 
2247 	addressSpace->DeleteArea(area, allocationFlags);
2248 }
2249 
2250 
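/*!	Deletes the area with ID \a id in the given team's address space, waiting
	for any wired ranges in the area to be unwired first. Unless \a kernel is
	\c true, areas protected with B_KERNEL_AREA cannot be deleted this way.
*/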
2251 status_t
2252 vm_delete_area(team_id team, area_id id, bool kernel)
2253 {
2254 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2255 		team, id));
2256 
2257 	// lock the address space and make sure the area isn't wired
2258 	AddressSpaceWriteLocker locker;
2259 	VMArea* area;
2260 	AreaCacheLocker cacheLocker;
2261 
2262 	do {
2263 		status_t status = locker.SetFromArea(team, id, area);
2264 		if (status != B_OK)
2265 			return status;
2266 
2267 		cacheLocker.SetTo(area);
2268 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2269 
2270 	cacheLocker.Unlock();
2271 
2272 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2273 		return B_NOT_ALLOWED;
2274 
2275 	delete_area(locker.AddressSpace(), area, false);
2276 	return B_OK;
2277 }
2278 
2279 
2280 /*!	Creates a new cache on top of the given cache, moves all areas from
2281 	the old cache to the new one, and changes the protection of all affected
2282 	areas' pages to read-only. If requested, wired pages are moved up to the
2283 	new cache and copies are added to the old cache in their place.
2284 	Preconditions:
2285 	- The given cache must be locked.
2286 	- All of the cache's areas' address spaces must be read locked.
2287 	- Either the cache must not have any wired ranges or a page reservation for
2288 	  all wired pages must be provided, so they can be copied.
2289 
2290 	\param lowerCache The cache on top of which a new cache shall be created.
2291 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2292 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2293 		has wired pages. The wired pages are copied in this case.
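	In other words, after the call the chain is
	areas -> upperCache (new) -> \a lowerCache -> ... instead of
	areas -> \a lowerCache -> ...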
2294 */
2295 static status_t
2296 vm_copy_on_write_area(VMCache* lowerCache,
2297 	vm_page_reservation* wiredPagesReservation)
2298 {
2299 	VMCache* upperCache;
2300 
2301 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2302 
2303 	// We need to separate the cache from its areas. The cache goes one level
2304 	// deeper and we create a new cache in between.
2305 
2306 	// create an anonymous cache
2307 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2308 		0, dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2309 		VM_PRIORITY_USER);
2310 	if (status != B_OK)
2311 		return status;
2312 
2313 	upperCache->Lock();
2314 
2315 	upperCache->temporary = 1;
2316 	upperCache->virtual_base = lowerCache->virtual_base;
2317 	upperCache->virtual_end = lowerCache->virtual_end;
2318 
2319 	// transfer the lower cache areas to the upper cache
2320 	rw_lock_write_lock(&sAreaCacheLock);
2321 	upperCache->TransferAreas(lowerCache);
2322 	rw_lock_write_unlock(&sAreaCacheLock);
2323 
2324 	lowerCache->AddConsumer(upperCache);
2325 
2326 	// We now need to remap all pages from all of the cache's areas read-only,
2327 	// so that a copy will be created on next write access. If there are wired
2328 	// pages, we keep their protection, move them to the upper cache and create
2329 	// copies for the lower cache.
2330 	if (wiredPagesReservation != NULL) {
2331 		// We need to handle wired pages -- iterate through the cache's pages.
2332 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2333 				vm_page* page = it.Next();) {
2334 			if (page->WiredCount() > 0) {
2335 				// allocate a new page and copy the wired one
2336 				vm_page* copiedPage = vm_page_allocate_page(
2337 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2338 
2339 				vm_memcpy_physical_page(
2340 					copiedPage->physical_page_number * B_PAGE_SIZE,
2341 					page->physical_page_number * B_PAGE_SIZE);
2342 
2343 				// move the wired page to the upper cache (note: removing is OK
2344 				// with the SplayTree iterator) and insert the copy
2345 				upperCache->MovePage(page);
2346 				lowerCache->InsertPage(copiedPage,
2347 					page->cache_offset * B_PAGE_SIZE);
2348 
2349 				DEBUG_PAGE_ACCESS_END(copiedPage);
2350 			} else {
2351 				// Change the protection of this page in all areas.
2352 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2353 						tempArea = tempArea->cache_next) {
2354 					// The area must be readable in the same way it was
2355 					// previously writable.
2356 					uint32 protection = B_KERNEL_READ_AREA;
2357 					if ((tempArea->protection & B_READ_AREA) != 0)
2358 						protection |= B_READ_AREA;
2359 
2360 					VMTranslationMap* map
2361 						= tempArea->address_space->TranslationMap();
2362 					map->Lock();
2363 					map->ProtectPage(tempArea,
2364 						virtual_page_address(tempArea, page), protection);
2365 					map->Unlock();
2366 				}
2367 			}
2368 		}
2369 	} else {
2370 		ASSERT(lowerCache->WiredPagesCount() == 0);
2371 
2372 		// just change the protection of all areas
2373 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2374 				tempArea = tempArea->cache_next) {
2375 			// The area must be readable in the same way it was previously
2376 			// writable.
2377 			uint32 protection = B_KERNEL_READ_AREA;
2378 			if ((tempArea->protection & B_READ_AREA) != 0)
2379 				protection |= B_READ_AREA;
2380 
2381 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2382 			map->Lock();
2383 			map->ProtectArea(tempArea, protection);
2384 			map->Unlock();
2385 		}
2386 	}
2387 
2388 	vm_area_put_locked_cache(upperCache);
2389 
2390 	return B_OK;
2391 }
2392 
2393 
2394 area_id
2395 vm_copy_area(team_id team, const char* name, void** _address,
2396 	uint32 addressSpec, uint32 protection, area_id sourceID)
2397 {
2398 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2399 
2400 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2401 		// set the same protection for the kernel as for userland
2402 		protection |= B_KERNEL_READ_AREA;
2403 		if (writableCopy)
2404 			protection |= B_KERNEL_WRITE_AREA;
2405 	}
2406 
2407 	// Do the locking: target address space, all address spaces associated with
2408 	// the source cache, and the cache itself.
2409 	MultiAddressSpaceLocker locker;
2410 	VMAddressSpace* targetAddressSpace;
2411 	VMCache* cache;
2412 	VMArea* source;
2413 	AreaCacheLocker cacheLocker;
2414 	status_t status;
2415 	bool sharedArea;
2416 
2417 	page_num_t wiredPages = 0;
2418 	vm_page_reservation wiredPagesReservation;
2419 
2420 	bool restart;
2421 	do {
2422 		restart = false;
2423 
2424 		locker.Unset();
2425 		status = locker.AddTeam(team, true, &targetAddressSpace);
2426 		if (status == B_OK) {
2427 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2428 				&cache);
2429 		}
2430 		if (status != B_OK)
2431 			return status;
2432 
2433 		cacheLocker.SetTo(cache, true);	// already locked
2434 
2435 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2436 
2437 		page_num_t oldWiredPages = wiredPages;
2438 		wiredPages = 0;
2439 
2440 		// If the source area isn't shared, count the number of wired pages in
2441 		// the cache and reserve as many pages.
2442 		if (!sharedArea) {
2443 			wiredPages = cache->WiredPagesCount();
2444 
2445 			if (wiredPages > oldWiredPages) {
2446 				cacheLocker.Unlock();
2447 				locker.Unlock();
2448 
2449 				if (oldWiredPages > 0)
2450 					vm_page_unreserve_pages(&wiredPagesReservation);
2451 
2452 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2453 					VM_PRIORITY_USER);
2454 
2455 				restart = true;
2456 			}
2457 		} else if (oldWiredPages > 0)
2458 			vm_page_unreserve_pages(&wiredPagesReservation);
2459 	} while (restart);
2460 
2461 	// unreserve pages later
2462 	struct PagesUnreserver {
2463 		PagesUnreserver(vm_page_reservation* reservation)
2464 			:
2465 			fReservation(reservation)
2466 		{
2467 		}
2468 
2469 		~PagesUnreserver()
2470 		{
2471 			if (fReservation != NULL)
2472 				vm_page_unreserve_pages(fReservation);
2473 		}
2474 
2475 	private:
2476 		vm_page_reservation*	fReservation;
2477 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2478 
2479 	if (addressSpec == B_CLONE_ADDRESS) {
2480 		addressSpec = B_EXACT_ADDRESS;
2481 		*_address = (void*)source->Base();
2482 	}
2483 
2484 	// First, create a cache on top of the source area, or use the existing
2485 	// one, if this is a shared area.
2486 
2487 	VMArea* target;
2488 	virtual_address_restrictions addressRestrictions = {};
2489 	addressRestrictions.address = *_address;
2490 	addressRestrictions.address_specification = addressSpec;
2491 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2492 		name, source->Size(), source->wiring, protection,
2493 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2494 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2495 		&addressRestrictions, true, &target, _address);
2496 	if (status < B_OK)
2497 		return status;
2498 
2499 	if (sharedArea) {
2500 		// The new area uses the old area's cache, but map_backing_store()
2501 		// hasn't acquired a ref. So we have to do that now.
2502 		cache->AcquireRefLocked();
2503 	}
2504 
2505 	// If the source area is writable, we need to move it one layer up as well
2506 
2507 	if (!sharedArea) {
2508 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2509 			// TODO: do something more useful if this fails!
2510 			if (vm_copy_on_write_area(cache,
2511 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2512 				panic("vm_copy_on_write_area() failed!\n");
2513 			}
2514 		}
2515 	}
2516 
2517 	// we return the ID of the newly created area
2518 	return target->id;
2519 }
2520 
2521 
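/*!	Changes the protection of the area with ID \a areaID to \a newProtection.
	Unless \a kernel is \c true, areas protected with B_KERNEL_AREA cannot be
	changed. Making a previously read-only area writable may insert a
	copy-on-write cache via vm_copy_on_write_area(), if the cache has
	consumers.
*/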
2522 static status_t
2523 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2524 	bool kernel)
2525 {
2526 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2527 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2528 
2529 	if (!arch_vm_supports_protection(newProtection))
2530 		return B_NOT_SUPPORTED;
2531 
2532 	bool becomesWritable
2533 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2534 
2535 	// lock address spaces and cache
2536 	MultiAddressSpaceLocker locker;
2537 	VMCache* cache;
2538 	VMArea* area;
2539 	status_t status;
2540 	AreaCacheLocker cacheLocker;
2541 	bool isWritable;
2542 
2543 	bool restart;
2544 	do {
2545 		restart = false;
2546 
2547 		locker.Unset();
2548 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2549 		if (status != B_OK)
2550 			return status;
2551 
2552 		cacheLocker.SetTo(cache, true);	// already locked
2553 
2554 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2555 			return B_NOT_ALLOWED;
2556 
2557 		if (area->protection == newProtection)
2558 			return B_OK;
2559 
2560 		if (team != VMAddressSpace::KernelID()
2561 			&& area->address_space->ID() != team) {
2562 			// unless you're the kernel, you are only allowed to set
2563 			// the protection of your own areas
2564 			return B_NOT_ALLOWED;
2565 		}
2566 
2567 		isWritable
2568 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2569 
2570 		// Make sure the area (or, if we're going to call
2571 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2572 		// wired ranges.
2573 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2574 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2575 					otherArea = otherArea->cache_next) {
2576 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2577 					restart = true;
2578 					break;
2579 				}
2580 			}
2581 		} else {
2582 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2583 				restart = true;
2584 		}
2585 	} while (restart);
2586 
2587 	bool changePageProtection = true;
2588 	bool changeTopCachePagesOnly = false;
2589 
2590 	if (isWritable && !becomesWritable) {
2591 		// writable -> !writable
2592 
2593 		if (cache->source != NULL && cache->temporary) {
2594 			if (cache->CountWritableAreas(area) == 0) {
2595 				// Since this cache is now backed by the pages of its source
2596 				// cache, we can change its commitment to take into account only
2597 				// those pages that really are in this cache.
2598 
2599 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2600 					team == VMAddressSpace::KernelID()
2601 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2602 
2603 				// TODO: we may be able to join with our source cache, if
2604 				// count == 0
2605 			}
2606 		}
2607 
2608 		// If only the writability changes, we can just remap the pages of the
2609 		// top cache, since the pages of lower caches are mapped read-only
2610 		// anyway. That's only advantageous if the number of pages in the
2611 		// cache is significantly smaller than the number of pages in the area,
2612 		// though.
2613 		if (newProtection
2614 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2615 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2616 			changeTopCachePagesOnly = true;
2617 		}
2618 	} else if (!isWritable && becomesWritable) {
2619 		// !writable -> writable
2620 
2621 		if (!cache->consumers.IsEmpty()) {
2622 			// There are consumers -- we have to insert a new cache. Fortunately
2623 			// vm_copy_on_write_area() does everything that's needed.
2624 			changePageProtection = false;
2625 			status = vm_copy_on_write_area(cache, NULL);
2626 		} else {
2627 			// No consumers, so we don't need to insert a new one.
2628 			if (cache->source != NULL && cache->temporary) {
2629 				// the cache's commitment must contain all possible pages
2630 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2631 					team == VMAddressSpace::KernelID()
2632 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2633 			}
2634 
2635 			if (status == B_OK && cache->source != NULL) {
2636 				// There's a source cache, hence we can't just change all pages'
2637 				// protection or we might allow writing into pages belonging to
2638 				// a lower cache.
2639 				changeTopCachePagesOnly = true;
2640 			}
2641 		}
2642 	} else {
2643 		// we don't have anything special to do in all other cases
2644 	}
2645 
2646 	if (status == B_OK) {
2647 		// remap existing pages in this cache
2648 		if (changePageProtection) {
2649 			VMTranslationMap* map = area->address_space->TranslationMap();
2650 			map->Lock();
2651 
2652 			if (changeTopCachePagesOnly) {
2653 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2654 				page_num_t lastPageOffset
2655 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2656 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2657 						vm_page* page = it.Next();) {
2658 					if (page->cache_offset >= firstPageOffset
2659 						&& page->cache_offset < lastPageOffset) {
2660 						addr_t address = virtual_page_address(area, page);
2661 						map->ProtectPage(area, address, newProtection);
2662 					}
2663 				}
2664 			} else
2665 				map->ProtectArea(area, newProtection);
2666 
2667 			map->Unlock();
2668 		}
2669 
2670 		area->protection = newProtection;
2671 	}
2672 
2673 	return status;
2674 }
2675 
2676 
2677 status_t
2678 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2679 {
2680 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2681 	if (addressSpace == NULL)
2682 		return B_BAD_TEAM_ID;
2683 
2684 	VMTranslationMap* map = addressSpace->TranslationMap();
2685 
2686 	map->Lock();
2687 	uint32 dummyFlags;
2688 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2689 	map->Unlock();
2690 
2691 	addressSpace->Put();
2692 	return status;
2693 }
2694 
2695 
2696 /*!	The page's cache must be locked.
2697 */
2698 bool
2699 vm_test_map_modification(vm_page* page)
2700 {
2701 	if (page->modified)
2702 		return true;
2703 
2704 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2705 	vm_page_mapping* mapping;
2706 	while ((mapping = iterator.Next()) != NULL) {
2707 		VMArea* area = mapping->area;
2708 		VMTranslationMap* map = area->address_space->TranslationMap();
2709 
2710 		phys_addr_t physicalAddress;
2711 		uint32 flags;
2712 		map->Lock();
2713 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2714 		map->Unlock();
2715 
2716 		if ((flags & PAGE_MODIFIED) != 0)
2717 			return true;
2718 	}
2719 
2720 	return false;
2721 }
2722 
2723 
2724 /*!	The page's cache must be locked.
2725 */
2726 void
2727 vm_clear_map_flags(vm_page* page, uint32 flags)
2728 {
2729 	if ((flags & PAGE_ACCESSED) != 0)
2730 		page->accessed = false;
2731 	if ((flags & PAGE_MODIFIED) != 0)
2732 		page->modified = false;
2733 
2734 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2735 	vm_page_mapping* mapping;
2736 	while ((mapping = iterator.Next()) != NULL) {
2737 		VMArea* area = mapping->area;
2738 		VMTranslationMap* map = area->address_space->TranslationMap();
2739 
2740 		map->Lock();
2741 		map->ClearFlags(virtual_page_address(area, page), flags);
2742 		map->Unlock();
2743 	}
2744 }
2745 
2746 
2747 /*!	Removes all mappings from a page.
2748 	After you've called this function, the page is unmapped from memory and
2749 	the page's \c accessed and \c modified flags have been updated according
2750 	to the state of the mappings.
2751 	The page's cache must be locked.
2752 */
2753 void
2754 vm_remove_all_page_mappings(vm_page* page)
2755 {
2756 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2757 		VMArea* area = mapping->area;
2758 		VMTranslationMap* map = area->address_space->TranslationMap();
2759 		addr_t address = virtual_page_address(area, page);
2760 		map->UnmapPage(area, address, false);
2761 	}
2762 }
2763 
2764 
2765 int32
2766 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2767 {
2768 	int32 count = 0;
2769 
2770 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2771 	vm_page_mapping* mapping;
2772 	while ((mapping = iterator.Next()) != NULL) {
2773 		VMArea* area = mapping->area;
2774 		VMTranslationMap* map = area->address_space->TranslationMap();
2775 
2776 		bool modified;
2777 		if (map->ClearAccessedAndModified(area,
2778 				virtual_page_address(area, page), false, modified)) {
2779 			count++;
2780 		}
2781 
2782 		page->modified |= modified;
2783 	}
2784 
2785 
2786 	if (page->accessed) {
2787 		count++;
2788 		page->accessed = false;
2789 	}
2790 
2791 	return count;
2792 }
2793 
2794 
2795 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2796 	mappings.
2797 	The function iterates through the page mappings and removes them until
2798 	encountering one that has been accessed. From then on it will continue to
2799 	iterate, but only clear the accessed flag of the mapping. The page's
2800 	\c modified bit will be updated accordingly, the \c accessed bit will be
2801 	cleared.
2802 	\return The number of mapping accessed bits encountered, including the
2803 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2804 		of the page have been removed.
2805 */
2806 int32
2807 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2808 {
2809 	ASSERT(page->WiredCount() == 0);
2810 
2811 	if (page->accessed)
2812 		return vm_clear_page_mapping_accessed_flags(page);
2813 
2814 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2815 		VMArea* area = mapping->area;
2816 		VMTranslationMap* map = area->address_space->TranslationMap();
2817 		addr_t address = virtual_page_address(area, page);
2818 		bool modified = false;
2819 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2820 			page->accessed = true;
2821 			page->modified |= modified;
2822 			return vm_clear_page_mapping_accessed_flags(page);
2823 		}
2824 		page->modified |= modified;
2825 	}
2826 
2827 	return 0;
2828 }
2829 
2830 
2831 static int
2832 display_mem(int argc, char** argv)
2833 {
2834 	bool physical = false;
2835 	addr_t copyAddress;
2836 	int32 displayWidth;
2837 	int32 itemSize;
2838 	int32 num = -1;
2839 	addr_t address;
2840 	int i = 1, j;
2841 
2842 	if (argc > 1 && argv[1][0] == '-') {
2843 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2844 			physical = true;
2845 			i++;
2846 		} else
2847 			i = 99;
2848 	}
2849 
2850 	if (argc < i + 1 || argc > i + 2) {
2851 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2852 			"\tdl - 8 bytes\n"
2853 			"\tdw - 4 bytes\n"
2854 			"\tds - 2 bytes\n"
2855 			"\tdb - 1 byte\n"
2856 			"\tstring - a whole string\n"
2857 			"  -p or --physical only allows memory from a single page to be "
2858 			"displayed.\n");
2859 		return 0;
2860 	}
2861 
2862 	address = parse_expression(argv[i]);
2863 
2864 	if (argc > i + 1)
2865 		num = parse_expression(argv[i + 1]);
2866 
2867 	// build the format string
2868 	if (strcmp(argv[0], "db") == 0) {
2869 		itemSize = 1;
2870 		displayWidth = 16;
2871 	} else if (strcmp(argv[0], "ds") == 0) {
2872 		itemSize = 2;
2873 		displayWidth = 8;
2874 	} else if (strcmp(argv[0], "dw") == 0) {
2875 		itemSize = 4;
2876 		displayWidth = 4;
2877 	} else if (strcmp(argv[0], "dl") == 0) {
2878 		itemSize = 8;
2879 		displayWidth = 2;
2880 	} else if (strcmp(argv[0], "string") == 0) {
2881 		itemSize = 1;
2882 		displayWidth = -1;
2883 	} else {
2884 		kprintf("display_mem called in an invalid way!\n");
2885 		return 0;
2886 	}
2887 
2888 	if (num <= 0)
2889 		num = displayWidth;
2890 
2891 	void* physicalPageHandle = NULL;
2892 
2893 	if (physical) {
2894 		int32 offset = address & (B_PAGE_SIZE - 1);
2895 		if (num * itemSize + offset > B_PAGE_SIZE) {
2896 			num = (B_PAGE_SIZE - offset) / itemSize;
2897 			kprintf("NOTE: number of bytes has been cut to page size\n");
2898 		}
2899 
2900 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2901 
2902 		if (vm_get_physical_page_debug(address, &copyAddress,
2903 				&physicalPageHandle) != B_OK) {
2904 			kprintf("getting the hardware page failed.\n");
2905 			return 0;
2906 		}
2907 
2908 		address += offset;
2909 		copyAddress += offset;
2910 	} else
2911 		copyAddress = address;
2912 
2913 	if (!strcmp(argv[0], "string")) {
2914 		kprintf("%p \"", (char*)copyAddress);
2915 
2916 		// string mode
2917 		for (i = 0; true; i++) {
2918 			char c;
2919 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2920 					!= B_OK
2921 				|| c == '\0') {
2922 				break;
2923 			}
2924 
2925 			if (c == '\n')
2926 				kprintf("\\n");
2927 			else if (c == '\t')
2928 				kprintf("\\t");
2929 			else {
2930 				if (!isprint(c))
2931 					c = '.';
2932 
2933 				kprintf("%c", c);
2934 			}
2935 		}
2936 
2937 		kprintf("\"\n");
2938 	} else {
2939 		// number mode
2940 		for (i = 0; i < num; i++) {
2941 			uint32 value;
2942 
2943 			if ((i % displayWidth) == 0) {
2944 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2945 				if (i != 0)
2946 					kprintf("\n");
2947 
2948 				kprintf("[0x%lx]  ", address + i * itemSize);
2949 
2950 				for (j = 0; j < displayed; j++) {
2951 					char c;
2952 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2953 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2954 						displayed = j;
2955 						break;
2956 					}
2957 					if (!isprint(c))
2958 						c = '.';
2959 
2960 					kprintf("%c", c);
2961 				}
2962 				if (num > displayWidth) {
2963 					// make sure the spacing in the last line is correct
2964 					for (j = displayed; j < displayWidth * itemSize; j++)
2965 						kprintf(" ");
2966 				}
2967 				kprintf("  ");
2968 			}
2969 
2970 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2971 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2972 				kprintf("read fault");
2973 				break;
2974 			}
2975 
2976 			switch (itemSize) {
2977 				case 1:
2978 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
2979 					break;
2980 				case 2:
2981 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
2982 					break;
2983 				case 4:
2984 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
2985 					break;
2986 				case 8:
2987 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
2988 					break;
2989 			}
2990 		}
2991 
2992 		kprintf("\n");
2993 	}
2994 
2995 	if (physical) {
2996 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
2997 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
2998 	}
2999 	return 0;
3000 }
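// Illustrative KDL usage (hypothetical addresses): "dw 0x80120000 8" dumps
// eight 32-bit values starting at that virtual address, while
// "db -p 0x9f000 16" dumps 16 bytes from the physical page containing the
// given physical address.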
3001 
3002 
3003 static void
3004 dump_cache_tree_recursively(VMCache* cache, int level,
3005 	VMCache* highlightCache)
3006 {
3007 	// print this cache
3008 	for (int i = 0; i < level; i++)
3009 		kprintf("  ");
3010 	if (cache == highlightCache)
3011 		kprintf("%p <--\n", cache);
3012 	else
3013 		kprintf("%p\n", cache);
3014 
3015 	// recursively print its consumers
3016 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3017 			VMCache* consumer = it.Next();) {
3018 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3019 	}
3020 }
3021 
3022 
3023 static int
3024 dump_cache_tree(int argc, char** argv)
3025 {
3026 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3027 		kprintf("usage: %s <address>\n", argv[0]);
3028 		return 0;
3029 	}
3030 
3031 	addr_t address = parse_expression(argv[1]);
3032 	if (address == 0)
3033 		return 0;
3034 
3035 	VMCache* cache = (VMCache*)address;
3036 	VMCache* root = cache;
3037 
3038 	// find the root cache (the transitive source)
3039 	while (root->source != NULL)
3040 		root = root->source;
3041 
3042 	dump_cache_tree_recursively(root, 0, cache);
3043 
3044 	return 0;
3045 }
3046 
3047 
3048 const char*
3049 vm_cache_type_to_string(int32 type)
3050 {
3051 	switch (type) {
3052 		case CACHE_TYPE_RAM:
3053 			return "RAM";
3054 		case CACHE_TYPE_DEVICE:
3055 			return "device";
3056 		case CACHE_TYPE_VNODE:
3057 			return "vnode";
3058 		case CACHE_TYPE_NULL:
3059 			return "null";
3060 
3061 		default:
3062 			return "unknown";
3063 	}
3064 }
3065 
3066 
3067 #if DEBUG_CACHE_LIST
3068 
3069 static void
3070 update_cache_info_recursively(VMCache* cache, cache_info& info)
3071 {
3072 	info.page_count += cache->page_count;
3073 	if (cache->type == CACHE_TYPE_RAM)
3074 		info.committed += cache->committed_size;
3075 
3076 	// recurse
3077 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3078 			VMCache* consumer = it.Next();) {
3079 		update_cache_info_recursively(consumer, info);
3080 	}
3081 }
3082 
3083 
3084 static int
3085 cache_info_compare_page_count(const void* _a, const void* _b)
3086 {
3087 	const cache_info* a = (const cache_info*)_a;
3088 	const cache_info* b = (const cache_info*)_b;
3089 	if (a->page_count == b->page_count)
3090 		return 0;
3091 	return a->page_count < b->page_count ? 1 : -1;
3092 }
3093 
3094 
3095 static int
3096 cache_info_compare_committed(const void* _a, const void* _b)
3097 {
3098 	const cache_info* a = (const cache_info*)_a;
3099 	const cache_info* b = (const cache_info*)_b;
3100 	if (a->committed == b->committed)
3101 		return 0;
3102 	return a->committed < b->committed ? 1 : -1;
3103 }
3104 
3105 
3106 static void
3107 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3108 {
3109 	for (int i = 0; i < level; i++)
3110 		kprintf("  ");
3111 
3112 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3113 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3114 		cache->virtual_base, cache->virtual_end, cache->page_count);
3115 
3116 	if (level == 0)
3117 		kprintf("/%lu", info.page_count);
3118 
3119 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3120 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3121 
3122 		if (level == 0)
3123 			kprintf("/%lu", info.committed);
3124 	}
3125 
3126 	// areas
3127 	if (cache->areas != NULL) {
3128 		VMArea* area = cache->areas;
3129 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3130 			area->name, area->address_space->ID());
3131 
3132 		while (area->cache_next != NULL) {
3133 			area = area->cache_next;
3134 			kprintf(", %" B_PRId32, area->id);
3135 		}
3136 	}
3137 
3138 	kputs("\n");
3139 
3140 	// recurse
3141 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3142 			VMCache* consumer = it.Next();) {
3143 		dump_caches_recursively(consumer, info, level + 1);
3144 	}
3145 }
3146 
3147 
3148 static int
3149 dump_caches(int argc, char** argv)
3150 {
3151 	if (sCacheInfoTable == NULL) {
3152 		kprintf("No cache info table!\n");
3153 		return 0;
3154 	}
3155 
3156 	bool sortByPageCount = true;
3157 
3158 	for (int32 i = 1; i < argc; i++) {
3159 		if (strcmp(argv[i], "-c") == 0) {
3160 			sortByPageCount = false;
3161 		} else {
3162 			print_debugger_command_usage(argv[0]);
3163 			return 0;
3164 		}
3165 	}
3166 
3167 	uint32 totalCount = 0;
3168 	uint32 rootCount = 0;
3169 	off_t totalCommitted = 0;
3170 	page_num_t totalPages = 0;
3171 
3172 	VMCache* cache = gDebugCacheList;
3173 	while (cache) {
3174 		totalCount++;
3175 		if (cache->source == NULL) {
3176 			cache_info stackInfo;
3177 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3178 				? sCacheInfoTable[rootCount] : stackInfo;
3179 			rootCount++;
3180 			info.cache = cache;
3181 			info.page_count = 0;
3182 			info.committed = 0;
3183 			update_cache_info_recursively(cache, info);
3184 			totalCommitted += info.committed;
3185 			totalPages += info.page_count;
3186 		}
3187 
3188 		cache = cache->debug_next;
3189 	}
3190 
3191 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3192 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3193 			sortByPageCount
3194 				? &cache_info_compare_page_count
3195 				: &cache_info_compare_committed);
3196 	}
3197 
3198 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3199 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3200 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3201 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3202 			"page count" : "committed size");
3203 
3204 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3205 		for (uint32 i = 0; i < rootCount; i++) {
3206 			cache_info& info = sCacheInfoTable[i];
3207 			dump_caches_recursively(info.cache, info, 0);
3208 		}
3209 	} else
3210 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3211 
3212 	return 0;
3213 }
3214 
3215 #endif	// DEBUG_CACHE_LIST
3216 
3217 
3218 static int
3219 dump_cache(int argc, char** argv)
3220 {
3221 	VMCache* cache;
3222 	bool showPages = false;
3223 	int i = 1;
3224 
3225 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3226 		kprintf("usage: %s [-ps] <address>\n"
3227 			"  if -p is specified, all pages are shown; if -s is used,\n"
3228 			"  only the cache info is shown.\n", argv[0]);
3229 		return 0;
3230 	}
3231 	while (argv[i][0] == '-') {
3232 		char* arg = argv[i] + 1;
3233 		while (arg[0]) {
3234 			if (arg[0] == 'p')
3235 				showPages = true;
3236 			arg++;
3237 		}
3238 		i++;
3239 	}
3240 	if (argv[i] == NULL) {
3241 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3242 		return 0;
3243 	}
3244 
3245 	addr_t address = parse_expression(argv[i]);
3246 	if (address == 0)
3247 		return 0;
3248 
3249 	cache = (VMCache*)address;
3250 
3251 	cache->Dump(showPages);
3252 
3253 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3254 
3255 	return 0;
3256 }
3257 
3258 
3259 static void
3260 dump_area_struct(VMArea* area, bool mappings)
3261 {
3262 	kprintf("AREA: %p\n", area);
3263 	kprintf("name:\t\t'%s'\n", area->name);
3264 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3265 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3266 	kprintf("base:\t\t0x%lx\n", area->Base());
3267 	kprintf("size:\t\t0x%lx\n", area->Size());
3268 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3269 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3270 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3271 	kprintf("cache:\t\t%p\n", area->cache);
3272 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3273 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3274 	kprintf("cache_next:\t%p\n", area->cache_next);
3275 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3276 
3277 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3278 	if (mappings) {
3279 		kprintf("page mappings:\n");
3280 		while (iterator.HasNext()) {
3281 			vm_page_mapping* mapping = iterator.Next();
3282 			kprintf("  %p", mapping->page);
3283 		}
3284 		kprintf("\n");
3285 	} else {
3286 		uint32 count = 0;
3287 		while (iterator.Next() != NULL) {
3288 			count++;
3289 		}
3290 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3291 	}
3292 }
3293 
3294 
3295 static int
3296 dump_area(int argc, char** argv)
3297 {
3298 	bool mappings = false;
3299 	bool found = false;
3300 	int32 index = 1;
3301 	VMArea* area;
3302 	addr_t num;
3303 
3304 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3305 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3306 			"All areas matching either id/address/name are listed. You can\n"
3307 			"restrict the search to a specific attribute by prefixing the\n"
3308 			"specifier with one of the id/contains/address/name keywords.\n"
3309 			"-m shows the area's mappings as well.\n");
3310 		return 0;
3311 	}
3312 
3313 	if (!strcmp(argv[1], "-m")) {
3314 		mappings = true;
3315 		index++;
3316 	}
3317 
3318 	int32 mode = 0xf;
3319 	if (!strcmp(argv[index], "id"))
3320 		mode = 1;
3321 	else if (!strcmp(argv[index], "contains"))
3322 		mode = 2;
3323 	else if (!strcmp(argv[index], "name"))
3324 		mode = 4;
3325 	else if (!strcmp(argv[index], "address"))
3326 		mode = 0;
3327 	if (mode != 0xf)
3328 		index++;
3329 
3330 	if (index >= argc) {
3331 		kprintf("No area specifier given.\n");
3332 		return 0;
3333 	}
3334 
3335 	num = parse_expression(argv[index]);
3336 
3337 	if (mode == 0) {
3338 		dump_area_struct((struct VMArea*)num, mappings);
3339 	} else {
3340 		// walk through the area list, looking for the arguments as a name
3341 
3342 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3343 		while ((area = it.Next()) != NULL) {
3344 			if (((mode & 4) != 0 && area->name != NULL
3345 					&& !strcmp(argv[index], area->name))
3346 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3347 					|| (((mode & 2) != 0 && area->Base() <= num
3348 						&& area->Base() + area->Size() > num))))) {
3349 				dump_area_struct(area, mappings);
3350 				found = true;
3351 			}
3352 		}
3353 
3354 		if (!found)
3355 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3356 	}
3357 
3358 	return 0;
3359 }
3360 
3361 
3362 static int
3363 dump_area_list(int argc, char** argv)
3364 {
3365 	VMArea* area;
3366 	const char* name = NULL;
3367 	int32 id = 0;
3368 
3369 	if (argc > 1) {
3370 		id = parse_expression(argv[1]);
3371 		if (id == 0)
3372 			name = argv[1];
3373 	}
3374 
3375 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3376 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3377 		B_PRINTF_POINTER_WIDTH, "size");
3378 
3379 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3380 	while ((area = it.Next()) != NULL) {
3381 		if ((id != 0 && area->address_space->ID() != id)
3382 			|| (name != NULL && strstr(area->name, name) == NULL))
3383 			continue;
3384 
3385 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3386 			area->id, (void*)area->Base(), (void*)area->Size(),
3387 			area->protection, area->wiring, area->name);
3388 	}
3389 	return 0;
3390 }
3391 
3392 
3393 static int
3394 dump_available_memory(int argc, char** argv)
3395 {
3396 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3397 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3398 	return 0;
3399 }
3400 
3401 
3402 /*!	Deletes all areas and reserved regions in the given address space.
3403 
3404 	The caller must ensure that none of the areas has any wired ranges.
3405 
3406 	\param addressSpace The address space.
3407 	\param deletingAddressSpace \c true, if the address space is in the process
3408 		of being deleted.
3409 */
3410 void
3411 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3412 {
3413 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3414 		addressSpace->ID()));
3415 
3416 	addressSpace->WriteLock();
3417 
3418 	// remove all reserved areas in this address space
3419 	addressSpace->UnreserveAllAddressRanges(0);
3420 
3421 	// delete all the areas in this address space
3422 	while (VMArea* area = addressSpace->FirstArea()) {
3423 		ASSERT(!area->IsWired());
3424 		delete_area(addressSpace, area, deletingAddressSpace);
3425 	}
3426 
3427 	addressSpace->WriteUnlock();
3428 }
3429 
3430 
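/*!	Returns the ID of the area containing \a address -- for userland
	addresses in the current team's address space, otherwise in the kernel
	address space. Unless \a kernel is \c true, only areas that are user
	readable or user writable are reported; \c B_ERROR is returned if no
	(accessible) area contains the address.
*/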
3431 static area_id
3432 vm_area_for(addr_t address, bool kernel)
3433 {
3434 	team_id team;
3435 	if (IS_USER_ADDRESS(address)) {
3436 		// we try the user team address space, if any
3437 		team = VMAddressSpace::CurrentID();
3438 		if (team < 0)
3439 			return team;
3440 	} else
3441 		team = VMAddressSpace::KernelID();
3442 
3443 	AddressSpaceReadLocker locker(team);
3444 	if (!locker.IsLocked())
3445 		return B_BAD_TEAM_ID;
3446 
3447 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3448 	if (area != NULL) {
3449 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3450 			return B_ERROR;
3451 
3452 		return area->id;
3453 	}
3454 
3455 	return B_ERROR;
3456 }
3457 
3458 
3459 /*!	Frees physical pages that were used during the boot process.
3460 	\a end is inclusive.
3461 */
3462 static void
3463 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3464 {
3465 	// free all physical pages in the specified range
3466 
3467 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3468 		phys_addr_t physicalAddress;
3469 		uint32 flags;
3470 
3471 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3472 			&& (flags & PAGE_PRESENT) != 0) {
3473 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3474 			if (page != NULL && page->State() != PAGE_STATE_FREE
3475 					 && page->State() != PAGE_STATE_CLEAR
3476 					 && page->State() != PAGE_STATE_UNUSED) {
3477 				DEBUG_PAGE_ACCESS_START(page);
3478 				vm_page_set_state(page, PAGE_STATE_FREE);
3479 			}
3480 		}
3481 	}
3482 
3483 	// unmap the memory
3484 	map->Unmap(start, end);
3485 }
3486 
3487 
3488 void
3489 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3490 {
3491 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3492 	addr_t end = start + (size - 1);
3493 	addr_t lastEnd = start;
3494 
3495 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3496 		(void*)start, (void*)end));
3497 
3498 	// The areas are sorted in virtual address space order, so
3499 	// we just have to find the holes between them that fall
3500 	// into the range we should dispose of
3501 
3502 	map->Lock();
3503 
3504 	for (VMAddressSpace::AreaIterator it
3505 				= VMAddressSpace::Kernel()->GetAreaIterator();
3506 			VMArea* area = it.Next();) {
3507 		addr_t areaStart = area->Base();
3508 		addr_t areaEnd = areaStart + (area->Size() - 1);
3509 
3510 		if (areaEnd < start)
3511 			continue;
3512 
3513 		if (areaStart > end) {
3514 			// we are done, the area is already beyond what we have to free
3515 			break;
3516 		}
3517 
3518 		if (areaStart > lastEnd) {
3519 			// this is something we can free
3520 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3521 				(void*)areaStart));
3522 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3523 		}
3524 
3525 		if (areaEnd >= end) {
3526 			lastEnd = areaEnd;
3527 				// no +1 to prevent potential overflow
3528 			break;
3529 		}
3530 
3531 		lastEnd = areaEnd + 1;
3532 	}
3533 
3534 	if (lastEnd < end) {
3535 		// we can also get rid of some space at the end of the area
3536 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3537 			(void*)end));
3538 		unmap_and_free_physical_pages(map, lastEnd, end);
3539 	}
3540 
3541 	map->Unlock();
3542 }
3543 
3544 
3545 static void
3546 create_preloaded_image_areas(struct preloaded_image* _image)
3547 {
3548 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3549 	char name[B_OS_NAME_LENGTH];
3550 	void* address;
3551 	int32 length;
3552 
3553 	// use file name to create a good area name
3554 	char* fileName = strrchr(image->name, '/');
3555 	if (fileName == NULL)
3556 		fileName = image->name;
3557 	else
3558 		fileName++;
3559 
3560 	length = strlen(fileName);
3561 	// make sure there is enough space for the suffix
3562 	if (length > 25)
3563 		length = 25;
3564 
3565 	memcpy(name, fileName, length);
3566 	strcpy(name + length, "_text");
3567 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3568 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3569 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3570 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3571 		// this will later be remapped read-only/executable by the
3572 		// ELF initialization code
3573 
3574 	strcpy(name + length, "_data");
3575 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3576 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3577 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3578 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3579 }
3580 
3581 
3582 /*!	Frees all previously allocated kernel argument areas from the kernel_args
3583 	structure. Any boot loader resources contained in those arguments must not
3584 	be accessed anymore past this point.
3585 */
3586 void
3587 vm_free_kernel_args(kernel_args* args)
3588 {
3589 	uint32 i;
3590 
3591 	TRACE(("vm_free_kernel_args()\n"));
3592 
3593 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3594 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3595 		if (area >= B_OK)
3596 			delete_area(area);
3597 	}
3598 }
3599 
3600 
3601 static void
3602 allocate_kernel_args(kernel_args* args)
3603 {
3604 	TRACE(("allocate_kernel_args()\n"));
3605 
3606 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3607 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3608 
3609 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3610 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3611 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3612 	}
3613 }
3614 
3615 
3616 static void
3617 unreserve_boot_loader_ranges(kernel_args* args)
3618 {
3619 	TRACE(("unreserve_boot_loader_ranges()\n"));
3620 
3621 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3622 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3623 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3624 			args->virtual_allocated_range[i].size);
3625 	}
3626 }
3627 
3628 
3629 static void
3630 reserve_boot_loader_ranges(kernel_args* args)
3631 {
3632 	TRACE(("reserve_boot_loader_ranges()\n"));
3633 
3634 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3635 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3636 
3637 		// If the address is not a kernel address, we just skip it. The
3638 		// architecture specific code has to deal with it.
3639 		if (!IS_KERNEL_ADDRESS(address)) {
3640 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3641 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3642 			continue;
3643 		}
3644 
3645 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3646 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3647 		if (status < B_OK)
3648 			panic("could not reserve boot loader ranges\n");
3649 	}
3650 }
3651 
3652 
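/*!	Finds a free slot of (page aligned) \a size bytes of kernel address space,
	using the gaps between, after, or before the kernel_args' virtual allocated
	ranges, and extends the neighboring range entry to cover the allocation.
	If \a alignment is non-zero the returned base is aligned accordingly.
	Returns 0 if no suitable gap could be found.
*/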
3653 static addr_t
3654 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3655 {
3656 	size = PAGE_ALIGN(size);
3657 
3658 	// find a slot in the virtual allocation addr range
3659 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3660 		// check to see if the space between this one and the last is big enough
3661 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3662 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3663 			+ args->virtual_allocated_range[i - 1].size;
3664 
3665 		addr_t base = alignment > 0
3666 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3667 
3668 		if (base >= KERNEL_BASE && base < rangeStart
3669 				&& rangeStart - base >= size) {
3670 			args->virtual_allocated_range[i - 1].size
3671 				+= base + size - previousRangeEnd;
3672 			return base;
3673 		}
3674 	}
3675 
3676 	// we didn't find one between the allocation ranges. this is ok.
3677 	// see if there's a gap after the last one
3678 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3679 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3680 		+ args->virtual_allocated_range[lastEntryIndex].size;
3681 	addr_t base = alignment > 0
3682 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3683 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3684 		args->virtual_allocated_range[lastEntryIndex].size
3685 			+= base + size - lastRangeEnd;
3686 		return base;
3687 	}
3688 
3689 	// see if there's a gap before the first one
3690 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3691 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3692 		base = rangeStart - size;
3693 		if (alignment > 0)
3694 			base = ROUNDDOWN(base, alignment);
3695 
3696 		if (base >= KERNEL_BASE) {
3697 			args->virtual_allocated_range[0].start = base;
3698 			args->virtual_allocated_range[0].size += rangeStart - base;
3699 			return base;
3700 		}
3701 	}
3702 
3703 	return 0;
3704 }
3705 
3706 
3707 static bool
3708 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3709 {
3710 	// TODO: horrible brute-force method of determining if the page can be
3711 	// allocated
3712 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3713 		if (address >= args->physical_memory_range[i].start
3714 			&& address < args->physical_memory_range[i].start
3715 				+ args->physical_memory_range[i].size)
3716 			return true;
3717 	}
3718 	return false;
3719 }
3720 
3721 
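/*!	Allocates a single physical page by appending it to one of the kernel_args'
	physical allocated ranges, provided the page directly following that range
	lies within physical memory and doesn't collide with the next allocated
	range. Returns the page number, or 0 if no page could be allocated.
*/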
3722 page_num_t
3723 vm_allocate_early_physical_page(kernel_args* args)
3724 {
3725 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3726 		phys_addr_t nextPage;
3727 
3728 		nextPage = args->physical_allocated_range[i].start
3729 			+ args->physical_allocated_range[i].size;
3730 		// see if the page after the next allocated paddr run can be allocated
3731 		if (i + 1 < args->num_physical_allocated_ranges
3732 			&& args->physical_allocated_range[i + 1].size != 0) {
3733 			// see if the next page will collide with the next allocated range
3734 			if (nextPage >= args->physical_allocated_range[i+1].start)
3735 				continue;
3736 		}
3737 		// see if the next physical page fits in the memory block
3738 		if (is_page_in_physical_memory_range(args, nextPage)) {
3739 			// we got one!
3740 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3741 			return nextPage / B_PAGE_SIZE;
3742 		}
3743 	}
3744 
3745 	return 0;
3746 		// could not allocate a block
3747 }
3748 
3749 
3750 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3751 	allocate some pages before the VM is completely up.
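	For example, vm_init() below uses it (with the debug heap enabled) to map
	the initial kernel heap: vm_allocate_early(args, heapSize, heapSize,
	B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0).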
3752 */
3753 addr_t
3754 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3755 	uint32 attributes, addr_t alignment)
3756 {
3757 	if (physicalSize > virtualSize)
3758 		physicalSize = virtualSize;
3759 
3760 	// find the vaddr to allocate at
3761 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3762 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3763 
3764 	// map the pages
3765 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3766 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3767 		if (physicalAddress == 0)
3768 			panic("error allocating early page!\n");
3769 
3770 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3771 
3772 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3773 			physicalAddress * B_PAGE_SIZE, attributes,
3774 			&vm_allocate_early_physical_page);
3775 	}
3776 
3777 	return virtualBase;
3778 }
3779 
3780 
3781 /*!	The main entrance point to initialize the VM. */
3782 status_t
3783 vm_init(kernel_args* args)
3784 {
3785 	struct preloaded_image* image;
3786 	void* address;
3787 	status_t err = 0;
3788 	uint32 i;
3789 
3790 	TRACE(("vm_init: entry\n"));
3791 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3792 	err = arch_vm_init(args);
3793 
3794 	// initialize some globals
3795 	vm_page_init_num_pages(args);
3796 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3797 
3798 	slab_init(args);
3799 
3800 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3801 	size_t heapSize = INITIAL_HEAP_SIZE;
3802 	// try to accommodate low memory systems
3803 	while (heapSize > sAvailableMemory / 8)
3804 		heapSize /= 2;
3805 	if (heapSize < 1024 * 1024)
3806 		panic("vm_init: go buy some RAM please.");
3807 
3808 	// map in the new heap and initialize it
3809 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3810 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3811 	TRACE(("heap at 0x%lx\n", heapBase));
3812 	heap_init(heapBase, heapSize);
3813 #endif
3814 
3815 	// initialize the free page list and physical page mapper
3816 	vm_page_init(args);
3817 
3818 	// initialize the cache allocators
3819 	vm_cache_init(args);
3820 
3821 	{
3822 		status_t error = VMAreaHash::Init();
3823 		if (error != B_OK)
3824 			panic("vm_init: error initializing area hash table\n");
3825 	}
3826 
3827 	VMAddressSpace::Init();
3828 	reserve_boot_loader_ranges(args);
3829 
3830 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3831 	heap_init_post_area();
3832 #endif
3833 
3834 	// Do any further initialization that the architecture dependent layers may
3835 	// need now
3836 	arch_vm_translation_map_init_post_area(args);
3837 	arch_vm_init_post_area(args);
3838 	vm_page_init_post_area(args);
3839 	slab_init_post_area();
3840 
3841 	// allocate areas to represent stuff that already exists
3842 
3843 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3844 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
3845 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
3846 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3847 #endif
3848 
3849 	allocate_kernel_args(args);
3850 
3851 	create_preloaded_image_areas(args->kernel_image);
3852 
3853 	// allocate areas for preloaded images
3854 	for (image = args->preloaded_images; image != NULL; image = image->next)
3855 		create_preloaded_image_areas(image);
3856 
3857 	// allocate kernel stacks
3858 	for (i = 0; i < args->num_cpus; i++) {
3859 		char name[64];
3860 
3861 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
3862 		address = (void*)args->cpu_kstack[i].start;
3863 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
3864 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3865 	}
3866 
3867 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
3868 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
3869 
3870 #if PARANOID_KERNEL_MALLOC
3871 	vm_block_address_range("uninitialized heap memory",
3872 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3873 #endif
3874 #if PARANOID_KERNEL_FREE
3875 	vm_block_address_range("freed heap memory",
3876 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3877 #endif
3878 
3879 	// create the object cache for the page mappings
3880 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
3881 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
3882 		NULL, NULL);
3883 	if (gPageMappingsObjectCache == NULL)
3884 		panic("failed to create page mappings object cache");
3885 
3886 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
3887 
3888 #if DEBUG_CACHE_LIST
3889 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
3890 		virtual_address_restrictions virtualRestrictions = {};
3891 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
3892 		physical_address_restrictions physicalRestrictions = {};
3893 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
3894 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
3895 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
3896 			CREATE_AREA_DONT_WAIT, &virtualRestrictions, &physicalRestrictions,
3897 			(void**)&sCacheInfoTable);
3898 	}
3899 #endif	// DEBUG_CACHE_LIST
3900 
3901 	// add some debugger commands
3902 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
3903 	add_debugger_command("area", &dump_area,
3904 		"Dump info about a particular area");
3905 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
3906 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
3907 #if DEBUG_CACHE_LIST
3908 	if (sCacheInfoTable != NULL) {
3909 		add_debugger_command_etc("caches", &dump_caches,
3910 			"List all VMCache trees",
3911 			"[ \"-c\" ]\n"
3912 			"All cache trees are listed sorted in decreasing order by number "
3913 				"of\n"
3914 			"used pages or, if \"-c\" is specified, by size of committed "
3915 				"memory.\n",
3916 			0);
3917 	}
3918 #endif
3919 	add_debugger_command("avail", &dump_available_memory,
3920 		"Dump available memory");
3921 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
3922 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
3923 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
3924 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
3925 	add_debugger_command("string", &display_mem, "dump strings");
3926 
3927 	TRACE(("vm_init: exit\n"));
3928 
3929 	vm_cache_init_post_heap();
3930 
3931 	return err;
3932 }
3933 
3934 
3935 status_t
3936 vm_init_post_sem(kernel_args* args)
3937 {
3938 	// This frees all unused boot loader resources and makes their space
3939 	// available again
3940 	arch_vm_init_end(args);
3941 	unreserve_boot_loader_ranges(args);
3942 
3943 	// Fill in all of the semaphores that were not allocated before.
3944 	// Since we're still single threaded and only the kernel address space
3945 	// exists, it isn't that hard to find all of the ones we need to create.
3946 
3947 	arch_vm_translation_map_init_post_sem(args);
3948 
3949 	slab_init_post_sem();
3950 
3951 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3952 	heap_init_post_sem();
3953 #endif
3954 
3955 	return B_OK;
3956 }
3957 
3958 
3959 status_t
3960 vm_init_post_thread(kernel_args* args)
3961 {
3962 	vm_page_init_post_thread(args);
3963 	slab_init_post_thread();
3964 	return heap_init_post_thread();
3965 }
3966 
3967 
3968 status_t
3969 vm_init_post_modules(kernel_args* args)
3970 {
3971 	return arch_vm_init_post_modules(args);
3972 }
3973 
3974 
3975 void
3976 permit_page_faults(void)
3977 {
3978 	Thread* thread = thread_get_current_thread();
3979 	if (thread != NULL)
3980 		atomic_add(&thread->page_faults_allowed, 1);
3981 }
3982 
3983 
3984 void
3985 forbid_page_faults(void)
3986 {
3987 	Thread* thread = thread_get_current_thread();
3988 	if (thread != NULL)
3989 		atomic_add(&thread->page_faults_allowed, -1);
3990 }
3991 
3992 
3993 status_t
3994 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
3995 	addr_t* newIP)
3996 {
3997 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
3998 		faultAddress));
3999 
4000 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4001 
4002 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4003 	VMAddressSpace* addressSpace = NULL;
4004 
4005 	status_t status = B_OK;
4006 	*newIP = 0;
4007 	atomic_add((int32*)&sPageFaults, 1);
4008 
4009 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4010 		addressSpace = VMAddressSpace::GetKernel();
4011 	} else if (IS_USER_ADDRESS(pageAddress)) {
4012 		addressSpace = VMAddressSpace::GetCurrent();
4013 		if (addressSpace == NULL) {
4014 			if (!isUser) {
4015 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4016 					"memory!\n");
4017 				status = B_BAD_ADDRESS;
4018 				TPF(PageFaultError(-1,
4019 					VMPageFaultTracing
4020 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4021 			} else {
4022 				// XXX weird state.
4023 				panic("vm_page_fault: non kernel thread accessing user memory "
4024 					"that doesn't exist!\n");
4025 				status = B_BAD_ADDRESS;
4026 			}
4027 		}
4028 	} else {
4029 		// the hit was probably in the 64k DMZ between kernel and user space
4030 		// this keeps a user space thread from passing a buffer that crosses
4031 		// into kernel space
4032 		status = B_BAD_ADDRESS;
4033 		TPF(PageFaultError(-1,
4034 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4035 	}
4036 
4037 	if (status == B_OK) {
4038 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser,
4039 			NULL);
4040 	}
4041 
4042 	if (status < B_OK) {
4043 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4044 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4045 			strerror(status), address, faultAddress, isWrite, isUser,
4046 			thread_get_current_thread_id());
4047 		if (!isUser) {
4048 			Thread* thread = thread_get_current_thread();
4049 			if (thread != NULL && thread->fault_handler != 0) {
4050 				// this will cause the arch dependant page fault handler to
4051 				// modify the IP on the interrupt frame or whatever to return
4052 				// to this address
4053 				*newIP = thread->fault_handler;
4054 			} else {
4055 				// unhandled page fault in the kernel
4056 				panic("vm_page_fault: unhandled page fault in kernel space at "
4057 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4058 			}
4059 		} else {
4060 #if 1
4061 			// TODO: remove me once we have proper userland debugging support
4062 			// (and tools)
4063 			VMArea* area = NULL;
4064 			if (addressSpace != NULL) {
4065 				addressSpace->ReadLock();
4066 				area = addressSpace->LookupArea(faultAddress);
4067 			}
4068 
4069 			Thread* thread = thread_get_current_thread();
4070 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4071 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4072 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4073 				thread->team->Name(), thread->team->id,
4074 				isWrite ? "write" : "read", address, faultAddress,
4075 				area ? area->name : "???", faultAddress - (area ?
4076 					area->Base() : 0x0));
4077 
4078 			// We can print a stack trace of the userland thread here.
4079 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4080 // fault and someone is already waiting for a write lock on the same address
4081 // space. This thread will then try to acquire the lock again and will
4082 // be queued after the writer.
4083 #	if 0
4084 			if (area) {
4085 				struct stack_frame {
4086 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4087 						struct stack_frame*	previous;
4088 						void*				return_address;
4089 					#else
4090 						// ...
4091 					#warning writeme
4092 					#endif
4093 				} frame;
4094 #		ifdef __INTEL__
4095 				struct iframe* iframe = x86_get_user_iframe();
4096 				if (iframe == NULL)
4097 					panic("iframe is NULL!");
4098 
4099 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4100 					sizeof(struct stack_frame));
4101 #		elif defined(__POWERPC__)
4102 				struct iframe* iframe = ppc_get_user_iframe();
4103 				if (iframe == NULL)
4104 					panic("iframe is NULL!");
4105 
4106 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4107 					sizeof(struct stack_frame));
4108 #		else
4109 #			warning "vm_page_fault() stack trace won't work"
4110 				status = B_ERROR;
4111 #		endif
4112 
4113 				dprintf("stack trace:\n");
4114 				int32 maxFrames = 50;
4115 				while (status == B_OK && --maxFrames >= 0
4116 						&& frame.return_address != NULL) {
4117 					dprintf("  %p", frame.return_address);
4118 					area = addressSpace->LookupArea(
4119 						(addr_t)frame.return_address);
4120 					if (area) {
4121 						dprintf(" (%s + %#lx)", area->name,
4122 							(addr_t)frame.return_address - area->Base());
4123 					}
4124 					dprintf("\n");
4125 
4126 					status = user_memcpy(&frame, frame.previous,
4127 						sizeof(struct stack_frame));
4128 				}
4129 			}
4130 #	endif	// 0 (stack trace)
4131 
4132 			if (addressSpace != NULL)
4133 				addressSpace->ReadUnlock();
4134 #endif
4135 
4136 			// If the thread has a signal handler for SIGSEGV, we simply
4137 			// send it the signal. Otherwise we notify the user debugger
4138 			// first.
4139 			struct sigaction action;
4140 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4141 					&& action.sa_handler != SIG_DFL
4142 					&& action.sa_handler != SIG_IGN)
4143 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4144 					SIGSEGV)) {
4145 				Signal signal(SIGSEGV,
4146 					status == B_PERMISSION_DENIED
4147 						? SEGV_ACCERR : SEGV_MAPERR,
4148 					EFAULT, thread->team->id);
4149 				signal.SetAddress((void*)address);
4150 				send_signal_to_thread(thread, signal, 0);
4151 			}
4152 		}
4153 	}
4154 
4155 	if (addressSpace != NULL)
4156 		addressSpace->Put();
4157 
4158 	return B_HANDLED_INTERRUPT;
4159 }
4160 
4161 
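/*!	Bundles the state needed while resolving a single page fault: the locked
	address space and cache chain, the translation map, the top cache and the
	fault's offset within it, plus a page reservation. Prepare() is called once
	per attempt; \c restart signals that all locks had to be dropped and the
	whole fault handling must be restarted.
*/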
4162 struct PageFaultContext {
4163 	AddressSpaceReadLocker	addressSpaceLocker;
4164 	VMCacheChainLocker		cacheChainLocker;
4165 
4166 	VMTranslationMap*		map;
4167 	VMCache*				topCache;
4168 	off_t					cacheOffset;
4169 	vm_page_reservation		reservation;
4170 	bool					isWrite;
4171 
4172 	// return values
4173 	vm_page*				page;
4174 	bool					restart;
4175 
4176 
4177 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4178 		:
4179 		addressSpaceLocker(addressSpace, true),
4180 		map(addressSpace->TranslationMap()),
4181 		isWrite(isWrite)
4182 	{
4183 	}
4184 
4185 	~PageFaultContext()
4186 	{
4187 		UnlockAll();
4188 		vm_page_unreserve_pages(&reservation);
4189 	}
4190 
4191 	void Prepare(VMCache* topCache, off_t cacheOffset)
4192 	{
4193 		this->topCache = topCache;
4194 		this->cacheOffset = cacheOffset;
4195 		page = NULL;
4196 		restart = false;
4197 
4198 		cacheChainLocker.SetTo(topCache);
4199 	}
4200 
4201 	void UnlockAll(VMCache* exceptCache = NULL)
4202 	{
4203 		topCache = NULL;
4204 		addressSpaceLocker.Unlock();
4205 		cacheChainLocker.Unlock(exceptCache);
4206 	}
4207 };
4208 
4209 
4210 /*!	Gets the page that should be mapped into the area.
4211 	Returns an error code other than \c B_OK, if the page couldn't be found or
4212 	paged in. The locking state of the address space and the caches is undefined
4213 	in that case.
4214 	Returns \c B_OK with \c context.restart set to \c true, if the function
4215 	had to unlock the address space and all caches and is supposed to be called
4216 	again.
4217 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4218 	found. It is returned in \c context.page. The address space will still be
4219 	locked as well as all caches starting from the top cache to at least the
4220 	cache the page lives in.
4221 */
4222 static status_t
4223 fault_get_page(PageFaultContext& context)
4224 {
4225 	VMCache* cache = context.topCache;
4226 	VMCache* lastCache = NULL;
4227 	vm_page* page = NULL;
4228 
4229 	while (cache != NULL) {
4230 		// We already hold the lock of the cache at this point.
4231 
4232 		lastCache = cache;
4233 
4234 		page = cache->LookupPage(context.cacheOffset);
4235 		if (page != NULL && page->busy) {
4236 			// the page is busy -- wait for it to become unbusy
4237 			context.UnlockAll(cache);
4238 			cache->ReleaseRefLocked();
4239 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4240 
4241 			// restart the whole process
4242 			context.restart = true;
4243 			return B_OK;
4244 		}
4245 
4246 		if (page != NULL)
4247 			break;
4248 
4249 		// The current cache does not contain the page we're looking for.
4250 
4251 		// see if the backing store has it
4252 		if (cache->HasPage(context.cacheOffset)) {
4253 			// insert a fresh page and mark it busy -- we're going to read it in
4254 			page = vm_page_allocate_page(&context.reservation,
4255 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4256 			cache->InsertPage(page, context.cacheOffset);
4257 
4258 			// We need to unlock all caches and the address space while reading
4259 			// the page in. Keep a reference to the cache around.
4260 			cache->AcquireRefLocked();
4261 			context.UnlockAll();
4262 
4263 			// read the page in
4264 			generic_io_vec vec;
4265 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4266 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4267 
4268 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4269 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4270 
4271 			cache->Lock();
4272 
4273 			if (status < B_OK) {
4274 				// on error remove and free the page
4275 				dprintf("reading page from cache %p returned: %s!\n",
4276 					cache, strerror(status));
4277 
4278 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4279 				cache->RemovePage(page);
4280 				vm_page_set_state(page, PAGE_STATE_FREE);
4281 
4282 				cache->ReleaseRefAndUnlock();
4283 				return status;
4284 			}
4285 
4286 			// mark the page unbusy again
4287 			cache->MarkPageUnbusy(page);
4288 
4289 			DEBUG_PAGE_ACCESS_END(page);
4290 
4291 			// Since we needed to unlock everything temporarily, the area
4292 			// situation might have changed. So we need to restart the whole
4293 			// process.
4294 			cache->ReleaseRefAndUnlock();
4295 			context.restart = true;
4296 			return B_OK;
4297 		}
4298 
4299 		cache = context.cacheChainLocker.LockSourceCache();
4300 	}
4301 
4302 	if (page == NULL) {
4303 		// There was no adequate page, determine the cache for a clean one.
4304 		// Read-only pages go into the deepest cache; only the top most cache
4305 		// may have direct write access.
4306 		cache = context.isWrite ? context.topCache : lastCache;
4307 
4308 		// allocate a clean page
4309 		page = vm_page_allocate_page(&context.reservation,
4310 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4311 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4312 			page->physical_page_number));
4313 
4314 		// insert the new page into our cache
4315 		cache->InsertPage(page, context.cacheOffset);
4316 	} else if (page->Cache() != context.topCache && context.isWrite) {
4317 		// We have a page that has the data we want, but in the wrong cache
4318 		// object so we need to copy it and stick it into the top cache.
4319 		vm_page* sourcePage = page;
4320 
4321 		// TODO: If memory is low, it might be a good idea to steal the page
4322 		// from our source cache -- if possible, that is.
4323 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4324 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4325 
4326 		// To not needlessly kill concurrency we unlock all caches but the top
4327 		// one while copying the page. Lacking another mechanism to ensure that
4328 		// the source page doesn't disappear, we mark it busy.
4329 		sourcePage->busy = true;
4330 		context.cacheChainLocker.UnlockKeepRefs(true);
4331 
4332 		// copy the page
4333 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4334 			sourcePage->physical_page_number * B_PAGE_SIZE);
4335 
4336 		context.cacheChainLocker.RelockCaches(true);
4337 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4338 
4339 		// insert the new page into our cache
4340 		context.topCache->InsertPage(page, context.cacheOffset);
4341 	} else
4342 		DEBUG_PAGE_ACCESS_START(page);
4343 
4344 	context.page = page;
4345 	return B_OK;
4346 }
4347 
4348 
4349 /*!	Makes sure the address in the given address space is mapped.
4350 
4351 	\param addressSpace The address space.
4352 	\param originalAddress The address. Doesn't need to be page aligned.
4353 	\param isWrite If \c true the address shall be write-accessible.
4354 	\param isUser If \c true the access is requested by a userland team.
4355 	\param wirePage On success, if non \c NULL, the wired count of the page
4356 		mapped at the given address is incremented and the page is returned
4357 		via this parameter.
4358 	\param wiredRange If given, this wiredRange is ignored when checking whether
4359 		an already mapped page at the virtual address can be unmapped.
4360 	\return \c B_OK on success, another error code otherwise.
4361 */
4362 static status_t
4363 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4364 	bool isWrite, bool isUser, vm_page** wirePage, VMAreaWiredRange* wiredRange)
4365 {
4366 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4367 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4368 		originalAddress, isWrite, isUser));
4369 
4370 	PageFaultContext context(addressSpace, isWrite);
4371 
4372 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4373 	status_t status = B_OK;
4374 
4375 	addressSpace->IncrementFaultCount();
4376 
4377 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4378 	// the pages upfront makes sure we don't have any cache locked, so that the
4379 	// page daemon/thief can do their job without problems.
4380 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4381 		originalAddress);
4382 	context.addressSpaceLocker.Unlock();
4383 	vm_page_reserve_pages(&context.reservation, reservePages,
4384 		addressSpace == VMAddressSpace::Kernel()
4385 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4386 
4387 	while (true) {
4388 		context.addressSpaceLocker.Lock();
4389 
4390 		// get the area the fault was in
4391 		VMArea* area = addressSpace->LookupArea(address);
4392 		if (area == NULL) {
4393 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4394 				"space\n", originalAddress);
4395 			TPF(PageFaultError(-1,
4396 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4397 			status = B_BAD_ADDRESS;
4398 			break;
4399 		}
4400 
4401 		// check permissions
4402 		uint32 protection = get_area_page_protection(area, address);
4403 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4404 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4405 				area->id, (void*)originalAddress);
4406 			TPF(PageFaultError(area->id,
4407 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4408 			status = B_PERMISSION_DENIED;
4409 			break;
4410 		}
4411 		if (isWrite && (protection
4412 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4413 			dprintf("write access attempted on write-protected area 0x%"
4414 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4415 			TPF(PageFaultError(area->id,
4416 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4417 			status = B_PERMISSION_DENIED;
4418 			break;
4419 		} else if (!isWrite && (protection
4420 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4421 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4422 				" at %p\n", area->id, (void*)originalAddress);
4423 			TPF(PageFaultError(area->id,
4424 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4425 			status = B_PERMISSION_DENIED;
4426 			break;
4427 		}
4428 
4429 		// We have the area, it was a valid access, so let's try to resolve the
4430 		// page fault now.
4431 		// At first, the top most cache from the area is investigated.
4432 
4433 		context.Prepare(vm_area_get_locked_cache(area),
4434 			address - area->Base() + area->cache_offset);
4435 
4436 		// See if this cache has a fault handler -- this will do all the work
4437 		// for us.
4438 		{
4439 			// Note, since the page fault is resolved with interrupts enabled,
4440 			// the fault handler could be called more than once for the same
4441 			// reason -- the store must take this into account.
4442 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4443 			if (status != B_BAD_HANDLER)
4444 				break;
4445 		}
4446 
4447 		// The top most cache has no fault handler, so let's see if the cache or
4448 		// its sources already have the page we're searching for (we're going
4449 		// from top to bottom).
4450 		status = fault_get_page(context);
4451 		if (status != B_OK) {
4452 			TPF(PageFaultError(area->id, status));
4453 			break;
4454 		}
4455 
4456 		if (context.restart)
4457 			continue;
4458 
4459 		// All went fine, all there is left to do is to map the page into the
4460 		// address space.
4461 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4462 			context.page));
4463 
4464 		// If the page doesn't reside in the area's cache, we need to make sure
4465 		// it's mapped read-only, so that we cannot overwrite someone else's
4466 		// data (copy-on-write)
4467 		uint32 newProtection = protection;
4468 		if (context.page->Cache() != context.topCache && !isWrite)
4469 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4470 
4471 		bool unmapPage = false;
4472 		bool mapPage = true;
4473 
4474 		// check whether there's already a page mapped at the address
4475 		context.map->Lock();
4476 
4477 		phys_addr_t physicalAddress;
4478 		uint32 flags;
4479 		vm_page* mappedPage = NULL;
4480 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4481 			&& (flags & PAGE_PRESENT) != 0
4482 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4483 				!= NULL) {
4484 			// Yep there's already a page. If it's ours, we can simply adjust
4485 			// its protection. Otherwise we have to unmap it.
4486 			if (mappedPage == context.page) {
4487 				context.map->ProtectPage(area, address, newProtection);
4488 					// Note: We assume that ProtectPage() is atomic (i.e.
4489 					// the page isn't temporarily unmapped), otherwise we'd have
4490 					// to make sure it isn't wired.
4491 				mapPage = false;
4492 			} else
4493 				unmapPage = true;
4494 		}
4495 
4496 		context.map->Unlock();
4497 
4498 		if (unmapPage) {
4499 			// If the page is wired, we can't unmap it. Wait until it is unwired
4500 			// again and restart.
4501 			VMAreaUnwiredWaiter waiter;
4502 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4503 					wiredRange)) {
4504 				// unlock everything and wait
4505 				context.UnlockAll();
4506 				waiter.waitEntry.Wait();
4507 				continue;
4508 			}
4509 
4510 			// Note: The mapped page is a page of a lower cache. We are
4511 			// guaranteed to have that cache locked, our new page is a copy of
4512 			// that page, and the page is not busy. The logic for that guarantee
4513 			// is as follows: Since the page is mapped, it must live in the top
4514 			// cache (ruled out above) or any of its lower caches, and there is
4515 			// (was before the new page was inserted) no other page in any
4516 			// cache between the top cache and the page's cache (otherwise that
4517 			// would be mapped instead). That in turn means that our algorithm
4518 			// must have found it and therefore it cannot be busy either.
4519 			DEBUG_PAGE_ACCESS_START(mappedPage);
4520 			unmap_page(area, address);
4521 			DEBUG_PAGE_ACCESS_END(mappedPage);
4522 		}
4523 
4524 		if (mapPage) {
4525 			if (map_page(area, context.page, address, newProtection,
4526 					&context.reservation) != B_OK) {
4527 				// Mapping can only fail when the page mapping object couldn't
4528 				// be allocated. Save for the missing mapping everything is
4529 				// fine, though. If this was a regular page fault, we'll simply
4530 				// leave and probably fault again. To make sure we'll have more
4531 				// luck then, we ensure that the minimum object reserve is
4532 				// available.
4533 				DEBUG_PAGE_ACCESS_END(context.page);
4534 
4535 				context.UnlockAll();
4536 
4537 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4538 						!= B_OK) {
4539 					// Apparently the situation is serious. Let's get ourselves
4540 					// killed.
4541 					status = B_NO_MEMORY;
4542 				} else if (wirePage != NULL) {
4543 					// The caller expects us to wire the page. Since
4544 					// object_cache_reserve() succeeded, we should now be able
4545 					// to allocate a mapping structure. Restart.
4546 					continue;
4547 				}
4548 
4549 				break;
4550 			}
4551 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4552 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4553 
4554 		// also wire the page, if requested
4555 		if (wirePage != NULL && status == B_OK) {
4556 			increment_page_wired_count(context.page);
4557 			*wirePage = context.page;
4558 		}
4559 
4560 		DEBUG_PAGE_ACCESS_END(context.page);
4561 
4562 		break;
4563 	}
4564 
4565 	return status;
4566 }
4567 
4568 
4569 status_t
4570 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4571 {
4572 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4573 }
4574 
4575 status_t
4576 vm_put_physical_page(addr_t vaddr, void* handle)
4577 {
4578 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4579 }
4580 
4581 
4582 status_t
4583 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4584 	void** _handle)
4585 {
4586 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4587 }
4588 
4589 status_t
4590 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4591 {
4592 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4593 }
4594 
4595 
4596 status_t
4597 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4598 {
4599 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4600 }
4601 
4602 status_t
4603 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4604 {
4605 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4606 }
4607 
4608 
4609 void
4610 vm_get_info(system_memory_info* info)
4611 {
4612 	swap_get_info(info);
4613 
4614 	info->max_memory = vm_page_num_pages() * B_PAGE_SIZE;
4615 	info->page_faults = sPageFaults;
4616 
4617 	MutexLocker locker(sAvailableMemoryLock);
4618 	info->free_memory = sAvailableMemory;
4619 	info->needed_memory = sNeededMemory;
4620 }
4621 
4622 
4623 uint32
4624 vm_num_page_faults(void)
4625 {
4626 	return sPageFaults;
4627 }
4628 
4629 
4630 off_t
4631 vm_available_memory(void)
4632 {
4633 	MutexLocker locker(sAvailableMemoryLock);
4634 	return sAvailableMemory;
4635 }
4636 
4637 
4638 off_t
4639 vm_available_not_needed_memory(void)
4640 {
4641 	MutexLocker locker(sAvailableMemoryLock);
4642 	return sAvailableMemory - sNeededMemory;
4643 }
4644 
4645 
4646 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4647 	debugger.
4648 */
4649 off_t
4650 vm_available_not_needed_memory_debug(void)
4651 {
4652 	return sAvailableMemory - sNeededMemory;
4653 }
4654 
4655 
4656 size_t
4657 vm_kernel_address_space_left(void)
4658 {
4659 	return VMAddressSpace::Kernel()->FreeSpace();
4660 }
4661 
4662 
4663 void
4664 vm_unreserve_memory(size_t amount)
4665 {
4666 	mutex_lock(&sAvailableMemoryLock);
4667 
4668 	sAvailableMemory += amount;
4669 
4670 	mutex_unlock(&sAvailableMemoryLock);
4671 }
4672 
4673 
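/*!	Tries to reserve \a amount bytes of memory without touching the memory
	reserve associated with \a priority. If not enough memory is available, the
	low resource manager is asked to free some and the attempt is repeated
	until the (relative) \a timeout has expired. Returns \c B_OK on success,
	\c B_NO_MEMORY otherwise.
*/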
4674 status_t
4675 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4676 {
4677 	size_t reserve = kMemoryReserveForPriority[priority];
4678 
4679 	MutexLocker locker(sAvailableMemoryLock);
4680 
4681 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4682 
4683 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4684 		sAvailableMemory -= amount;
4685 		return B_OK;
4686 	}
4687 
4688 	if (timeout <= 0)
4689 		return B_NO_MEMORY;
4690 
4691 	// turn timeout into an absolute timeout
4692 	timeout += system_time();
4693 
4694 	// loop until we've got the memory or the timeout occurs
4695 	do {
4696 		sNeededMemory += amount;
4697 
4698 		// call the low resource manager
4699 		locker.Unlock();
4700 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4701 			B_ABSOLUTE_TIMEOUT, timeout);
4702 		locker.Lock();
4703 
4704 		sNeededMemory -= amount;
4705 
4706 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4707 			sAvailableMemory -= amount;
4708 			return B_OK;
4709 		}
4710 	} while (timeout > system_time());
4711 
4712 	return B_NO_MEMORY;
4713 }
4714 
4715 
4716 status_t
4717 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4718 {
4719 	// NOTE: The caller is responsible for synchronizing calls to this function!
4720 
4721 	AddressSpaceReadLocker locker;
4722 	VMArea* area;
4723 	status_t status = locker.SetFromArea(id, area);
4724 	if (status != B_OK)
4725 		return status;
4726 
4727 	// nothing to do, if the type doesn't change
4728 	uint32 oldType = area->MemoryType();
4729 	if (type == oldType)
4730 		return B_OK;
4731 
4732 	// set the memory type of the area and the mapped pages
4733 	VMTranslationMap* map = area->address_space->TranslationMap();
4734 	map->Lock();
4735 	area->SetMemoryType(type);
4736 	map->ProtectArea(area, area->protection);
4737 	map->Unlock();
4738 
4739 	// set the physical memory type
4740 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4741 	if (error != B_OK) {
4742 		// reset the memory type of the area and the mapped pages
4743 		map->Lock();
4744 		area->SetMemoryType(oldType);
4745 		map->ProtectArea(area, area->protection);
4746 		map->Unlock();
4747 		return error;
4748 	}
4749 
4750 	return B_OK;
4752 }
4753 
4754 
4755 /*!	This function enforces some protection properties:
4756 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4757 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4758 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4759 	   and B_KERNEL_WRITE_AREA.
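	For example (illustrative): B_READ_AREA | B_WRITE_AREA additionally gets
	B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, while a plain B_READ_AREA only
	gains B_KERNEL_READ_AREA.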
4760 */
4761 static void
4762 fix_protection(uint32* protection)
4763 {
4764 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4765 		if ((*protection & B_USER_PROTECTION) == 0
4766 			|| (*protection & B_WRITE_AREA) != 0)
4767 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4768 		else
4769 			*protection |= B_KERNEL_READ_AREA;
4770 	}
4771 }
4772 
4773 
4774 static void
4775 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4776 {
4777 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4778 	info->area = area->id;
4779 	info->address = (void*)area->Base();
4780 	info->size = area->Size();
4781 	info->protection = area->protection;
4782 	info->lock = B_FULL_LOCK;
4783 	info->team = area->address_space->ID();
4784 	info->copy_count = 0;
4785 	info->in_count = 0;
4786 	info->out_count = 0;
4787 		// TODO: retrieve real values here!
4788 
4789 	VMCache* cache = vm_area_get_locked_cache(area);
4790 
4791 	// Note, this is a simplification; the cache could be larger than this area
4792 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4793 
4794 	vm_area_put_locked_cache(cache);
4795 }
4796 
4797 
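/*!	Resizes the area with the given ID to \a newSize, resizing all other areas
	attached to the same cache along with it. Growing resizes the cache first,
	since that step can fail; shrinking waits until the range to be cut off is
	no longer wired and unmaps the pages beyond the new size. \a kernel
	indicates whether the request originates from the kernel itself.
*/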
4798 static status_t
4799 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4800 {
4801 	// is newSize a multiple of B_PAGE_SIZE?
4802 	if (newSize & (B_PAGE_SIZE - 1))
4803 		return B_BAD_VALUE;
4804 
4805 	// lock all affected address spaces and the cache
4806 	VMArea* area;
4807 	VMCache* cache;
4808 
4809 	MultiAddressSpaceLocker locker;
4810 	AreaCacheLocker cacheLocker;
4811 
4812 	status_t status;
4813 	size_t oldSize;
4814 	bool anyKernelArea;
4815 	bool restart;
4816 
4817 	do {
4818 		anyKernelArea = false;
4819 		restart = false;
4820 
4821 		locker.Unset();
4822 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4823 		if (status != B_OK)
4824 			return status;
4825 		cacheLocker.SetTo(cache, true);	// already locked
4826 
4827 		// enforce restrictions
4828 		if (!kernel) {
4829 			if ((area->protection & B_KERNEL_AREA) != 0)
4830 				return B_NOT_ALLOWED;
4831 			// TODO: Enforce all restrictions (team, etc.)!
4832 		}
4833 
4834 		oldSize = area->Size();
4835 		if (newSize == oldSize)
4836 			return B_OK;
4837 
4838 		if (cache->type != CACHE_TYPE_RAM)
4839 			return B_NOT_ALLOWED;
4840 
4841 		if (oldSize < newSize) {
4842 			// We need to check if all areas of this cache can be resized.
4843 			for (VMArea* current = cache->areas; current != NULL;
4844 					current = current->cache_next) {
4845 				if (!current->address_space->CanResizeArea(current, newSize))
4846 					return B_ERROR;
4847 				anyKernelArea
4848 					|= current->address_space == VMAddressSpace::Kernel();
4849 			}
4850 		} else {
4851 			// We're shrinking the areas, so we must make sure the affected
4852 			// ranges are not wired.
4853 			for (VMArea* current = cache->areas; current != NULL;
4854 					current = current->cache_next) {
4855 				anyKernelArea
4856 					|= current->address_space == VMAddressSpace::Kernel();
4857 
4858 				if (wait_if_area_range_is_wired(current,
4859 						current->Base() + newSize, oldSize - newSize, &locker,
4860 						&cacheLocker)) {
4861 					restart = true;
4862 					break;
4863 				}
4864 			}
4865 		}
4866 	} while (restart);
4867 
4868 	// Okay, looks good so far, so let's do it
4869 
4870 	int priority = kernel && anyKernelArea
4871 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
4872 	uint32 allocationFlags = kernel && anyKernelArea
4873 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
4874 
4875 	if (oldSize < newSize) {
4876 		// Growing the cache can fail, so we do it first.
4877 		status = cache->Resize(cache->virtual_base + newSize, priority);
4878 		if (status != B_OK)
4879 			return status;
4880 	}
4881 
4882 	for (VMArea* current = cache->areas; current != NULL;
4883 			current = current->cache_next) {
4884 		status = current->address_space->ResizeArea(current, newSize,
4885 			allocationFlags);
4886 		if (status != B_OK)
4887 			break;
4888 
4889 		// We also need to unmap all pages beyond the new size, if the area has
4890 		// shrunk
4891 		if (newSize < oldSize) {
4892 			VMCacheChainLocker cacheChainLocker(cache);
4893 			cacheChainLocker.LockAllSourceCaches();
4894 
4895 			unmap_pages(current, current->Base() + newSize,
4896 				oldSize - newSize);
4897 
4898 			cacheChainLocker.Unlock(cache);
4899 		}
4900 	}
4901 
4902 	if (status == B_OK) {
4903 		// Shrink or grow individual page protections if in use.
4904 		if (area->page_protections != NULL) {
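			// The protections are stored as 4-bit nibbles, two pages per
			// byte, hence the (pageCount + 1) / 2 size computations below.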
4905 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
4906 			uint8* newProtections
4907 				= (uint8*)realloc(area->page_protections, bytes);
4908 			if (newProtections == NULL)
4909 				status = B_NO_MEMORY;
4910 			else {
4911 				area->page_protections = newProtections;
4912 
4913 				if (oldSize < newSize) {
4914 					// init the additional page protections to that of the area
4915 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
4916 					uint32 areaProtection = area->protection
4917 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
4918 					memset(area->page_protections + offset,
4919 						areaProtection | (areaProtection << 4), bytes - offset);
4920 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
4921 						uint8& entry = area->page_protections[offset - 1];
4922 						entry = (entry & 0x0f) | (areaProtection << 4);
4923 					}
4924 				}
4925 			}
4926 		}
4927 	}
4928 
4929 	// shrinking the cache can't fail, so we do it now
4930 	if (status == B_OK && newSize < oldSize)
4931 		status = cache->Resize(cache->virtual_base + newSize, priority);
4932 
4933 	if (status != B_OK) {
4934 		// Something failed -- resize the areas back to their original size.
4935 		// This can fail, too, in which case we're seriously screwed.
4936 		for (VMArea* current = cache->areas; current != NULL;
4937 				current = current->cache_next) {
4938 			if (current->address_space->ResizeArea(current, oldSize,
4939 					allocationFlags) != B_OK) {
4940 				panic("vm_resize_area(): Failed and not being able to restore "
4941 					"original state.");
4942 			}
4943 		}
4944 
4945 		cache->Resize(cache->virtual_base + oldSize, priority);
4946 	}
4947 
4948 	// TODO: we must honour the lock restrictions of this area
4949 	return status;
4950 }
4951 
4952 
4953 status_t
4954 vm_memset_physical(phys_addr_t address, int value, size_t length)
4955 {
4956 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
4957 }
4958 
4959 
4960 status_t
4961 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
4962 {
4963 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
4964 }
4965 
4966 
4967 status_t
4968 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
4969 	bool user)
4970 {
4971 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
4972 }
4973 
4974 
4975 void
4976 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
4977 {
4978 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
4979 }
4980 
4981 
4982 /*!	Copies a range of memory directly from/to a page that might not be mapped
4983 	at the moment.
4984 
4985 	For \a unsafeMemory the current mapping (if any) is ignored. The function
4986 	walks through the respective area's cache chain to find the physical page
4987 	and copies from/to it directly.
4988 	The memory range starting at \a unsafeMemory with a length of \a size bytes
4989 	must not cross a page boundary.
4990 
4991 	\param teamID The team ID identifying the address space \a unsafeMemory is
4992 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
4993 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
4994 		is passed, the address space of the thread returned by
4995 		debug_get_debugged_thread() is used.
4996 	\param unsafeMemory The start of the unsafe memory range to be copied
4997 		from/to.
4998 	\param buffer A safely accessible kernel buffer to be copied from/to.
4999 	\param size The number of bytes to be copied.
5000 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5001 		\a unsafeMemory, the other way around otherwise.
5002 */
5003 status_t
5004 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5005 	size_t size, bool copyToUnsafe)
5006 {
5007 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5008 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5009 		return B_BAD_VALUE;
5010 	}
5011 
5012 	// get the address space for the debugged thread
5013 	VMAddressSpace* addressSpace;
5014 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5015 		addressSpace = VMAddressSpace::Kernel();
5016 	} else if (teamID == B_CURRENT_TEAM) {
5017 		Thread* thread = debug_get_debugged_thread();
5018 		if (thread == NULL || thread->team == NULL)
5019 			return B_BAD_ADDRESS;
5020 
5021 		addressSpace = thread->team->address_space;
5022 	} else
5023 		addressSpace = VMAddressSpace::DebugGet(teamID);
5024 
5025 	if (addressSpace == NULL)
5026 		return B_BAD_ADDRESS;
5027 
5028 	// get the area
5029 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5030 	if (area == NULL)
5031 		return B_BAD_ADDRESS;
5032 
5033 	// search the page
5034 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5035 		+ area->cache_offset;
5036 	VMCache* cache = area->cache;
5037 	vm_page* page = NULL;
5038 	while (cache != NULL) {
5039 		page = cache->DebugLookupPage(cacheOffset);
5040 		if (page != NULL)
5041 			break;
5042 
5043 		// Page not found in this cache -- if it is paged out, we must not try
5044 		// to get it from lower caches.
5045 		if (cache->DebugHasPage(cacheOffset))
5046 			break;
5047 
5048 		cache = cache->source;
5049 	}
5050 
5051 	if (page == NULL)
5052 		return B_UNSUPPORTED;
5053 
5054 	// copy from/to physical memory
5055 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5056 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5057 
5058 	if (copyToUnsafe) {
5059 		if (page->Cache() != area->cache)
5060 			return B_UNSUPPORTED;
5061 
5062 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5063 	}
5064 
5065 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5066 }
5067 
5068 
5069 //	#pragma mark - kernel public API
5070 
5071 
5072 status_t
5073 user_memcpy(void* to, const void* from, size_t size)
5074 {
5075 	// don't allow address overflows
5076 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5077 		return B_BAD_ADDRESS;
5078 
5079 	if (arch_cpu_user_memcpy(to, from, size,
5080 			&thread_get_current_thread()->fault_handler) < B_OK)
5081 		return B_BAD_ADDRESS;
5082 
5083 	return B_OK;
5084 }
5085 
5086 
5087 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5088 	the string in \a to, NULL-terminating the result.
5089 
5090 	\param to Pointer to the destination C-string.
5091 	\param from Pointer to the source C-string.
5092 	\param size Size in bytes of the string buffer pointed to by \a to.
5093 
5094 	\return strlen(\a from).
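	A typical use in kernel code might look like this (illustrative sketch;
	\c userPath stands for any user supplied string pointer):
		char path[B_PATH_NAME_LENGTH];
		if (user_strlcpy(path, userPath, sizeof(path)) < B_OK)
			return B_BAD_ADDRESS;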
5095 */
5096 ssize_t
5097 user_strlcpy(char* to, const char* from, size_t size)
5098 {
5099 	if (to == NULL && size != 0)
5100 		return B_BAD_VALUE;
5101 	if (from == NULL)
5102 		return B_BAD_ADDRESS;
5103 
5104 	// limit size to avoid address overflows
5105 	size_t maxSize = std::min(size,
5106 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5107 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5108 		// the source address might still overflow.
5109 
5110 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize,
5111 		&thread_get_current_thread()->fault_handler);
5112 
5113 	// If we hit the address overflow boundary, fail.
5114 	if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5115 			&& maxSize < size)) {
5116 		return B_BAD_ADDRESS;
5117 	}
5118 
5119 	return result;
5120 }
5121 
5122 
5123 status_t
5124 user_memset(void* s, char c, size_t count)
5125 {
5126 	// don't allow address overflows
5127 	if ((addr_t)s + count < (addr_t)s)
5128 		return B_BAD_ADDRESS;
5129 
5130 	if (arch_cpu_user_memset(s, c, count,
5131 			&thread_get_current_thread()->fault_handler) < B_OK)
5132 		return B_BAD_ADDRESS;
5133 
5134 	return B_OK;
5135 }
5136 
5137 
5138 /*!	Wires a single page at the given address.
5139 
5140 	\param team The team whose address space the address belongs to. Supports
5141 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5142 		parameter is ignored.
5143 	\param address The virtual address to wire down. Does not need to
5144 		be page aligned.
5145 	\param writable If \c true the page shall be writable.
5146 	\param info On success the info is filled in, among other things
5147 		containing the physical address the given virtual one translates to.
5148 	\return \c B_OK, when the page could be wired, another error code otherwise.
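	A call is typically balanced by vm_unwire_page() (illustrative sketch;
	\c virtualAddress is a placeholder):
		VMPageWiringInfo info;
		if (vm_wire_page(B_CURRENT_TEAM, virtualAddress, true, &info)
				== B_OK) {
			// ... use info.physicalAddress ...
			vm_unwire_page(&info);
		}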
5149 */
5150 status_t
5151 vm_wire_page(team_id team, addr_t address, bool writable,
5152 	VMPageWiringInfo* info)
5153 {
5154 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5155 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5156 
5157 	// compute the page protection that is required
5158 	bool isUser = IS_USER_ADDRESS(address);
5159 	uint32 requiredProtection = PAGE_PRESENT
5160 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5161 	if (writable)
5162 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5163 
5164 	// get and read lock the address space
5165 	VMAddressSpace* addressSpace = NULL;
5166 	if (isUser) {
5167 		if (team == B_CURRENT_TEAM)
5168 			addressSpace = VMAddressSpace::GetCurrent();
5169 		else
5170 			addressSpace = VMAddressSpace::Get(team);
5171 	} else
5172 		addressSpace = VMAddressSpace::GetKernel();
5173 	if (addressSpace == NULL)
5174 		return B_ERROR;
5175 
5176 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5177 
5178 	VMTranslationMap* map = addressSpace->TranslationMap();
5179 	status_t error = B_OK;
5180 
5181 	// get the area
5182 	VMArea* area = addressSpace->LookupArea(pageAddress);
5183 	if (area == NULL) {
5184 		addressSpace->Put();
5185 		return B_BAD_ADDRESS;
5186 	}
5187 
5188 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5189 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5190 
5191 	// mark the area range wired
5192 	area->Wire(&info->range);
5193 
5194 	// Lock the area's cache chain and the translation map. Needed to look
5195 	// up the page and play with its wired count.
5196 	cacheChainLocker.LockAllSourceCaches();
5197 	map->Lock();
5198 
5199 	phys_addr_t physicalAddress;
5200 	uint32 flags;
5201 	vm_page* page;
5202 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5203 		&& (flags & requiredProtection) == requiredProtection
5204 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5205 			!= NULL) {
5206 		// Already mapped with the correct permissions -- just increment
5207 		// the page's wired count.
5208 		increment_page_wired_count(page);
5209 
5210 		map->Unlock();
5211 		cacheChainLocker.Unlock();
5212 		addressSpaceLocker.Unlock();
5213 	} else {
5214 		// Let vm_soft_fault() map the page for us, if possible. We need
5215 		// to fully unlock to avoid deadlocks. Since we have already
5216 		// wired the area itself, nothing disturbing will happen with it
5217 		// in the meantime.
5218 		map->Unlock();
5219 		cacheChainLocker.Unlock();
5220 		addressSpaceLocker.Unlock();
5221 
5222 		error = vm_soft_fault(addressSpace, pageAddress, writable, isUser,
5223 			&page, &info->range);
5224 
5225 		if (error != B_OK) {
5226 			// The page could not be mapped -- clean up.
5227 			VMCache* cache = vm_area_get_locked_cache(area);
5228 			area->Unwire(&info->range);
5229 			cache->ReleaseRefAndUnlock();
5230 			addressSpace->Put();
5231 			return error;
5232 		}
5233 	}
5234 
5235 	info->physicalAddress
5236 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5237 			+ address % B_PAGE_SIZE;
5238 	info->page = page;
5239 
5240 	return B_OK;
5241 }
5242 
5243 
5244 /*!	Unwires a single page previously wired via vm_wire_page().
5245 
5246 	\param info The same object passed to vm_wire_page() before.
5247 */
5248 void
5249 vm_unwire_page(VMPageWiringInfo* info)
5250 {
5251 	// lock the address space
5252 	VMArea* area = info->range.area;
5253 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5254 		// takes over our reference
5255 
5256 	// lock the top cache
5257 	VMCache* cache = vm_area_get_locked_cache(area);
5258 	VMCacheChainLocker cacheChainLocker(cache);
5259 
5260 	if (info->page->Cache() != cache) {
5261 		// The page is not in the top cache, so we lock the whole cache chain
5262 		// before touching the page's wired count.
5263 		cacheChainLocker.LockAllSourceCaches();
5264 	}
5265 
5266 	decrement_page_wired_count(info->page);
5267 
5268 	// remove the wired range from the area
5269 	area->Unwire(&info->range);
5270 
5271 	cacheChainLocker.Unlock();
5272 }
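

/*	Illustrative usage sketch (editorial addition, not part of the original
	source): wiring a single user page to obtain a stable physical address and
	unwiring it again afterwards. userBuffer and use_physical_address() are
	hypothetical placeholders.

	VMPageWiringInfo info;
	status_t error = vm_wire_page(B_CURRENT_TEAM, (addr_t)userBuffer, true,
		&info);
	if (error == B_OK) {
		// info.physicalAddress stays valid until vm_unwire_page() is called
		use_physical_address(info.physicalAddress);
		vm_unwire_page(&info);
	}
*/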
5273 
5274 
5275 /*!	Wires down the given address range in the specified team's address space.
5276 
5277 	If successful the function
5278 	- acquires a reference to the specified team's address space,
5279 	- adds respective wired ranges to all areas that intersect with the given
5280 	  address range,
5281 	- makes sure all pages in the given address range are mapped with the
5282 	  requested access permissions and increments their wired count.
5283 
5284 	It fails when \a team doesn't specify a valid address space, when any part
5285 	of the specified address range is not covered by areas, when the areas
5286 	concerned don't allow mapping with the requested permissions, or when
5287 	mapping fails for another reason.
5288 
5289 	When successful, the call must be balanced by an unlock_memory_etc() call
5290 	with the exact same parameters.
5291 
5292 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5293 		supported.
5294 	\param address The start of the address range to be wired.
5295 	\param numBytes The size of the address range to be wired.
5296 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5297 		requests that the range must be wired writable ("read from device
5298 		into memory").
5299 	\return \c B_OK on success, another error code otherwise.
5300 */
5301 status_t
5302 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5303 {
5304 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5305 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5306 
5307 	// compute the page protection that is required
5308 	bool isUser = IS_USER_ADDRESS(address);
5309 	bool writable = (flags & B_READ_DEVICE) == 0;
5310 	uint32 requiredProtection = PAGE_PRESENT
5311 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5312 	if (writable)
5313 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5314 
5315 	uint32 mallocFlags = isUser
5316 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5317 
5318 	// get and read lock the address space
5319 	VMAddressSpace* addressSpace = NULL;
5320 	if (isUser) {
5321 		if (team == B_CURRENT_TEAM)
5322 			addressSpace = VMAddressSpace::GetCurrent();
5323 		else
5324 			addressSpace = VMAddressSpace::Get(team);
5325 	} else
5326 		addressSpace = VMAddressSpace::GetKernel();
5327 	if (addressSpace == NULL)
5328 		return B_ERROR;
5329 
5330 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5331 
5332 	VMTranslationMap* map = addressSpace->TranslationMap();
5333 	status_t error = B_OK;
5334 
5335 	// iterate through all concerned areas
5336 	addr_t nextAddress = lockBaseAddress;
5337 	while (nextAddress != lockEndAddress) {
5338 		// get the next area
5339 		VMArea* area = addressSpace->LookupArea(nextAddress);
5340 		if (area == NULL) {
5341 			error = B_BAD_ADDRESS;
5342 			break;
5343 		}
5344 
5345 		addr_t areaStart = nextAddress;
5346 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5347 
5348 		// allocate the wired range (do that before locking the cache to avoid
5349 		// deadlocks)
5350 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5351 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5352 		if (range == NULL) {
5353 			error = B_NO_MEMORY;
5354 			break;
5355 		}
5356 
5357 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5358 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5359 
5360 		// mark the area range wired
5361 		area->Wire(range);
5362 
5363 		// Depending on the area cache type and the wiring, we may not need to
5364 		// look at the individual pages.
5365 		if (area->cache_type == CACHE_TYPE_NULL
5366 			|| area->cache_type == CACHE_TYPE_DEVICE
5367 			|| area->wiring == B_FULL_LOCK
5368 			|| area->wiring == B_CONTIGUOUS) {
5369 			nextAddress = areaEnd;
5370 			continue;
5371 		}
5372 
5373 		// Lock the area's cache chain and the translation map. Needed to look
5374 		// up pages and play with their wired count.
5375 		cacheChainLocker.LockAllSourceCaches();
5376 		map->Lock();
5377 
5378 		// iterate through the pages and wire them
5379 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5380 			phys_addr_t physicalAddress;
5381 			uint32 flags;
5382 
5383 			vm_page* page;
5384 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5385 				&& (flags & requiredProtection) == requiredProtection
5386 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5387 					!= NULL) {
5388 				// Already mapped with the correct permissions -- just increment
5389 				// the page's wired count.
5390 				increment_page_wired_count(page);
5391 			} else {
5392 				// Let vm_soft_fault() map the page for us, if possible. We need
5393 				// to fully unlock to avoid deadlocks. Since we have already
5394 				// wired the area itself, nothing disturbing will happen with it
5395 				// in the meantime.
5396 				map->Unlock();
5397 				cacheChainLocker.Unlock();
5398 				addressSpaceLocker.Unlock();
5399 
5400 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5401 					isUser, &page, range);
5402 
5403 				addressSpaceLocker.Lock();
5404 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5405 				cacheChainLocker.LockAllSourceCaches();
5406 				map->Lock();
5407 			}
5408 
5409 			if (error != B_OK)
5410 				break;
5411 		}
5412 
5413 		map->Unlock();
5414 
5415 		if (error == B_OK) {
5416 			cacheChainLocker.Unlock();
5417 		} else {
5418 			// An error occurred, so abort right here. If the current address
5419 			// is the first in this area, unwire the area, since we won't get
5420 			// to it when reverting what we've done so far.
5421 			if (nextAddress == areaStart) {
5422 				area->Unwire(range);
5423 				cacheChainLocker.Unlock();
5424 				range->~VMAreaWiredRange();
5425 				free_etc(range, mallocFlags);
5426 			} else
5427 				cacheChainLocker.Unlock();
5428 
5429 			break;
5430 		}
5431 	}
5432 
5433 	if (error != B_OK) {
5434 		// An error occurred, so unwire all that we've already wired. Note that
5435 		// even if not a single page was wired, unlock_memory_etc() is called
5436 		// to put the address space reference.
5437 		addressSpaceLocker.Unlock();
5438 		unlock_memory_etc(team, (void*)address, nextAddress - lockBaseAddress,
5439 			flags);
5440 	}
5441 
5442 	return error;
5443 }
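

/*	Illustrative usage sketch (editorial addition): wiring a buffer for a
	device transfer and balancing the call afterwards with the exact same
	parameters. buffer, length, and the surrounding driver context are
	hypothetical; B_READ_DEVICE requests a "read from device into memory"
	transfer as described in the doc comment above.

	status_t error = lock_memory_etc(B_CURRENT_TEAM, buffer, length,
		B_READ_DEVICE);
	if (error == B_OK) {
		// ... set up and run the device transfer ...
		unlock_memory_etc(B_CURRENT_TEAM, buffer, length, B_READ_DEVICE);
	}
*/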
5444 
5445 
5446 status_t
5447 lock_memory(void* address, size_t numBytes, uint32 flags)
5448 {
5449 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5450 }
5451 
5452 
5453 /*!	Unwires an address range previously wired with lock_memory_etc().
5454 
5455 	Note that a call to this function must balance a previous lock_memory_etc()
5456 	call with exactly the same parameters.
5457 */
5458 status_t
5459 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5460 {
5461 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5462 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5463 
5464 	// compute the page protection that is required
5465 	bool isUser = IS_USER_ADDRESS(address);
5466 	bool writable = (flags & B_READ_DEVICE) == 0;
5467 	uint32 requiredProtection = PAGE_PRESENT
5468 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5469 	if (writable)
5470 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5471 
5472 	uint32 mallocFlags = isUser
5473 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5474 
5475 	// get and read lock the address space
5476 	VMAddressSpace* addressSpace = NULL;
5477 	if (isUser) {
5478 		if (team == B_CURRENT_TEAM)
5479 			addressSpace = VMAddressSpace::GetCurrent();
5480 		else
5481 			addressSpace = VMAddressSpace::Get(team);
5482 	} else
5483 		addressSpace = VMAddressSpace::GetKernel();
5484 	if (addressSpace == NULL)
5485 		return B_ERROR;
5486 
5487 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5488 
5489 	VMTranslationMap* map = addressSpace->TranslationMap();
5490 	status_t error = B_OK;
5491 
5492 	// iterate through all concerned areas
5493 	addr_t nextAddress = lockBaseAddress;
5494 	while (nextAddress != lockEndAddress) {
5495 		// get the next area
5496 		VMArea* area = addressSpace->LookupArea(nextAddress);
5497 		if (area == NULL) {
5498 			error = B_BAD_ADDRESS;
5499 			break;
5500 		}
5501 
5502 		addr_t areaStart = nextAddress;
5503 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5504 
5505 		// Lock the area's top cache. This is a requirement for
5506 		// VMArea::Unwire().
5507 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5508 
5509 		// Depending on the area cache type and the wiring, we may not need to
5510 		// look at the individual pages.
5511 		if (area->cache_type == CACHE_TYPE_NULL
5512 			|| area->cache_type == CACHE_TYPE_DEVICE
5513 			|| area->wiring == B_FULL_LOCK
5514 			|| area->wiring == B_CONTIGUOUS) {
5515 			// unwire the range (to avoid deadlocks we delete the range after
5516 			// unlocking the cache)
5517 			nextAddress = areaEnd;
5518 			VMAreaWiredRange* range = area->Unwire(areaStart,
5519 				areaEnd - areaStart, writable);
5520 			cacheChainLocker.Unlock();
5521 			if (range != NULL) {
5522 				range->~VMAreaWiredRange();
5523 				free_etc(range, mallocFlags);
5524 			}
5525 			continue;
5526 		}
5527 
5528 		// Lock the area's cache chain and the translation map. Needed to look
5529 		// up pages and play with their wired count.
5530 		cacheChainLocker.LockAllSourceCaches();
5531 		map->Lock();
5532 
5533 		// iterate through the pages and unwire them
5534 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5535 			phys_addr_t physicalAddress;
5536 			uint32 flags;
5537 
5538 			vm_page* page;
5539 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5540 				&& (flags & PAGE_PRESENT) != 0
5541 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5542 					!= NULL) {
5543 				// The page is still mapped -- just decrement its wired
5544 				// count.
5545 				decrement_page_wired_count(page);
5546 			} else {
5547 				panic("unlock_memory_etc(): Failed to unwire page: address "
5548 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5549 					nextAddress);
5550 				error = B_BAD_VALUE;
5551 				break;
5552 			}
5553 		}
5554 
5555 		map->Unlock();
5556 
5557 		// All pages are unwired. Remove the area's wired range as well (to
5558 		// avoid deadlocks we delete the range after unlocking the cache).
5559 		VMAreaWiredRange* range = area->Unwire(areaStart,
5560 			areaEnd - areaStart, writable);
5561 
5562 		cacheChainLocker.Unlock();
5563 
5564 		if (range != NULL) {
5565 			range->~VMAreaWiredRange();
5566 			free_etc(range, mallocFlags);
5567 		}
5568 
5569 		if (error != B_OK)
5570 			break;
5571 	}
5572 
5573 	// get rid of the address space reference
5574 	addressSpace->Put();
5575 
5576 	return error;
5577 }
5578 
5579 
5580 status_t
5581 unlock_memory(void* address, size_t numBytes, uint32 flags)
5582 {
5583 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5584 }
5585 
5586 
5587 /*!	Similar to get_memory_map(), but also allows specifying the address space
5588 	for the memory in question and has saner semantics.
5589 	Returns \c B_OK when the complete range could be translated or
5590 	\c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5591 	case the actual number of entries is written to \c *_numEntries. Any other
5592 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5593 	in this case.
5594 */
5595 status_t
5596 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5597 	physical_entry* table, uint32* _numEntries)
5598 {
5599 	uint32 numEntries = *_numEntries;
5600 	*_numEntries = 0;
5601 
5602 	VMAddressSpace* addressSpace;
5603 	addr_t virtualAddress = (addr_t)address;
5604 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5605 	phys_addr_t physicalAddress;
5606 	status_t status = B_OK;
5607 	int32 index = -1;
5608 	addr_t offset = 0;
5609 	bool interrupts = are_interrupts_enabled();
5610 
5611 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5612 		"entries)\n", team, address, numBytes, numEntries));
5613 
5614 	if (numEntries == 0 || numBytes == 0)
5615 		return B_BAD_VALUE;
5616 
5617 	// in which address space is the address to be found?
5618 	if (IS_USER_ADDRESS(virtualAddress)) {
5619 		if (team == B_CURRENT_TEAM)
5620 			addressSpace = VMAddressSpace::GetCurrent();
5621 		else
5622 			addressSpace = VMAddressSpace::Get(team);
5623 	} else
5624 		addressSpace = VMAddressSpace::GetKernel();
5625 
5626 	if (addressSpace == NULL)
5627 		return B_ERROR;
5628 
5629 	VMTranslationMap* map = addressSpace->TranslationMap();
5630 
5631 	if (interrupts)
5632 		map->Lock();
5633 
5634 	while (offset < numBytes) {
5635 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5636 		uint32 flags;
5637 
5638 		if (interrupts) {
5639 			status = map->Query((addr_t)address + offset, &physicalAddress,
5640 				&flags);
5641 		} else {
5642 			status = map->QueryInterrupt((addr_t)address + offset,
5643 				&physicalAddress, &flags);
5644 		}
5645 		if (status < B_OK)
5646 			break;
5647 		if ((flags & PAGE_PRESENT) == 0) {
5648 			panic("get_memory_map() called on unmapped memory!");
5649 			return B_BAD_ADDRESS;
5650 		}
5651 
5652 		if (index < 0 && pageOffset > 0) {
5653 			physicalAddress += pageOffset;
5654 			if (bytes > B_PAGE_SIZE - pageOffset)
5655 				bytes = B_PAGE_SIZE - pageOffset;
5656 		}
5657 
5658 		// need to switch to the next physical_entry?
5659 		if (index < 0 || table[index].address
5660 				!= physicalAddress - table[index].size) {
5661 			if ((uint32)++index + 1 > numEntries) {
5662 				// table too small
5663 				break;
5664 			}
5665 			table[index].address = physicalAddress;
5666 			table[index].size = bytes;
5667 		} else {
5668 			// page is contiguous with the current entry -- just extend it
5669 			table[index].size += bytes;
5670 		}
5671 
5672 		offset += bytes;
5673 	}
5674 
5675 	if (interrupts)
5676 		map->Unlock();
5677 
5678 	if (status != B_OK)
5679 		return status;
5680 
5681 	if ((uint32)index + 1 > numEntries) {
5682 		*_numEntries = index;
5683 		return B_BUFFER_OVERFLOW;
5684 	}
5685 
5686 	*_numEntries = index + 1;
5687 	return B_OK;
5688 }
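

/*	Illustrative usage sketch (editorial addition): translating a virtual
	buffer into its physical runs, e.g. to build a scatter/gather list. buffer
	and length are hypothetical.

	physical_entry entries[16];
	uint32 numEntries = 16;
	status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
		entries, &numEntries);
	if (error == B_OK || error == B_BUFFER_OVERFLOW) {
		// numEntries now holds the number of valid entries; on
		// B_BUFFER_OVERFLOW only the beginning of the range was translated
		for (uint32 i = 0; i < numEntries; i++) {
			// entries[i].address and entries[i].size describe one
			// physically contiguous run
		}
	}
*/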
5689 
5690 
5691 /*!	According to the BeBook, this function should always succeed.
5692 	This is no longer the case.
5693 */
5694 extern "C" int32
5695 __get_memory_map_haiku(const void* address, size_t numBytes,
5696 	physical_entry* table, int32 numEntries)
5697 {
5698 	uint32 entriesRead = numEntries;
5699 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5700 		table, &entriesRead);
5701 	if (error != B_OK)
5702 		return error;
5703 
5704 	// close the entry list
5705 
5706 	// if it's only one entry, we will silently accept the missing terminator
5707 	if (numEntries == 1)
5708 		return B_OK;
5709 
5710 	if (entriesRead + 1 > (uint32)numEntries)
5711 		return B_BUFFER_OVERFLOW;
5712 
5713 	table[entriesRead].address = 0;
5714 	table[entriesRead].size = 0;
5715 
5716 	return B_OK;
5717 }
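

/*	Illustrative usage sketch (editorial addition): the public get_memory_map()
	(bound to __get_memory_map_haiku() via the symbol versioning at the end of
	this file) terminates the table with a zero-sized entry whenever more than
	one entry was requested. buffer and length are hypothetical.

	physical_entry table[8];
	if (get_memory_map(buffer, length, table, 8) == B_OK) {
		for (int32 i = 0; i < 8 && table[i].size != 0; i++) {
			// table[i] describes one physically contiguous run
		}
	}
*/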
5718 
5719 
5720 area_id
5721 area_for(void* address)
5722 {
5723 	return vm_area_for((addr_t)address, true);
5724 }
5725 
5726 
5727 area_id
5728 find_area(const char* name)
5729 {
5730 	return VMAreaHash::Find(name);
5731 }
5732 
5733 
5734 status_t
5735 _get_area_info(area_id id, area_info* info, size_t size)
5736 {
5737 	if (size != sizeof(area_info) || info == NULL)
5738 		return B_BAD_VALUE;
5739 
5740 	AddressSpaceReadLocker locker;
5741 	VMArea* area;
5742 	status_t status = locker.SetFromArea(id, area);
5743 	if (status != B_OK)
5744 		return status;
5745 
5746 	fill_area_info(area, info, size);
5747 	return B_OK;
5748 }
5749 
5750 
5751 status_t
5752 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5753 {
5754 	addr_t nextBase = *(addr_t*)cookie;
5755 
5756 	// we're already through the list
5757 	if (nextBase == (addr_t)-1)
5758 		return B_ENTRY_NOT_FOUND;
5759 
5760 	if (team == B_CURRENT_TEAM)
5761 		team = team_get_current_team_id();
5762 
5763 	AddressSpaceReadLocker locker(team);
5764 	if (!locker.IsLocked())
5765 		return B_BAD_TEAM_ID;
5766 
5767 	VMArea* area;
5768 	for (VMAddressSpace::AreaIterator it
5769 				= locker.AddressSpace()->GetAreaIterator();
5770 			(area = it.Next()) != NULL;) {
5771 		if (area->Base() > nextBase)
5772 			break;
5773 	}
5774 
5775 	if (area == NULL) {
5776 		nextBase = (addr_t)-1;
5777 		return B_ENTRY_NOT_FOUND;
5778 	}
5779 
5780 	fill_area_info(area, info, size);
5781 	*cookie = (ssize_t)(area->Base());
5782 
5783 	return B_OK;
5784 }
5785 
5786 
5787 status_t
5788 set_area_protection(area_id area, uint32 newProtection)
5789 {
5790 	fix_protection(&newProtection);
5791 
5792 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5793 		newProtection, true);
5794 }
5795 
5796 
5797 status_t
5798 resize_area(area_id areaID, size_t newSize)
5799 {
5800 	return vm_resize_area(areaID, newSize, true);
5801 }
5802 
5803 
5804 /*!	Transfers the specified area to a new team. The caller must be the owner
5805 	of the area.
5806 */
5807 area_id
5808 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5809 	bool kernel)
5810 {
5811 	area_info info;
5812 	status_t status = get_area_info(id, &info);
5813 	if (status != B_OK)
5814 		return status;
5815 
5816 	if (info.team != thread_get_current_thread()->team->id)
5817 		return B_PERMISSION_DENIED;
5818 
5819 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5820 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5821 	if (clonedArea < 0)
5822 		return clonedArea;
5823 
5824 	status = vm_delete_area(info.team, id, kernel);
5825 	if (status != B_OK) {
5826 		vm_delete_area(target, clonedArea, kernel);
5827 		return status;
5828 	}
5829 
5830 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5831 
5832 	return clonedArea;
5833 }
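

/*	Illustrative usage sketch (editorial addition): handing an area owned by
	the current team over to another team. someArea and targetTeam are
	hypothetical; on success the returned id replaces the original one, which
	is deleted.

	void* newBase = NULL;
	area_id transferred = transfer_area(someArea, &newBase, B_ANY_ADDRESS,
		targetTeam, true);
	if (transferred >= 0) {
		// the area now lives in targetTeam's address space at newBase
	}
*/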
5834 
5835 
5836 extern "C" area_id
5837 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
5838 	size_t numBytes, uint32 addressSpec, uint32 protection,
5839 	void** _virtualAddress)
5840 {
5841 	if (!arch_vm_supports_protection(protection))
5842 		return B_NOT_SUPPORTED;
5843 
5844 	fix_protection(&protection);
5845 
5846 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
5847 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
5848 		false);
5849 }
5850 
5851 
5852 area_id
5853 clone_area(const char* name, void** _address, uint32 addressSpec,
5854 	uint32 protection, area_id source)
5855 {
5856 	if ((protection & B_KERNEL_PROTECTION) == 0)
5857 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5858 
5859 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
5860 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
5861 }
5862 
5863 
5864 area_id
5865 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock,
5866 	uint32 protection, uint32 flags,
5867 	const virtual_address_restrictions* virtualAddressRestrictions,
5868 	const physical_address_restrictions* physicalAddressRestrictions,
5869 	void** _address)
5870 {
5871 	fix_protection(&protection);
5872 
5873 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
5874 		virtualAddressRestrictions, physicalAddressRestrictions, true,
5875 		_address);
5876 }
5877 
5878 
5879 extern "C" area_id
5880 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
5881 	size_t size, uint32 lock, uint32 protection)
5882 {
5883 	fix_protection(&protection);
5884 
5885 	virtual_address_restrictions virtualRestrictions = {};
5886 	virtualRestrictions.address = *_address;
5887 	virtualRestrictions.address_specification = addressSpec;
5888 	physical_address_restrictions physicalRestrictions = {};
5889 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
5890 		lock, protection, 0, &virtualRestrictions, &physicalRestrictions, true,
5891 		_address);
5892 }
5893 
5894 
5895 status_t
5896 delete_area(area_id area)
5897 {
5898 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
5899 }
5900 
5901 
5902 //	#pragma mark - Userland syscalls
5903 
5904 
5905 status_t
5906 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
5907 	addr_t size)
5908 {
5909 	// filter out some unavailable values (for userland)
5910 	switch (addressSpec) {
5911 		case B_ANY_KERNEL_ADDRESS:
5912 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5913 			return B_BAD_VALUE;
5914 	}
5915 
5916 	addr_t address;
5917 
5918 	if (!IS_USER_ADDRESS(userAddress)
5919 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
5920 		return B_BAD_ADDRESS;
5921 
5922 	status_t status = vm_reserve_address_range(
5923 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
5924 		RESERVED_AVOID_BASE);
5925 	if (status != B_OK)
5926 		return status;
5927 
5928 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
5929 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5930 			(void*)address, size);
5931 		return B_BAD_ADDRESS;
5932 	}
5933 
5934 	return B_OK;
5935 }
5936 
5937 
5938 status_t
5939 _user_unreserve_address_range(addr_t address, addr_t size)
5940 {
5941 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5942 		(void*)address, size);
5943 }
5944 
5945 
5946 area_id
5947 _user_area_for(void* address)
5948 {
5949 	return vm_area_for((addr_t)address, false);
5950 }
5951 
5952 
5953 area_id
5954 _user_find_area(const char* userName)
5955 {
5956 	char name[B_OS_NAME_LENGTH];
5957 
5958 	if (!IS_USER_ADDRESS(userName)
5959 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5960 		return B_BAD_ADDRESS;
5961 
5962 	return find_area(name);
5963 }
5964 
5965 
5966 status_t
5967 _user_get_area_info(area_id area, area_info* userInfo)
5968 {
5969 	if (!IS_USER_ADDRESS(userInfo))
5970 		return B_BAD_ADDRESS;
5971 
5972 	area_info info;
5973 	status_t status = get_area_info(area, &info);
5974 	if (status < B_OK)
5975 		return status;
5976 
5977 	// TODO: do we want to prevent userland from seeing kernel protections?
5978 	//info.protection &= B_USER_PROTECTION;
5979 
5980 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5981 		return B_BAD_ADDRESS;
5982 
5983 	return status;
5984 }
5985 
5986 
5987 status_t
5988 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
5989 {
5990 	ssize_t cookie;
5991 
5992 	if (!IS_USER_ADDRESS(userCookie)
5993 		|| !IS_USER_ADDRESS(userInfo)
5994 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
5995 		return B_BAD_ADDRESS;
5996 
5997 	area_info info;
5998 	status_t status = _get_next_area_info(team, &cookie, &info,
5999 		sizeof(area_info));
6000 	if (status != B_OK)
6001 		return status;
6002 
6003 	//info.protection &= B_USER_PROTECTION;
6004 
6005 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6006 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6007 		return B_BAD_ADDRESS;
6008 
6009 	return status;
6010 }
6011 
6012 
6013 status_t
6014 _user_set_area_protection(area_id area, uint32 newProtection)
6015 {
6016 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6017 		return B_BAD_VALUE;
6018 
6019 	fix_protection(&newProtection);
6020 
6021 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6022 		newProtection, false);
6023 }
6024 
6025 
6026 status_t
6027 _user_resize_area(area_id area, size_t newSize)
6028 {
6029 	// TODO: Since we restrict deletion of areas to those owned by the team,
6030 	// we should also do that for resizing (check other functions, too).
6031 	return vm_resize_area(area, newSize, false);
6032 }
6033 
6034 
6035 area_id
6036 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6037 	team_id target)
6038 {
6039 	// filter out some unavailable values (for userland)
6040 	switch (addressSpec) {
6041 		case B_ANY_KERNEL_ADDRESS:
6042 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6043 			return B_BAD_VALUE;
6044 	}
6045 
6046 	void* address;
6047 	if (!IS_USER_ADDRESS(userAddress)
6048 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6049 		return B_BAD_ADDRESS;
6050 
6051 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6052 	if (newArea < B_OK)
6053 		return newArea;
6054 
6055 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6056 		return B_BAD_ADDRESS;
6057 
6058 	return newArea;
6059 }
6060 
6061 
6062 area_id
6063 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6064 	uint32 protection, area_id sourceArea)
6065 {
6066 	char name[B_OS_NAME_LENGTH];
6067 	void* address;
6068 
6069 	// filter out some unavailable values (for userland)
6070 	switch (addressSpec) {
6071 		case B_ANY_KERNEL_ADDRESS:
6072 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6073 			return B_BAD_VALUE;
6074 	}
6075 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6076 		return B_BAD_VALUE;
6077 
6078 	if (!IS_USER_ADDRESS(userName)
6079 		|| !IS_USER_ADDRESS(userAddress)
6080 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6081 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6082 		return B_BAD_ADDRESS;
6083 
6084 	fix_protection(&protection);
6085 
6086 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6087 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6088 		false);
6089 	if (clonedArea < B_OK)
6090 		return clonedArea;
6091 
6092 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6093 		delete_area(clonedArea);
6094 		return B_BAD_ADDRESS;
6095 	}
6096 
6097 	return clonedArea;
6098 }
6099 
6100 
6101 area_id
6102 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6103 	size_t size, uint32 lock, uint32 protection)
6104 {
6105 	char name[B_OS_NAME_LENGTH];
6106 	void* address;
6107 
6108 	// filter out some unavailable values (for userland)
6109 	switch (addressSpec) {
6110 		case B_ANY_KERNEL_ADDRESS:
6111 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6112 			return B_BAD_VALUE;
6113 	}
6114 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6115 		return B_BAD_VALUE;
6116 
6117 	if (!IS_USER_ADDRESS(userName)
6118 		|| !IS_USER_ADDRESS(userAddress)
6119 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6120 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6121 		return B_BAD_ADDRESS;
6122 
6123 	if (addressSpec == B_EXACT_ADDRESS
6124 		&& IS_KERNEL_ADDRESS(address))
6125 		return B_BAD_VALUE;
6126 
6127 	fix_protection(&protection);
6128 
6129 	virtual_address_restrictions virtualRestrictions = {};
6130 	virtualRestrictions.address = address;
6131 	virtualRestrictions.address_specification = addressSpec;
6132 	physical_address_restrictions physicalRestrictions = {};
6133 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6134 		size, lock, protection, 0, &virtualRestrictions, &physicalRestrictions,
6135 		false, &address);
6136 
6137 	if (area >= B_OK
6138 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6139 		delete_area(area);
6140 		return B_BAD_ADDRESS;
6141 	}
6142 
6143 	return area;
6144 }
6145 
6146 
6147 status_t
6148 _user_delete_area(area_id area)
6149 {
6150 	// Unlike the BeOS implementation, you can now only delete areas
6151 	// that you have created yourself from userland.
6152 	// The documentation for delete_area() explicitly states that this
6153 	// will be restricted in the future, and so it will.
6154 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6155 }
6156 
6157 
6158 // TODO: create a BeOS style call for this!
6159 
6160 area_id
6161 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6162 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6163 	int fd, off_t offset)
6164 {
6165 	char name[B_OS_NAME_LENGTH];
6166 	void* address;
6167 	area_id area;
6168 
6169 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6170 		return B_BAD_VALUE;
6171 
6172 	fix_protection(&protection);
6173 
6174 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6175 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6176 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6177 		return B_BAD_ADDRESS;
6178 
6179 	if (addressSpec == B_EXACT_ADDRESS) {
6180 		if ((addr_t)address + size < (addr_t)address
6181 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6182 			return B_BAD_VALUE;
6183 		}
6184 		if (!IS_USER_ADDRESS(address)
6185 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6186 			return B_BAD_ADDRESS;
6187 		}
6188 	}
6189 
6190 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6191 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6192 		false);
6193 	if (area < B_OK)
6194 		return area;
6195 
6196 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6197 		return B_BAD_ADDRESS;
6198 
6199 	return area;
6200 }
6201 
6202 
6203 status_t
6204 _user_unmap_memory(void* _address, size_t size)
6205 {
6206 	addr_t address = (addr_t)_address;
6207 
6208 	// check params
6209 	if (size == 0 || (addr_t)address + size < (addr_t)address
6210 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6211 		return B_BAD_VALUE;
6212 	}
6213 
6214 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6215 		return B_BAD_ADDRESS;
6216 
6217 	// Write lock the address space and ensure the address range is not wired.
6218 	AddressSpaceWriteLocker locker;
6219 	do {
6220 		status_t status = locker.SetTo(team_get_current_team_id());
6221 		if (status != B_OK)
6222 			return status;
6223 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6224 			size, &locker));
6225 
6226 	// unmap
6227 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6228 }
6229 
6230 
6231 status_t
6232 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6233 {
6234 	// check address range
6235 	addr_t address = (addr_t)_address;
6236 	size = PAGE_ALIGN(size);
6237 
6238 	if ((address % B_PAGE_SIZE) != 0)
6239 		return B_BAD_VALUE;
6240 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6241 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6242 		// weird error code required by POSIX
6243 		return ENOMEM;
6244 	}
6245 
6246 	// extend and check protection
6247 	if ((protection & ~B_USER_PROTECTION) != 0)
6248 		return B_BAD_VALUE;
6249 
6250 	fix_protection(&protection);
6251 
6252 	// We need to write lock the address space, since we're going to play with
6253 	// the areas. Also make sure that none of the areas is wired and that we're
6254 	// actually allowed to change the protection.
6255 	AddressSpaceWriteLocker locker;
6256 
6257 	bool restart;
6258 	do {
6259 		restart = false;
6260 
6261 		status_t status = locker.SetTo(team_get_current_team_id());
6262 		if (status != B_OK)
6263 			return status;
6264 
6265 		// First round: Check whether the whole range is covered by areas
6266 		// and whether we are allowed to modify them.
6267 		addr_t currentAddress = address;
6268 		size_t sizeLeft = size;
6269 		while (sizeLeft > 0) {
6270 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6271 			if (area == NULL)
6272 				return B_NO_MEMORY;
6273 
6274 			if ((area->protection & B_KERNEL_AREA) != 0)
6275 				return B_NOT_ALLOWED;
6276 
6277 			// TODO: For (shared) mapped files we should check whether the new
6278 			// protections are compatible with the file permissions. We don't
6279 			// have a way to do that yet, though.
6280 
6281 			addr_t offset = currentAddress - area->Base();
6282 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6283 
6284 			AreaCacheLocker cacheLocker(area);
6285 
6286 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6287 					&locker, &cacheLocker)) {
6288 				restart = true;
6289 				break;
6290 			}
6291 
6292 			cacheLocker.Unlock();
6293 
6294 			currentAddress += rangeSize;
6295 			sizeLeft -= rangeSize;
6296 		}
6297 	} while (restart);
6298 
6299 	// Second round: If the new protection differs from that of an area, create
6300 	// a page protection array and re-map the mapped pages.
6301 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6302 	addr_t currentAddress = address;
6303 	size_t sizeLeft = size;
6304 	while (sizeLeft > 0) {
6305 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6306 		if (area == NULL)
6307 			return B_NO_MEMORY;
6308 
6309 		addr_t offset = currentAddress - area->Base();
6310 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6311 
6312 		currentAddress += rangeSize;
6313 		sizeLeft -= rangeSize;
6314 
6315 		if (area->page_protections == NULL) {
6316 			if (area->protection == protection)
6317 				continue;
6318 
6319 			status_t status = allocate_area_page_protections(area);
6320 			if (status != B_OK)
6321 				return status;
6322 		}
6323 
6324 		// We need to lock the complete cache chain, since we potentially unmap
6325 		// pages of lower caches.
6326 		VMCache* topCache = vm_area_get_locked_cache(area);
6327 		VMCacheChainLocker cacheChainLocker(topCache);
6328 		cacheChainLocker.LockAllSourceCaches();
6329 
6330 		for (addr_t pageAddress = area->Base() + offset;
6331 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6332 			map->Lock();
6333 
6334 			set_area_page_protection(area, pageAddress, protection);
6335 
6336 			phys_addr_t physicalAddress;
6337 			uint32 flags;
6338 
6339 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6340 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6341 				map->Unlock();
6342 				continue;
6343 			}
6344 
6345 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6346 			if (page == NULL) {
6347 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6348 					"\n", area, physicalAddress);
6349 				map->Unlock();
6350 				return B_ERROR;
6351 			}
6352 
6353 			// If the page is not in the topmost cache and write access is
6354 			// requested, we have to unmap it. Otherwise we can re-map it with
6355 			// the new protection.
6356 			bool unmapPage = page->Cache() != topCache
6357 				&& (protection & B_WRITE_AREA) != 0;
6358 
6359 			if (!unmapPage)
6360 				map->ProtectPage(area, pageAddress, protection);
6361 
6362 			map->Unlock();
6363 
6364 			if (unmapPage) {
6365 				DEBUG_PAGE_ACCESS_START(page);
6366 				unmap_page(area, pageAddress);
6367 				DEBUG_PAGE_ACCESS_END(page);
6368 			}
6369 		}
6370 	}
6371 
6372 	return B_OK;
6373 }
6374 
6375 
6376 status_t
6377 _user_sync_memory(void* _address, size_t size, uint32 flags)
6378 {
6379 	addr_t address = (addr_t)_address;
6380 	size = PAGE_ALIGN(size);
6381 
6382 	// check params
6383 	if ((address % B_PAGE_SIZE) != 0)
6384 		return B_BAD_VALUE;
6385 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6386 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6387 		// weird error code required by POSIX
6388 		return ENOMEM;
6389 	}
6390 
6391 	bool writeSync = (flags & MS_SYNC) != 0;
6392 	bool writeAsync = (flags & MS_ASYNC) != 0;
6393 	if (writeSync && writeAsync)
6394 		return B_BAD_VALUE;
6395 
6396 	if (size == 0 || (!writeSync && !writeAsync))
6397 		return B_OK;
6398 
6399 	// iterate through the range and sync all concerned areas
6400 	while (size > 0) {
6401 		// read lock the address space
6402 		AddressSpaceReadLocker locker;
6403 		status_t error = locker.SetTo(team_get_current_team_id());
6404 		if (error != B_OK)
6405 			return error;
6406 
6407 		// get the first area
6408 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6409 		if (area == NULL)
6410 			return B_NO_MEMORY;
6411 
6412 		uint32 offset = address - area->Base();
6413 		size_t rangeSize = min_c(area->Size() - offset, size);
6414 		offset += area->cache_offset;
6415 
6416 		// lock the cache
6417 		AreaCacheLocker cacheLocker(area);
6418 		if (!cacheLocker)
6419 			return B_BAD_VALUE;
6420 		VMCache* cache = area->cache;
6421 
6422 		locker.Unlock();
6423 
6424 		uint32 firstPage = offset >> PAGE_SHIFT;
6425 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6426 
6427 		// write the pages
6428 		if (cache->type == CACHE_TYPE_VNODE) {
6429 			if (writeSync) {
6430 				// synchronous
6431 				error = vm_page_write_modified_page_range(cache, firstPage,
6432 					endPage);
6433 				if (error != B_OK)
6434 					return error;
6435 			} else {
6436 				// asynchronous
6437 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6438 				// TODO: This is probably not quite what is supposed to happen.
6439 				// Especially when a lot has to be written, it might take ages
6440 				// until it really hits the disk.
6441 			}
6442 		}
6443 
6444 		address += rangeSize;
6445 		size -= rangeSize;
6446 	}
6447 
6448 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
6449 	// synchronize multiple mappings of the same file. In our VM they never get
6450 	// out of sync, though, so we don't have to do anything.
6451 
6452 	return B_OK;
6453 }
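

/*	Illustrative sketch (editorial addition; assuming the POSIX msync() in
	libroot is backed by this syscall): synchronously flushing a shared file
	mapping from userland. mappedFile and mappingSize are hypothetical.

	#include <stdio.h>
	#include <sys/mman.h>

	if (msync(mappedFile, mappingSize, MS_SYNC) != 0)
		perror("msync");
	// only vnode-backed (file) caches are written back; other cache types
	// are ignored, and MS_INVALIDATE is effectively a no-op in this VM
*/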
6454 
6455 
6456 status_t
6457 _user_memory_advice(void* address, size_t size, uint32 advice)
6458 {
6459 	// TODO: Implement!
6460 	return B_OK;
6461 }
6462 
6463 
6464 status_t
6465 _user_get_memory_properties(team_id teamID, const void* address,
6466 	uint32* _protected, uint32* _lock)
6467 {
6468 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6469 		return B_BAD_ADDRESS;
6470 
6471 	AddressSpaceReadLocker locker;
6472 	status_t error = locker.SetTo(teamID);
6473 	if (error != B_OK)
6474 		return error;
6475 
6476 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6477 	if (area == NULL)
6478 		return B_NO_MEMORY;
6479 
6481 	uint32 protection = area->protection;
6482 	if (area->page_protections != NULL)
6483 		protection = get_area_page_protection(area, (addr_t)address);
6484 
6485 	uint32 wiring = area->wiring;
6486 
6487 	locker.Unlock();
6488 
6489 	error = user_memcpy(_protected, &protection, sizeof(protection));
6490 	if (error != B_OK)
6491 		return error;
6492 
6493 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6494 
6495 	return error;
6496 }
6497 
6498 
6499 // #pragma mark -- compatibility
6500 
6501 
6502 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6503 
6504 
6505 struct physical_entry_beos {
6506 	uint32	address;
6507 	uint32	size;
6508 };
6509 
6510 
6511 /*!	The physical_entry structure has changed. We need to translate it to the
6512 	old one.
6513 */
6514 extern "C" int32
6515 __get_memory_map_beos(const void* _address, size_t numBytes,
6516 	physical_entry_beos* table, int32 numEntries)
6517 {
6518 	if (numEntries <= 0)
6519 		return B_BAD_VALUE;
6520 
6521 	const uint8* address = (const uint8*)_address;
6522 
6523 	int32 count = 0;
6524 	while (numBytes > 0 && count < numEntries) {
6525 		physical_entry entry;
6526 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6527 		if (result < 0) {
6528 			if (result != B_BUFFER_OVERFLOW)
6529 				return result;
6530 		}
6531 
6532 		if (entry.address >= (phys_addr_t)1 << 32) {
6533 			panic("get_memory_map(): Address is greater 4 GB!");
6534 			return B_ERROR;
6535 		}
6536 
6537 		table[count].address = entry.address;
6538 		table[count++].size = entry.size;
6539 
6540 		address += entry.size;
6541 		numBytes -= entry.size;
6542 	}
6543 
6544 	// null-terminate the table, if possible
6545 	if (count < numEntries) {
6546 		table[count].address = 0;
6547 		table[count].size = 0;
6548 	}
6549 
6550 	return B_OK;
6551 }
6552 
6553 
6554 /*!	The type of the \a physicalAddress parameter has changed from void* to
6555 	phys_addr_t.
6556 */
6557 extern "C" area_id
6558 __map_physical_memory_beos(const char* name, void* physicalAddress,
6559 	size_t numBytes, uint32 addressSpec, uint32 protection,
6560 	void** _virtualAddress)
6561 {
6562 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6563 		addressSpec, protection, _virtualAddress);
6564 }
6565 
6566 
6567 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6568 	we meddle with the \a lock parameter to force 32-bit physical memory.
6569 */
6570 extern "C" area_id
6571 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6572 	size_t size, uint32 lock, uint32 protection)
6573 {
6574 	switch (lock) {
6575 		case B_NO_LOCK:
6576 			break;
6577 		case B_FULL_LOCK:
6578 		case B_LAZY_LOCK:
6579 			lock = B_32_BIT_FULL_LOCK;
6580 			break;
6581 		case B_CONTIGUOUS:
6582 			lock = B_32_BIT_CONTIGUOUS;
6583 			break;
6584 	}
6585 
6586 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6587 		protection);
6588 }
6589 
6590 
6591 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6592 	"BASE");
6593 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6594 	"map_physical_memory@", "BASE");
6595 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6596 	"BASE");
6597 
6598 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6599 	"get_memory_map@@", "1_ALPHA3");
6600 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6601 	"map_physical_memory@@", "1_ALPHA3");
6602 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6603 	"1_ALPHA3");
6604 
6605 
6606 #else
6607 
6608 
6609 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6610 	"get_memory_map@@", "BASE");
6611 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6612 	"map_physical_memory@@", "BASE");
6613 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6614 	"BASE");
6615 
6616 
6617 #endif	// defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6618