xref: /haiku/src/system/kernel/vm/vm.cpp (revision 1f0635d2277dcd0818dc7f539c1cb1b296f6444b)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 namespace {
77 
78 class AreaCacheLocking {
79 public:
80 	inline bool Lock(VMCache* lockable)
81 	{
82 		return false;
83 	}
84 
85 	inline void Unlock(VMCache* lockable)
86 	{
87 		vm_area_put_locked_cache(lockable);
88 	}
89 };
90 
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 	inline AreaCacheLocker(VMCache* cache = NULL)
94 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95 	{
96 	}
97 
98 	inline AreaCacheLocker(VMArea* area)
99 		: AutoLocker<VMCache, AreaCacheLocking>()
100 	{
101 		SetTo(area);
102 	}
103 
104 	inline void SetTo(VMCache* cache, bool alreadyLocked)
105 	{
106 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107 	}
108 
109 	inline void SetTo(VMArea* area)
110 	{
111 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113 	}
114 };
115 
116 
117 class VMCacheChainLocker {
118 public:
119 	VMCacheChainLocker()
120 		:
121 		fTopCache(NULL),
122 		fBottomCache(NULL)
123 	{
124 	}
125 
126 	VMCacheChainLocker(VMCache* topCache)
127 		:
128 		fTopCache(topCache),
129 		fBottomCache(topCache)
130 	{
131 	}
132 
133 	~VMCacheChainLocker()
134 	{
135 		Unlock();
136 	}
137 
138 	void SetTo(VMCache* topCache)
139 	{
140 		fTopCache = topCache;
141 		fBottomCache = topCache;
142 
143 		if (topCache != NULL)
144 			topCache->SetUserData(NULL);
145 	}
146 
147 	VMCache* LockSourceCache()
148 	{
149 		if (fBottomCache == NULL || fBottomCache->source == NULL)
150 			return NULL;
151 
152 		VMCache* previousCache = fBottomCache;
153 
154 		fBottomCache = fBottomCache->source;
155 		fBottomCache->Lock();
156 		fBottomCache->AcquireRefLocked();
157 		fBottomCache->SetUserData(previousCache);
158 
159 		return fBottomCache;
160 	}
161 
162 	void LockAllSourceCaches()
163 	{
164 		while (LockSourceCache() != NULL) {
165 		}
166 	}
167 
168 	void Unlock(VMCache* exceptCache = NULL)
169 	{
170 		if (fTopCache == NULL)
171 			return;
172 
173 		// Unlock caches in source -> consumer direction. This is important to
174 		// avoid double-locking and a reversal of locking order in case a cache
175 		// is eligible for merging.
176 		VMCache* cache = fBottomCache;
177 		while (cache != NULL) {
178 			VMCache* nextCache = (VMCache*)cache->UserData();
179 			if (cache != exceptCache)
180 				cache->ReleaseRefAndUnlock(cache != fTopCache);
181 
182 			if (cache == fTopCache)
183 				break;
184 
185 			cache = nextCache;
186 		}
187 
188 		fTopCache = NULL;
189 		fBottomCache = NULL;
190 	}
191 
192 	void UnlockKeepRefs(bool keepTopCacheLocked)
193 	{
194 		if (fTopCache == NULL)
195 			return;
196 
197 		VMCache* nextCache = fBottomCache;
198 		VMCache* cache = NULL;
199 
200 		while (keepTopCacheLocked
201 				? nextCache != fTopCache : cache != fTopCache) {
202 			cache = nextCache;
203 			nextCache = (VMCache*)cache->UserData();
204 			cache->Unlock(cache != fTopCache);
205 		}
206 	}
207 
208 	void RelockCaches(bool topCacheLocked)
209 	{
210 		if (fTopCache == NULL)
211 			return;
212 
213 		VMCache* nextCache = fTopCache;
214 		VMCache* cache = NULL;
215 		if (topCacheLocked) {
216 			cache = nextCache;
217 			nextCache = cache->source;
218 		}
219 
220 		while (cache != fBottomCache && nextCache != NULL) {
221 			VMCache* consumer = cache;
222 			cache = nextCache;
223 			nextCache = cache->source;
224 			cache->Lock();
225 			cache->SetUserData(consumer);
226 		}
227 	}
228 
229 private:
230 	VMCache*	fTopCache;
231 	VMCache*	fBottomCache;
232 };
233 
234 } // namespace
235 
236 
237 // The memory reserve an allocation of a certain priority must not touch.
238 static const size_t kMemoryReserveForPriority[] = {
239 	VM_MEMORY_RESERVE_USER,		// user
240 	VM_MEMORY_RESERVE_SYSTEM,	// system
241 	0							// VIP
242 };
243 
244 
245 ObjectCache* gPageMappingsObjectCache;
246 
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248 
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
253 
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
255 
256 #if DEBUG_CACHE_LIST
257 
258 struct cache_info {
259 	VMCache*	cache;
260 	addr_t		page_count;
261 	addr_t		committed;
262 };
263 
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
266 
267 #endif	// DEBUG_CACHE_LIST
268 
269 
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 	bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 	int protection, int mapping, uint32 flags,
278 	const virtual_address_restrictions* addressRestrictions, bool kernel,
279 	VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
281 
282 
283 //	#pragma mark -
284 
285 
286 #if VM_PAGE_FAULT_TRACING
287 
288 namespace VMPageFaultTracing {
289 
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293 		:
294 		fAddress(address),
295 		fPC(pc),
296 		fWrite(write),
297 		fUser(user)
298 	{
299 		Initialized();
300 	}
301 
302 	virtual void AddDump(TraceOutput& out)
303 	{
304 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306 	}
307 
308 private:
309 	addr_t	fAddress;
310 	addr_t	fPC;
311 	bool	fWrite;
312 	bool	fUser;
313 };
314 
315 
316 // page fault errors
317 enum {
318 	PAGE_FAULT_ERROR_NO_AREA		= 0,
319 	PAGE_FAULT_ERROR_KERNEL_ONLY,
320 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 	PAGE_FAULT_ERROR_READ_PROTECTED,
322 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325 };
326 
327 
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 	PageFaultError(area_id area, status_t error)
331 		:
332 		fArea(area),
333 		fError(error)
334 	{
335 		Initialized();
336 	}
337 
338 	virtual void AddDump(TraceOutput& out)
339 	{
340 		switch (fError) {
341 			case PAGE_FAULT_ERROR_NO_AREA:
342 				out.Print("page fault error: no area");
343 				break;
344 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 				out.Print("page fault error: area: %ld, kernel only", fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 				out.Print("page fault error: area: %ld, write protected",
349 					fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_READ_PROTECTED:
352 				out.Print("page fault error: area: %ld, read protected", fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 				out.Print("page fault error: area: %ld, execute protected",
356 					fArea);
357 				break;
358 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 				out.Print("page fault error: kernel touching bad user memory");
360 				break;
361 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 				out.Print("page fault error: no address space");
363 				break;
364 			default:
365 				out.Print("page fault error: area: %ld, error: %s", fArea,
366 					strerror(fError));
367 				break;
368 		}
369 	}
370 
371 private:
372 	area_id		fArea;
373 	status_t	fError;
374 };
375 
376 
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 			vm_page* page)
381 		:
382 		fArea(area),
383 		fTopCache(topCache),
384 		fCache(cache),
385 		fPage(page)
386 	{
387 		Initialized();
388 	}
389 
390 	virtual void AddDump(TraceOutput& out)
391 	{
392 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 			"page: %p", fArea, fTopCache, fCache, fPage);
394 	}
395 
396 private:
397 	area_id		fArea;
398 	VMCache*	fTopCache;
399 	VMCache*	fCache;
400 	vm_page*	fPage;
401 };
402 
403 }	// namespace VMPageFaultTracing
404 
405 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 #	define TPF(x) ;
408 #endif	// VM_PAGE_FAULT_TRACING
409 
410 
411 //	#pragma mark -
412 
413 
414 /*!	The page's cache must be locked.
415 */
416 static inline void
417 increment_page_wired_count(vm_page* page)
418 {
419 	if (!page->IsMapped())
420 		atomic_add(&gMappedPagesCount, 1);
421 	page->IncrementWiredCount();
422 }
423 
424 
425 /*!	The page's cache must be locked.
426 */
427 static inline void
428 decrement_page_wired_count(vm_page* page)
429 {
430 	page->DecrementWiredCount();
431 	if (!page->IsMapped())
432 		atomic_add(&gMappedPagesCount, -1);
433 }
434 
435 
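/*!	Computes the virtual address within \a area at which the given \a page
	is (or would be) mapped, based on the page's cache offset.
*/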
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
438 {
439 	return area->Base()
440 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441 }
442 
443 
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
447 {
448 	VMAreaHash::ReadLock();
449 
450 	VMArea* area = VMAreaHash::LookupLocked(id);
451 	if (area != NULL && area->address_space != addressSpace)
452 		area = NULL;
453 
454 	VMAreaHash::ReadUnlock();
455 
456 	return area;
457 }
458 
459 
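/*!	Allocates the per-page protection array of \a area and initializes all
	entries to the area's own (user) protection.
*/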
460 static status_t
461 allocate_area_page_protections(VMArea* area)
462 {
463 	// In the page protections we store only the three user protections,
464 	// so we use 4 bits per page.
465 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 	area->page_protections = (uint8*)malloc_etc(bytes,
467 		HEAP_DONT_LOCK_KERNEL_SPACE);
468 	if (area->page_protections == NULL)
469 		return B_NO_MEMORY;
470 
471 	// init the page protections for all pages to that of the area
472 	uint32 areaProtection = area->protection
473 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 	memset(area->page_protections, areaProtection | (areaProtection << 4),
475 		bytes);
476 	return B_OK;
477 }
478 
479 
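/*!	Stores the protection of the page at \a pageAddress in \a area's
	per-page protection array. Only the user protection bits are kept.
	The area must have page protections allocated
	(see allocate_area_page_protections()).
*/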
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482 {
483 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 	uint8& entry = area->page_protections[pageIndex / 2];
486 	if (pageIndex % 2 == 0)
487 		entry = (entry & 0xf0) | protection;
488 	else
489 		entry = (entry & 0x0f) | (protection << 4);
490 }
491 
492 
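/*!	Returns the effective protection of the page at \a pageAddress. If the
	area has no per-page protections, the area's protection is returned.
	For kernel areas the stored user bits are translated into the
	corresponding kernel protection bits; for user areas the matching kernel
	bits are added as well.
*/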
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
495 {
496 	if (area->page_protections == NULL)
497 		return area->protection;
498 
499 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 	uint32 protection = area->page_protections[pageIndex / 2];
501 	if (pageIndex % 2 == 0)
502 		protection &= 0x0f;
503 	else
504 		protection >>= 4;
505 
506 	// If this is a kernel area we translate the user flags to kernel flags.
507 	if (area->address_space == VMAddressSpace::Kernel()) {
508 		uint32 kernelProtection = 0;
509 		if ((protection & B_READ_AREA) != 0)
510 			kernelProtection |= B_KERNEL_READ_AREA;
511 		if ((protection & B_WRITE_AREA) != 0)
512 			kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 		return kernelProtection;
515 	}
516 
517 	return protection | B_KERNEL_READ_AREA
518 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519 }
520 
521 
522 /*!	The caller must have reserved as many pages as the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue,
575 		// since otherwise the page daemon wouldn't keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
586 
587 
588 /*!	Unmaps the page at \a virtualAddress from \a area. The caller must hold
589 	the lock of the page's cache.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	Unmaps the given address range from \a area. The caller must hold the
600 	locks of all mapped pages' caches.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 /*!	Cuts a piece out of an area. If the given cut range covers the complete
610 	area, it is deleted. If it covers the beginning or the end, the area is
611 	resized accordingly. If the range covers some part in the middle of the
612 	area, it is split in two; in this case the second area is returned via
613 	\a _secondArea (the variable is left untouched in the other cases).
614 	The address space must be write locked.
615 	The caller must ensure that no part of the given range is wired.
616 */
617 static status_t
618 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
619 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
620 {
621 	// Does the cut range intersect with the area at all?
622 	addr_t areaLast = area->Base() + (area->Size() - 1);
623 	if (area->Base() > lastAddress || areaLast < address)
624 		return B_OK;
625 
626 	// Is the area fully covered?
627 	if (area->Base() >= address && areaLast <= lastAddress) {
628 		delete_area(addressSpace, area, false);
629 		return B_OK;
630 	}
631 
632 	int priority;
633 	uint32 allocationFlags;
634 	if (addressSpace == VMAddressSpace::Kernel()) {
635 		priority = VM_PRIORITY_SYSTEM;
636 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
637 			| HEAP_DONT_LOCK_KERNEL_SPACE;
638 	} else {
639 		priority = VM_PRIORITY_USER;
640 		allocationFlags = 0;
641 	}
642 
643 	VMCache* cache = vm_area_get_locked_cache(area);
644 	VMCacheChainLocker cacheChainLocker(cache);
645 	cacheChainLocker.LockAllSourceCaches();
646 
647 	// Cut the end only?
648 	if (areaLast <= lastAddress) {
649 		size_t oldSize = area->Size();
650 		size_t newSize = address - area->Base();
651 
652 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
653 			allocationFlags);
654 		if (error != B_OK)
655 			return error;
656 
657 		// unmap pages
658 		unmap_pages(area, address, oldSize - newSize);
659 
660 		// If no one else uses the area's cache, we can resize it, too.
661 		if (cache->areas == area && area->cache_next == NULL
662 			&& cache->consumers.IsEmpty()
663 			&& cache->type == CACHE_TYPE_RAM) {
664 			// Since VMCache::Resize() can temporarily drop the lock, we must
665 			// unlock all lower caches to prevent locking order inversion.
666 			cacheChainLocker.Unlock(cache);
667 			cache->Resize(cache->virtual_base + newSize, priority);
668 			cache->ReleaseRefAndUnlock();
669 		}
670 
671 		return B_OK;
672 	}
673 
674 	// Cut the beginning only?
675 	if (area->Base() >= address) {
676 		addr_t oldBase = area->Base();
677 		addr_t newBase = lastAddress + 1;
678 		size_t newSize = areaLast - lastAddress;
679 
680 		// unmap pages
681 		unmap_pages(area, oldBase, newBase - oldBase);
682 
683 		// resize the area
684 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
685 			allocationFlags);
686 		if (error != B_OK)
687 			return error;
688 
689 		// TODO: If no one else uses the area's cache, we should resize it, too!
690 
691 		area->cache_offset += newBase - oldBase;
692 
693 		return B_OK;
694 	}
695 
696 	// The tough part -- cut a piece out of the middle of the area.
697 	// We do that by shrinking the area to the beginning section and creating a
698 	// new area for the end section.
699 
700 	addr_t firstNewSize = address - area->Base();
701 	addr_t secondBase = lastAddress + 1;
702 	addr_t secondSize = areaLast - lastAddress;
703 
704 	// unmap pages
705 	unmap_pages(area, address, area->Size() - firstNewSize);
706 
707 	// resize the area
708 	addr_t oldSize = area->Size();
709 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
710 		allocationFlags);
711 	if (error != B_OK)
712 		return error;
713 
714 	// TODO: If no one else uses the area's cache, we might want to create a
715 	// new cache for the second area, transfer the concerned pages from the
716 	// first cache to it and resize the first cache.
717 
718 	// map the second area
719 	virtual_address_restrictions addressRestrictions = {};
720 	addressRestrictions.address = (void*)secondBase;
721 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
722 	VMArea* secondArea;
723 	error = map_backing_store(addressSpace, cache,
724 		area->cache_offset + (secondBase - area->Base()), area->name,
725 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
726 		&addressRestrictions, kernel, &secondArea, NULL);
727 	if (error != B_OK) {
728 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
729 		return error;
730 	}
731 
732 	// We need a cache reference for the new area.
733 	cache->AcquireRefLocked();
734 
735 	if (_secondArea != NULL)
736 		*_secondArea = secondArea;
737 
738 	return B_OK;
739 }
740 
741 
742 /*!	Deletes all areas in the given address range.
743 	The address space must be write-locked.
744 	The caller must ensure that no part of the given range is wired.
745 */
746 static status_t
747 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
748 	bool kernel)
749 {
750 	size = PAGE_ALIGN(size);
751 	addr_t lastAddress = address + (size - 1);
752 
753 	// Check whether the caller is allowed to modify the concerned areas.
754 	if (!kernel) {
755 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
756 				VMArea* area = it.Next();) {
757 			addr_t areaLast = area->Base() + (area->Size() - 1);
758 			if (area->Base() < lastAddress && address < areaLast) {
759 				if (area->address_space == VMAddressSpace::Kernel()) {
760 					dprintf("unmap_address_range: team %" B_PRId32 " tried to "
761 						"unmap range of kernel area %" B_PRId32 " (%s)\n",
762 						team_get_current_team_id(), area->id, area->name);
763 					return B_NOT_ALLOWED;
764 				}
765 			}
766 		}
767 	}
768 
769 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
770 			VMArea* area = it.Next();) {
771 		addr_t areaLast = area->Base() + (area->Size() - 1);
772 		if (area->Base() < lastAddress && address < areaLast) {
773 			status_t error = cut_area(addressSpace, area, address,
774 				lastAddress, NULL, kernel);
775 			if (error != B_OK)
776 				return error;
777 				// Failing after already messing with areas is ugly, but we
778 				// can't do anything about it.
779 		}
780 	}
781 
782 	return B_OK;
783 }
784 
785 
786 /*! You need to hold the lock of the cache and the write lock of the address
787 	space when calling this function.
788 	Note that in case of error the cache will be temporarily unlocked.
789 	If \a addressSpec is \c B_EXACT_ADDRESS and the
790 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
791 	that no part of the specified address range (base \c *_virtualAddress, size
792 	\a size) is wired.
793 */
794 static status_t
795 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
796 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
797 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
798 	bool kernel, VMArea** _area, void** _virtualAddress)
799 {
800 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
801 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
802 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
803 		addressRestrictions->address, offset, size,
804 		addressRestrictions->address_specification, wiring, protection,
805 		_area, areaName));
806 	cache->AssertLocked();
807 
808 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
809 		| HEAP_DONT_LOCK_KERNEL_SPACE;
810 	int priority;
811 	if (addressSpace != VMAddressSpace::Kernel()) {
812 		priority = VM_PRIORITY_USER;
813 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
814 		priority = VM_PRIORITY_VIP;
815 		allocationFlags |= HEAP_PRIORITY_VIP;
816 	} else
817 		priority = VM_PRIORITY_SYSTEM;
818 
819 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
820 		allocationFlags);
821 	if (area == NULL)
822 		return B_NO_MEMORY;
823 
824 	status_t status;
825 
826 	// if this is a private map, we need to create a new cache
827 	// to handle the private copies of pages as they are written to
828 	VMCache* sourceCache = cache;
829 	if (mapping == REGION_PRIVATE_MAP) {
830 		VMCache* newCache;
831 
832 		// create an anonymous cache
833 		status = VMCacheFactory::CreateAnonymousCache(newCache,
834 			(protection & B_STACK_AREA) != 0
835 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
836 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
837 		if (status != B_OK)
838 			goto err1;
839 
840 		newCache->Lock();
841 		newCache->temporary = 1;
842 		newCache->virtual_base = offset;
843 		newCache->virtual_end = offset + size;
844 
845 		cache->AddConsumer(newCache);
846 
847 		cache = newCache;
848 	}
849 
850 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
851 		status = cache->SetMinimalCommitment(size, priority);
852 		if (status != B_OK)
853 			goto err2;
854 	}
855 
856 	// check to see if this address space has entered DELETE state
857 	if (addressSpace->IsBeingDeleted()) {
858 		// someone is trying to delete this address space now, so we can't
859 		// insert the area -- back out
860 		status = B_BAD_TEAM_ID;
861 		goto err2;
862 	}
863 
864 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
865 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
866 		status = unmap_address_range(addressSpace,
867 			(addr_t)addressRestrictions->address, size, kernel);
868 		if (status != B_OK)
869 			goto err2;
870 	}
871 
872 	status = addressSpace->InsertArea(area, size, addressRestrictions,
873 		allocationFlags, _virtualAddress);
874 	if (status == B_NO_MEMORY
875 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
876 		// Since the kernel address space is locked by the caller, we can't
877 		// wait here as of course no resources can be released while the locks
878 		// are held. But we can at least issue this so the next caller doesn't
879 		// run into the same problem.
880 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, 0, 0);
881 	}
882 	if (status != B_OK)
883 		goto err2;
884 
885 	// attach the cache to the area
886 	area->cache = cache;
887 	area->cache_offset = offset;
888 
889 	// point the cache back to the area
890 	cache->InsertAreaLocked(area);
891 	if (mapping == REGION_PRIVATE_MAP)
892 		cache->Unlock();
893 
894 	// insert the area in the global area hash table
895 	VMAreaHash::Insert(area);
896 
897 	// grab a ref to the address space (the area holds this)
898 	addressSpace->Get();
899 
900 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
901 //		cache, sourceCache, areaName, area);
902 
903 	*_area = area;
904 	return B_OK;
905 
906 err2:
907 	if (mapping == REGION_PRIVATE_MAP) {
908 		// We created this cache, so we must delete it again. Note that we
909 		// need to temporarily unlock the source cache or we'll otherwise
910 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
911 		sourceCache->Unlock();
912 		cache->ReleaseRefAndUnlock();
913 		sourceCache->Lock();
914 	}
915 err1:
916 	addressSpace->DeleteArea(area, allocationFlags);
917 	return status;
918 }
919 
920 
921 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
922 	  locker1, locker2).
923 */
924 template<typename LockerType1, typename LockerType2>
925 static inline bool
926 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
927 {
928 	area->cache->AssertLocked();
929 
930 	VMAreaUnwiredWaiter waiter;
931 	if (!area->AddWaiterIfWired(&waiter))
932 		return false;
933 
934 	// unlock everything and wait
935 	if (locker1 != NULL)
936 		locker1->Unlock();
937 	if (locker2 != NULL)
938 		locker2->Unlock();
939 
940 	waiter.waitEntry.Wait();
941 
942 	return true;
943 }
944 
945 
946 /*!	Checks whether the given area has any wired ranges intersecting with the
947 	specified range and waits, if so.
948 
949 	When it has to wait, the function calls \c Unlock() on both \a locker1
950 	and \a locker2, if given.
951 	The area's top cache must be locked and must be unlocked as a side effect
952 	of calling \c Unlock() on either \a locker1 or \a locker2.
953 
954 	If the function does not have to wait it does not modify or unlock any
955 	object.
956 
957 	\param area The area to be checked.
958 	\param base The base address of the range to check.
959 	\param locker1 An object to be unlocked before starting to wait (may
960 		be \c NULL).
961 	\param locker2 An object to be unlocked before starting to wait (may
962 		be \c NULL).
963 		be \c NULL).
964 	\return \c true, if the function had to wait, \c false otherwise.
965 */
966 template<typename LockerType1, typename LockerType2>
967 static inline bool
968 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
969 	LockerType1* locker1, LockerType2* locker2)
970 {
971 	area->cache->AssertLocked();
972 
973 	VMAreaUnwiredWaiter waiter;
974 	if (!area->AddWaiterIfWired(&waiter, base, size))
975 		return false;
976 
977 	// unlock everything and wait
978 	if (locker1 != NULL)
979 		locker1->Unlock();
980 	if (locker2 != NULL)
981 		locker2->Unlock();
982 
983 	waiter.waitEntry.Wait();
984 
985 	return true;
986 }
987 
988 
989 /*!	Checks whether the given address space has any wired ranges intersecting
990 	with the specified range and waits, if so.
991 
992 	Similar to wait_if_area_range_is_wired(), with the following differences:
993 	- All areas intersecting with the range are checked (respectively all until
994 	  one is found that contains a wired range intersecting with the given
995 	  range).
996 	- The given address space must at least be read-locked and must be unlocked
997 	  when \c Unlock() is called on \a locker.
998 	- None of the areas' caches are allowed to be locked.
999 */
1000 template<typename LockerType>
1001 static inline bool
1002 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1003 	size_t size, LockerType* locker)
1004 {
1005 	addr_t end = base + size - 1;
1006 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
1007 			VMArea* area = it.Next();) {
1008 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
1009 		if (area->Base() > end)
1010 			return false;
1011 
1012 		if (base >= area->Base() + area->Size() - 1)
1013 			continue;
1014 
1015 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1016 
1017 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1018 			return true;
1019 	}
1020 
1021 	return false;
1022 }
1023 
1024 
1025 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1026 	It must be called in a situation where the kernel address space may be
1027 	locked.
1028 */
1029 status_t
1030 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1031 {
1032 	AddressSpaceReadLocker locker;
1033 	VMArea* area;
1034 	status_t status = locker.SetFromArea(id, area);
1035 	if (status != B_OK)
1036 		return status;
1037 
1038 	if (area->page_protections == NULL) {
1039 		status = allocate_area_page_protections(area);
1040 		if (status != B_OK)
1041 			return status;
1042 	}
1043 
1044 	*cookie = (void*)area;
1045 	return B_OK;
1046 }
1047 
1048 
1049 /*!	This is a debug helper function that is only meant for very specific
1050 	use cases.
1051 	It sets the protection of the given address range to the specified value.
1052 	If \a protection is 0, the involved pages are marked non-present in the
1053 	translation map to cause a fault on access. The pages aren't actually
1054 	unmapped, however, so that they can be marked present again with
1055 	additional calls to this function. For this to work the area must be
1056 	fully locked in memory so that the pages aren't otherwise touched.
1057 	This function does not lock the kernel address space and needs to be
1058 	supplied with a \a cookie retrieved from a successful call to
1059 	vm_prepare_kernel_area_debug_protection().
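
	A minimal usage sketch (\c someAreaID, \c address, and \c size are
	placeholders for a suitably prepared, fully locked kernel area and a
	page-aligned range within it):
	\code
	void* cookie;
	if (vm_prepare_kernel_area_debug_protection(someAreaID, &cookie) == B_OK) {
		// make the range fault on any access
		vm_set_kernel_area_debug_protection(cookie, address, size, 0);
		// ...
		// restore full access again
		vm_set_kernel_area_debug_protection(cookie, address, size,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	}
	\endcode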
1060 */
1061 status_t
1062 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1063 	uint32 protection)
1064 {
1065 	// check address range
1066 	addr_t address = (addr_t)_address;
1067 	size = PAGE_ALIGN(size);
1068 
1069 	if ((address % B_PAGE_SIZE) != 0
1070 		|| (addr_t)address + size < (addr_t)address
1071 		|| !IS_KERNEL_ADDRESS(address)
1072 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1073 		return B_BAD_VALUE;
1074 	}
1075 
1076 	// Translate the kernel protection to user protection as we only store that.
1077 	if ((protection & B_KERNEL_READ_AREA) != 0)
1078 		protection |= B_READ_AREA;
1079 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1080 		protection |= B_WRITE_AREA;
1081 
1082 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1083 	VMTranslationMap* map = addressSpace->TranslationMap();
1084 	VMArea* area = (VMArea*)cookie;
1085 
1086 	addr_t offset = address - area->Base();
1087 	if (area->Size() - offset < size) {
1088 		panic("protect range not fully within supplied area");
1089 		return B_BAD_VALUE;
1090 	}
1091 
1092 	if (area->page_protections == NULL) {
1093 		panic("area has no page protections");
1094 		return B_BAD_VALUE;
1095 	}
1096 
1097 	// Invalidate the mapping entries so any access to them will fault, or
1098 	// restore the mapping entries unchanged so that lookup will succeed again.
1099 	map->Lock();
1100 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1101 	map->Unlock();
1102 
1103 	// And set the proper page protections so that the fault case will actually
1104 	// fail and not simply try to map a new page.
1105 	for (addr_t pageAddress = address; pageAddress < address + size;
1106 			pageAddress += B_PAGE_SIZE) {
1107 		set_area_page_protection(area, pageAddress, protection);
1108 	}
1109 
1110 	return B_OK;
1111 }
1112 
1113 
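/*!	Blocks the given kernel address range by covering it with an area that
	is backed by an empty anonymous cache and not accessible at all, so that
	any access to it will fault. Returns the ID of the blocking area on
	success, an error code otherwise.
*/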
1114 status_t
1115 vm_block_address_range(const char* name, void* address, addr_t size)
1116 {
1117 	if (!arch_vm_supports_protection(0))
1118 		return B_NOT_SUPPORTED;
1119 
1120 	AddressSpaceWriteLocker locker;
1121 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1122 	if (status != B_OK)
1123 		return status;
1124 
1125 	VMAddressSpace* addressSpace = locker.AddressSpace();
1126 
1127 	// create an anonymous cache
1128 	VMCache* cache;
1129 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1130 		VM_PRIORITY_SYSTEM);
1131 	if (status != B_OK)
1132 		return status;
1133 
1134 	cache->temporary = 1;
1135 	cache->virtual_end = size;
1136 	cache->Lock();
1137 
1138 	VMArea* area;
1139 	virtual_address_restrictions addressRestrictions = {};
1140 	addressRestrictions.address = address;
1141 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1142 	status = map_backing_store(addressSpace, cache, 0, name, size,
1143 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1144 		true, &area, NULL);
1145 	if (status != B_OK) {
1146 		cache->ReleaseRefAndUnlock();
1147 		return status;
1148 	}
1149 
1150 	cache->Unlock();
1151 	area->cache_type = CACHE_TYPE_RAM;
1152 	return area->id;
1153 }
1154 
1155 
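/*!	Releases an address range reservation previously made with
	vm_reserve_address_range() in the given team's address space.
*/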
1156 status_t
1157 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1158 {
1159 	AddressSpaceWriteLocker locker(team);
1160 	if (!locker.IsLocked())
1161 		return B_BAD_TEAM_ID;
1162 
1163 	VMAddressSpace* addressSpace = locker.AddressSpace();
1164 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1165 		addressSpace == VMAddressSpace::Kernel()
1166 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1167 }
1168 
1169 
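/*!	Reserves a range of addresses in the given team's address space. On
	success the base address of the reserved range is returned via
	\a _address.
*/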
1170 status_t
1171 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1172 	addr_t size, uint32 flags)
1173 {
1174 	if (size == 0)
1175 		return B_BAD_VALUE;
1176 
1177 	AddressSpaceWriteLocker locker(team);
1178 	if (!locker.IsLocked())
1179 		return B_BAD_TEAM_ID;
1180 
1181 	virtual_address_restrictions addressRestrictions = {};
1182 	addressRestrictions.address = *_address;
1183 	addressRestrictions.address_specification = addressSpec;
1184 	VMAddressSpace* addressSpace = locker.AddressSpace();
1185 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1186 		addressSpace == VMAddressSpace::Kernel()
1187 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1188 		_address);
1189 }
1190 
1191 
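/*!	Creates an area backed by an anonymous (RAM) cache. Depending on
	\a wiring the pages are either allocated and mapped right away
	(B_FULL_LOCK, B_CONTIGUOUS), looked up from existing mappings
	(B_ALREADY_WIRED, boot time only), or faulted in lazily (B_NO_LOCK,
	B_LAZY_LOCK). Returns the ID of the new area on success, an error code
	otherwise.
*/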
1192 area_id
1193 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1194 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1195 	const virtual_address_restrictions* virtualAddressRestrictions,
1196 	const physical_address_restrictions* physicalAddressRestrictions,
1197 	bool kernel, void** _address)
1198 {
1199 	VMArea* area;
1200 	VMCache* cache;
1201 	vm_page* page = NULL;
1202 	bool isStack = (protection & B_STACK_AREA) != 0;
1203 	page_num_t guardPages;
1204 	bool canOvercommit = false;
1205 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1206 		? VM_PAGE_ALLOC_CLEAR : 0;
1207 
1208 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1209 		team, name, size));
1210 
1211 	size = PAGE_ALIGN(size);
1212 	guardSize = PAGE_ALIGN(guardSize);
1213 	guardPages = guardSize / B_PAGE_SIZE;
1214 
1215 	if (size == 0 || size < guardSize)
1216 		return B_BAD_VALUE;
1217 	if (!arch_vm_supports_protection(protection))
1218 		return B_NOT_SUPPORTED;
1219 
1220 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1221 		canOvercommit = true;
1222 
1223 #ifdef DEBUG_KERNEL_STACKS
1224 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1225 		isStack = true;
1226 #endif
1227 
1228 	// check parameters
1229 	switch (virtualAddressRestrictions->address_specification) {
1230 		case B_ANY_ADDRESS:
1231 		case B_EXACT_ADDRESS:
1232 		case B_BASE_ADDRESS:
1233 		case B_ANY_KERNEL_ADDRESS:
1234 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1235 		case B_RANDOMIZED_ANY_ADDRESS:
1236 		case B_RANDOMIZED_BASE_ADDRESS:
1237 			break;
1238 
1239 		default:
1240 			return B_BAD_VALUE;
1241 	}
1242 
1243 	// If low or high physical address restrictions are given, we force
1244 	// B_CONTIGUOUS wiring, since only then we'll use
1245 	// vm_page_allocate_page_run() which deals with those restrictions.
1246 	if (physicalAddressRestrictions->low_address != 0
1247 		|| physicalAddressRestrictions->high_address != 0) {
1248 		wiring = B_CONTIGUOUS;
1249 	}
1250 
1251 	physical_address_restrictions stackPhysicalRestrictions;
1252 	bool doReserveMemory = false;
1253 	switch (wiring) {
1254 		case B_NO_LOCK:
1255 			break;
1256 		case B_FULL_LOCK:
1257 		case B_LAZY_LOCK:
1258 		case B_CONTIGUOUS:
1259 			doReserveMemory = true;
1260 			break;
1261 		case B_ALREADY_WIRED:
1262 			break;
1263 		case B_LOMEM:
1264 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1265 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1266 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1267 			wiring = B_CONTIGUOUS;
1268 			doReserveMemory = true;
1269 			break;
1270 		case B_32_BIT_FULL_LOCK:
1271 			if (B_HAIKU_PHYSICAL_BITS <= 32
1272 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1273 				wiring = B_FULL_LOCK;
1274 				doReserveMemory = true;
1275 				break;
1276 			}
1277 			// TODO: We don't really support this mode efficiently. Just fall
1278 			// through for now ...
1279 		case B_32_BIT_CONTIGUOUS:
1280 			#if B_HAIKU_PHYSICAL_BITS > 32
1281 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1282 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1283 					stackPhysicalRestrictions.high_address
1284 						= (phys_addr_t)1 << 32;
1285 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1286 				}
1287 			#endif
1288 			wiring = B_CONTIGUOUS;
1289 			doReserveMemory = true;
1290 			break;
1291 		default:
1292 			return B_BAD_VALUE;
1293 	}
1294 
1295 	// Optimization: For a single-page contiguous allocation without low/high
1296 	// memory restrictions, B_FULL_LOCK wiring suffices.
1297 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1298 		&& physicalAddressRestrictions->low_address == 0
1299 		&& physicalAddressRestrictions->high_address == 0) {
1300 		wiring = B_FULL_LOCK;
1301 	}
1302 
1303 	// For full lock or contiguous areas we're also going to map the pages and
1304 	// thus need to reserve pages for the mapping backend upfront.
1305 	addr_t reservedMapPages = 0;
1306 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1307 		AddressSpaceWriteLocker locker;
1308 		status_t status = locker.SetTo(team);
1309 		if (status != B_OK)
1310 			return status;
1311 
1312 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1313 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1314 	}
1315 
1316 	int priority;
1317 	if (team != VMAddressSpace::KernelID())
1318 		priority = VM_PRIORITY_USER;
1319 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1320 		priority = VM_PRIORITY_VIP;
1321 	else
1322 		priority = VM_PRIORITY_SYSTEM;
1323 
1324 	// Reserve memory before acquiring the address space lock. This reduces the
1325 	// chances of failure, since while holding the write lock to the address
1326 	// space (if it is the kernel address space, that is), the low memory handler
1327 	// won't be able to free anything for us.
1328 	addr_t reservedMemory = 0;
1329 	if (doReserveMemory) {
1330 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1331 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1332 			return B_NO_MEMORY;
1333 		reservedMemory = size;
1334 		// TODO: We don't reserve the memory for the pages for the page
1335 		// directories/tables. We actually need to, since we currently don't
1336 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1337 		// there are actually fewer physical pages than there should be, which
1338 		// can get the VM into trouble in low memory situations.
1339 	}
1340 
1341 	AddressSpaceWriteLocker locker;
1342 	VMAddressSpace* addressSpace;
1343 	status_t status;
1344 
1345 	// For full lock areas reserve the pages before locking the address
1346 	// space. E.g. block caches can't release their memory while we hold the
1347 	// address space lock.
1348 	page_num_t reservedPages = reservedMapPages;
1349 	if (wiring == B_FULL_LOCK)
1350 		reservedPages += size / B_PAGE_SIZE;
1351 
1352 	vm_page_reservation reservation;
1353 	if (reservedPages > 0) {
1354 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1355 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1356 					priority)) {
1357 				reservedPages = 0;
1358 				status = B_WOULD_BLOCK;
1359 				goto err0;
1360 			}
1361 		} else
1362 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1363 	}
1364 
1365 	if (wiring == B_CONTIGUOUS) {
1366 		// we try to allocate the page run here upfront as this may easily
1367 		// fail for obvious reasons
1368 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1369 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1370 		if (page == NULL) {
1371 			status = B_NO_MEMORY;
1372 			goto err0;
1373 		}
1374 	}
1375 
1376 	// Lock the address space and, if B_EXACT_ADDRESS and
1377 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1378 	// is not wired.
1379 	do {
1380 		status = locker.SetTo(team);
1381 		if (status != B_OK)
1382 			goto err1;
1383 
1384 		addressSpace = locker.AddressSpace();
1385 	} while (virtualAddressRestrictions->address_specification
1386 			== B_EXACT_ADDRESS
1387 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1388 		&& wait_if_address_range_is_wired(addressSpace,
1389 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1390 
1391 	// create an anonymous cache
1392 	// if it's a stack, make sure that two pages are available at least
1393 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1394 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1395 		wiring == B_NO_LOCK, priority);
1396 	if (status != B_OK)
1397 		goto err1;
1398 
1399 	cache->temporary = 1;
1400 	cache->virtual_end = size;
1401 	cache->committed_size = reservedMemory;
1402 		// TODO: This should be done via a method.
1403 	reservedMemory = 0;
1404 
1405 	cache->Lock();
1406 
1407 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1408 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1409 		kernel, &area, _address);
1410 
1411 	if (status != B_OK) {
1412 		cache->ReleaseRefAndUnlock();
1413 		goto err1;
1414 	}
1415 
1416 	locker.DegradeToReadLock();
1417 
1418 	switch (wiring) {
1419 		case B_NO_LOCK:
1420 		case B_LAZY_LOCK:
1421 			// do nothing - the pages are mapped in as needed
1422 			break;
1423 
1424 		case B_FULL_LOCK:
1425 		{
1426 			// Allocate and map all pages for this area
1427 
1428 			off_t offset = 0;
1429 			for (addr_t address = area->Base();
1430 					address < area->Base() + (area->Size() - 1);
1431 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1432 #ifdef DEBUG_KERNEL_STACKS
1433 #	ifdef STACK_GROWS_DOWNWARDS
1434 				if (isStack && address < area->Base()
1435 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1436 #	else
1437 				if (isStack && address >= area->Base() + area->Size()
1438 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1439 #	endif
1440 					continue;
1441 #endif
1442 				vm_page* page = vm_page_allocate_page(&reservation,
1443 					PAGE_STATE_WIRED | pageAllocFlags);
1444 				cache->InsertPage(page, offset);
1445 				map_page(area, page, address, protection, &reservation);
1446 
1447 				DEBUG_PAGE_ACCESS_END(page);
1448 			}
1449 
1450 			break;
1451 		}
1452 
1453 		case B_ALREADY_WIRED:
1454 		{
1455 			// The pages should already be mapped. This is only really useful
1456 			// during boot time. Find the appropriate vm_page objects and stick
1457 			// them in the cache object.
1458 			VMTranslationMap* map = addressSpace->TranslationMap();
1459 			off_t offset = 0;
1460 
1461 			if (!gKernelStartup)
1462 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1463 
1464 			map->Lock();
1465 
1466 			for (addr_t virtualAddress = area->Base();
1467 					virtualAddress < area->Base() + (area->Size() - 1);
1468 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1469 				phys_addr_t physicalAddress;
1470 				uint32 flags;
1471 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1472 				if (status < B_OK) {
1473 					panic("looking up mapping failed for va 0x%lx\n",
1474 						virtualAddress);
1475 				}
1476 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1477 				if (page == NULL) {
1478 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1479 						"\n", physicalAddress);
1480 				}
1481 
1482 				DEBUG_PAGE_ACCESS_START(page);
1483 
1484 				cache->InsertPage(page, offset);
1485 				increment_page_wired_count(page);
1486 				vm_page_set_state(page, PAGE_STATE_WIRED);
1487 				page->busy = false;
1488 
1489 				DEBUG_PAGE_ACCESS_END(page);
1490 			}
1491 
1492 			map->Unlock();
1493 			break;
1494 		}
1495 
1496 		case B_CONTIGUOUS:
1497 		{
1498 			// We have already allocated our contiguous page run, so we can now
1499 			// just map the pages into the address space
1500 			VMTranslationMap* map = addressSpace->TranslationMap();
1501 			phys_addr_t physicalAddress
1502 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1503 			addr_t virtualAddress = area->Base();
1504 			off_t offset = 0;
1505 
1506 			map->Lock();
1507 
1508 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1509 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1510 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1511 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1512 				if (page == NULL)
1513 					panic("couldn't lookup physical page just allocated\n");
1514 
1515 				status = map->Map(virtualAddress, physicalAddress, protection,
1516 					area->MemoryType(), &reservation);
1517 				if (status < B_OK)
1518 					panic("couldn't map physical page in page run\n");
1519 
1520 				cache->InsertPage(page, offset);
1521 				increment_page_wired_count(page);
1522 
1523 				DEBUG_PAGE_ACCESS_END(page);
1524 			}
1525 
1526 			map->Unlock();
1527 			break;
1528 		}
1529 
1530 		default:
1531 			break;
1532 	}
1533 
1534 	cache->Unlock();
1535 
1536 	if (reservedPages > 0)
1537 		vm_page_unreserve_pages(&reservation);
1538 
1539 	TRACE(("vm_create_anonymous_area: done\n"));
1540 
1541 	area->cache_type = CACHE_TYPE_RAM;
1542 	return area->id;
1543 
1544 err1:
1545 	if (wiring == B_CONTIGUOUS) {
1546 		// we had allocated the contiguous page run upfront -- free it again
1547 		phys_addr_t pageNumber = page->physical_page_number;
1548 		int32 i;
1549 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1550 			page = vm_lookup_page(pageNumber);
1551 			if (page == NULL)
1552 				panic("couldn't lookup physical page just allocated\n");
1553 
1554 			vm_page_set_state(page, PAGE_STATE_FREE);
1555 		}
1556 	}
1557 
1558 err0:
1559 	if (reservedPages > 0)
1560 		vm_page_unreserve_pages(&reservation);
1561 	if (reservedMemory > 0)
1562 		vm_unreserve_memory(reservedMemory);
1563 
1564 	return status;
1565 }
1566 
1567 
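/*!	Maps the given physical address range into the team's address space,
	backed by a device cache. Unless \a alreadyWired, all pages are mapped
	right away. The returned \a *_address is offset into the area the same
	way \a physicalAddress is offset within its page. Returns the ID of the
	new area on success, an error code otherwise.
*/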
1568 area_id
1569 vm_map_physical_memory(team_id team, const char* name, void** _address,
1570 	uint32 addressSpec, addr_t size, uint32 protection,
1571 	phys_addr_t physicalAddress, bool alreadyWired)
1572 {
1573 	VMArea* area;
1574 	VMCache* cache;
1575 	addr_t mapOffset;
1576 
1577 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1578 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1579 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1580 		addressSpec, size, protection, physicalAddress));
1581 
1582 	if (!arch_vm_supports_protection(protection))
1583 		return B_NOT_SUPPORTED;
1584 
1585 	AddressSpaceWriteLocker locker(team);
1586 	if (!locker.IsLocked())
1587 		return B_BAD_TEAM_ID;
1588 
1589 	// if the physical address is not page aligned,
1590 	// move the area down to align it on a page boundary
1591 	mapOffset = physicalAddress % B_PAGE_SIZE;
1592 	size += mapOffset;
1593 	physicalAddress -= mapOffset;
1594 
1595 	size = PAGE_ALIGN(size);
1596 
1597 	// create a device cache
1598 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1599 	if (status != B_OK)
1600 		return status;
1601 
1602 	cache->virtual_end = size;
1603 
1604 	cache->Lock();
1605 
1606 	virtual_address_restrictions addressRestrictions = {};
1607 	addressRestrictions.address = *_address;
1608 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1609 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1610 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1611 		true, &area, _address);
1612 
1613 	if (status < B_OK)
1614 		cache->ReleaseRefLocked();
1615 
1616 	cache->Unlock();
1617 
1618 	if (status == B_OK) {
1619 		// set requested memory type -- use uncached, if not given
1620 		uint32 memoryType = addressSpec & B_MTR_MASK;
1621 		if (memoryType == 0)
1622 			memoryType = B_MTR_UC;
1623 
1624 		area->SetMemoryType(memoryType);
1625 
1626 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1627 		if (status != B_OK)
1628 			delete_area(locker.AddressSpace(), area, false);
1629 	}
1630 
1631 	if (status != B_OK)
1632 		return status;
1633 
1634 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1635 
1636 	if (alreadyWired) {
1637 		// The area is already mapped, but possibly not with the right
1638 		// memory type.
1639 		map->Lock();
1640 		map->ProtectArea(area, area->protection);
1641 		map->Unlock();
1642 	} else {
1643 		// Map the area completely.
1644 
1645 		// reserve pages needed for the mapping
1646 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1647 			area->Base() + (size - 1));
1648 		vm_page_reservation reservation;
1649 		vm_page_reserve_pages(&reservation, reservePages,
1650 			team == VMAddressSpace::KernelID()
1651 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1652 
1653 		map->Lock();
1654 
1655 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1656 			map->Map(area->Base() + offset, physicalAddress + offset,
1657 				protection, area->MemoryType(), &reservation);
1658 		}
1659 
1660 		map->Unlock();
1661 
1662 		vm_page_unreserve_pages(&reservation);
1663 	}
1664 
1665 	// modify the pointer returned to be offset back into the new area
1666 	// the same way the physical address in was offset
1667 	*_address = (void*)((addr_t)*_address + mapOffset);
1668 
1669 	area->cache_type = CACHE_TYPE_DEVICE;
1670 	return area->id;
1671 }
1672 
1673 
1674 /*!	Don't use!
1675 	TODO: This function was introduced to map physical page vecs to
1676 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1677 	use a device cache and does not track vm_page::wired_count!
1678 */
1679 area_id
1680 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1681 	uint32 addressSpec, addr_t* _size, uint32 protection,
1682 	struct generic_io_vec* vecs, uint32 vecCount)
1683 {
1684 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1685 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1686 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1687 		addressSpec, _size, protection, vecs, vecCount));
1688 
1689 	if (!arch_vm_supports_protection(protection)
1690 		|| (addressSpec & B_MTR_MASK) != 0) {
1691 		return B_NOT_SUPPORTED;
1692 	}
1693 
1694 	AddressSpaceWriteLocker locker(team);
1695 	if (!locker.IsLocked())
1696 		return B_BAD_TEAM_ID;
1697 
1698 	if (vecCount == 0)
1699 		return B_BAD_VALUE;
1700 
1701 	addr_t size = 0;
1702 	for (uint32 i = 0; i < vecCount; i++) {
1703 		if (vecs[i].base % B_PAGE_SIZE != 0
1704 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1705 			return B_BAD_VALUE;
1706 		}
1707 
1708 		size += vecs[i].length;
1709 	}
1710 
1711 	// create a device cache
1712 	VMCache* cache;
1713 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1714 	if (result != B_OK)
1715 		return result;
1716 
1717 	cache->virtual_end = size;
1718 
1719 	cache->Lock();
1720 
1721 	VMArea* area;
1722 	virtual_address_restrictions addressRestrictions = {};
1723 	addressRestrictions.address = *_address;
1724 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1725 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1726 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1727 		&addressRestrictions, true, &area, _address);
1728 
1729 	if (result != B_OK)
1730 		cache->ReleaseRefLocked();
1731 
1732 	cache->Unlock();
1733 
1734 	if (result != B_OK)
1735 		return result;
1736 
1737 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1738 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1739 		area->Base() + (size - 1));
1740 
1741 	vm_page_reservation reservation;
1742 	vm_page_reserve_pages(&reservation, reservePages,
1743 			team == VMAddressSpace::KernelID()
1744 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1745 	map->Lock();
1746 
1747 	uint32 vecIndex = 0;
1748 	size_t vecOffset = 0;
1749 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1750 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1751 			vecOffset = 0;
1752 			vecIndex++;
1753 		}
1754 
1755 		if (vecIndex >= vecCount)
1756 			break;
1757 
1758 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1759 			protection, area->MemoryType(), &reservation);
1760 
1761 		vecOffset += B_PAGE_SIZE;
1762 	}
1763 
1764 	map->Unlock();
1765 	vm_page_unreserve_pages(&reservation);
1766 
1767 	if (_size != NULL)
1768 		*_size = size;
1769 
1770 	area->cache_type = CACHE_TYPE_DEVICE;
1771 	return area->id;
1772 }
1773 
1774 
1775 area_id
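/*!	Creates an area backed by a null cache, i.e. an area that occupies the
	given address range but has no memory behind it. Returns the ID of the
	new area on success, an error code otherwise.
*/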
1776 vm_create_null_area(team_id team, const char* name, void** address,
1777 	uint32 addressSpec, addr_t size, uint32 flags)
1778 {
1779 	size = PAGE_ALIGN(size);
1780 
1781 	// Lock the address space and, if B_EXACT_ADDRESS and
1782 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1783 	// is not wired.
1784 	AddressSpaceWriteLocker locker;
1785 	do {
1786 		if (locker.SetTo(team) != B_OK)
1787 			return B_BAD_TEAM_ID;
1788 	} while (addressSpec == B_EXACT_ADDRESS
1789 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1790 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1791 			(addr_t)*address, size, &locker));
1792 
1793 	// create a null cache
1794 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1795 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1796 	VMCache* cache;
1797 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1798 	if (status != B_OK)
1799 		return status;
1800 
1801 	cache->temporary = 1;
1802 	cache->virtual_end = size;
1803 
1804 	cache->Lock();
1805 
1806 	VMArea* area;
1807 	virtual_address_restrictions addressRestrictions = {};
1808 	addressRestrictions.address = *address;
1809 	addressRestrictions.address_specification = addressSpec;
1810 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1811 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1812 		&addressRestrictions, true, &area, address);
1813 
1814 	if (status < B_OK) {
1815 		cache->ReleaseRefAndUnlock();
1816 		return status;
1817 	}
1818 
1819 	cache->Unlock();
1820 
1821 	area->cache_type = CACHE_TYPE_NULL;
1822 	return area->id;
1823 }
1824 
1825 
1826 /*!	Creates the vnode cache for the specified \a vnode.
1827 	The vnode has to be marked busy when calling this function.
1828 */
1829 status_t
1830 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1831 {
1832 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1833 }
1834 
1835 
1836 /*!	\a cache must be locked. The area's address space must be read-locked.
1837 */
1838 static void
1839 pre_map_area_pages(VMArea* area, VMCache* cache,
1840 	vm_page_reservation* reservation)
1841 {
1842 	addr_t baseAddress = area->Base();
1843 	addr_t cacheOffset = area->cache_offset;
1844 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1845 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1846 
1847 	for (VMCachePagesTree::Iterator it
1848 				= cache->pages.GetIterator(firstPage, true, true);
1849 			vm_page* page = it.Next();) {
1850 		if (page->cache_offset >= endPage)
1851 			break;
1852 
1853 		// skip busy and inactive pages
1854 		if (page->busy || page->usage_count == 0)
1855 			continue;
1856 
1857 		DEBUG_PAGE_ACCESS_START(page);
1858 		map_page(area, page,
1859 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1860 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1861 		DEBUG_PAGE_ACCESS_END(page);
1862 	}
1863 }
1864 
1865 
1866 /*!	Will map the file specified by \a fd to an area in memory.
1867 	The file will be mirrored beginning at the specified \a offset. The
1868 	\a offset is rounded down and \a size rounded up to page boundaries.
1869 */
1870 static area_id
1871 _vm_map_file(team_id team, const char* name, void** _address,
1872 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1873 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1874 {
1875 	// TODO: for binary files, we want to make sure that they get a
1876 	//	snapshot of the file at mapping time, i.e. later changes should not
1877 	//	make it into the mapped copy -- doing this nicely will need quite
1878 	//	some changes
1879 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1880 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1881 
1882 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1883 	size = PAGE_ALIGN(size);
1884 
1885 	if (mapping == REGION_NO_PRIVATE_MAP)
1886 		protection |= B_SHARED_AREA;
1887 	if (addressSpec != B_EXACT_ADDRESS)
1888 		unmapAddressRange = false;
1889 
1890 	if (fd < 0) {
1891 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1892 		virtual_address_restrictions virtualRestrictions = {};
1893 		virtualRestrictions.address = *_address;
1894 		virtualRestrictions.address_specification = addressSpec;
1895 		physical_address_restrictions physicalRestrictions = {};
1896 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1897 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1898 			_address);
1899 	}
1900 
1901 	// get the open flags of the FD
1902 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1903 	if (descriptor == NULL)
1904 		return EBADF;
1905 	int32 openMode = descriptor->open_mode;
1906 	put_fd(descriptor);
1907 
1908 	// The FD must be open for reading in any case. For a shared mapping with
1909 	// write access, the FD must additionally be open for writing.
1910 	if ((openMode & O_ACCMODE) == O_WRONLY
1911 		|| (mapping == REGION_NO_PRIVATE_MAP
1912 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1913 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1914 		return EACCES;
1915 	}
1916 
1917 	// get the vnode for the object, this also grabs a ref to it
1918 	struct vnode* vnode = NULL;
1919 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1920 	if (status < B_OK)
1921 		return status;
1922 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1923 
1924 	// If we're going to pre-map pages, we need to reserve the pages needed by
1925 	// the mapping backend upfront.
1926 	page_num_t reservedPreMapPages = 0;
1927 	vm_page_reservation reservation;
1928 	if ((protection & B_READ_AREA) != 0) {
1929 		AddressSpaceWriteLocker locker;
1930 		status = locker.SetTo(team);
1931 		if (status != B_OK)
1932 			return status;
1933 
1934 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1935 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1936 
1937 		locker.Unlock();
1938 
1939 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1940 			team == VMAddressSpace::KernelID()
1941 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1942 	}
1943 
1944 	struct PageUnreserver {
1945 		PageUnreserver(vm_page_reservation* reservation)
1946 			:
1947 			fReservation(reservation)
1948 		{
1949 		}
1950 
1951 		~PageUnreserver()
1952 		{
1953 			if (fReservation != NULL)
1954 				vm_page_unreserve_pages(fReservation);
1955 		}
1956 
1957 		vm_page_reservation* fReservation;
1958 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1959 
1960 	// Lock the address space and, if the specified address range shall be
1961 	// unmapped, ensure it is not wired.
1962 	AddressSpaceWriteLocker locker;
1963 	do {
1964 		if (locker.SetTo(team) != B_OK)
1965 			return B_BAD_TEAM_ID;
1966 	} while (unmapAddressRange
1967 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1968 			(addr_t)*_address, size, &locker));
1969 
1970 	// TODO: this only works for file systems that use the file cache
1971 	VMCache* cache;
1972 	status = vfs_get_vnode_cache(vnode, &cache, false);
1973 	if (status < B_OK)
1974 		return status;
1975 
1976 	cache->Lock();
1977 
1978 	VMArea* area;
1979 	virtual_address_restrictions addressRestrictions = {};
1980 	addressRestrictions.address = *_address;
1981 	addressRestrictions.address_specification = addressSpec;
1982 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1983 		0, protection, mapping,
1984 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1985 		&addressRestrictions, kernel, &area, _address);
1986 
1987 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1988 		// map_backing_store() cannot know we no longer need the ref
1989 		cache->ReleaseRefLocked();
1990 	}
1991 
1992 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1993 		pre_map_area_pages(area, cache, &reservation);
1994 
1995 	cache->Unlock();
1996 
1997 	if (status == B_OK) {
1998 		// TODO: this probably deserves a smarter solution, ie. don't always
1999 		// prefetch stuff, and also, probably don't trigger it at this place.
2000 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2001 			// prefetches at most 10 MB starting from "offset"
2002 	}
2003 
2004 	if (status != B_OK)
2005 		return status;
2006 
2007 	area->cache_type = CACHE_TYPE_VNODE;
2008 	return area->id;
2009 }
2010 
2011 
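/*!	Maps the file specified by \a fd into the given team's address space on
	behalf of the kernel: checks that the requested \a protection is supported
	by the architecture and forwards to _vm_map_file() with the kernel flag
	set.
*/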
2012 area_id
2013 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2014 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2015 	int fd, off_t offset)
2016 {
2017 	if (!arch_vm_supports_protection(protection))
2018 		return B_NOT_SUPPORTED;
2019 
2020 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2021 		mapping, unmapAddressRange, fd, offset, true);
2022 }
2023 
2024 
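/*!	Returns the locked cache of the given \a area with a reference acquired.
	If the area's cache changes while the lock is being acquired, the function
	retries until it has locked the cache that is currently set on the area.
*/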
2025 VMCache*
2026 vm_area_get_locked_cache(VMArea* area)
2027 {
2028 	rw_lock_read_lock(&sAreaCacheLock);
2029 
2030 	while (true) {
2031 		VMCache* cache = area->cache;
2032 
2033 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2034 			// cache has been deleted
2035 			rw_lock_read_lock(&sAreaCacheLock);
2036 			continue;
2037 		}
2038 
2039 		rw_lock_read_lock(&sAreaCacheLock);
2040 
2041 		if (cache == area->cache) {
2042 			cache->AcquireRefLocked();
2043 			rw_lock_read_unlock(&sAreaCacheLock);
2044 			return cache;
2045 		}
2046 
2047 		// the cache changed in the meantime
2048 		cache->Unlock();
2049 	}
2050 }
2051 
2052 
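/*!	Releases the reference and the lock acquired by
	vm_area_get_locked_cache().
*/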
2053 void
2054 vm_area_put_locked_cache(VMCache* cache)
2055 {
2056 	cache->ReleaseRefAndUnlock();
2057 }
2058 
2059 
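/*!	Clones the area with ID \a sourceID into the given team's address space.
	The source area is marked B_SHARED_AREA in the process; unless \a mapping
	is REGION_PRIVATE_MAP, source and clone share the same cache. For
	B_FULL_LOCK clones all pages are mapped in right away.
*/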
2060 area_id
2061 vm_clone_area(team_id team, const char* name, void** address,
2062 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2063 	bool kernel)
2064 {
2065 	VMArea* newArea = NULL;
2066 	VMArea* sourceArea;
2067 
2068 	// Check whether the source area exists and is cloneable. If so, mark it
2069 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2070 	{
2071 		AddressSpaceWriteLocker locker;
2072 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2073 		if (status != B_OK)
2074 			return status;
2075 
2076 		sourceArea->protection |= B_SHARED_AREA;
2077 		protection |= B_SHARED_AREA;
2078 	}
2079 
2080 	// Now lock both address spaces and actually do the cloning.
2081 
2082 	MultiAddressSpaceLocker locker;
2083 	VMAddressSpace* sourceAddressSpace;
2084 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2085 	if (status != B_OK)
2086 		return status;
2087 
2088 	VMAddressSpace* targetAddressSpace;
2089 	status = locker.AddTeam(team, true, &targetAddressSpace);
2090 	if (status != B_OK)
2091 		return status;
2092 
2093 	status = locker.Lock();
2094 	if (status != B_OK)
2095 		return status;
2096 
2097 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2098 	if (sourceArea == NULL)
2099 		return B_BAD_VALUE;
2100 
2101 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2102 
2103 	if (!kernel && sourceAddressSpace == VMAddressSpace::Kernel()
2104 		&& targetAddressSpace != VMAddressSpace::Kernel()
2105 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2106 		// kernel areas must not be cloned in userland, unless explicitly
2107 		// declared user-cloneable upon construction
2108 #if KDEBUG
2109 		panic("attempting to clone kernel area \"%s\" (%" B_PRId32 ")!",
2110 			sourceArea->name, sourceID);
2111 #endif
2112 		status = B_NOT_ALLOWED;
2113 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2114 		status = B_NOT_ALLOWED;
2115 	} else {
2116 		virtual_address_restrictions addressRestrictions = {};
2117 		addressRestrictions.address = *address;
2118 		addressRestrictions.address_specification = addressSpec;
2119 		status = map_backing_store(targetAddressSpace, cache,
2120 			sourceArea->cache_offset, name, sourceArea->Size(),
2121 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2122 			kernel, &newArea, address);
2123 	}
2124 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2125 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2126 		// to create a new cache, and has therefore already acquired a reference
2127 		// to the source cache - but otherwise it has no idea that we need
2128 		// one.
2129 		cache->AcquireRefLocked();
2130 	}
2131 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2132 		// we need to map in everything at this point
2133 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2134 			// we don't have actual pages to map but a physical area
2135 			VMTranslationMap* map
2136 				= sourceArea->address_space->TranslationMap();
2137 			map->Lock();
2138 
2139 			phys_addr_t physicalAddress;
2140 			uint32 oldProtection;
2141 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2142 
2143 			map->Unlock();
2144 
2145 			map = targetAddressSpace->TranslationMap();
2146 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2147 				newArea->Base() + (newArea->Size() - 1));
2148 
2149 			vm_page_reservation reservation;
2150 			vm_page_reserve_pages(&reservation, reservePages,
2151 				targetAddressSpace == VMAddressSpace::Kernel()
2152 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2153 			map->Lock();
2154 
2155 			for (addr_t offset = 0; offset < newArea->Size();
2156 					offset += B_PAGE_SIZE) {
2157 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2158 					protection, newArea->MemoryType(), &reservation);
2159 			}
2160 
2161 			map->Unlock();
2162 			vm_page_unreserve_pages(&reservation);
2163 		} else {
2164 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2165 			size_t reservePages = map->MaxPagesNeededToMap(
2166 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2167 			vm_page_reservation reservation;
2168 			vm_page_reserve_pages(&reservation, reservePages,
2169 				targetAddressSpace == VMAddressSpace::Kernel()
2170 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2171 
2172 			// map in all pages from source
2173 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2174 					vm_page* page  = it.Next();) {
2175 				if (!page->busy) {
2176 					DEBUG_PAGE_ACCESS_START(page);
2177 					map_page(newArea, page,
2178 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2179 							- newArea->cache_offset),
2180 						protection, &reservation);
2181 					DEBUG_PAGE_ACCESS_END(page);
2182 				}
2183 			}
2184 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2185 			// ensuring that!
2186 
2187 			vm_page_unreserve_pages(&reservation);
2188 		}
2189 	}
2190 	if (status == B_OK)
2191 		newArea->cache_type = sourceArea->cache_type;
2192 
2193 	vm_area_put_locked_cache(cache);
2194 
2195 	if (status < B_OK)
2196 		return status;
2197 
2198 	return newArea->id;
2199 }
2200 
2201 
2202 /*!	Deletes the specified area of the given address space.
2203 
2204 	The address space must be write-locked.
2205 	The caller must ensure that the area does not have any wired ranges.
2206 
2207 	\param addressSpace The address space containing the area.
2208 	\param area The area to be deleted.
2209 	\param deletingAddressSpace \c true, if the address space is in the process
2210 		of being deleted.
2211 */
2212 static void
2213 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2214 	bool deletingAddressSpace)
2215 {
2216 	ASSERT(!area->IsWired());
2217 
2218 	VMAreaHash::Remove(area);
2219 
2220 	// At this point the area is removed from the global hash table, but
2221 	// still exists in the area list.
2222 
2223 	// Unmap the virtual address space the area occupied.
2224 	{
2225 		// We need to lock the complete cache chain.
2226 		VMCache* topCache = vm_area_get_locked_cache(area);
2227 		VMCacheChainLocker cacheChainLocker(topCache);
2228 		cacheChainLocker.LockAllSourceCaches();
2229 
2230 		// If the area's top cache is a temporary cache and the area is the only
2231 		// one referencing it (besides us currently holding a second reference),
2232 		// the unmapping code doesn't need to care about preserving the accessed
2233 		// and dirty flags of the top cache page mappings.
2234 		bool ignoreTopCachePageFlags
2235 			= topCache->temporary && topCache->RefCount() == 2;
2236 
2237 		area->address_space->TranslationMap()->UnmapArea(area,
2238 			deletingAddressSpace, ignoreTopCachePageFlags);
2239 	}
2240 
2241 	if (!area->cache->temporary)
2242 		area->cache->WriteModified();
2243 
2244 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2245 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2246 
2247 	arch_vm_unset_memory_type(area);
2248 	addressSpace->RemoveArea(area, allocationFlags);
2249 	addressSpace->Put();
2250 
2251 	area->cache->RemoveArea(area);
2252 	area->cache->ReleaseRef();
2253 
2254 	addressSpace->DeleteArea(area, allocationFlags);
2255 }
2256 
2257 
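/*!	Deletes the area with the given \a id from the given team's address
	space, waiting until the area no longer has any wired ranges.
*/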
2258 status_t
2259 vm_delete_area(team_id team, area_id id, bool kernel)
2260 {
2261 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2262 		team, id));
2263 
2264 	// lock the address space and make sure the area isn't wired
2265 	AddressSpaceWriteLocker locker;
2266 	VMArea* area;
2267 	AreaCacheLocker cacheLocker;
2268 
2269 	do {
2270 		status_t status = locker.SetFromArea(team, id, area);
2271 		if (status != B_OK)
2272 			return status;
2273 
2274 		cacheLocker.SetTo(area);
2275 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2276 
2277 	cacheLocker.Unlock();
2278 
2279 	// SetFromArea will have returned an error if the area's owning team is not
2280 	// the same as the passed team, so we don't need to do those checks here.
2281 
2282 	delete_area(locker.AddressSpace(), area, false);
2283 	return B_OK;
2284 }
2285 
2286 
2287 /*!	Creates a new cache on top of given cache, moves all areas from
2288 	the old cache to the new one, and changes the protection of all affected
2289 	areas' pages to read-only. If requested, wired pages are moved up to the
2290 	new cache and copies are added to the old cache in their place.
2291 	Preconditions:
2292 	- The given cache must be locked.
2293 	- All of the cache's areas' address spaces must be read locked.
2294 	- Either the cache must not have any wired ranges or a page reservation for
2295 	  all wired pages must be provided, so they can be copied.
2296 
2297 	\param lowerCache The cache on top of which a new cache shall be created.
2298 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2299 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2300 		has wired pages. The wired pages are copied in this case.
2301 */
2302 static status_t
2303 vm_copy_on_write_area(VMCache* lowerCache,
2304 	vm_page_reservation* wiredPagesReservation)
2305 {
2306 	VMCache* upperCache;
2307 
2308 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2309 
2310 	// We need to separate the cache from its areas. The cache goes one level
2311 	// deeper and we create a new cache in between.
2312 
2313 	// create an anonymous cache
2314 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2315 		lowerCache->GuardSize() / B_PAGE_SIZE,
2316 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2317 		VM_PRIORITY_USER);
2318 	if (status != B_OK)
2319 		return status;
2320 
2321 	upperCache->Lock();
2322 
2323 	upperCache->temporary = 1;
2324 	upperCache->virtual_base = lowerCache->virtual_base;
2325 	upperCache->virtual_end = lowerCache->virtual_end;
2326 
2327 	// transfer the lower cache areas to the upper cache
2328 	rw_lock_write_lock(&sAreaCacheLock);
2329 	upperCache->TransferAreas(lowerCache);
2330 	rw_lock_write_unlock(&sAreaCacheLock);
2331 
2332 	lowerCache->AddConsumer(upperCache);
2333 
2334 	// We now need to remap all pages from all of the cache's areas read-only,
2335 	// so that a copy will be created on next write access. If there are wired
2336 	// pages, we keep their protection, move them to the upper cache and create
2337 	// copies for the lower cache.
2338 	if (wiredPagesReservation != NULL) {
2339 		// We need to handle wired pages -- iterate through the cache's pages.
2340 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2341 				vm_page* page = it.Next();) {
2342 			if (page->WiredCount() > 0) {
2343 				// allocate a new page and copy the wired one
2344 				vm_page* copiedPage = vm_page_allocate_page(
2345 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2346 
2347 				vm_memcpy_physical_page(
2348 					copiedPage->physical_page_number * B_PAGE_SIZE,
2349 					page->physical_page_number * B_PAGE_SIZE);
2350 
2351 				// move the wired page to the upper cache (note: removing is OK
2352 				// with the SplayTree iterator) and insert the copy
2353 				upperCache->MovePage(page);
2354 				lowerCache->InsertPage(copiedPage,
2355 					page->cache_offset * B_PAGE_SIZE);
2356 
2357 				DEBUG_PAGE_ACCESS_END(copiedPage);
2358 			} else {
2359 				// Change the protection of this page in all areas.
2360 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2361 						tempArea = tempArea->cache_next) {
2362 					// The area must be readable in the same way it was
2363 					// previously writable.
2364 					uint32 protection = B_KERNEL_READ_AREA;
2365 					if ((tempArea->protection & B_READ_AREA) != 0)
2366 						protection |= B_READ_AREA;
2367 
2368 					VMTranslationMap* map
2369 						= tempArea->address_space->TranslationMap();
2370 					map->Lock();
2371 					map->ProtectPage(tempArea,
2372 						virtual_page_address(tempArea, page), protection);
2373 					map->Unlock();
2374 				}
2375 			}
2376 		}
2377 	} else {
2378 		ASSERT(lowerCache->WiredPagesCount() == 0);
2379 
2380 		// just change the protection of all areas
2381 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2382 				tempArea = tempArea->cache_next) {
2383 			// The area must be readable in the same way it was previously
2384 			// writable.
2385 			uint32 protection = B_KERNEL_READ_AREA;
2386 			if ((tempArea->protection & B_READ_AREA) != 0)
2387 				protection |= B_READ_AREA;
2388 
2389 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2390 			map->Lock();
2391 			map->ProtectArea(tempArea, protection);
2392 			map->Unlock();
2393 		}
2394 	}
2395 
2396 	vm_area_put_locked_cache(upperCache);
2397 
2398 	return B_OK;
2399 }
2400 
2401 
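/*!	Copies the area with ID \a sourceID into the given team's address space.
	For a shared source area the new area reuses the source cache; otherwise,
	if the source area is writable, it is made copy-on-write via
	vm_copy_on_write_area(), with any wired pages being copied up front.
*/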
2402 area_id
2403 vm_copy_area(team_id team, const char* name, void** _address,
2404 	uint32 addressSpec, uint32 protection, area_id sourceID)
2405 {
2406 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2407 
2408 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2409 		// set the same protection for the kernel as for userland
2410 		protection |= B_KERNEL_READ_AREA;
2411 		if (writableCopy)
2412 			protection |= B_KERNEL_WRITE_AREA;
2413 	}
2414 
2415 	// Do the locking: target address space, all address spaces associated with
2416 	// the source cache, and the cache itself.
2417 	MultiAddressSpaceLocker locker;
2418 	VMAddressSpace* targetAddressSpace;
2419 	VMCache* cache;
2420 	VMArea* source;
2421 	AreaCacheLocker cacheLocker;
2422 	status_t status;
2423 	bool sharedArea;
2424 
2425 	page_num_t wiredPages = 0;
2426 	vm_page_reservation wiredPagesReservation;
2427 
2428 	bool restart;
2429 	do {
2430 		restart = false;
2431 
2432 		locker.Unset();
2433 		status = locker.AddTeam(team, true, &targetAddressSpace);
2434 		if (status == B_OK) {
2435 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2436 				&cache);
2437 		}
2438 		if (status != B_OK)
2439 			return status;
2440 
2441 		cacheLocker.SetTo(cache, true);	// already locked
2442 
2443 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2444 
2445 		page_num_t oldWiredPages = wiredPages;
2446 		wiredPages = 0;
2447 
2448 		// If the source area isn't shared, count the number of wired pages in
2449 		// the cache and reserve as many pages.
2450 		if (!sharedArea) {
2451 			wiredPages = cache->WiredPagesCount();
2452 
2453 			if (wiredPages > oldWiredPages) {
2454 				cacheLocker.Unlock();
2455 				locker.Unlock();
2456 
2457 				if (oldWiredPages > 0)
2458 					vm_page_unreserve_pages(&wiredPagesReservation);
2459 
2460 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2461 					VM_PRIORITY_USER);
2462 
2463 				restart = true;
2464 			}
2465 		} else if (oldWiredPages > 0)
2466 			vm_page_unreserve_pages(&wiredPagesReservation);
2467 	} while (restart);
2468 
2469 	// unreserve pages later
2470 	struct PagesUnreserver {
2471 		PagesUnreserver(vm_page_reservation* reservation)
2472 			:
2473 			fReservation(reservation)
2474 		{
2475 		}
2476 
2477 		~PagesUnreserver()
2478 		{
2479 			if (fReservation != NULL)
2480 				vm_page_unreserve_pages(fReservation);
2481 		}
2482 
2483 	private:
2484 		vm_page_reservation*	fReservation;
2485 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2486 
2487 	if (addressSpec == B_CLONE_ADDRESS) {
2488 		addressSpec = B_EXACT_ADDRESS;
2489 		*_address = (void*)source->Base();
2490 	}
2491 
2492 	// First, create a cache on top of the source area, or reuse the existing
2493 	// one if this is a shared area.
2494 
2495 	VMArea* target;
2496 	virtual_address_restrictions addressRestrictions = {};
2497 	addressRestrictions.address = *_address;
2498 	addressRestrictions.address_specification = addressSpec;
2499 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2500 		name, source->Size(), source->wiring, protection,
2501 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2502 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2503 		&addressRestrictions, true, &target, _address);
2504 	if (status < B_OK)
2505 		return status;
2506 
2507 	if (sharedArea) {
2508 		// The new area uses the old area's cache, but map_backing_store()
2509 		// hasn't acquired a ref. So we have to do that now.
2510 		cache->AcquireRefLocked();
2511 	}
2512 
2513 	// If the source area is writable, we need to move it one layer up as well
2514 
2515 	if (!sharedArea) {
2516 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2517 			// TODO: do something more useful if this fails!
2518 			if (vm_copy_on_write_area(cache,
2519 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2520 				panic("vm_copy_on_write_area() failed!\n");
2521 			}
2522 		}
2523 	}
2524 
2525 	// we return the ID of the newly created area
2526 	return target->id;
2527 }
2528 
2529 
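/*!	Sets the protection of the area with the given ID to \a newProtection,
	remapping already mapped pages as necessary. If a non-writable area whose
	cache has consumers becomes writable, a copy-on-write layer is inserted
	via vm_copy_on_write_area().
*/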
2530 status_t
2531 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2532 	bool kernel)
2533 {
2534 	fix_protection(&newProtection);
2535 
2536 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2537 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2538 
2539 	if (!arch_vm_supports_protection(newProtection))
2540 		return B_NOT_SUPPORTED;
2541 
2542 	bool becomesWritable
2543 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2544 
2545 	// lock address spaces and cache
2546 	MultiAddressSpaceLocker locker;
2547 	VMCache* cache;
2548 	VMArea* area;
2549 	status_t status;
2550 	AreaCacheLocker cacheLocker;
2551 	bool isWritable;
2552 
2553 	bool restart;
2554 	do {
2555 		restart = false;
2556 
2557 		locker.Unset();
2558 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2559 		if (status != B_OK)
2560 			return status;
2561 
2562 		cacheLocker.SetTo(cache, true);	// already locked
2563 
2564 		if (!kernel && area->address_space == VMAddressSpace::Kernel()) {
2565 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2566 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2567 				" (%s)\n", team, newProtection, areaID, area->name);
2568 			return B_NOT_ALLOWED;
2569 		}
2570 
2571 		if (area->protection == newProtection)
2572 			return B_OK;
2573 
2574 		if (team != VMAddressSpace::KernelID()
2575 			&& area->address_space->ID() != team) {
2576 			// unless you're the kernel, you are only allowed to set
2577 			// the protection of your own areas
2578 			return B_NOT_ALLOWED;
2579 		}
2580 
2581 		isWritable
2582 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2583 
2584 		// Make sure the area (or, if we're going to call
2585 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2586 		// wired ranges.
2587 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2588 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2589 					otherArea = otherArea->cache_next) {
2590 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2591 					restart = true;
2592 					break;
2593 				}
2594 			}
2595 		} else {
2596 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2597 				restart = true;
2598 		}
2599 	} while (restart);
2600 
2601 	bool changePageProtection = true;
2602 	bool changeTopCachePagesOnly = false;
2603 
2604 	if (isWritable && !becomesWritable) {
2605 		// writable -> !writable
2606 
2607 		if (cache->source != NULL && cache->temporary) {
2608 			if (cache->CountWritableAreas(area) == 0) {
2609 				// Since this cache is now backed by the pages in its source cache,
2610 				// we can change the cache's commitment to account only for the
2611 				// pages that are actually in this cache.
2612 
2613 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2614 					team == VMAddressSpace::KernelID()
2615 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2616 
2617 				// TODO: we may be able to join with our source cache, if
2618 				// count == 0
2619 			}
2620 		}
2621 
2622 		// If only the writability changes, we can just remap the pages of the
2623 		// top cache, since the pages of lower caches are mapped read-only
2624 		// anyway. That's only advantageous, though, if the number of pages in
2625 		// the cache is significantly smaller than the number of pages in the
2626 		// area.
2627 		if (newProtection
2628 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2629 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2630 			changeTopCachePagesOnly = true;
2631 		}
2632 	} else if (!isWritable && becomesWritable) {
2633 		// !writable -> writable
2634 
2635 		if (!cache->consumers.IsEmpty()) {
2636 			// There are consumers -- we have to insert a new cache. Fortunately
2637 			// vm_copy_on_write_area() does everything that's needed.
2638 			changePageProtection = false;
2639 			status = vm_copy_on_write_area(cache, NULL);
2640 		} else {
2641 			// No consumers, so we don't need to insert a new one.
2642 			if (cache->source != NULL && cache->temporary) {
2643 				// the cache's commitment must contain all possible pages
2644 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2645 					team == VMAddressSpace::KernelID()
2646 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2647 			}
2648 
2649 			if (status == B_OK && cache->source != NULL) {
2650 				// There's a source cache, hence we can't just change all pages'
2651 				// protection or we might allow writing into pages belonging to
2652 				// a lower cache.
2653 				changeTopCachePagesOnly = true;
2654 			}
2655 		}
2656 	} else {
2657 		// we don't have anything special to do in all other cases
2658 	}
2659 
2660 	if (status == B_OK) {
2661 		// remap existing pages in this cache
2662 		if (changePageProtection) {
2663 			VMTranslationMap* map = area->address_space->TranslationMap();
2664 			map->Lock();
2665 
2666 			if (changeTopCachePagesOnly) {
2667 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2668 				page_num_t lastPageOffset
2669 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2670 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2671 						vm_page* page = it.Next();) {
2672 					if (page->cache_offset >= firstPageOffset
2673 						&& page->cache_offset <= lastPageOffset) {
2674 						addr_t address = virtual_page_address(area, page);
2675 						map->ProtectPage(area, address, newProtection);
2676 					}
2677 				}
2678 			} else
2679 				map->ProtectArea(area, newProtection);
2680 
2681 			map->Unlock();
2682 		}
2683 
2684 		area->protection = newProtection;
2685 	}
2686 
2687 	return status;
2688 }
2689 
2690 
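/*!	Looks up the physical address that \a vaddr is mapped to in the address
	space of the given team and stores it in \a paddr.
*/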
2691 status_t
2692 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2693 {
2694 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2695 	if (addressSpace == NULL)
2696 		return B_BAD_TEAM_ID;
2697 
2698 	VMTranslationMap* map = addressSpace->TranslationMap();
2699 
2700 	map->Lock();
2701 	uint32 dummyFlags;
2702 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2703 	map->Unlock();
2704 
2705 	addressSpace->Put();
2706 	return status;
2707 }
2708 
2709 
2710 /*!	The page's cache must be locked.
2711 */
2712 bool
2713 vm_test_map_modification(vm_page* page)
2714 {
2715 	if (page->modified)
2716 		return true;
2717 
2718 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2719 	vm_page_mapping* mapping;
2720 	while ((mapping = iterator.Next()) != NULL) {
2721 		VMArea* area = mapping->area;
2722 		VMTranslationMap* map = area->address_space->TranslationMap();
2723 
2724 		phys_addr_t physicalAddress;
2725 		uint32 flags;
2726 		map->Lock();
2727 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2728 		map->Unlock();
2729 
2730 		if ((flags & PAGE_MODIFIED) != 0)
2731 			return true;
2732 	}
2733 
2734 	return false;
2735 }
2736 
2737 
2738 /*!	The page's cache must be locked.
2739 */
2740 void
2741 vm_clear_map_flags(vm_page* page, uint32 flags)
2742 {
2743 	if ((flags & PAGE_ACCESSED) != 0)
2744 		page->accessed = false;
2745 	if ((flags & PAGE_MODIFIED) != 0)
2746 		page->modified = false;
2747 
2748 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2749 	vm_page_mapping* mapping;
2750 	while ((mapping = iterator.Next()) != NULL) {
2751 		VMArea* area = mapping->area;
2752 		VMTranslationMap* map = area->address_space->TranslationMap();
2753 
2754 		map->Lock();
2755 		map->ClearFlags(virtual_page_address(area, page), flags);
2756 		map->Unlock();
2757 	}
2758 }
2759 
2760 
2761 /*!	Removes all mappings from a page.
2762 	After you've called this function, the page is unmapped from memory and
2763 	the page's \c accessed and \c modified flags have been updated according
2764 	to the state of the mappings.
2765 	The page's cache must be locked.
2766 */
2767 void
2768 vm_remove_all_page_mappings(vm_page* page)
2769 {
2770 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2771 		VMArea* area = mapping->area;
2772 		VMTranslationMap* map = area->address_space->TranslationMap();
2773 		addr_t address = virtual_page_address(area, page);
2774 		map->UnmapPage(area, address, false);
2775 	}
2776 }
2777 
2778 
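/*!	Clears the accessed flag of all of the page's mappings as well as the
	page's own \c accessed flag, merging the mappings' modified state into
	the page's \c modified flag.
	Like the related functions above, expects the page's cache to be locked.
	\return The number of accessed flags that were set, including the page's
		own.
*/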
2779 int32
2780 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2781 {
2782 	int32 count = 0;
2783 
2784 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2785 	vm_page_mapping* mapping;
2786 	while ((mapping = iterator.Next()) != NULL) {
2787 		VMArea* area = mapping->area;
2788 		VMTranslationMap* map = area->address_space->TranslationMap();
2789 
2790 		bool modified;
2791 		if (map->ClearAccessedAndModified(area,
2792 				virtual_page_address(area, page), false, modified)) {
2793 			count++;
2794 		}
2795 
2796 		page->modified |= modified;
2797 	}
2798 
2800 	if (page->accessed) {
2801 		count++;
2802 		page->accessed = false;
2803 	}
2804 
2805 	return count;
2806 }
2807 
2808 
2809 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2810 	mappings.
2811 	The function iterates through the page mappings and removes them until
2812 	encountering one that has been accessed. From then on it will continue to
2813 	iterate, but only clear the accessed flag of the mapping. The page's
2814 	\c modified bit will be updated accordingly, the \c accessed bit will be
2815 	cleared.
2816 	\return The number of mapping accessed bits encountered, including the
2817 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2818 		of the page have been removed.
2819 */
2820 int32
2821 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2822 {
2823 	ASSERT(page->WiredCount() == 0);
2824 
2825 	if (page->accessed)
2826 		return vm_clear_page_mapping_accessed_flags(page);
2827 
2828 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2829 		VMArea* area = mapping->area;
2830 		VMTranslationMap* map = area->address_space->TranslationMap();
2831 		addr_t address = virtual_page_address(area, page);
2832 		bool modified = false;
2833 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2834 			page->accessed = true;
2835 			page->modified |= modified;
2836 			return vm_clear_page_mapping_accessed_flags(page);
2837 		}
2838 		page->modified |= modified;
2839 	}
2840 
2841 	return 0;
2842 }
2843 
2844 
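/*!	Debugger command backing "dl", "dw", "ds", "db" and "string": dumps
	memory at the given virtual address, or, with -p/--physical, at the given
	physical address (limited to a single page in that case).
*/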
2845 static int
2846 display_mem(int argc, char** argv)
2847 {
2848 	bool physical = false;
2849 	addr_t copyAddress;
2850 	int32 displayWidth;
2851 	int32 itemSize;
2852 	int32 num = -1;
2853 	addr_t address;
2854 	int i = 1, j;
2855 
2856 	if (argc > 1 && argv[1][0] == '-') {
2857 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2858 			physical = true;
2859 			i++;
2860 		} else
2861 			i = 99;
2862 	}
2863 
2864 	if (argc < i + 1 || argc > i + 2) {
2865 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2866 			"\tdl - 8 bytes\n"
2867 			"\tdw - 4 bytes\n"
2868 			"\tds - 2 bytes\n"
2869 			"\tdb - 1 byte\n"
2870 			"\tstring - a whole string\n"
2871 			"  -p or --physical only allows memory from a single page to be "
2872 			"displayed.\n");
2873 		return 0;
2874 	}
2875 
2876 	address = parse_expression(argv[i]);
2877 
2878 	if (argc > i + 1)
2879 		num = parse_expression(argv[i + 1]);
2880 
2881 	// build the format string
2882 	if (strcmp(argv[0], "db") == 0) {
2883 		itemSize = 1;
2884 		displayWidth = 16;
2885 	} else if (strcmp(argv[0], "ds") == 0) {
2886 		itemSize = 2;
2887 		displayWidth = 8;
2888 	} else if (strcmp(argv[0], "dw") == 0) {
2889 		itemSize = 4;
2890 		displayWidth = 4;
2891 	} else if (strcmp(argv[0], "dl") == 0) {
2892 		itemSize = 8;
2893 		displayWidth = 2;
2894 	} else if (strcmp(argv[0], "string") == 0) {
2895 		itemSize = 1;
2896 		displayWidth = -1;
2897 	} else {
2898 		kprintf("display_mem called in an invalid way!\n");
2899 		return 0;
2900 	}
2901 
2902 	if (num <= 0)
2903 		num = displayWidth;
2904 
2905 	void* physicalPageHandle = NULL;
2906 
2907 	if (physical) {
2908 		int32 offset = address & (B_PAGE_SIZE - 1);
2909 		if (num * itemSize + offset > B_PAGE_SIZE) {
2910 			num = (B_PAGE_SIZE - offset) / itemSize;
2911 			kprintf("NOTE: number of bytes has been cut to page size\n");
2912 		}
2913 
2914 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2915 
2916 		if (vm_get_physical_page_debug(address, &copyAddress,
2917 				&physicalPageHandle) != B_OK) {
2918 			kprintf("getting the hardware page failed.\n");
2919 			return 0;
2920 		}
2921 
2922 		address += offset;
2923 		copyAddress += offset;
2924 	} else
2925 		copyAddress = address;
2926 
2927 	if (!strcmp(argv[0], "string")) {
2928 		kprintf("%p \"", (char*)copyAddress);
2929 
2930 		// string mode
2931 		for (i = 0; true; i++) {
2932 			char c;
2933 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2934 					!= B_OK
2935 				|| c == '\0') {
2936 				break;
2937 			}
2938 
2939 			if (c == '\n')
2940 				kprintf("\\n");
2941 			else if (c == '\t')
2942 				kprintf("\\t");
2943 			else {
2944 				if (!isprint(c))
2945 					c = '.';
2946 
2947 				kprintf("%c", c);
2948 			}
2949 		}
2950 
2951 		kprintf("\"\n");
2952 	} else {
2953 		// number mode
2954 		for (i = 0; i < num; i++) {
2955 			uint64 value;
2956 
2957 			if ((i % displayWidth) == 0) {
2958 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2959 				if (i != 0)
2960 					kprintf("\n");
2961 
2962 				kprintf("[0x%lx]  ", address + i * itemSize);
2963 
2964 				for (j = 0; j < displayed; j++) {
2965 					char c;
2966 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2967 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2968 						displayed = j;
2969 						break;
2970 					}
2971 					if (!isprint(c))
2972 						c = '.';
2973 
2974 					kprintf("%c", c);
2975 				}
2976 				if (num > displayWidth) {
2977 					// make sure the spacing in the last line is correct
2978 					for (j = displayed; j < displayWidth * itemSize; j++)
2979 						kprintf(" ");
2980 				}
2981 				kprintf("  ");
2982 			}
2983 
2984 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2985 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2986 				kprintf("read fault");
2987 				break;
2988 			}
2989 
2990 			switch (itemSize) {
2991 				case 1:
2992 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
2993 					break;
2994 				case 2:
2995 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
2996 					break;
2997 				case 4:
2998 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
2999 					break;
3000 				case 8:
3001 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3002 					break;
3003 			}
3004 		}
3005 
3006 		kprintf("\n");
3007 	}
3008 
3009 	if (physical) {
3010 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3011 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3012 	}
3013 	return 0;
3014 }
3015 
3016 
3017 static void
3018 dump_cache_tree_recursively(VMCache* cache, int level,
3019 	VMCache* highlightCache)
3020 {
3021 	// print this cache
3022 	for (int i = 0; i < level; i++)
3023 		kprintf("  ");
3024 	if (cache == highlightCache)
3025 		kprintf("%p <--\n", cache);
3026 	else
3027 		kprintf("%p\n", cache);
3028 
3029 	// recursively print its consumers
3030 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3031 			VMCache* consumer = it.Next();) {
3032 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3033 	}
3034 }
3035 
3036 
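/*!	Debugger command: prints the cache tree the given cache belongs to,
	starting from its root (the transitive source) and highlighting the given
	cache.
*/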
3037 static int
3038 dump_cache_tree(int argc, char** argv)
3039 {
3040 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3041 		kprintf("usage: %s <address>\n", argv[0]);
3042 		return 0;
3043 	}
3044 
3045 	addr_t address = parse_expression(argv[1]);
3046 	if (address == 0)
3047 		return 0;
3048 
3049 	VMCache* cache = (VMCache*)address;
3050 	VMCache* root = cache;
3051 
3052 	// find the root cache (the transitive source)
3053 	while (root->source != NULL)
3054 		root = root->source;
3055 
3056 	dump_cache_tree_recursively(root, 0, cache);
3057 
3058 	return 0;
3059 }
3060 
3061 
3062 const char*
3063 vm_cache_type_to_string(int32 type)
3064 {
3065 	switch (type) {
3066 		case CACHE_TYPE_RAM:
3067 			return "RAM";
3068 		case CACHE_TYPE_DEVICE:
3069 			return "device";
3070 		case CACHE_TYPE_VNODE:
3071 			return "vnode";
3072 		case CACHE_TYPE_NULL:
3073 			return "null";
3074 
3075 		default:
3076 			return "unknown";
3077 	}
3078 }
3079 
3080 
3081 #if DEBUG_CACHE_LIST
3082 
3083 static void
3084 update_cache_info_recursively(VMCache* cache, cache_info& info)
3085 {
3086 	info.page_count += cache->page_count;
3087 	if (cache->type == CACHE_TYPE_RAM)
3088 		info.committed += cache->committed_size;
3089 
3090 	// recurse
3091 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3092 			VMCache* consumer = it.Next();) {
3093 		update_cache_info_recursively(consumer, info);
3094 	}
3095 }
3096 
3097 
3098 static int
3099 cache_info_compare_page_count(const void* _a, const void* _b)
3100 {
3101 	const cache_info* a = (const cache_info*)_a;
3102 	const cache_info* b = (const cache_info*)_b;
3103 	if (a->page_count == b->page_count)
3104 		return 0;
3105 	return a->page_count < b->page_count ? 1 : -1;
3106 }
3107 
3108 
3109 static int
3110 cache_info_compare_committed(const void* _a, const void* _b)
3111 {
3112 	const cache_info* a = (const cache_info*)_a;
3113 	const cache_info* b = (const cache_info*)_b;
3114 	if (a->committed == b->committed)
3115 		return 0;
3116 	return a->committed < b->committed ? 1 : -1;
3117 }
3118 
3119 
3120 static void
3121 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3122 {
3123 	for (int i = 0; i < level; i++)
3124 		kprintf("  ");
3125 
3126 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3127 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3128 		cache->virtual_base, cache->virtual_end, cache->page_count);
3129 
3130 	if (level == 0)
3131 		kprintf("/%lu", info.page_count);
3132 
3133 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3134 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3135 
3136 		if (level == 0)
3137 			kprintf("/%lu", info.committed);
3138 	}
3139 
3140 	// areas
3141 	if (cache->areas != NULL) {
3142 		VMArea* area = cache->areas;
3143 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3144 			area->name, area->address_space->ID());
3145 
3146 		while (area->cache_next != NULL) {
3147 			area = area->cache_next;
3148 			kprintf(", %" B_PRId32, area->id);
3149 		}
3150 	}
3151 
3152 	kputs("\n");
3153 
3154 	// recurse
3155 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3156 			VMCache* consumer = it.Next();) {
3157 		dump_caches_recursively(consumer, info, level + 1);
3158 	}
3159 }
3160 
3161 
3162 static int
3163 dump_caches(int argc, char** argv)
3164 {
3165 	if (sCacheInfoTable == NULL) {
3166 		kprintf("No cache info table!\n");
3167 		return 0;
3168 	}
3169 
3170 	bool sortByPageCount = true;
3171 
3172 	for (int32 i = 1; i < argc; i++) {
3173 		if (strcmp(argv[i], "-c") == 0) {
3174 			sortByPageCount = false;
3175 		} else {
3176 			print_debugger_command_usage(argv[0]);
3177 			return 0;
3178 		}
3179 	}
3180 
3181 	uint32 totalCount = 0;
3182 	uint32 rootCount = 0;
3183 	off_t totalCommitted = 0;
3184 	page_num_t totalPages = 0;
3185 
3186 	VMCache* cache = gDebugCacheList;
3187 	while (cache) {
3188 		totalCount++;
3189 		if (cache->source == NULL) {
3190 			cache_info stackInfo;
3191 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3192 				? sCacheInfoTable[rootCount] : stackInfo;
3193 			rootCount++;
3194 			info.cache = cache;
3195 			info.page_count = 0;
3196 			info.committed = 0;
3197 			update_cache_info_recursively(cache, info);
3198 			totalCommitted += info.committed;
3199 			totalPages += info.page_count;
3200 		}
3201 
3202 		cache = cache->debug_next;
3203 	}
3204 
3205 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3206 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3207 			sortByPageCount
3208 				? &cache_info_compare_page_count
3209 				: &cache_info_compare_committed);
3210 	}
3211 
3212 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3213 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3214 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3215 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3216 			"page count" : "committed size");
3217 
3218 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3219 		for (uint32 i = 0; i < rootCount; i++) {
3220 			cache_info& info = sCacheInfoTable[i];
3221 			dump_caches_recursively(info.cache, info, 0);
3222 		}
3223 	} else
3224 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3225 
3226 	return 0;
3227 }
3228 
3229 #endif	// DEBUG_CACHE_LIST
3230 
3231 
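/*!	Debugger command: dumps the given VMCache; with -p all of its pages are
	listed as well. Sets the "_sourceCache" debug variable to the cache's
	source.
*/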
3232 static int
3233 dump_cache(int argc, char** argv)
3234 {
3235 	VMCache* cache;
3236 	bool showPages = false;
3237 	int i = 1;
3238 
3239 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3240 		kprintf("usage: %s [-ps] <address>\n"
3241 			"  if -p is specified, all pages are shown; otherwise only the\n"
3242 			"  cache info is shown.\n", argv[0]);
3243 		return 0;
3244 	}
3245 	while (argv[i][0] == '-') {
3246 		char* arg = argv[i] + 1;
3247 		while (arg[0]) {
3248 			if (arg[0] == 'p')
3249 				showPages = true;
3250 			arg++;
3251 		}
3252 		i++;
3253 	}
3254 	if (argv[i] == NULL) {
3255 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3256 		return 0;
3257 	}
3258 
3259 	addr_t address = parse_expression(argv[i]);
3260 	if (address == 0)
3261 		return 0;
3262 
3263 	cache = (VMCache*)address;
3264 
3265 	cache->Dump(showPages);
3266 
3267 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3268 
3269 	return 0;
3270 }
3271 
3272 
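/*!	Prints the fields of the given area; if \a mappings is \c true, the
	individual page mappings are listed as well, otherwise only their count.
*/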
3273 static void
3274 dump_area_struct(VMArea* area, bool mappings)
3275 {
3276 	kprintf("AREA: %p\n", area);
3277 	kprintf("name:\t\t'%s'\n", area->name);
3278 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3279 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3280 	kprintf("base:\t\t0x%lx\n", area->Base());
3281 	kprintf("size:\t\t0x%lx\n", area->Size());
3282 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3283 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3284 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3285 	kprintf("cache:\t\t%p\n", area->cache);
3286 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3287 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3288 	kprintf("cache_next:\t%p\n", area->cache_next);
3289 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3290 
3291 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3292 	if (mappings) {
3293 		kprintf("page mappings:\n");
3294 		while (iterator.HasNext()) {
3295 			vm_page_mapping* mapping = iterator.Next();
3296 			kprintf("  %p", mapping->page);
3297 		}
3298 		kprintf("\n");
3299 	} else {
3300 		uint32 count = 0;
3301 		while (iterator.Next() != NULL) {
3302 			count++;
3303 		}
3304 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3305 	}
3306 }
3307 
3308 
3309 static int
3310 dump_area(int argc, char** argv)
3311 {
3312 	bool mappings = false;
3313 	bool found = false;
3314 	int32 index = 1;
3315 	VMArea* area;
3316 	addr_t num;
3317 
3318 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3319 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3320 			"All areas matching either id/address/name are listed. You can\n"
3321 			"restrict the check to a specific attribute by prefixing the specifier\n"
3322 			"with one of the id/contains/address/name keywords.\n"
3323 			"-m shows the area's mappings as well.\n");
3324 		return 0;
3325 	}
3326 
3327 	if (!strcmp(argv[1], "-m")) {
3328 		mappings = true;
3329 		index++;
3330 	}
3331 
3332 	int32 mode = 0xf;
3333 	if (!strcmp(argv[index], "id"))
3334 		mode = 1;
3335 	else if (!strcmp(argv[index], "contains"))
3336 		mode = 2;
3337 	else if (!strcmp(argv[index], "name"))
3338 		mode = 4;
3339 	else if (!strcmp(argv[index], "address"))
3340 		mode = 0;
3341 	if (mode != 0xf)
3342 		index++;
3343 
3344 	if (index >= argc) {
3345 		kprintf("No area specifier given.\n");
3346 		return 0;
3347 	}
3348 
3349 	num = parse_expression(argv[index]);
3350 
3351 	if (mode == 0) {
3352 		dump_area_struct((struct VMArea*)num, mappings);
3353 	} else {
3354 		// walk through the area list, looking for the arguments as a name
3355 
3356 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3357 		while ((area = it.Next()) != NULL) {
3358 			if (((mode & 4) != 0 && area->name != NULL
3359 					&& !strcmp(argv[index], area->name))
3360 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3361 					|| (((mode & 2) != 0 && area->Base() <= num
3362 						&& area->Base() + area->Size() > num))))) {
3363 				dump_area_struct(area, mappings);
3364 				found = true;
3365 			}
3366 		}
3367 
3368 		if (!found)
3369 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3370 	}
3371 
3372 	return 0;
3373 }
3374 
3375 
3376 static int
3377 dump_area_list(int argc, char** argv)
3378 {
3379 	VMArea* area;
3380 	const char* name = NULL;
3381 	int32 id = 0;
3382 
3383 	if (argc > 1) {
3384 		id = parse_expression(argv[1]);
3385 		if (id == 0)
3386 			name = argv[1];
3387 	}
3388 
3389 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3390 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3391 		B_PRINTF_POINTER_WIDTH, "size");
3392 
3393 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3394 	while ((area = it.Next()) != NULL) {
3395 		if ((id != 0 && area->address_space->ID() != id)
3396 			|| (name != NULL && strstr(area->name, name) == NULL))
3397 			continue;
3398 
3399 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3400 			area->id, (void*)area->Base(), (void*)area->Size(),
3401 			area->protection, area->wiring, area->name);
3402 	}
3403 	return 0;
3404 }
3405 
3406 
3407 static int
3408 dump_available_memory(int argc, char** argv)
3409 {
3410 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3411 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3412 	return 0;
3413 }
3414 
3415 
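/*!	Debugger command: prints the mapping information for a virtual address.
	With -r the argument is interpreted as a physical address, with -p as a
	vm_page, and all virtual mappings of that page are listed instead. An
	optional thread ID selects the team whose address space is used.
*/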
3416 static int
3417 dump_mapping_info(int argc, char** argv)
3418 {
3419 	bool reverseLookup = false;
3420 	bool pageLookup = false;
3421 
3422 	int argi = 1;
3423 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3424 		const char* arg = argv[argi];
3425 		if (strcmp(arg, "-r") == 0) {
3426 			reverseLookup = true;
3427 		} else if (strcmp(arg, "-p") == 0) {
3428 			reverseLookup = true;
3429 			pageLookup = true;
3430 		} else {
3431 			print_debugger_command_usage(argv[0]);
3432 			return 0;
3433 		}
3434 	}
3435 
3436 	// We need at least one argument, the address. Optionally a thread ID can be
3437 	// specified.
3438 	if (argi >= argc || argi + 2 < argc) {
3439 		print_debugger_command_usage(argv[0]);
3440 		return 0;
3441 	}
3442 
3443 	uint64 addressValue;
3444 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3445 		return 0;
3446 
3447 	Team* team = NULL;
3448 	if (argi < argc) {
3449 		uint64 threadID;
3450 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3451 			return 0;
3452 
3453 		Thread* thread = Thread::GetDebug(threadID);
3454 		if (thread == NULL) {
3455 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3456 			return 0;
3457 		}
3458 
3459 		team = thread->team;
3460 	}
3461 
3462 	if (reverseLookup) {
3463 		phys_addr_t physicalAddress;
3464 		if (pageLookup) {
3465 			vm_page* page = (vm_page*)(addr_t)addressValue;
3466 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3467 		} else {
3468 			physicalAddress = (phys_addr_t)addressValue;
3469 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3470 		}
3471 
3472 		kprintf("    Team     Virtual Address      Area\n");
3473 		kprintf("--------------------------------------\n");
3474 
3475 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3476 			Callback()
3477 				:
3478 				fAddressSpace(NULL)
3479 			{
3480 			}
3481 
3482 			void SetAddressSpace(VMAddressSpace* addressSpace)
3483 			{
3484 				fAddressSpace = addressSpace;
3485 			}
3486 
3487 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3488 			{
3489 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3490 					virtualAddress);
3491 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3492 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3493 				else
3494 					kprintf("\n");
3495 				return false;
3496 			}
3497 
3498 		private:
3499 			VMAddressSpace*	fAddressSpace;
3500 		} callback;
3501 
3502 		if (team != NULL) {
3503 			// team specified -- get its address space
3504 			VMAddressSpace* addressSpace = team->address_space;
3505 			if (addressSpace == NULL) {
3506 				kprintf("Failed to get address space!\n");
3507 				return 0;
3508 			}
3509 
3510 			callback.SetAddressSpace(addressSpace);
3511 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3512 				physicalAddress, callback);
3513 		} else {
3514 			// no team specified -- iterate through all address spaces
3515 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3516 				addressSpace != NULL;
3517 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3518 				callback.SetAddressSpace(addressSpace);
3519 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3520 					physicalAddress, callback);
3521 			}
3522 		}
3523 	} else {
3524 		// get the address space
3525 		addr_t virtualAddress = (addr_t)addressValue;
3526 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3527 		VMAddressSpace* addressSpace;
3528 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3529 			addressSpace = VMAddressSpace::Kernel();
3530 		} else if (team != NULL) {
3531 			addressSpace = team->address_space;
3532 		} else {
3533 			Thread* thread = debug_get_debugged_thread();
3534 			if (thread == NULL || thread->team == NULL) {
3535 				kprintf("Failed to get team!\n");
3536 				return 0;
3537 			}
3538 
3539 			addressSpace = thread->team->address_space;
3540 		}
3541 
3542 		if (addressSpace == NULL) {
3543 			kprintf("Failed to get address space!\n");
3544 			return 0;
3545 		}
3546 
3547 		// let the translation map implementation do the job
3548 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3549 	}
3550 
3551 	return 0;
3552 }
3553 
3554 
3555 /*!	Deletes all areas and reserved regions in the given address space.
3556 
3557 	The caller must ensure that none of the areas has any wired ranges.
3558 
3559 	\param addressSpace The address space.
3560 	\param deletingAddressSpace \c true, if the address space is in the process
3561 		of being deleted.
3562 */
3563 void
3564 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3565 {
3566 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3567 		addressSpace->ID()));
3568 
3569 	addressSpace->WriteLock();
3570 
3571 	// remove all reserved areas in this address space
3572 	addressSpace->UnreserveAllAddressRanges(0);
3573 
3574 	// delete all the areas in this address space
3575 	while (VMArea* area = addressSpace->FirstArea()) {
3576 		ASSERT(!area->IsWired());
3577 		delete_area(addressSpace, area, deletingAddressSpace);
3578 	}
3579 
3580 	addressSpace->WriteUnlock();
3581 }
3582 
3583 
3584 static area_id
3585 vm_area_for(addr_t address, bool kernel)
3586 {
3587 	team_id team;
3588 	if (IS_USER_ADDRESS(address)) {
3589 		// we try the user team address space, if any
3590 		team = VMAddressSpace::CurrentID();
3591 		if (team < 0)
3592 			return team;
3593 	} else
3594 		team = VMAddressSpace::KernelID();
3595 
3596 	AddressSpaceReadLocker locker(team);
3597 	if (!locker.IsLocked())
3598 		return B_BAD_TEAM_ID;
3599 
3600 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3601 	if (area != NULL) {
3602 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3603 			return B_ERROR;
3604 
3605 		return area->id;
3606 	}
3607 
3608 	return B_ERROR;
3609 }
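/* A minimal usage sketch of the lookup this function backs: the public
   area_for()/get_area_info() pair, as also used for the kernel args ranges
   further below ("pointer" is a hypothetical address of interest):

	area_id id = area_for(pointer);
	if (id >= 0) {
		area_info info;
		if (get_area_info(id, &info) == B_OK)
			dprintf("%p lies in area %" B_PRId32 " (%s)\n", pointer, id,
				info.name);
	}
*/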
3610 
3611 
3612 /*!	Frees physical pages that were used during the boot process.
3613 	\a end is inclusive.
3614 */
3615 static void
3616 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3617 {
3618 	// free all physical pages in the specified range
3619 
3620 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3621 		phys_addr_t physicalAddress;
3622 		uint32 flags;
3623 
3624 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3625 			&& (flags & PAGE_PRESENT) != 0) {
3626 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3627 			if (page != NULL && page->State() != PAGE_STATE_FREE
3628 					 && page->State() != PAGE_STATE_CLEAR
3629 					 && page->State() != PAGE_STATE_UNUSED) {
3630 				DEBUG_PAGE_ACCESS_START(page);
3631 				vm_page_set_state(page, PAGE_STATE_FREE);
3632 			}
3633 		}
3634 	}
3635 
3636 	// unmap the memory
3637 	map->Unmap(start, end);
3638 }
3639 
3640 
3641 void
3642 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3643 {
3644 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3645 	addr_t end = start + (size - 1);
3646 	addr_t lastEnd = start;
3647 
3648 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3649 		(void*)start, (void*)end));
3650 
3651 	// The areas are sorted in virtual address space order, so
3652 	// we just have to find the holes between them that fall
3653 	// into the range we should dispose of
3654 
3655 	map->Lock();
3656 
3657 	for (VMAddressSpace::AreaIterator it
3658 				= VMAddressSpace::Kernel()->GetAreaIterator();
3659 			VMArea* area = it.Next();) {
3660 		addr_t areaStart = area->Base();
3661 		addr_t areaEnd = areaStart + (area->Size() - 1);
3662 
3663 		if (areaEnd < start)
3664 			continue;
3665 
3666 		if (areaStart > end) {
3667 			// we are done, the area is already beyond what we have to free
3668 			break;
3669 		}
3670 
3671 		if (areaStart > lastEnd) {
3672 			// this is something we can free
3673 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3674 				(void*)areaStart));
3675 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3676 		}
3677 
3678 		if (areaEnd >= end) {
3679 			lastEnd = areaEnd;
3680 				// no +1 to prevent potential overflow
3681 			break;
3682 		}
3683 
3684 		lastEnd = areaEnd + 1;
3685 	}
3686 
3687 	if (lastEnd < end) {
3688 		// we can also get rid of some space at the end of the area
3689 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3690 			(void*)end));
3691 		unmap_and_free_physical_pages(map, lastEnd, end);
3692 	}
3693 
3694 	map->Unlock();
3695 }
3696 
3697 
3698 static void
3699 create_preloaded_image_areas(struct preloaded_image* _image)
3700 {
3701 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3702 	char name[B_OS_NAME_LENGTH];
3703 	void* address;
3704 	int32 length;
3705 
3706 	// use file name to create a good area name
3707 	char* fileName = strrchr(image->name, '/');
3708 	if (fileName == NULL)
3709 		fileName = image->name;
3710 	else
3711 		fileName++;
3712 
3713 	length = strlen(fileName);
3714 	// make sure there is enough space for the suffix
3715 	if (length > 25)
3716 		length = 25;
3717 
3718 	memcpy(name, fileName, length);
3719 	strcpy(name + length, "_text");
3720 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3721 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3722 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3723 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3724 		// this will later be remapped read-only/executable by the
3725 		// ELF initialization code
3726 
3727 	strcpy(name + length, "_data");
3728 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3729 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3730 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3731 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3732 }
3733 
3734 
3735 /*!	Frees all previously allocated kernel arguments areas from the kernel_args
3736 	structure. Any boot loader resources contained in those arguments must not be
3737 	accessed anymore past this point.
3738 */
3739 void
3740 vm_free_kernel_args(kernel_args* args)
3741 {
3742 	uint32 i;
3743 
3744 	TRACE(("vm_free_kernel_args()\n"));
3745 
3746 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3747 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3748 		if (area >= B_OK)
3749 			delete_area(area);
3750 	}
3751 }
3752 
3753 
3754 static void
3755 allocate_kernel_args(kernel_args* args)
3756 {
3757 	TRACE(("allocate_kernel_args()\n"));
3758 
3759 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3760 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3761 
3762 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3763 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3764 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3765 	}
3766 }
3767 
3768 
3769 static void
3770 unreserve_boot_loader_ranges(kernel_args* args)
3771 {
3772 	TRACE(("unreserve_boot_loader_ranges()\n"));
3773 
3774 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3775 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3776 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3777 			args->virtual_allocated_range[i].size);
3778 	}
3779 }
3780 
3781 
3782 static void
3783 reserve_boot_loader_ranges(kernel_args* args)
3784 {
3785 	TRACE(("reserve_boot_loader_ranges()\n"));
3786 
3787 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3788 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3789 
3790 		// If the address is not a kernel address, we just skip it. The
3791 		// architecture specific code has to deal with it.
3792 		if (!IS_KERNEL_ADDRESS(address)) {
3793 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3794 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3795 			continue;
3796 		}
3797 
3798 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3799 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3800 		if (status < B_OK)
3801 			panic("could not reserve boot loader ranges\n");
3802 	}
3803 }
3804 
3805 
3806 static addr_t
3807 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3808 {
3809 	size = PAGE_ALIGN(size);
3810 
3811 	// find a slot in the virtual allocation addr range
3812 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3813 		// check to see if the space between this one and the last is big enough
3814 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3815 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3816 			+ args->virtual_allocated_range[i - 1].size;
3817 
3818 		addr_t base = alignment > 0
3819 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3820 
3821 		if (base >= KERNEL_BASE && base < rangeStart
3822 				&& rangeStart - base >= size) {
3823 			args->virtual_allocated_range[i - 1].size
3824 				+= base + size - previousRangeEnd;
3825 			return base;
3826 		}
3827 	}
3828 
3829 	// we didn't find a gap between the allocation ranges. this is ok.
3830 	// see if there's a gap after the last one
3831 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3832 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3833 		+ args->virtual_allocated_range[lastEntryIndex].size;
3834 	addr_t base = alignment > 0
3835 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3836 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3837 		args->virtual_allocated_range[lastEntryIndex].size
3838 			+= base + size - lastRangeEnd;
3839 		return base;
3840 	}
3841 
3842 	// see if there's a gap before the first one
3843 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3844 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3845 		base = rangeStart - size;
3846 		if (alignment > 0)
3847 			base = ROUNDDOWN(base, alignment);
3848 
3849 		if (base >= KERNEL_BASE) {
3850 			args->virtual_allocated_range[0].start = base;
3851 			args->virtual_allocated_range[0].size += rangeStart - base;
3852 			return base;
3853 		}
3854 	}
3855 
3856 	return 0;
3857 }
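/* Illustrative arithmetic for the gap search above (made-up values): if
   virtual_allocated_range[i - 1] ends at 0x80003200 and alignment is 0x1000,
   the candidate base is ROUNDUP(0x80003200, 0x1000) = 0x80004000. If
   virtual_allocated_range[i] starts at 0x80010000, any size of up to 0xc000
   bytes fits; the previous range is then grown by base + size
   - previousRangeEnd bytes and 0x80004000 is returned. */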
3858 
3859 
3860 static bool
3861 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3862 {
3863 	// TODO: horrible brute-force method of determining if the page can be
3864 	// allocated
3865 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3866 		if (address >= args->physical_memory_range[i].start
3867 			&& address < args->physical_memory_range[i].start
3868 				+ args->physical_memory_range[i].size)
3869 			return true;
3870 	}
3871 	return false;
3872 }
3873 
3874 
3875 page_num_t
3876 vm_allocate_early_physical_page(kernel_args* args)
3877 {
3878 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3879 		phys_addr_t nextPage;
3880 
3881 		nextPage = args->physical_allocated_range[i].start
3882 			+ args->physical_allocated_range[i].size;
3883 		// see if the page right after this allocated paddr run can be allocated
3884 		if (i + 1 < args->num_physical_allocated_ranges
3885 			&& args->physical_allocated_range[i + 1].size != 0) {
3886 			// see if the next page will collide with the next allocated range
3887 			if (nextPage >= args->physical_allocated_range[i+1].start)
3888 				continue;
3889 		}
3890 		// see if the next physical page fits in the memory block
3891 		if (is_page_in_physical_memory_range(args, nextPage)) {
3892 			// we got one!
3893 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3894 			return nextPage / B_PAGE_SIZE;
3895 		}
3896 	}
3897 
3898 	// Expanding upwards didn't work, try going downwards.
3899 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3900 		phys_addr_t nextPage;
3901 
3902 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
3903 		// see if the page right before this allocated paddr run can be allocated
3904 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
3905 			// see if this page will collide with the previous allocated range
3906 			if (nextPage < args->physical_allocated_range[i-1].start
3907 				+ args->physical_allocated_range[i-1].size)
3908 				continue;
3909 		}
3910 		// see if the next physical page fits in the memory block
3911 		if (is_page_in_physical_memory_range(args, nextPage)) {
3912 			// we got one!
3913 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
3914 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3915 			return nextPage / B_PAGE_SIZE;
3916 		}
3917 	}
3918 
3919 	return 0;
3920 		// could not allocate a block
3921 }
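/* Illustrative example (made-up values, 4 KiB pages): if
   physical_allocated_range[i] covers [0x100000, 0x103000) and the page at
   0x103000 neither collides with physical_allocated_range[i + 1] nor lies
   outside the physical memory ranges, the range is grown to
   [0x100000, 0x104000) and page number 0x103 (= 0x103000 / B_PAGE_SIZE) is
   returned. */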
3922 
3923 
3924 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3925 	allocate some pages before the VM is completely up.
3926 */
3927 addr_t
3928 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3929 	uint32 attributes, addr_t alignment)
3930 {
3931 	if (physicalSize > virtualSize)
3932 		physicalSize = virtualSize;
3933 
3934 	// find the vaddr to allocate at
3935 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3936 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3937 	if (virtualBase == 0) {
3938 		panic("vm_allocate_early: could not allocate virtual address\n");
3939 		return 0;
3940 	}
3941 
3942 	// map the pages
3943 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3944 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3945 		if (physicalAddress == 0)
3946 			panic("error allocating early page!\n");
3947 
3948 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3949 
3950 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3951 			physicalAddress * B_PAGE_SIZE, attributes,
3952 			&vm_allocate_early_physical_page);
3953 	}
3954 
3955 	return virtualBase;
3956 }
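/* A minimal usage sketch, mirroring the early heap mapping in vm_init()
   below ("args" and "heapSize" are provided by the caller):

	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
	heap_init(heapBase, heapSize);
*/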
3957 
3958 
3959 /*!	The main entrance point to initialize the VM. */
3960 status_t
3961 vm_init(kernel_args* args)
3962 {
3963 	struct preloaded_image* image;
3964 	void* address;
3965 	status_t err = 0;
3966 	uint32 i;
3967 
3968 	TRACE(("vm_init: entry\n"));
3969 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3970 	err = arch_vm_init(args);
3971 
3972 	// initialize some globals
3973 	vm_page_init_num_pages(args);
3974 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3975 
3976 	slab_init(args);
3977 
3978 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3979 	off_t heapSize = INITIAL_HEAP_SIZE;
3980 	// try to accommodate low memory systems
3981 	while (heapSize > sAvailableMemory / 8)
3982 		heapSize /= 2;
3983 	if (heapSize < 1024 * 1024)
3984 		panic("vm_init: go buy some RAM please.");
3985 
3986 	// map in the new heap and initialize it
3987 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3988 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3989 	TRACE(("heap at 0x%lx\n", heapBase));
3990 	heap_init(heapBase, heapSize);
3991 #endif
3992 
3993 	// initialize the free page list and physical page mapper
3994 	vm_page_init(args);
3995 
3996 	// initialize the cache allocators
3997 	vm_cache_init(args);
3998 
3999 	{
4000 		status_t error = VMAreaHash::Init();
4001 		if (error != B_OK)
4002 			panic("vm_init: error initializing area hash table\n");
4003 	}
4004 
4005 	VMAddressSpace::Init();
4006 	reserve_boot_loader_ranges(args);
4007 
4008 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4009 	heap_init_post_area();
4010 #endif
4011 
4012 	// Do any further initialization that the architecture dependent layers may
4013 	// need now
4014 	arch_vm_translation_map_init_post_area(args);
4015 	arch_vm_init_post_area(args);
4016 	vm_page_init_post_area(args);
4017 	slab_init_post_area();
4018 
4019 	// allocate areas to represent stuff that already exists
4020 
4021 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4022 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4023 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4024 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4025 #endif
4026 
4027 	allocate_kernel_args(args);
4028 
4029 	create_preloaded_image_areas(args->kernel_image);
4030 
4031 	// allocate areas for preloaded images
4032 	for (image = args->preloaded_images; image != NULL; image = image->next)
4033 		create_preloaded_image_areas(image);
4034 
4035 	// allocate kernel stacks
4036 	for (i = 0; i < args->num_cpus; i++) {
4037 		char name[64];
4038 
4039 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4040 		address = (void*)args->cpu_kstack[i].start;
4041 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4042 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4043 	}
4044 
4045 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4046 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4047 
4048 #if PARANOID_KERNEL_MALLOC
4049 	vm_block_address_range("uninitialized heap memory",
4050 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4051 #endif
4052 #if PARANOID_KERNEL_FREE
4053 	vm_block_address_range("freed heap memory",
4054 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4055 #endif
4056 
4057 	// create the object cache for the page mappings
4058 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4059 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4060 		NULL, NULL);
4061 	if (gPageMappingsObjectCache == NULL)
4062 		panic("failed to create page mappings object cache");
4063 
4064 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4065 
4066 #if DEBUG_CACHE_LIST
4067 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4068 		virtual_address_restrictions virtualRestrictions = {};
4069 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4070 		physical_address_restrictions physicalRestrictions = {};
4071 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4072 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4073 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4074 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4075 			&physicalRestrictions, (void**)&sCacheInfoTable);
4076 	}
4077 #endif	// DEBUG_CACHE_LIST
4078 
4079 	// add some debugger commands
4080 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4081 	add_debugger_command("area", &dump_area,
4082 		"Dump info about a particular area");
4083 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4084 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4085 #if DEBUG_CACHE_LIST
4086 	if (sCacheInfoTable != NULL) {
4087 		add_debugger_command_etc("caches", &dump_caches,
4088 			"List all VMCache trees",
4089 			"[ \"-c\" ]\n"
4090 			"All cache trees are listed sorted in decreasing order by number "
4091 				"of\n"
4092 			"used pages or, if \"-c\" is specified, by size of committed "
4093 				"memory.\n",
4094 			0);
4095 	}
4096 #endif
4097 	add_debugger_command("avail", &dump_available_memory,
4098 		"Dump available memory");
4099 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4100 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4101 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4102 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4103 	add_debugger_command("string", &display_mem, "dump strings");
4104 
4105 	add_debugger_command_etc("mapping", &dump_mapping_info,
4106 		"Print address mapping information",
4107 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4108 		"Prints low-level page mapping information for a given address. If\n"
4109 		"neither \"-r\" nor \"-p\" is specified, <address> is a virtual\n"
4110 		"address that is looked up in the translation map of the current\n"
4111 		"team, or of the team specified by thread ID <thread ID>. If \"-r\"\n"
4112 		"is specified, <address> is a physical address that is searched for\n"
4113 		"in the translation maps of all teams, or only of the team\n"
4114 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4115 		"<address> is the address of a vm_page structure. The behavior is\n"
4116 		"equivalent to specifying \"-r\" with the physical address of that\n"
4117 		"page.\n",
4118 		0);
4119 
4120 	TRACE(("vm_init: exit\n"));
4121 
4122 	vm_cache_init_post_heap();
4123 
4124 	return err;
4125 }
4126 
4127 
4128 status_t
4129 vm_init_post_sem(kernel_args* args)
4130 {
4131 	// This frees all unused boot loader resources and makes their space available
4132 	// again
4133 	arch_vm_init_end(args);
4134 	unreserve_boot_loader_ranges(args);
4135 
4136 	// fill in all of the semaphores that were not allocated before
4137 	// since we're still single threaded and only the kernel address space
4138 	// exists, it isn't that hard to find all of the ones we need to create
4139 
4140 	arch_vm_translation_map_init_post_sem(args);
4141 
4142 	slab_init_post_sem();
4143 
4144 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4145 	heap_init_post_sem();
4146 #endif
4147 
4148 	return B_OK;
4149 }
4150 
4151 
4152 status_t
4153 vm_init_post_thread(kernel_args* args)
4154 {
4155 	vm_page_init_post_thread(args);
4156 	slab_init_post_thread();
4157 	return heap_init_post_thread();
4158 }
4159 
4160 
4161 status_t
4162 vm_init_post_modules(kernel_args* args)
4163 {
4164 	return arch_vm_init_post_modules(args);
4165 }
4166 
4167 
4168 void
4169 permit_page_faults(void)
4170 {
4171 	Thread* thread = thread_get_current_thread();
4172 	if (thread != NULL)
4173 		atomic_add(&thread->page_faults_allowed, 1);
4174 }
4175 
4176 
4177 void
4178 forbid_page_faults(void)
4179 {
4180 	Thread* thread = thread_get_current_thread();
4181 	if (thread != NULL)
4182 		atomic_add(&thread->page_faults_allowed, -1);
4183 }
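/* Both functions only adjust the calling thread's page_faults_allowed
   counter; they are intended to be used as a balanced pair, e.g. (a sketch,
   the concrete call sites are elsewhere):

	forbid_page_faults();
	// ... code during which page faults are not allowed ...
	permit_page_faults();
*/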
4184 
4185 
4186 status_t
4187 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4188 	bool isUser, addr_t* newIP)
4189 {
4190 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4191 		faultAddress));
4192 
4193 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4194 
4195 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4196 	VMAddressSpace* addressSpace = NULL;
4197 
4198 	status_t status = B_OK;
4199 	*newIP = 0;
4200 	atomic_add((int32*)&sPageFaults, 1);
4201 
4202 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4203 		addressSpace = VMAddressSpace::GetKernel();
4204 	} else if (IS_USER_ADDRESS(pageAddress)) {
4205 		addressSpace = VMAddressSpace::GetCurrent();
4206 		if (addressSpace == NULL) {
4207 			if (!isUser) {
4208 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4209 					"memory!\n");
4210 				status = B_BAD_ADDRESS;
4211 				TPF(PageFaultError(-1,
4212 					VMPageFaultTracing
4213 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4214 			} else {
4215 				// XXX weird state.
4216 				panic("vm_page_fault: non kernel thread accessing user memory "
4217 					"that doesn't exist!\n");
4218 				status = B_BAD_ADDRESS;
4219 			}
4220 		}
4221 	} else {
4222 		// the hit was probably in the 64k DMZ between kernel and user space
4223 		// this keeps a user space thread from passing a buffer that crosses
4224 		// into kernel space
4225 		status = B_BAD_ADDRESS;
4226 		TPF(PageFaultError(-1,
4227 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4228 	}
4229 
4230 	if (status == B_OK) {
4231 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4232 			isUser, NULL);
4233 	}
4234 
4235 	if (status < B_OK) {
4236 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4237 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4238 			strerror(status), address, faultAddress, isWrite, isUser,
4239 			thread_get_current_thread_id());
4240 		if (!isUser) {
4241 			Thread* thread = thread_get_current_thread();
4242 			if (thread != NULL && thread->fault_handler != 0) {
4243 				// this will cause the arch dependent page fault handler to
4244 				// modify the IP on the interrupt frame or whatever to return
4245 				// to this address
4246 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4247 			} else {
4248 				// unhandled page fault in the kernel
4249 				panic("vm_page_fault: unhandled page fault in kernel space at "
4250 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4251 			}
4252 		} else {
4253 			Thread* thread = thread_get_current_thread();
4254 
4255 #ifdef TRACE_FAULTS
4256 			VMArea* area = NULL;
4257 			if (addressSpace != NULL) {
4258 				addressSpace->ReadLock();
4259 				area = addressSpace->LookupArea(faultAddress);
4260 			}
4261 
4262 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4263 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4264 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4265 				thread->team->Name(), thread->team->id,
4266 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4267 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4268 					area->Base() : 0x0));
4269 
4270 			if (addressSpace != NULL)
4271 				addressSpace->ReadUnlock();
4272 #endif
4273 
4274 			// If the thread has a signal handler for SIGSEGV, we simply
4275 			// send it the signal. Otherwise we notify the user debugger
4276 			// first.
4277 			struct sigaction action;
4278 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4279 					&& action.sa_handler != SIG_DFL
4280 					&& action.sa_handler != SIG_IGN)
4281 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4282 					SIGSEGV)) {
4283 				Signal signal(SIGSEGV,
4284 					status == B_PERMISSION_DENIED
4285 						? SEGV_ACCERR : SEGV_MAPERR,
4286 					EFAULT, thread->team->id);
4287 				signal.SetAddress((void*)address);
4288 				send_signal_to_thread(thread, signal, 0);
4289 			}
4290 		}
4291 	}
4292 
4293 	if (addressSpace != NULL)
4294 		addressSpace->Put();
4295 
4296 	return B_HANDLED_INTERRUPT;
4297 }
4298 
4299 
4300 struct PageFaultContext {
4301 	AddressSpaceReadLocker	addressSpaceLocker;
4302 	VMCacheChainLocker		cacheChainLocker;
4303 
4304 	VMTranslationMap*		map;
4305 	VMCache*				topCache;
4306 	off_t					cacheOffset;
4307 	vm_page_reservation		reservation;
4308 	bool					isWrite;
4309 
4310 	// return values
4311 	vm_page*				page;
4312 	bool					restart;
4313 	bool					pageAllocated;
4314 
4315 
4316 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4317 		:
4318 		addressSpaceLocker(addressSpace, true),
4319 		map(addressSpace->TranslationMap()),
4320 		isWrite(isWrite)
4321 	{
4322 	}
4323 
4324 	~PageFaultContext()
4325 	{
4326 		UnlockAll();
4327 		vm_page_unreserve_pages(&reservation);
4328 	}
4329 
4330 	void Prepare(VMCache* topCache, off_t cacheOffset)
4331 	{
4332 		this->topCache = topCache;
4333 		this->cacheOffset = cacheOffset;
4334 		page = NULL;
4335 		restart = false;
4336 		pageAllocated = false;
4337 
4338 		cacheChainLocker.SetTo(topCache);
4339 	}
4340 
4341 	void UnlockAll(VMCache* exceptCache = NULL)
4342 	{
4343 		topCache = NULL;
4344 		addressSpaceLocker.Unlock();
4345 		cacheChainLocker.Unlock(exceptCache);
4346 	}
4347 };
4348 
4349 
4350 /*!	Gets the page that should be mapped into the area.
4351 	Returns an error code other than \c B_OK, if the page couldn't be found or
4352 	paged in. The locking state of the address space and the caches is undefined
4353 	in that case.
4354 	Returns \c B_OK with \c context.restart set to \c true, if the function
4355 	had to unlock the address space and all caches and is supposed to be called
4356 	again.
4357 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4358 	found. It is returned in \c context.page. The address space will still be
4359 	locked as well as all caches starting from the top cache to at least the
4360 	cache the page lives in.
4361 */
4362 static status_t
4363 fault_get_page(PageFaultContext& context)
4364 {
4365 	VMCache* cache = context.topCache;
4366 	VMCache* lastCache = NULL;
4367 	vm_page* page = NULL;
4368 
4369 	while (cache != NULL) {
4370 		// We already hold the lock of the cache at this point.
4371 
4372 		lastCache = cache;
4373 
4374 		page = cache->LookupPage(context.cacheOffset);
4375 		if (page != NULL && page->busy) {
4376 			// the page is busy -- wait for it to become unbusy
4377 			context.UnlockAll(cache);
4378 			cache->ReleaseRefLocked();
4379 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4380 
4381 			// restart the whole process
4382 			context.restart = true;
4383 			return B_OK;
4384 		}
4385 
4386 		if (page != NULL)
4387 			break;
4388 
4389 		// The current cache does not contain the page we're looking for.
4390 
4391 		// see if the backing store has it
4392 		if (cache->HasPage(context.cacheOffset)) {
4393 			// insert a fresh page and mark it busy -- we're going to read it in
4394 			page = vm_page_allocate_page(&context.reservation,
4395 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4396 			cache->InsertPage(page, context.cacheOffset);
4397 
4398 			// We need to unlock all caches and the address space while reading
4399 			// the page in. Keep a reference to the cache around.
4400 			cache->AcquireRefLocked();
4401 			context.UnlockAll();
4402 
4403 			// read the page in
4404 			generic_io_vec vec;
4405 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4406 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4407 
4408 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4409 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4410 
4411 			cache->Lock();
4412 
4413 			if (status < B_OK) {
4414 				// on error remove and free the page
4415 				dprintf("reading page from cache %p returned: %s!\n",
4416 					cache, strerror(status));
4417 
4418 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4419 				cache->RemovePage(page);
4420 				vm_page_set_state(page, PAGE_STATE_FREE);
4421 
4422 				cache->ReleaseRefAndUnlock();
4423 				return status;
4424 			}
4425 
4426 			// mark the page unbusy again
4427 			cache->MarkPageUnbusy(page);
4428 
4429 			DEBUG_PAGE_ACCESS_END(page);
4430 
4431 			// Since we needed to unlock everything temporarily, the area
4432 			// situation might have changed. So we need to restart the whole
4433 			// process.
4434 			cache->ReleaseRefAndUnlock();
4435 			context.restart = true;
4436 			return B_OK;
4437 		}
4438 
4439 		cache = context.cacheChainLocker.LockSourceCache();
4440 	}
4441 
4442 	if (page == NULL) {
4443 		// There was no adequate page; determine the cache for a clean one.
4444 		// Read-only pages go into the deepest cache, only the topmost cache
4445 		// may have direct write access.
4446 		cache = context.isWrite ? context.topCache : lastCache;
4447 
4448 		// allocate a clean page
4449 		page = vm_page_allocate_page(&context.reservation,
4450 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4451 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4452 			page->physical_page_number));
4453 
4454 		// insert the new page into our cache
4455 		cache->InsertPage(page, context.cacheOffset);
4456 		context.pageAllocated = true;
4457 	} else if (page->Cache() != context.topCache && context.isWrite) {
4458 		// We have a page that has the data we want, but in the wrong cache
4459 		// object so we need to copy it and stick it into the top cache.
4460 		vm_page* sourcePage = page;
4461 
4462 		// TODO: If memory is low, it might be a good idea to steal the page
4463 		// from our source cache -- if possible, that is.
4464 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4465 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4466 
4467 		// To not needlessly kill concurrency we unlock all caches but the top
4468 		// one while copying the page. Lacking another mechanism to ensure that
4469 		// the source page doesn't disappear, we mark it busy.
4470 		sourcePage->busy = true;
4471 		context.cacheChainLocker.UnlockKeepRefs(true);
4472 
4473 		// copy the page
4474 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4475 			sourcePage->physical_page_number * B_PAGE_SIZE);
4476 
4477 		context.cacheChainLocker.RelockCaches(true);
4478 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4479 
4480 		// insert the new page into our cache
4481 		context.topCache->InsertPage(page, context.cacheOffset);
4482 		context.pageAllocated = true;
4483 	} else
4484 		DEBUG_PAGE_ACCESS_START(page);
4485 
4486 	context.page = page;
4487 	return B_OK;
4488 }
4489 
4490 
4491 /*!	Makes sure the address in the given address space is mapped.
4492 
4493 	\param addressSpace The address space.
4494 	\param originalAddress The address. Doesn't need to be page aligned.
4495 	\param isWrite If \c true the address shall be write-accessible.
4496 	\param isUser If \c true the access is requested by a userland team.
4497 	\param wirePage On success, if non \c NULL, the wired count of the page
4498 		mapped at the given address is incremented and the page is returned
4499 		via this parameter.
4500 	\return \c B_OK on success, another error code otherwise.
4501 */
4502 static status_t
4503 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4504 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4505 {
4506 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4507 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4508 		originalAddress, isWrite, isUser));
4509 
4510 	PageFaultContext context(addressSpace, isWrite);
4511 
4512 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4513 	status_t status = B_OK;
4514 
4515 	addressSpace->IncrementFaultCount();
4516 
4517 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4518 	// the pages upfront makes sure we don't have any cache locked, so that the
4519 	// page daemon/thief can do their job without problems.
4520 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4521 		originalAddress);
4522 	context.addressSpaceLocker.Unlock();
4523 	vm_page_reserve_pages(&context.reservation, reservePages,
4524 		addressSpace == VMAddressSpace::Kernel()
4525 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4526 
4527 	while (true) {
4528 		context.addressSpaceLocker.Lock();
4529 
4530 		// get the area the fault was in
4531 		VMArea* area = addressSpace->LookupArea(address);
4532 		if (area == NULL) {
4533 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4534 				"space\n", originalAddress);
4535 			TPF(PageFaultError(-1,
4536 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4537 			status = B_BAD_ADDRESS;
4538 			break;
4539 		}
4540 
4541 		// check permissions
4542 		uint32 protection = get_area_page_protection(area, address);
4543 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4544 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4545 				area->id, (void*)originalAddress);
4546 			TPF(PageFaultError(area->id,
4547 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4548 			status = B_PERMISSION_DENIED;
4549 			break;
4550 		}
4551 		if (isWrite && (protection
4552 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4553 			dprintf("write access attempted on write-protected area 0x%"
4554 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4555 			TPF(PageFaultError(area->id,
4556 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4557 			status = B_PERMISSION_DENIED;
4558 			break;
4559 		} else if (isExecute && (protection
4560 				& (B_EXECUTE_AREA
4561 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4562 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4563 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4564 			TPF(PageFaultError(area->id,
4565 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4566 			status = B_PERMISSION_DENIED;
4567 			break;
4568 		} else if (!isWrite && !isExecute && (protection
4569 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4570 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4571 				" at %p\n", area->id, (void*)originalAddress);
4572 			TPF(PageFaultError(area->id,
4573 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4574 			status = B_PERMISSION_DENIED;
4575 			break;
4576 		}
4577 
4578 		// We have the area, it was a valid access, so let's try to resolve the
4579 		// page fault now.
4580 		// At first, the top most cache from the area is investigated.
4581 
4582 		context.Prepare(vm_area_get_locked_cache(area),
4583 			address - area->Base() + area->cache_offset);
4584 
4585 		// See if this cache has a fault handler -- this will do all the work
4586 		// for us.
4587 		{
4588 			// Note, since the page fault is resolved with interrupts enabled,
4589 			// the fault handler could be called more than once for the same
4590 			// reason -- the store must take this into account.
4591 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4592 			if (status != B_BAD_HANDLER)
4593 				break;
4594 		}
4595 
4596 		// The top most cache has no fault handler, so let's see if the cache or
4597 		// its sources already have the page we're searching for (we're going
4598 		// from top to bottom).
4599 		status = fault_get_page(context);
4600 		if (status != B_OK) {
4601 			TPF(PageFaultError(area->id, status));
4602 			break;
4603 		}
4604 
4605 		if (context.restart)
4606 			continue;
4607 
4608 		// All went fine, all there is left to do is to map the page into the
4609 		// address space.
4610 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4611 			context.page));
4612 
4613 		// If the page doesn't reside in the area's cache, we need to make sure
4614 		// it's mapped read-only, so that we cannot overwrite someone else's
4615 		// data (copy-on-write)
4616 		uint32 newProtection = protection;
4617 		if (context.page->Cache() != context.topCache && !isWrite)
4618 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4619 
4620 		bool unmapPage = false;
4621 		bool mapPage = true;
4622 
4623 		// check whether there's already a page mapped at the address
4624 		context.map->Lock();
4625 
4626 		phys_addr_t physicalAddress;
4627 		uint32 flags;
4628 		vm_page* mappedPage = NULL;
4629 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4630 			&& (flags & PAGE_PRESENT) != 0
4631 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4632 				!= NULL) {
4633 			// Yep there's already a page. If it's ours, we can simply adjust
4634 			// its protection. Otherwise we have to unmap it.
4635 			if (mappedPage == context.page) {
4636 				context.map->ProtectPage(area, address, newProtection);
4637 					// Note: We assume that ProtectPage() is atomic (i.e.
4638 					// the page isn't temporarily unmapped), otherwise we'd have
4639 					// to make sure it isn't wired.
4640 				mapPage = false;
4641 			} else
4642 				unmapPage = true;
4643 		}
4644 
4645 		context.map->Unlock();
4646 
4647 		if (unmapPage) {
4648 			// If the page is wired, we can't unmap it. Wait until it is unwired
4649 			// again and restart. Note that the page cannot be wired for
4650 			// writing, since it isn't in the topmost cache. So we can safely
4651 			// ignore ranges wired for writing (our own and other concurrent
4652 			// wiring attempts in progress) and in fact have to do that to avoid
4653 			// a deadlock.
4654 			VMAreaUnwiredWaiter waiter;
4655 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4656 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4657 				// unlock everything and wait
4658 				if (context.pageAllocated) {
4659 					// ... but since we allocated a page and inserted it into
4660 					// the top cache, remove and free it first. Otherwise we'd
4661 					// have a page from a lower cache mapped while an upper
4662 					// cache has a page that would shadow it.
4663 					context.topCache->RemovePage(context.page);
4664 					vm_page_free_etc(context.topCache, context.page,
4665 						&context.reservation);
4666 				} else
4667 					DEBUG_PAGE_ACCESS_END(context.page);
4668 
4669 				context.UnlockAll();
4670 				waiter.waitEntry.Wait();
4671 				continue;
4672 			}
4673 
4674 			// Note: The mapped page is a page of a lower cache. We are
4675 			// guaranteed to have that cache locked, our new page is a copy of
4676 			// that page, and the page is not busy. The logic for that guarantee
4677 			// is as follows: Since the page is mapped, it must live in the top
4678 			// cache (ruled out above) or any of its lower caches, and there is
4679 			// (was before the new page was inserted) no other page in any
4680 			// cache between the top cache and the page's cache (otherwise that
4681 			// would be mapped instead). That in turn means that our algorithm
4682 			// must have found it and therefore it cannot be busy either.
4683 			DEBUG_PAGE_ACCESS_START(mappedPage);
4684 			unmap_page(area, address);
4685 			DEBUG_PAGE_ACCESS_END(mappedPage);
4686 		}
4687 
4688 		if (mapPage) {
4689 			if (map_page(area, context.page, address, newProtection,
4690 					&context.reservation) != B_OK) {
4691 				// Mapping can only fail when the page mapping object couldn't
4692 				// be allocated. Save for the missing mapping everything is
4693 				// fine, though. If this was a regular page fault, we'll simply
4694 				// leave and probably fault again. To make sure we'll have more
4695 				// luck then, we ensure that the minimum object reserve is
4696 				// available.
4697 				DEBUG_PAGE_ACCESS_END(context.page);
4698 
4699 				context.UnlockAll();
4700 
4701 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4702 						!= B_OK) {
4703 					// Apparently the situation is serious. Let's get ourselves
4704 					// killed.
4705 					status = B_NO_MEMORY;
4706 				} else if (wirePage != NULL) {
4707 					// The caller expects us to wire the page. Since
4708 					// object_cache_reserve() succeeded, we should now be able
4709 					// to allocate a mapping structure. Restart.
4710 					continue;
4711 				}
4712 
4713 				break;
4714 			}
4715 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4716 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4717 
4718 		// also wire the page, if requested
4719 		if (wirePage != NULL && status == B_OK) {
4720 			increment_page_wired_count(context.page);
4721 			*wirePage = context.page;
4722 		}
4723 
4724 		DEBUG_PAGE_ACCESS_END(context.page);
4725 
4726 		break;
4727 	}
4728 
4729 	return status;
4730 }
4731 
4732 
4733 status_t
4734 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4735 {
4736 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4737 }
4738 
4739 status_t
4740 vm_put_physical_page(addr_t vaddr, void* handle)
4741 {
4742 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4743 }
4744 
4745 
4746 status_t
4747 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4748 	void** _handle)
4749 {
4750 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4751 }
4752 
4753 status_t
4754 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4755 {
4756 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4757 }
4758 
4759 
4760 status_t
4761 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4762 {
4763 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4764 }
4765 
4766 status_t
4767 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4768 {
4769 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4770 }
4771 
4772 
4773 void
4774 vm_get_info(system_info* info)
4775 {
4776 	swap_get_info(info);
4777 
4778 	MutexLocker locker(sAvailableMemoryLock);
4779 	info->needed_memory = sNeededMemory;
4780 	info->free_memory = sAvailableMemory;
4781 }
4782 
4783 
4784 uint32
4785 vm_num_page_faults(void)
4786 {
4787 	return sPageFaults;
4788 }
4789 
4790 
4791 off_t
4792 vm_available_memory(void)
4793 {
4794 	MutexLocker locker(sAvailableMemoryLock);
4795 	return sAvailableMemory;
4796 }
4797 
4798 
4799 off_t
4800 vm_available_not_needed_memory(void)
4801 {
4802 	MutexLocker locker(sAvailableMemoryLock);
4803 	return sAvailableMemory - sNeededMemory;
4804 }
4805 
4806 
4807 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4808 	debugger.
4809 */
4810 off_t
4811 vm_available_not_needed_memory_debug(void)
4812 {
4813 	return sAvailableMemory - sNeededMemory;
4814 }
4815 
4816 
4817 size_t
4818 vm_kernel_address_space_left(void)
4819 {
4820 	return VMAddressSpace::Kernel()->FreeSpace();
4821 }
4822 
4823 
4824 void
4825 vm_unreserve_memory(size_t amount)
4826 {
4827 	mutex_lock(&sAvailableMemoryLock);
4828 
4829 	sAvailableMemory += amount;
4830 
4831 	mutex_unlock(&sAvailableMemoryLock);
4832 }
4833 
4834 
4835 status_t
4836 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4837 {
4838 	size_t reserve = kMemoryReserveForPriority[priority];
4839 
4840 	MutexLocker locker(sAvailableMemoryLock);
4841 
4842 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4843 
4844 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4845 		sAvailableMemory -= amount;
4846 		return B_OK;
4847 	}
4848 
4849 	if (timeout <= 0)
4850 		return B_NO_MEMORY;
4851 
4852 	// turn timeout into an absolute timeout
4853 	timeout += system_time();
4854 
4855 	// loop until we've got the memory or the timeout occurs
4856 	do {
4857 		sNeededMemory += amount;
4858 
4859 		// call the low resource manager
4860 		locker.Unlock();
4861 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4862 			B_ABSOLUTE_TIMEOUT, timeout);
4863 		locker.Lock();
4864 
4865 		sNeededMemory -= amount;
4866 
4867 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4868 			sAvailableMemory -= amount;
4869 			return B_OK;
4870 		}
4871 	} while (timeout > system_time());
4872 
4873 	return B_NO_MEMORY;
4874 }
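/* A minimal usage sketch (hypothetical caller reserving "size" bytes on
   behalf of a userland request, willing to wait up to one second):

	if (vm_try_reserve_memory(size, VM_PRIORITY_USER, 1000000) != B_OK)
		return B_NO_MEMORY;
	// ... if a later step fails, the reservation has to be returned:
	vm_unreserve_memory(size);
*/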
4875 
4876 
4877 status_t
4878 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4879 {
4880 	// NOTE: The caller is responsible for synchronizing calls to this function!
4881 
4882 	AddressSpaceReadLocker locker;
4883 	VMArea* area;
4884 	status_t status = locker.SetFromArea(id, area);
4885 	if (status != B_OK)
4886 		return status;
4887 
4888 	// nothing to do, if the type doesn't change
4889 	uint32 oldType = area->MemoryType();
4890 	if (type == oldType)
4891 		return B_OK;
4892 
4893 	// set the memory type of the area and the mapped pages
4894 	VMTranslationMap* map = area->address_space->TranslationMap();
4895 	map->Lock();
4896 	area->SetMemoryType(type);
4897 	map->ProtectArea(area, area->protection);
4898 	map->Unlock();
4899 
4900 	// set the physical memory type
4901 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4902 	if (error != B_OK) {
4903 		// reset the memory type of the area and the mapped pages
4904 		map->Lock();
4905 		area->SetMemoryType(oldType);
4906 		map->ProtectArea(area, area->protection);
4907 		map->Unlock();
4908 		return error;
4909 	}
4910 
4911 	return B_OK;
4912 
4913 }
4914 
4915 
4916 /*!	This function enforces some protection properties:
4917 	 - kernel areas must be W^X (after kernel startup)
4918 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4919 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4920 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4921 	   and B_KERNEL_WRITE_AREA.
4922 */
4923 static void
4924 fix_protection(uint32* protection)
4925 {
4926 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
4927 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
4928 			|| (*protection & B_WRITE_AREA) != 0)
4929 		&& !gKernelStartup)
4930 		panic("kernel areas cannot be both writable and executable!");
4931 
4932 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4933 		if ((*protection & B_USER_PROTECTION) == 0
4934 			|| (*protection & B_WRITE_AREA) != 0)
4935 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4936 		else
4937 			*protection |= B_KERNEL_READ_AREA;
4938 	}
4939 }
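/* Examples of the normalization performed above (derived directly from the
   rules in the code):
     0                           -> B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA
     B_READ_AREA                 -> B_READ_AREA | B_KERNEL_READ_AREA
     B_READ_AREA | B_WRITE_AREA  -> B_READ_AREA | B_WRITE_AREA
                                      | B_KERNEL_READ_AREA
                                      | B_KERNEL_WRITE_AREA
*/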
4940 
4941 
4942 static void
4943 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4944 {
4945 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4946 	info->area = area->id;
4947 	info->address = (void*)area->Base();
4948 	info->size = area->Size();
4949 	info->protection = area->protection;
4950 	info->lock = B_FULL_LOCK;
4951 	info->team = area->address_space->ID();
4952 	info->copy_count = 0;
4953 	info->in_count = 0;
4954 	info->out_count = 0;
4955 		// TODO: retrieve real values here!
4956 
4957 	VMCache* cache = vm_area_get_locked_cache(area);
4958 
4959 	// Note, this is a simplification; the cache could be larger than this area
4960 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4961 
4962 	vm_area_put_locked_cache(cache);
4963 }
4964 
4965 
4966 static status_t
4967 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4968 {
4969 	// is newSize a multiple of B_PAGE_SIZE?
4970 	if (newSize & (B_PAGE_SIZE - 1))
4971 		return B_BAD_VALUE;
4972 
4973 	// lock all affected address spaces and the cache
4974 	VMArea* area;
4975 	VMCache* cache;
4976 
4977 	MultiAddressSpaceLocker locker;
4978 	AreaCacheLocker cacheLocker;
4979 
4980 	status_t status;
4981 	size_t oldSize;
4982 	bool anyKernelArea;
4983 	bool restart;
4984 
4985 	do {
4986 		anyKernelArea = false;
4987 		restart = false;
4988 
4989 		locker.Unset();
4990 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4991 		if (status != B_OK)
4992 			return status;
4993 		cacheLocker.SetTo(cache, true);	// already locked
4994 
4995 		// enforce restrictions
4996 		if (!kernel && area->address_space == VMAddressSpace::Kernel()) {
4997 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
4998 				"resize kernel area %" B_PRId32 " (%s)\n",
4999 				team_get_current_team_id(), areaID, area->name);
5000 			return B_NOT_ALLOWED;
5001 		}
5002 		// TODO: Enforce all restrictions (team, etc.)!
5003 
5004 		oldSize = area->Size();
5005 		if (newSize == oldSize)
5006 			return B_OK;
5007 
5008 		if (cache->type != CACHE_TYPE_RAM)
5009 			return B_NOT_ALLOWED;
5010 
5011 		if (oldSize < newSize) {
5012 			// We need to check if all areas of this cache can be resized.
5013 			for (VMArea* current = cache->areas; current != NULL;
5014 					current = current->cache_next) {
5015 				if (!current->address_space->CanResizeArea(current, newSize))
5016 					return B_ERROR;
5017 				anyKernelArea
5018 					|= current->address_space == VMAddressSpace::Kernel();
5019 			}
5020 		} else {
5021 			// We're shrinking the areas, so we must make sure the affected
5022 			// ranges are not wired.
5023 			for (VMArea* current = cache->areas; current != NULL;
5024 					current = current->cache_next) {
5025 				anyKernelArea
5026 					|= current->address_space == VMAddressSpace::Kernel();
5027 
5028 				if (wait_if_area_range_is_wired(current,
5029 						current->Base() + newSize, oldSize - newSize, &locker,
5030 						&cacheLocker)) {
5031 					restart = true;
5032 					break;
5033 				}
5034 			}
5035 		}
5036 	} while (restart);
5037 
5038 	// Okay, looks good so far, so let's do it
5039 
5040 	int priority = kernel && anyKernelArea
5041 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5042 	uint32 allocationFlags = kernel && anyKernelArea
5043 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5044 
5045 	if (oldSize < newSize) {
5046 		// Growing the cache can fail, so we do it first.
5047 		status = cache->Resize(cache->virtual_base + newSize, priority);
5048 		if (status != B_OK)
5049 			return status;
5050 	}
5051 
5052 	for (VMArea* current = cache->areas; current != NULL;
5053 			current = current->cache_next) {
5054 		status = current->address_space->ResizeArea(current, newSize,
5055 			allocationFlags);
5056 		if (status != B_OK)
5057 			break;
5058 
5059 		// We also need to unmap all pages beyond the new size, if the area has
5060 		// shrunk
5061 		if (newSize < oldSize) {
5062 			VMCacheChainLocker cacheChainLocker(cache);
5063 			cacheChainLocker.LockAllSourceCaches();
5064 
5065 			unmap_pages(current, current->Base() + newSize,
5066 				oldSize - newSize);
5067 
5068 			cacheChainLocker.Unlock(cache);
5069 		}
5070 	}
5071 
5072 	if (status == B_OK) {
5073 		// Shrink or grow individual page protections if in use.
5074 		if (area->page_protections != NULL) {
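			// One nibble (4 bits) of protection flags is kept per page, so an
			// area of pageCount pages needs (pageCount + 1) / 2 bytes; e.g.
			// 5 pages fit into 3 bytes, with the last byte using only its low
			// nibble.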
5075 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5076 			uint8* newProtections
5077 				= (uint8*)realloc(area->page_protections, bytes);
5078 			if (newProtections == NULL)
5079 				status = B_NO_MEMORY;
5080 			else {
5081 				area->page_protections = newProtections;
5082 
5083 				if (oldSize < newSize) {
5084 					// init the additional page protections to that of the area
5085 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5086 					uint32 areaProtection = area->protection
5087 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5088 					memset(area->page_protections + offset,
5089 						areaProtection | (areaProtection << 4), bytes - offset);
5090 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5091 						uint8& entry = area->page_protections[offset - 1];
5092 						entry = (entry & 0x0f) | (areaProtection << 4);
5093 					}
5094 				}
5095 			}
5096 		}
5097 	}
5098 
5099 	// shrinking the cache can't fail, so we do it now
5100 	if (status == B_OK && newSize < oldSize)
5101 		status = cache->Resize(cache->virtual_base + newSize, priority);
5102 
5103 	if (status != B_OK) {
5104 		// Something failed -- resize the areas back to their original size.
5105 		// This can fail, too, in which case we're seriously screwed.
5106 		for (VMArea* current = cache->areas; current != NULL;
5107 				current = current->cache_next) {
5108 			if (current->address_space->ResizeArea(current, oldSize,
5109 					allocationFlags) != B_OK) {
5110 				panic("vm_resize_area(): Failed and unable to restore the "
5111 					"original state.");
5112 			}
5113 		}
5114 
5115 		cache->Resize(cache->virtual_base + oldSize, priority);
5116 	}
5117 
5118 	// TODO: we must honour the lock restrictions of this area
5119 	return status;
5120 }
5121 
5122 
5123 status_t
5124 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5125 {
5126 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5127 }
5128 
5129 
5130 status_t
5131 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5132 {
5133 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5134 }
5135 
5136 
5137 status_t
5138 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5139 	bool user)
5140 {
5141 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5142 }
5143 
5144 
5145 void
5146 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5147 {
5148 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5149 }
5150 
5151 
5152 /*!	Copies a range of memory directly from/to a page that might not be mapped
5153 	at the moment.
5154 
5155 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5156 	walks through the respective area's cache chain to find the physical page
5157 	and copies from/to it directly.
5158 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5159 	must not cross a page boundary.
5160 
5161 	\param teamID The team ID identifying the address space \a unsafeMemory is
5162 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5163 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5164 		is passed, the address space of the thread returned by
5165 		debug_get_debugged_thread() is used.
5166 	\param unsafeMemory The start of the unsafe memory range to be copied
5167 		from/to.
5168 	\param buffer A safely accessible kernel buffer to be copied from/to.
5169 	\param size The number of bytes to be copied.
5170 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5171 		\a unsafeMemory, the other way around otherwise.
5172 */
5173 status_t
5174 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5175 	size_t size, bool copyToUnsafe)
5176 {
5177 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5178 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5179 		return B_BAD_VALUE;
5180 	}
5181 
5182 	// get the address space for the debugged thread
5183 	VMAddressSpace* addressSpace;
5184 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5185 		addressSpace = VMAddressSpace::Kernel();
5186 	} else if (teamID == B_CURRENT_TEAM) {
5187 		Thread* thread = debug_get_debugged_thread();
5188 		if (thread == NULL || thread->team == NULL)
5189 			return B_BAD_ADDRESS;
5190 
5191 		addressSpace = thread->team->address_space;
5192 	} else
5193 		addressSpace = VMAddressSpace::DebugGet(teamID);
5194 
5195 	if (addressSpace == NULL)
5196 		return B_BAD_ADDRESS;
5197 
5198 	// get the area
5199 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5200 	if (area == NULL)
5201 		return B_BAD_ADDRESS;
5202 
5203 	// search the page
5204 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5205 		+ area->cache_offset;
5206 	VMCache* cache = area->cache;
5207 	vm_page* page = NULL;
5208 	while (cache != NULL) {
5209 		page = cache->DebugLookupPage(cacheOffset);
5210 		if (page != NULL)
5211 			break;
5212 
5213 		// Page not found in this cache -- if it is paged out, we must not try
5214 		// to get it from lower caches.
5215 		if (cache->DebugHasPage(cacheOffset))
5216 			break;
5217 
5218 		cache = cache->source;
5219 	}
5220 
5221 	if (page == NULL)
5222 		return B_UNSUPPORTED;
5223 
5224 	// copy from/to physical memory
5225 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5226 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5227 
5228 	if (copyToUnsafe) {
5229 		if (page->Cache() != area->cache)
5230 			return B_UNSUPPORTED;
5231 
5232 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5233 	}
5234 
5235 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5236 }
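

/*	Usage sketch for vm_debug_copy_page_memory() (illustrative only; meant for
	kernel debugger context, with teamID and userAddress taken from a debugger
	command's arguments):

		uint32 value;
		if (vm_debug_copy_page_memory(teamID, (void*)userAddress, &value,
				sizeof(value), false) == B_OK) {
			// note: the sizeof(value) bytes must not cross a page boundary
			kprintf("%#" B_PRIxADDR ": %#" B_PRIx32 "\n", userAddress, value);
		}
*/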
5237 
5238 
5239 //	#pragma mark - kernel public API
5240 
5241 
5242 status_t
5243 user_memcpy(void* to, const void* from, size_t size)
5244 {
5245 	// don't allow address overflows
5246 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5247 		return B_BAD_ADDRESS;
5248 
5249 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5250 		return B_BAD_ADDRESS;
5251 
5252 	return B_OK;
5253 }
5254 
5255 
5256 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5257 	the string in \a to, NULL-terminating the result.
5258 
5259 	\param to Pointer to the destination C-string.
5260 	\param from Pointer to the source C-string.
5261 	\param size Size in bytes of the string buffer pointed to by \a to.
5262 
5263 	\return strlen(\a from) on success, an error code otherwise.
5264 */
5265 ssize_t
5266 user_strlcpy(char* to, const char* from, size_t size)
5267 {
5268 	if (to == NULL && size != 0)
5269 		return B_BAD_VALUE;
5270 	if (from == NULL)
5271 		return B_BAD_ADDRESS;
5272 
5273 	// limit size to avoid address overflows
5274 	size_t maxSize = std::min((addr_t)size,
5275 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5276 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5277 		// the source address might still overflow.
5278 
5279 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5280 
5281 	// If we hit the address overflow boundary, fail.
5282 	if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5283 			&& maxSize < size)) {
5284 		return B_BAD_ADDRESS;
5285 	}
5286 
5287 	return result;
5288 }
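

/*	Usage sketch for the userland copy helpers above (illustrative only): the
	usual syscall pattern for pulling in a string and a structure argument.
	The parameter and type names are placeholders.

		char name[B_OS_NAME_LENGTH];
		some_info_type info;
		if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userInfo)
			|| user_strlcpy(name, userName, sizeof(name)) < B_OK
			|| user_memcpy(&info, userInfo, sizeof(info)) != B_OK)
			return B_BAD_ADDRESS;
*/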
5289 
5290 
5291 status_t
5292 user_memset(void* s, char c, size_t count)
5293 {
5294 	// don't allow address overflows
5295 	if ((addr_t)s + count < (addr_t)s)
5296 		return B_BAD_ADDRESS;
5297 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5298 		return B_BAD_ADDRESS;
5299 
5300 	return B_OK;
5301 }
5302 
5303 
5304 /*!	Wires a single page at the given address.
5305 
5306 	\param team The team whose address space the address belongs to. Supports
5307 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5308 		parameter is ignored.
5309 	\param address The virtual address to wire down. Does not need to
5310 		be page aligned.
5311 	\param writable If \c true the page shall be writable.
5312 	\param info On success the info is filled in, among other things
5313 		containing the physical address the given virtual one translates to.
5314 	\return \c B_OK if the page could be wired, another error code otherwise.
5315 */
5316 status_t
5317 vm_wire_page(team_id team, addr_t address, bool writable,
5318 	VMPageWiringInfo* info)
5319 {
5320 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5321 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5322 
5323 	// compute the page protection that is required
5324 	bool isUser = IS_USER_ADDRESS(address);
5325 	uint32 requiredProtection = PAGE_PRESENT
5326 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5327 	if (writable)
5328 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5329 
5330 	// get and read lock the address space
5331 	VMAddressSpace* addressSpace = NULL;
5332 	if (isUser) {
5333 		if (team == B_CURRENT_TEAM)
5334 			addressSpace = VMAddressSpace::GetCurrent();
5335 		else
5336 			addressSpace = VMAddressSpace::Get(team);
5337 	} else
5338 		addressSpace = VMAddressSpace::GetKernel();
5339 	if (addressSpace == NULL)
5340 		return B_ERROR;
5341 
5342 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5343 
5344 	VMTranslationMap* map = addressSpace->TranslationMap();
5345 	status_t error = B_OK;
5346 
5347 	// get the area
5348 	VMArea* area = addressSpace->LookupArea(pageAddress);
5349 	if (area == NULL) {
5350 		addressSpace->Put();
5351 		return B_BAD_ADDRESS;
5352 	}
5353 
5354 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5355 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5356 
5357 	// mark the area range wired
5358 	area->Wire(&info->range);
5359 
5360 	// Lock the area's cache chain and the translation map. Needed to look
5361 	// up the page and play with its wired count.
5362 	cacheChainLocker.LockAllSourceCaches();
5363 	map->Lock();
5364 
5365 	phys_addr_t physicalAddress;
5366 	uint32 flags;
5367 	vm_page* page;
5368 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5369 		&& (flags & requiredProtection) == requiredProtection
5370 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5371 			!= NULL) {
5372 		// Already mapped with the correct permissions -- just increment
5373 		// the page's wired count.
5374 		increment_page_wired_count(page);
5375 
5376 		map->Unlock();
5377 		cacheChainLocker.Unlock();
5378 		addressSpaceLocker.Unlock();
5379 	} else {
5380 		// Let vm_soft_fault() map the page for us, if possible. We need
5381 		// to fully unlock to avoid deadlocks. Since we have already
5382 		// wired the area itself, nothing disturbing will happen with it
5383 		// in the meantime.
5384 		map->Unlock();
5385 		cacheChainLocker.Unlock();
5386 		addressSpaceLocker.Unlock();
5387 
5388 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5389 			isUser, &page);
5390 
5391 		if (error != B_OK) {
5392 			// The page could not be mapped -- clean up.
5393 			VMCache* cache = vm_area_get_locked_cache(area);
5394 			area->Unwire(&info->range);
5395 			cache->ReleaseRefAndUnlock();
5396 			addressSpace->Put();
5397 			return error;
5398 		}
5399 	}
5400 
5401 	info->physicalAddress
5402 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5403 			+ address % B_PAGE_SIZE;
5404 	info->page = page;
5405 
5406 	return B_OK;
5407 }
5408 
5409 
5410 /*!	Unwires a single page previously wired via vm_wire_page().
5411 
5412 	\param info The same object passed to vm_wire_page() before.
5413 */
5414 void
5415 vm_unwire_page(VMPageWiringInfo* info)
5416 {
5417 	// lock the address space
5418 	VMArea* area = info->range.area;
5419 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5420 		// takes over our reference
5421 
5422 	// lock the top cache
5423 	VMCache* cache = vm_area_get_locked_cache(area);
5424 	VMCacheChainLocker cacheChainLocker(cache);
5425 
5426 	if (info->page->Cache() != cache) {
5427 		// The page is not in the top cache, so we lock the whole cache chain
5428 		// before touching the page's wired count.
5429 		cacheChainLocker.LockAllSourceCaches();
5430 	}
5431 
5432 	decrement_page_wired_count(info->page);
5433 
5434 	// remove the wired range from the area
5435 	area->Unwire(&info->range);
5436 
5437 	cacheChainLocker.Unlock();
5438 }
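

/*	Usage sketch for vm_wire_page()/vm_unwire_page() (illustrative only; the
	address, buffer and size variables are assumed to be provided by the
	caller, and the copy must not leave the wired page):

		VMPageWiringInfo wiringInfo;
		status_t status = vm_wire_page(B_CURRENT_TEAM, userAddress, true,
			&wiringInfo);
		if (status != B_OK)
			return status;

		vm_memcpy_to_physical(wiringInfo.physicalAddress, kernelBuffer,
			bytesToCopy, false);

		vm_unwire_page(&wiringInfo);
*/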
5439 
5440 
5441 /*!	Wires down the given address range in the specified team's address space.
5442 
5443 	If successful the function
5444 	- acquires a reference to the specified team's address space,
5445 	- adds respective wired ranges to all areas that intersect with the given
5446 	  address range,
5447 	- makes sure all pages in the given address range are mapped with the
5448 	  requested access permissions and increments their wired count.
5449 
5450 	It fails when \a team doesn't specify a valid address space, when any part
5451 	of the specified address range is not covered by areas, when the concerned
5452 	areas don't allow mapping with the requested permissions, or when mapping
5453 	failed for another reason.
5454 
5455 	When successful the call must be balanced by an unlock_memory_etc() call with
5456 	the exact same parameters.
5457 
5458 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5459 		supported.
5460 	\param address The start of the address range to be wired.
5461 	\param numBytes The size of the address range to be wired.
5462 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5463 		requests that the range must be wired writable ("read from device
5464 		into memory").
5465 	\return \c B_OK on success, another error code otherwise.
5466 */
5467 status_t
5468 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5469 {
5470 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5471 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5472 
5473 	// compute the page protection that is required
5474 	bool isUser = IS_USER_ADDRESS(address);
5475 	bool writable = (flags & B_READ_DEVICE) == 0;
5476 	uint32 requiredProtection = PAGE_PRESENT
5477 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5478 	if (writable)
5479 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5480 
5481 	uint32 mallocFlags = isUser
5482 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5483 
5484 	// get and read lock the address space
5485 	VMAddressSpace* addressSpace = NULL;
5486 	if (isUser) {
5487 		if (team == B_CURRENT_TEAM)
5488 			addressSpace = VMAddressSpace::GetCurrent();
5489 		else
5490 			addressSpace = VMAddressSpace::Get(team);
5491 	} else
5492 		addressSpace = VMAddressSpace::GetKernel();
5493 	if (addressSpace == NULL)
5494 		return B_ERROR;
5495 
5496 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5497 		// We get a new address space reference here. The one we got above will
5498 		// be freed by unlock_memory_etc().
5499 
5500 	VMTranslationMap* map = addressSpace->TranslationMap();
5501 	status_t error = B_OK;
5502 
5503 	// iterate through all concerned areas
5504 	addr_t nextAddress = lockBaseAddress;
5505 	while (nextAddress != lockEndAddress) {
5506 		// get the next area
5507 		VMArea* area = addressSpace->LookupArea(nextAddress);
5508 		if (area == NULL) {
5509 			error = B_BAD_ADDRESS;
5510 			break;
5511 		}
5512 
5513 		addr_t areaStart = nextAddress;
5514 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5515 
5516 		// allocate the wired range (do that before locking the cache to avoid
5517 		// deadlocks)
5518 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5519 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5520 		if (range == NULL) {
5521 			error = B_NO_MEMORY;
5522 			break;
5523 		}
5524 
5525 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5526 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5527 
5528 		// mark the area range wired
5529 		area->Wire(range);
5530 
5531 		// Depending on the area cache type and the wiring, we may not need to
5532 		// look at the individual pages.
5533 		if (area->cache_type == CACHE_TYPE_NULL
5534 			|| area->cache_type == CACHE_TYPE_DEVICE
5535 			|| area->wiring == B_FULL_LOCK
5536 			|| area->wiring == B_CONTIGUOUS) {
5537 			nextAddress = areaEnd;
5538 			continue;
5539 		}
5540 
5541 		// Lock the area's cache chain and the translation map. Needed to look
5542 		// up pages and play with their wired count.
5543 		cacheChainLocker.LockAllSourceCaches();
5544 		map->Lock();
5545 
5546 		// iterate through the pages and wire them
5547 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5548 			phys_addr_t physicalAddress;
5549 			uint32 flags;
5550 
5551 			vm_page* page;
5552 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5553 				&& (flags & requiredProtection) == requiredProtection
5554 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5555 					!= NULL) {
5556 				// Already mapped with the correct permissions -- just increment
5557 				// the page's wired count.
5558 				increment_page_wired_count(page);
5559 			} else {
5560 				// Let vm_soft_fault() map the page for us, if possible. We need
5561 				// to fully unlock to avoid deadlocks. Since we have already
5562 				// wired the area itself, nothing disturbing will happen with it
5563 				// in the meantime.
5564 				map->Unlock();
5565 				cacheChainLocker.Unlock();
5566 				addressSpaceLocker.Unlock();
5567 
5568 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5569 					false, isUser, &page);
5570 
5571 				addressSpaceLocker.Lock();
5572 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5573 				cacheChainLocker.LockAllSourceCaches();
5574 				map->Lock();
5575 			}
5576 
5577 			if (error != B_OK)
5578 				break;
5579 		}
5580 
5581 		map->Unlock();
5582 
5583 		if (error == B_OK) {
5584 			cacheChainLocker.Unlock();
5585 		} else {
5586 			// An error occurred, so abort right here. If the current address
5587 			// is the first in this area, unwire the area, since we won't get
5588 			// to it when reverting what we've done so far.
5589 			if (nextAddress == areaStart) {
5590 				area->Unwire(range);
5591 				cacheChainLocker.Unlock();
5592 				range->~VMAreaWiredRange();
5593 				free_etc(range, mallocFlags);
5594 			} else
5595 				cacheChainLocker.Unlock();
5596 
5597 			break;
5598 		}
5599 	}
5600 
5601 	if (error != B_OK) {
5602 		// An error occurred, so unwire all that we've already wired. Note that
5603 		// even if not a single page was wired, unlock_memory_etc() is called
5604 		// to put the address space reference.
5605 		addressSpaceLocker.Unlock();
5606 		unlock_memory_etc(team, (void*)lockBaseAddress,
5607 			nextAddress - lockBaseAddress, flags);
5608 	}
5609 
5610 	return error;
5611 }
5612 
5613 
5614 status_t
5615 lock_memory(void* address, size_t numBytes, uint32 flags)
5616 {
5617 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5618 }
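

/*	Usage sketch for lock_memory_etc()/unlock_memory_etc() (illustrative only;
	the buffer is assumed to be provided by the caller). Note the balancing
	call with the exact same parameters, as required above:

		status_t status = lock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
		if (status != B_OK)
			return status;

		// ... perform the device transfer ...

		unlock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
*/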
5619 
5620 
5621 /*!	Unwires an address range previously wired with lock_memory_etc().
5622 
5623 	Note that a call to this function must balance a previous lock_memory_etc()
5624 	call with exactly the same parameters.
5625 */
5626 status_t
5627 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5628 {
5629 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5630 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5631 
5632 	// compute the page protection that is required
5633 	bool isUser = IS_USER_ADDRESS(address);
5634 	bool writable = (flags & B_READ_DEVICE) == 0;
5635 	uint32 requiredProtection = PAGE_PRESENT
5636 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5637 	if (writable)
5638 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5639 
5640 	uint32 mallocFlags = isUser
5641 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5642 
5643 	// get and read lock the address space
5644 	VMAddressSpace* addressSpace = NULL;
5645 	if (isUser) {
5646 		if (team == B_CURRENT_TEAM)
5647 			addressSpace = VMAddressSpace::GetCurrent();
5648 		else
5649 			addressSpace = VMAddressSpace::Get(team);
5650 	} else
5651 		addressSpace = VMAddressSpace::GetKernel();
5652 	if (addressSpace == NULL)
5653 		return B_ERROR;
5654 
5655 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5656 		// Take over the address space reference. We don't unlock until we're
5657 		// done.
5658 
5659 	VMTranslationMap* map = addressSpace->TranslationMap();
5660 	status_t error = B_OK;
5661 
5662 	// iterate through all concerned areas
5663 	addr_t nextAddress = lockBaseAddress;
5664 	while (nextAddress != lockEndAddress) {
5665 		// get the next area
5666 		VMArea* area = addressSpace->LookupArea(nextAddress);
5667 		if (area == NULL) {
5668 			error = B_BAD_ADDRESS;
5669 			break;
5670 		}
5671 
5672 		addr_t areaStart = nextAddress;
5673 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5674 
5675 		// Lock the area's top cache. This is a requirement for
5676 		// VMArea::Unwire().
5677 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5678 
5679 		// Depending on the area cache type and the wiring, we may not need to
5680 		// look at the individual pages.
5681 		if (area->cache_type == CACHE_TYPE_NULL
5682 			|| area->cache_type == CACHE_TYPE_DEVICE
5683 			|| area->wiring == B_FULL_LOCK
5684 			|| area->wiring == B_CONTIGUOUS) {
5685 			// unwire the range (to avoid deadlocks we delete the range after
5686 			// unlocking the cache)
5687 			nextAddress = areaEnd;
5688 			VMAreaWiredRange* range = area->Unwire(areaStart,
5689 				areaEnd - areaStart, writable);
5690 			cacheChainLocker.Unlock();
5691 			if (range != NULL) {
5692 				range->~VMAreaWiredRange();
5693 				free_etc(range, mallocFlags);
5694 			}
5695 			continue;
5696 		}
5697 
5698 		// Lock the area's cache chain and the translation map. Needed to look
5699 		// up pages and play with their wired count.
5700 		cacheChainLocker.LockAllSourceCaches();
5701 		map->Lock();
5702 
5703 		// iterate through the pages and unwire them
5704 			panic("get_memory_map(): Address is greater than 4 GB!");
5705 			phys_addr_t physicalAddress;
5706 			uint32 flags;
5707 
5708 			vm_page* page;
5709 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5710 				&& (flags & PAGE_PRESENT) != 0
5711 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5712 					!= NULL) {
5713 				// The page is still mapped -- just decrement its wired
5714 				// count.
5715 				decrement_page_wired_count(page);
5716 			} else {
5717 				panic("unlock_memory_etc(): Failed to unwire page: address "
5718 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5719 					nextAddress);
5720 				error = B_BAD_VALUE;
5721 				break;
5722 			}
5723 		}
5724 
5725 		map->Unlock();
5726 
5727 		// All pages are unwired. Remove the area's wired range as well (to
5728 		// avoid deadlocks we delete the range after unlocking the cache).
5729 		VMAreaWiredRange* range = area->Unwire(areaStart,
5730 			areaEnd - areaStart, writable);
5731 
5732 		cacheChainLocker.Unlock();
5733 
5734 		if (range != NULL) {
5735 			range->~VMAreaWiredRange();
5736 			free_etc(range, mallocFlags);
5737 		}
5738 
5739 		if (error != B_OK)
5740 			break;
5741 	}
5742 
5743 	// get rid of the address space reference lock_memory_etc() acquired
5744 	addressSpace->Put();
5745 
5746 	return error;
5747 }
5748 
5749 
5750 status_t
5751 unlock_memory(void* address, size_t numBytes, uint32 flags)
5752 {
5753 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5754 }
5755 
5756 
5757 /*!	Similar to get_memory_map(), but also allows specifying the address space
5758 	for the memory in question and has saner semantics.
5759 	Returns \c B_OK when the complete range could be translated or
5760 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5761 	case the actual number of entries is written to \c *_numEntries. Any other
5762 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5763 	in this case.
5764 */
5765 status_t
5766 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5767 	physical_entry* table, uint32* _numEntries)
5768 {
5769 	uint32 numEntries = *_numEntries;
5770 	*_numEntries = 0;
5771 
5772 	VMAddressSpace* addressSpace;
5773 	addr_t virtualAddress = (addr_t)address;
5774 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5775 	phys_addr_t physicalAddress;
5776 	status_t status = B_OK;
5777 	int32 index = -1;
5778 	addr_t offset = 0;
5779 	bool interrupts = are_interrupts_enabled();
5780 
5781 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5782 		"entries)\n", team, address, numBytes, numEntries));
5783 
5784 	if (numEntries == 0 || numBytes == 0)
5785 		return B_BAD_VALUE;
5786 
5787 	// in which address space is the address to be found?
5788 	if (IS_USER_ADDRESS(virtualAddress)) {
5789 		if (team == B_CURRENT_TEAM)
5790 			addressSpace = VMAddressSpace::GetCurrent();
5791 		else
5792 			addressSpace = VMAddressSpace::Get(team);
5793 	} else
5794 		addressSpace = VMAddressSpace::GetKernel();
5795 
5796 	if (addressSpace == NULL)
5797 		return B_ERROR;
5798 
5799 	VMTranslationMap* map = addressSpace->TranslationMap();
5800 
5801 	if (interrupts)
5802 		map->Lock();
5803 
5804 	while (offset < numBytes) {
5805 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5806 		uint32 flags;
5807 
5808 		if (interrupts) {
5809 			status = map->Query((addr_t)address + offset, &physicalAddress,
5810 				&flags);
5811 		} else {
5812 			status = map->QueryInterrupt((addr_t)address + offset,
5813 				&physicalAddress, &flags);
5814 		}
5815 		if (status < B_OK)
5816 			break;
5817 		if ((flags & PAGE_PRESENT) == 0) {
5818 			panic("get_memory_map() called on unmapped memory!");
5819 			return B_BAD_ADDRESS;
5820 		}
5821 
5822 		if (index < 0 && pageOffset > 0) {
5823 			physicalAddress += pageOffset;
5824 			if (bytes > B_PAGE_SIZE - pageOffset)
5825 				bytes = B_PAGE_SIZE - pageOffset;
5826 		}
5827 
5828 		// need to switch to the next physical_entry?
5829 		if (index < 0 || table[index].address
5830 				!= physicalAddress - table[index].size) {
5831 			if ((uint32)++index + 1 > numEntries) {
5832 				// table too small
5833 				break;
5834 			}
5835 			table[index].address = physicalAddress;
5836 			table[index].size = bytes;
5837 		} else {
5838 			// the page fits into the current entry
5839 			table[index].size += bytes;
5840 		}
5841 
5842 		offset += bytes;
5843 	}
5844 
5845 	if (interrupts)
5846 		map->Unlock();
5847 
5848 	if (status != B_OK)
5849 		return status;
5850 
5851 	if ((uint32)index + 1 > numEntries) {
5852 		*_numEntries = index;
5853 		return B_BUFFER_OVERFLOW;
5854 	}
5855 
5856 	*_numEntries = index + 1;
5857 	return B_OK;
5858 }
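

/*	Usage sketch for get_memory_map_etc() (illustrative only; the buffer is
	assumed to be wired by the caller, e.g. via lock_memory_etc()):

		physical_entry entries[8];
		uint32 count = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
			entries, &count);
		if (status == B_OK || status == B_BUFFER_OVERFLOW) {
			// in both cases "count" holds the number of valid entries
			for (uint32 i = 0; i < count; i++) {
				dprintf("run %" B_PRIu32 ": %#" B_PRIxPHYSADDR ", %" B_PRIu64
					" bytes\n", i, entries[i].address,
					(uint64)entries[i].size);
			}
		}
*/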
5859 
5860 
5861 /*!	According to the BeBook, this function should always succeed.
5862 	This is no longer the case.
5863 */
5864 extern "C" int32
5865 __get_memory_map_haiku(const void* address, size_t numBytes,
5866 	physical_entry* table, int32 numEntries)
5867 {
5868 	uint32 entriesRead = numEntries;
5869 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5870 		table, &entriesRead);
5871 	if (error != B_OK)
5872 		return error;
5873 
5874 	// close the entry list
5875 
5876 	// if it's only one entry, we will silently accept the missing ending
5877 	if (numEntries == 1)
5878 		return B_OK;
5879 
5880 	if (entriesRead + 1 > (uint32)numEntries)
5881 		return B_BUFFER_OVERFLOW;
5882 
5883 	table[entriesRead].address = 0;
5884 	table[entriesRead].size = 0;
5885 
5886 	return B_OK;
5887 }
5888 
5889 
5890 area_id
5891 area_for(void* address)
5892 {
5893 	return vm_area_for((addr_t)address, true);
5894 }
5895 
5896 
5897 area_id
5898 find_area(const char* name)
5899 {
5900 	return VMAreaHash::Find(name);
5901 }
5902 
5903 
5904 status_t
5905 _get_area_info(area_id id, area_info* info, size_t size)
5906 {
5907 	if (size != sizeof(area_info) || info == NULL)
5908 		return B_BAD_VALUE;
5909 
5910 	AddressSpaceReadLocker locker;
5911 	VMArea* area;
5912 	status_t status = locker.SetFromArea(id, area);
5913 	if (status != B_OK)
5914 		return status;
5915 
5916 	fill_area_info(area, info, size);
5917 	return B_OK;
5918 }
5919 
5920 
5921 status_t
5922 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5923 {
5924 	addr_t nextBase = *(addr_t*)cookie;
5925 
5926 	// we're already through the list
5927 	if (nextBase == (addr_t)-1)
5928 		return B_ENTRY_NOT_FOUND;
5929 
5930 	if (team == B_CURRENT_TEAM)
5931 		team = team_get_current_team_id();
5932 
5933 	AddressSpaceReadLocker locker(team);
5934 	if (!locker.IsLocked())
5935 		return B_BAD_TEAM_ID;
5936 
5937 	VMArea* area;
5938 	for (VMAddressSpace::AreaIterator it
5939 				= locker.AddressSpace()->GetAreaIterator();
5940 			(area = it.Next()) != NULL;) {
5941 		if (area->Base() > nextBase)
5942 			break;
5943 	}
5944 
5945 	if (area == NULL) {
5946 		nextBase = (addr_t)-1;
5947 		return B_ENTRY_NOT_FOUND;
5948 	}
5949 
5950 	fill_area_info(area, info, size);
5951 	*cookie = (ssize_t)(area->Base());
5952 
5953 	return B_OK;
5954 }
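

/*	Usage sketch for the area info iteration above (illustrative only; kernel
	code would normally use the get_next_area_info() wrapper, the internal
	function is only called directly here to keep the sketch self-contained):

		ssize_t cookie = 0;
		area_info info;
		while (_get_next_area_info(teamID, &cookie, &info, sizeof(area_info))
				== B_OK) {
			dprintf("area %" B_PRId32 " \"%s\": %p, %" B_PRIuSIZE " bytes\n",
				info.area, info.name, info.address, info.size);
		}
*/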
5955 
5956 
5957 status_t
5958 set_area_protection(area_id area, uint32 newProtection)
5959 {
5960 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5961 		newProtection, true);
5962 }
5963 
5964 
5965 status_t
5966 resize_area(area_id areaID, size_t newSize)
5967 {
5968 	return vm_resize_area(areaID, newSize, true);
5969 }
5970 
5971 
5972 /*!	Transfers the specified area to a new team. The caller must be the owner
5973 	of the area.
5974 */
5975 area_id
5976 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5977 	bool kernel)
5978 {
5979 	area_info info;
5980 	status_t status = get_area_info(id, &info);
5981 	if (status != B_OK)
5982 		return status;
5983 
5984 	if (info.team != thread_get_current_thread()->team->id)
5985 		return B_PERMISSION_DENIED;
5986 
5987 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5988 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5989 	if (clonedArea < 0)
5990 		return clonedArea;
5991 
5992 	status = vm_delete_area(info.team, id, kernel);
5993 	if (status != B_OK) {
5994 		vm_delete_area(target, clonedArea, kernel);
5995 		return status;
5996 	}
5997 
5998 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5999 
6000 	return clonedArea;
6001 }
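

/*	Usage sketch for transfer_area() (illustrative only; the area is assumed
	to be owned by the calling team, and targetTeam to be known to the
	caller):

		void* targetAddress = NULL;
		area_id transferred = transfer_area(areaID, &targetAddress,
			B_ANY_ADDRESS, targetTeam, true);
		if (transferred < 0)
			return transferred;
*/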
6002 
6003 
6004 extern "C" area_id
6005 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6006 	size_t numBytes, uint32 addressSpec, uint32 protection,
6007 	void** _virtualAddress)
6008 {
6009 	if (!arch_vm_supports_protection(protection))
6010 		return B_NOT_SUPPORTED;
6011 
6012 	fix_protection(&protection);
6013 
6014 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6015 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6016 		false);
6017 }
6018 
6019 
6020 area_id
6021 clone_area(const char* name, void** _address, uint32 addressSpec,
6022 	uint32 protection, area_id source)
6023 {
6024 	if ((protection & B_KERNEL_PROTECTION) == 0)
6025 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6026 
6027 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6028 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6029 }
6030 
6031 
6032 area_id
6033 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6034 	uint32 protection, uint32 flags, uint32 guardSize,
6035 	const virtual_address_restrictions* virtualAddressRestrictions,
6036 	const physical_address_restrictions* physicalAddressRestrictions,
6037 	void** _address)
6038 {
6039 	fix_protection(&protection);
6040 
6041 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6042 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6043 		true, _address);
6044 }
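

/*	Usage sketch for create_area_etc() (illustrative only): creating a fully
	locked kernel area with explicit (but empty) address restrictions, similar
	to what __create_area_haiku() below does.

		virtual_address_restrictions virtualRestrictions = {};
		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
		physical_address_restrictions physicalRestrictions = {};

		void* address;
		area_id area = create_area_etc(B_SYSTEM_TEAM, "some buffer",
			4 * B_PAGE_SIZE, B_FULL_LOCK,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
			&virtualRestrictions, &physicalRestrictions, &address);
*/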
6045 
6046 
6047 extern "C" area_id
6048 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6049 	size_t size, uint32 lock, uint32 protection)
6050 {
6051 	fix_protection(&protection);
6052 
6053 	virtual_address_restrictions virtualRestrictions = {};
6054 	virtualRestrictions.address = *_address;
6055 	virtualRestrictions.address_specification = addressSpec;
6056 	physical_address_restrictions physicalRestrictions = {};
6057 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6058 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6059 		true, _address);
6060 }
6061 
6062 
6063 status_t
6064 delete_area(area_id area)
6065 {
6066 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6067 }
6068 
6069 
6070 //	#pragma mark - Userland syscalls
6071 
6072 
6073 status_t
6074 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6075 	addr_t size)
6076 {
6077 	// filter out some unavailable values (for userland)
6078 	switch (addressSpec) {
6079 		case B_ANY_KERNEL_ADDRESS:
6080 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6081 			return B_BAD_VALUE;
6082 	}
6083 
6084 	addr_t address;
6085 
6086 	if (!IS_USER_ADDRESS(userAddress)
6087 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6088 		return B_BAD_ADDRESS;
6089 
6090 	status_t status = vm_reserve_address_range(
6091 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6092 		RESERVED_AVOID_BASE);
6093 	if (status != B_OK)
6094 		return status;
6095 
6096 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6097 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6098 			(void*)address, size);
6099 		return B_BAD_ADDRESS;
6100 	}
6101 
6102 	return B_OK;
6103 }
6104 
6105 
6106 status_t
6107 _user_unreserve_address_range(addr_t address, addr_t size)
6108 {
6109 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6110 		(void*)address, size);
6111 }
6112 
6113 
6114 area_id
6115 _user_area_for(void* address)
6116 {
6117 	return vm_area_for((addr_t)address, false);
6118 }
6119 
6120 
6121 area_id
6122 _user_find_area(const char* userName)
6123 {
6124 	char name[B_OS_NAME_LENGTH];
6125 
6126 	if (!IS_USER_ADDRESS(userName)
6127 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6128 		return B_BAD_ADDRESS;
6129 
6130 	return find_area(name);
6131 }
6132 
6133 
6134 status_t
6135 _user_get_area_info(area_id area, area_info* userInfo)
6136 {
6137 	if (!IS_USER_ADDRESS(userInfo))
6138 		return B_BAD_ADDRESS;
6139 
6140 	area_info info;
6141 	status_t status = get_area_info(area, &info);
6142 	if (status < B_OK)
6143 		return status;
6144 
6145 	// TODO: do we want to prevent userland from seeing kernel protections?
6146 	//info.protection &= B_USER_PROTECTION;
6147 
6148 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6149 		return B_BAD_ADDRESS;
6150 
6151 	return status;
6152 }
6153 
6154 
6155 status_t
6156 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6157 {
6158 	ssize_t cookie;
6159 
6160 	if (!IS_USER_ADDRESS(userCookie)
6161 		|| !IS_USER_ADDRESS(userInfo)
6162 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6163 		return B_BAD_ADDRESS;
6164 
6165 	area_info info;
6166 	status_t status = _get_next_area_info(team, &cookie, &info,
6167 		sizeof(area_info));
6168 	if (status != B_OK)
6169 		return status;
6170 
6171 	//info.protection &= B_USER_PROTECTION;
6172 
6173 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6174 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6175 		return B_BAD_ADDRESS;
6176 
6177 	return status;
6178 }
6179 
6180 
6181 status_t
6182 _user_set_area_protection(area_id area, uint32 newProtection)
6183 {
6184 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6185 		return B_BAD_VALUE;
6186 
6187 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6188 		newProtection, false);
6189 }
6190 
6191 
6192 status_t
6193 _user_resize_area(area_id area, size_t newSize)
6194 {
6195 	// TODO: Since we restrict deleting of areas to those owned by the team,
6196 	// we should also do that for resizing (check other functions, too).
6197 	return vm_resize_area(area, newSize, false);
6198 }
6199 
6200 
6201 area_id
6202 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6203 	team_id target)
6204 {
6205 	// filter out some unavailable values (for userland)
6206 	switch (addressSpec) {
6207 		case B_ANY_KERNEL_ADDRESS:
6208 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6209 			return B_BAD_VALUE;
6210 	}
6211 
6212 	void* address;
6213 	if (!IS_USER_ADDRESS(userAddress)
6214 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6215 		return B_BAD_ADDRESS;
6216 
6217 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6218 	if (newArea < B_OK)
6219 		return newArea;
6220 
6221 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6222 		return B_BAD_ADDRESS;
6223 
6224 	return newArea;
6225 }
6226 
6227 
6228 area_id
6229 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6230 	uint32 protection, area_id sourceArea)
6231 {
6232 	char name[B_OS_NAME_LENGTH];
6233 	void* address;
6234 
6235 	// filter out some unavailable values (for userland)
6236 	switch (addressSpec) {
6237 		case B_ANY_KERNEL_ADDRESS:
6238 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6239 			return B_BAD_VALUE;
6240 	}
6241 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6242 		return B_BAD_VALUE;
6243 
6244 	if (!IS_USER_ADDRESS(userName)
6245 		|| !IS_USER_ADDRESS(userAddress)
6246 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6247 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6248 		return B_BAD_ADDRESS;
6249 
6250 	fix_protection(&protection);
6251 
6252 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6253 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6254 		false);
6255 	if (clonedArea < B_OK)
6256 		return clonedArea;
6257 
6258 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6259 		delete_area(clonedArea);
6260 		return B_BAD_ADDRESS;
6261 	}
6262 
6263 	return clonedArea;
6264 }
6265 
6266 
6267 area_id
6268 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6269 	size_t size, uint32 lock, uint32 protection)
6270 {
6271 	char name[B_OS_NAME_LENGTH];
6272 	void* address;
6273 
6274 	// filter out some unavailable values (for userland)
6275 	switch (addressSpec) {
6276 		case B_ANY_KERNEL_ADDRESS:
6277 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6278 			return B_BAD_VALUE;
6279 	}
6280 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6281 		return B_BAD_VALUE;
6282 
6283 	if (!IS_USER_ADDRESS(userName)
6284 		|| !IS_USER_ADDRESS(userAddress)
6285 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6286 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6287 		return B_BAD_ADDRESS;
6288 
6289 	if (addressSpec == B_EXACT_ADDRESS
6290 		&& IS_KERNEL_ADDRESS(address))
6291 		return B_BAD_VALUE;
6292 
6293 	if (addressSpec == B_ANY_ADDRESS)
6294 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6295 	if (addressSpec == B_BASE_ADDRESS)
6296 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6297 
6298 	fix_protection(&protection);
6299 
6300 	virtual_address_restrictions virtualRestrictions = {};
6301 	virtualRestrictions.address = address;
6302 	virtualRestrictions.address_specification = addressSpec;
6303 	physical_address_restrictions physicalRestrictions = {};
6304 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6305 		size, lock, protection, 0, 0, &virtualRestrictions,
6306 		&physicalRestrictions, false, &address);
6307 
6308 	if (area >= B_OK
6309 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6310 		delete_area(area);
6311 		return B_BAD_ADDRESS;
6312 	}
6313 
6314 	return area;
6315 }
6316 
6317 
6318 status_t
6319 _user_delete_area(area_id area)
6320 {
6321 	// Unlike the BeOS implementation, you can now only delete areas
6322 	// that you have created yourself from userland.
6323 	// The documentation to delete_area() explicitly states that this
6324 	// will be restricted in the future, and so it will.
6325 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6326 }
6327 
6328 
6329 // TODO: create a BeOS style call for this!
6330 
6331 area_id
6332 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6333 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6334 	int fd, off_t offset)
6335 {
6336 	char name[B_OS_NAME_LENGTH];
6337 	void* address;
6338 	area_id area;
6339 
6340 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6341 		return B_BAD_VALUE;
6342 
6343 	fix_protection(&protection);
6344 
6345 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6346 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6347 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6348 		return B_BAD_ADDRESS;
6349 
6350 	if (addressSpec == B_EXACT_ADDRESS) {
6351 		if ((addr_t)address + size < (addr_t)address
6352 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6353 			return B_BAD_VALUE;
6354 		}
6355 		if (!IS_USER_ADDRESS(address)
6356 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6357 			return B_BAD_ADDRESS;
6358 		}
6359 	}
6360 
6361 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6362 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6363 		false);
6364 	if (area < B_OK)
6365 		return area;
6366 
6367 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6368 		return B_BAD_ADDRESS;
6369 
6370 	return area;
6371 }
6372 
6373 
6374 status_t
6375 _user_unmap_memory(void* _address, size_t size)
6376 {
6377 	addr_t address = (addr_t)_address;
6378 
6379 	// check params
6380 	if (size == 0 || (addr_t)address + size < (addr_t)address
6381 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6382 		return B_BAD_VALUE;
6383 	}
6384 
6385 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6386 		return B_BAD_ADDRESS;
6387 
6388 	// Write lock the address space and ensure the address range is not wired.
6389 	AddressSpaceWriteLocker locker;
6390 	do {
6391 		status_t status = locker.SetTo(team_get_current_team_id());
6392 		if (status != B_OK)
6393 			return status;
6394 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6395 			size, &locker));
6396 
6397 	// unmap
6398 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6399 }
6400 
6401 
6402 status_t
6403 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6404 {
6405 	// check address range
6406 	addr_t address = (addr_t)_address;
6407 	size = PAGE_ALIGN(size);
6408 
6409 	if ((address % B_PAGE_SIZE) != 0)
6410 		return B_BAD_VALUE;
6411 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6412 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6413 		// weird error code required by POSIX
6414 		return ENOMEM;
6415 	}
6416 
6417 	// extend and check protection
6418 	if ((protection & ~B_USER_PROTECTION) != 0)
6419 		return B_BAD_VALUE;
6420 
6421 	fix_protection(&protection);
6422 
6423 	// We need to write lock the address space, since we're going to play with
6424 	// the areas. Also make sure that none of the areas is wired and that we're
6425 	// actually allowed to change the protection.
6426 	AddressSpaceWriteLocker locker;
6427 
6428 	bool restart;
6429 	do {
6430 		restart = false;
6431 
6432 		status_t status = locker.SetTo(team_get_current_team_id());
6433 		if (status != B_OK)
6434 			return status;
6435 
6436 		// First round: Check whether the whole range is covered by areas and we
6437 		// are allowed to modify them.
6438 		addr_t currentAddress = address;
6439 		size_t sizeLeft = size;
6440 		while (sizeLeft > 0) {
6441 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6442 			if (area == NULL)
6443 				return B_NO_MEMORY;
6444 
6445 			if (area->address_space == VMAddressSpace::Kernel())
6446 				return B_NOT_ALLOWED;
6447 
6448 			// TODO: For (shared) mapped files we should check whether the new
6449 			// protections are compatible with the file permissions. We don't
6450 			// have a way to do that yet, though.
6451 
6452 			addr_t offset = currentAddress - area->Base();
6453 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6454 
6455 			AreaCacheLocker cacheLocker(area);
6456 
6457 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6458 					&locker, &cacheLocker)) {
6459 				restart = true;
6460 				break;
6461 			}
6462 
6463 			cacheLocker.Unlock();
6464 
6465 			currentAddress += rangeSize;
6466 			sizeLeft -= rangeSize;
6467 		}
6468 	} while (restart);
6469 
6470 	// Second round: If the protections differ from that of the area, create a
6471 	// page protection array and re-map mapped pages.
6472 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6473 	addr_t currentAddress = address;
6474 	size_t sizeLeft = size;
6475 	while (sizeLeft > 0) {
6476 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6477 		if (area == NULL)
6478 			return B_NO_MEMORY;
6479 
6480 		addr_t offset = currentAddress - area->Base();
6481 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6482 
6483 		currentAddress += rangeSize;
6484 		sizeLeft -= rangeSize;
6485 
6486 		if (area->page_protections == NULL) {
6487 			if (area->protection == protection)
6488 				continue;
6489 
6490 			status_t status = allocate_area_page_protections(area);
6491 			if (status != B_OK)
6492 				return status;
6493 		}
6494 
6495 		// We need to lock the complete cache chain, since we potentially unmap
6496 		// pages of lower caches.
6497 		VMCache* topCache = vm_area_get_locked_cache(area);
6498 		VMCacheChainLocker cacheChainLocker(topCache);
6499 		cacheChainLocker.LockAllSourceCaches();
6500 
6501 		for (addr_t pageAddress = area->Base() + offset;
6502 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6503 			map->Lock();
6504 
6505 			set_area_page_protection(area, pageAddress, protection);
6506 
6507 			phys_addr_t physicalAddress;
6508 			uint32 flags;
6509 
6510 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6511 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6512 				map->Unlock();
6513 				continue;
6514 			}
6515 
6516 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6517 			if (page == NULL) {
6518 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6519 					"\n", area, physicalAddress);
6520 				map->Unlock();
6521 				return B_ERROR;
6522 			}
6523 
6524 			// If the page is not in the topmost cache and write access is
6525 			// requested, we have to unmap it. Otherwise we can re-map it with
6526 			// the new protection.
6527 			bool unmapPage = page->Cache() != topCache
6528 				&& (protection & B_WRITE_AREA) != 0;
6529 
6530 			if (!unmapPage)
6531 				map->ProtectPage(area, pageAddress, protection);
6532 
6533 			map->Unlock();
6534 
6535 			if (unmapPage) {
6536 				DEBUG_PAGE_ACCESS_START(page);
6537 				unmap_page(area, pageAddress);
6538 				DEBUG_PAGE_ACCESS_END(page);
6539 			}
6540 		}
6541 	}
6542 
6543 	return B_OK;
6544 }
6545 
6546 
6547 status_t
6548 _user_sync_memory(void* _address, size_t size, uint32 flags)
6549 {
6550 	addr_t address = (addr_t)_address;
6551 	size = PAGE_ALIGN(size);
6552 
6553 	// check params
6554 	if ((address % B_PAGE_SIZE) != 0)
6555 		return B_BAD_VALUE;
6556 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6557 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6558 		// weird error code required by POSIX
6559 		return ENOMEM;
6560 	}
6561 
6562 	bool writeSync = (flags & MS_SYNC) != 0;
6563 	bool writeAsync = (flags & MS_ASYNC) != 0;
6564 	if (writeSync && writeAsync)
6565 		return B_BAD_VALUE;
6566 
6567 	if (size == 0 || (!writeSync && !writeAsync))
6568 		return B_OK;
6569 
6570 	// iterate through the range and sync all concerned areas
6571 	while (size > 0) {
6572 		// read lock the address space
6573 		AddressSpaceReadLocker locker;
6574 		status_t error = locker.SetTo(team_get_current_team_id());
6575 		if (error != B_OK)
6576 			return error;
6577 
6578 		// get the first area
6579 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6580 		if (area == NULL)
6581 			return B_NO_MEMORY;
6582 
6583 		uint32 offset = address - area->Base();
6584 		size_t rangeSize = min_c(area->Size() - offset, size);
6585 		offset += area->cache_offset;
6586 
6587 		// lock the cache
6588 		AreaCacheLocker cacheLocker(area);
6589 		if (!cacheLocker)
6590 			return B_BAD_VALUE;
6591 		VMCache* cache = area->cache;
6592 
6593 		locker.Unlock();
6594 
6595 		uint32 firstPage = offset >> PAGE_SHIFT;
6596 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6597 
6598 		// write the pages
6599 		if (cache->type == CACHE_TYPE_VNODE) {
6600 			if (writeSync) {
6601 				// synchronous
6602 				error = vm_page_write_modified_page_range(cache, firstPage,
6603 					endPage);
6604 				if (error != B_OK)
6605 					return error;
6606 			} else {
6607 				// asynchronous
6608 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6609 				// TODO: This is probably not quite what is supposed to happen.
6610 				// Especially when a lot has to be written, it might take ages
6611 				// until it really hits the disk.
6612 			}
6613 		}
6614 
6615 		address += rangeSize;
6616 		size -= rangeSize;
6617 	}
6618 
6619 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6620 	// synchronize multiple mappings of the same file. In our VM they never get
6621 	// out of sync, though, so we don't have to do anything.
6622 
6623 	return B_OK;
6624 }
6625 
6626 
6627 status_t
6628 _user_memory_advice(void* address, size_t size, uint32 advice)
6629 {
6630 	// TODO: Implement!
6631 	return B_OK;
6632 }
6633 
6634 
6635 status_t
6636 _user_get_memory_properties(team_id teamID, const void* address,
6637 	uint32* _protected, uint32* _lock)
6638 {
6639 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6640 		return B_BAD_ADDRESS;
6641 
6642 	AddressSpaceReadLocker locker;
6643 	status_t error = locker.SetTo(teamID);
6644 	if (error != B_OK)
6645 		return error;
6646 
6647 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6648 	if (area == NULL)
6649 		return B_NO_MEMORY;
6650 
6652 	uint32 protection = area->protection;
6653 	if (area->page_protections != NULL)
6654 		protection = get_area_page_protection(area, (addr_t)address);
6655 
6656 	uint32 wiring = area->wiring;
6657 
6658 	locker.Unlock();
6659 
6660 	error = user_memcpy(_protected, &protection, sizeof(protection));
6661 	if (error != B_OK)
6662 		return error;
6663 
6664 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6665 
6666 	return error;
6667 }
6668 
6669 
6670 // #pragma mark -- compatibility
6671 
6672 
6673 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6674 
6675 
6676 struct physical_entry_beos {
6677 	uint32	address;
6678 	uint32	size;
6679 };
6680 
6681 
6682 /*!	The physical_entry structure has changed. We need to translate it to the
6683 	old one.
6684 */
6685 extern "C" int32
6686 __get_memory_map_beos(const void* _address, size_t numBytes,
6687 	physical_entry_beos* table, int32 numEntries)
6688 {
6689 	if (numEntries <= 0)
6690 		return B_BAD_VALUE;
6691 
6692 	const uint8* address = (const uint8*)_address;
6693 
6694 	int32 count = 0;
6695 	while (numBytes > 0 && count < numEntries) {
6696 		physical_entry entry;
6697 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6698 		if (result < 0) {
6699 			if (result != B_BUFFER_OVERFLOW)
6700 				return result;
6701 		}
6702 
6703 		if (entry.address >= (phys_addr_t)1 << 32) {
6704 			panic("get_memory_map(): Address is greater 4 GB!");
6705 			return B_ERROR;
6706 		}
6707 
6708 		table[count].address = entry.address;
6709 		table[count++].size = entry.size;
6710 
6711 		address += entry.size;
6712 		numBytes -= entry.size;
6713 	}
6714 
6715 	// null-terminate the table, if possible
6716 	if (count < numEntries) {
6717 		table[count].address = 0;
6718 		table[count].size = 0;
6719 	}
6720 
6721 	return B_OK;
6722 }
6723 
6724 
6725 /*!	The type of the \a physicalAddress parameter has changed from void* to
6726 	phys_addr_t.
6727 */
6728 extern "C" area_id
6729 __map_physical_memory_beos(const char* name, void* physicalAddress,
6730 	size_t numBytes, uint32 addressSpec, uint32 protection,
6731 	void** _virtualAddress)
6732 {
6733 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6734 		addressSpec, protection, _virtualAddress);
6735 }
6736 
6737 
6738 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6739 	we meddle with the \a lock parameter to force 32 bit.
6740 */
6741 extern "C" area_id
6742 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6743 	size_t size, uint32 lock, uint32 protection)
6744 {
6745 	switch (lock) {
6746 		case B_NO_LOCK:
6747 			break;
6748 		case B_FULL_LOCK:
6749 		case B_LAZY_LOCK:
6750 			lock = B_32_BIT_FULL_LOCK;
6751 			break;
6752 		case B_CONTIGUOUS:
6753 			lock = B_32_BIT_CONTIGUOUS;
6754 			break;
6755 	}
6756 
6757 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6758 		protection);
6759 }
6760 
6761 
6762 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6763 	"BASE");
6764 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6765 	"map_physical_memory@", "BASE");
6766 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6767 	"BASE");
6768 
6769 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6770 	"get_memory_map@@", "1_ALPHA3");
6771 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6772 	"map_physical_memory@@", "1_ALPHA3");
6773 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6774 	"1_ALPHA3");
6775 
6776 
6777 #else
6778 
6779 
6780 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6781 	"get_memory_map@@", "BASE");
6782 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6783 	"map_physical_memory@@", "BASE");
6784 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6785 	"BASE");
6786 
6787 
6788 #endif	// defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6789