xref: /haiku/src/system/kernel/vm/vm.cpp (revision e3857211d305a595c2d0b58768f25623d5967675)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/khash.h>
51 #include <vm/vm_page.h>
52 #include <vm/vm_priv.h>
53 #include <vm/VMAddressSpace.h>
54 #include <vm/VMArea.h>
55 #include <vm/VMCache.h>
56 
57 #include "VMAddressSpaceLocking.h"
58 #include "VMAnonymousCache.h"
59 #include "VMAnonymousNoSwapCache.h"
60 #include "IORequest.h"
61 
62 
63 //#define TRACE_VM
64 //#define TRACE_FAULTS
65 #ifdef TRACE_VM
66 #	define TRACE(x) dprintf x
67 #else
68 #	define TRACE(x) ;
69 #endif
70 #ifdef TRACE_FAULTS
71 #	define FTRACE(x) dprintf x
72 #else
73 #	define FTRACE(x) ;
74 #endif
75 
76 
77 class AreaCacheLocking {
78 public:
79 	inline bool Lock(VMCache* lockable)
80 	{
81 		return false;
82 	}
83 
84 	inline void Unlock(VMCache* lockable)
85 	{
86 		vm_area_put_locked_cache(lockable);
87 	}
88 };
89 
90 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
91 public:
92 	inline AreaCacheLocker(VMCache* cache = NULL)
93 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
94 	{
95 	}
96 
97 	inline AreaCacheLocker(VMArea* area)
98 		: AutoLocker<VMCache, AreaCacheLocking>()
99 	{
100 		SetTo(area);
101 	}
102 
103 	inline void SetTo(VMCache* cache, bool alreadyLocked)
104 	{
105 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
106 	}
107 
108 	inline void SetTo(VMArea* area)
109 	{
110 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
111 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
112 	}
113 };
114 
115 
116 class VMCacheChainLocker {
117 public:
118 	VMCacheChainLocker()
119 		:
120 		fTopCache(NULL),
121 		fBottomCache(NULL)
122 	{
123 	}
124 
125 	VMCacheChainLocker(VMCache* topCache)
126 		:
127 		fTopCache(topCache),
128 		fBottomCache(topCache)
129 	{
130 	}
131 
132 	~VMCacheChainLocker()
133 	{
134 		Unlock();
135 	}
136 
137 	void SetTo(VMCache* topCache)
138 	{
139 		fTopCache = topCache;
140 		fBottomCache = topCache;
141 
142 		if (topCache != NULL)
143 			topCache->SetUserData(NULL);
144 	}
145 
146 	VMCache* LockSourceCache()
147 	{
148 		if (fBottomCache == NULL || fBottomCache->source == NULL)
149 			return NULL;
150 
151 		VMCache* previousCache = fBottomCache;
152 
153 		fBottomCache = fBottomCache->source;
154 		fBottomCache->Lock();
155 		fBottomCache->AcquireRefLocked();
156 		fBottomCache->SetUserData(previousCache);
157 
158 		return fBottomCache;
159 	}
160 
161 	void LockAllSourceCaches()
162 	{
163 		while (LockSourceCache() != NULL) {
164 		}
165 	}
166 
167 	void Unlock(VMCache* exceptCache = NULL)
168 	{
169 		if (fTopCache == NULL)
170 			return;
171 
172 		// Unlock caches in source -> consumer direction. This is important to
173 		// avoid double-locking and a reversal of locking order in case a cache
174 		// is eligable for merging.
175 		VMCache* cache = fBottomCache;
176 		while (cache != NULL) {
177 			VMCache* nextCache = (VMCache*)cache->UserData();
178 			if (cache != exceptCache)
179 				cache->ReleaseRefAndUnlock(cache != fTopCache);
180 
181 			if (cache == fTopCache)
182 				break;
183 
184 			cache = nextCache;
185 		}
186 
187 		fTopCache = NULL;
188 		fBottomCache = NULL;
189 	}
190 
191 	void UnlockKeepRefs(bool keepTopCacheLocked)
192 	{
193 		if (fTopCache == NULL)
194 			return;
195 
196 		VMCache* nextCache = fBottomCache;
197 		VMCache* cache = NULL;
198 
199 		while (keepTopCacheLocked
200 				? nextCache != fTopCache : cache != fTopCache) {
201 			cache = nextCache;
202 			nextCache = (VMCache*)cache->UserData();
203 			cache->Unlock(cache != fTopCache);
204 		}
205 	}
206 
207 	void RelockCaches(bool topCacheLocked)
208 	{
209 		if (fTopCache == NULL)
210 			return;
211 
212 		VMCache* nextCache = fTopCache;
213 		VMCache* cache = NULL;
214 		if (topCacheLocked) {
215 			cache = nextCache;
216 			nextCache = cache->source;
217 		}
218 
219 		while (cache != fBottomCache && nextCache != NULL) {
220 			VMCache* consumer = cache;
221 			cache = nextCache;
222 			nextCache = cache->source;
223 			cache->Lock();
224 			cache->SetUserData(consumer);
225 		}
226 	}
227 
228 private:
229 	VMCache*	fTopCache;
230 	VMCache*	fBottomCache;
231 };
232 
233 
234 // The memory reserve an allocation of the certain priority must not touch.
235 static const size_t kMemoryReserveForPriority[] = {
236 	VM_MEMORY_RESERVE_USER,		// user
237 	VM_MEMORY_RESERVE_SYSTEM,	// system
238 	0							// VIP
239 };
240 
241 
242 ObjectCache* gPageMappingsObjectCache;
243 
244 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
245 
246 static off_t sAvailableMemory;
247 static off_t sNeededMemory;
248 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
249 static uint32 sPageFaults;
250 
251 static VMPhysicalPageMapper* sPhysicalPageMapper;
252 
253 #if DEBUG_CACHE_LIST
254 
255 struct cache_info {
256 	VMCache*	cache;
257 	addr_t		page_count;
258 	addr_t		committed;
259 };
260 
261 static const int kCacheInfoTableCount = 100 * 1024;
262 static cache_info* sCacheInfoTable;
263 
264 #endif	// DEBUG_CACHE_LIST
265 
266 
267 // function declarations
268 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
269 	bool addressSpaceCleanup);
270 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
271 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage,
272 	VMAreaWiredRange* wiredRange = NULL);
273 static status_t map_backing_store(VMAddressSpace* addressSpace,
274 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
275 	int protection, int mapping, uint32 flags,
276 	const virtual_address_restrictions* addressRestrictions, bool kernel,
277 	VMArea** _area, void** _virtualAddress);
278 static void fix_protection(uint32* protection);
279 
280 
281 //	#pragma mark -
282 
283 
284 #if VM_PAGE_FAULT_TRACING
285 
286 namespace VMPageFaultTracing {
287 
288 class PageFaultStart : public AbstractTraceEntry {
289 public:
290 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
291 		:
292 		fAddress(address),
293 		fPC(pc),
294 		fWrite(write),
295 		fUser(user)
296 	{
297 		Initialized();
298 	}
299 
300 	virtual void AddDump(TraceOutput& out)
301 	{
302 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
303 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
304 	}
305 
306 private:
307 	addr_t	fAddress;
308 	addr_t	fPC;
309 	bool	fWrite;
310 	bool	fUser;
311 };
312 
313 
314 // page fault errors
315 enum {
316 	PAGE_FAULT_ERROR_NO_AREA		= 0,
317 	PAGE_FAULT_ERROR_KERNEL_ONLY,
318 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
319 	PAGE_FAULT_ERROR_READ_PROTECTED,
320 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
321 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
322 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
323 };
324 
325 
326 class PageFaultError : public AbstractTraceEntry {
327 public:
328 	PageFaultError(area_id area, status_t error)
329 		:
330 		fArea(area),
331 		fError(error)
332 	{
333 		Initialized();
334 	}
335 
336 	virtual void AddDump(TraceOutput& out)
337 	{
338 		switch (fError) {
339 			case PAGE_FAULT_ERROR_NO_AREA:
340 				out.Print("page fault error: no area");
341 				break;
342 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
343 				out.Print("page fault error: area: %ld, kernel only", fArea);
344 				break;
345 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
346 				out.Print("page fault error: area: %ld, write protected",
347 					fArea);
348 				break;
349 			case PAGE_FAULT_ERROR_READ_PROTECTED:
350 				out.Print("page fault error: area: %ld, read protected", fArea);
351 				break;
352 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
353 				out.Print("page fault error: area: %ld, execute protected",
354 					fArea);
355 				break;
356 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
357 				out.Print("page fault error: kernel touching bad user memory");
358 				break;
359 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
360 				out.Print("page fault error: no address space");
361 				break;
362 			default:
363 				out.Print("page fault error: area: %ld, error: %s", fArea,
364 					strerror(fError));
365 				break;
366 		}
367 	}
368 
369 private:
370 	area_id		fArea;
371 	status_t	fError;
372 };
373 
374 
375 class PageFaultDone : public AbstractTraceEntry {
376 public:
377 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
378 			vm_page* page)
379 		:
380 		fArea(area),
381 		fTopCache(topCache),
382 		fCache(cache),
383 		fPage(page)
384 	{
385 		Initialized();
386 	}
387 
388 	virtual void AddDump(TraceOutput& out)
389 	{
390 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
391 			"page: %p", fArea, fTopCache, fCache, fPage);
392 	}
393 
394 private:
395 	area_id		fArea;
396 	VMCache*	fTopCache;
397 	VMCache*	fCache;
398 	vm_page*	fPage;
399 };
400 
401 }	// namespace VMPageFaultTracing
402 
403 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
404 #else
405 #	define TPF(x) ;
406 #endif	// VM_PAGE_FAULT_TRACING
407 
408 
409 //	#pragma mark -
410 
411 
412 /*!	The page's cache must be locked.
413 */
414 static inline void
415 increment_page_wired_count(vm_page* page)
416 {
417 	if (!page->IsMapped())
418 		atomic_add(&gMappedPagesCount, 1);
419 	page->IncrementWiredCount();
420 }
421 
422 
423 /*!	The page's cache must be locked.
424 */
425 static inline void
426 decrement_page_wired_count(vm_page* page)
427 {
428 	page->DecrementWiredCount();
429 	if (!page->IsMapped())
430 		atomic_add(&gMappedPagesCount, -1);
431 }
432 
433 
434 static inline addr_t
435 virtual_page_address(VMArea* area, vm_page* page)
436 {
437 	return area->Base()
438 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
439 }
440 
441 
442 //! You need to have the address space locked when calling this function
443 static VMArea*
444 lookup_area(VMAddressSpace* addressSpace, area_id id)
445 {
446 	VMAreaHash::ReadLock();
447 
448 	VMArea* area = VMAreaHash::LookupLocked(id);
449 	if (area != NULL && area->address_space != addressSpace)
450 		area = NULL;
451 
452 	VMAreaHash::ReadUnlock();
453 
454 	return area;
455 }
456 
457 
458 static status_t
459 allocate_area_page_protections(VMArea* area)
460 {
461 	// In the page protections we store only the three user protections,
462 	// so we use 4 bits per page.
463 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
464 	area->page_protections = (uint8*)malloc_etc(bytes,
465 		HEAP_DONT_LOCK_KERNEL_SPACE);
466 	if (area->page_protections == NULL)
467 		return B_NO_MEMORY;
468 
469 	// init the page protections for all pages to that of the area
470 	uint32 areaProtection = area->protection
471 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
472 	memset(area->page_protections, areaProtection | (areaProtection << 4),
473 		bytes);
474 	return B_OK;
475 }
476 
477 
478 static inline void
479 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
480 {
481 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
482 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
483 	uint8& entry = area->page_protections[pageIndex / 2];
484 	if (pageIndex % 2 == 0)
485 		entry = (entry & 0xf0) | protection;
486 	else
487 		entry = (entry & 0x0f) | (protection << 4);
488 }
489 
490 
491 static inline uint32
492 get_area_page_protection(VMArea* area, addr_t pageAddress)
493 {
494 	if (area->page_protections == NULL)
495 		return area->protection;
496 
497 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
498 	uint32 protection = area->page_protections[pageIndex / 2];
499 	if (pageIndex % 2 == 0)
500 		protection &= 0x0f;
501 	else
502 		protection >>= 4;
503 
504 	// If this is a kernel area we translate the user flags to kernel flags.
505 	if (area->address_space == VMAddressSpace::Kernel()) {
506 		uint32 kernelProtection = 0;
507 		if ((protection & B_READ_AREA) != 0)
508 			kernelProtection |= B_KERNEL_READ_AREA;
509 		if ((protection & B_WRITE_AREA) != 0)
510 			kernelProtection |= B_KERNEL_WRITE_AREA;
511 
512 		return kernelProtection;
513 	}
514 
515 	return protection | B_KERNEL_READ_AREA
516 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
517 }
518 
519 
520 /*!	The caller must have reserved enough pages the translation map
521 	implementation might need to map this page.
522 	The page's cache must be locked.
523 */
524 static status_t
525 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
526 	vm_page_reservation* reservation)
527 {
528 	VMTranslationMap* map = area->address_space->TranslationMap();
529 
530 	bool wasMapped = page->IsMapped();
531 
532 	if (area->wiring == B_NO_LOCK) {
533 		DEBUG_PAGE_ACCESS_CHECK(page);
534 
535 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
536 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
537 			gPageMappingsObjectCache,
538 			CACHE_DONT_WAIT_FOR_MEMORY
539 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
540 		if (mapping == NULL)
541 			return B_NO_MEMORY;
542 
543 		mapping->page = page;
544 		mapping->area = area;
545 
546 		map->Lock();
547 
548 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
549 			area->MemoryType(), reservation);
550 
551 		// insert mapping into lists
552 		if (!page->IsMapped())
553 			atomic_add(&gMappedPagesCount, 1);
554 
555 		page->mappings.Add(mapping);
556 		area->mappings.Add(mapping);
557 
558 		map->Unlock();
559 	} else {
560 		DEBUG_PAGE_ACCESS_CHECK(page);
561 
562 		map->Lock();
563 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
564 			area->MemoryType(), reservation);
565 		map->Unlock();
566 
567 		increment_page_wired_count(page);
568 	}
569 
570 	if (!wasMapped) {
571 		// The page is mapped now, so we must not remain in the cached queue.
572 		// It also makes sense to move it from the inactive to the active, since
573 		// otherwise the page daemon wouldn't come to keep track of it (in idle
574 		// mode) -- if the page isn't touched, it will be deactivated after a
575 		// full iteration through the queue at the latest.
576 		if (page->State() == PAGE_STATE_CACHED
577 				|| page->State() == PAGE_STATE_INACTIVE) {
578 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
579 		}
580 	}
581 
582 	return B_OK;
583 }
584 
585 
586 /*!	If \a preserveModified is \c true, the caller must hold the lock of the
587 	page's cache.
588 */
589 static inline bool
590 unmap_page(VMArea* area, addr_t virtualAddress)
591 {
592 	return area->address_space->TranslationMap()->UnmapPage(area,
593 		virtualAddress, true);
594 }
595 
596 
597 /*!	If \a preserveModified is \c true, the caller must hold the lock of all
598 	mapped pages' caches.
599 */
600 static inline void
601 unmap_pages(VMArea* area, addr_t base, size_t size)
602 {
603 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
604 }
605 
606 
607 /*!	Cuts a piece out of an area. If the given cut range covers the complete
608 	area, it is deleted. If it covers the beginning or the end, the area is
609 	resized accordingly. If the range covers some part in the middle of the
610 	area, it is split in two; in this case the second area is returned via
611 	\a _secondArea (the variable is left untouched in the other cases).
612 	The address space must be write locked.
613 	The caller must ensure that no part of the given range is wired.
614 */
615 static status_t
616 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
617 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
618 {
619 	// Does the cut range intersect with the area at all?
620 	addr_t areaLast = area->Base() + (area->Size() - 1);
621 	if (area->Base() > lastAddress || areaLast < address)
622 		return B_OK;
623 
624 	// Is the area fully covered?
625 	if (area->Base() >= address && areaLast <= lastAddress) {
626 		delete_area(addressSpace, area, false);
627 		return B_OK;
628 	}
629 
630 	int priority;
631 	uint32 allocationFlags;
632 	if (addressSpace == VMAddressSpace::Kernel()) {
633 		priority = VM_PRIORITY_SYSTEM;
634 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
635 			| HEAP_DONT_LOCK_KERNEL_SPACE;
636 	} else {
637 		priority = VM_PRIORITY_USER;
638 		allocationFlags = 0;
639 	}
640 
641 	VMCache* cache = vm_area_get_locked_cache(area);
642 	VMCacheChainLocker cacheChainLocker(cache);
643 	cacheChainLocker.LockAllSourceCaches();
644 
645 	// Cut the end only?
646 	if (areaLast <= lastAddress) {
647 		size_t oldSize = area->Size();
648 		size_t newSize = address - area->Base();
649 
650 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
651 			allocationFlags);
652 		if (error != B_OK)
653 			return error;
654 
655 		// unmap pages
656 		unmap_pages(area, address, oldSize - newSize);
657 
658 		// If no one else uses the area's cache, we can resize it, too.
659 		if (cache->areas == area && area->cache_next == NULL
660 			&& cache->consumers.IsEmpty()
661 			&& cache->type == CACHE_TYPE_RAM) {
662 			// Since VMCache::Resize() can temporarily drop the lock, we must
663 			// unlock all lower caches to prevent locking order inversion.
664 			cacheChainLocker.Unlock(cache);
665 			cache->Resize(cache->virtual_base + newSize, priority);
666 			cache->ReleaseRefAndUnlock();
667 		}
668 
669 		return B_OK;
670 	}
671 
672 	// Cut the beginning only?
673 	if (area->Base() >= address) {
674 		addr_t oldBase = area->Base();
675 		addr_t newBase = lastAddress + 1;
676 		size_t newSize = areaLast - lastAddress;
677 
678 		// unmap pages
679 		unmap_pages(area, oldBase, newBase - oldBase);
680 
681 		// resize the area
682 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
683 			allocationFlags);
684 		if (error != B_OK)
685 			return error;
686 
687 		// TODO: If no one else uses the area's cache, we should resize it, too!
688 
689 		area->cache_offset += newBase - oldBase;
690 
691 		return B_OK;
692 	}
693 
694 	// The tough part -- cut a piece out of the middle of the area.
695 	// We do that by shrinking the area to the begin section and creating a
696 	// new area for the end section.
697 
698 	addr_t firstNewSize = address - area->Base();
699 	addr_t secondBase = lastAddress + 1;
700 	addr_t secondSize = areaLast - lastAddress;
701 
702 	// unmap pages
703 	unmap_pages(area, address, area->Size() - firstNewSize);
704 
705 	// resize the area
706 	addr_t oldSize = area->Size();
707 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
708 		allocationFlags);
709 	if (error != B_OK)
710 		return error;
711 
712 	// TODO: If no one else uses the area's cache, we might want to create a
713 	// new cache for the second area, transfer the concerned pages from the
714 	// first cache to it and resize the first cache.
715 
716 	// map the second area
717 	virtual_address_restrictions addressRestrictions = {};
718 	addressRestrictions.address = (void*)secondBase;
719 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
720 	VMArea* secondArea;
721 	error = map_backing_store(addressSpace, cache,
722 		area->cache_offset + (secondBase - area->Base()), area->name,
723 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
724 		&addressRestrictions, kernel, &secondArea, NULL);
725 	if (error != B_OK) {
726 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
727 		return error;
728 	}
729 
730 	// We need a cache reference for the new area.
731 	cache->AcquireRefLocked();
732 
733 	if (_secondArea != NULL)
734 		*_secondArea = secondArea;
735 
736 	return B_OK;
737 }
738 
739 
740 /*!	Deletes all areas in the given address range.
741 	The address space must be write-locked.
742 	The caller must ensure that no part of the given range is wired.
743 */
744 static status_t
745 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
746 	bool kernel)
747 {
748 	size = PAGE_ALIGN(size);
749 	addr_t lastAddress = address + (size - 1);
750 
751 	// Check, whether the caller is allowed to modify the concerned areas.
752 	if (!kernel) {
753 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
754 				VMArea* area = it.Next();) {
755 			addr_t areaLast = area->Base() + (area->Size() - 1);
756 			if (area->Base() < lastAddress && address < areaLast) {
757 				if ((area->protection & B_KERNEL_AREA) != 0)
758 					return B_NOT_ALLOWED;
759 			}
760 		}
761 	}
762 
763 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
764 			VMArea* area = it.Next();) {
765 		addr_t areaLast = area->Base() + (area->Size() - 1);
766 		if (area->Base() < lastAddress && address < areaLast) {
767 			status_t error = cut_area(addressSpace, area, address,
768 				lastAddress, NULL, kernel);
769 			if (error != B_OK)
770 				return error;
771 				// Failing after already messing with areas is ugly, but we
772 				// can't do anything about it.
773 		}
774 	}
775 
776 	return B_OK;
777 }
778 
779 
780 /*! You need to hold the lock of the cache and the write lock of the address
781 	space when calling this function.
782 	Note, that in case of error your cache will be temporarily unlocked.
783 	If \a addressSpec is \c B_EXACT_ADDRESS and the
784 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
785 	that no part of the specified address range (base \c *_virtualAddress, size
786 	\a size) is wired.
787 */
788 static status_t
789 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
790 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
791 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
792 	bool kernel, VMArea** _area, void** _virtualAddress)
793 {
794 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
795 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
796 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
797 		addressRestrictions->address, offset, size,
798 		addressRestrictions->address_specification, wiring, protection,
799 		_area, areaName));
800 	cache->AssertLocked();
801 
802 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
803 		| HEAP_DONT_LOCK_KERNEL_SPACE;
804 	int priority;
805 	if (addressSpace != VMAddressSpace::Kernel()) {
806 		priority = VM_PRIORITY_USER;
807 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
808 		priority = VM_PRIORITY_VIP;
809 		allocationFlags |= HEAP_PRIORITY_VIP;
810 	} else
811 		priority = VM_PRIORITY_SYSTEM;
812 
813 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
814 		allocationFlags);
815 	if (area == NULL)
816 		return B_NO_MEMORY;
817 
818 	status_t status;
819 
820 	// if this is a private map, we need to create a new cache
821 	// to handle the private copies of pages as they are written to
822 	VMCache* sourceCache = cache;
823 	if (mapping == REGION_PRIVATE_MAP) {
824 		VMCache* newCache;
825 
826 		// create an anonymous cache
827 		status = VMCacheFactory::CreateAnonymousCache(newCache,
828 			(protection & B_STACK_AREA) != 0
829 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
830 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
831 		if (status != B_OK)
832 			goto err1;
833 
834 		newCache->Lock();
835 		newCache->temporary = 1;
836 		newCache->virtual_base = offset;
837 		newCache->virtual_end = offset + size;
838 
839 		cache->AddConsumer(newCache);
840 
841 		cache = newCache;
842 	}
843 
844 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
845 		status = cache->SetMinimalCommitment(size, priority);
846 		if (status != B_OK)
847 			goto err2;
848 	}
849 
850 	// check to see if this address space has entered DELETE state
851 	if (addressSpace->IsBeingDeleted()) {
852 		// okay, someone is trying to delete this address space now, so we can't
853 		// insert the area, so back out
854 		status = B_BAD_TEAM_ID;
855 		goto err2;
856 	}
857 
858 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
859 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
860 		status = unmap_address_range(addressSpace,
861 			(addr_t)addressRestrictions->address, size, kernel);
862 		if (status != B_OK)
863 			goto err2;
864 	}
865 
866 	status = addressSpace->InsertArea(area, size, addressRestrictions,
867 		allocationFlags, _virtualAddress);
868 	if (status != B_OK) {
869 		// TODO: wait and try again once this is working in the backend
870 #if 0
871 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
872 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
873 				0, 0);
874 		}
875 #endif
876 		goto err2;
877 	}
878 
879 	// attach the cache to the area
880 	area->cache = cache;
881 	area->cache_offset = offset;
882 
883 	// point the cache back to the area
884 	cache->InsertAreaLocked(area);
885 	if (mapping == REGION_PRIVATE_MAP)
886 		cache->Unlock();
887 
888 	// insert the area in the global area hash table
889 	VMAreaHash::Insert(area);
890 
891 	// grab a ref to the address space (the area holds this)
892 	addressSpace->Get();
893 
894 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
895 //		cache, sourceCache, areaName, area);
896 
897 	*_area = area;
898 	return B_OK;
899 
900 err2:
901 	if (mapping == REGION_PRIVATE_MAP) {
902 		// We created this cache, so we must delete it again. Note, that we
903 		// need to temporarily unlock the source cache or we'll otherwise
904 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
905 		sourceCache->Unlock();
906 		cache->ReleaseRefAndUnlock();
907 		sourceCache->Lock();
908 	}
909 err1:
910 	addressSpace->DeleteArea(area, allocationFlags);
911 	return status;
912 }
913 
914 
915 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
916 	  locker1, locker2).
917 */
918 template<typename LockerType1, typename LockerType2>
919 static inline bool
920 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
921 {
922 	area->cache->AssertLocked();
923 
924 	VMAreaUnwiredWaiter waiter;
925 	if (!area->AddWaiterIfWired(&waiter))
926 		return false;
927 
928 	// unlock everything and wait
929 	if (locker1 != NULL)
930 		locker1->Unlock();
931 	if (locker2 != NULL)
932 		locker2->Unlock();
933 
934 	waiter.waitEntry.Wait();
935 
936 	return true;
937 }
938 
939 
940 /*!	Checks whether the given area has any wired ranges intersecting with the
941 	specified range and waits, if so.
942 
943 	When it has to wait, the function calls \c Unlock() on both \a locker1
944 	and \a locker2, if given.
945 	The area's top cache must be locked and must be unlocked as a side effect
946 	of calling \c Unlock() on either \a locker1 or \a locker2.
947 
948 	If the function does not have to wait it does not modify or unlock any
949 	object.
950 
951 	\param area The area to be checked.
952 	\param base The base address of the range to check.
953 	\param size The size of the address range to check.
954 	\param locker1 An object to be unlocked when before starting to wait (may
955 		be \c NULL).
956 	\param locker2 An object to be unlocked when before starting to wait (may
957 		be \c NULL).
958 	\return \c true, if the function had to wait, \c false otherwise.
959 */
960 template<typename LockerType1, typename LockerType2>
961 static inline bool
962 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
963 	LockerType1* locker1, LockerType2* locker2)
964 {
965 	area->cache->AssertLocked();
966 
967 	VMAreaUnwiredWaiter waiter;
968 	if (!area->AddWaiterIfWired(&waiter, base, size))
969 		return false;
970 
971 	// unlock everything and wait
972 	if (locker1 != NULL)
973 		locker1->Unlock();
974 	if (locker2 != NULL)
975 		locker2->Unlock();
976 
977 	waiter.waitEntry.Wait();
978 
979 	return true;
980 }
981 
982 
983 /*!	Checks whether the given address space has any wired ranges intersecting
984 	with the specified range and waits, if so.
985 
986 	Similar to wait_if_area_range_is_wired(), with the following differences:
987 	- All areas intersecting with the range are checked (respectively all until
988 	  one is found that contains a wired range intersecting with the given
989 	  range).
990 	- The given address space must at least be read-locked and must be unlocked
991 	  when \c Unlock() is called on \a locker.
992 	- None of the areas' caches are allowed to be locked.
993 */
994 template<typename LockerType>
995 static inline bool
996 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
997 	size_t size, LockerType* locker)
998 {
999 	addr_t end = base + size - 1;
1000 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
1001 			VMArea* area = it.Next();) {
1002 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
1003 		if (area->Base() > end)
1004 			return false;
1005 
1006 		if (base >= area->Base() + area->Size() - 1)
1007 			continue;
1008 
1009 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1010 
1011 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1012 			return true;
1013 	}
1014 
1015 	return false;
1016 }
1017 
1018 
1019 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1020 	It must be called in a situation where the kernel address space may be
1021 	locked.
1022 */
1023 status_t
1024 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1025 {
1026 	AddressSpaceReadLocker locker;
1027 	VMArea* area;
1028 	status_t status = locker.SetFromArea(id, area);
1029 	if (status != B_OK)
1030 		return status;
1031 
1032 	if (area->page_protections == NULL) {
1033 		status = allocate_area_page_protections(area);
1034 		if (status != B_OK)
1035 			return status;
1036 	}
1037 
1038 	*cookie = (void*)area;
1039 	return B_OK;
1040 }
1041 
1042 
1043 /*!	This is a debug helper function that can only be used with very specific
1044 	use cases.
1045 	Sets protection for the given address range to the protection specified.
1046 	If \a protection is 0 then the involved pages will be marked non-present
1047 	in the translation map to cause a fault on access. The pages aren't
1048 	actually unmapped however so that they can be marked present again with
1049 	additional calls to this function. For this to work the area must be
1050 	fully locked in memory so that the pages aren't otherwise touched.
1051 	This function does not lock the kernel address space and needs to be
1052 	supplied with a \a cookie retrieved from a successful call to
1053 	vm_prepare_kernel_area_debug_protection().
1054 */
1055 status_t
1056 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1057 	uint32 protection)
1058 {
1059 	// check address range
1060 	addr_t address = (addr_t)_address;
1061 	size = PAGE_ALIGN(size);
1062 
1063 	if ((address % B_PAGE_SIZE) != 0
1064 		|| (addr_t)address + size < (addr_t)address
1065 		|| !IS_KERNEL_ADDRESS(address)
1066 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1067 		return B_BAD_VALUE;
1068 	}
1069 
1070 	// Translate the kernel protection to user protection as we only store that.
1071 	if ((protection & B_KERNEL_READ_AREA) != 0)
1072 		protection |= B_READ_AREA;
1073 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1074 		protection |= B_WRITE_AREA;
1075 
1076 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1077 	VMTranslationMap* map = addressSpace->TranslationMap();
1078 	VMArea* area = (VMArea*)cookie;
1079 
1080 	addr_t offset = address - area->Base();
1081 	if (area->Size() - offset < size) {
1082 		panic("protect range not fully within supplied area");
1083 		return B_BAD_VALUE;
1084 	}
1085 
1086 	if (area->page_protections == NULL) {
1087 		panic("area has no page protections");
1088 		return B_BAD_VALUE;
1089 	}
1090 
1091 	// Invalidate the mapping entries so any access to them will fault or
1092 	// restore the mapping entries unchanged so that lookup will success again.
1093 	map->Lock();
1094 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1095 	map->Unlock();
1096 
1097 	// And set the proper page protections so that the fault case will actually
1098 	// fail and not simply try to map a new page.
1099 	for (addr_t pageAddress = address; pageAddress < address + size;
1100 			pageAddress += B_PAGE_SIZE) {
1101 		set_area_page_protection(area, pageAddress, protection);
1102 	}
1103 
1104 	return B_OK;
1105 }
1106 
1107 
1108 status_t
1109 vm_block_address_range(const char* name, void* address, addr_t size)
1110 {
1111 	if (!arch_vm_supports_protection(0))
1112 		return B_NOT_SUPPORTED;
1113 
1114 	AddressSpaceWriteLocker locker;
1115 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1116 	if (status != B_OK)
1117 		return status;
1118 
1119 	VMAddressSpace* addressSpace = locker.AddressSpace();
1120 
1121 	// create an anonymous cache
1122 	VMCache* cache;
1123 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1124 		VM_PRIORITY_SYSTEM);
1125 	if (status != B_OK)
1126 		return status;
1127 
1128 	cache->temporary = 1;
1129 	cache->virtual_end = size;
1130 	cache->Lock();
1131 
1132 	VMArea* area;
1133 	virtual_address_restrictions addressRestrictions = {};
1134 	addressRestrictions.address = address;
1135 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1136 	status = map_backing_store(addressSpace, cache, 0, name, size,
1137 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1138 		true, &area, NULL);
1139 	if (status != B_OK) {
1140 		cache->ReleaseRefAndUnlock();
1141 		return status;
1142 	}
1143 
1144 	cache->Unlock();
1145 	area->cache_type = CACHE_TYPE_RAM;
1146 	return area->id;
1147 }
1148 
1149 
1150 status_t
1151 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1152 {
1153 	AddressSpaceWriteLocker locker(team);
1154 	if (!locker.IsLocked())
1155 		return B_BAD_TEAM_ID;
1156 
1157 	VMAddressSpace* addressSpace = locker.AddressSpace();
1158 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1159 		addressSpace == VMAddressSpace::Kernel()
1160 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1161 }
1162 
1163 
1164 status_t
1165 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1166 	addr_t size, uint32 flags)
1167 {
1168 	if (size == 0)
1169 		return B_BAD_VALUE;
1170 
1171 	AddressSpaceWriteLocker locker(team);
1172 	if (!locker.IsLocked())
1173 		return B_BAD_TEAM_ID;
1174 
1175 	virtual_address_restrictions addressRestrictions = {};
1176 	addressRestrictions.address = *_address;
1177 	addressRestrictions.address_specification = addressSpec;
1178 	VMAddressSpace* addressSpace = locker.AddressSpace();
1179 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1180 		addressSpace == VMAddressSpace::Kernel()
1181 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1182 		_address);
1183 }
1184 
1185 
1186 area_id
1187 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1188 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1189 	const virtual_address_restrictions* virtualAddressRestrictions,
1190 	const physical_address_restrictions* physicalAddressRestrictions,
1191 	bool kernel, void** _address)
1192 {
1193 	VMArea* area;
1194 	VMCache* cache;
1195 	vm_page* page = NULL;
1196 	bool isStack = (protection & B_STACK_AREA) != 0;
1197 	page_num_t guardPages;
1198 	bool canOvercommit = false;
1199 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1200 		? VM_PAGE_ALLOC_CLEAR : 0;
1201 
1202 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1203 		team, name, size));
1204 
1205 	size = PAGE_ALIGN(size);
1206 	guardSize = PAGE_ALIGN(guardSize);
1207 	guardPages = guardSize / B_PAGE_SIZE;
1208 
1209 	if (size == 0 || size < guardSize)
1210 		return B_BAD_VALUE;
1211 	if (!arch_vm_supports_protection(protection))
1212 		return B_NOT_SUPPORTED;
1213 
1214 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1215 		canOvercommit = true;
1216 
1217 #ifdef DEBUG_KERNEL_STACKS
1218 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1219 		isStack = true;
1220 #endif
1221 
1222 	// check parameters
1223 	switch (virtualAddressRestrictions->address_specification) {
1224 		case B_ANY_ADDRESS:
1225 		case B_EXACT_ADDRESS:
1226 		case B_BASE_ADDRESS:
1227 		case B_ANY_KERNEL_ADDRESS:
1228 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1229 		case B_RANDOMIZED_ANY_ADDRESS:
1230 		case B_RANDOMIZED_BASE_ADDRESS:
1231 			break;
1232 
1233 		default:
1234 			return B_BAD_VALUE;
1235 	}
1236 
1237 	// If low or high physical address restrictions are given, we force
1238 	// B_CONTIGUOUS wiring, since only then we'll use
1239 	// vm_page_allocate_page_run() which deals with those restrictions.
1240 	if (physicalAddressRestrictions->low_address != 0
1241 		|| physicalAddressRestrictions->high_address != 0) {
1242 		wiring = B_CONTIGUOUS;
1243 	}
1244 
1245 	physical_address_restrictions stackPhysicalRestrictions;
1246 	bool doReserveMemory = false;
1247 	switch (wiring) {
1248 		case B_NO_LOCK:
1249 			break;
1250 		case B_FULL_LOCK:
1251 		case B_LAZY_LOCK:
1252 		case B_CONTIGUOUS:
1253 			doReserveMemory = true;
1254 			break;
1255 		case B_ALREADY_WIRED:
1256 			break;
1257 		case B_LOMEM:
1258 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1259 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1260 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1261 			wiring = B_CONTIGUOUS;
1262 			doReserveMemory = true;
1263 			break;
1264 		case B_32_BIT_FULL_LOCK:
1265 			if (B_HAIKU_PHYSICAL_BITS <= 32
1266 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1267 				wiring = B_FULL_LOCK;
1268 				doReserveMemory = true;
1269 				break;
1270 			}
1271 			// TODO: We don't really support this mode efficiently. Just fall
1272 			// through for now ...
1273 		case B_32_BIT_CONTIGUOUS:
1274 			#if B_HAIKU_PHYSICAL_BITS > 32
1275 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1276 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1277 					stackPhysicalRestrictions.high_address
1278 						= (phys_addr_t)1 << 32;
1279 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1280 				}
1281 			#endif
1282 			wiring = B_CONTIGUOUS;
1283 			doReserveMemory = true;
1284 			break;
1285 		default:
1286 			return B_BAD_VALUE;
1287 	}
1288 
1289 	// Optimization: For a single-page contiguous allocation without low/high
1290 	// memory restriction B_FULL_LOCK wiring suffices.
1291 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1292 		&& physicalAddressRestrictions->low_address == 0
1293 		&& physicalAddressRestrictions->high_address == 0) {
1294 		wiring = B_FULL_LOCK;
1295 	}
1296 
1297 	// For full lock or contiguous areas we're also going to map the pages and
1298 	// thus need to reserve pages for the mapping backend upfront.
1299 	addr_t reservedMapPages = 0;
1300 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1301 		AddressSpaceWriteLocker locker;
1302 		status_t status = locker.SetTo(team);
1303 		if (status != B_OK)
1304 			return status;
1305 
1306 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1307 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1308 	}
1309 
1310 	int priority;
1311 	if (team != VMAddressSpace::KernelID())
1312 		priority = VM_PRIORITY_USER;
1313 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1314 		priority = VM_PRIORITY_VIP;
1315 	else
1316 		priority = VM_PRIORITY_SYSTEM;
1317 
1318 	// Reserve memory before acquiring the address space lock. This reduces the
1319 	// chances of failure, since while holding the write lock to the address
1320 	// space (if it is the kernel address space that is), the low memory handler
1321 	// won't be able to free anything for us.
1322 	addr_t reservedMemory = 0;
1323 	if (doReserveMemory) {
1324 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1325 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1326 			return B_NO_MEMORY;
1327 		reservedMemory = size;
1328 		// TODO: We don't reserve the memory for the pages for the page
1329 		// directories/tables. We actually need to do since we currently don't
1330 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1331 		// there are actually less physical pages than there should be, which
1332 		// can get the VM into trouble in low memory situations.
1333 	}
1334 
1335 	AddressSpaceWriteLocker locker;
1336 	VMAddressSpace* addressSpace;
1337 	status_t status;
1338 
1339 	// For full lock areas reserve the pages before locking the address
1340 	// space. E.g. block caches can't release their memory while we hold the
1341 	// address space lock.
1342 	page_num_t reservedPages = reservedMapPages;
1343 	if (wiring == B_FULL_LOCK)
1344 		reservedPages += size / B_PAGE_SIZE;
1345 
1346 	vm_page_reservation reservation;
1347 	if (reservedPages > 0) {
1348 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1349 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1350 					priority)) {
1351 				reservedPages = 0;
1352 				status = B_WOULD_BLOCK;
1353 				goto err0;
1354 			}
1355 		} else
1356 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1357 	}
1358 
1359 	if (wiring == B_CONTIGUOUS) {
1360 		// we try to allocate the page run here upfront as this may easily
1361 		// fail for obvious reasons
1362 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1363 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1364 		if (page == NULL) {
1365 			status = B_NO_MEMORY;
1366 			goto err0;
1367 		}
1368 	}
1369 
1370 	// Lock the address space and, if B_EXACT_ADDRESS and
1371 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1372 	// is not wired.
1373 	do {
1374 		status = locker.SetTo(team);
1375 		if (status != B_OK)
1376 			goto err1;
1377 
1378 		addressSpace = locker.AddressSpace();
1379 	} while (virtualAddressRestrictions->address_specification
1380 			== B_EXACT_ADDRESS
1381 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1382 		&& wait_if_address_range_is_wired(addressSpace,
1383 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1384 
1385 	// create an anonymous cache
1386 	// if it's a stack, make sure that two pages are available at least
1387 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1388 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1389 		wiring == B_NO_LOCK, priority);
1390 	if (status != B_OK)
1391 		goto err1;
1392 
1393 	cache->temporary = 1;
1394 	cache->virtual_end = size;
1395 	cache->committed_size = reservedMemory;
1396 		// TODO: This should be done via a method.
1397 	reservedMemory = 0;
1398 
1399 	cache->Lock();
1400 
1401 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1402 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1403 		kernel, &area, _address);
1404 
1405 	if (status != B_OK) {
1406 		cache->ReleaseRefAndUnlock();
1407 		goto err1;
1408 	}
1409 
1410 	locker.DegradeToReadLock();
1411 
1412 	switch (wiring) {
1413 		case B_NO_LOCK:
1414 		case B_LAZY_LOCK:
1415 			// do nothing - the pages are mapped in as needed
1416 			break;
1417 
1418 		case B_FULL_LOCK:
1419 		{
1420 			// Allocate and map all pages for this area
1421 
1422 			off_t offset = 0;
1423 			for (addr_t address = area->Base();
1424 					address < area->Base() + (area->Size() - 1);
1425 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1426 #ifdef DEBUG_KERNEL_STACKS
1427 #	ifdef STACK_GROWS_DOWNWARDS
1428 				if (isStack && address < area->Base()
1429 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1430 #	else
1431 				if (isStack && address >= area->Base() + area->Size()
1432 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1433 #	endif
1434 					continue;
1435 #endif
1436 				vm_page* page = vm_page_allocate_page(&reservation,
1437 					PAGE_STATE_WIRED | pageAllocFlags);
1438 				cache->InsertPage(page, offset);
1439 				map_page(area, page, address, protection, &reservation);
1440 
1441 				DEBUG_PAGE_ACCESS_END(page);
1442 			}
1443 
1444 			break;
1445 		}
1446 
1447 		case B_ALREADY_WIRED:
1448 		{
1449 			// The pages should already be mapped. This is only really useful
1450 			// during boot time. Find the appropriate vm_page objects and stick
1451 			// them in the cache object.
1452 			VMTranslationMap* map = addressSpace->TranslationMap();
1453 			off_t offset = 0;
1454 
1455 			if (!gKernelStartup)
1456 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1457 
1458 			map->Lock();
1459 
1460 			for (addr_t virtualAddress = area->Base();
1461 					virtualAddress < area->Base() + (area->Size() - 1);
1462 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1463 				phys_addr_t physicalAddress;
1464 				uint32 flags;
1465 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1466 				if (status < B_OK) {
1467 					panic("looking up mapping failed for va 0x%lx\n",
1468 						virtualAddress);
1469 				}
1470 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1471 				if (page == NULL) {
1472 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1473 						"\n", physicalAddress);
1474 				}
1475 
1476 				DEBUG_PAGE_ACCESS_START(page);
1477 
1478 				cache->InsertPage(page, offset);
1479 				increment_page_wired_count(page);
1480 				vm_page_set_state(page, PAGE_STATE_WIRED);
1481 				page->busy = false;
1482 
1483 				DEBUG_PAGE_ACCESS_END(page);
1484 			}
1485 
1486 			map->Unlock();
1487 			break;
1488 		}
1489 
1490 		case B_CONTIGUOUS:
1491 		{
1492 			// We have already allocated our continuous pages run, so we can now
1493 			// just map them in the address space
1494 			VMTranslationMap* map = addressSpace->TranslationMap();
1495 			phys_addr_t physicalAddress
1496 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1497 			addr_t virtualAddress = area->Base();
1498 			off_t offset = 0;
1499 
1500 			map->Lock();
1501 
1502 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1503 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1504 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1505 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1506 				if (page == NULL)
1507 					panic("couldn't lookup physical page just allocated\n");
1508 
1509 				status = map->Map(virtualAddress, physicalAddress, protection,
1510 					area->MemoryType(), &reservation);
1511 				if (status < B_OK)
1512 					panic("couldn't map physical page in page run\n");
1513 
1514 				cache->InsertPage(page, offset);
1515 				increment_page_wired_count(page);
1516 
1517 				DEBUG_PAGE_ACCESS_END(page);
1518 			}
1519 
1520 			map->Unlock();
1521 			break;
1522 		}
1523 
1524 		default:
1525 			break;
1526 	}
1527 
1528 	cache->Unlock();
1529 
1530 	if (reservedPages > 0)
1531 		vm_page_unreserve_pages(&reservation);
1532 
1533 	TRACE(("vm_create_anonymous_area: done\n"));
1534 
1535 	area->cache_type = CACHE_TYPE_RAM;
1536 	return area->id;
1537 
1538 err1:
1539 	if (wiring == B_CONTIGUOUS) {
1540 		// we had reserved the area space upfront...
1541 		phys_addr_t pageNumber = page->physical_page_number;
1542 		int32 i;
1543 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1544 			page = vm_lookup_page(pageNumber);
1545 			if (page == NULL)
1546 				panic("couldn't lookup physical page just allocated\n");
1547 
1548 			vm_page_set_state(page, PAGE_STATE_FREE);
1549 		}
1550 	}
1551 
1552 err0:
1553 	if (reservedPages > 0)
1554 		vm_page_unreserve_pages(&reservation);
1555 	if (reservedMemory > 0)
1556 		vm_unreserve_memory(reservedMemory);
1557 
1558 	return status;
1559 }
1560 
1561 
1562 area_id
1563 vm_map_physical_memory(team_id team, const char* name, void** _address,
1564 	uint32 addressSpec, addr_t size, uint32 protection,
1565 	phys_addr_t physicalAddress, bool alreadyWired)
1566 {
1567 	VMArea* area;
1568 	VMCache* cache;
1569 	addr_t mapOffset;
1570 
1571 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1572 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1573 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1574 		addressSpec, size, protection, physicalAddress));
1575 
1576 	if (!arch_vm_supports_protection(protection))
1577 		return B_NOT_SUPPORTED;
1578 
1579 	AddressSpaceWriteLocker locker(team);
1580 	if (!locker.IsLocked())
1581 		return B_BAD_TEAM_ID;
1582 
1583 	// if the physical address is somewhat inside a page,
1584 	// move the actual area down to align on a page boundary
1585 	mapOffset = physicalAddress % B_PAGE_SIZE;
1586 	size += mapOffset;
1587 	physicalAddress -= mapOffset;
1588 
1589 	size = PAGE_ALIGN(size);
1590 
1591 	// create a device cache
1592 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1593 	if (status != B_OK)
1594 		return status;
1595 
1596 	cache->virtual_end = size;
1597 
1598 	cache->Lock();
1599 
1600 	virtual_address_restrictions addressRestrictions = {};
1601 	addressRestrictions.address = *_address;
1602 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1603 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1604 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1605 		true, &area, _address);
1606 
1607 	if (status < B_OK)
1608 		cache->ReleaseRefLocked();
1609 
1610 	cache->Unlock();
1611 
1612 	if (status == B_OK) {
1613 		// set requested memory type -- use uncached, if not given
1614 		uint32 memoryType = addressSpec & B_MTR_MASK;
1615 		if (memoryType == 0)
1616 			memoryType = B_MTR_UC;
1617 
1618 		area->SetMemoryType(memoryType);
1619 
1620 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1621 		if (status != B_OK)
1622 			delete_area(locker.AddressSpace(), area, false);
1623 	}
1624 
1625 	if (status != B_OK)
1626 		return status;
1627 
1628 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1629 
1630 	if (alreadyWired) {
1631 		// The area is already mapped, but possibly not with the right
1632 		// memory type.
1633 		map->Lock();
1634 		map->ProtectArea(area, area->protection);
1635 		map->Unlock();
1636 	} else {
1637 		// Map the area completely.
1638 
1639 		// reserve pages needed for the mapping
1640 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1641 			area->Base() + (size - 1));
1642 		vm_page_reservation reservation;
1643 		vm_page_reserve_pages(&reservation, reservePages,
1644 			team == VMAddressSpace::KernelID()
1645 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1646 
1647 		map->Lock();
1648 
1649 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1650 			map->Map(area->Base() + offset, physicalAddress + offset,
1651 				protection, area->MemoryType(), &reservation);
1652 		}
1653 
1654 		map->Unlock();
1655 
1656 		vm_page_unreserve_pages(&reservation);
1657 	}
1658 
1659 	// modify the pointer returned to be offset back into the new area
1660 	// the same way the physical address in was offset
1661 	*_address = (void*)((addr_t)*_address + mapOffset);
1662 
1663 	area->cache_type = CACHE_TYPE_DEVICE;
1664 	return area->id;
1665 }
1666 
1667 
1668 /*!	Don't use!
1669 	TODO: This function was introduced to map physical page vecs to
1670 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1671 	use a device cache and does not track vm_page::wired_count!
1672 */
1673 area_id
1674 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1675 	uint32 addressSpec, addr_t* _size, uint32 protection,
1676 	struct generic_io_vec* vecs, uint32 vecCount)
1677 {
1678 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1679 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1680 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1681 		addressSpec, _size, protection, vecs, vecCount));
1682 
1683 	if (!arch_vm_supports_protection(protection)
1684 		|| (addressSpec & B_MTR_MASK) != 0) {
1685 		return B_NOT_SUPPORTED;
1686 	}
1687 
1688 	AddressSpaceWriteLocker locker(team);
1689 	if (!locker.IsLocked())
1690 		return B_BAD_TEAM_ID;
1691 
1692 	if (vecCount == 0)
1693 		return B_BAD_VALUE;
1694 
1695 	addr_t size = 0;
1696 	for (uint32 i = 0; i < vecCount; i++) {
1697 		if (vecs[i].base % B_PAGE_SIZE != 0
1698 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1699 			return B_BAD_VALUE;
1700 		}
1701 
1702 		size += vecs[i].length;
1703 	}
1704 
1705 	// create a device cache
1706 	VMCache* cache;
1707 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1708 	if (result != B_OK)
1709 		return result;
1710 
1711 	cache->virtual_end = size;
1712 
1713 	cache->Lock();
1714 
1715 	VMArea* area;
1716 	virtual_address_restrictions addressRestrictions = {};
1717 	addressRestrictions.address = *_address;
1718 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1719 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1720 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1721 		&addressRestrictions, true, &area, _address);
1722 
1723 	if (result != B_OK)
1724 		cache->ReleaseRefLocked();
1725 
1726 	cache->Unlock();
1727 
1728 	if (result != B_OK)
1729 		return result;
1730 
1731 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1732 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1733 		area->Base() + (size - 1));
1734 
1735 	vm_page_reservation reservation;
1736 	vm_page_reserve_pages(&reservation, reservePages,
1737 			team == VMAddressSpace::KernelID()
1738 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1739 	map->Lock();
1740 
1741 	uint32 vecIndex = 0;
1742 	size_t vecOffset = 0;
1743 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1744 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1745 			vecOffset = 0;
1746 			vecIndex++;
1747 		}
1748 
1749 		if (vecIndex >= vecCount)
1750 			break;
1751 
1752 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1753 			protection, area->MemoryType(), &reservation);
1754 
1755 		vecOffset += B_PAGE_SIZE;
1756 	}
1757 
1758 	map->Unlock();
1759 	vm_page_unreserve_pages(&reservation);
1760 
1761 	if (_size != NULL)
1762 		*_size = size;
1763 
1764 	area->cache_type = CACHE_TYPE_DEVICE;
1765 	return area->id;
1766 }
1767 
1768 
1769 area_id
1770 vm_create_null_area(team_id team, const char* name, void** address,
1771 	uint32 addressSpec, addr_t size, uint32 flags)
1772 {
1773 	size = PAGE_ALIGN(size);
1774 
1775 	// Lock the address space and, if B_EXACT_ADDRESS and
1776 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1777 	// is not wired.
1778 	AddressSpaceWriteLocker locker;
1779 	do {
1780 		if (locker.SetTo(team) != B_OK)
1781 			return B_BAD_TEAM_ID;
1782 	} while (addressSpec == B_EXACT_ADDRESS
1783 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1784 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1785 			(addr_t)*address, size, &locker));
1786 
1787 	// create a null cache
1788 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1789 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1790 	VMCache* cache;
1791 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1792 	if (status != B_OK)
1793 		return status;
1794 
1795 	cache->temporary = 1;
1796 	cache->virtual_end = size;
1797 
1798 	cache->Lock();
1799 
1800 	VMArea* area;
1801 	virtual_address_restrictions addressRestrictions = {};
1802 	addressRestrictions.address = *address;
1803 	addressRestrictions.address_specification = addressSpec;
1804 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1805 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1806 		&addressRestrictions, true, &area, address);
1807 
1808 	if (status < B_OK) {
1809 		cache->ReleaseRefAndUnlock();
1810 		return status;
1811 	}
1812 
1813 	cache->Unlock();
1814 
1815 	area->cache_type = CACHE_TYPE_NULL;
1816 	return area->id;
1817 }
1818 
1819 
1820 /*!	Creates the vnode cache for the specified \a vnode.
1821 	The vnode has to be marked busy when calling this function.
1822 */
1823 status_t
1824 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1825 {
1826 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1827 }
1828 
1829 
1830 /*!	\a cache must be locked. The area's address space must be read-locked.
1831 */
1832 static void
1833 pre_map_area_pages(VMArea* area, VMCache* cache,
1834 	vm_page_reservation* reservation)
1835 {
1836 	addr_t baseAddress = area->Base();
1837 	addr_t cacheOffset = area->cache_offset;
1838 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1839 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1840 
1841 	for (VMCachePagesTree::Iterator it
1842 				= cache->pages.GetIterator(firstPage, true, true);
1843 			vm_page* page = it.Next();) {
1844 		if (page->cache_offset >= endPage)
1845 			break;
1846 
1847 		// skip busy and inactive pages
1848 		if (page->busy || page->usage_count == 0)
1849 			continue;
1850 
1851 		DEBUG_PAGE_ACCESS_START(page);
1852 		map_page(area, page,
1853 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1854 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1855 		DEBUG_PAGE_ACCESS_END(page);
1856 	}
1857 }
1858 
1859 
1860 /*!	Will map the file specified by \a fd to an area in memory.
1861 	The file will be mirrored beginning at the specified \a offset. The
1862 	\a offset and \a size arguments have to be page aligned.
1863 */
1864 static area_id
1865 _vm_map_file(team_id team, const char* name, void** _address,
1866 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1867 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1868 {
1869 	// TODO: for binary files, we want to make sure that they get the
1870 	//	copy of a file at a given time, ie. later changes should not
1871 	//	make it into the mapped copy -- this will need quite some changes
1872 	//	to be done in a nice way
1873 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1874 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1875 
1876 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1877 	size = PAGE_ALIGN(size);
1878 
1879 	if (mapping == REGION_NO_PRIVATE_MAP)
1880 		protection |= B_SHARED_AREA;
1881 	if (addressSpec != B_EXACT_ADDRESS)
1882 		unmapAddressRange = false;
1883 
1884 	if (fd < 0) {
1885 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1886 		virtual_address_restrictions virtualRestrictions = {};
1887 		virtualRestrictions.address = *_address;
1888 		virtualRestrictions.address_specification = addressSpec;
1889 		physical_address_restrictions physicalRestrictions = {};
1890 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1891 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1892 			_address);
1893 	}
1894 
1895 	// get the open flags of the FD
1896 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1897 	if (descriptor == NULL)
1898 		return EBADF;
1899 	int32 openMode = descriptor->open_mode;
1900 	put_fd(descriptor);
1901 
1902 	// The FD must open for reading at any rate. For shared mapping with write
1903 	// access, additionally the FD must be open for writing.
1904 	if ((openMode & O_ACCMODE) == O_WRONLY
1905 		|| (mapping == REGION_NO_PRIVATE_MAP
1906 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1907 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1908 		return EACCES;
1909 	}
1910 
1911 	// get the vnode for the object, this also grabs a ref to it
1912 	struct vnode* vnode = NULL;
1913 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1914 	if (status < B_OK)
1915 		return status;
1916 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1917 
1918 	// If we're going to pre-map pages, we need to reserve the pages needed by
1919 	// the mapping backend upfront.
1920 	page_num_t reservedPreMapPages = 0;
1921 	vm_page_reservation reservation;
1922 	if ((protection & B_READ_AREA) != 0) {
1923 		AddressSpaceWriteLocker locker;
1924 		status = locker.SetTo(team);
1925 		if (status != B_OK)
1926 			return status;
1927 
1928 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1929 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1930 
1931 		locker.Unlock();
1932 
1933 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1934 			team == VMAddressSpace::KernelID()
1935 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1936 	}
1937 
1938 	struct PageUnreserver {
1939 		PageUnreserver(vm_page_reservation* reservation)
1940 			:
1941 			fReservation(reservation)
1942 		{
1943 		}
1944 
1945 		~PageUnreserver()
1946 		{
1947 			if (fReservation != NULL)
1948 				vm_page_unreserve_pages(fReservation);
1949 		}
1950 
1951 		vm_page_reservation* fReservation;
1952 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1953 
1954 	// Lock the address space and, if the specified address range shall be
1955 	// unmapped, ensure it is not wired.
1956 	AddressSpaceWriteLocker locker;
1957 	do {
1958 		if (locker.SetTo(team) != B_OK)
1959 			return B_BAD_TEAM_ID;
1960 	} while (unmapAddressRange
1961 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1962 			(addr_t)*_address, size, &locker));
1963 
1964 	// TODO: this only works for file systems that use the file cache
1965 	VMCache* cache;
1966 	status = vfs_get_vnode_cache(vnode, &cache, false);
1967 	if (status < B_OK)
1968 		return status;
1969 
1970 	cache->Lock();
1971 
1972 	VMArea* area;
1973 	virtual_address_restrictions addressRestrictions = {};
1974 	addressRestrictions.address = *_address;
1975 	addressRestrictions.address_specification = addressSpec;
1976 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1977 		0, protection, mapping,
1978 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1979 		&addressRestrictions, kernel, &area, _address);
1980 
1981 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1982 		// map_backing_store() cannot know we no longer need the ref
1983 		cache->ReleaseRefLocked();
1984 	}
1985 
1986 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1987 		pre_map_area_pages(area, cache, &reservation);
1988 
1989 	cache->Unlock();
1990 
1991 	if (status == B_OK) {
1992 		// TODO: this probably deserves a smarter solution, ie. don't always
1993 		// prefetch stuff, and also, probably don't trigger it at this place.
1994 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1995 			// prefetches at max 10 MB starting from "offset"
1996 	}
1997 
1998 	if (status != B_OK)
1999 		return status;
2000 
2001 	area->cache_type = CACHE_TYPE_VNODE;
2002 	return area->id;
2003 }
2004 
2005 
2006 area_id
2007 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2008 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2009 	int fd, off_t offset)
2010 {
2011 	if (!arch_vm_supports_protection(protection))
2012 		return B_NOT_SUPPORTED;
2013 
2014 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2015 		mapping, unmapAddressRange, fd, offset, true);
2016 }
2017 
2018 
2019 VMCache*
2020 vm_area_get_locked_cache(VMArea* area)
2021 {
2022 	rw_lock_read_lock(&sAreaCacheLock);
2023 
2024 	while (true) {
2025 		VMCache* cache = area->cache;
2026 
2027 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2028 			// cache has been deleted
2029 			rw_lock_read_lock(&sAreaCacheLock);
2030 			continue;
2031 		}
2032 
2033 		rw_lock_read_lock(&sAreaCacheLock);
2034 
2035 		if (cache == area->cache) {
2036 			cache->AcquireRefLocked();
2037 			rw_lock_read_unlock(&sAreaCacheLock);
2038 			return cache;
2039 		}
2040 
2041 		// the cache changed in the meantime
2042 		cache->Unlock();
2043 	}
2044 }
2045 
2046 
2047 void
2048 vm_area_put_locked_cache(VMCache* cache)
2049 {
2050 	cache->ReleaseRefAndUnlock();
2051 }
2052 
2053 
2054 area_id
2055 vm_clone_area(team_id team, const char* name, void** address,
2056 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2057 	bool kernel)
2058 {
2059 	VMArea* newArea = NULL;
2060 	VMArea* sourceArea;
2061 
2062 	// Check whether the source area exists and is cloneable. If so, mark it
2063 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2064 	{
2065 		AddressSpaceWriteLocker locker;
2066 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2067 		if (status != B_OK)
2068 			return status;
2069 
2070 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2071 			return B_NOT_ALLOWED;
2072 
2073 		sourceArea->protection |= B_SHARED_AREA;
2074 		protection |= B_SHARED_AREA;
2075 	}
2076 
2077 	// Now lock both address spaces and actually do the cloning.
2078 
2079 	MultiAddressSpaceLocker locker;
2080 	VMAddressSpace* sourceAddressSpace;
2081 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2082 	if (status != B_OK)
2083 		return status;
2084 
2085 	VMAddressSpace* targetAddressSpace;
2086 	status = locker.AddTeam(team, true, &targetAddressSpace);
2087 	if (status != B_OK)
2088 		return status;
2089 
2090 	status = locker.Lock();
2091 	if (status != B_OK)
2092 		return status;
2093 
2094 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2095 	if (sourceArea == NULL)
2096 		return B_BAD_VALUE;
2097 
2098 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2099 		return B_NOT_ALLOWED;
2100 
2101 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2102 
2103 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2104 	//	have been adapted. Maybe it should be part of the kernel settings,
2105 	//	anyway (so that old drivers can always work).
2106 #if 0
2107 	if (sourceArea->aspace == VMAddressSpace::Kernel()
2108 		&& addressSpace != VMAddressSpace::Kernel()
2109 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2110 		// kernel areas must not be cloned in userland, unless explicitly
2111 		// declared user-cloneable upon construction
2112 		status = B_NOT_ALLOWED;
2113 	} else
2114 #endif
2115 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2116 		status = B_NOT_ALLOWED;
2117 	else {
2118 		virtual_address_restrictions addressRestrictions = {};
2119 		addressRestrictions.address = *address;
2120 		addressRestrictions.address_specification = addressSpec;
2121 		status = map_backing_store(targetAddressSpace, cache,
2122 			sourceArea->cache_offset, name, sourceArea->Size(),
2123 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2124 			kernel, &newArea, address);
2125 	}
2126 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2127 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2128 		// to create a new cache, and has therefore already acquired a reference
2129 		// to the source cache - but otherwise it has no idea that we need
2130 		// one.
2131 		cache->AcquireRefLocked();
2132 	}
2133 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2134 		// we need to map in everything at this point
2135 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2136 			// we don't have actual pages to map but a physical area
2137 			VMTranslationMap* map
2138 				= sourceArea->address_space->TranslationMap();
2139 			map->Lock();
2140 
2141 			phys_addr_t physicalAddress;
2142 			uint32 oldProtection;
2143 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2144 
2145 			map->Unlock();
2146 
2147 			map = targetAddressSpace->TranslationMap();
2148 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2149 				newArea->Base() + (newArea->Size() - 1));
2150 
2151 			vm_page_reservation reservation;
2152 			vm_page_reserve_pages(&reservation, reservePages,
2153 				targetAddressSpace == VMAddressSpace::Kernel()
2154 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2155 			map->Lock();
2156 
2157 			for (addr_t offset = 0; offset < newArea->Size();
2158 					offset += B_PAGE_SIZE) {
2159 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2160 					protection, newArea->MemoryType(), &reservation);
2161 			}
2162 
2163 			map->Unlock();
2164 			vm_page_unreserve_pages(&reservation);
2165 		} else {
2166 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2167 			size_t reservePages = map->MaxPagesNeededToMap(
2168 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2169 			vm_page_reservation reservation;
2170 			vm_page_reserve_pages(&reservation, reservePages,
2171 				targetAddressSpace == VMAddressSpace::Kernel()
2172 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2173 
2174 			// map in all pages from source
2175 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2176 					vm_page* page  = it.Next();) {
2177 				if (!page->busy) {
2178 					DEBUG_PAGE_ACCESS_START(page);
2179 					map_page(newArea, page,
2180 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2181 							- newArea->cache_offset),
2182 						protection, &reservation);
2183 					DEBUG_PAGE_ACCESS_END(page);
2184 				}
2185 			}
2186 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2187 			// ensuring that!
2188 
2189 			vm_page_unreserve_pages(&reservation);
2190 		}
2191 	}
2192 	if (status == B_OK)
2193 		newArea->cache_type = sourceArea->cache_type;
2194 
2195 	vm_area_put_locked_cache(cache);
2196 
2197 	if (status < B_OK)
2198 		return status;
2199 
2200 	return newArea->id;
2201 }
2202 
2203 
2204 /*!	Deletes the specified area of the given address space.
2205 
2206 	The address space must be write-locked.
2207 	The caller must ensure that the area does not have any wired ranges.
2208 
2209 	\param addressSpace The address space containing the area.
2210 	\param area The area to be deleted.
2211 	\param deletingAddressSpace \c true, if the address space is in the process
2212 		of being deleted.
2213 */
2214 static void
2215 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2216 	bool deletingAddressSpace)
2217 {
2218 	ASSERT(!area->IsWired());
2219 
2220 	VMAreaHash::Remove(area);
2221 
2222 	// At this point the area is removed from the global hash table, but
2223 	// still exists in the area list.
2224 
2225 	// Unmap the virtual address space the area occupied.
2226 	{
2227 		// We need to lock the complete cache chain.
2228 		VMCache* topCache = vm_area_get_locked_cache(area);
2229 		VMCacheChainLocker cacheChainLocker(topCache);
2230 		cacheChainLocker.LockAllSourceCaches();
2231 
2232 		// If the area's top cache is a temporary cache and the area is the only
2233 		// one referencing it (besides us currently holding a second reference),
2234 		// the unmapping code doesn't need to care about preserving the accessed
2235 		// and dirty flags of the top cache page mappings.
2236 		bool ignoreTopCachePageFlags
2237 			= topCache->temporary && topCache->RefCount() == 2;
2238 
2239 		area->address_space->TranslationMap()->UnmapArea(area,
2240 			deletingAddressSpace, ignoreTopCachePageFlags);
2241 	}
2242 
2243 	if (!area->cache->temporary)
2244 		area->cache->WriteModified();
2245 
2246 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2247 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2248 
2249 	arch_vm_unset_memory_type(area);
2250 	addressSpace->RemoveArea(area, allocationFlags);
2251 	addressSpace->Put();
2252 
2253 	area->cache->RemoveArea(area);
2254 	area->cache->ReleaseRef();
2255 
2256 	addressSpace->DeleteArea(area, allocationFlags);
2257 }
2258 
2259 
2260 status_t
2261 vm_delete_area(team_id team, area_id id, bool kernel)
2262 {
2263 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2264 		team, id));
2265 
2266 	// lock the address space and make sure the area isn't wired
2267 	AddressSpaceWriteLocker locker;
2268 	VMArea* area;
2269 	AreaCacheLocker cacheLocker;
2270 
2271 	do {
2272 		status_t status = locker.SetFromArea(team, id, area);
2273 		if (status != B_OK)
2274 			return status;
2275 
2276 		cacheLocker.SetTo(area);
2277 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2278 
2279 	cacheLocker.Unlock();
2280 
2281 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2282 		return B_NOT_ALLOWED;
2283 
2284 	delete_area(locker.AddressSpace(), area, false);
2285 	return B_OK;
2286 }
2287 
2288 
2289 /*!	Creates a new cache on top of given cache, moves all areas from
2290 	the old cache to the new one, and changes the protection of all affected
2291 	areas' pages to read-only. If requested, wired pages are moved up to the
2292 	new cache and copies are added to the old cache in their place.
2293 	Preconditions:
2294 	- The given cache must be locked.
2295 	- All of the cache's areas' address spaces must be read locked.
2296 	- Either the cache must not have any wired ranges or a page reservation for
2297 	  all wired pages must be provided, so they can be copied.
2298 
2299 	\param lowerCache The cache on top of which a new cache shall be created.
2300 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2301 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2302 		has wired page. The wired pages are copied in this case.
2303 */
2304 static status_t
2305 vm_copy_on_write_area(VMCache* lowerCache,
2306 	vm_page_reservation* wiredPagesReservation)
2307 {
2308 	VMCache* upperCache;
2309 
2310 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2311 
2312 	// We need to separate the cache from its areas. The cache goes one level
2313 	// deeper and we create a new cache inbetween.
2314 
2315 	// create an anonymous cache
2316 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2317 		lowerCache->GuardSize() / B_PAGE_SIZE,
2318 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2319 		VM_PRIORITY_USER);
2320 	if (status != B_OK)
2321 		return status;
2322 
2323 	upperCache->Lock();
2324 
2325 	upperCache->temporary = 1;
2326 	upperCache->virtual_base = lowerCache->virtual_base;
2327 	upperCache->virtual_end = lowerCache->virtual_end;
2328 
2329 	// transfer the lower cache areas to the upper cache
2330 	rw_lock_write_lock(&sAreaCacheLock);
2331 	upperCache->TransferAreas(lowerCache);
2332 	rw_lock_write_unlock(&sAreaCacheLock);
2333 
2334 	lowerCache->AddConsumer(upperCache);
2335 
2336 	// We now need to remap all pages from all of the cache's areas read-only,
2337 	// so that a copy will be created on next write access. If there are wired
2338 	// pages, we keep their protection, move them to the upper cache and create
2339 	// copies for the lower cache.
2340 	if (wiredPagesReservation != NULL) {
2341 		// We need to handle wired pages -- iterate through the cache's pages.
2342 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2343 				vm_page* page = it.Next();) {
2344 			if (page->WiredCount() > 0) {
2345 				// allocate a new page and copy the wired one
2346 				vm_page* copiedPage = vm_page_allocate_page(
2347 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2348 
2349 				vm_memcpy_physical_page(
2350 					copiedPage->physical_page_number * B_PAGE_SIZE,
2351 					page->physical_page_number * B_PAGE_SIZE);
2352 
2353 				// move the wired page to the upper cache (note: removing is OK
2354 				// with the SplayTree iterator) and insert the copy
2355 				upperCache->MovePage(page);
2356 				lowerCache->InsertPage(copiedPage,
2357 					page->cache_offset * B_PAGE_SIZE);
2358 
2359 				DEBUG_PAGE_ACCESS_END(copiedPage);
2360 			} else {
2361 				// Change the protection of this page in all areas.
2362 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2363 						tempArea = tempArea->cache_next) {
2364 					// The area must be readable in the same way it was
2365 					// previously writable.
2366 					uint32 protection = B_KERNEL_READ_AREA;
2367 					if ((tempArea->protection & B_READ_AREA) != 0)
2368 						protection |= B_READ_AREA;
2369 
2370 					VMTranslationMap* map
2371 						= tempArea->address_space->TranslationMap();
2372 					map->Lock();
2373 					map->ProtectPage(tempArea,
2374 						virtual_page_address(tempArea, page), protection);
2375 					map->Unlock();
2376 				}
2377 			}
2378 		}
2379 	} else {
2380 		ASSERT(lowerCache->WiredPagesCount() == 0);
2381 
2382 		// just change the protection of all areas
2383 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2384 				tempArea = tempArea->cache_next) {
2385 			// The area must be readable in the same way it was previously
2386 			// writable.
2387 			uint32 protection = B_KERNEL_READ_AREA;
2388 			if ((tempArea->protection & B_READ_AREA) != 0)
2389 				protection |= B_READ_AREA;
2390 
2391 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2392 			map->Lock();
2393 			map->ProtectArea(tempArea, protection);
2394 			map->Unlock();
2395 		}
2396 	}
2397 
2398 	vm_area_put_locked_cache(upperCache);
2399 
2400 	return B_OK;
2401 }
2402 
2403 
2404 area_id
2405 vm_copy_area(team_id team, const char* name, void** _address,
2406 	uint32 addressSpec, uint32 protection, area_id sourceID)
2407 {
2408 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2409 
2410 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2411 		// set the same protection for the kernel as for userland
2412 		protection |= B_KERNEL_READ_AREA;
2413 		if (writableCopy)
2414 			protection |= B_KERNEL_WRITE_AREA;
2415 	}
2416 
2417 	// Do the locking: target address space, all address spaces associated with
2418 	// the source cache, and the cache itself.
2419 	MultiAddressSpaceLocker locker;
2420 	VMAddressSpace* targetAddressSpace;
2421 	VMCache* cache;
2422 	VMArea* source;
2423 	AreaCacheLocker cacheLocker;
2424 	status_t status;
2425 	bool sharedArea;
2426 
2427 	page_num_t wiredPages = 0;
2428 	vm_page_reservation wiredPagesReservation;
2429 
2430 	bool restart;
2431 	do {
2432 		restart = false;
2433 
2434 		locker.Unset();
2435 		status = locker.AddTeam(team, true, &targetAddressSpace);
2436 		if (status == B_OK) {
2437 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2438 				&cache);
2439 		}
2440 		if (status != B_OK)
2441 			return status;
2442 
2443 		cacheLocker.SetTo(cache, true);	// already locked
2444 
2445 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2446 
2447 		page_num_t oldWiredPages = wiredPages;
2448 		wiredPages = 0;
2449 
2450 		// If the source area isn't shared, count the number of wired pages in
2451 		// the cache and reserve as many pages.
2452 		if (!sharedArea) {
2453 			wiredPages = cache->WiredPagesCount();
2454 
2455 			if (wiredPages > oldWiredPages) {
2456 				cacheLocker.Unlock();
2457 				locker.Unlock();
2458 
2459 				if (oldWiredPages > 0)
2460 					vm_page_unreserve_pages(&wiredPagesReservation);
2461 
2462 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2463 					VM_PRIORITY_USER);
2464 
2465 				restart = true;
2466 			}
2467 		} else if (oldWiredPages > 0)
2468 			vm_page_unreserve_pages(&wiredPagesReservation);
2469 	} while (restart);
2470 
2471 	// unreserve pages later
2472 	struct PagesUnreserver {
2473 		PagesUnreserver(vm_page_reservation* reservation)
2474 			:
2475 			fReservation(reservation)
2476 		{
2477 		}
2478 
2479 		~PagesUnreserver()
2480 		{
2481 			if (fReservation != NULL)
2482 				vm_page_unreserve_pages(fReservation);
2483 		}
2484 
2485 	private:
2486 		vm_page_reservation*	fReservation;
2487 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2488 
2489 	if (addressSpec == B_CLONE_ADDRESS) {
2490 		addressSpec = B_EXACT_ADDRESS;
2491 		*_address = (void*)source->Base();
2492 	}
2493 
2494 	// First, create a cache on top of the source area, respectively use the
2495 	// existing one, if this is a shared area.
2496 
2497 	VMArea* target;
2498 	virtual_address_restrictions addressRestrictions = {};
2499 	addressRestrictions.address = *_address;
2500 	addressRestrictions.address_specification = addressSpec;
2501 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2502 		name, source->Size(), source->wiring, protection,
2503 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2504 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2505 		&addressRestrictions, true, &target, _address);
2506 	if (status < B_OK)
2507 		return status;
2508 
2509 	if (sharedArea) {
2510 		// The new area uses the old area's cache, but map_backing_store()
2511 		// hasn't acquired a ref. So we have to do that now.
2512 		cache->AcquireRefLocked();
2513 	}
2514 
2515 	// If the source area is writable, we need to move it one layer up as well
2516 
2517 	if (!sharedArea) {
2518 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2519 			// TODO: do something more useful if this fails!
2520 			if (vm_copy_on_write_area(cache,
2521 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2522 				panic("vm_copy_on_write_area() failed!\n");
2523 			}
2524 		}
2525 	}
2526 
2527 	// we return the ID of the newly created area
2528 	return target->id;
2529 }
2530 
2531 
2532 status_t
2533 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2534 	bool kernel)
2535 {
2536 	fix_protection(&newProtection);
2537 
2538 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2539 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2540 
2541 	if (!arch_vm_supports_protection(newProtection))
2542 		return B_NOT_SUPPORTED;
2543 
2544 	bool becomesWritable
2545 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2546 
2547 	// lock address spaces and cache
2548 	MultiAddressSpaceLocker locker;
2549 	VMCache* cache;
2550 	VMArea* area;
2551 	status_t status;
2552 	AreaCacheLocker cacheLocker;
2553 	bool isWritable;
2554 
2555 	bool restart;
2556 	do {
2557 		restart = false;
2558 
2559 		locker.Unset();
2560 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2561 		if (status != B_OK)
2562 			return status;
2563 
2564 		cacheLocker.SetTo(cache, true);	// already locked
2565 
2566 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2567 			return B_NOT_ALLOWED;
2568 
2569 		if (area->protection == newProtection)
2570 			return B_OK;
2571 
2572 		if (team != VMAddressSpace::KernelID()
2573 			&& area->address_space->ID() != team) {
2574 			// unless you're the kernel, you are only allowed to set
2575 			// the protection of your own areas
2576 			return B_NOT_ALLOWED;
2577 		}
2578 
2579 		isWritable
2580 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2581 
2582 		// Make sure the area (respectively, if we're going to call
2583 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2584 		// wired ranges.
2585 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2586 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2587 					otherArea = otherArea->cache_next) {
2588 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2589 					restart = true;
2590 					break;
2591 				}
2592 			}
2593 		} else {
2594 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2595 				restart = true;
2596 		}
2597 	} while (restart);
2598 
2599 	bool changePageProtection = true;
2600 	bool changeTopCachePagesOnly = false;
2601 
2602 	if (isWritable && !becomesWritable) {
2603 		// writable -> !writable
2604 
2605 		if (cache->source != NULL && cache->temporary) {
2606 			if (cache->CountWritableAreas(area) == 0) {
2607 				// Since this cache now lives from the pages in its source cache,
2608 				// we can change the cache's commitment to take only those pages
2609 				// into account that really are in this cache.
2610 
2611 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2612 					team == VMAddressSpace::KernelID()
2613 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2614 
2615 				// TODO: we may be able to join with our source cache, if
2616 				// count == 0
2617 			}
2618 		}
2619 
2620 		// If only the writability changes, we can just remap the pages of the
2621 		// top cache, since the pages of lower caches are mapped read-only
2622 		// anyway. That's advantageous only, if the number of pages in the cache
2623 		// is significantly smaller than the number of pages in the area,
2624 		// though.
2625 		if (newProtection
2626 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2627 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2628 			changeTopCachePagesOnly = true;
2629 		}
2630 	} else if (!isWritable && becomesWritable) {
2631 		// !writable -> writable
2632 
2633 		if (!cache->consumers.IsEmpty()) {
2634 			// There are consumers -- we have to insert a new cache. Fortunately
2635 			// vm_copy_on_write_area() does everything that's needed.
2636 			changePageProtection = false;
2637 			status = vm_copy_on_write_area(cache, NULL);
2638 		} else {
2639 			// No consumers, so we don't need to insert a new one.
2640 			if (cache->source != NULL && cache->temporary) {
2641 				// the cache's commitment must contain all possible pages
2642 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2643 					team == VMAddressSpace::KernelID()
2644 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2645 			}
2646 
2647 			if (status == B_OK && cache->source != NULL) {
2648 				// There's a source cache, hence we can't just change all pages'
2649 				// protection or we might allow writing into pages belonging to
2650 				// a lower cache.
2651 				changeTopCachePagesOnly = true;
2652 			}
2653 		}
2654 	} else {
2655 		// we don't have anything special to do in all other cases
2656 	}
2657 
2658 	if (status == B_OK) {
2659 		// remap existing pages in this cache
2660 		if (changePageProtection) {
2661 			VMTranslationMap* map = area->address_space->TranslationMap();
2662 			map->Lock();
2663 
2664 			if (changeTopCachePagesOnly) {
2665 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2666 				page_num_t lastPageOffset
2667 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2668 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2669 						vm_page* page = it.Next();) {
2670 					if (page->cache_offset >= firstPageOffset
2671 						&& page->cache_offset <= lastPageOffset) {
2672 						addr_t address = virtual_page_address(area, page);
2673 						map->ProtectPage(area, address, newProtection);
2674 					}
2675 				}
2676 			} else
2677 				map->ProtectArea(area, newProtection);
2678 
2679 			map->Unlock();
2680 		}
2681 
2682 		area->protection = newProtection;
2683 	}
2684 
2685 	return status;
2686 }
2687 
2688 
2689 status_t
2690 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2691 {
2692 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2693 	if (addressSpace == NULL)
2694 		return B_BAD_TEAM_ID;
2695 
2696 	VMTranslationMap* map = addressSpace->TranslationMap();
2697 
2698 	map->Lock();
2699 	uint32 dummyFlags;
2700 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2701 	map->Unlock();
2702 
2703 	addressSpace->Put();
2704 	return status;
2705 }
2706 
2707 
2708 /*!	The page's cache must be locked.
2709 */
2710 bool
2711 vm_test_map_modification(vm_page* page)
2712 {
2713 	if (page->modified)
2714 		return true;
2715 
2716 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2717 	vm_page_mapping* mapping;
2718 	while ((mapping = iterator.Next()) != NULL) {
2719 		VMArea* area = mapping->area;
2720 		VMTranslationMap* map = area->address_space->TranslationMap();
2721 
2722 		phys_addr_t physicalAddress;
2723 		uint32 flags;
2724 		map->Lock();
2725 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2726 		map->Unlock();
2727 
2728 		if ((flags & PAGE_MODIFIED) != 0)
2729 			return true;
2730 	}
2731 
2732 	return false;
2733 }
2734 
2735 
2736 /*!	The page's cache must be locked.
2737 */
2738 void
2739 vm_clear_map_flags(vm_page* page, uint32 flags)
2740 {
2741 	if ((flags & PAGE_ACCESSED) != 0)
2742 		page->accessed = false;
2743 	if ((flags & PAGE_MODIFIED) != 0)
2744 		page->modified = false;
2745 
2746 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2747 	vm_page_mapping* mapping;
2748 	while ((mapping = iterator.Next()) != NULL) {
2749 		VMArea* area = mapping->area;
2750 		VMTranslationMap* map = area->address_space->TranslationMap();
2751 
2752 		map->Lock();
2753 		map->ClearFlags(virtual_page_address(area, page), flags);
2754 		map->Unlock();
2755 	}
2756 }
2757 
2758 
2759 /*!	Removes all mappings from a page.
2760 	After you've called this function, the page is unmapped from memory and
2761 	the page's \c accessed and \c modified flags have been updated according
2762 	to the state of the mappings.
2763 	The page's cache must be locked.
2764 */
2765 void
2766 vm_remove_all_page_mappings(vm_page* page)
2767 {
2768 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2769 		VMArea* area = mapping->area;
2770 		VMTranslationMap* map = area->address_space->TranslationMap();
2771 		addr_t address = virtual_page_address(area, page);
2772 		map->UnmapPage(area, address, false);
2773 	}
2774 }
2775 
2776 
2777 int32
2778 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2779 {
2780 	int32 count = 0;
2781 
2782 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2783 	vm_page_mapping* mapping;
2784 	while ((mapping = iterator.Next()) != NULL) {
2785 		VMArea* area = mapping->area;
2786 		VMTranslationMap* map = area->address_space->TranslationMap();
2787 
2788 		bool modified;
2789 		if (map->ClearAccessedAndModified(area,
2790 				virtual_page_address(area, page), false, modified)) {
2791 			count++;
2792 		}
2793 
2794 		page->modified |= modified;
2795 	}
2796 
2797 
2798 	if (page->accessed) {
2799 		count++;
2800 		page->accessed = false;
2801 	}
2802 
2803 	return count;
2804 }
2805 
2806 
2807 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2808 	mappings.
2809 	The function iterates through the page mappings and removes them until
2810 	encountering one that has been accessed. From then on it will continue to
2811 	iterate, but only clear the accessed flag of the mapping. The page's
2812 	\c modified bit will be updated accordingly, the \c accessed bit will be
2813 	cleared.
2814 	\return The number of mapping accessed bits encountered, including the
2815 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2816 		of the page have been removed.
2817 */
2818 int32
2819 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2820 {
2821 	ASSERT(page->WiredCount() == 0);
2822 
2823 	if (page->accessed)
2824 		return vm_clear_page_mapping_accessed_flags(page);
2825 
2826 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2827 		VMArea* area = mapping->area;
2828 		VMTranslationMap* map = area->address_space->TranslationMap();
2829 		addr_t address = virtual_page_address(area, page);
2830 		bool modified = false;
2831 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2832 			page->accessed = true;
2833 			page->modified |= modified;
2834 			return vm_clear_page_mapping_accessed_flags(page);
2835 		}
2836 		page->modified |= modified;
2837 	}
2838 
2839 	return 0;
2840 }
2841 
2842 
2843 static int
2844 display_mem(int argc, char** argv)
2845 {
2846 	bool physical = false;
2847 	addr_t copyAddress;
2848 	int32 displayWidth;
2849 	int32 itemSize;
2850 	int32 num = -1;
2851 	addr_t address;
2852 	int i = 1, j;
2853 
2854 	if (argc > 1 && argv[1][0] == '-') {
2855 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2856 			physical = true;
2857 			i++;
2858 		} else
2859 			i = 99;
2860 	}
2861 
2862 	if (argc < i + 1 || argc > i + 2) {
2863 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2864 			"\tdl - 8 bytes\n"
2865 			"\tdw - 4 bytes\n"
2866 			"\tds - 2 bytes\n"
2867 			"\tdb - 1 byte\n"
2868 			"\tstring - a whole string\n"
2869 			"  -p or --physical only allows memory from a single page to be "
2870 			"displayed.\n");
2871 		return 0;
2872 	}
2873 
2874 	address = parse_expression(argv[i]);
2875 
2876 	if (argc > i + 1)
2877 		num = parse_expression(argv[i + 1]);
2878 
2879 	// build the format string
2880 	if (strcmp(argv[0], "db") == 0) {
2881 		itemSize = 1;
2882 		displayWidth = 16;
2883 	} else if (strcmp(argv[0], "ds") == 0) {
2884 		itemSize = 2;
2885 		displayWidth = 8;
2886 	} else if (strcmp(argv[0], "dw") == 0) {
2887 		itemSize = 4;
2888 		displayWidth = 4;
2889 	} else if (strcmp(argv[0], "dl") == 0) {
2890 		itemSize = 8;
2891 		displayWidth = 2;
2892 	} else if (strcmp(argv[0], "string") == 0) {
2893 		itemSize = 1;
2894 		displayWidth = -1;
2895 	} else {
2896 		kprintf("display_mem called in an invalid way!\n");
2897 		return 0;
2898 	}
2899 
2900 	if (num <= 0)
2901 		num = displayWidth;
2902 
2903 	void* physicalPageHandle = NULL;
2904 
2905 	if (physical) {
2906 		int32 offset = address & (B_PAGE_SIZE - 1);
2907 		if (num * itemSize + offset > B_PAGE_SIZE) {
2908 			num = (B_PAGE_SIZE - offset) / itemSize;
2909 			kprintf("NOTE: number of bytes has been cut to page size\n");
2910 		}
2911 
2912 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2913 
2914 		if (vm_get_physical_page_debug(address, &copyAddress,
2915 				&physicalPageHandle) != B_OK) {
2916 			kprintf("getting the hardware page failed.");
2917 			return 0;
2918 		}
2919 
2920 		address += offset;
2921 		copyAddress += offset;
2922 	} else
2923 		copyAddress = address;
2924 
2925 	if (!strcmp(argv[0], "string")) {
2926 		kprintf("%p \"", (char*)copyAddress);
2927 
2928 		// string mode
2929 		for (i = 0; true; i++) {
2930 			char c;
2931 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2932 					!= B_OK
2933 				|| c == '\0') {
2934 				break;
2935 			}
2936 
2937 			if (c == '\n')
2938 				kprintf("\\n");
2939 			else if (c == '\t')
2940 				kprintf("\\t");
2941 			else {
2942 				if (!isprint(c))
2943 					c = '.';
2944 
2945 				kprintf("%c", c);
2946 			}
2947 		}
2948 
2949 		kprintf("\"\n");
2950 	} else {
2951 		// number mode
2952 		for (i = 0; i < num; i++) {
2953 			uint32 value;
2954 
2955 			if ((i % displayWidth) == 0) {
2956 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2957 				if (i != 0)
2958 					kprintf("\n");
2959 
2960 				kprintf("[0x%lx]  ", address + i * itemSize);
2961 
2962 				for (j = 0; j < displayed; j++) {
2963 					char c;
2964 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2965 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2966 						displayed = j;
2967 						break;
2968 					}
2969 					if (!isprint(c))
2970 						c = '.';
2971 
2972 					kprintf("%c", c);
2973 				}
2974 				if (num > displayWidth) {
2975 					// make sure the spacing in the last line is correct
2976 					for (j = displayed; j < displayWidth * itemSize; j++)
2977 						kprintf(" ");
2978 				}
2979 				kprintf("  ");
2980 			}
2981 
2982 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2983 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2984 				kprintf("read fault");
2985 				break;
2986 			}
2987 
2988 			switch (itemSize) {
2989 				case 1:
2990 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
2991 					break;
2992 				case 2:
2993 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
2994 					break;
2995 				case 4:
2996 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
2997 					break;
2998 				case 8:
2999 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3000 					break;
3001 			}
3002 		}
3003 
3004 		kprintf("\n");
3005 	}
3006 
3007 	if (physical) {
3008 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3009 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3010 	}
3011 	return 0;
3012 }
3013 
3014 
3015 static void
3016 dump_cache_tree_recursively(VMCache* cache, int level,
3017 	VMCache* highlightCache)
3018 {
3019 	// print this cache
3020 	for (int i = 0; i < level; i++)
3021 		kprintf("  ");
3022 	if (cache == highlightCache)
3023 		kprintf("%p <--\n", cache);
3024 	else
3025 		kprintf("%p\n", cache);
3026 
3027 	// recursively print its consumers
3028 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3029 			VMCache* consumer = it.Next();) {
3030 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3031 	}
3032 }
3033 
3034 
3035 static int
3036 dump_cache_tree(int argc, char** argv)
3037 {
3038 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3039 		kprintf("usage: %s <address>\n", argv[0]);
3040 		return 0;
3041 	}
3042 
3043 	addr_t address = parse_expression(argv[1]);
3044 	if (address == 0)
3045 		return 0;
3046 
3047 	VMCache* cache = (VMCache*)address;
3048 	VMCache* root = cache;
3049 
3050 	// find the root cache (the transitive source)
3051 	while (root->source != NULL)
3052 		root = root->source;
3053 
3054 	dump_cache_tree_recursively(root, 0, cache);
3055 
3056 	return 0;
3057 }
3058 
3059 
3060 const char*
3061 vm_cache_type_to_string(int32 type)
3062 {
3063 	switch (type) {
3064 		case CACHE_TYPE_RAM:
3065 			return "RAM";
3066 		case CACHE_TYPE_DEVICE:
3067 			return "device";
3068 		case CACHE_TYPE_VNODE:
3069 			return "vnode";
3070 		case CACHE_TYPE_NULL:
3071 			return "null";
3072 
3073 		default:
3074 			return "unknown";
3075 	}
3076 }
3077 
3078 
3079 #if DEBUG_CACHE_LIST
3080 
3081 static void
3082 update_cache_info_recursively(VMCache* cache, cache_info& info)
3083 {
3084 	info.page_count += cache->page_count;
3085 	if (cache->type == CACHE_TYPE_RAM)
3086 		info.committed += cache->committed_size;
3087 
3088 	// recurse
3089 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3090 			VMCache* consumer = it.Next();) {
3091 		update_cache_info_recursively(consumer, info);
3092 	}
3093 }
3094 
3095 
3096 static int
3097 cache_info_compare_page_count(const void* _a, const void* _b)
3098 {
3099 	const cache_info* a = (const cache_info*)_a;
3100 	const cache_info* b = (const cache_info*)_b;
3101 	if (a->page_count == b->page_count)
3102 		return 0;
3103 	return a->page_count < b->page_count ? 1 : -1;
3104 }
3105 
3106 
3107 static int
3108 cache_info_compare_committed(const void* _a, const void* _b)
3109 {
3110 	const cache_info* a = (const cache_info*)_a;
3111 	const cache_info* b = (const cache_info*)_b;
3112 	if (a->committed == b->committed)
3113 		return 0;
3114 	return a->committed < b->committed ? 1 : -1;
3115 }
3116 
3117 
3118 static void
3119 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3120 {
3121 	for (int i = 0; i < level; i++)
3122 		kprintf("  ");
3123 
3124 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3125 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3126 		cache->virtual_base, cache->virtual_end, cache->page_count);
3127 
3128 	if (level == 0)
3129 		kprintf("/%lu", info.page_count);
3130 
3131 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3132 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3133 
3134 		if (level == 0)
3135 			kprintf("/%lu", info.committed);
3136 	}
3137 
3138 	// areas
3139 	if (cache->areas != NULL) {
3140 		VMArea* area = cache->areas;
3141 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3142 			area->name, area->address_space->ID());
3143 
3144 		while (area->cache_next != NULL) {
3145 			area = area->cache_next;
3146 			kprintf(", %" B_PRId32, area->id);
3147 		}
3148 	}
3149 
3150 	kputs("\n");
3151 
3152 	// recurse
3153 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3154 			VMCache* consumer = it.Next();) {
3155 		dump_caches_recursively(consumer, info, level + 1);
3156 	}
3157 }
3158 
3159 
3160 static int
3161 dump_caches(int argc, char** argv)
3162 {
3163 	if (sCacheInfoTable == NULL) {
3164 		kprintf("No cache info table!\n");
3165 		return 0;
3166 	}
3167 
3168 	bool sortByPageCount = true;
3169 
3170 	for (int32 i = 1; i < argc; i++) {
3171 		if (strcmp(argv[i], "-c") == 0) {
3172 			sortByPageCount = false;
3173 		} else {
3174 			print_debugger_command_usage(argv[0]);
3175 			return 0;
3176 		}
3177 	}
3178 
3179 	uint32 totalCount = 0;
3180 	uint32 rootCount = 0;
3181 	off_t totalCommitted = 0;
3182 	page_num_t totalPages = 0;
3183 
3184 	VMCache* cache = gDebugCacheList;
3185 	while (cache) {
3186 		totalCount++;
3187 		if (cache->source == NULL) {
3188 			cache_info stackInfo;
3189 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3190 				? sCacheInfoTable[rootCount] : stackInfo;
3191 			rootCount++;
3192 			info.cache = cache;
3193 			info.page_count = 0;
3194 			info.committed = 0;
3195 			update_cache_info_recursively(cache, info);
3196 			totalCommitted += info.committed;
3197 			totalPages += info.page_count;
3198 		}
3199 
3200 		cache = cache->debug_next;
3201 	}
3202 
3203 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3204 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3205 			sortByPageCount
3206 				? &cache_info_compare_page_count
3207 				: &cache_info_compare_committed);
3208 	}
3209 
3210 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3211 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3212 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3213 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3214 			"page count" : "committed size");
3215 
3216 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3217 		for (uint32 i = 0; i < rootCount; i++) {
3218 			cache_info& info = sCacheInfoTable[i];
3219 			dump_caches_recursively(info.cache, info, 0);
3220 		}
3221 	} else
3222 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3223 
3224 	return 0;
3225 }
3226 
3227 #endif	// DEBUG_CACHE_LIST
3228 
3229 
3230 static int
3231 dump_cache(int argc, char** argv)
3232 {
3233 	VMCache* cache;
3234 	bool showPages = false;
3235 	int i = 1;
3236 
3237 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3238 		kprintf("usage: %s [-ps] <address>\n"
3239 			"  if -p is specified, all pages are shown, if -s is used\n"
3240 			"  only the cache info is shown respectively.\n", argv[0]);
3241 		return 0;
3242 	}
3243 	while (argv[i][0] == '-') {
3244 		char* arg = argv[i] + 1;
3245 		while (arg[0]) {
3246 			if (arg[0] == 'p')
3247 				showPages = true;
3248 			arg++;
3249 		}
3250 		i++;
3251 	}
3252 	if (argv[i] == NULL) {
3253 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3254 		return 0;
3255 	}
3256 
3257 	addr_t address = parse_expression(argv[i]);
3258 	if (address == 0)
3259 		return 0;
3260 
3261 	cache = (VMCache*)address;
3262 
3263 	cache->Dump(showPages);
3264 
3265 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3266 
3267 	return 0;
3268 }
3269 
3270 
3271 static void
3272 dump_area_struct(VMArea* area, bool mappings)
3273 {
3274 	kprintf("AREA: %p\n", area);
3275 	kprintf("name:\t\t'%s'\n", area->name);
3276 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3277 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3278 	kprintf("base:\t\t0x%lx\n", area->Base());
3279 	kprintf("size:\t\t0x%lx\n", area->Size());
3280 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3281 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3282 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3283 	kprintf("cache:\t\t%p\n", area->cache);
3284 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3285 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3286 	kprintf("cache_next:\t%p\n", area->cache_next);
3287 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3288 
3289 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3290 	if (mappings) {
3291 		kprintf("page mappings:\n");
3292 		while (iterator.HasNext()) {
3293 			vm_page_mapping* mapping = iterator.Next();
3294 			kprintf("  %p", mapping->page);
3295 		}
3296 		kprintf("\n");
3297 	} else {
3298 		uint32 count = 0;
3299 		while (iterator.Next() != NULL) {
3300 			count++;
3301 		}
3302 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3303 	}
3304 }
3305 
3306 
3307 static int
3308 dump_area(int argc, char** argv)
3309 {
3310 	bool mappings = false;
3311 	bool found = false;
3312 	int32 index = 1;
3313 	VMArea* area;
3314 	addr_t num;
3315 
3316 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3317 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3318 			"All areas matching either id/address/name are listed. You can\n"
3319 			"force to check only a specific item by prefixing the specifier\n"
3320 			"with the id/contains/address/name keywords.\n"
3321 			"-m shows the area's mappings as well.\n");
3322 		return 0;
3323 	}
3324 
3325 	if (!strcmp(argv[1], "-m")) {
3326 		mappings = true;
3327 		index++;
3328 	}
3329 
3330 	int32 mode = 0xf;
3331 	if (!strcmp(argv[index], "id"))
3332 		mode = 1;
3333 	else if (!strcmp(argv[index], "contains"))
3334 		mode = 2;
3335 	else if (!strcmp(argv[index], "name"))
3336 		mode = 4;
3337 	else if (!strcmp(argv[index], "address"))
3338 		mode = 0;
3339 	if (mode != 0xf)
3340 		index++;
3341 
3342 	if (index >= argc) {
3343 		kprintf("No area specifier given.\n");
3344 		return 0;
3345 	}
3346 
3347 	num = parse_expression(argv[index]);
3348 
3349 	if (mode == 0) {
3350 		dump_area_struct((struct VMArea*)num, mappings);
3351 	} else {
3352 		// walk through the area list, looking for the arguments as a name
3353 
3354 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3355 		while ((area = it.Next()) != NULL) {
3356 			if (((mode & 4) != 0 && area->name != NULL
3357 					&& !strcmp(argv[index], area->name))
3358 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3359 					|| (((mode & 2) != 0 && area->Base() <= num
3360 						&& area->Base() + area->Size() > num))))) {
3361 				dump_area_struct(area, mappings);
3362 				found = true;
3363 			}
3364 		}
3365 
3366 		if (!found)
3367 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3368 	}
3369 
3370 	return 0;
3371 }
3372 
3373 
3374 static int
3375 dump_area_list(int argc, char** argv)
3376 {
3377 	VMArea* area;
3378 	const char* name = NULL;
3379 	int32 id = 0;
3380 
3381 	if (argc > 1) {
3382 		id = parse_expression(argv[1]);
3383 		if (id == 0)
3384 			name = argv[1];
3385 	}
3386 
3387 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3388 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3389 		B_PRINTF_POINTER_WIDTH, "size");
3390 
3391 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3392 	while ((area = it.Next()) != NULL) {
3393 		if ((id != 0 && area->address_space->ID() != id)
3394 			|| (name != NULL && strstr(area->name, name) == NULL))
3395 			continue;
3396 
3397 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3398 			area->id, (void*)area->Base(), (void*)area->Size(),
3399 			area->protection, area->wiring, area->name);
3400 	}
3401 	return 0;
3402 }
3403 
3404 
3405 static int
3406 dump_available_memory(int argc, char** argv)
3407 {
3408 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3409 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3410 	return 0;
3411 }
3412 
3413 
3414 static int
3415 dump_mapping_info(int argc, char** argv)
3416 {
3417 	bool reverseLookup = false;
3418 	bool pageLookup = false;
3419 
3420 	int argi = 1;
3421 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3422 		const char* arg = argv[argi];
3423 		if (strcmp(arg, "-r") == 0) {
3424 			reverseLookup = true;
3425 		} else if (strcmp(arg, "-p") == 0) {
3426 			reverseLookup = true;
3427 			pageLookup = true;
3428 		} else {
3429 			print_debugger_command_usage(argv[0]);
3430 			return 0;
3431 		}
3432 	}
3433 
3434 	// We need at least one argument, the address. Optionally a thread ID can be
3435 	// specified.
3436 	if (argi >= argc || argi + 2 < argc) {
3437 		print_debugger_command_usage(argv[0]);
3438 		return 0;
3439 	}
3440 
3441 	uint64 addressValue;
3442 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3443 		return 0;
3444 
3445 	Team* team = NULL;
3446 	if (argi < argc) {
3447 		uint64 threadID;
3448 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3449 			return 0;
3450 
3451 		Thread* thread = Thread::GetDebug(threadID);
3452 		if (thread == NULL) {
3453 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3454 			return 0;
3455 		}
3456 
3457 		team = thread->team;
3458 	}
3459 
3460 	if (reverseLookup) {
3461 		phys_addr_t physicalAddress;
3462 		if (pageLookup) {
3463 			vm_page* page = (vm_page*)(addr_t)addressValue;
3464 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3465 		} else {
3466 			physicalAddress = (phys_addr_t)addressValue;
3467 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3468 		}
3469 
3470 		kprintf("    Team     Virtual Address      Area\n");
3471 		kprintf("--------------------------------------\n");
3472 
3473 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3474 			Callback()
3475 				:
3476 				fAddressSpace(NULL)
3477 			{
3478 			}
3479 
3480 			void SetAddressSpace(VMAddressSpace* addressSpace)
3481 			{
3482 				fAddressSpace = addressSpace;
3483 			}
3484 
3485 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3486 			{
3487 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3488 					virtualAddress);
3489 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3490 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3491 				else
3492 					kprintf("\n");
3493 				return false;
3494 			}
3495 
3496 		private:
3497 			VMAddressSpace*	fAddressSpace;
3498 		} callback;
3499 
3500 		if (team != NULL) {
3501 			// team specified -- get its address space
3502 			VMAddressSpace* addressSpace = team->address_space;
3503 			if (addressSpace == NULL) {
3504 				kprintf("Failed to get address space!\n");
3505 				return 0;
3506 			}
3507 
3508 			callback.SetAddressSpace(addressSpace);
3509 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3510 				physicalAddress, callback);
3511 		} else {
3512 			// no team specified -- iterate through all address spaces
3513 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3514 				addressSpace != NULL;
3515 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3516 				callback.SetAddressSpace(addressSpace);
3517 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3518 					physicalAddress, callback);
3519 			}
3520 		}
3521 	} else {
3522 		// get the address space
3523 		addr_t virtualAddress = (addr_t)addressValue;
3524 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3525 		VMAddressSpace* addressSpace;
3526 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3527 			addressSpace = VMAddressSpace::Kernel();
3528 		} else if (team != NULL) {
3529 			addressSpace = team->address_space;
3530 		} else {
3531 			Thread* thread = debug_get_debugged_thread();
3532 			if (thread == NULL || thread->team == NULL) {
3533 				kprintf("Failed to get team!\n");
3534 				return 0;
3535 			}
3536 
3537 			addressSpace = thread->team->address_space;
3538 		}
3539 
3540 		if (addressSpace == NULL) {
3541 			kprintf("Failed to get address space!\n");
3542 			return 0;
3543 		}
3544 
3545 		// let the translation map implementation do the job
3546 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3547 	}
3548 
3549 	return 0;
3550 }
3551 
3552 
3553 /*!	Deletes all areas and reserved regions in the given address space.
3554 
3555 	The caller must ensure that none of the areas has any wired ranges.
3556 
3557 	\param addressSpace The address space.
3558 	\param deletingAddressSpace \c true, if the address space is in the process
3559 		of being deleted.
3560 */
3561 void
3562 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3563 {
3564 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3565 		addressSpace->ID()));
3566 
3567 	addressSpace->WriteLock();
3568 
3569 	// remove all reserved areas in this address space
3570 	addressSpace->UnreserveAllAddressRanges(0);
3571 
3572 	// delete all the areas in this address space
3573 	while (VMArea* area = addressSpace->FirstArea()) {
3574 		ASSERT(!area->IsWired());
3575 		delete_area(addressSpace, area, deletingAddressSpace);
3576 	}
3577 
3578 	addressSpace->WriteUnlock();
3579 }
3580 
3581 
3582 static area_id
3583 vm_area_for(addr_t address, bool kernel)
3584 {
3585 	team_id team;
3586 	if (IS_USER_ADDRESS(address)) {
3587 		// we try the user team address space, if any
3588 		team = VMAddressSpace::CurrentID();
3589 		if (team < 0)
3590 			return team;
3591 	} else
3592 		team = VMAddressSpace::KernelID();
3593 
3594 	AddressSpaceReadLocker locker(team);
3595 	if (!locker.IsLocked())
3596 		return B_BAD_TEAM_ID;
3597 
3598 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3599 	if (area != NULL) {
3600 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3601 			return B_ERROR;
3602 
3603 		return area->id;
3604 	}
3605 
3606 	return B_ERROR;
3607 }
3608 
3609 
3610 /*!	Frees physical pages that were used during the boot process.
3611 	\a end is inclusive.
3612 */
3613 static void
3614 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3615 {
3616 	// free all physical pages in the specified range
3617 
3618 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3619 		phys_addr_t physicalAddress;
3620 		uint32 flags;
3621 
3622 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3623 			&& (flags & PAGE_PRESENT) != 0) {
3624 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3625 			if (page != NULL && page->State() != PAGE_STATE_FREE
3626 					 && page->State() != PAGE_STATE_CLEAR
3627 					 && page->State() != PAGE_STATE_UNUSED) {
3628 				DEBUG_PAGE_ACCESS_START(page);
3629 				vm_page_set_state(page, PAGE_STATE_FREE);
3630 			}
3631 		}
3632 	}
3633 
3634 	// unmap the memory
3635 	map->Unmap(start, end);
3636 }
3637 
3638 
3639 void
3640 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3641 {
3642 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3643 	addr_t end = start + (size - 1);
3644 	addr_t lastEnd = start;
3645 
3646 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3647 		(void*)start, (void*)end));
3648 
3649 	// The areas are sorted in virtual address space order, so
3650 	// we just have to find the holes between them that fall
3651 	// into the area we should dispose
3652 
3653 	map->Lock();
3654 
3655 	for (VMAddressSpace::AreaIterator it
3656 				= VMAddressSpace::Kernel()->GetAreaIterator();
3657 			VMArea* area = it.Next();) {
3658 		addr_t areaStart = area->Base();
3659 		addr_t areaEnd = areaStart + (area->Size() - 1);
3660 
3661 		if (areaEnd < start)
3662 			continue;
3663 
3664 		if (areaStart > end) {
3665 			// we are done, the area is already beyond of what we have to free
3666 			break;
3667 		}
3668 
3669 		if (areaStart > lastEnd) {
3670 			// this is something we can free
3671 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3672 				(void*)areaStart));
3673 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3674 		}
3675 
3676 		if (areaEnd >= end) {
3677 			lastEnd = areaEnd;
3678 				// no +1 to prevent potential overflow
3679 			break;
3680 		}
3681 
3682 		lastEnd = areaEnd + 1;
3683 	}
3684 
3685 	if (lastEnd < end) {
3686 		// we can also get rid of some space at the end of the area
3687 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3688 			(void*)end));
3689 		unmap_and_free_physical_pages(map, lastEnd, end);
3690 	}
3691 
3692 	map->Unlock();
3693 }
3694 
3695 
3696 static void
3697 create_preloaded_image_areas(struct preloaded_image* _image)
3698 {
3699 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3700 	char name[B_OS_NAME_LENGTH];
3701 	void* address;
3702 	int32 length;
3703 
3704 	// use file name to create a good area name
3705 	char* fileName = strrchr(image->name, '/');
3706 	if (fileName == NULL)
3707 		fileName = image->name;
3708 	else
3709 		fileName++;
3710 
3711 	length = strlen(fileName);
3712 	// make sure there is enough space for the suffix
3713 	if (length > 25)
3714 		length = 25;
3715 
3716 	memcpy(name, fileName, length);
3717 	strcpy(name + length, "_text");
3718 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3719 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3720 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3721 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3722 		// this will later be remapped read-only/executable by the
3723 		// ELF initialization code
3724 
3725 	strcpy(name + length, "_data");
3726 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3727 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3728 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3729 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3730 }
3731 
3732 
3733 /*!	Frees all previously kernel arguments areas from the kernel_args structure.
3734 	Any boot loader resources contained in that arguments must not be accessed
3735 	anymore past this point.
3736 */
3737 void
3738 vm_free_kernel_args(kernel_args* args)
3739 {
3740 	uint32 i;
3741 
3742 	TRACE(("vm_free_kernel_args()\n"));
3743 
3744 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3745 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3746 		if (area >= B_OK)
3747 			delete_area(area);
3748 	}
3749 }
3750 
3751 
3752 static void
3753 allocate_kernel_args(kernel_args* args)
3754 {
3755 	TRACE(("allocate_kernel_args()\n"));
3756 
3757 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3758 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3759 
3760 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3761 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3762 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3763 	}
3764 }
3765 
3766 
3767 static void
3768 unreserve_boot_loader_ranges(kernel_args* args)
3769 {
3770 	TRACE(("unreserve_boot_loader_ranges()\n"));
3771 
3772 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3773 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3774 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3775 			args->virtual_allocated_range[i].size);
3776 	}
3777 }
3778 
3779 
3780 static void
3781 reserve_boot_loader_ranges(kernel_args* args)
3782 {
3783 	TRACE(("reserve_boot_loader_ranges()\n"));
3784 
3785 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3786 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3787 
3788 		// If the address is no kernel address, we just skip it. The
3789 		// architecture specific code has to deal with it.
3790 		if (!IS_KERNEL_ADDRESS(address)) {
3791 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3792 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3793 			continue;
3794 		}
3795 
3796 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3797 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3798 		if (status < B_OK)
3799 			panic("could not reserve boot loader ranges\n");
3800 	}
3801 }
3802 
3803 
3804 static addr_t
3805 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3806 {
3807 	size = PAGE_ALIGN(size);
3808 
3809 	// find a slot in the virtual allocation addr range
3810 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3811 		// check to see if the space between this one and the last is big enough
3812 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3813 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3814 			+ args->virtual_allocated_range[i - 1].size;
3815 
3816 		addr_t base = alignment > 0
3817 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3818 
3819 		if (base >= KERNEL_BASE && base < rangeStart
3820 				&& rangeStart - base >= size) {
3821 			args->virtual_allocated_range[i - 1].size
3822 				+= base + size - previousRangeEnd;
3823 			return base;
3824 		}
3825 	}
3826 
3827 	// we hadn't found one between allocation ranges. this is ok.
3828 	// see if there's a gap after the last one
3829 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3830 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3831 		+ args->virtual_allocated_range[lastEntryIndex].size;
3832 	addr_t base = alignment > 0
3833 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3834 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3835 		args->virtual_allocated_range[lastEntryIndex].size
3836 			+= base + size - lastRangeEnd;
3837 		return base;
3838 	}
3839 
3840 	// see if there's a gap before the first one
3841 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3842 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3843 		base = rangeStart - size;
3844 		if (alignment > 0)
3845 			base = ROUNDDOWN(base, alignment);
3846 
3847 		if (base >= KERNEL_BASE) {
3848 			args->virtual_allocated_range[0].start = base;
3849 			args->virtual_allocated_range[0].size += rangeStart - base;
3850 			return base;
3851 		}
3852 	}
3853 
3854 	return 0;
3855 }
3856 
3857 
3858 static bool
3859 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3860 {
3861 	// TODO: horrible brute-force method of determining if the page can be
3862 	// allocated
3863 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3864 		if (address >= args->physical_memory_range[i].start
3865 			&& address < args->physical_memory_range[i].start
3866 				+ args->physical_memory_range[i].size)
3867 			return true;
3868 	}
3869 	return false;
3870 }
3871 
3872 
3873 page_num_t
3874 vm_allocate_early_physical_page(kernel_args* args)
3875 {
3876 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3877 		phys_addr_t nextPage;
3878 
3879 		nextPage = args->physical_allocated_range[i].start
3880 			+ args->physical_allocated_range[i].size;
3881 		// see if the page after the next allocated paddr run can be allocated
3882 		if (i + 1 < args->num_physical_allocated_ranges
3883 			&& args->physical_allocated_range[i + 1].size != 0) {
3884 			// see if the next page will collide with the next allocated range
3885 			if (nextPage >= args->physical_allocated_range[i+1].start)
3886 				continue;
3887 		}
3888 		// see if the next physical page fits in the memory block
3889 		if (is_page_in_physical_memory_range(args, nextPage)) {
3890 			// we got one!
3891 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3892 			return nextPage / B_PAGE_SIZE;
3893 		}
3894 	}
3895 
3896 	return 0;
3897 		// could not allocate a block
3898 }
3899 
3900 
3901 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3902 	allocate some pages before the VM is completely up.
3903 */
3904 addr_t
3905 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3906 	uint32 attributes, addr_t alignment)
3907 {
3908 	if (physicalSize > virtualSize)
3909 		physicalSize = virtualSize;
3910 
3911 	// find the vaddr to allocate at
3912 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3913 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3914 	if (virtualBase == 0) {
3915 		panic("vm_allocate_early: could not allocate virtual address\n");
3916 		return 0;
3917 	}
3918 
3919 	// map the pages
3920 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3921 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3922 		if (physicalAddress == 0)
3923 			panic("error allocating early page!\n");
3924 
3925 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3926 
3927 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3928 			physicalAddress * B_PAGE_SIZE, attributes,
3929 			&vm_allocate_early_physical_page);
3930 	}
3931 
3932 	return virtualBase;
3933 }
3934 
3935 
3936 /*!	The main entrance point to initialize the VM. */
3937 status_t
3938 vm_init(kernel_args* args)
3939 {
3940 	struct preloaded_image* image;
3941 	void* address;
3942 	status_t err = 0;
3943 	uint32 i;
3944 
3945 	TRACE(("vm_init: entry\n"));
3946 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3947 	err = arch_vm_init(args);
3948 
3949 	// initialize some globals
3950 	vm_page_init_num_pages(args);
3951 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3952 
3953 	slab_init(args);
3954 
3955 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3956 	size_t heapSize = INITIAL_HEAP_SIZE;
3957 	// try to accomodate low memory systems
3958 	while (heapSize > sAvailableMemory / 8)
3959 		heapSize /= 2;
3960 	if (heapSize < 1024 * 1024)
3961 		panic("vm_init: go buy some RAM please.");
3962 
3963 	// map in the new heap and initialize it
3964 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3965 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3966 	TRACE(("heap at 0x%lx\n", heapBase));
3967 	heap_init(heapBase, heapSize);
3968 #endif
3969 
3970 	// initialize the free page list and physical page mapper
3971 	vm_page_init(args);
3972 
3973 	// initialize the cache allocators
3974 	vm_cache_init(args);
3975 
3976 	{
3977 		status_t error = VMAreaHash::Init();
3978 		if (error != B_OK)
3979 			panic("vm_init: error initializing area hash table\n");
3980 	}
3981 
3982 	VMAddressSpace::Init();
3983 	reserve_boot_loader_ranges(args);
3984 
3985 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3986 	heap_init_post_area();
3987 #endif
3988 
3989 	// Do any further initialization that the architecture dependant layers may
3990 	// need now
3991 	arch_vm_translation_map_init_post_area(args);
3992 	arch_vm_init_post_area(args);
3993 	vm_page_init_post_area(args);
3994 	slab_init_post_area();
3995 
3996 	// allocate areas to represent stuff that already exists
3997 
3998 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3999 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4000 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4001 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4002 #endif
4003 
4004 	allocate_kernel_args(args);
4005 
4006 	create_preloaded_image_areas(args->kernel_image);
4007 
4008 	// allocate areas for preloaded images
4009 	for (image = args->preloaded_images; image != NULL; image = image->next)
4010 		create_preloaded_image_areas(image);
4011 
4012 	// allocate kernel stacks
4013 	for (i = 0; i < args->num_cpus; i++) {
4014 		char name[64];
4015 
4016 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4017 		address = (void*)args->cpu_kstack[i].start;
4018 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4019 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4020 	}
4021 
4022 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4023 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4024 
4025 #if PARANOID_KERNEL_MALLOC
4026 	vm_block_address_range("uninitialized heap memory",
4027 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4028 #endif
4029 #if PARANOID_KERNEL_FREE
4030 	vm_block_address_range("freed heap memory",
4031 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4032 #endif
4033 
4034 	// create the object cache for the page mappings
4035 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4036 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4037 		NULL, NULL);
4038 	if (gPageMappingsObjectCache == NULL)
4039 		panic("failed to create page mappings object cache");
4040 
4041 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4042 
4043 #if DEBUG_CACHE_LIST
4044 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4045 		virtual_address_restrictions virtualRestrictions = {};
4046 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4047 		physical_address_restrictions physicalRestrictions = {};
4048 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4049 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4050 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4051 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4052 			&physicalRestrictions, (void**)&sCacheInfoTable);
4053 	}
4054 #endif	// DEBUG_CACHE_LIST
4055 
4056 	// add some debugger commands
4057 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4058 	add_debugger_command("area", &dump_area,
4059 		"Dump info about a particular area");
4060 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4061 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4062 #if DEBUG_CACHE_LIST
4063 	if (sCacheInfoTable != NULL) {
4064 		add_debugger_command_etc("caches", &dump_caches,
4065 			"List all VMCache trees",
4066 			"[ \"-c\" ]\n"
4067 			"All cache trees are listed sorted in decreasing order by number "
4068 				"of\n"
4069 			"used pages or, if \"-c\" is specified, by size of committed "
4070 				"memory.\n",
4071 			0);
4072 	}
4073 #endif
4074 	add_debugger_command("avail", &dump_available_memory,
4075 		"Dump available memory");
4076 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4077 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4078 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4079 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4080 	add_debugger_command("string", &display_mem, "dump strings");
4081 
4082 	add_debugger_command_etc("mapping", &dump_mapping_info,
4083 		"Print address mapping information",
4084 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4085 		"Prints low-level page mapping information for a given address. If\n"
4086 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4087 		"address that is looked up in the translation map of the current\n"
4088 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4089 		"\"-r\" is specified, <address> is a physical address that is\n"
4090 		"searched in the translation map of all teams, respectively the team\n"
4091 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4092 		"<address> is the address of a vm_page structure. The behavior is\n"
4093 		"equivalent to specifying \"-r\" with the physical address of that\n"
4094 		"page.\n",
4095 		0);
4096 
4097 	TRACE(("vm_init: exit\n"));
4098 
4099 	vm_cache_init_post_heap();
4100 
4101 	return err;
4102 }
4103 
4104 
4105 status_t
4106 vm_init_post_sem(kernel_args* args)
4107 {
4108 	// This frees all unused boot loader resources and makes its space available
4109 	// again
4110 	arch_vm_init_end(args);
4111 	unreserve_boot_loader_ranges(args);
4112 
4113 	// fill in all of the semaphores that were not allocated before
4114 	// since we're still single threaded and only the kernel address space
4115 	// exists, it isn't that hard to find all of the ones we need to create
4116 
4117 	arch_vm_translation_map_init_post_sem(args);
4118 
4119 	slab_init_post_sem();
4120 
4121 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4122 	heap_init_post_sem();
4123 #endif
4124 
4125 	return B_OK;
4126 }
4127 
4128 
4129 status_t
4130 vm_init_post_thread(kernel_args* args)
4131 {
4132 	vm_page_init_post_thread(args);
4133 	slab_init_post_thread();
4134 	return heap_init_post_thread();
4135 }
4136 
4137 
4138 status_t
4139 vm_init_post_modules(kernel_args* args)
4140 {
4141 	return arch_vm_init_post_modules(args);
4142 }
4143 
4144 
4145 void
4146 permit_page_faults(void)
4147 {
4148 	Thread* thread = thread_get_current_thread();
4149 	if (thread != NULL)
4150 		atomic_add(&thread->page_faults_allowed, 1);
4151 }
4152 
4153 
4154 void
4155 forbid_page_faults(void)
4156 {
4157 	Thread* thread = thread_get_current_thread();
4158 	if (thread != NULL)
4159 		atomic_add(&thread->page_faults_allowed, -1);
4160 }
4161 
4162 
4163 status_t
4164 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4165 	bool isUser, addr_t* newIP)
4166 {
4167 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4168 		faultAddress));
4169 
4170 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4171 
4172 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4173 	VMAddressSpace* addressSpace = NULL;
4174 
4175 	status_t status = B_OK;
4176 	*newIP = 0;
4177 	atomic_add((int32*)&sPageFaults, 1);
4178 
4179 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4180 		addressSpace = VMAddressSpace::GetKernel();
4181 	} else if (IS_USER_ADDRESS(pageAddress)) {
4182 		addressSpace = VMAddressSpace::GetCurrent();
4183 		if (addressSpace == NULL) {
4184 			if (!isUser) {
4185 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4186 					"memory!\n");
4187 				status = B_BAD_ADDRESS;
4188 				TPF(PageFaultError(-1,
4189 					VMPageFaultTracing
4190 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4191 			} else {
4192 				// XXX weird state.
4193 				panic("vm_page_fault: non kernel thread accessing user memory "
4194 					"that doesn't exist!\n");
4195 				status = B_BAD_ADDRESS;
4196 			}
4197 		}
4198 	} else {
4199 		// the hit was probably in the 64k DMZ between kernel and user space
4200 		// this keeps a user space thread from passing a buffer that crosses
4201 		// into kernel space
4202 		status = B_BAD_ADDRESS;
4203 		TPF(PageFaultError(-1,
4204 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4205 	}
4206 
4207 	if (status == B_OK) {
4208 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4209 			isUser, NULL);
4210 	}
4211 
4212 	if (status < B_OK) {
4213 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4214 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4215 			strerror(status), address, faultAddress, isWrite, isUser,
4216 			thread_get_current_thread_id());
4217 		if (!isUser) {
4218 			Thread* thread = thread_get_current_thread();
4219 			if (thread != NULL && thread->fault_handler != 0) {
4220 				// this will cause the arch dependant page fault handler to
4221 				// modify the IP on the interrupt frame or whatever to return
4222 				// to this address
4223 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4224 			} else {
4225 				// unhandled page fault in the kernel
4226 				panic("vm_page_fault: unhandled page fault in kernel space at "
4227 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4228 			}
4229 		} else {
4230 #if 1
4231 			// TODO: remove me once we have proper userland debugging support
4232 			// (and tools)
4233 			VMArea* area = NULL;
4234 			if (addressSpace != NULL) {
4235 				addressSpace->ReadLock();
4236 				area = addressSpace->LookupArea(faultAddress);
4237 			}
4238 
4239 			Thread* thread = thread_get_current_thread();
4240 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4241 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4242 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4243 				thread->team->Name(), thread->team->id,
4244 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4245 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4246 					area->Base() : 0x0));
4247 
4248 			// We can print a stack trace of the userland thread here.
4249 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4250 // fault and someone is already waiting for a write lock on the same address
4251 // space. This thread will then try to acquire the lock again and will
4252 // be queued after the writer.
4253 #	if 0
4254 			if (area) {
4255 				struct stack_frame {
4256 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4257 						struct stack_frame*	previous;
4258 						void*				return_address;
4259 					#else
4260 						// ...
4261 					#warning writeme
4262 					#endif
4263 				} frame;
4264 #		ifdef __INTEL__
4265 				struct iframe* iframe = x86_get_user_iframe();
4266 				if (iframe == NULL)
4267 					panic("iframe is NULL!");
4268 
4269 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4270 					sizeof(struct stack_frame));
4271 #		elif defined(__POWERPC__)
4272 				struct iframe* iframe = ppc_get_user_iframe();
4273 				if (iframe == NULL)
4274 					panic("iframe is NULL!");
4275 
4276 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4277 					sizeof(struct stack_frame));
4278 #		else
4279 #			warning "vm_page_fault() stack trace won't work"
4280 				status = B_ERROR;
4281 #		endif
4282 
4283 				dprintf("stack trace:\n");
4284 				int32 maxFrames = 50;
4285 				while (status == B_OK && --maxFrames >= 0
4286 						&& frame.return_address != NULL) {
4287 					dprintf("  %p", frame.return_address);
4288 					area = addressSpace->LookupArea(
4289 						(addr_t)frame.return_address);
4290 					if (area) {
4291 						dprintf(" (%s + %#lx)", area->name,
4292 							(addr_t)frame.return_address - area->Base());
4293 					}
4294 					dprintf("\n");
4295 
4296 					status = user_memcpy(&frame, frame.previous,
4297 						sizeof(struct stack_frame));
4298 				}
4299 			}
4300 #	endif	// 0 (stack trace)
4301 
4302 			if (addressSpace != NULL)
4303 				addressSpace->ReadUnlock();
4304 #endif
4305 
4306 			// If the thread has a signal handler for SIGSEGV, we simply
4307 			// send it the signal. Otherwise we notify the user debugger
4308 			// first.
4309 			struct sigaction action;
4310 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4311 					&& action.sa_handler != SIG_DFL
4312 					&& action.sa_handler != SIG_IGN)
4313 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4314 					SIGSEGV)) {
4315 				Signal signal(SIGSEGV,
4316 					status == B_PERMISSION_DENIED
4317 						? SEGV_ACCERR : SEGV_MAPERR,
4318 					EFAULT, thread->team->id);
4319 				signal.SetAddress((void*)address);
4320 				send_signal_to_thread(thread, signal, 0);
4321 			}
4322 		}
4323 	}
4324 
4325 	if (addressSpace != NULL)
4326 		addressSpace->Put();
4327 
4328 	return B_HANDLED_INTERRUPT;
4329 }
4330 
4331 
4332 struct PageFaultContext {
4333 	AddressSpaceReadLocker	addressSpaceLocker;
4334 	VMCacheChainLocker		cacheChainLocker;
4335 
4336 	VMTranslationMap*		map;
4337 	VMCache*				topCache;
4338 	off_t					cacheOffset;
4339 	vm_page_reservation		reservation;
4340 	bool					isWrite;
4341 
4342 	// return values
4343 	vm_page*				page;
4344 	bool					restart;
4345 
4346 
4347 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4348 		:
4349 		addressSpaceLocker(addressSpace, true),
4350 		map(addressSpace->TranslationMap()),
4351 		isWrite(isWrite)
4352 	{
4353 	}
4354 
4355 	~PageFaultContext()
4356 	{
4357 		UnlockAll();
4358 		vm_page_unreserve_pages(&reservation);
4359 	}
4360 
4361 	void Prepare(VMCache* topCache, off_t cacheOffset)
4362 	{
4363 		this->topCache = topCache;
4364 		this->cacheOffset = cacheOffset;
4365 		page = NULL;
4366 		restart = false;
4367 
4368 		cacheChainLocker.SetTo(topCache);
4369 	}
4370 
4371 	void UnlockAll(VMCache* exceptCache = NULL)
4372 	{
4373 		topCache = NULL;
4374 		addressSpaceLocker.Unlock();
4375 		cacheChainLocker.Unlock(exceptCache);
4376 	}
4377 };
4378 
4379 
4380 /*!	Gets the page that should be mapped into the area.
4381 	Returns an error code other than \c B_OK, if the page couldn't be found or
4382 	paged in. The locking state of the address space and the caches is undefined
4383 	in that case.
4384 	Returns \c B_OK with \c context.restart set to \c true, if the functions
4385 	had to unlock the address space and all caches and is supposed to be called
4386 	again.
4387 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4388 	found. It is returned in \c context.page. The address space will still be
4389 	locked as well as all caches starting from the top cache to at least the
4390 	cache the page lives in.
4391 */
4392 static status_t
4393 fault_get_page(PageFaultContext& context)
4394 {
4395 	VMCache* cache = context.topCache;
4396 	VMCache* lastCache = NULL;
4397 	vm_page* page = NULL;
4398 
4399 	while (cache != NULL) {
4400 		// We already hold the lock of the cache at this point.
4401 
4402 		lastCache = cache;
4403 
4404 		page = cache->LookupPage(context.cacheOffset);
4405 		if (page != NULL && page->busy) {
4406 			// page must be busy -- wait for it to become unbusy
4407 			context.UnlockAll(cache);
4408 			cache->ReleaseRefLocked();
4409 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4410 
4411 			// restart the whole process
4412 			context.restart = true;
4413 			return B_OK;
4414 		}
4415 
4416 		if (page != NULL)
4417 			break;
4418 
4419 		// The current cache does not contain the page we're looking for.
4420 
4421 		// see if the backing store has it
4422 		if (cache->HasPage(context.cacheOffset)) {
4423 			// insert a fresh page and mark it busy -- we're going to read it in
4424 			page = vm_page_allocate_page(&context.reservation,
4425 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4426 			cache->InsertPage(page, context.cacheOffset);
4427 
4428 			// We need to unlock all caches and the address space while reading
4429 			// the page in. Keep a reference to the cache around.
4430 			cache->AcquireRefLocked();
4431 			context.UnlockAll();
4432 
4433 			// read the page in
4434 			generic_io_vec vec;
4435 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4436 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4437 
4438 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4439 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4440 
4441 			cache->Lock();
4442 
4443 			if (status < B_OK) {
4444 				// on error remove and free the page
4445 				dprintf("reading page from cache %p returned: %s!\n",
4446 					cache, strerror(status));
4447 
4448 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4449 				cache->RemovePage(page);
4450 				vm_page_set_state(page, PAGE_STATE_FREE);
4451 
4452 				cache->ReleaseRefAndUnlock();
4453 				return status;
4454 			}
4455 
4456 			// mark the page unbusy again
4457 			cache->MarkPageUnbusy(page);
4458 
4459 			DEBUG_PAGE_ACCESS_END(page);
4460 
4461 			// Since we needed to unlock everything temporarily, the area
4462 			// situation might have changed. So we need to restart the whole
4463 			// process.
4464 			cache->ReleaseRefAndUnlock();
4465 			context.restart = true;
4466 			return B_OK;
4467 		}
4468 
4469 		cache = context.cacheChainLocker.LockSourceCache();
4470 	}
4471 
4472 	if (page == NULL) {
4473 		// There was no adequate page, determine the cache for a clean one.
4474 		// Read-only pages come in the deepest cache, only the top most cache
4475 		// may have direct write access.
4476 		cache = context.isWrite ? context.topCache : lastCache;
4477 
4478 		// allocate a clean page
4479 		page = vm_page_allocate_page(&context.reservation,
4480 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4481 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4482 			page->physical_page_number));
4483 
4484 		// insert the new page into our cache
4485 		cache->InsertPage(page, context.cacheOffset);
4486 	} else if (page->Cache() != context.topCache && context.isWrite) {
4487 		// We have a page that has the data we want, but in the wrong cache
4488 		// object so we need to copy it and stick it into the top cache.
4489 		vm_page* sourcePage = page;
4490 
4491 		// TODO: If memory is low, it might be a good idea to steal the page
4492 		// from our source cache -- if possible, that is.
4493 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4494 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4495 
4496 		// To not needlessly kill concurrency we unlock all caches but the top
4497 		// one while copying the page. Lacking another mechanism to ensure that
4498 		// the source page doesn't disappear, we mark it busy.
4499 		sourcePage->busy = true;
4500 		context.cacheChainLocker.UnlockKeepRefs(true);
4501 
4502 		// copy the page
4503 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4504 			sourcePage->physical_page_number * B_PAGE_SIZE);
4505 
4506 		context.cacheChainLocker.RelockCaches(true);
4507 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4508 
4509 		// insert the new page into our cache
4510 		context.topCache->InsertPage(page, context.cacheOffset);
4511 	} else
4512 		DEBUG_PAGE_ACCESS_START(page);
4513 
4514 	context.page = page;
4515 	return B_OK;
4516 }
4517 
4518 
4519 /*!	Makes sure the address in the given address space is mapped.
4520 
4521 	\param addressSpace The address space.
4522 	\param originalAddress The address. Doesn't need to be page aligned.
4523 	\param isWrite If \c true the address shall be write-accessible.
4524 	\param isUser If \c true the access is requested by a userland team.
4525 	\param wirePage On success, if non \c NULL, the wired count of the page
4526 		mapped at the given address is incremented and the page is returned
4527 		via this parameter.
4528 	\param wiredRange If given, this wiredRange is ignored when checking whether
4529 		an already mapped page at the virtual address can be unmapped.
4530 	\return \c B_OK on success, another error code otherwise.
4531 */
4532 static status_t
4533 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4534 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage,
4535 	VMAreaWiredRange* wiredRange)
4536 {
4537 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4538 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4539 		originalAddress, isWrite, isUser));
4540 
4541 	PageFaultContext context(addressSpace, isWrite);
4542 
4543 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4544 	status_t status = B_OK;
4545 
4546 	addressSpace->IncrementFaultCount();
4547 
4548 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4549 	// the pages upfront makes sure we don't have any cache locked, so that the
4550 	// page daemon/thief can do their job without problems.
4551 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4552 		originalAddress);
4553 	context.addressSpaceLocker.Unlock();
4554 	vm_page_reserve_pages(&context.reservation, reservePages,
4555 		addressSpace == VMAddressSpace::Kernel()
4556 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4557 
4558 	while (true) {
4559 		context.addressSpaceLocker.Lock();
4560 
4561 		// get the area the fault was in
4562 		VMArea* area = addressSpace->LookupArea(address);
4563 		if (area == NULL) {
4564 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4565 				"space\n", originalAddress);
4566 			TPF(PageFaultError(-1,
4567 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4568 			status = B_BAD_ADDRESS;
4569 			break;
4570 		}
4571 
4572 		// check permissions
4573 		uint32 protection = get_area_page_protection(area, address);
4574 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4575 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4576 				area->id, (void*)originalAddress);
4577 			TPF(PageFaultError(area->id,
4578 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4579 			status = B_PERMISSION_DENIED;
4580 			break;
4581 		}
4582 		if (isWrite && (protection
4583 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4584 			dprintf("write access attempted on write-protected area 0x%"
4585 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4586 			TPF(PageFaultError(area->id,
4587 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4588 			status = B_PERMISSION_DENIED;
4589 			break;
4590 		} else if (isExecute && (protection
4591 				& (B_EXECUTE_AREA
4592 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4593 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4594 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4595 			TPF(PageFaultError(area->id,
4596 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4597 			status = B_PERMISSION_DENIED;
4598 			break;
4599 		} else if (!isWrite && !isExecute && (protection
4600 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4601 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4602 				" at %p\n", area->id, (void*)originalAddress);
4603 			TPF(PageFaultError(area->id,
4604 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4605 			status = B_PERMISSION_DENIED;
4606 			break;
4607 		}
4608 
4609 		// We have the area, it was a valid access, so let's try to resolve the
4610 		// page fault now.
4611 		// At first, the top most cache from the area is investigated.
4612 
4613 		context.Prepare(vm_area_get_locked_cache(area),
4614 			address - area->Base() + area->cache_offset);
4615 
4616 		// See if this cache has a fault handler -- this will do all the work
4617 		// for us.
4618 		{
4619 			// Note, since the page fault is resolved with interrupts enabled,
4620 			// the fault handler could be called more than once for the same
4621 			// reason -- the store must take this into account.
4622 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4623 			if (status != B_BAD_HANDLER)
4624 				break;
4625 		}
4626 
4627 		// The top most cache has no fault handler, so let's see if the cache or
4628 		// its sources already have the page we're searching for (we're going
4629 		// from top to bottom).
4630 		status = fault_get_page(context);
4631 		if (status != B_OK) {
4632 			TPF(PageFaultError(area->id, status));
4633 			break;
4634 		}
4635 
4636 		if (context.restart)
4637 			continue;
4638 
4639 		// All went fine, all there is left to do is to map the page into the
4640 		// address space.
4641 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4642 			context.page));
4643 
4644 		// If the page doesn't reside in the area's cache, we need to make sure
4645 		// it's mapped in read-only, so that we cannot overwrite someone else's
4646 		// data (copy-on-write)
4647 		uint32 newProtection = protection;
4648 		if (context.page->Cache() != context.topCache && !isWrite)
4649 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4650 
4651 		bool unmapPage = false;
4652 		bool mapPage = true;
4653 
4654 		// check whether there's already a page mapped at the address
4655 		context.map->Lock();
4656 
4657 		phys_addr_t physicalAddress;
4658 		uint32 flags;
4659 		vm_page* mappedPage = NULL;
4660 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4661 			&& (flags & PAGE_PRESENT) != 0
4662 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4663 				!= NULL) {
4664 			// Yep there's already a page. If it's ours, we can simply adjust
4665 			// its protection. Otherwise we have to unmap it.
4666 			if (mappedPage == context.page) {
4667 				context.map->ProtectPage(area, address, newProtection);
4668 					// Note: We assume that ProtectPage() is atomic (i.e.
4669 					// the page isn't temporarily unmapped), otherwise we'd have
4670 					// to make sure it isn't wired.
4671 				mapPage = false;
4672 			} else
4673 				unmapPage = true;
4674 		}
4675 
4676 		context.map->Unlock();
4677 
4678 		if (unmapPage) {
4679 			// If the page is wired, we can't unmap it. Wait until it is unwired
4680 			// again and restart.
4681 			VMAreaUnwiredWaiter waiter;
4682 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4683 					wiredRange)) {
4684 				// unlock everything and wait
4685 				context.UnlockAll();
4686 				waiter.waitEntry.Wait();
4687 				continue;
4688 			}
4689 
4690 			// Note: The mapped page is a page of a lower cache. We are
4691 			// guaranteed to have that cached locked, our new page is a copy of
4692 			// that page, and the page is not busy. The logic for that guarantee
4693 			// is as follows: Since the page is mapped, it must live in the top
4694 			// cache (ruled out above) or any of its lower caches, and there is
4695 			// (was before the new page was inserted) no other page in any
4696 			// cache between the top cache and the page's cache (otherwise that
4697 			// would be mapped instead). That in turn means that our algorithm
4698 			// must have found it and therefore it cannot be busy either.
4699 			DEBUG_PAGE_ACCESS_START(mappedPage);
4700 			unmap_page(area, address);
4701 			DEBUG_PAGE_ACCESS_END(mappedPage);
4702 		}
4703 
4704 		if (mapPage) {
4705 			if (map_page(area, context.page, address, newProtection,
4706 					&context.reservation) != B_OK) {
4707 				// Mapping can only fail, when the page mapping object couldn't
4708 				// be allocated. Save for the missing mapping everything is
4709 				// fine, though. If this was a regular page fault, we'll simply
4710 				// leave and probably fault again. To make sure we'll have more
4711 				// luck then, we ensure that the minimum object reserve is
4712 				// available.
4713 				DEBUG_PAGE_ACCESS_END(context.page);
4714 
4715 				context.UnlockAll();
4716 
4717 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4718 						!= B_OK) {
4719 					// Apparently the situation is serious. Let's get ourselves
4720 					// killed.
4721 					status = B_NO_MEMORY;
4722 				} else if (wirePage != NULL) {
4723 					// The caller expects us to wire the page. Since
4724 					// object_cache_reserve() succeeded, we should now be able
4725 					// to allocate a mapping structure. Restart.
4726 					continue;
4727 				}
4728 
4729 				break;
4730 			}
4731 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4732 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4733 
4734 		// also wire the page, if requested
4735 		if (wirePage != NULL && status == B_OK) {
4736 			increment_page_wired_count(context.page);
4737 			*wirePage = context.page;
4738 		}
4739 
4740 		DEBUG_PAGE_ACCESS_END(context.page);
4741 
4742 		break;
4743 	}
4744 
4745 	return status;
4746 }
4747 
4748 
4749 status_t
4750 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4751 {
4752 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4753 }
4754 
4755 status_t
4756 vm_put_physical_page(addr_t vaddr, void* handle)
4757 {
4758 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4759 }
4760 
4761 
4762 status_t
4763 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4764 	void** _handle)
4765 {
4766 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4767 }
4768 
4769 status_t
4770 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4771 {
4772 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4773 }
4774 
4775 
4776 status_t
4777 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4778 {
4779 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4780 }
4781 
4782 status_t
4783 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4784 {
4785 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4786 }
4787 
4788 
4789 void
4790 vm_get_info(system_info* info)
4791 {
4792 	swap_get_info(info);
4793 
4794 	MutexLocker locker(sAvailableMemoryLock);
4795 	info->needed_memory = sNeededMemory;
4796 	info->free_memory = sAvailableMemory;
4797 }
4798 
4799 
4800 uint32
4801 vm_num_page_faults(void)
4802 {
4803 	return sPageFaults;
4804 }
4805 
4806 
4807 off_t
4808 vm_available_memory(void)
4809 {
4810 	MutexLocker locker(sAvailableMemoryLock);
4811 	return sAvailableMemory;
4812 }
4813 
4814 
4815 off_t
4816 vm_available_not_needed_memory(void)
4817 {
4818 	MutexLocker locker(sAvailableMemoryLock);
4819 	return sAvailableMemory - sNeededMemory;
4820 }
4821 
4822 
4823 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4824 	debugger.
4825 */
4826 off_t
4827 vm_available_not_needed_memory_debug(void)
4828 {
4829 	return sAvailableMemory - sNeededMemory;
4830 }
4831 
4832 
4833 size_t
4834 vm_kernel_address_space_left(void)
4835 {
4836 	return VMAddressSpace::Kernel()->FreeSpace();
4837 }
4838 
4839 
4840 void
4841 vm_unreserve_memory(size_t amount)
4842 {
4843 	mutex_lock(&sAvailableMemoryLock);
4844 
4845 	sAvailableMemory += amount;
4846 
4847 	mutex_unlock(&sAvailableMemoryLock);
4848 }
4849 
4850 
4851 status_t
4852 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4853 {
4854 	size_t reserve = kMemoryReserveForPriority[priority];
4855 
4856 	MutexLocker locker(sAvailableMemoryLock);
4857 
4858 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4859 
4860 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4861 		sAvailableMemory -= amount;
4862 		return B_OK;
4863 	}
4864 
4865 	if (timeout <= 0)
4866 		return B_NO_MEMORY;
4867 
4868 	// turn timeout into an absolute timeout
4869 	timeout += system_time();
4870 
4871 	// loop until we've got the memory or the timeout occurs
4872 	do {
4873 		sNeededMemory += amount;
4874 
4875 		// call the low resource manager
4876 		locker.Unlock();
4877 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4878 			B_ABSOLUTE_TIMEOUT, timeout);
4879 		locker.Lock();
4880 
4881 		sNeededMemory -= amount;
4882 
4883 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4884 			sAvailableMemory -= amount;
4885 			return B_OK;
4886 		}
4887 	} while (timeout > system_time());
4888 
4889 	return B_NO_MEMORY;
4890 }
4891 
4892 
4893 status_t
4894 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4895 {
4896 	// NOTE: The caller is responsible for synchronizing calls to this function!
4897 
4898 	AddressSpaceReadLocker locker;
4899 	VMArea* area;
4900 	status_t status = locker.SetFromArea(id, area);
4901 	if (status != B_OK)
4902 		return status;
4903 
4904 	// nothing to do, if the type doesn't change
4905 	uint32 oldType = area->MemoryType();
4906 	if (type == oldType)
4907 		return B_OK;
4908 
4909 	// set the memory type of the area and the mapped pages
4910 	VMTranslationMap* map = area->address_space->TranslationMap();
4911 	map->Lock();
4912 	area->SetMemoryType(type);
4913 	map->ProtectArea(area, area->protection);
4914 	map->Unlock();
4915 
4916 	// set the physical memory type
4917 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4918 	if (error != B_OK) {
4919 		// reset the memory type of the area and the mapped pages
4920 		map->Lock();
4921 		area->SetMemoryType(oldType);
4922 		map->ProtectArea(area, area->protection);
4923 		map->Unlock();
4924 		return error;
4925 	}
4926 
4927 	return B_OK;
4928 
4929 }
4930 
4931 
4932 /*!	This function enforces some protection properties:
4933 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4934 	 - if B_EXECUTE_AREA is set, B_KERNEL_EXECUTE_AREA is set as well
4935 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4936 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4937 	   and B_KERNEL_WRITE_AREA.
4938 */
4939 static void
4940 fix_protection(uint32* protection)
4941 {
4942 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4943 		if ((*protection & B_USER_PROTECTION) == 0
4944 			|| (*protection & B_WRITE_AREA) != 0)
4945 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4946 		else
4947 			*protection |= B_KERNEL_READ_AREA;
4948 		if ((*protection & B_EXECUTE_AREA) != 0)
4949 			*protection |= B_KERNEL_EXECUTE_AREA;
4950 	}
4951 }
4952 
4953 
4954 static void
4955 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4956 {
4957 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4958 	info->area = area->id;
4959 	info->address = (void*)area->Base();
4960 	info->size = area->Size();
4961 	info->protection = area->protection;
4962 	info->lock = B_FULL_LOCK;
4963 	info->team = area->address_space->ID();
4964 	info->copy_count = 0;
4965 	info->in_count = 0;
4966 	info->out_count = 0;
4967 		// TODO: retrieve real values here!
4968 
4969 	VMCache* cache = vm_area_get_locked_cache(area);
4970 
4971 	// Note, this is a simplification; the cache could be larger than this area
4972 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4973 
4974 	vm_area_put_locked_cache(cache);
4975 }
4976 
4977 
4978 static status_t
4979 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4980 {
4981 	// is newSize a multiple of B_PAGE_SIZE?
4982 	if (newSize & (B_PAGE_SIZE - 1))
4983 		return B_BAD_VALUE;
4984 
4985 	// lock all affected address spaces and the cache
4986 	VMArea* area;
4987 	VMCache* cache;
4988 
4989 	MultiAddressSpaceLocker locker;
4990 	AreaCacheLocker cacheLocker;
4991 
4992 	status_t status;
4993 	size_t oldSize;
4994 	bool anyKernelArea;
4995 	bool restart;
4996 
4997 	do {
4998 		anyKernelArea = false;
4999 		restart = false;
5000 
5001 		locker.Unset();
5002 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5003 		if (status != B_OK)
5004 			return status;
5005 		cacheLocker.SetTo(cache, true);	// already locked
5006 
5007 		// enforce restrictions
5008 		if (!kernel) {
5009 			if ((area->protection & B_KERNEL_AREA) != 0)
5010 				return B_NOT_ALLOWED;
5011 			// TODO: Enforce all restrictions (team, etc.)!
5012 		}
5013 
5014 		oldSize = area->Size();
5015 		if (newSize == oldSize)
5016 			return B_OK;
5017 
5018 		if (cache->type != CACHE_TYPE_RAM)
5019 			return B_NOT_ALLOWED;
5020 
5021 		if (oldSize < newSize) {
5022 			// We need to check if all areas of this cache can be resized.
5023 			for (VMArea* current = cache->areas; current != NULL;
5024 					current = current->cache_next) {
5025 				if (!current->address_space->CanResizeArea(current, newSize))
5026 					return B_ERROR;
5027 				anyKernelArea
5028 					|= current->address_space == VMAddressSpace::Kernel();
5029 			}
5030 		} else {
5031 			// We're shrinking the areas, so we must make sure the affected
5032 			// ranges are not wired.
5033 			for (VMArea* current = cache->areas; current != NULL;
5034 					current = current->cache_next) {
5035 				anyKernelArea
5036 					|= current->address_space == VMAddressSpace::Kernel();
5037 
5038 				if (wait_if_area_range_is_wired(current,
5039 						current->Base() + newSize, oldSize - newSize, &locker,
5040 						&cacheLocker)) {
5041 					restart = true;
5042 					break;
5043 				}
5044 			}
5045 		}
5046 	} while (restart);
5047 
5048 	// Okay, looks good so far, so let's do it
5049 
5050 	int priority = kernel && anyKernelArea
5051 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5052 	uint32 allocationFlags = kernel && anyKernelArea
5053 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5054 
5055 	if (oldSize < newSize) {
5056 		// Growing the cache can fail, so we do it first.
5057 		status = cache->Resize(cache->virtual_base + newSize, priority);
5058 		if (status != B_OK)
5059 			return status;
5060 	}
5061 
5062 	for (VMArea* current = cache->areas; current != NULL;
5063 			current = current->cache_next) {
5064 		status = current->address_space->ResizeArea(current, newSize,
5065 			allocationFlags);
5066 		if (status != B_OK)
5067 			break;
5068 
5069 		// We also need to unmap all pages beyond the new size, if the area has
5070 		// shrunk
5071 		if (newSize < oldSize) {
5072 			VMCacheChainLocker cacheChainLocker(cache);
5073 			cacheChainLocker.LockAllSourceCaches();
5074 
5075 			unmap_pages(current, current->Base() + newSize,
5076 				oldSize - newSize);
5077 
5078 			cacheChainLocker.Unlock(cache);
5079 		}
5080 	}
5081 
5082 	if (status == B_OK) {
5083 		// Shrink or grow individual page protections if in use.
5084 		if (area->page_protections != NULL) {
5085 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5086 			uint8* newProtections
5087 				= (uint8*)realloc(area->page_protections, bytes);
5088 			if (newProtections == NULL)
5089 				status = B_NO_MEMORY;
5090 			else {
5091 				area->page_protections = newProtections;
5092 
5093 				if (oldSize < newSize) {
5094 					// init the additional page protections to that of the area
5095 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5096 					uint32 areaProtection = area->protection
5097 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5098 					memset(area->page_protections + offset,
5099 						areaProtection | (areaProtection << 4), bytes - offset);
5100 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5101 						uint8& entry = area->page_protections[offset - 1];
5102 						entry = (entry & 0x0f) | (areaProtection << 4);
5103 					}
5104 				}
5105 			}
5106 		}
5107 	}
5108 
5109 	// shrinking the cache can't fail, so we do it now
5110 	if (status == B_OK && newSize < oldSize)
5111 		status = cache->Resize(cache->virtual_base + newSize, priority);
5112 
5113 	if (status != B_OK) {
5114 		// Something failed -- resize the areas back to their original size.
5115 		// This can fail, too, in which case we're seriously screwed.
5116 		for (VMArea* current = cache->areas; current != NULL;
5117 				current = current->cache_next) {
5118 			if (current->address_space->ResizeArea(current, oldSize,
5119 					allocationFlags) != B_OK) {
5120 				panic("vm_resize_area(): Failed and not being able to restore "
5121 					"original state.");
5122 			}
5123 		}
5124 
5125 		cache->Resize(cache->virtual_base + oldSize, priority);
5126 	}
5127 
5128 	// TODO: we must honour the lock restrictions of this area
5129 	return status;
5130 }
5131 
5132 
5133 status_t
5134 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5135 {
5136 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5137 }
5138 
5139 
5140 status_t
5141 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5142 {
5143 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5144 }
5145 
5146 
5147 status_t
5148 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5149 	bool user)
5150 {
5151 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5152 }
5153 
5154 
5155 void
5156 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5157 {
5158 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5159 }
5160 
5161 
5162 /*!	Copies a range of memory directly from/to a page that might not be mapped
5163 	at the moment.
5164 
5165 	For \a unsafeMemory the current mapping (if any is ignored). The function
5166 	walks through the respective area's cache chain to find the physical page
5167 	and copies from/to it directly.
5168 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5169 	must not cross a page boundary.
5170 
5171 	\param teamID The team ID identifying the address space \a unsafeMemory is
5172 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5173 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5174 		is passed, the address space of the thread returned by
5175 		debug_get_debugged_thread() is used.
5176 	\param unsafeMemory The start of the unsafe memory range to be copied
5177 		from/to.
5178 	\param buffer A safely accessible kernel buffer to be copied from/to.
5179 	\param size The number of bytes to be copied.
5180 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5181 		\a unsafeMemory, the other way around otherwise.
5182 */
5183 status_t
5184 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5185 	size_t size, bool copyToUnsafe)
5186 {
5187 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5188 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5189 		return B_BAD_VALUE;
5190 	}
5191 
5192 	// get the address space for the debugged thread
5193 	VMAddressSpace* addressSpace;
5194 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5195 		addressSpace = VMAddressSpace::Kernel();
5196 	} else if (teamID == B_CURRENT_TEAM) {
5197 		Thread* thread = debug_get_debugged_thread();
5198 		if (thread == NULL || thread->team == NULL)
5199 			return B_BAD_ADDRESS;
5200 
5201 		addressSpace = thread->team->address_space;
5202 	} else
5203 		addressSpace = VMAddressSpace::DebugGet(teamID);
5204 
5205 	if (addressSpace == NULL)
5206 		return B_BAD_ADDRESS;
5207 
5208 	// get the area
5209 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5210 	if (area == NULL)
5211 		return B_BAD_ADDRESS;
5212 
5213 	// search the page
5214 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5215 		+ area->cache_offset;
5216 	VMCache* cache = area->cache;
5217 	vm_page* page = NULL;
5218 	while (cache != NULL) {
5219 		page = cache->DebugLookupPage(cacheOffset);
5220 		if (page != NULL)
5221 			break;
5222 
5223 		// Page not found in this cache -- if it is paged out, we must not try
5224 		// to get it from lower caches.
5225 		if (cache->DebugHasPage(cacheOffset))
5226 			break;
5227 
5228 		cache = cache->source;
5229 	}
5230 
5231 	if (page == NULL)
5232 		return B_UNSUPPORTED;
5233 
5234 	// copy from/to physical memory
5235 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5236 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5237 
5238 	if (copyToUnsafe) {
5239 		if (page->Cache() != area->cache)
5240 			return B_UNSUPPORTED;
5241 
5242 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5243 	}
5244 
5245 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5246 }
5247 
5248 
5249 //	#pragma mark - kernel public API
5250 
5251 
5252 status_t
5253 user_memcpy(void* to, const void* from, size_t size)
5254 {
5255 	// don't allow address overflows
5256 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5257 		return B_BAD_ADDRESS;
5258 
5259 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5260 		return B_BAD_ADDRESS;
5261 
5262 	return B_OK;
5263 }
5264 
5265 
5266 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5267 	the string in \a to, NULL-terminating the result.
5268 
5269 	\param to Pointer to the destination C-string.
5270 	\param from Pointer to the source C-string.
5271 	\param size Size in bytes of the string buffer pointed to by \a to.
5272 
5273 	\return strlen(\a from).
5274 */
5275 ssize_t
5276 user_strlcpy(char* to, const char* from, size_t size)
5277 {
5278 	if (to == NULL && size != 0)
5279 		return B_BAD_VALUE;
5280 	if (from == NULL)
5281 		return B_BAD_ADDRESS;
5282 
5283 	// limit size to avoid address overflows
5284 	size_t maxSize = std::min(size,
5285 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5286 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5287 		// the source address might still overflow.
5288 
5289 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5290 
5291 	// If we hit the address overflow boundary, fail.
5292 	if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5293 			&& maxSize < size)) {
5294 		return B_BAD_ADDRESS;
5295 	}
5296 
5297 	return result;
5298 }
5299 
5300 
5301 status_t
5302 user_memset(void* s, char c, size_t count)
5303 {
5304 	// don't allow address overflows
5305 	if ((addr_t)s + count < (addr_t)s)
5306 		return B_BAD_ADDRESS;
5307 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5308 		return B_BAD_ADDRESS;
5309 
5310 	return B_OK;
5311 }
5312 
5313 
5314 /*!	Wires a single page at the given address.
5315 
5316 	\param team The team whose address space the address belongs to. Supports
5317 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5318 		parameter is ignored.
5319 	\param address address The virtual address to wire down. Does not need to
5320 		be page aligned.
5321 	\param writable If \c true the page shall be writable.
5322 	\param info On success the info is filled in, among other things
5323 		containing the physical address the given virtual one translates to.
5324 	\return \c B_OK, when the page could be wired, another error code otherwise.
5325 */
5326 status_t
5327 vm_wire_page(team_id team, addr_t address, bool writable,
5328 	VMPageWiringInfo* info)
5329 {
5330 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5331 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5332 
5333 	// compute the page protection that is required
5334 	bool isUser = IS_USER_ADDRESS(address);
5335 	uint32 requiredProtection = PAGE_PRESENT
5336 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5337 	if (writable)
5338 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5339 
5340 	// get and read lock the address space
5341 	VMAddressSpace* addressSpace = NULL;
5342 	if (isUser) {
5343 		if (team == B_CURRENT_TEAM)
5344 			addressSpace = VMAddressSpace::GetCurrent();
5345 		else
5346 			addressSpace = VMAddressSpace::Get(team);
5347 	} else
5348 		addressSpace = VMAddressSpace::GetKernel();
5349 	if (addressSpace == NULL)
5350 		return B_ERROR;
5351 
5352 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5353 
5354 	VMTranslationMap* map = addressSpace->TranslationMap();
5355 	status_t error = B_OK;
5356 
5357 	// get the area
5358 	VMArea* area = addressSpace->LookupArea(pageAddress);
5359 	if (area == NULL) {
5360 		addressSpace->Put();
5361 		return B_BAD_ADDRESS;
5362 	}
5363 
5364 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5365 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5366 
5367 	// mark the area range wired
5368 	area->Wire(&info->range);
5369 
5370 	// Lock the area's cache chain and the translation map. Needed to look
5371 	// up the page and play with its wired count.
5372 	cacheChainLocker.LockAllSourceCaches();
5373 	map->Lock();
5374 
5375 	phys_addr_t physicalAddress;
5376 	uint32 flags;
5377 	vm_page* page;
5378 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5379 		&& (flags & requiredProtection) == requiredProtection
5380 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5381 			!= NULL) {
5382 		// Already mapped with the correct permissions -- just increment
5383 		// the page's wired count.
5384 		increment_page_wired_count(page);
5385 
5386 		map->Unlock();
5387 		cacheChainLocker.Unlock();
5388 		addressSpaceLocker.Unlock();
5389 	} else {
5390 		// Let vm_soft_fault() map the page for us, if possible. We need
5391 		// to fully unlock to avoid deadlocks. Since we have already
5392 		// wired the area itself, nothing disturbing will happen with it
5393 		// in the meantime.
5394 		map->Unlock();
5395 		cacheChainLocker.Unlock();
5396 		addressSpaceLocker.Unlock();
5397 
5398 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5399 			isUser, &page, &info->range);
5400 
5401 		if (error != B_OK) {
5402 			// The page could not be mapped -- clean up.
5403 			VMCache* cache = vm_area_get_locked_cache(area);
5404 			area->Unwire(&info->range);
5405 			cache->ReleaseRefAndUnlock();
5406 			addressSpace->Put();
5407 			return error;
5408 		}
5409 	}
5410 
5411 	info->physicalAddress
5412 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5413 			+ address % B_PAGE_SIZE;
5414 	info->page = page;
5415 
5416 	return B_OK;
5417 }
5418 
5419 
5420 /*!	Unwires a single page previously wired via vm_wire_page().
5421 
5422 	\param info The same object passed to vm_wire_page() before.
5423 */
5424 void
5425 vm_unwire_page(VMPageWiringInfo* info)
5426 {
5427 	// lock the address space
5428 	VMArea* area = info->range.area;
5429 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5430 		// takes over our reference
5431 
5432 	// lock the top cache
5433 	VMCache* cache = vm_area_get_locked_cache(area);
5434 	VMCacheChainLocker cacheChainLocker(cache);
5435 
5436 	if (info->page->Cache() != cache) {
5437 		// The page is not in the top cache, so we lock the whole cache chain
5438 		// before touching the page's wired count.
5439 		cacheChainLocker.LockAllSourceCaches();
5440 	}
5441 
5442 	decrement_page_wired_count(info->page);
5443 
5444 	// remove the wired range from the range
5445 	area->Unwire(&info->range);
5446 
5447 	cacheChainLocker.Unlock();
5448 }
5449 
5450 
5451 /*!	Wires down the given address range in the specified team's address space.
5452 
5453 	If successful the function
5454 	- acquires a reference to the specified team's address space,
5455 	- adds respective wired ranges to all areas that intersect with the given
5456 	  address range,
5457 	- makes sure all pages in the given address range are mapped with the
5458 	  requested access permissions and increments their wired count.
5459 
5460 	It fails, when \a team doesn't specify a valid address space, when any part
5461 	of the specified address range is not covered by areas, when the concerned
5462 	areas don't allow mapping with the requested permissions, or when mapping
5463 	failed for another reason.
5464 
5465 	When successful the call must be balanced by a unlock_memory_etc() call with
5466 	the exact same parameters.
5467 
5468 	\param team Identifies the address (via team ID). \c B_CURRENT_TEAM is
5469 		supported.
5470 	\param address The start of the address range to be wired.
5471 	\param numBytes The size of the address range to be wired.
5472 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5473 		requests that the range must be wired writable ("read from device
5474 		into memory").
5475 	\return \c B_OK on success, another error code otherwise.
5476 */
5477 status_t
5478 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5479 {
5480 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5481 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5482 
5483 	// compute the page protection that is required
5484 	bool isUser = IS_USER_ADDRESS(address);
5485 	bool writable = (flags & B_READ_DEVICE) == 0;
5486 	uint32 requiredProtection = PAGE_PRESENT
5487 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5488 	if (writable)
5489 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5490 
5491 	uint32 mallocFlags = isUser
5492 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5493 
5494 	// get and read lock the address space
5495 	VMAddressSpace* addressSpace = NULL;
5496 	if (isUser) {
5497 		if (team == B_CURRENT_TEAM)
5498 			addressSpace = VMAddressSpace::GetCurrent();
5499 		else
5500 			addressSpace = VMAddressSpace::Get(team);
5501 	} else
5502 		addressSpace = VMAddressSpace::GetKernel();
5503 	if (addressSpace == NULL)
5504 		return B_ERROR;
5505 
5506 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5507 		// We get a new address space reference here. The one we got above will
5508 		// be freed by unlock_memory_etc().
5509 
5510 	VMTranslationMap* map = addressSpace->TranslationMap();
5511 	status_t error = B_OK;
5512 
5513 	// iterate through all concerned areas
5514 	addr_t nextAddress = lockBaseAddress;
5515 	while (nextAddress != lockEndAddress) {
5516 		// get the next area
5517 		VMArea* area = addressSpace->LookupArea(nextAddress);
5518 		if (area == NULL) {
5519 			error = B_BAD_ADDRESS;
5520 			break;
5521 		}
5522 
5523 		addr_t areaStart = nextAddress;
5524 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5525 
5526 		// allocate the wired range (do that before locking the cache to avoid
5527 		// deadlocks)
5528 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5529 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5530 		if (range == NULL) {
5531 			error = B_NO_MEMORY;
5532 			break;
5533 		}
5534 
5535 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5536 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5537 
5538 		// mark the area range wired
5539 		area->Wire(range);
5540 
5541 		// Depending on the area cache type and the wiring, we may not need to
5542 		// look at the individual pages.
5543 		if (area->cache_type == CACHE_TYPE_NULL
5544 			|| area->cache_type == CACHE_TYPE_DEVICE
5545 			|| area->wiring == B_FULL_LOCK
5546 			|| area->wiring == B_CONTIGUOUS) {
5547 			nextAddress = areaEnd;
5548 			continue;
5549 		}
5550 
5551 		// Lock the area's cache chain and the translation map. Needed to look
5552 		// up pages and play with their wired count.
5553 		cacheChainLocker.LockAllSourceCaches();
5554 		map->Lock();
5555 
5556 		// iterate through the pages and wire them
5557 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5558 			phys_addr_t physicalAddress;
5559 			uint32 flags;
5560 
5561 			vm_page* page;
5562 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5563 				&& (flags & requiredProtection) == requiredProtection
5564 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5565 					!= NULL) {
5566 				// Already mapped with the correct permissions -- just increment
5567 				// the page's wired count.
5568 				increment_page_wired_count(page);
5569 			} else {
5570 				// Let vm_soft_fault() map the page for us, if possible. We need
5571 				// to fully unlock to avoid deadlocks. Since we have already
5572 				// wired the area itself, nothing disturbing will happen with it
5573 				// in the meantime.
5574 				map->Unlock();
5575 				cacheChainLocker.Unlock();
5576 				addressSpaceLocker.Unlock();
5577 
5578 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5579 					false, isUser, &page, range);
5580 
5581 				addressSpaceLocker.Lock();
5582 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5583 				cacheChainLocker.LockAllSourceCaches();
5584 				map->Lock();
5585 			}
5586 
5587 			if (error != B_OK)
5588 				break;
5589 		}
5590 
5591 		map->Unlock();
5592 
5593 		if (error == B_OK) {
5594 			cacheChainLocker.Unlock();
5595 		} else {
5596 			// An error occurred, so abort right here. If the current address
5597 			// is the first in this area, unwire the area, since we won't get
5598 			// to it when reverting what we've done so far.
5599 			if (nextAddress == areaStart) {
5600 				area->Unwire(range);
5601 				cacheChainLocker.Unlock();
5602 				range->~VMAreaWiredRange();
5603 				free_etc(range, mallocFlags);
5604 			} else
5605 				cacheChainLocker.Unlock();
5606 
5607 			break;
5608 		}
5609 	}
5610 
5611 	if (error != B_OK) {
5612 		// An error occurred, so unwire all that we've already wired. Note that
5613 		// even if not a single page was wired, unlock_memory_etc() is called
5614 		// to put the address space reference.
5615 		addressSpaceLocker.Unlock();
5616 		unlock_memory_etc(team, (void*)lockBaseAddress,
5617 			nextAddress - lockBaseAddress, flags);
5618 	}
5619 
5620 	return error;
5621 }
5622 
5623 
5624 status_t
5625 lock_memory(void* address, size_t numBytes, uint32 flags)
5626 {
5627 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5628 }
5629 
5630 
5631 /*!	Unwires an address range previously wired with lock_memory_etc().
5632 
5633 	Note that a call to this function must balance a previous lock_memory_etc()
5634 	call with exactly the same parameters.
5635 */
5636 status_t
5637 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5638 {
5639 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5640 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5641 
5642 	// compute the page protection that is required
5643 	bool isUser = IS_USER_ADDRESS(address);
5644 	bool writable = (flags & B_READ_DEVICE) == 0;
5645 	uint32 requiredProtection = PAGE_PRESENT
5646 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5647 	if (writable)
5648 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5649 
5650 	uint32 mallocFlags = isUser
5651 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5652 
5653 	// get and read lock the address space
5654 	VMAddressSpace* addressSpace = NULL;
5655 	if (isUser) {
5656 		if (team == B_CURRENT_TEAM)
5657 			addressSpace = VMAddressSpace::GetCurrent();
5658 		else
5659 			addressSpace = VMAddressSpace::Get(team);
5660 	} else
5661 		addressSpace = VMAddressSpace::GetKernel();
5662 	if (addressSpace == NULL)
5663 		return B_ERROR;
5664 
5665 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5666 		// Take over the address space reference. We don't unlock until we're
5667 		// done.
5668 
5669 	VMTranslationMap* map = addressSpace->TranslationMap();
5670 	status_t error = B_OK;
5671 
5672 	// iterate through all concerned areas
5673 	addr_t nextAddress = lockBaseAddress;
5674 	while (nextAddress != lockEndAddress) {
5675 		// get the next area
5676 		VMArea* area = addressSpace->LookupArea(nextAddress);
5677 		if (area == NULL) {
5678 			error = B_BAD_ADDRESS;
5679 			break;
5680 		}
5681 
5682 		addr_t areaStart = nextAddress;
5683 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5684 
5685 		// Lock the area's top cache. This is a requirement for
5686 		// VMArea::Unwire().
5687 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5688 
5689 		// Depending on the area cache type and the wiring, we may not need to
5690 		// look at the individual pages.
5691 		if (area->cache_type == CACHE_TYPE_NULL
5692 			|| area->cache_type == CACHE_TYPE_DEVICE
5693 			|| area->wiring == B_FULL_LOCK
5694 			|| area->wiring == B_CONTIGUOUS) {
5695 			// unwire the range (to avoid deadlocks we delete the range after
5696 			// unlocking the cache)
5697 			nextAddress = areaEnd;
5698 			VMAreaWiredRange* range = area->Unwire(areaStart,
5699 				areaEnd - areaStart, writable);
5700 			cacheChainLocker.Unlock();
5701 			if (range != NULL) {
5702 				range->~VMAreaWiredRange();
5703 				free_etc(range, mallocFlags);
5704 			}
5705 			continue;
5706 		}
5707 
5708 		// Lock the area's cache chain and the translation map. Needed to look
5709 		// up pages and play with their wired count.
5710 		cacheChainLocker.LockAllSourceCaches();
5711 		map->Lock();
5712 
5713 		// iterate through the pages and unwire them
5714 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5715 			phys_addr_t physicalAddress;
5716 			uint32 flags;
5717 
5718 			vm_page* page;
5719 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5720 				&& (flags & PAGE_PRESENT) != 0
5721 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5722 					!= NULL) {
5723 				// Already mapped with the correct permissions -- just increment
5724 				// the page's wired count.
5725 				decrement_page_wired_count(page);
5726 			} else {
5727 				panic("unlock_memory_etc(): Failed to unwire page: address "
5728 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5729 					nextAddress);
5730 				error = B_BAD_VALUE;
5731 				break;
5732 			}
5733 		}
5734 
5735 		map->Unlock();
5736 
5737 		// All pages are unwired. Remove the area's wired range as well (to
5738 		// avoid deadlocks we delete the range after unlocking the cache).
5739 		VMAreaWiredRange* range = area->Unwire(areaStart,
5740 			areaEnd - areaStart, writable);
5741 
5742 		cacheChainLocker.Unlock();
5743 
5744 		if (range != NULL) {
5745 			range->~VMAreaWiredRange();
5746 			free_etc(range, mallocFlags);
5747 		}
5748 
5749 		if (error != B_OK)
5750 			break;
5751 	}
5752 
5753 	// get rid of the address space reference lock_memory_etc() acquired
5754 	addressSpace->Put();
5755 
5756 	return error;
5757 }
5758 
5759 
5760 status_t
5761 unlock_memory(void* address, size_t numBytes, uint32 flags)
5762 {
5763 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5764 }
5765 
5766 
5767 /*!	Similar to get_memory_map(), but also allows to specify the address space
5768 	for the memory in question and has a saner semantics.
5769 	Returns \c B_OK when the complete range could be translated or
5770 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5771 	case the actual number of entries is written to \c *_numEntries. Any other
5772 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5773 	in this case.
5774 */
5775 status_t
5776 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5777 	physical_entry* table, uint32* _numEntries)
5778 {
5779 	uint32 numEntries = *_numEntries;
5780 	*_numEntries = 0;
5781 
5782 	VMAddressSpace* addressSpace;
5783 	addr_t virtualAddress = (addr_t)address;
5784 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5785 	phys_addr_t physicalAddress;
5786 	status_t status = B_OK;
5787 	int32 index = -1;
5788 	addr_t offset = 0;
5789 	bool interrupts = are_interrupts_enabled();
5790 
5791 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5792 		"entries)\n", team, address, numBytes, numEntries));
5793 
5794 	if (numEntries == 0 || numBytes == 0)
5795 		return B_BAD_VALUE;
5796 
5797 	// in which address space is the address to be found?
5798 	if (IS_USER_ADDRESS(virtualAddress)) {
5799 		if (team == B_CURRENT_TEAM)
5800 			addressSpace = VMAddressSpace::GetCurrent();
5801 		else
5802 			addressSpace = VMAddressSpace::Get(team);
5803 	} else
5804 		addressSpace = VMAddressSpace::GetKernel();
5805 
5806 	if (addressSpace == NULL)
5807 		return B_ERROR;
5808 
5809 	VMTranslationMap* map = addressSpace->TranslationMap();
5810 
5811 	if (interrupts)
5812 		map->Lock();
5813 
5814 	while (offset < numBytes) {
5815 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5816 		uint32 flags;
5817 
5818 		if (interrupts) {
5819 			status = map->Query((addr_t)address + offset, &physicalAddress,
5820 				&flags);
5821 		} else {
5822 			status = map->QueryInterrupt((addr_t)address + offset,
5823 				&physicalAddress, &flags);
5824 		}
5825 		if (status < B_OK)
5826 			break;
5827 		if ((flags & PAGE_PRESENT) == 0) {
5828 			panic("get_memory_map() called on unmapped memory!");
5829 			return B_BAD_ADDRESS;
5830 		}
5831 
5832 		if (index < 0 && pageOffset > 0) {
5833 			physicalAddress += pageOffset;
5834 			if (bytes > B_PAGE_SIZE - pageOffset)
5835 				bytes = B_PAGE_SIZE - pageOffset;
5836 		}
5837 
5838 		// need to switch to the next physical_entry?
5839 		if (index < 0 || table[index].address
5840 				!= physicalAddress - table[index].size) {
5841 			if ((uint32)++index + 1 > numEntries) {
5842 				// table to small
5843 				break;
5844 			}
5845 			table[index].address = physicalAddress;
5846 			table[index].size = bytes;
5847 		} else {
5848 			// page does fit in current entry
5849 			table[index].size += bytes;
5850 		}
5851 
5852 		offset += bytes;
5853 	}
5854 
5855 	if (interrupts)
5856 		map->Unlock();
5857 
5858 	if (status != B_OK)
5859 		return status;
5860 
5861 	if ((uint32)index + 1 > numEntries) {
5862 		*_numEntries = index;
5863 		return B_BUFFER_OVERFLOW;
5864 	}
5865 
5866 	*_numEntries = index + 1;
5867 	return B_OK;
5868 }
5869 
5870 
5871 /*!	According to the BeBook, this function should always succeed.
5872 	This is no longer the case.
5873 */
5874 extern "C" int32
5875 __get_memory_map_haiku(const void* address, size_t numBytes,
5876 	physical_entry* table, int32 numEntries)
5877 {
5878 	uint32 entriesRead = numEntries;
5879 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5880 		table, &entriesRead);
5881 	if (error != B_OK)
5882 		return error;
5883 
5884 	// close the entry list
5885 
5886 	// if it's only one entry, we will silently accept the missing ending
5887 	if (numEntries == 1)
5888 		return B_OK;
5889 
5890 	if (entriesRead + 1 > (uint32)numEntries)
5891 		return B_BUFFER_OVERFLOW;
5892 
5893 	table[entriesRead].address = 0;
5894 	table[entriesRead].size = 0;
5895 
5896 	return B_OK;
5897 }
5898 
5899 
5900 area_id
5901 area_for(void* address)
5902 {
5903 	return vm_area_for((addr_t)address, true);
5904 }
5905 
5906 
5907 area_id
5908 find_area(const char* name)
5909 {
5910 	return VMAreaHash::Find(name);
5911 }
5912 
5913 
5914 status_t
5915 _get_area_info(area_id id, area_info* info, size_t size)
5916 {
5917 	if (size != sizeof(area_info) || info == NULL)
5918 		return B_BAD_VALUE;
5919 
5920 	AddressSpaceReadLocker locker;
5921 	VMArea* area;
5922 	status_t status = locker.SetFromArea(id, area);
5923 	if (status != B_OK)
5924 		return status;
5925 
5926 	fill_area_info(area, info, size);
5927 	return B_OK;
5928 }
5929 
5930 
5931 status_t
5932 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5933 {
5934 	addr_t nextBase = *(addr_t*)cookie;
5935 
5936 	// we're already through the list
5937 	if (nextBase == (addr_t)-1)
5938 		return B_ENTRY_NOT_FOUND;
5939 
5940 	if (team == B_CURRENT_TEAM)
5941 		team = team_get_current_team_id();
5942 
5943 	AddressSpaceReadLocker locker(team);
5944 	if (!locker.IsLocked())
5945 		return B_BAD_TEAM_ID;
5946 
5947 	VMArea* area;
5948 	for (VMAddressSpace::AreaIterator it
5949 				= locker.AddressSpace()->GetAreaIterator();
5950 			(area = it.Next()) != NULL;) {
5951 		if (area->Base() > nextBase)
5952 			break;
5953 	}
5954 
5955 	if (area == NULL) {
5956 		nextBase = (addr_t)-1;
5957 		return B_ENTRY_NOT_FOUND;
5958 	}
5959 
5960 	fill_area_info(area, info, size);
5961 	*cookie = (ssize_t)(area->Base());
5962 
5963 	return B_OK;
5964 }
5965 
5966 
5967 status_t
5968 set_area_protection(area_id area, uint32 newProtection)
5969 {
5970 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5971 		newProtection, true);
5972 }
5973 
5974 
5975 status_t
5976 resize_area(area_id areaID, size_t newSize)
5977 {
5978 	return vm_resize_area(areaID, newSize, true);
5979 }
5980 
5981 
5982 /*!	Transfers the specified area to a new team. The caller must be the owner
5983 	of the area.
5984 */
5985 area_id
5986 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5987 	bool kernel)
5988 {
5989 	area_info info;
5990 	status_t status = get_area_info(id, &info);
5991 	if (status != B_OK)
5992 		return status;
5993 
5994 	if (info.team != thread_get_current_thread()->team->id)
5995 		return B_PERMISSION_DENIED;
5996 
5997 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5998 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5999 	if (clonedArea < 0)
6000 		return clonedArea;
6001 
6002 	status = vm_delete_area(info.team, id, kernel);
6003 	if (status != B_OK) {
6004 		vm_delete_area(target, clonedArea, kernel);
6005 		return status;
6006 	}
6007 
6008 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6009 
6010 	return clonedArea;
6011 }
6012 
6013 
6014 extern "C" area_id
6015 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6016 	size_t numBytes, uint32 addressSpec, uint32 protection,
6017 	void** _virtualAddress)
6018 {
6019 	if (!arch_vm_supports_protection(protection))
6020 		return B_NOT_SUPPORTED;
6021 
6022 	fix_protection(&protection);
6023 
6024 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6025 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6026 		false);
6027 }
6028 
6029 
6030 area_id
6031 clone_area(const char* name, void** _address, uint32 addressSpec,
6032 	uint32 protection, area_id source)
6033 {
6034 	if ((protection & B_KERNEL_PROTECTION) == 0)
6035 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6036 
6037 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6038 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6039 }
6040 
6041 
6042 area_id
6043 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock,
6044 	uint32 protection, uint32 flags, uint32 guardSize,
6045 	const virtual_address_restrictions* virtualAddressRestrictions,
6046 	const physical_address_restrictions* physicalAddressRestrictions,
6047 	void** _address)
6048 {
6049 	fix_protection(&protection);
6050 
6051 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6052 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6053 		true, _address);
6054 }
6055 
6056 
6057 extern "C" area_id
6058 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6059 	size_t size, uint32 lock, uint32 protection)
6060 {
6061 	fix_protection(&protection);
6062 
6063 	virtual_address_restrictions virtualRestrictions = {};
6064 	virtualRestrictions.address = *_address;
6065 	virtualRestrictions.address_specification = addressSpec;
6066 	physical_address_restrictions physicalRestrictions = {};
6067 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6068 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6069 		true, _address);
6070 }
6071 
6072 
6073 status_t
6074 delete_area(area_id area)
6075 {
6076 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6077 }
6078 
6079 
6080 //	#pragma mark - Userland syscalls
6081 
6082 
6083 status_t
6084 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6085 	addr_t size)
6086 {
6087 	// filter out some unavailable values (for userland)
6088 	switch (addressSpec) {
6089 		case B_ANY_KERNEL_ADDRESS:
6090 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6091 			return B_BAD_VALUE;
6092 	}
6093 
6094 	addr_t address;
6095 
6096 	if (!IS_USER_ADDRESS(userAddress)
6097 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6098 		return B_BAD_ADDRESS;
6099 
6100 	status_t status = vm_reserve_address_range(
6101 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6102 		RESERVED_AVOID_BASE);
6103 	if (status != B_OK)
6104 		return status;
6105 
6106 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6107 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6108 			(void*)address, size);
6109 		return B_BAD_ADDRESS;
6110 	}
6111 
6112 	return B_OK;
6113 }
6114 
6115 
6116 status_t
6117 _user_unreserve_address_range(addr_t address, addr_t size)
6118 {
6119 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6120 		(void*)address, size);
6121 }
6122 
6123 
6124 area_id
6125 _user_area_for(void* address)
6126 {
6127 	return vm_area_for((addr_t)address, false);
6128 }
6129 
6130 
6131 area_id
6132 _user_find_area(const char* userName)
6133 {
6134 	char name[B_OS_NAME_LENGTH];
6135 
6136 	if (!IS_USER_ADDRESS(userName)
6137 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6138 		return B_BAD_ADDRESS;
6139 
6140 	return find_area(name);
6141 }
6142 
6143 
6144 status_t
6145 _user_get_area_info(area_id area, area_info* userInfo)
6146 {
6147 	if (!IS_USER_ADDRESS(userInfo))
6148 		return B_BAD_ADDRESS;
6149 
6150 	area_info info;
6151 	status_t status = get_area_info(area, &info);
6152 	if (status < B_OK)
6153 		return status;
6154 
6155 	// TODO: do we want to prevent userland from seeing kernel protections?
6156 	//info.protection &= B_USER_PROTECTION;
6157 
6158 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6159 		return B_BAD_ADDRESS;
6160 
6161 	return status;
6162 }
6163 
6164 
6165 status_t
6166 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6167 {
6168 	ssize_t cookie;
6169 
6170 	if (!IS_USER_ADDRESS(userCookie)
6171 		|| !IS_USER_ADDRESS(userInfo)
6172 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6173 		return B_BAD_ADDRESS;
6174 
6175 	area_info info;
6176 	status_t status = _get_next_area_info(team, &cookie, &info,
6177 		sizeof(area_info));
6178 	if (status != B_OK)
6179 		return status;
6180 
6181 	//info.protection &= B_USER_PROTECTION;
6182 
6183 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6184 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6185 		return B_BAD_ADDRESS;
6186 
6187 	return status;
6188 }
6189 
6190 
6191 status_t
6192 _user_set_area_protection(area_id area, uint32 newProtection)
6193 {
6194 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6195 		return B_BAD_VALUE;
6196 
6197 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6198 		newProtection, false);
6199 }
6200 
6201 
6202 status_t
6203 _user_resize_area(area_id area, size_t newSize)
6204 {
6205 	// TODO: Since we restrict deleting of areas to those owned by the team,
6206 	// we should also do that for resizing (check other functions, too).
6207 	return vm_resize_area(area, newSize, false);
6208 }
6209 
6210 
6211 area_id
6212 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6213 	team_id target)
6214 {
6215 	// filter out some unavailable values (for userland)
6216 	switch (addressSpec) {
6217 		case B_ANY_KERNEL_ADDRESS:
6218 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6219 			return B_BAD_VALUE;
6220 	}
6221 
6222 	void* address;
6223 	if (!IS_USER_ADDRESS(userAddress)
6224 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6225 		return B_BAD_ADDRESS;
6226 
6227 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6228 	if (newArea < B_OK)
6229 		return newArea;
6230 
6231 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6232 		return B_BAD_ADDRESS;
6233 
6234 	return newArea;
6235 }
6236 
6237 
6238 area_id
6239 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6240 	uint32 protection, area_id sourceArea)
6241 {
6242 	char name[B_OS_NAME_LENGTH];
6243 	void* address;
6244 
6245 	// filter out some unavailable values (for userland)
6246 	switch (addressSpec) {
6247 		case B_ANY_KERNEL_ADDRESS:
6248 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6249 			return B_BAD_VALUE;
6250 	}
6251 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6252 		return B_BAD_VALUE;
6253 
6254 	if (!IS_USER_ADDRESS(userName)
6255 		|| !IS_USER_ADDRESS(userAddress)
6256 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6257 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6258 		return B_BAD_ADDRESS;
6259 
6260 	fix_protection(&protection);
6261 
6262 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6263 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6264 		false);
6265 	if (clonedArea < B_OK)
6266 		return clonedArea;
6267 
6268 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6269 		delete_area(clonedArea);
6270 		return B_BAD_ADDRESS;
6271 	}
6272 
6273 	return clonedArea;
6274 }
6275 
6276 
6277 area_id
6278 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6279 	size_t size, uint32 lock, uint32 protection)
6280 {
6281 	char name[B_OS_NAME_LENGTH];
6282 	void* address;
6283 
6284 	// filter out some unavailable values (for userland)
6285 	switch (addressSpec) {
6286 		case B_ANY_KERNEL_ADDRESS:
6287 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6288 			return B_BAD_VALUE;
6289 	}
6290 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6291 		return B_BAD_VALUE;
6292 
6293 	if (!IS_USER_ADDRESS(userName)
6294 		|| !IS_USER_ADDRESS(userAddress)
6295 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6296 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6297 		return B_BAD_ADDRESS;
6298 
6299 	if (addressSpec == B_EXACT_ADDRESS
6300 		&& IS_KERNEL_ADDRESS(address))
6301 		return B_BAD_VALUE;
6302 
6303 	if (addressSpec == B_ANY_ADDRESS)
6304 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6305 	if (addressSpec == B_BASE_ADDRESS)
6306 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6307 
6308 	fix_protection(&protection);
6309 
6310 	virtual_address_restrictions virtualRestrictions = {};
6311 	virtualRestrictions.address = address;
6312 	virtualRestrictions.address_specification = addressSpec;
6313 	physical_address_restrictions physicalRestrictions = {};
6314 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6315 		size, lock, protection, 0, 0, &virtualRestrictions,
6316 		&physicalRestrictions, false, &address);
6317 
6318 	if (area >= B_OK
6319 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6320 		delete_area(area);
6321 		return B_BAD_ADDRESS;
6322 	}
6323 
6324 	return area;
6325 }
6326 
6327 
6328 status_t
6329 _user_delete_area(area_id area)
6330 {
6331 	// Unlike the BeOS implementation, you can now only delete areas
6332 	// that you have created yourself from userland.
6333 	// The documentation to delete_area() explicitly states that this
6334 	// will be restricted in the future, and so it will.
6335 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6336 }
6337 
6338 
6339 // TODO: create a BeOS style call for this!
6340 
6341 area_id
6342 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6343 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6344 	int fd, off_t offset)
6345 {
6346 	char name[B_OS_NAME_LENGTH];
6347 	void* address;
6348 	area_id area;
6349 
6350 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6351 		return B_BAD_VALUE;
6352 
6353 	fix_protection(&protection);
6354 
6355 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6356 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6357 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6358 		return B_BAD_ADDRESS;
6359 
6360 	if (addressSpec == B_EXACT_ADDRESS) {
6361 		if ((addr_t)address + size < (addr_t)address
6362 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6363 			return B_BAD_VALUE;
6364 		}
6365 		if (!IS_USER_ADDRESS(address)
6366 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6367 			return B_BAD_ADDRESS;
6368 		}
6369 	}
6370 
6371 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6372 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6373 		false);
6374 	if (area < B_OK)
6375 		return area;
6376 
6377 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6378 		return B_BAD_ADDRESS;
6379 
6380 	return area;
6381 }
6382 
6383 
6384 status_t
6385 _user_unmap_memory(void* _address, size_t size)
6386 {
6387 	addr_t address = (addr_t)_address;
6388 
6389 	// check params
6390 	if (size == 0 || (addr_t)address + size < (addr_t)address
6391 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6392 		return B_BAD_VALUE;
6393 	}
6394 
6395 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6396 		return B_BAD_ADDRESS;
6397 
6398 	// Write lock the address space and ensure the address range is not wired.
6399 	AddressSpaceWriteLocker locker;
6400 	do {
6401 		status_t status = locker.SetTo(team_get_current_team_id());
6402 		if (status != B_OK)
6403 			return status;
6404 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6405 			size, &locker));
6406 
6407 	// unmap
6408 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6409 }
6410 
6411 
6412 status_t
6413 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6414 {
6415 	// check address range
6416 	addr_t address = (addr_t)_address;
6417 	size = PAGE_ALIGN(size);
6418 
6419 	if ((address % B_PAGE_SIZE) != 0)
6420 		return B_BAD_VALUE;
6421 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6422 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6423 		// weird error code required by POSIX
6424 		return ENOMEM;
6425 	}
6426 
6427 	// extend and check protection
6428 	if ((protection & ~B_USER_PROTECTION) != 0)
6429 		return B_BAD_VALUE;
6430 
6431 	fix_protection(&protection);
6432 
6433 	// We need to write lock the address space, since we're going to play with
6434 	// the areas. Also make sure that none of the areas is wired and that we're
6435 	// actually allowed to change the protection.
6436 	AddressSpaceWriteLocker locker;
6437 
6438 	bool restart;
6439 	do {
6440 		restart = false;
6441 
6442 		status_t status = locker.SetTo(team_get_current_team_id());
6443 		if (status != B_OK)
6444 			return status;
6445 
6446 		// First round: Check whether the whole range is covered by areas and we
6447 		// are allowed to modify them.
6448 		addr_t currentAddress = address;
6449 		size_t sizeLeft = size;
6450 		while (sizeLeft > 0) {
6451 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6452 			if (area == NULL)
6453 				return B_NO_MEMORY;
6454 
6455 			if ((area->protection & B_KERNEL_AREA) != 0)
6456 				return B_NOT_ALLOWED;
6457 
6458 			// TODO: For (shared) mapped files we should check whether the new
6459 			// protections are compatible with the file permissions. We don't
6460 			// have a way to do that yet, though.
6461 
6462 			addr_t offset = currentAddress - area->Base();
6463 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6464 
6465 			AreaCacheLocker cacheLocker(area);
6466 
6467 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6468 					&locker, &cacheLocker)) {
6469 				restart = true;
6470 				break;
6471 			}
6472 
6473 			cacheLocker.Unlock();
6474 
6475 			currentAddress += rangeSize;
6476 			sizeLeft -= rangeSize;
6477 		}
6478 	} while (restart);
6479 
6480 	// Second round: If the protections differ from that of the area, create a
6481 	// page protection array and re-map mapped pages.
6482 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6483 	addr_t currentAddress = address;
6484 	size_t sizeLeft = size;
6485 	while (sizeLeft > 0) {
6486 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6487 		if (area == NULL)
6488 			return B_NO_MEMORY;
6489 
6490 		addr_t offset = currentAddress - area->Base();
6491 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6492 
6493 		currentAddress += rangeSize;
6494 		sizeLeft -= rangeSize;
6495 
6496 		if (area->page_protections == NULL) {
6497 			if (area->protection == protection)
6498 				continue;
6499 
6500 			status_t status = allocate_area_page_protections(area);
6501 			if (status != B_OK)
6502 				return status;
6503 		}
6504 
6505 		// We need to lock the complete cache chain, since we potentially unmap
6506 		// pages of lower caches.
6507 		VMCache* topCache = vm_area_get_locked_cache(area);
6508 		VMCacheChainLocker cacheChainLocker(topCache);
6509 		cacheChainLocker.LockAllSourceCaches();
6510 
6511 		for (addr_t pageAddress = area->Base() + offset;
6512 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6513 			map->Lock();
6514 
6515 			set_area_page_protection(area, pageAddress, protection);
6516 
6517 			phys_addr_t physicalAddress;
6518 			uint32 flags;
6519 
6520 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6521 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6522 				map->Unlock();
6523 				continue;
6524 			}
6525 
6526 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6527 			if (page == NULL) {
6528 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6529 					"\n", area, physicalAddress);
6530 				map->Unlock();
6531 				return B_ERROR;
6532 			}
6533 
6534 			// If the page is not in the topmost cache and write access is
6535 			// requested, we have to unmap it. Otherwise we can re-map it with
6536 			// the new protection.
6537 			bool unmapPage = page->Cache() != topCache
6538 				&& (protection & B_WRITE_AREA) != 0;
6539 
6540 			if (!unmapPage)
6541 				map->ProtectPage(area, pageAddress, protection);
6542 
6543 			map->Unlock();
6544 
6545 			if (unmapPage) {
6546 				DEBUG_PAGE_ACCESS_START(page);
6547 				unmap_page(area, pageAddress);
6548 				DEBUG_PAGE_ACCESS_END(page);
6549 			}
6550 		}
6551 	}
6552 
6553 	return B_OK;
6554 }
6555 
6556 
6557 status_t
6558 _user_sync_memory(void* _address, size_t size, uint32 flags)
6559 {
6560 	addr_t address = (addr_t)_address;
6561 	size = PAGE_ALIGN(size);
6562 
6563 	// check params
6564 	if ((address % B_PAGE_SIZE) != 0)
6565 		return B_BAD_VALUE;
6566 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6567 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6568 		// weird error code required by POSIX
6569 		return ENOMEM;
6570 	}
6571 
6572 	bool writeSync = (flags & MS_SYNC) != 0;
6573 	bool writeAsync = (flags & MS_ASYNC) != 0;
6574 	if (writeSync && writeAsync)
6575 		return B_BAD_VALUE;
6576 
6577 	if (size == 0 || (!writeSync && !writeAsync))
6578 		return B_OK;
6579 
6580 	// iterate through the range and sync all concerned areas
6581 	while (size > 0) {
6582 		// read lock the address space
6583 		AddressSpaceReadLocker locker;
6584 		status_t error = locker.SetTo(team_get_current_team_id());
6585 		if (error != B_OK)
6586 			return error;
6587 
6588 		// get the first area
6589 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6590 		if (area == NULL)
6591 			return B_NO_MEMORY;
6592 
6593 		uint32 offset = address - area->Base();
6594 		size_t rangeSize = min_c(area->Size() - offset, size);
6595 		offset += area->cache_offset;
6596 
6597 		// lock the cache
6598 		AreaCacheLocker cacheLocker(area);
6599 		if (!cacheLocker)
6600 			return B_BAD_VALUE;
6601 		VMCache* cache = area->cache;
6602 
6603 		locker.Unlock();
6604 
6605 		uint32 firstPage = offset >> PAGE_SHIFT;
6606 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6607 
6608 		// write the pages
6609 		if (cache->type == CACHE_TYPE_VNODE) {
6610 			if (writeSync) {
6611 				// synchronous
6612 				error = vm_page_write_modified_page_range(cache, firstPage,
6613 					endPage);
6614 				if (error != B_OK)
6615 					return error;
6616 			} else {
6617 				// asynchronous
6618 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6619 				// TODO: This is probably not quite what is supposed to happen.
6620 				// Especially when a lot has to be written, it might take ages
6621 				// until it really hits the disk.
6622 			}
6623 		}
6624 
6625 		address += rangeSize;
6626 		size -= rangeSize;
6627 	}
6628 
6629 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6630 	// synchronize multiple mappings of the same file. In our VM they never get
6631 	// out of sync, though, so we don't have to do anything.
6632 
6633 	return B_OK;
6634 }
6635 
6636 
6637 status_t
6638 _user_memory_advice(void* address, size_t size, uint32 advice)
6639 {
6640 	// TODO: Implement!
6641 	return B_OK;
6642 }
6643 
6644 
6645 status_t
6646 _user_get_memory_properties(team_id teamID, const void* address,
6647 	uint32* _protected, uint32* _lock)
6648 {
6649 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6650 		return B_BAD_ADDRESS;
6651 
6652 	AddressSpaceReadLocker locker;
6653 	status_t error = locker.SetTo(teamID);
6654 	if (error != B_OK)
6655 		return error;
6656 
6657 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6658 	if (area == NULL)
6659 		return B_NO_MEMORY;
6660 
6661 
6662 	uint32 protection = area->protection;
6663 	if (area->page_protections != NULL)
6664 		protection = get_area_page_protection(area, (addr_t)address);
6665 
6666 	uint32 wiring = area->wiring;
6667 
6668 	locker.Unlock();
6669 
6670 	error = user_memcpy(_protected, &protection, sizeof(protection));
6671 	if (error != B_OK)
6672 		return error;
6673 
6674 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6675 
6676 	return error;
6677 }
6678 
6679 
6680 // #pragma mark -- compatibility
6681 
6682 
6683 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6684 
6685 
6686 struct physical_entry_beos {
6687 	uint32	address;
6688 	uint32	size;
6689 };
6690 
6691 
6692 /*!	The physical_entry structure has changed. We need to translate it to the
6693 	old one.
6694 */
6695 extern "C" int32
6696 __get_memory_map_beos(const void* _address, size_t numBytes,
6697 	physical_entry_beos* table, int32 numEntries)
6698 {
6699 	if (numEntries <= 0)
6700 		return B_BAD_VALUE;
6701 
6702 	const uint8* address = (const uint8*)_address;
6703 
6704 	int32 count = 0;
6705 	while (numBytes > 0 && count < numEntries) {
6706 		physical_entry entry;
6707 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6708 		if (result < 0) {
6709 			if (result != B_BUFFER_OVERFLOW)
6710 				return result;
6711 		}
6712 
6713 		if (entry.address >= (phys_addr_t)1 << 32) {
6714 			panic("get_memory_map(): Address is greater 4 GB!");
6715 			return B_ERROR;
6716 		}
6717 
6718 		table[count].address = entry.address;
6719 		table[count++].size = entry.size;
6720 
6721 		address += entry.size;
6722 		numBytes -= entry.size;
6723 	}
6724 
6725 	// null-terminate the table, if possible
6726 	if (count < numEntries) {
6727 		table[count].address = 0;
6728 		table[count].size = 0;
6729 	}
6730 
6731 	return B_OK;
6732 }
6733 
6734 
6735 /*!	The type of the \a physicalAddress parameter has changed from void* to
6736 	phys_addr_t.
6737 */
6738 extern "C" area_id
6739 __map_physical_memory_beos(const char* name, void* physicalAddress,
6740 	size_t numBytes, uint32 addressSpec, uint32 protection,
6741 	void** _virtualAddress)
6742 {
6743 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6744 		addressSpec, protection, _virtualAddress);
6745 }
6746 
6747 
6748 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6749 	we meddle with the \a lock parameter to force 32 bit.
6750 */
6751 extern "C" area_id
6752 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6753 	size_t size, uint32 lock, uint32 protection)
6754 {
6755 	switch (lock) {
6756 		case B_NO_LOCK:
6757 			break;
6758 		case B_FULL_LOCK:
6759 		case B_LAZY_LOCK:
6760 			lock = B_32_BIT_FULL_LOCK;
6761 			break;
6762 		case B_CONTIGUOUS:
6763 			lock = B_32_BIT_CONTIGUOUS;
6764 			break;
6765 	}
6766 
6767 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6768 		protection);
6769 }
6770 
6771 
6772 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6773 	"BASE");
6774 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6775 	"map_physical_memory@", "BASE");
6776 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6777 	"BASE");
6778 
6779 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6780 	"get_memory_map@@", "1_ALPHA3");
6781 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6782 	"map_physical_memory@@", "1_ALPHA3");
6783 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6784 	"1_ALPHA3");
6785 
6786 
6787 #else
6788 
6789 
6790 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6791 	"get_memory_map@@", "BASE");
6792 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6793 	"map_physical_memory@@", "BASE");
6794 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6795 	"BASE");
6796 
6797 
6798 #endif	// defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6799