xref: /haiku/src/system/kernel/vm/vm.cpp (revision 60a6f1d5d7a8715cd3897dd0b626f2e4a64984a8)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 namespace {
77 
78 class AreaCacheLocking {
79 public:
80 	inline bool Lock(VMCache* lockable)
81 	{
82 		return false;
83 	}
84 
85 	inline void Unlock(VMCache* lockable)
86 	{
87 		vm_area_put_locked_cache(lockable);
88 	}
89 };
90 
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 	inline AreaCacheLocker(VMCache* cache = NULL)
94 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95 	{
96 	}
97 
98 	inline AreaCacheLocker(VMArea* area)
99 		: AutoLocker<VMCache, AreaCacheLocking>()
100 	{
101 		SetTo(area);
102 	}
103 
104 	inline void SetTo(VMCache* cache, bool alreadyLocked)
105 	{
106 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107 	}
108 
109 	inline void SetTo(VMArea* area)
110 	{
111 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113 	}
114 };
115 
116 
117 class VMCacheChainLocker {
118 public:
119 	VMCacheChainLocker()
120 		:
121 		fTopCache(NULL),
122 		fBottomCache(NULL)
123 	{
124 	}
125 
126 	VMCacheChainLocker(VMCache* topCache)
127 		:
128 		fTopCache(topCache),
129 		fBottomCache(topCache)
130 	{
131 	}
132 
133 	~VMCacheChainLocker()
134 	{
135 		Unlock();
136 	}
137 
138 	void SetTo(VMCache* topCache)
139 	{
140 		fTopCache = topCache;
141 		fBottomCache = topCache;
142 
143 		if (topCache != NULL)
144 			topCache->SetUserData(NULL);
145 	}
146 
147 	VMCache* LockSourceCache()
148 	{
149 		if (fBottomCache == NULL || fBottomCache->source == NULL)
150 			return NULL;
151 
152 		VMCache* previousCache = fBottomCache;
153 
154 		fBottomCache = fBottomCache->source;
155 		fBottomCache->Lock();
156 		fBottomCache->AcquireRefLocked();
157 		fBottomCache->SetUserData(previousCache);
158 
159 		return fBottomCache;
160 	}
161 
162 	void LockAllSourceCaches()
163 	{
164 		while (LockSourceCache() != NULL) {
165 		}
166 	}
167 
168 	void Unlock(VMCache* exceptCache = NULL)
169 	{
170 		if (fTopCache == NULL)
171 			return;
172 
173 		// Unlock caches in source -> consumer direction. This is important to
174 		// avoid double-locking and a reversal of locking order in case a cache
175 		// is eligible for merging.
176 		VMCache* cache = fBottomCache;
177 		while (cache != NULL) {
178 			VMCache* nextCache = (VMCache*)cache->UserData();
179 			if (cache != exceptCache)
180 				cache->ReleaseRefAndUnlock(cache != fTopCache);
181 
182 			if (cache == fTopCache)
183 				break;
184 
185 			cache = nextCache;
186 		}
187 
188 		fTopCache = NULL;
189 		fBottomCache = NULL;
190 	}
191 
192 	void UnlockKeepRefs(bool keepTopCacheLocked)
193 	{
194 		if (fTopCache == NULL)
195 			return;
196 
197 		VMCache* nextCache = fBottomCache;
198 		VMCache* cache = NULL;
199 
200 		while (keepTopCacheLocked
201 				? nextCache != fTopCache : cache != fTopCache) {
202 			cache = nextCache;
203 			nextCache = (VMCache*)cache->UserData();
204 			cache->Unlock(cache != fTopCache);
205 		}
206 	}
207 
208 	void RelockCaches(bool topCacheLocked)
209 	{
210 		if (fTopCache == NULL)
211 			return;
212 
213 		VMCache* nextCache = fTopCache;
214 		VMCache* cache = NULL;
215 		if (topCacheLocked) {
216 			cache = nextCache;
217 			nextCache = cache->source;
218 		}
219 
220 		while (cache != fBottomCache && nextCache != NULL) {
221 			VMCache* consumer = cache;
222 			cache = nextCache;
223 			nextCache = cache->source;
224 			cache->Lock();
225 			cache->SetUserData(consumer);
226 		}
227 	}
228 
229 private:
230 	VMCache*	fTopCache;
231 	VMCache*	fBottomCache;
232 };
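

// Illustrative usage sketch, not part of the original source: the typical
// VMCacheChainLocker pattern as used e.g. by cut_area() below -- lock and
// reference an area's top cache, hand it to the locker, then lock the whole
// source chain. Compiled out; it only documents the intended call sequence.
#if 0
static void
example_lock_cache_chain(VMArea* area)
{
	// lock the area's top cache and acquire a reference to it
	VMCache* cache = vm_area_get_locked_cache(area);

	// the locker adopts the already locked top cache ...
	VMCacheChainLocker cacheChainLocker(cache);

	// ... and then locks (and references) every cache down the source chain
	cacheChainLocker.LockAllSourceCaches();

	// work on the fully locked cache chain here

	// going out of scope unlocks the chain in source -> consumer order and
	// drops the references again
}
#endif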
233 
234 } // namespace
235 
236 
237 // The memory reserve that an allocation of a certain priority must not touch.
238 static const size_t kMemoryReserveForPriority[] = {
239 	VM_MEMORY_RESERVE_USER,		// user
240 	VM_MEMORY_RESERVE_SYSTEM,	// system
241 	0							// VIP
242 };
243 
244 
245 ObjectCache* gPageMappingsObjectCache;
246 
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248 
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
253 
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
255 
256 #if DEBUG_CACHE_LIST
257 
258 struct cache_info {
259 	VMCache*	cache;
260 	addr_t		page_count;
261 	addr_t		committed;
262 };
263 
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
266 
267 #endif	// DEBUG_CACHE_LIST
268 
269 
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 	bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 	int protection, int mapping, uint32 flags,
278 	const virtual_address_restrictions* addressRestrictions, bool kernel,
279 	VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
281 
282 
283 //	#pragma mark -
284 
285 
286 #if VM_PAGE_FAULT_TRACING
287 
288 namespace VMPageFaultTracing {
289 
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293 		:
294 		fAddress(address),
295 		fPC(pc),
296 		fWrite(write),
297 		fUser(user)
298 	{
299 		Initialized();
300 	}
301 
302 	virtual void AddDump(TraceOutput& out)
303 	{
304 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306 	}
307 
308 private:
309 	addr_t	fAddress;
310 	addr_t	fPC;
311 	bool	fWrite;
312 	bool	fUser;
313 };
314 
315 
316 // page fault errors
317 enum {
318 	PAGE_FAULT_ERROR_NO_AREA		= 0,
319 	PAGE_FAULT_ERROR_KERNEL_ONLY,
320 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 	PAGE_FAULT_ERROR_READ_PROTECTED,
322 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325 };
326 
327 
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 	PageFaultError(area_id area, status_t error)
331 		:
332 		fArea(area),
333 		fError(error)
334 	{
335 		Initialized();
336 	}
337 
338 	virtual void AddDump(TraceOutput& out)
339 	{
340 		switch (fError) {
341 			case PAGE_FAULT_ERROR_NO_AREA:
342 				out.Print("page fault error: no area");
343 				break;
344 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 				out.Print("page fault error: area: %ld, kernel only", fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 				out.Print("page fault error: area: %ld, write protected",
349 					fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_READ_PROTECTED:
352 				out.Print("page fault error: area: %ld, read protected", fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 				out.Print("page fault error: area: %ld, execute protected",
356 					fArea);
357 				break;
358 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 				out.Print("page fault error: kernel touching bad user memory");
360 				break;
361 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 				out.Print("page fault error: no address space");
363 				break;
364 			default:
365 				out.Print("page fault error: area: %ld, error: %s", fArea,
366 					strerror(fError));
367 				break;
368 		}
369 	}
370 
371 private:
372 	area_id		fArea;
373 	status_t	fError;
374 };
375 
376 
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 			vm_page* page)
381 		:
382 		fArea(area),
383 		fTopCache(topCache),
384 		fCache(cache),
385 		fPage(page)
386 	{
387 		Initialized();
388 	}
389 
390 	virtual void AddDump(TraceOutput& out)
391 	{
392 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 			"page: %p", fArea, fTopCache, fCache, fPage);
394 	}
395 
396 private:
397 	area_id		fArea;
398 	VMCache*	fTopCache;
399 	VMCache*	fCache;
400 	vm_page*	fPage;
401 };
402 
403 }	// namespace VMPageFaultTracing
404 
405 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 #	define TPF(x) ;
408 #endif	// VM_PAGE_FAULT_TRACING
409 
410 
411 //	#pragma mark -
412 
413 
414 /*!	The page's cache must be locked.
415 */
416 static inline void
417 increment_page_wired_count(vm_page* page)
418 {
419 	if (!page->IsMapped())
420 		atomic_add(&gMappedPagesCount, 1);
421 	page->IncrementWiredCount();
422 }
423 
424 
425 /*!	The page's cache must be locked.
426 */
427 static inline void
428 decrement_page_wired_count(vm_page* page)
429 {
430 	page->DecrementWiredCount();
431 	if (!page->IsMapped())
432 		atomic_add(&gMappedPagesCount, -1);
433 }
434 
435 
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
438 {
439 	return area->Base()
440 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441 }
442 
443 
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
447 {
448 	VMAreaHash::ReadLock();
449 
450 	VMArea* area = VMAreaHash::LookupLocked(id);
451 	if (area != NULL && area->address_space != addressSpace)
452 		area = NULL;
453 
454 	VMAreaHash::ReadUnlock();
455 
456 	return area;
457 }
458 
459 
460 static status_t
461 allocate_area_page_protections(VMArea* area)
462 {
463 	// In the page protections we store only the three user protections,
464 	// so we use 4 bits per page.
465 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 	area->page_protections = (uint8*)malloc_etc(bytes,
467 		HEAP_DONT_LOCK_KERNEL_SPACE);
468 	if (area->page_protections == NULL)
469 		return B_NO_MEMORY;
470 
471 	// init the page protections for all pages to that of the area
472 	uint32 areaProtection = area->protection
473 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 	memset(area->page_protections, areaProtection | (areaProtection << 4),
475 		bytes);
476 	return B_OK;
477 }
478 
479 
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482 {
483 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 	uint8& entry = area->page_protections[pageIndex / 2];
486 	if (pageIndex % 2 == 0)
487 		entry = (entry & 0xf0) | protection;
488 	else
489 		entry = (entry & 0x0f) | (protection << 4);
490 }
491 
492 
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
495 {
496 	if (area->page_protections == NULL)
497 		return area->protection;
498 
499 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 	uint32 protection = area->page_protections[pageIndex / 2];
501 	if (pageIndex % 2 == 0)
502 		protection &= 0x0f;
503 	else
504 		protection >>= 4;
505 
506 	// If this is a kernel area we translate the user flags to kernel flags.
507 	if (area->address_space == VMAddressSpace::Kernel()) {
508 		uint32 kernelProtection = 0;
509 		if ((protection & B_READ_AREA) != 0)
510 			kernelProtection |= B_KERNEL_READ_AREA;
511 		if ((protection & B_WRITE_AREA) != 0)
512 			kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 		return kernelProtection;
515 	}
516 
517 	return protection | B_KERNEL_READ_AREA
518 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519 }
520 
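

// Illustrative sketch, not part of the original source: how the 4 bit per
// page protection packing implemented above behaves. Two pages share one
// byte of area->page_protections -- the even page index lives in the low
// nibble, the odd one in the high nibble. Assumes the usual B_READ_AREA == 1
// and B_WRITE_AREA == 2 definitions and that allocate_area_page_protections()
// has already been called for the area. Compiled out.
#if 0
static void
example_page_protection_packing(VMArea* area)
{
	addr_t firstPage = area->Base();
	addr_t secondPage = area->Base() + B_PAGE_SIZE;

	set_area_page_protection(area, firstPage, B_READ_AREA);
	set_area_page_protection(area, secondPage, B_READ_AREA | B_WRITE_AREA);

	// area->page_protections[0] is now 0x31: low nibble 0x1 (read) for page
	// 0, high nibble 0x3 (read | write) for page 1.

	// For an area in a user address space the getter re-adds the implicit
	// kernel bits, i.e. this yields B_READ_AREA | B_WRITE_AREA
	// | B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA.
	uint32 protection = get_area_page_protection(area, secondPage);
	(void)protection;
}
#endif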
521 
522 /*!	The caller must have reserved enough pages to cover whatever the
523 	translation map implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue,
575 		// since otherwise the page daemon wouldn't keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
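

// Illustrative sketch, not part of the original source: what a caller of
// map_page() is expected to provide -- a sufficiently large page reservation
// and the locked cache the page belongs to. It mirrors the B_FULL_LOCK loop
// in vm_create_anonymous_area() below; the caller is assumed to hold at
// least a read lock on the area's address space. Compiled out.
#if 0
static status_t
example_map_one_page(VMArea* area, VMCache* cache, off_t offset,
	addr_t address, uint32 protection)
{
	// reserve one page for the mapping itself plus whatever the translation
	// map backend may need
	VMTranslationMap* map = area->address_space->TranslationMap();
	size_t reservePages
		= map->MaxPagesNeededToMap(address, address + B_PAGE_SIZE - 1) + 1;

	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, reservePages, VM_PRIORITY_USER);

	cache->Lock();

	vm_page* page = vm_page_allocate_page(&reservation, PAGE_STATE_ACTIVE);
	cache->InsertPage(page, offset);

	status_t status = map_page(area, page, address, protection, &reservation);

	DEBUG_PAGE_ACCESS_END(page);
	cache->Unlock();

	vm_page_unreserve_pages(&reservation);
	return status;
}
#endif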
586 
587 
588 /*!	The caller must hold the lock of the page's cache.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	The caller must hold the locks of all mapped pages' caches.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 static inline bool
610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
611 {
612 	if (address < area->Base()) {
613 		offset = area->Base() - address;
614 		if (offset >= size)
615 			return false;
616 
617 		address = area->Base();
618 		size -= offset;
619 		offset = 0;
620 		if (size > area->Size())
621 			size = area->Size();
622 
623 		return true;
624 	}
625 
626 	offset = address - area->Base();
627 	if (offset >= area->Size())
628 		return false;
629 
630 	if (size >= area->Size() - offset)
631 		size = area->Size() - offset;
632 
633 	return true;
634 }
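

// Worked example, not part of the original source: for an area with
// Base() == 0x10000 and Size() == 0x4000, a request with address == 0xf000
// and size == 0x3000 is clipped to the area -- intersect_area() returns true
// with address == 0x10000, size == 0x2000 and offset == 0. A request lying
// entirely outside the area (e.g. address == 0x20000) makes it return false.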
635 
636 
637 /*!	Cuts a piece out of an area. If the given cut range covers the complete
638 	area, it is deleted. If it covers the beginning or the end, the area is
639 	resized accordingly. If the range covers some part in the middle of the
640 	area, it is split in two; in this case the second area is returned via
641 	\a _secondArea (the variable is left untouched in the other cases).
642 	The address space must be write locked.
643 	The caller must ensure that no part of the given range is wired.
644 */
645 static status_t
646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
647 	addr_t size, VMArea** _secondArea, bool kernel)
648 {
649 	addr_t offset;
650 	if (!intersect_area(area, address, size, offset))
651 		return B_OK;
652 
653 	// Is the area fully covered?
654 	if (address == area->Base() && size == area->Size()) {
655 		delete_area(addressSpace, area, false);
656 		return B_OK;
657 	}
658 
659 	int priority;
660 	uint32 allocationFlags;
661 	if (addressSpace == VMAddressSpace::Kernel()) {
662 		priority = VM_PRIORITY_SYSTEM;
663 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
664 			| HEAP_DONT_LOCK_KERNEL_SPACE;
665 	} else {
666 		priority = VM_PRIORITY_USER;
667 		allocationFlags = 0;
668 	}
669 
670 	VMCache* cache = vm_area_get_locked_cache(area);
671 	VMCacheChainLocker cacheChainLocker(cache);
672 	cacheChainLocker.LockAllSourceCaches();
673 
674 	// If no one else uses the area's cache and it's an anonymous cache, we can
675 	// resize or split it, too.
676 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
677 		&& cache->consumers.IsEmpty() && cache->type == CACHE_TYPE_RAM;
678 
679 	// Cut the end only?
680 	if (offset > 0 && size == area->Size() - offset) {
681 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
682 			allocationFlags);
683 		if (error != B_OK)
684 			return error;
685 
686 		// unmap pages
687 		unmap_pages(area, address, size);
688 
689 		if (onlyCacheUser) {
690 			// Since VMCache::Resize() can temporarily drop the lock, we must
691 			// unlock all lower caches to prevent locking order inversion.
692 			cacheChainLocker.Unlock(cache);
693 			cache->Resize(cache->virtual_base + offset, priority);
694 			cache->ReleaseRefAndUnlock();
695 		}
696 
697 		return B_OK;
698 	}
699 
700 	// Cut the beginning only?
701 	if (area->Base() == address) {
702 		// resize the area
703 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
704 			allocationFlags);
705 		if (error != B_OK)
706 			return error;
707 
708 		// unmap pages
709 		unmap_pages(area, address, size);
710 
711 		if (onlyCacheUser) {
712 			// Since VMCache::Rebase() can temporarily drop the lock, we must
713 			// unlock all lower caches to prevent locking order inversion.
714 			cacheChainLocker.Unlock(cache);
715 			cache->Rebase(cache->virtual_base + size, priority);
716 			cache->ReleaseRefAndUnlock();
717 		}
718 		area->cache_offset += size;
719 
720 		return B_OK;
721 	}
722 
723 	// The tough part -- cut a piece out of the middle of the area.
724 	// We do that by shrinking the area to the beginning section and creating a
725 	// new area for the end section.
726 	addr_t firstNewSize = offset;
727 	addr_t secondBase = address + size;
728 	addr_t secondSize = area->Size() - offset - size;
729 
730 	// unmap pages
731 	unmap_pages(area, address, area->Size() - firstNewSize);
732 
733 	// resize the area
734 	addr_t oldSize = area->Size();
735 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
736 		allocationFlags);
737 	if (error != B_OK)
738 		return error;
739 
740 	virtual_address_restrictions addressRestrictions = {};
741 	addressRestrictions.address = (void*)secondBase;
742 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
743 	VMArea* secondArea;
744 
745 	if (onlyCacheUser) {
746 		// Create a new cache for the second area.
747 		VMCache* secondCache;
748 		error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0,
749 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
750 		if (error != B_OK) {
751 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
752 			return error;
753 		}
754 
755 		secondCache->Lock();
756 		secondCache->temporary = cache->temporary;
757 		secondCache->virtual_base = area->cache_offset;
758 		secondCache->virtual_end = area->cache_offset + secondSize;
759 
760 		// Transfer the concerned pages from the first cache.
761 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
762 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
763 			area->cache_offset);
764 
765 		if (error == B_OK) {
766 			// Since VMCache::Resize() can temporarily drop the lock, we must
767 			// unlock all lower caches to prevent locking order inversion.
768 			cacheChainLocker.Unlock(cache);
769 			cache->Resize(cache->virtual_base + firstNewSize, priority);
770 			// Don't unlock the cache yet because we might have to resize it
771 			// back.
772 
773 			// Map the second area.
774 			error = map_backing_store(addressSpace, secondCache,
775 				area->cache_offset, area->name, secondSize, area->wiring,
776 				area->protection, REGION_NO_PRIVATE_MAP, 0,
777 				&addressRestrictions, kernel, &secondArea, NULL);
778 		}
779 
780 		if (error != B_OK) {
781 			// Restore the original cache.
782 			cache->Resize(cache->virtual_base + oldSize, priority);
783 
784 			// Move the pages back.
785 			status_t readoptStatus = cache->Adopt(secondCache,
786 				area->cache_offset, secondSize, adoptOffset);
787 			if (readoptStatus != B_OK) {
788 				// Some (swap) pages have not been moved back and will be lost
789 				// once the second cache is deleted.
790 				panic("failed to restore cache range: %s",
791 					strerror(readoptStatus));
792 
793 				// TODO: Handle out of memory cases by freeing memory and
794 				// retrying.
795 			}
796 
797 			cache->ReleaseRefAndUnlock();
798 			secondCache->ReleaseRefAndUnlock();
799 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
800 			return error;
801 		}
802 
803 		// Now we can unlock it.
804 		cache->ReleaseRefAndUnlock();
805 		secondCache->Unlock();
806 	} else {
807 		error = map_backing_store(addressSpace, cache, area->cache_offset
808 			+ (secondBase - area->Base()),
809 			area->name, secondSize, area->wiring, area->protection,
810 			REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, kernel, &secondArea,
811 			NULL);
812 		if (error != B_OK) {
813 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
814 			return error;
815 		}
816 		// We need a cache reference for the new area.
817 		cache->AcquireRefLocked();
818 	}
819 
820 	if (_secondArea != NULL)
821 		*_secondArea = secondArea;
822 
823 	return B_OK;
824 }
825 
826 
827 /*!	Deletes or cuts all areas in the given address range.
828 	The address space must be write-locked.
829 	The caller must ensure that no part of the given range is wired.
830 */
831 static status_t
832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
833 	bool kernel)
834 {
835 	size = PAGE_ALIGN(size);
836 
837 	// Check whether the caller is allowed to modify the concerned areas.
838 	if (!kernel) {
839 		for (VMAddressSpace::AreaRangeIterator it
840 				= addressSpace->GetAreaRangeIterator(address, size);
841 			VMArea* area = it.Next();) {
842 
843 			if ((area->protection & B_KERNEL_AREA) != 0) {
844 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
845 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
846 					team_get_current_team_id(), area->id, area->name);
847 				return B_NOT_ALLOWED;
848 			}
849 		}
850 	}
851 
852 	for (VMAddressSpace::AreaRangeIterator it
853 			= addressSpace->GetAreaRangeIterator(address, size);
854 		VMArea* area = it.Next();) {
855 
856 		status_t error = cut_area(addressSpace, area, address, size, NULL,
857 			kernel);
858 		if (error != B_OK)
859 			return error;
860 			// Failing after already messing with areas is ugly, but we
861 			// can't do anything about it.
862 	}
863 
864 	return B_OK;
865 }
866 
867 
868 /*! You need to hold the lock of the cache and the write lock of the address
869 	space when calling this function.
870 	Note that in case of error your cache will be temporarily unlocked.
871 	If \a addressSpec is \c B_EXACT_ADDRESS and the
872 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
873 	that no part of the specified address range (base \c *_virtualAddress, size
874 	\a size) is wired.
875 */
876 static status_t
877 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
878 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
879 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
880 	bool kernel, VMArea** _area, void** _virtualAddress)
881 {
882 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
883 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
884 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
885 		addressRestrictions->address, offset, size,
886 		addressRestrictions->address_specification, wiring, protection,
887 		_area, areaName));
888 	cache->AssertLocked();
889 
890 	if (size == 0) {
891 #if KDEBUG
892 		panic("map_backing_store(): called with size=0 for area '%s'!",
893 			areaName);
894 #endif
895 		return B_BAD_VALUE;
896 	}
897 
898 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
899 		| HEAP_DONT_LOCK_KERNEL_SPACE;
900 	int priority;
901 	if (addressSpace != VMAddressSpace::Kernel()) {
902 		priority = VM_PRIORITY_USER;
903 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
904 		priority = VM_PRIORITY_VIP;
905 		allocationFlags |= HEAP_PRIORITY_VIP;
906 	} else
907 		priority = VM_PRIORITY_SYSTEM;
908 
909 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
910 		allocationFlags);
911 	if (area == NULL)
912 		return B_NO_MEMORY;
913 
914 	status_t status;
915 
916 	// if this is a private map, we need to create a new cache
917 	// to handle the private copies of pages as they are written to
918 	VMCache* sourceCache = cache;
919 	if (mapping == REGION_PRIVATE_MAP) {
920 		VMCache* newCache;
921 
922 		// create an anonymous cache
923 		status = VMCacheFactory::CreateAnonymousCache(newCache,
924 			(protection & B_STACK_AREA) != 0
925 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
926 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
927 		if (status != B_OK)
928 			goto err1;
929 
930 		newCache->Lock();
931 		newCache->temporary = 1;
932 		newCache->virtual_base = offset;
933 		newCache->virtual_end = offset + size;
934 
935 		cache->AddConsumer(newCache);
936 
937 		cache = newCache;
938 	}
939 
940 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
941 		status = cache->SetMinimalCommitment(size, priority);
942 		if (status != B_OK)
943 			goto err2;
944 	}
945 
946 	// check to see if this address space has entered DELETE state
947 	if (addressSpace->IsBeingDeleted()) {
948 		// okay, someone is trying to delete this address space now, so we can't
949 		// insert the area -- back out
950 		status = B_BAD_TEAM_ID;
951 		goto err2;
952 	}
953 
954 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
955 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
956 		status = unmap_address_range(addressSpace,
957 			(addr_t)addressRestrictions->address, size, kernel);
958 		if (status != B_OK)
959 			goto err2;
960 	}
961 
962 	status = addressSpace->InsertArea(area, size, addressRestrictions,
963 		allocationFlags, _virtualAddress);
964 	if (status == B_NO_MEMORY
965 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
966 		// TODO: At present, there is no way to notify the low_resource monitor
967 		// that kernel address space is fragmented, nor does it check for this
968 		// automatically. Due to how many locks are held, we cannot wait here
969 		// for space to be freed up, but it would be good to at least notify
970 		// that we tried and failed to allocate some amount.
971 	}
972 	if (status != B_OK)
973 		goto err2;
974 
975 	// attach the cache to the area
976 	area->cache = cache;
977 	area->cache_offset = offset;
978 
979 	// point the cache back to the area
980 	cache->InsertAreaLocked(area);
981 	if (mapping == REGION_PRIVATE_MAP)
982 		cache->Unlock();
983 
984 	// insert the area in the global area hash table
985 	VMAreaHash::Insert(area);
986 
987 	// grab a ref to the address space (the area holds this)
988 	addressSpace->Get();
989 
990 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
991 //		cache, sourceCache, areaName, area);
992 
993 	*_area = area;
994 	return B_OK;
995 
996 err2:
997 	if (mapping == REGION_PRIVATE_MAP) {
998 		// We created this cache, so we must delete it again. Note that we
999 		// need to temporarily unlock the source cache or we'll otherwise
1000 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1001 		sourceCache->Unlock();
1002 		cache->ReleaseRefAndUnlock();
1003 		sourceCache->Lock();
1004 	}
1005 err1:
1006 	addressSpace->DeleteArea(area, allocationFlags);
1007 	return status;
1008 }
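

// Illustrative sketch, not part of the original source: the canonical calling
// pattern for map_backing_store(), as used by vm_block_address_range() and
// the other area creation functions below -- create and lock a cache, hand it
// over, and drop the cache reference again if the mapping fails. The caller
// must hold the address space's write lock. Compiled out.
#if 0
static status_t
example_map_anonymous_cache(VMAddressSpace* addressSpace, const char* name,
	addr_t size, VMArea** _area)
{
	VMCache* cache;
	status_t status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0,
		true, VM_PRIORITY_SYSTEM);
	if (status != B_OK)
		return status;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->Lock();

	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;

	status = map_backing_store(addressSpace, cache, 0, name, size, B_NO_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, REGION_NO_PRIVATE_MAP, 0,
		&addressRestrictions, true, _area, NULL);
	if (status != B_OK) {
		// on failure the cache reference is still ours to release
		cache->ReleaseRefAndUnlock();
		return status;
	}

	cache->Unlock();
	return B_OK;
}
#endif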
1009 
1010 
1011 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1012 	  locker1, locker2).
1013 */
1014 template<typename LockerType1, typename LockerType2>
1015 static inline bool
1016 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1017 {
1018 	area->cache->AssertLocked();
1019 
1020 	VMAreaUnwiredWaiter waiter;
1021 	if (!area->AddWaiterIfWired(&waiter))
1022 		return false;
1023 
1024 	// unlock everything and wait
1025 	if (locker1 != NULL)
1026 		locker1->Unlock();
1027 	if (locker2 != NULL)
1028 		locker2->Unlock();
1029 
1030 	waiter.waitEntry.Wait();
1031 
1032 	return true;
1033 }
1034 
1035 
1036 /*!	Checks whether the given area has any wired ranges intersecting with the
1037 	specified range and waits, if so.
1038 
1039 	When it has to wait, the function calls \c Unlock() on both \a locker1
1040 	and \a locker2, if given.
1041 	The area's top cache must be locked and must be unlocked as a side effect
1042 	of calling \c Unlock() on either \a locker1 or \a locker2.
1043 
1044 	If the function does not have to wait it does not modify or unlock any
1045 	object.
1046 
1047 	\param area The area to be checked.
1048 	\param base The base address of the range to check.
1049 	\param size The size of the address range to check.
1050 	\param locker1 An object to be unlocked before starting to wait (may
1051 		be \c NULL).
1052 	\param locker2 An object to be unlocked before starting to wait (may
1053 		be \c NULL).
1054 	\return \c true, if the function had to wait, \c false otherwise.
1055 */
1056 template<typename LockerType1, typename LockerType2>
1057 static inline bool
1058 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1059 	LockerType1* locker1, LockerType2* locker2)
1060 {
1061 	area->cache->AssertLocked();
1062 
1063 	VMAreaUnwiredWaiter waiter;
1064 	if (!area->AddWaiterIfWired(&waiter, base, size))
1065 		return false;
1066 
1067 	// unlock everything and wait
1068 	if (locker1 != NULL)
1069 		locker1->Unlock();
1070 	if (locker2 != NULL)
1071 		locker2->Unlock();
1072 
1073 	waiter.waitEntry.Wait();
1074 
1075 	return true;
1076 }
1077 
1078 
1079 /*!	Checks whether the given address space has any wired ranges intersecting
1080 	with the specified range and waits, if so.
1081 
1082 	Similar to wait_if_area_range_is_wired(), with the following differences:
1083 	- All areas intersecting with the range are checked (respectively all until
1084 	  one is found that contains a wired range intersecting with the given
1085 	  range).
1086 	- The given address space must at least be read-locked and must be unlocked
1087 	  when \c Unlock() is called on \a locker.
1088 	- None of the areas' caches are allowed to be locked.
1089 */
1090 template<typename LockerType>
1091 static inline bool
1092 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1093 	size_t size, LockerType* locker)
1094 {
1095 	for (VMAddressSpace::AreaRangeIterator it
1096 		= addressSpace->GetAreaRangeIterator(base, size);
1097 			VMArea* area = it.Next();) {
1098 
1099 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1100 
1101 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1102 			return true;
1103 	}
1104 
1105 	return false;
1106 }
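

// Illustrative sketch, not part of the original source: the retry loop the
// functions below (e.g. vm_create_null_area()) use together with
// wait_if_address_range_is_wired(). Since waiting unlocks the address space,
// the lock has to be re-acquired and the check repeated until the range is
// no longer wired. Compiled out.
#if 0
static status_t
example_lock_unwired_range(team_id team, addr_t base, size_t size,
	AddressSpaceWriteLocker& locker)
{
	do {
		if (locker.SetTo(team) != B_OK)
			return B_BAD_TEAM_ID;
	} while (wait_if_address_range_is_wired(locker.AddressSpace(), base, size,
		&locker));

	// here the address space is write-locked and the range is not wired
	return B_OK;
}
#endif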
1107 
1108 
1109 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1110 	It must be called in a situation where the kernel address space may be
1111 	locked.
1112 */
1113 status_t
1114 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1115 {
1116 	AddressSpaceReadLocker locker;
1117 	VMArea* area;
1118 	status_t status = locker.SetFromArea(id, area);
1119 	if (status != B_OK)
1120 		return status;
1121 
1122 	if (area->page_protections == NULL) {
1123 		status = allocate_area_page_protections(area);
1124 		if (status != B_OK)
1125 			return status;
1126 	}
1127 
1128 	*cookie = (void*)area;
1129 	return B_OK;
1130 }
1131 
1132 
1133 /*!	This is a debug helper function intended only for very specific
1134 	use cases.
1135 	Sets protection for the given address range to the protection specified.
1136 	If \a protection is 0 then the involved pages will be marked non-present
1137 	in the translation map to cause a fault on access. The pages aren't
1138 	actually unmapped however so that they can be marked present again with
1139 	additional calls to this function. For this to work the area must be
1140 	fully locked in memory so that the pages aren't otherwise touched.
1141 	This function does not lock the kernel address space and needs to be
1142 	supplied with a \a cookie retrieved from a successful call to
1143 	vm_prepare_kernel_area_debug_protection().
1144 */
1145 status_t
1146 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1147 	uint32 protection)
1148 {
1149 	// check address range
1150 	addr_t address = (addr_t)_address;
1151 	size = PAGE_ALIGN(size);
1152 
1153 	if ((address % B_PAGE_SIZE) != 0
1154 		|| (addr_t)address + size < (addr_t)address
1155 		|| !IS_KERNEL_ADDRESS(address)
1156 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1157 		return B_BAD_VALUE;
1158 	}
1159 
1160 	// Translate the kernel protection to user protection as we only store that.
1161 	if ((protection & B_KERNEL_READ_AREA) != 0)
1162 		protection |= B_READ_AREA;
1163 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1164 		protection |= B_WRITE_AREA;
1165 
1166 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1167 	VMTranslationMap* map = addressSpace->TranslationMap();
1168 	VMArea* area = (VMArea*)cookie;
1169 
1170 	addr_t offset = address - area->Base();
1171 	if (area->Size() - offset < size) {
1172 		panic("protect range not fully within supplied area");
1173 		return B_BAD_VALUE;
1174 	}
1175 
1176 	if (area->page_protections == NULL) {
1177 		panic("area has no page protections");
1178 		return B_BAD_VALUE;
1179 	}
1180 
1181 	// Invalidate the mapping entries so any access to them will fault, or
1182 	// restore the mapping entries unchanged so that lookups will succeed again.
1183 	map->Lock();
1184 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1185 	map->Unlock();
1186 
1187 	// And set the proper page protections so that the fault case will actually
1188 	// fail and not simply try to map a new page.
1189 	for (addr_t pageAddress = address; pageAddress < address + size;
1190 			pageAddress += B_PAGE_SIZE) {
1191 		set_area_page_protection(area, pageAddress, protection);
1192 	}
1193 
1194 	return B_OK;
1195 }
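

// Illustrative sketch, not part of the original source: the intended use of
// the two debug protection helpers above -- obtain the cookie while locking
// the kernel address space is still permissible, then toggle the pages'
// presence later without taking any address space lock. The area has to be
// fully locked in memory. Compiled out.
#if 0
static void
example_debug_protect(area_id area, void* address, size_t size)
{
	void* cookie;
	if (vm_prepare_kernel_area_debug_protection(area, &cookie) != B_OK)
		return;

	// make the range fault on any access ...
	vm_set_kernel_area_debug_protection(cookie, address, size, 0);

	// ... and later make it accessible again
	vm_set_kernel_area_debug_protection(cookie, address, size,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
}
#endif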
1196 
1197 
1198 status_t
1199 vm_block_address_range(const char* name, void* address, addr_t size)
1200 {
1201 	if (!arch_vm_supports_protection(0))
1202 		return B_NOT_SUPPORTED;
1203 
1204 	AddressSpaceWriteLocker locker;
1205 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1206 	if (status != B_OK)
1207 		return status;
1208 
1209 	VMAddressSpace* addressSpace = locker.AddressSpace();
1210 
1211 	// create an anonymous cache
1212 	VMCache* cache;
1213 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1214 		VM_PRIORITY_SYSTEM);
1215 	if (status != B_OK)
1216 		return status;
1217 
1218 	cache->temporary = 1;
1219 	cache->virtual_end = size;
1220 	cache->Lock();
1221 
1222 	VMArea* area;
1223 	virtual_address_restrictions addressRestrictions = {};
1224 	addressRestrictions.address = address;
1225 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1226 	status = map_backing_store(addressSpace, cache, 0, name, size,
1227 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1228 		true, &area, NULL);
1229 	if (status != B_OK) {
1230 		cache->ReleaseRefAndUnlock();
1231 		return status;
1232 	}
1233 
1234 	cache->Unlock();
1235 	area->cache_type = CACHE_TYPE_RAM;
1236 	return area->id;
1237 }
1238 
1239 
1240 status_t
1241 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1242 {
1243 	AddressSpaceWriteLocker locker(team);
1244 	if (!locker.IsLocked())
1245 		return B_BAD_TEAM_ID;
1246 
1247 	VMAddressSpace* addressSpace = locker.AddressSpace();
1248 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1249 		addressSpace == VMAddressSpace::Kernel()
1250 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1251 }
1252 
1253 
1254 status_t
1255 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1256 	addr_t size, uint32 flags)
1257 {
1258 	if (size == 0)
1259 		return B_BAD_VALUE;
1260 
1261 	AddressSpaceWriteLocker locker(team);
1262 	if (!locker.IsLocked())
1263 		return B_BAD_TEAM_ID;
1264 
1265 	virtual_address_restrictions addressRestrictions = {};
1266 	addressRestrictions.address = *_address;
1267 	addressRestrictions.address_specification = addressSpec;
1268 	VMAddressSpace* addressSpace = locker.AddressSpace();
1269 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1270 		addressSpace == VMAddressSpace::Kernel()
1271 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1272 		_address);
1273 }
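

// Illustrative sketch, not part of the original source: reserving part of a
// team's address space so no areas can be created there, and releasing the
// reservation again. The size used here is made up for the example. Compiled
// out.
#if 0
static void
example_reserve_range(team_id team)
{
	void* address = NULL;
	if (vm_reserve_address_range(team, &address, B_ANY_ADDRESS,
			16 * B_PAGE_SIZE, 0) == B_OK) {
		// ... later
		vm_unreserve_address_range(team, address, 16 * B_PAGE_SIZE);
	}
}
#endif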
1274 
1275 
1276 area_id
1277 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1278 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1279 	const virtual_address_restrictions* virtualAddressRestrictions,
1280 	const physical_address_restrictions* physicalAddressRestrictions,
1281 	bool kernel, void** _address)
1282 {
1283 	VMArea* area;
1284 	VMCache* cache;
1285 	vm_page* page = NULL;
1286 	bool isStack = (protection & B_STACK_AREA) != 0;
1287 	page_num_t guardPages;
1288 	bool canOvercommit = false;
1289 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1290 		? VM_PAGE_ALLOC_CLEAR : 0;
1291 
1292 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1293 		team, name, size));
1294 
1295 	size = PAGE_ALIGN(size);
1296 	guardSize = PAGE_ALIGN(guardSize);
1297 	guardPages = guardSize / B_PAGE_SIZE;
1298 
1299 	if (size == 0 || size < guardSize)
1300 		return B_BAD_VALUE;
1301 	if (!arch_vm_supports_protection(protection))
1302 		return B_NOT_SUPPORTED;
1303 
1304 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1305 		canOvercommit = true;
1306 
1307 #ifdef DEBUG_KERNEL_STACKS
1308 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1309 		isStack = true;
1310 #endif
1311 
1312 	// check parameters
1313 	switch (virtualAddressRestrictions->address_specification) {
1314 		case B_ANY_ADDRESS:
1315 		case B_EXACT_ADDRESS:
1316 		case B_BASE_ADDRESS:
1317 		case B_ANY_KERNEL_ADDRESS:
1318 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1319 		case B_RANDOMIZED_ANY_ADDRESS:
1320 		case B_RANDOMIZED_BASE_ADDRESS:
1321 			break;
1322 
1323 		default:
1324 			return B_BAD_VALUE;
1325 	}
1326 
1327 	// If low or high physical address restrictions are given, we force
1328 	// B_CONTIGUOUS wiring, since only then will we use
1329 	// vm_page_allocate_page_run(), which deals with those restrictions.
1330 	if (physicalAddressRestrictions->low_address != 0
1331 		|| physicalAddressRestrictions->high_address != 0) {
1332 		wiring = B_CONTIGUOUS;
1333 	}
1334 
1335 	physical_address_restrictions stackPhysicalRestrictions;
1336 	bool doReserveMemory = false;
1337 	switch (wiring) {
1338 		case B_NO_LOCK:
1339 			break;
1340 		case B_FULL_LOCK:
1341 		case B_LAZY_LOCK:
1342 		case B_CONTIGUOUS:
1343 			doReserveMemory = true;
1344 			break;
1345 		case B_ALREADY_WIRED:
1346 			break;
1347 		case B_LOMEM:
1348 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1349 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1350 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1351 			wiring = B_CONTIGUOUS;
1352 			doReserveMemory = true;
1353 			break;
1354 		case B_32_BIT_FULL_LOCK:
1355 			if (B_HAIKU_PHYSICAL_BITS <= 32
1356 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1357 				wiring = B_FULL_LOCK;
1358 				doReserveMemory = true;
1359 				break;
1360 			}
1361 			// TODO: We don't really support this mode efficiently. Just fall
1362 			// through for now ...
1363 		case B_32_BIT_CONTIGUOUS:
1364 			#if B_HAIKU_PHYSICAL_BITS > 32
1365 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1366 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1367 					stackPhysicalRestrictions.high_address
1368 						= (phys_addr_t)1 << 32;
1369 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1370 				}
1371 			#endif
1372 			wiring = B_CONTIGUOUS;
1373 			doReserveMemory = true;
1374 			break;
1375 		default:
1376 			return B_BAD_VALUE;
1377 	}
1378 
1379 	// Optimization: For a single-page contiguous allocation without low/high
1380 	// memory restriction B_FULL_LOCK wiring suffices.
1381 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1382 		&& physicalAddressRestrictions->low_address == 0
1383 		&& physicalAddressRestrictions->high_address == 0) {
1384 		wiring = B_FULL_LOCK;
1385 	}
1386 
1387 	// For full lock or contiguous areas we're also going to map the pages and
1388 	// thus need to reserve pages for the mapping backend upfront.
1389 	addr_t reservedMapPages = 0;
1390 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1391 		AddressSpaceWriteLocker locker;
1392 		status_t status = locker.SetTo(team);
1393 		if (status != B_OK)
1394 			return status;
1395 
1396 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1397 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1398 	}
1399 
1400 	int priority;
1401 	if (team != VMAddressSpace::KernelID())
1402 		priority = VM_PRIORITY_USER;
1403 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1404 		priority = VM_PRIORITY_VIP;
1405 	else
1406 		priority = VM_PRIORITY_SYSTEM;
1407 
1408 	// Reserve memory before acquiring the address space lock. This reduces the
1409 	// chances of failure, since while holding the write lock to the address
1410 	// space (if it is the kernel address space that is), the low memory handler
1411 	// won't be able to free anything for us.
1412 	addr_t reservedMemory = 0;
1413 	if (doReserveMemory) {
1414 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1415 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1416 			return B_NO_MEMORY;
1417 		reservedMemory = size;
1418 		// TODO: We don't reserve the memory for the pages for the page
1419 		// directories/tables. We actually need to do so, since we currently don't
1420 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1421 		// there are actually fewer physical pages than there should be, which
1422 		// can get the VM into trouble in low memory situations.
1423 	}
1424 
1425 	AddressSpaceWriteLocker locker;
1426 	VMAddressSpace* addressSpace;
1427 	status_t status;
1428 
1429 	// For full lock areas reserve the pages before locking the address
1430 	// space. E.g. block caches can't release their memory while we hold the
1431 	// address space lock.
1432 	page_num_t reservedPages = reservedMapPages;
1433 	if (wiring == B_FULL_LOCK)
1434 		reservedPages += size / B_PAGE_SIZE;
1435 
1436 	vm_page_reservation reservation;
1437 	if (reservedPages > 0) {
1438 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1439 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1440 					priority)) {
1441 				reservedPages = 0;
1442 				status = B_WOULD_BLOCK;
1443 				goto err0;
1444 			}
1445 		} else
1446 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1447 	}
1448 
1449 	if (wiring == B_CONTIGUOUS) {
1450 		// we try to allocate the page run here upfront as this may easily
1451 		// fail for obvious reasons
1452 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1453 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1454 		if (page == NULL) {
1455 			status = B_NO_MEMORY;
1456 			goto err0;
1457 		}
1458 	}
1459 
1460 	// Lock the address space and, if B_EXACT_ADDRESS and
1461 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1462 	// is not wired.
1463 	do {
1464 		status = locker.SetTo(team);
1465 		if (status != B_OK)
1466 			goto err1;
1467 
1468 		addressSpace = locker.AddressSpace();
1469 	} while (virtualAddressRestrictions->address_specification
1470 			== B_EXACT_ADDRESS
1471 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1472 		&& wait_if_address_range_is_wired(addressSpace,
1473 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1474 
1475 	// create an anonymous cache
1476 	// if it's a stack, make sure that two pages are available at least
1477 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1478 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1479 		wiring == B_NO_LOCK, priority);
1480 	if (status != B_OK)
1481 		goto err1;
1482 
1483 	cache->temporary = 1;
1484 	cache->virtual_end = size;
1485 	cache->committed_size = reservedMemory;
1486 		// TODO: This should be done via a method.
1487 	reservedMemory = 0;
1488 
1489 	cache->Lock();
1490 
1491 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1492 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1493 		kernel, &area, _address);
1494 
1495 	if (status != B_OK) {
1496 		cache->ReleaseRefAndUnlock();
1497 		goto err1;
1498 	}
1499 
1500 	locker.DegradeToReadLock();
1501 
1502 	switch (wiring) {
1503 		case B_NO_LOCK:
1504 		case B_LAZY_LOCK:
1505 			// do nothing - the pages are mapped in as needed
1506 			break;
1507 
1508 		case B_FULL_LOCK:
1509 		{
1510 			// Allocate and map all pages for this area
1511 
1512 			off_t offset = 0;
1513 			for (addr_t address = area->Base();
1514 					address < area->Base() + (area->Size() - 1);
1515 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1516 #ifdef DEBUG_KERNEL_STACKS
1517 #	ifdef STACK_GROWS_DOWNWARDS
1518 				if (isStack && address < area->Base()
1519 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1520 #	else
1521 				if (isStack && address >= area->Base() + area->Size()
1522 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1523 #	endif
1524 					continue;
1525 #endif
1526 				vm_page* page = vm_page_allocate_page(&reservation,
1527 					PAGE_STATE_WIRED | pageAllocFlags);
1528 				cache->InsertPage(page, offset);
1529 				map_page(area, page, address, protection, &reservation);
1530 
1531 				DEBUG_PAGE_ACCESS_END(page);
1532 			}
1533 
1534 			break;
1535 		}
1536 
1537 		case B_ALREADY_WIRED:
1538 		{
1539 			// The pages should already be mapped. This is only really useful
1540 			// during boot time. Find the appropriate vm_page objects and stick
1541 			// them in the cache object.
1542 			VMTranslationMap* map = addressSpace->TranslationMap();
1543 			off_t offset = 0;
1544 
1545 			if (!gKernelStartup)
1546 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1547 
1548 			map->Lock();
1549 
1550 			for (addr_t virtualAddress = area->Base();
1551 					virtualAddress < area->Base() + (area->Size() - 1);
1552 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1553 				phys_addr_t physicalAddress;
1554 				uint32 flags;
1555 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1556 				if (status < B_OK) {
1557 					panic("looking up mapping failed for va 0x%lx\n",
1558 						virtualAddress);
1559 				}
1560 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1561 				if (page == NULL) {
1562 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1563 						"\n", physicalAddress);
1564 				}
1565 
1566 				DEBUG_PAGE_ACCESS_START(page);
1567 
1568 				cache->InsertPage(page, offset);
1569 				increment_page_wired_count(page);
1570 				vm_page_set_state(page, PAGE_STATE_WIRED);
1571 				page->busy = false;
1572 
1573 				DEBUG_PAGE_ACCESS_END(page);
1574 			}
1575 
1576 			map->Unlock();
1577 			break;
1578 		}
1579 
1580 		case B_CONTIGUOUS:
1581 		{
1582 			// We have already allocated our contiguous page run, so we can now
1583 			// just map the pages into the address space
1584 			VMTranslationMap* map = addressSpace->TranslationMap();
1585 			phys_addr_t physicalAddress
1586 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1587 			addr_t virtualAddress = area->Base();
1588 			off_t offset = 0;
1589 
1590 			map->Lock();
1591 
1592 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1593 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1594 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1595 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1596 				if (page == NULL)
1597 					panic("couldn't lookup physical page just allocated\n");
1598 
1599 				status = map->Map(virtualAddress, physicalAddress, protection,
1600 					area->MemoryType(), &reservation);
1601 				if (status < B_OK)
1602 					panic("couldn't map physical page in page run\n");
1603 
1604 				cache->InsertPage(page, offset);
1605 				increment_page_wired_count(page);
1606 
1607 				DEBUG_PAGE_ACCESS_END(page);
1608 			}
1609 
1610 			map->Unlock();
1611 			break;
1612 		}
1613 
1614 		default:
1615 			break;
1616 	}
1617 
1618 	cache->Unlock();
1619 
1620 	if (reservedPages > 0)
1621 		vm_page_unreserve_pages(&reservation);
1622 
1623 	TRACE(("vm_create_anonymous_area: done\n"));
1624 
1625 	area->cache_type = CACHE_TYPE_RAM;
1626 	return area->id;
1627 
1628 err1:
1629 	if (wiring == B_CONTIGUOUS) {
1630 		// we had allocated the contiguous page run upfront -- free its pages again
1631 		phys_addr_t pageNumber = page->physical_page_number;
1632 		int32 i;
1633 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1634 			page = vm_lookup_page(pageNumber);
1635 			if (page == NULL)
1636 				panic("couldn't lookup physical page just allocated\n");
1637 
1638 			vm_page_set_state(page, PAGE_STATE_FREE);
1639 		}
1640 	}
1641 
1642 err0:
1643 	if (reservedPages > 0)
1644 		vm_page_unreserve_pages(&reservation);
1645 	if (reservedMemory > 0)
1646 		vm_unreserve_memory(reservedMemory);
1647 
1648 	return status;
1649 }
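

// Illustrative sketch, not part of the original source: creating a simple
// pageable kernel area with vm_create_anonymous_area(). The restriction
// structures are zero-initialized and only the fields of interest are set;
// the area name and size are made up for the example. Compiled out.
#if 0
static area_id
example_create_kernel_buffer(void** _address)
{
	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;

	physical_address_restrictions physicalRestrictions = {};

	return vm_create_anonymous_area(VMAddressSpace::KernelID(),
		"example buffer", 16 * B_PAGE_SIZE, B_NO_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
		&physicalRestrictions, true, _address);
}
#endif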
1650 
1651 
1652 area_id
1653 vm_map_physical_memory(team_id team, const char* name, void** _address,
1654 	uint32 addressSpec, addr_t size, uint32 protection,
1655 	phys_addr_t physicalAddress, bool alreadyWired)
1656 {
1657 	VMArea* area;
1658 	VMCache* cache;
1659 	addr_t mapOffset;
1660 
1661 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1662 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1663 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1664 		addressSpec, size, protection, physicalAddress));
1665 
1666 	if (!arch_vm_supports_protection(protection))
1667 		return B_NOT_SUPPORTED;
1668 
1669 	AddressSpaceWriteLocker locker(team);
1670 	if (!locker.IsLocked())
1671 		return B_BAD_TEAM_ID;
1672 
1673 	// if the physical address is not page aligned,
1674 	// move the actual area down to align on a page boundary
1675 	mapOffset = physicalAddress % B_PAGE_SIZE;
1676 	size += mapOffset;
1677 	physicalAddress -= mapOffset;
1678 
1679 	size = PAGE_ALIGN(size);
1680 
1681 	// create a device cache
1682 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1683 	if (status != B_OK)
1684 		return status;
1685 
1686 	cache->virtual_end = size;
1687 
1688 	cache->Lock();
1689 
1690 	virtual_address_restrictions addressRestrictions = {};
1691 	addressRestrictions.address = *_address;
1692 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1693 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1694 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1695 		true, &area, _address);
1696 
1697 	if (status < B_OK)
1698 		cache->ReleaseRefLocked();
1699 
1700 	cache->Unlock();
1701 
1702 	if (status == B_OK) {
1703 		// set requested memory type -- use uncached, if not given
1704 		uint32 memoryType = addressSpec & B_MTR_MASK;
1705 		if (memoryType == 0)
1706 			memoryType = B_MTR_UC;
1707 
1708 		area->SetMemoryType(memoryType);
1709 
1710 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1711 		if (status != B_OK)
1712 			delete_area(locker.AddressSpace(), area, false);
1713 	}
1714 
1715 	if (status != B_OK)
1716 		return status;
1717 
1718 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1719 
1720 	if (alreadyWired) {
1721 		// The area is already mapped, but possibly not with the right
1722 		// memory type.
1723 		map->Lock();
1724 		map->ProtectArea(area, area->protection);
1725 		map->Unlock();
1726 	} else {
1727 		// Map the area completely.
1728 
1729 		// reserve pages needed for the mapping
1730 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1731 			area->Base() + (size - 1));
1732 		vm_page_reservation reservation;
1733 		vm_page_reserve_pages(&reservation, reservePages,
1734 			team == VMAddressSpace::KernelID()
1735 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1736 
1737 		map->Lock();
1738 
1739 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1740 			map->Map(area->Base() + offset, physicalAddress + offset,
1741 				protection, area->MemoryType(), &reservation);
1742 		}
1743 
1744 		map->Unlock();
1745 
1746 		vm_page_unreserve_pages(&reservation);
1747 	}
1748 
1749 	// modify the pointer returned to be offset back into the new area
1750 	// the same way the physical address passed in was offset
1751 	*_address = (void*)((addr_t)*_address + mapOffset);
1752 
1753 	area->cache_type = CACHE_TYPE_DEVICE;
1754 	return area->id;
1755 }
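

// Illustrative sketch, not part of the original source: mapping a device's
// physical register window into the kernel address space with
// vm_map_physical_memory(). The physical base and size are placeholders; the
// function itself compensates for a physical base that is not page aligned.
// Compiled out.
#if 0
static area_id
example_map_device_registers(phys_addr_t physicalBase, size_t size)
{
	void* virtualAddress;
	return vm_map_physical_memory(VMAddressSpace::KernelID(),
		"example device registers", &virtualAddress, B_ANY_KERNEL_ADDRESS,
		size, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, physicalBase, false);
}
#endif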
1756 
1757 
1758 /*!	Don't use!
1759 	TODO: This function was introduced to map physical page vecs to
1760 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1761 	use a device cache and does not track vm_page::wired_count!
1762 */
1763 area_id
1764 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1765 	uint32 addressSpec, addr_t* _size, uint32 protection,
1766 	struct generic_io_vec* vecs, uint32 vecCount)
1767 {
1768 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1769 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1770 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1771 		addressSpec, _size, protection, vecs, vecCount));
1772 
1773 	if (!arch_vm_supports_protection(protection)
1774 		|| (addressSpec & B_MTR_MASK) != 0) {
1775 		return B_NOT_SUPPORTED;
1776 	}
1777 
1778 	AddressSpaceWriteLocker locker(team);
1779 	if (!locker.IsLocked())
1780 		return B_BAD_TEAM_ID;
1781 
1782 	if (vecCount == 0)
1783 		return B_BAD_VALUE;
1784 
1785 	addr_t size = 0;
1786 	for (uint32 i = 0; i < vecCount; i++) {
1787 		if (vecs[i].base % B_PAGE_SIZE != 0
1788 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1789 			return B_BAD_VALUE;
1790 		}
1791 
1792 		size += vecs[i].length;
1793 	}
1794 
1795 	// create a device cache
1796 	VMCache* cache;
1797 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1798 	if (result != B_OK)
1799 		return result;
1800 
1801 	cache->virtual_end = size;
1802 
1803 	cache->Lock();
1804 
1805 	VMArea* area;
1806 	virtual_address_restrictions addressRestrictions = {};
1807 	addressRestrictions.address = *_address;
1808 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1809 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1810 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1811 		&addressRestrictions, true, &area, _address);
1812 
1813 	if (result != B_OK)
1814 		cache->ReleaseRefLocked();
1815 
1816 	cache->Unlock();
1817 
1818 	if (result != B_OK)
1819 		return result;
1820 
1821 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1822 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1823 		area->Base() + (size - 1));
1824 
1825 	vm_page_reservation reservation;
1826 	vm_page_reserve_pages(&reservation, reservePages,
1827 			team == VMAddressSpace::KernelID()
1828 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1829 	map->Lock();
1830 
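	// Walk the vec list in lockstep with the virtual offset: once the current
	// vec has been fully mapped, advance to the next vec that still has pages
	// left and continue mapping at the next virtual offset.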
1831 	uint32 vecIndex = 0;
1832 	size_t vecOffset = 0;
1833 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1834 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1835 			vecOffset = 0;
1836 			vecIndex++;
1837 		}
1838 
1839 		if (vecIndex >= vecCount)
1840 			break;
1841 
1842 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1843 			protection, area->MemoryType(), &reservation);
1844 
1845 		vecOffset += B_PAGE_SIZE;
1846 	}
1847 
1848 	map->Unlock();
1849 	vm_page_unreserve_pages(&reservation);
1850 
1851 	if (_size != NULL)
1852 		*_size = size;
1853 
1854 	area->cache_type = CACHE_TYPE_DEVICE;
1855 	return area->id;
1856 }
1857 
1858 
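/*!	Editorial summary (added for clarity, restating only what the code below
	does): creates an area that is backed by a "null" cache rather than by
	actual pages. The area is created with B_LAZY_LOCK wiring and
	B_KERNEL_READ_AREA protection, and the cache is marked temporary.
*/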
1859 area_id
1860 vm_create_null_area(team_id team, const char* name, void** address,
1861 	uint32 addressSpec, addr_t size, uint32 flags)
1862 {
1863 	size = PAGE_ALIGN(size);
1864 
1865 	// Lock the address space and, if B_EXACT_ADDRESS and
1866 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1867 	// is not wired.
1868 	AddressSpaceWriteLocker locker;
1869 	do {
1870 		if (locker.SetTo(team) != B_OK)
1871 			return B_BAD_TEAM_ID;
1872 	} while (addressSpec == B_EXACT_ADDRESS
1873 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1874 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1875 			(addr_t)*address, size, &locker));
1876 
1877 	// create a null cache
1878 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1879 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1880 	VMCache* cache;
1881 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1882 	if (status != B_OK)
1883 		return status;
1884 
1885 	cache->temporary = 1;
1886 	cache->virtual_end = size;
1887 
1888 	cache->Lock();
1889 
1890 	VMArea* area;
1891 	virtual_address_restrictions addressRestrictions = {};
1892 	addressRestrictions.address = *address;
1893 	addressRestrictions.address_specification = addressSpec;
1894 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1895 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1896 		&addressRestrictions, true, &area, address);
1897 
1898 	if (status < B_OK) {
1899 		cache->ReleaseRefAndUnlock();
1900 		return status;
1901 	}
1902 
1903 	cache->Unlock();
1904 
1905 	area->cache_type = CACHE_TYPE_NULL;
1906 	return area->id;
1907 }
1908 
1909 
1910 /*!	Creates the vnode cache for the specified \a vnode.
1911 	The vnode has to be marked busy when calling this function.
1912 */
1913 status_t
1914 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1915 {
1916 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1917 }
1918 
1919 
1920 /*!	\a cache must be locked. The area's address space must be read-locked.
1921 */
1922 static void
1923 pre_map_area_pages(VMArea* area, VMCache* cache,
1924 	vm_page_reservation* reservation)
1925 {
1926 	addr_t baseAddress = area->Base();
1927 	addr_t cacheOffset = area->cache_offset;
1928 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1929 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1930 
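	// Map every page of the cache that is already resident, not busy and has
	// been used recently (usage_count != 0) and that falls into the area's
	// range; the mappings are created read-only.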
1931 	for (VMCachePagesTree::Iterator it
1932 				= cache->pages.GetIterator(firstPage, true, true);
1933 			vm_page* page = it.Next();) {
1934 		if (page->cache_offset >= endPage)
1935 			break;
1936 
1937 		// skip busy and inactive pages
1938 		if (page->busy || page->usage_count == 0)
1939 			continue;
1940 
1941 		DEBUG_PAGE_ACCESS_START(page);
1942 		map_page(area, page,
1943 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1944 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1945 		DEBUG_PAGE_ACCESS_END(page);
1946 	}
1947 }
1948 
1949 
1950 /*!	Will map the file specified by \a fd to an area in memory.
1951 	The file will be mirrored beginning at the specified \a offset. The
1952 	\a offset and \a size arguments have to be page aligned.
1953 */
1954 static area_id
1955 _vm_map_file(team_id team, const char* name, void** _address,
1956 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1957 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1958 {
1959 	// TODO: for binary files, we want to make sure that they map a snapshot
1960 	//	of the file at a given time, i.e. later changes should not make it
1961 	//	into the mapped copy -- this will need quite some changes to be done
1962 	//	in a nice way
1963 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1964 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1965 
1966 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1967 	size = PAGE_ALIGN(size);
1968 
1969 	if (mapping == REGION_NO_PRIVATE_MAP)
1970 		protection |= B_SHARED_AREA;
1971 	if (addressSpec != B_EXACT_ADDRESS)
1972 		unmapAddressRange = false;
1973 
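	// Without a file descriptor this degenerates into a plain anonymous
	// mapping; delegate to vm_create_anonymous_area().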
1974 	if (fd < 0) {
1975 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1976 		virtual_address_restrictions virtualRestrictions = {};
1977 		virtualRestrictions.address = *_address;
1978 		virtualRestrictions.address_specification = addressSpec;
1979 		physical_address_restrictions physicalRestrictions = {};
1980 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1981 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1982 			_address);
1983 	}
1984 
1985 	// get the open flags of the FD
1986 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1987 	if (descriptor == NULL)
1988 		return EBADF;
1989 	int32 openMode = descriptor->open_mode;
1990 	put_fd(descriptor);
1991 
1992 	// The FD must open for reading at any rate. For shared mapping with write
1993 	// access, additionally the FD must be open for writing.
1994 	if ((openMode & O_ACCMODE) == O_WRONLY
1995 		|| (mapping == REGION_NO_PRIVATE_MAP
1996 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1997 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1998 		return EACCES;
1999 	}
2000 
2001 	// get the vnode for the object, this also grabs a ref to it
2002 	struct vnode* vnode = NULL;
2003 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2004 	if (status < B_OK)
2005 		return status;
2006 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
2007 
2008 	// If we're going to pre-map pages, we need to reserve the pages needed by
2009 	// the mapping backend upfront.
2010 	page_num_t reservedPreMapPages = 0;
2011 	vm_page_reservation reservation;
2012 	if ((protection & B_READ_AREA) != 0) {
2013 		AddressSpaceWriteLocker locker;
2014 		status = locker.SetTo(team);
2015 		if (status != B_OK)
2016 			return status;
2017 
2018 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2019 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2020 
2021 		locker.Unlock();
2022 
2023 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2024 			team == VMAddressSpace::KernelID()
2025 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2026 	}
2027 
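	// If pages were reserved above, make sure they are released again on
	// every exit path from this function (simple RAII helper).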
2028 	struct PageUnreserver {
2029 		PageUnreserver(vm_page_reservation* reservation)
2030 			:
2031 			fReservation(reservation)
2032 		{
2033 		}
2034 
2035 		~PageUnreserver()
2036 		{
2037 			if (fReservation != NULL)
2038 				vm_page_unreserve_pages(fReservation);
2039 		}
2040 
2041 		vm_page_reservation* fReservation;
2042 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2043 
2044 	// Lock the address space and, if the specified address range shall be
2045 	// unmapped, ensure it is not wired.
2046 	AddressSpaceWriteLocker locker;
2047 	do {
2048 		if (locker.SetTo(team) != B_OK)
2049 			return B_BAD_TEAM_ID;
2050 	} while (unmapAddressRange
2051 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2052 			(addr_t)*_address, size, &locker));
2053 
2054 	// TODO: this only works for file systems that use the file cache
2055 	VMCache* cache;
2056 	status = vfs_get_vnode_cache(vnode, &cache, false);
2057 	if (status < B_OK)
2058 		return status;
2059 
2060 	cache->Lock();
2061 
2062 	VMArea* area;
2063 	virtual_address_restrictions addressRestrictions = {};
2064 	addressRestrictions.address = *_address;
2065 	addressRestrictions.address_specification = addressSpec;
2066 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2067 		0, protection, mapping,
2068 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
2069 		&addressRestrictions, kernel, &area, _address);
2070 
2071 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2072 		// map_backing_store() cannot know we no longer need the ref
2073 		cache->ReleaseRefLocked();
2074 	}
2075 
2076 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2077 		pre_map_area_pages(area, cache, &reservation);
2078 
2079 	cache->Unlock();
2080 
2081 	if (status == B_OK) {
2082 		// TODO: this probably deserves a smarter solution, ie. don't always
2083 		// prefetch stuff, and also, probably don't trigger it at this place.
2084 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2085 			// prefetches at most 10 MB starting from "offset"
2086 	}
2087 
2088 	if (status != B_OK)
2089 		return status;
2090 
2091 	area->cache_type = CACHE_TYPE_VNODE;
2092 	return area->id;
2093 }
2094 
2095 
2096 area_id
2097 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2098 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2099 	int fd, off_t offset)
2100 {
2101 	if (!arch_vm_supports_protection(protection))
2102 		return B_NOT_SUPPORTED;
2103 
2104 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2105 		mapping, unmapAddressRange, fd, offset, true);
2106 }
2107 
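/*	Example (editorial sketch, not part of the original source): a kernel
	component could map the first 64 kB of an already opened file read-only
	into the kernel team roughly like this ("fd" stands for a hypothetical,
	readable file descriptor; error handling omitted):

		void* address = NULL;
		area_id area = vm_map_file(VMAddressSpace::KernelID(), "example map",
			&address, B_ANY_KERNEL_ADDRESS, 64 * 1024, B_KERNEL_READ_AREA,
			REGION_NO_PRIVATE_MAP, false, fd, 0);
*/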
2108 
2109 VMCache*
2110 vm_area_get_locked_cache(VMArea* area)
2111 {
2112 	rw_lock_read_lock(&sAreaCacheLock);
2113 
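	// The area's cache pointer is only guarded by sAreaCacheLock, but the
	// cache itself cannot simply be locked while that lock is held. So:
	// switch from the read lock to the cache's lock (SwitchFromReadLock()
	// fails if the cache has been deleted in the meantime), then re-acquire
	// the read lock and verify that the area still refers to the same cache;
	// otherwise unlock the cache and retry.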
2114 	while (true) {
2115 		VMCache* cache = area->cache;
2116 
2117 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2118 			// cache has been deleted
2119 			rw_lock_read_lock(&sAreaCacheLock);
2120 			continue;
2121 		}
2122 
2123 		rw_lock_read_lock(&sAreaCacheLock);
2124 
2125 		if (cache == area->cache) {
2126 			cache->AcquireRefLocked();
2127 			rw_lock_read_unlock(&sAreaCacheLock);
2128 			return cache;
2129 		}
2130 
2131 		// the cache changed in the meantime
2132 		cache->Unlock();
2133 	}
2134 }
2135 
2136 
2137 void
2138 vm_area_put_locked_cache(VMCache* cache)
2139 {
2140 	cache->ReleaseRefAndUnlock();
2141 }
2142 
2143 
2144 area_id
2145 vm_clone_area(team_id team, const char* name, void** address,
2146 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2147 	bool kernel)
2148 {
2149 	VMArea* newArea = NULL;
2150 	VMArea* sourceArea;
2151 
2152 	// Check whether the source area exists and is cloneable. If so, mark it
2153 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2154 	{
2155 		AddressSpaceWriteLocker locker;
2156 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2157 		if (status != B_OK)
2158 			return status;
2159 
2160 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2161 			return B_NOT_ALLOWED;
2162 
2163 		sourceArea->protection |= B_SHARED_AREA;
2164 		protection |= B_SHARED_AREA;
2165 	}
2166 
2167 	// Now lock both address spaces and actually do the cloning.
2168 
2169 	MultiAddressSpaceLocker locker;
2170 	VMAddressSpace* sourceAddressSpace;
2171 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2172 	if (status != B_OK)
2173 		return status;
2174 
2175 	VMAddressSpace* targetAddressSpace;
2176 	status = locker.AddTeam(team, true, &targetAddressSpace);
2177 	if (status != B_OK)
2178 		return status;
2179 
2180 	status = locker.Lock();
2181 	if (status != B_OK)
2182 		return status;
2183 
2184 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2185 	if (sourceArea == NULL)
2186 		return B_BAD_VALUE;
2187 
2188 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2189 		return B_NOT_ALLOWED;
2190 
2191 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2192 
2193 	if (!kernel && sourceAddressSpace != targetAddressSpace
2194 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2195 #if KDEBUG
2196 		Team* team = thread_get_current_thread()->team;
2197 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2198 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2199 #endif
2200 		status = B_NOT_ALLOWED;
2201 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2202 		status = B_NOT_ALLOWED;
2203 	} else {
2204 		virtual_address_restrictions addressRestrictions = {};
2205 		addressRestrictions.address = *address;
2206 		addressRestrictions.address_specification = addressSpec;
2207 		status = map_backing_store(targetAddressSpace, cache,
2208 			sourceArea->cache_offset, name, sourceArea->Size(),
2209 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2210 			kernel, &newArea, address);
2211 	}
2212 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2213 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2214 		// to create a new cache, and has therefore already acquired a reference
2215 		// to the source cache - but otherwise it has no idea that we need
2216 		// one.
2217 		cache->AcquireRefLocked();
2218 	}
2219 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2220 		// we need to map in everything at this point
2221 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2222 			// we don't have actual pages to map but a physical area
2223 			VMTranslationMap* map
2224 				= sourceArea->address_space->TranslationMap();
2225 			map->Lock();
2226 
2227 			phys_addr_t physicalAddress;
2228 			uint32 oldProtection;
2229 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2230 
2231 			map->Unlock();
2232 
2233 			map = targetAddressSpace->TranslationMap();
2234 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2235 				newArea->Base() + (newArea->Size() - 1));
2236 
2237 			vm_page_reservation reservation;
2238 			vm_page_reserve_pages(&reservation, reservePages,
2239 				targetAddressSpace == VMAddressSpace::Kernel()
2240 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2241 			map->Lock();
2242 
2243 			for (addr_t offset = 0; offset < newArea->Size();
2244 					offset += B_PAGE_SIZE) {
2245 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2246 					protection, newArea->MemoryType(), &reservation);
2247 			}
2248 
2249 			map->Unlock();
2250 			vm_page_unreserve_pages(&reservation);
2251 		} else {
2252 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2253 			size_t reservePages = map->MaxPagesNeededToMap(
2254 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2255 			vm_page_reservation reservation;
2256 			vm_page_reserve_pages(&reservation, reservePages,
2257 				targetAddressSpace == VMAddressSpace::Kernel()
2258 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2259 
2260 			// map in all pages from source
2261 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2262 					vm_page* page  = it.Next();) {
2263 				if (!page->busy) {
2264 					DEBUG_PAGE_ACCESS_START(page);
2265 					map_page(newArea, page,
2266 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2267 							- newArea->cache_offset),
2268 						protection, &reservation);
2269 					DEBUG_PAGE_ACCESS_END(page);
2270 				}
2271 			}
2272 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2273 			// ensuring that!
2274 
2275 			vm_page_unreserve_pages(&reservation);
2276 		}
2277 	}
2278 	if (status == B_OK)
2279 		newArea->cache_type = sourceArea->cache_type;
2280 
2281 	vm_area_put_locked_cache(cache);
2282 
2283 	if (status < B_OK)
2284 		return status;
2285 
2286 	return newArea->id;
2287 }
2288 
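/*	Example (editorial sketch, not part of the original source): cloning an
	existing area read-only into the kernel address space ("sourceArea" stands
	for a hypothetical valid area_id; error handling omitted):

		void* address = NULL;
		area_id clone = vm_clone_area(VMAddressSpace::KernelID(), "clone",
			&address, B_ANY_KERNEL_ADDRESS, B_KERNEL_READ_AREA,
			REGION_NO_PRIVATE_MAP, sourceArea, true);
*/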
2289 
2290 /*!	Deletes the specified area of the given address space.
2291 
2292 	The address space must be write-locked.
2293 	The caller must ensure that the area does not have any wired ranges.
2294 
2295 	\param addressSpace The address space containing the area.
2296 	\param area The area to be deleted.
2297 	\param deletingAddressSpace \c true, if the address space is in the process
2298 		of being deleted.
2299 */
2300 static void
2301 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2302 	bool deletingAddressSpace)
2303 {
2304 	ASSERT(!area->IsWired());
2305 
2306 	VMAreaHash::Remove(area);
2307 
2308 	// At this point the area is removed from the global hash table, but
2309 	// still exists in the area list.
2310 
2311 	// Unmap the virtual address space the area occupied.
2312 	{
2313 		// We need to lock the complete cache chain.
2314 		VMCache* topCache = vm_area_get_locked_cache(area);
2315 		VMCacheChainLocker cacheChainLocker(topCache);
2316 		cacheChainLocker.LockAllSourceCaches();
2317 
2318 		// If the area's top cache is a temporary cache and the area is the only
2319 		// one referencing it (besides us currently holding a second reference),
2320 		// the unmapping code doesn't need to care about preserving the accessed
2321 		// and dirty flags of the top cache page mappings.
2322 		bool ignoreTopCachePageFlags
2323 			= topCache->temporary && topCache->RefCount() == 2;
2324 
2325 		area->address_space->TranslationMap()->UnmapArea(area,
2326 			deletingAddressSpace, ignoreTopCachePageFlags);
2327 	}
2328 
2329 	if (!area->cache->temporary)
2330 		area->cache->WriteModified();
2331 
2332 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2333 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2334 
2335 	arch_vm_unset_memory_type(area);
2336 	addressSpace->RemoveArea(area, allocationFlags);
2337 	addressSpace->Put();
2338 
2339 	area->cache->RemoveArea(area);
2340 	area->cache->ReleaseRef();
2341 
2342 	addressSpace->DeleteArea(area, allocationFlags);
2343 }
2344 
2345 
2346 status_t
2347 vm_delete_area(team_id team, area_id id, bool kernel)
2348 {
2349 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2350 		team, id));
2351 
2352 	// lock the address space and make sure the area isn't wired
2353 	AddressSpaceWriteLocker locker;
2354 	VMArea* area;
2355 	AreaCacheLocker cacheLocker;
2356 
2357 	do {
2358 		status_t status = locker.SetFromArea(team, id, area);
2359 		if (status != B_OK)
2360 			return status;
2361 
2362 		cacheLocker.SetTo(area);
2363 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2364 
2365 	cacheLocker.Unlock();
2366 
2367 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2368 		return B_NOT_ALLOWED;
2369 
2370 	delete_area(locker.AddressSpace(), area, false);
2371 	return B_OK;
2372 }
2373 
2374 
2375 /*!	Creates a new cache on top of the given cache, moves all areas from
2376 	the old cache to the new one, and changes the protection of all affected
2377 	areas' pages to read-only. If requested, wired pages are moved up to the
2378 	new cache and copies are added to the old cache in their place.
2379 	Preconditions:
2380 	- The given cache must be locked.
2381 	- All of the cache's areas' address spaces must be read locked.
2382 	- Either the cache must not have any wired ranges or a page reservation for
2383 	  all wired pages must be provided, so they can be copied.
2384 
2385 	\param lowerCache The cache on top of which a new cache shall be created.
2386 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2387 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2388 		has wired pages. The wired pages are copied in this case.
2389 */
2390 static status_t
2391 vm_copy_on_write_area(VMCache* lowerCache,
2392 	vm_page_reservation* wiredPagesReservation)
2393 {
2394 	VMCache* upperCache;
2395 
2396 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2397 
2398 	// We need to separate the cache from its areas. The cache goes one level
2399 	// deeper and we create a new cache inbetween.
2400 
2401 	// create an anonymous cache
2402 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2403 		lowerCache->GuardSize() / B_PAGE_SIZE,
2404 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2405 		VM_PRIORITY_USER);
2406 	if (status != B_OK)
2407 		return status;
2408 
2409 	upperCache->Lock();
2410 
2411 	upperCache->temporary = 1;
2412 	upperCache->virtual_base = lowerCache->virtual_base;
2413 	upperCache->virtual_end = lowerCache->virtual_end;
2414 
2415 	// transfer the lower cache areas to the upper cache
2416 	rw_lock_write_lock(&sAreaCacheLock);
2417 	upperCache->TransferAreas(lowerCache);
2418 	rw_lock_write_unlock(&sAreaCacheLock);
2419 
2420 	lowerCache->AddConsumer(upperCache);
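	// The cache chain is now: area(s) -> upperCache -> lowerCache (-> any
	// older source caches below that).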
2421 
2422 	// We now need to remap all pages from all of the cache's areas read-only,
2423 	// so that a copy will be created on next write access. If there are wired
2424 	// pages, we keep their protection, move them to the upper cache and create
2425 	// copies for the lower cache.
2426 	if (wiredPagesReservation != NULL) {
2427 		// We need to handle wired pages -- iterate through the cache's pages.
2428 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2429 				vm_page* page = it.Next();) {
2430 			if (page->WiredCount() > 0) {
2431 				// allocate a new page and copy the wired one
2432 				vm_page* copiedPage = vm_page_allocate_page(
2433 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2434 
2435 				vm_memcpy_physical_page(
2436 					copiedPage->physical_page_number * B_PAGE_SIZE,
2437 					page->physical_page_number * B_PAGE_SIZE);
2438 
2439 				// move the wired page to the upper cache (note: removing is OK
2440 				// with the SplayTree iterator) and insert the copy
2441 				upperCache->MovePage(page);
2442 				lowerCache->InsertPage(copiedPage,
2443 					page->cache_offset * B_PAGE_SIZE);
2444 
2445 				DEBUG_PAGE_ACCESS_END(copiedPage);
2446 			} else {
2447 				// Change the protection of this page in all areas.
2448 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2449 						tempArea = tempArea->cache_next) {
2450 					// The area must be readable in the same way it was
2451 					// previously writable.
2452 					uint32 protection = B_KERNEL_READ_AREA;
2453 					if ((tempArea->protection & B_READ_AREA) != 0)
2454 						protection |= B_READ_AREA;
2455 
2456 					VMTranslationMap* map
2457 						= tempArea->address_space->TranslationMap();
2458 					map->Lock();
2459 					map->ProtectPage(tempArea,
2460 						virtual_page_address(tempArea, page), protection);
2461 					map->Unlock();
2462 				}
2463 			}
2464 		}
2465 	} else {
2466 		ASSERT(lowerCache->WiredPagesCount() == 0);
2467 
2468 		// just change the protection of all areas
2469 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2470 				tempArea = tempArea->cache_next) {
2471 			// The area must be readable in the same way it was previously
2472 			// writable.
2473 			uint32 protection = B_KERNEL_READ_AREA;
2474 			if ((tempArea->protection & B_READ_AREA) != 0)
2475 				protection |= B_READ_AREA;
2476 
2477 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2478 			map->Lock();
2479 			map->ProtectArea(tempArea, protection);
2480 			map->Unlock();
2481 		}
2482 	}
2483 
2484 	vm_area_put_locked_cache(upperCache);
2485 
2486 	return B_OK;
2487 }
2488 
2489 
2490 area_id
2491 vm_copy_area(team_id team, const char* name, void** _address,
2492 	uint32 addressSpec, uint32 protection, area_id sourceID)
2493 {
2494 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2495 
2496 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2497 		// set the same protection for the kernel as for userland
2498 		protection |= B_KERNEL_READ_AREA;
2499 		if (writableCopy)
2500 			protection |= B_KERNEL_WRITE_AREA;
2501 	}
2502 
2503 	// Do the locking: target address space, all address spaces associated with
2504 	// the source cache, and the cache itself.
2505 	MultiAddressSpaceLocker locker;
2506 	VMAddressSpace* targetAddressSpace;
2507 	VMCache* cache;
2508 	VMArea* source;
2509 	AreaCacheLocker cacheLocker;
2510 	status_t status;
2511 	bool sharedArea;
2512 
2513 	page_num_t wiredPages = 0;
2514 	vm_page_reservation wiredPagesReservation;
2515 
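	// Reserving pages for the wired-page copies requires dropping the locks.
	// Since the number of wired pages may change while unlocked, loop until
	// the reservation covers the count seen with the locks held.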
2516 	bool restart;
2517 	do {
2518 		restart = false;
2519 
2520 		locker.Unset();
2521 		status = locker.AddTeam(team, true, &targetAddressSpace);
2522 		if (status == B_OK) {
2523 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2524 				&cache);
2525 		}
2526 		if (status != B_OK)
2527 			return status;
2528 
2529 		cacheLocker.SetTo(cache, true);	// already locked
2530 
2531 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2532 
2533 		page_num_t oldWiredPages = wiredPages;
2534 		wiredPages = 0;
2535 
2536 		// If the source area isn't shared, count the number of wired pages in
2537 		// the cache and reserve as many pages.
2538 		if (!sharedArea) {
2539 			wiredPages = cache->WiredPagesCount();
2540 
2541 			if (wiredPages > oldWiredPages) {
2542 				cacheLocker.Unlock();
2543 				locker.Unlock();
2544 
2545 				if (oldWiredPages > 0)
2546 					vm_page_unreserve_pages(&wiredPagesReservation);
2547 
2548 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2549 					VM_PRIORITY_USER);
2550 
2551 				restart = true;
2552 			}
2553 		} else if (oldWiredPages > 0)
2554 			vm_page_unreserve_pages(&wiredPagesReservation);
2555 	} while (restart);
2556 
2557 	// unreserve pages later
2558 	struct PagesUnreserver {
2559 		PagesUnreserver(vm_page_reservation* reservation)
2560 			:
2561 			fReservation(reservation)
2562 		{
2563 		}
2564 
2565 		~PagesUnreserver()
2566 		{
2567 			if (fReservation != NULL)
2568 				vm_page_unreserve_pages(fReservation);
2569 		}
2570 
2571 	private:
2572 		vm_page_reservation*	fReservation;
2573 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2574 
2575 	if (addressSpec == B_CLONE_ADDRESS) {
2576 		addressSpec = B_EXACT_ADDRESS;
2577 		*_address = (void*)source->Base();
2578 	}
2579 
2580 	// First, create a cache on top of the source area, or use the existing
2581 	// one if this is a shared area.
2582 
2583 	VMArea* target;
2584 	virtual_address_restrictions addressRestrictions = {};
2585 	addressRestrictions.address = *_address;
2586 	addressRestrictions.address_specification = addressSpec;
2587 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2588 		name, source->Size(), source->wiring, protection,
2589 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2590 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2591 		&addressRestrictions, true, &target, _address);
2592 	if (status < B_OK)
2593 		return status;
2594 
2595 	if (sharedArea) {
2596 		// The new area uses the old area's cache, but map_backing_store()
2597 		// hasn't acquired a ref. So we have to do that now.
2598 		cache->AcquireRefLocked();
2599 	}
2600 
2601 	// If the source area is writable, we need to move it one layer up as well
2602 
2603 	if (!sharedArea) {
2604 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2605 			// TODO: do something more useful if this fails!
2606 			if (vm_copy_on_write_area(cache,
2607 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2608 				panic("vm_copy_on_write_area() failed!\n");
2609 			}
2610 		}
2611 	}
2612 
2613 	// we return the ID of the newly created area
2614 	return target->id;
2615 }
2616 
2617 
2618 status_t
2619 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2620 	bool kernel)
2621 {
2622 	fix_protection(&newProtection);
2623 
2624 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2625 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2626 
2627 	if (!arch_vm_supports_protection(newProtection))
2628 		return B_NOT_SUPPORTED;
2629 
2630 	bool becomesWritable
2631 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2632 
2633 	// lock address spaces and cache
2634 	MultiAddressSpaceLocker locker;
2635 	VMCache* cache;
2636 	VMArea* area;
2637 	status_t status;
2638 	AreaCacheLocker cacheLocker;
2639 	bool isWritable;
2640 
2641 	bool restart;
2642 	do {
2643 		restart = false;
2644 
2645 		locker.Unset();
2646 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2647 		if (status != B_OK)
2648 			return status;
2649 
2650 		cacheLocker.SetTo(cache, true);	// already locked
2651 
2652 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2653 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2654 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2655 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2656 				" (%s)\n", team, newProtection, areaID, area->name);
2657 			return B_NOT_ALLOWED;
2658 		}
2659 
2660 		if (area->protection == newProtection)
2661 			return B_OK;
2662 
2663 		if (team != VMAddressSpace::KernelID()
2664 			&& area->address_space->ID() != team) {
2665 			// unless you're the kernel, you are only allowed to set
2666 			// the protection of your own areas
2667 			return B_NOT_ALLOWED;
2668 		}
2669 
2670 		isWritable
2671 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2672 
2673 		// Make sure the area (or, if we're going to call
2674 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2675 		// wired ranges.
2676 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2677 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2678 					otherArea = otherArea->cache_next) {
2679 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2680 					restart = true;
2681 					break;
2682 				}
2683 			}
2684 		} else {
2685 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2686 				restart = true;
2687 		}
2688 	} while (restart);
2689 
2690 	bool changePageProtection = true;
2691 	bool changeTopCachePagesOnly = false;
2692 
2693 	if (isWritable && !becomesWritable) {
2694 		// writable -> !writable
2695 
2696 		if (cache->source != NULL && cache->temporary) {
2697 			if (cache->CountWritableAreas(area) == 0) {
2698 				// Since this cache is now backed by the pages of its source cache,
2699 				// we can reduce its commitment to account only for the pages that
2700 				// are actually present in this cache.
2701 
2702 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2703 					team == VMAddressSpace::KernelID()
2704 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2705 
2706 				// TODO: we may be able to join with our source cache, if
2707 				// count == 0
2708 			}
2709 		}
2710 
2711 		// If only the writability changes, we can just remap the pages of the
2712 		// top cache, since the pages of lower caches are mapped read-only
2713 		// anyway. That is only advantageous, though, if the number of pages in
2714 		// the cache is significantly smaller than the number of pages in the
2715 		// area.
2716 		if (newProtection
2717 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2718 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2719 			changeTopCachePagesOnly = true;
2720 		}
2721 	} else if (!isWritable && becomesWritable) {
2722 		// !writable -> writable
2723 
2724 		if (!cache->consumers.IsEmpty()) {
2725 			// There are consumers -- we have to insert a new cache. Fortunately
2726 			// vm_copy_on_write_area() does everything that's needed.
2727 			changePageProtection = false;
2728 			status = vm_copy_on_write_area(cache, NULL);
2729 		} else {
2730 			// No consumers, so we don't need to insert a new one.
2731 			if (cache->source != NULL && cache->temporary) {
2732 				// the cache's commitment must contain all possible pages
2733 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2734 					team == VMAddressSpace::KernelID()
2735 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2736 			}
2737 
2738 			if (status == B_OK && cache->source != NULL) {
2739 				// There's a source cache, hence we can't just change all pages'
2740 				// protection or we might allow writing into pages belonging to
2741 				// a lower cache.
2742 				changeTopCachePagesOnly = true;
2743 			}
2744 		}
2745 	} else {
2746 		// we don't have anything special to do in all other cases
2747 	}
2748 
2749 	if (status == B_OK) {
2750 		// remap existing pages in this cache
2751 		if (changePageProtection) {
2752 			VMTranslationMap* map = area->address_space->TranslationMap();
2753 			map->Lock();
2754 
2755 			if (changeTopCachePagesOnly) {
2756 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2757 				page_num_t lastPageOffset
2758 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2759 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2760 						vm_page* page = it.Next();) {
2761 					if (page->cache_offset >= firstPageOffset
2762 						&& page->cache_offset <= lastPageOffset) {
2763 						addr_t address = virtual_page_address(area, page);
2764 						map->ProtectPage(area, address, newProtection);
2765 					}
2766 				}
2767 			} else
2768 				map->ProtectArea(area, newProtection);
2769 
2770 			map->Unlock();
2771 		}
2772 
2773 		area->protection = newProtection;
2774 	}
2775 
2776 	return status;
2777 }
2778 
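/*	Example (editorial sketch, not part of the original source): revoking write
	access to an area of a user team ("team" and "area" stand for hypothetical
	valid IDs; the area must belong to that team):

		status_t error = vm_set_area_protection(team, area,
			B_READ_AREA | B_KERNEL_READ_AREA, false);
*/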
2779 
2780 status_t
2781 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2782 {
2783 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2784 	if (addressSpace == NULL)
2785 		return B_BAD_TEAM_ID;
2786 
2787 	VMTranslationMap* map = addressSpace->TranslationMap();
2788 
2789 	map->Lock();
2790 	uint32 dummyFlags;
2791 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2792 	map->Unlock();
2793 
2794 	addressSpace->Put();
2795 	return status;
2796 }
2797 
2798 
2799 /*!	The page's cache must be locked.
2800 */
2801 bool
2802 vm_test_map_modification(vm_page* page)
2803 {
2804 	if (page->modified)
2805 		return true;
2806 
2807 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2808 	vm_page_mapping* mapping;
2809 	while ((mapping = iterator.Next()) != NULL) {
2810 		VMArea* area = mapping->area;
2811 		VMTranslationMap* map = area->address_space->TranslationMap();
2812 
2813 		phys_addr_t physicalAddress;
2814 		uint32 flags;
2815 		map->Lock();
2816 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2817 		map->Unlock();
2818 
2819 		if ((flags & PAGE_MODIFIED) != 0)
2820 			return true;
2821 	}
2822 
2823 	return false;
2824 }
2825 
2826 
2827 /*!	The page's cache must be locked.
2828 */
2829 void
2830 vm_clear_map_flags(vm_page* page, uint32 flags)
2831 {
2832 	if ((flags & PAGE_ACCESSED) != 0)
2833 		page->accessed = false;
2834 	if ((flags & PAGE_MODIFIED) != 0)
2835 		page->modified = false;
2836 
2837 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2838 	vm_page_mapping* mapping;
2839 	while ((mapping = iterator.Next()) != NULL) {
2840 		VMArea* area = mapping->area;
2841 		VMTranslationMap* map = area->address_space->TranslationMap();
2842 
2843 		map->Lock();
2844 		map->ClearFlags(virtual_page_address(area, page), flags);
2845 		map->Unlock();
2846 	}
2847 }
2848 
2849 
2850 /*!	Removes all mappings from a page.
2851 	After you've called this function, the page is unmapped from memory and
2852 	the page's \c accessed and \c modified flags have been updated according
2853 	to the state of the mappings.
2854 	The page's cache must be locked.
2855 */
2856 void
2857 vm_remove_all_page_mappings(vm_page* page)
2858 {
2859 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2860 		VMArea* area = mapping->area;
2861 		VMTranslationMap* map = area->address_space->TranslationMap();
2862 		addr_t address = virtual_page_address(area, page);
2863 		map->UnmapPage(area, address, false);
2864 	}
2865 }
2866 
2867 
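/*!	Editorial summary (added for clarity): clears the accessed flag of each of
	the page's mappings as well as the page's own \c accessed flag, ORing the
	mappings' modified state into the page's \c modified flag along the way.
	Returns how many accessed flags were set, including the page's own.
*/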
2868 int32
2869 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2870 {
2871 	int32 count = 0;
2872 
2873 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2874 	vm_page_mapping* mapping;
2875 	while ((mapping = iterator.Next()) != NULL) {
2876 		VMArea* area = mapping->area;
2877 		VMTranslationMap* map = area->address_space->TranslationMap();
2878 
2879 		bool modified;
2880 		if (map->ClearAccessedAndModified(area,
2881 				virtual_page_address(area, page), false, modified)) {
2882 			count++;
2883 		}
2884 
2885 		page->modified |= modified;
2886 	}
2887 
2888 
2889 	if (page->accessed) {
2890 		count++;
2891 		page->accessed = false;
2892 	}
2893 
2894 	return count;
2895 }
2896 
2897 
2898 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2899 	mappings.
2900 	The function iterates through the page mappings and removes them until
2901 	encountering one that has been accessed. From then on it will continue to
2902 	iterate, but only clear the accessed flag of the mapping. The page's
2903 	\c modified bit will be updated accordingly, the \c accessed bit will be
2904 	cleared.
2905 	\return The number of mapping accessed bits encountered, including the
2906 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2907 		of the page have been removed.
2908 */
2909 int32
2910 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2911 {
2912 	ASSERT(page->WiredCount() == 0);
2913 
2914 	if (page->accessed)
2915 		return vm_clear_page_mapping_accessed_flags(page);
2916 
2917 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2918 		VMArea* area = mapping->area;
2919 		VMTranslationMap* map = area->address_space->TranslationMap();
2920 		addr_t address = virtual_page_address(area, page);
2921 		bool modified = false;
2922 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2923 			page->accessed = true;
2924 			page->modified |= modified;
2925 			return vm_clear_page_mapping_accessed_flags(page);
2926 		}
2927 		page->modified |= modified;
2928 	}
2929 
2930 	return 0;
2931 }
2932 
2933 
2934 static int
2935 display_mem(int argc, char** argv)
2936 {
2937 	bool physical = false;
2938 	addr_t copyAddress;
2939 	int32 displayWidth;
2940 	int32 itemSize;
2941 	int32 num = -1;
2942 	addr_t address;
2943 	int i = 1, j;
2944 
2945 	if (argc > 1 && argv[1][0] == '-') {
2946 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2947 			physical = true;
2948 			i++;
2949 		} else
2950 			i = 99;
2951 	}
2952 
2953 	if (argc < i + 1 || argc > i + 2) {
2954 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2955 			"\tdl - 8 bytes\n"
2956 			"\tdw - 4 bytes\n"
2957 			"\tds - 2 bytes\n"
2958 			"\tdb - 1 byte\n"
2959 			"\tstring - a whole string\n"
2960 			"  -p or --physical only allows memory from a single page to be "
2961 			"displayed.\n");
2962 		return 0;
2963 	}
2964 
2965 	address = parse_expression(argv[i]);
2966 
2967 	if (argc > i + 1)
2968 		num = parse_expression(argv[i + 1]);
2969 
2970 	// build the format string
2971 	if (strcmp(argv[0], "db") == 0) {
2972 		itemSize = 1;
2973 		displayWidth = 16;
2974 	} else if (strcmp(argv[0], "ds") == 0) {
2975 		itemSize = 2;
2976 		displayWidth = 8;
2977 	} else if (strcmp(argv[0], "dw") == 0) {
2978 		itemSize = 4;
2979 		displayWidth = 4;
2980 	} else if (strcmp(argv[0], "dl") == 0) {
2981 		itemSize = 8;
2982 		displayWidth = 2;
2983 	} else if (strcmp(argv[0], "string") == 0) {
2984 		itemSize = 1;
2985 		displayWidth = -1;
2986 	} else {
2987 		kprintf("display_mem called in an invalid way!\n");
2988 		return 0;
2989 	}
2990 
2991 	if (num <= 0)
2992 		num = displayWidth;
2993 
2994 	void* physicalPageHandle = NULL;
2995 
2996 	if (physical) {
2997 		int32 offset = address & (B_PAGE_SIZE - 1);
2998 		if (num * itemSize + offset > B_PAGE_SIZE) {
2999 			num = (B_PAGE_SIZE - offset) / itemSize;
3000 			kprintf("NOTE: number of bytes has been cut to page size\n");
3001 		}
3002 
3003 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3004 
3005 		if (vm_get_physical_page_debug(address, &copyAddress,
3006 				&physicalPageHandle) != B_OK) {
3007 			kprintf("getting the hardware page failed.\n");
3008 			return 0;
3009 		}
3010 
3011 		address += offset;
3012 		copyAddress += offset;
3013 	} else
3014 		copyAddress = address;
3015 
3016 	if (!strcmp(argv[0], "string")) {
3017 		kprintf("%p \"", (char*)copyAddress);
3018 
3019 		// string mode
3020 		for (i = 0; true; i++) {
3021 			char c;
3022 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3023 					!= B_OK
3024 				|| c == '\0') {
3025 				break;
3026 			}
3027 
3028 			if (c == '\n')
3029 				kprintf("\\n");
3030 			else if (c == '\t')
3031 				kprintf("\\t");
3032 			else {
3033 				if (!isprint(c))
3034 					c = '.';
3035 
3036 				kprintf("%c", c);
3037 			}
3038 		}
3039 
3040 		kprintf("\"\n");
3041 	} else {
3042 		// number mode
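		// Each output row shows the row's start address, an ASCII preview of
		// the row's bytes, and then the individual items in hex.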
3043 		for (i = 0; i < num; i++) {
3044 			uint64 value;
3045 
3046 			if ((i % displayWidth) == 0) {
3047 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3048 				if (i != 0)
3049 					kprintf("\n");
3050 
3051 				kprintf("[0x%lx]  ", address + i * itemSize);
3052 
3053 				for (j = 0; j < displayed; j++) {
3054 					char c;
3055 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3056 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3057 						displayed = j;
3058 						break;
3059 					}
3060 					if (!isprint(c))
3061 						c = '.';
3062 
3063 					kprintf("%c", c);
3064 				}
3065 				if (num > displayWidth) {
3066 					// make sure the spacing in the last line is correct
3067 					for (j = displayed; j < displayWidth * itemSize; j++)
3068 						kprintf(" ");
3069 				}
3070 				kprintf("  ");
3071 			}
3072 
3073 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3074 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3075 				kprintf("read fault");
3076 				break;
3077 			}
3078 
3079 			switch (itemSize) {
3080 				case 1:
3081 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3082 					break;
3083 				case 2:
3084 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3085 					break;
3086 				case 4:
3087 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3088 					break;
3089 				case 8:
3090 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3091 					break;
3092 			}
3093 		}
3094 
3095 		kprintf("\n");
3096 	}
3097 
3098 	if (physical) {
3099 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3100 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3101 	}
3102 	return 0;
3103 }
3104 
3105 
3106 static void
3107 dump_cache_tree_recursively(VMCache* cache, int level,
3108 	VMCache* highlightCache)
3109 {
3110 	// print this cache
3111 	for (int i = 0; i < level; i++)
3112 		kprintf("  ");
3113 	if (cache == highlightCache)
3114 		kprintf("%p <--\n", cache);
3115 	else
3116 		kprintf("%p\n", cache);
3117 
3118 	// recursively print its consumers
3119 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3120 			VMCache* consumer = it.Next();) {
3121 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3122 	}
3123 }
3124 
3125 
3126 static int
3127 dump_cache_tree(int argc, char** argv)
3128 {
3129 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3130 		kprintf("usage: %s <address>\n", argv[0]);
3131 		return 0;
3132 	}
3133 
3134 	addr_t address = parse_expression(argv[1]);
3135 	if (address == 0)
3136 		return 0;
3137 
3138 	VMCache* cache = (VMCache*)address;
3139 	VMCache* root = cache;
3140 
3141 	// find the root cache (the transitive source)
3142 	while (root->source != NULL)
3143 		root = root->source;
3144 
3145 	dump_cache_tree_recursively(root, 0, cache);
3146 
3147 	return 0;
3148 }
3149 
3150 
3151 const char*
3152 vm_cache_type_to_string(int32 type)
3153 {
3154 	switch (type) {
3155 		case CACHE_TYPE_RAM:
3156 			return "RAM";
3157 		case CACHE_TYPE_DEVICE:
3158 			return "device";
3159 		case CACHE_TYPE_VNODE:
3160 			return "vnode";
3161 		case CACHE_TYPE_NULL:
3162 			return "null";
3163 
3164 		default:
3165 			return "unknown";
3166 	}
3167 }
3168 
3169 
3170 #if DEBUG_CACHE_LIST
3171 
3172 static void
3173 update_cache_info_recursively(VMCache* cache, cache_info& info)
3174 {
3175 	info.page_count += cache->page_count;
3176 	if (cache->type == CACHE_TYPE_RAM)
3177 		info.committed += cache->committed_size;
3178 
3179 	// recurse
3180 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3181 			VMCache* consumer = it.Next();) {
3182 		update_cache_info_recursively(consumer, info);
3183 	}
3184 }
3185 
3186 
3187 static int
3188 cache_info_compare_page_count(const void* _a, const void* _b)
3189 {
3190 	const cache_info* a = (const cache_info*)_a;
3191 	const cache_info* b = (const cache_info*)_b;
3192 	if (a->page_count == b->page_count)
3193 		return 0;
3194 	return a->page_count < b->page_count ? 1 : -1;
3195 }
3196 
3197 
3198 static int
3199 cache_info_compare_committed(const void* _a, const void* _b)
3200 {
3201 	const cache_info* a = (const cache_info*)_a;
3202 	const cache_info* b = (const cache_info*)_b;
3203 	if (a->committed == b->committed)
3204 		return 0;
3205 	return a->committed < b->committed ? 1 : -1;
3206 }
3207 
3208 
3209 static void
3210 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3211 {
3212 	for (int i = 0; i < level; i++)
3213 		kprintf("  ");
3214 
3215 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3216 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3217 		cache->virtual_base, cache->virtual_end, cache->page_count);
3218 
3219 	if (level == 0)
3220 		kprintf("/%lu", info.page_count);
3221 
3222 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3223 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3224 
3225 		if (level == 0)
3226 			kprintf("/%lu", info.committed);
3227 	}
3228 
3229 	// areas
3230 	if (cache->areas != NULL) {
3231 		VMArea* area = cache->areas;
3232 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3233 			area->name, area->address_space->ID());
3234 
3235 		while (area->cache_next != NULL) {
3236 			area = area->cache_next;
3237 			kprintf(", %" B_PRId32, area->id);
3238 		}
3239 	}
3240 
3241 	kputs("\n");
3242 
3243 	// recurse
3244 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3245 			VMCache* consumer = it.Next();) {
3246 		dump_caches_recursively(consumer, info, level + 1);
3247 	}
3248 }
3249 
3250 
3251 static int
3252 dump_caches(int argc, char** argv)
3253 {
3254 	if (sCacheInfoTable == NULL) {
3255 		kprintf("No cache info table!\n");
3256 		return 0;
3257 	}
3258 
3259 	bool sortByPageCount = true;
3260 
3261 	for (int32 i = 1; i < argc; i++) {
3262 		if (strcmp(argv[i], "-c") == 0) {
3263 			sortByPageCount = false;
3264 		} else {
3265 			print_debugger_command_usage(argv[0]);
3266 			return 0;
3267 		}
3268 	}
3269 
3270 	uint32 totalCount = 0;
3271 	uint32 rootCount = 0;
3272 	off_t totalCommitted = 0;
3273 	page_num_t totalPages = 0;
3274 
3275 	VMCache* cache = gDebugCacheList;
3276 	while (cache) {
3277 		totalCount++;
3278 		if (cache->source == NULL) {
3279 			cache_info stackInfo;
3280 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3281 				? sCacheInfoTable[rootCount] : stackInfo;
3282 			rootCount++;
3283 			info.cache = cache;
3284 			info.page_count = 0;
3285 			info.committed = 0;
3286 			update_cache_info_recursively(cache, info);
3287 			totalCommitted += info.committed;
3288 			totalPages += info.page_count;
3289 		}
3290 
3291 		cache = cache->debug_next;
3292 	}
3293 
3294 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3295 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3296 			sortByPageCount
3297 				? &cache_info_compare_page_count
3298 				: &cache_info_compare_committed);
3299 	}
3300 
3301 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3302 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3303 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3304 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3305 			"page count" : "committed size");
3306 
3307 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3308 		for (uint32 i = 0; i < rootCount; i++) {
3309 			cache_info& info = sCacheInfoTable[i];
3310 			dump_caches_recursively(info.cache, info, 0);
3311 		}
3312 	} else
3313 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3314 
3315 	return 0;
3316 }
3317 
3318 #endif	// DEBUG_CACHE_LIST
3319 
3320 
3321 static int
3322 dump_cache(int argc, char** argv)
3323 {
3324 	VMCache* cache;
3325 	bool showPages = false;
3326 	int i = 1;
3327 
3328 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3329 		kprintf("usage: %s [-ps] <address>\n"
3330 			"  if -p is specified, all pages are shown; if -s is used,\n"
3331 			"  only the cache info is shown.\n", argv[0]);
3332 		return 0;
3333 	}
3334 	while (argv[i][0] == '-') {
3335 		char* arg = argv[i] + 1;
3336 		while (arg[0]) {
3337 			if (arg[0] == 'p')
3338 				showPages = true;
3339 			arg++;
3340 		}
3341 		i++;
3342 	}
3343 	if (argv[i] == NULL) {
3344 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3345 		return 0;
3346 	}
3347 
3348 	addr_t address = parse_expression(argv[i]);
3349 	if (address == 0)
3350 		return 0;
3351 
3352 	cache = (VMCache*)address;
3353 
3354 	cache->Dump(showPages);
3355 
3356 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3357 
3358 	return 0;
3359 }
3360 
3361 
3362 static void
3363 dump_area_struct(VMArea* area, bool mappings)
3364 {
3365 	kprintf("AREA: %p\n", area);
3366 	kprintf("name:\t\t'%s'\n", area->name);
3367 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3368 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3369 	kprintf("base:\t\t0x%lx\n", area->Base());
3370 	kprintf("size:\t\t0x%lx\n", area->Size());
3371 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3372 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3373 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3374 	kprintf("cache:\t\t%p\n", area->cache);
3375 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3376 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3377 	kprintf("cache_next:\t%p\n", area->cache_next);
3378 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3379 
3380 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3381 	if (mappings) {
3382 		kprintf("page mappings:\n");
3383 		while (iterator.HasNext()) {
3384 			vm_page_mapping* mapping = iterator.Next();
3385 			kprintf("  %p", mapping->page);
3386 		}
3387 		kprintf("\n");
3388 	} else {
3389 		uint32 count = 0;
3390 		while (iterator.Next() != NULL) {
3391 			count++;
3392 		}
3393 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3394 	}
3395 }
3396 
3397 
3398 static int
3399 dump_area(int argc, char** argv)
3400 {
3401 	bool mappings = false;
3402 	bool found = false;
3403 	int32 index = 1;
3404 	VMArea* area;
3405 	addr_t num;
3406 
3407 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3408 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3409 			"All areas matching either id/address/name are listed. You can\n"
3410 			"restrict the search to a specific field by prefixing the specifier\n"
3411 			"with one of the id/contains/address/name keywords.\n"
3412 			"-m shows the area's mappings as well.\n");
3413 		return 0;
3414 	}
3415 
3416 	if (!strcmp(argv[1], "-m")) {
3417 		mappings = true;
3418 		index++;
3419 	}
3420 
3421 	int32 mode = 0xf;
3422 	if (!strcmp(argv[index], "id"))
3423 		mode = 1;
3424 	else if (!strcmp(argv[index], "contains"))
3425 		mode = 2;
3426 	else if (!strcmp(argv[index], "name"))
3427 		mode = 4;
3428 	else if (!strcmp(argv[index], "address"))
3429 		mode = 0;
3430 	if (mode != 0xf)
3431 		index++;
3432 
3433 	if (index >= argc) {
3434 		kprintf("No area specifier given.\n");
3435 		return 0;
3436 	}
3437 
3438 	num = parse_expression(argv[index]);
3439 
3440 	if (mode == 0) {
3441 		dump_area_struct((struct VMArea*)num, mappings);
3442 	} else {
3443 		// walk through the area list, looking for the arguments as a name
3444 
3445 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3446 		while ((area = it.Next()) != NULL) {
3447 			if (((mode & 4) != 0
3448 					&& !strcmp(argv[index], area->name))
3449 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3450 					|| (((mode & 2) != 0 && area->Base() <= num
3451 						&& area->Base() + area->Size() > num))))) {
3452 				dump_area_struct(area, mappings);
3453 				found = true;
3454 			}
3455 		}
3456 
3457 		if (!found)
3458 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3459 	}
3460 
3461 	return 0;
3462 }
3463 
3464 
3465 static int
3466 dump_area_list(int argc, char** argv)
3467 {
3468 	VMArea* area;
3469 	const char* name = NULL;
3470 	int32 id = 0;
3471 
3472 	if (argc > 1) {
3473 		id = parse_expression(argv[1]);
3474 		if (id == 0)
3475 			name = argv[1];
3476 	}
3477 
3478 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3479 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3480 		B_PRINTF_POINTER_WIDTH, "size");
3481 
3482 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3483 	while ((area = it.Next()) != NULL) {
3484 		if ((id != 0 && area->address_space->ID() != id)
3485 			|| (name != NULL && strstr(area->name, name) == NULL))
3486 			continue;
3487 
3488 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3489 			area->id, (void*)area->Base(), (void*)area->Size(),
3490 			area->protection, area->wiring, area->name);
3491 	}
3492 	return 0;
3493 }
3494 
3495 
3496 static int
3497 dump_available_memory(int argc, char** argv)
3498 {
3499 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3500 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3501 	return 0;
3502 }
3503 
3504 
3505 static int
3506 dump_mapping_info(int argc, char** argv)
3507 {
3508 	bool reverseLookup = false;
3509 	bool pageLookup = false;
3510 
3511 	int argi = 1;
3512 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3513 		const char* arg = argv[argi];
3514 		if (strcmp(arg, "-r") == 0) {
3515 			reverseLookup = true;
3516 		} else if (strcmp(arg, "-p") == 0) {
3517 			reverseLookup = true;
3518 			pageLookup = true;
3519 		} else {
3520 			print_debugger_command_usage(argv[0]);
3521 			return 0;
3522 		}
3523 	}
3524 
3525 	// We need at least one argument, the address. Optionally a thread ID can be
3526 	// specified.
3527 	if (argi >= argc || argi + 2 < argc) {
3528 		print_debugger_command_usage(argv[0]);
3529 		return 0;
3530 	}
3531 
3532 	uint64 addressValue;
3533 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3534 		return 0;
3535 
3536 	Team* team = NULL;
3537 	if (argi < argc) {
3538 		uint64 threadID;
3539 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3540 			return 0;
3541 
3542 		Thread* thread = Thread::GetDebug(threadID);
3543 		if (thread == NULL) {
3544 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3545 			return 0;
3546 		}
3547 
3548 		team = thread->team;
3549 	}
3550 
3551 	if (reverseLookup) {
3552 		phys_addr_t physicalAddress;
3553 		if (pageLookup) {
3554 			vm_page* page = (vm_page*)(addr_t)addressValue;
3555 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3556 		} else {
3557 			physicalAddress = (phys_addr_t)addressValue;
3558 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3559 		}
3560 
3561 		kprintf("    Team     Virtual Address      Area\n");
3562 		kprintf("--------------------------------------\n");
3563 
3564 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3565 			Callback()
3566 				:
3567 				fAddressSpace(NULL)
3568 			{
3569 			}
3570 
3571 			void SetAddressSpace(VMAddressSpace* addressSpace)
3572 			{
3573 				fAddressSpace = addressSpace;
3574 			}
3575 
3576 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3577 			{
3578 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3579 					virtualAddress);
3580 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3581 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3582 				else
3583 					kprintf("\n");
3584 				return false;
3585 			}
3586 
3587 		private:
3588 			VMAddressSpace*	fAddressSpace;
3589 		} callback;
3590 
3591 		if (team != NULL) {
3592 			// team specified -- get its address space
3593 			VMAddressSpace* addressSpace = team->address_space;
3594 			if (addressSpace == NULL) {
3595 				kprintf("Failed to get address space!\n");
3596 				return 0;
3597 			}
3598 
3599 			callback.SetAddressSpace(addressSpace);
3600 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3601 				physicalAddress, callback);
3602 		} else {
3603 			// no team specified -- iterate through all address spaces
3604 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3605 				addressSpace != NULL;
3606 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3607 				callback.SetAddressSpace(addressSpace);
3608 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3609 					physicalAddress, callback);
3610 			}
3611 		}
3612 	} else {
3613 		// get the address space
3614 		addr_t virtualAddress = (addr_t)addressValue;
3615 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3616 		VMAddressSpace* addressSpace;
3617 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3618 			addressSpace = VMAddressSpace::Kernel();
3619 		} else if (team != NULL) {
3620 			addressSpace = team->address_space;
3621 		} else {
3622 			Thread* thread = debug_get_debugged_thread();
3623 			if (thread == NULL || thread->team == NULL) {
3624 				kprintf("Failed to get team!\n");
3625 				return 0;
3626 			}
3627 
3628 			addressSpace = thread->team->address_space;
3629 		}
3630 
3631 		if (addressSpace == NULL) {
3632 			kprintf("Failed to get address space!\n");
3633 			return 0;
3634 		}
3635 
3636 		// let the translation map implementation do the job
3637 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3638 	}
3639 
3640 	return 0;
3641 }
3642 
3643 
3644 /*!	Deletes all areas and reserved regions in the given address space.
3645 
3646 	The caller must ensure that none of the areas has any wired ranges.
3647 
3648 	\param addressSpace The address space.
3649 	\param deletingAddressSpace \c true, if the address space is in the process
3650 		of being deleted.
3651 */
3652 void
3653 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3654 {
3655 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3656 		addressSpace->ID()));
3657 
3658 	addressSpace->WriteLock();
3659 
3660 	// remove all reserved areas in this address space
3661 	addressSpace->UnreserveAllAddressRanges(0);
3662 
3663 	// delete all the areas in this address space
3664 	while (VMArea* area = addressSpace->FirstArea()) {
3665 		ASSERT(!area->IsWired());
3666 		delete_area(addressSpace, area, deletingAddressSpace);
3667 	}
3668 
3669 	addressSpace->WriteUnlock();
3670 }
3671 
3672 
3673 static area_id
3674 vm_area_for(addr_t address, bool kernel)
3675 {
3676 	team_id team;
3677 	if (IS_USER_ADDRESS(address)) {
3678 		// we try the user team address space, if any
3679 		team = VMAddressSpace::CurrentID();
3680 		if (team < 0)
3681 			return team;
3682 	} else
3683 		team = VMAddressSpace::KernelID();
3684 
3685 	AddressSpaceReadLocker locker(team);
3686 	if (!locker.IsLocked())
3687 		return B_BAD_TEAM_ID;
3688 
3689 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3690 	if (area != NULL) {
3691 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3692 			return B_ERROR;
3693 
3694 		return area->id;
3695 	}
3696 
3697 	return B_ERROR;
3698 }
3699 
3700 
3701 /*!	Frees physical pages that were used during the boot process.
3702 	\a end is inclusive.
3703 */
3704 static void
3705 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3706 {
3707 	// free all physical pages in the specified range
3708 
3709 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3710 		phys_addr_t physicalAddress;
3711 		uint32 flags;
3712 
3713 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3714 			&& (flags & PAGE_PRESENT) != 0) {
3715 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3716 			if (page != NULL && page->State() != PAGE_STATE_FREE
3717 					&& page->State() != PAGE_STATE_CLEAR
3718 					&& page->State() != PAGE_STATE_UNUSED) {
3719 				DEBUG_PAGE_ACCESS_START(page);
3720 				vm_page_set_state(page, PAGE_STATE_FREE);
3721 			}
3722 		}
3723 	}
3724 
3725 	// unmap the memory
3726 	map->Unmap(start, end);
3727 }
3728 
3729 
3730 void
3731 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3732 {
3733 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3734 	addr_t end = start + (size - 1);
3735 	addr_t lastEnd = start;
3736 
3737 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3738 		(void*)start, (void*)end));
3739 
	// The areas are sorted in virtual address space order, so
	// we just have to find the holes between them that fall
	// into the range we should dispose of
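	//
	// For illustration (a hypothetical layout): if the range to free fully
	// contains two kernel areas A1 and A2, in address order, the loop below
	// frees the gap before A1 and the gap between A1 and A2, and the code
	// after the loop frees the remainder behind A2.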
3743 
3744 	map->Lock();
3745 
3746 	for (VMAddressSpace::AreaIterator it
3747 				= VMAddressSpace::Kernel()->GetAreaIterator();
3748 			VMArea* area = it.Next();) {
3749 		addr_t areaStart = area->Base();
3750 		addr_t areaEnd = areaStart + (area->Size() - 1);
3751 
3752 		if (areaEnd < start)
3753 			continue;
3754 
3755 		if (areaStart > end) {
			// we are done, the area is already beyond what we have to free
3757 			break;
3758 		}
3759 
3760 		if (areaStart > lastEnd) {
3761 			// this is something we can free
3762 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3763 				(void*)areaStart));
3764 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3765 		}
3766 
3767 		if (areaEnd >= end) {
3768 			lastEnd = areaEnd;
3769 				// no +1 to prevent potential overflow
3770 			break;
3771 		}
3772 
3773 		lastEnd = areaEnd + 1;
3774 	}
3775 
3776 	if (lastEnd < end) {
3777 		// we can also get rid of some space at the end of the area
3778 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3779 			(void*)end));
3780 		unmap_and_free_physical_pages(map, lastEnd, end);
3781 	}
3782 
3783 	map->Unlock();
3784 }
3785 
3786 
3787 static void
3788 create_preloaded_image_areas(struct preloaded_image* _image)
3789 {
3790 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3791 	char name[B_OS_NAME_LENGTH];
3792 	void* address;
3793 	int32 length;
3794 
3795 	// use file name to create a good area name
3796 	char* fileName = strrchr(image->name, '/');
3797 	if (fileName == NULL)
3798 		fileName = image->name;
3799 	else
3800 		fileName++;
3801 
3802 	length = strlen(fileName);
3803 	// make sure there is enough space for the suffix
3804 	if (length > 25)
3805 		length = 25;
3806 
3807 	memcpy(name, fileName, length);
3808 	strcpy(name + length, "_text");
3809 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3810 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3811 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3812 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3813 		// this will later be remapped read-only/executable by the
3814 		// ELF initialization code
3815 
3816 	strcpy(name + length, "_data");
3817 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3818 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3819 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3820 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3821 }
3822 
3823 
3824 /*!	Frees all previously kernel arguments areas from the kernel_args structure.
3825 	Any boot loader resources contained in that arguments must not be accessed
3826 	anymore past this point.
3827 */
3828 void
3829 vm_free_kernel_args(kernel_args* args)
3830 {
3831 	uint32 i;
3832 
3833 	TRACE(("vm_free_kernel_args()\n"));
3834 
3835 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3836 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3837 		if (area >= B_OK)
3838 			delete_area(area);
3839 	}
3840 }
3841 
3842 
3843 static void
3844 allocate_kernel_args(kernel_args* args)
3845 {
3846 	TRACE(("allocate_kernel_args()\n"));
3847 
3848 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3849 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3850 
3851 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3852 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3853 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3854 	}
3855 }
3856 
3857 
3858 static void
3859 unreserve_boot_loader_ranges(kernel_args* args)
3860 {
3861 	TRACE(("unreserve_boot_loader_ranges()\n"));
3862 
3863 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3864 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3865 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3866 			args->virtual_allocated_range[i].size);
3867 	}
3868 }
3869 
3870 
3871 static void
3872 reserve_boot_loader_ranges(kernel_args* args)
3873 {
3874 	TRACE(("reserve_boot_loader_ranges()\n"));
3875 
3876 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3877 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3878 
		// If the address is not a kernel address, we just skip it. The
		// architecture-specific code has to deal with it.
3881 		if (!IS_KERNEL_ADDRESS(address)) {
3882 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3883 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3884 			continue;
3885 		}
3886 
3887 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3888 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3889 		if (status < B_OK)
3890 			panic("could not reserve boot loader ranges\n");
3891 	}
3892 }
3893 
3894 
3895 static addr_t
3896 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3897 {
3898 	size = PAGE_ALIGN(size);
3899 
3900 	// find a slot in the virtual allocation addr range
3901 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3902 		// check to see if the space between this one and the last is big enough
3903 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3904 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3905 			+ args->virtual_allocated_range[i - 1].size;
3906 
3907 		addr_t base = alignment > 0
3908 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3909 
3910 		if (base >= KERNEL_BASE && base < rangeStart
3911 				&& rangeStart - base >= size) {
3912 			args->virtual_allocated_range[i - 1].size
3913 				+= base + size - previousRangeEnd;
3914 			return base;
3915 		}
3916 	}
3917 
	// We didn't find a gap between the allocation ranges. This is OK;
	// see if there's a gap after the last one.
3920 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3921 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3922 		+ args->virtual_allocated_range[lastEntryIndex].size;
3923 	addr_t base = alignment > 0
3924 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3925 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3926 		args->virtual_allocated_range[lastEntryIndex].size
3927 			+= base + size - lastRangeEnd;
3928 		return base;
3929 	}
3930 
3931 	// see if there's a gap before the first one
3932 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3933 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3934 		base = rangeStart - size;
3935 		if (alignment > 0)
3936 			base = ROUNDDOWN(base, alignment);
3937 
3938 		if (base >= KERNEL_BASE) {
3939 			args->virtual_allocated_range[0].start = base;
3940 			args->virtual_allocated_range[0].size += rangeStart - base;
3941 			return base;
3942 		}
3943 	}
3944 
3945 	return 0;
3946 }
3947 
3948 
3949 static bool
3950 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3951 {
3952 	// TODO: horrible brute-force method of determining if the page can be
3953 	// allocated
3954 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3955 		if (address >= args->physical_memory_range[i].start
3956 			&& address < args->physical_memory_range[i].start
3957 				+ args->physical_memory_range[i].size)
3958 			return true;
3959 	}
3960 	return false;
3961 }
3962 
3963 
3964 page_num_t
3965 vm_allocate_early_physical_page(kernel_args* args)
3966 {
3967 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3968 		phys_addr_t nextPage;
3969 
3970 		nextPage = args->physical_allocated_range[i].start
3971 			+ args->physical_allocated_range[i].size;
		// see if the page right after this allocated paddr run can be allocated
3973 		if (i + 1 < args->num_physical_allocated_ranges
3974 			&& args->physical_allocated_range[i + 1].size != 0) {
3975 			// see if the next page will collide with the next allocated range
3976 			if (nextPage >= args->physical_allocated_range[i+1].start)
3977 				continue;
3978 		}
3979 		// see if the next physical page fits in the memory block
3980 		if (is_page_in_physical_memory_range(args, nextPage)) {
3981 			// we got one!
3982 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3983 			return nextPage / B_PAGE_SIZE;
3984 		}
3985 	}
3986 
3987 	// Expanding upwards didn't work, try going downwards.
3988 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3989 		phys_addr_t nextPage;
3990 
3991 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
		// see if the page just before this allocated paddr run can be allocated
		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
			// see if that page would collide with the previous allocated range
3995 			if (nextPage < args->physical_allocated_range[i-1].start
3996 				+ args->physical_allocated_range[i-1].size)
3997 				continue;
3998 		}
3999 		// see if the next physical page fits in the memory block
4000 		if (is_page_in_physical_memory_range(args, nextPage)) {
4001 			// we got one!
4002 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4003 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4004 			return nextPage / B_PAGE_SIZE;
4005 		}
4006 	}
4007 
4008 	return 0;
4009 		// could not allocate a block
4010 }
4011 
4012 
4013 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4014 	allocate some pages before the VM is completely up.
4015 */
4016 addr_t
4017 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4018 	uint32 attributes, addr_t alignment)
4019 {
4020 	if (physicalSize > virtualSize)
4021 		physicalSize = virtualSize;
4022 
4023 	// find the vaddr to allocate at
4024 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4025 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4026 	if (virtualBase == 0) {
4027 		panic("vm_allocate_early: could not allocate virtual address\n");
4028 		return 0;
4029 	}
4030 
4031 	// map the pages
4032 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4033 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4034 		if (physicalAddress == 0)
4035 			panic("error allocating early page!\n");
4036 
4037 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4038 
4039 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4040 			physicalAddress * B_PAGE_SIZE, attributes,
4041 			&vm_allocate_early_physical_page);
4042 	}
4043 
4044 	return virtualBase;
4045 }
4046 
4047 
4048 /*!	The main entrance point to initialize the VM. */
4049 status_t
4050 vm_init(kernel_args* args)
4051 {
4052 	struct preloaded_image* image;
4053 	void* address;
4054 	status_t err = 0;
4055 	uint32 i;
4056 
4057 	TRACE(("vm_init: entry\n"));
4058 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4059 	err = arch_vm_init(args);
4060 
4061 	// initialize some globals
4062 	vm_page_init_num_pages(args);
4063 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4064 
4065 	slab_init(args);
4066 
4067 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4068 	off_t heapSize = INITIAL_HEAP_SIZE;
	// try to accommodate low-memory systems
4070 	while (heapSize > sAvailableMemory / 8)
4071 		heapSize /= 2;
4072 	if (heapSize < 1024 * 1024)
4073 		panic("vm_init: go buy some RAM please.");
4074 
4075 	// map in the new heap and initialize it
4076 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4077 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4078 	TRACE(("heap at 0x%lx\n", heapBase));
4079 	heap_init(heapBase, heapSize);
4080 #endif
4081 
4082 	// initialize the free page list and physical page mapper
4083 	vm_page_init(args);
4084 
4085 	// initialize the cache allocators
4086 	vm_cache_init(args);
4087 
4088 	{
4089 		status_t error = VMAreaHash::Init();
4090 		if (error != B_OK)
4091 			panic("vm_init: error initializing area hash table\n");
4092 	}
4093 
4094 	VMAddressSpace::Init();
4095 	reserve_boot_loader_ranges(args);
4096 
4097 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4098 	heap_init_post_area();
4099 #endif
4100 
	// Do any further initialization that the architecture dependent layers may
	// need now
4103 	arch_vm_translation_map_init_post_area(args);
4104 	arch_vm_init_post_area(args);
4105 	vm_page_init_post_area(args);
4106 	slab_init_post_area();
4107 
4108 	// allocate areas to represent stuff that already exists
4109 
4110 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4111 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4112 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4113 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4114 #endif
4115 
4116 	allocate_kernel_args(args);
4117 
4118 	create_preloaded_image_areas(args->kernel_image);
4119 
4120 	// allocate areas for preloaded images
4121 	for (image = args->preloaded_images; image != NULL; image = image->next)
4122 		create_preloaded_image_areas(image);
4123 
4124 	// allocate kernel stacks
4125 	for (i = 0; i < args->num_cpus; i++) {
4126 		char name[64];
4127 
4128 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4129 		address = (void*)args->cpu_kstack[i].start;
4130 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4131 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4132 	}
4133 
4134 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4135 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4136 
4137 #if PARANOID_KERNEL_MALLOC
4138 	vm_block_address_range("uninitialized heap memory",
4139 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4140 #endif
4141 #if PARANOID_KERNEL_FREE
4142 	vm_block_address_range("freed heap memory",
4143 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4144 #endif
4145 
4146 	// create the object cache for the page mappings
4147 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4148 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4149 		NULL, NULL);
4150 	if (gPageMappingsObjectCache == NULL)
4151 		panic("failed to create page mappings object cache");
4152 
4153 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4154 
4155 #if DEBUG_CACHE_LIST
4156 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4157 		virtual_address_restrictions virtualRestrictions = {};
4158 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4159 		physical_address_restrictions physicalRestrictions = {};
4160 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4161 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4162 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4163 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4164 			&physicalRestrictions, (void**)&sCacheInfoTable);
4165 	}
4166 #endif	// DEBUG_CACHE_LIST
4167 
4168 	// add some debugger commands
4169 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4170 	add_debugger_command("area", &dump_area,
4171 		"Dump info about a particular area");
4172 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4173 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4174 #if DEBUG_CACHE_LIST
4175 	if (sCacheInfoTable != NULL) {
4176 		add_debugger_command_etc("caches", &dump_caches,
4177 			"List all VMCache trees",
4178 			"[ \"-c\" ]\n"
4179 			"All cache trees are listed sorted in decreasing order by number "
4180 				"of\n"
4181 			"used pages or, if \"-c\" is specified, by size of committed "
4182 				"memory.\n",
4183 			0);
4184 	}
4185 #endif
4186 	add_debugger_command("avail", &dump_available_memory,
4187 		"Dump available memory");
4188 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4189 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4190 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4191 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4192 	add_debugger_command("string", &display_mem, "dump strings");
4193 
	add_debugger_command_etc("mapping", &dump_mapping_info,
		"Print address mapping information",
		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
		"Prints low-level page mapping information for a given address. If\n"
		"neither \"-r\" nor \"-p\" is specified, <address> is a virtual\n"
		"address that is looked up in the translation map of the current\n"
		"team, or of the team specified by thread ID <thread ID>. If\n"
		"\"-r\" is specified, <address> is a physical address that is\n"
		"searched for in the translation maps of all teams, or of the team\n"
		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4204 		"<address> is the address of a vm_page structure. The behavior is\n"
4205 		"equivalent to specifying \"-r\" with the physical address of that\n"
4206 		"page.\n",
4207 		0);
4208 
4209 	TRACE(("vm_init: exit\n"));
4210 
4211 	vm_cache_init_post_heap();
4212 
4213 	return err;
4214 }
4215 
4216 
4217 status_t
4218 vm_init_post_sem(kernel_args* args)
4219 {
	// This frees all unused boot loader resources and makes their space
	// available again.
4222 	arch_vm_init_end(args);
4223 	unreserve_boot_loader_ranges(args);
4224 
	// Fill in all of the semaphores that were not allocated before.
	// Since we're still single-threaded and only the kernel address space
	// exists, it isn't that hard to find all of the ones we need to create.
4228 
4229 	arch_vm_translation_map_init_post_sem(args);
4230 
4231 	slab_init_post_sem();
4232 
4233 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4234 	heap_init_post_sem();
4235 #endif
4236 
4237 	return B_OK;
4238 }
4239 
4240 
4241 status_t
4242 vm_init_post_thread(kernel_args* args)
4243 {
4244 	vm_page_init_post_thread(args);
4245 	slab_init_post_thread();
4246 	return heap_init_post_thread();
4247 }
4248 
4249 
4250 status_t
4251 vm_init_post_modules(kernel_args* args)
4252 {
4253 	return arch_vm_init_post_modules(args);
4254 }
4255 
4256 
4257 void
4258 permit_page_faults(void)
4259 {
4260 	Thread* thread = thread_get_current_thread();
4261 	if (thread != NULL)
4262 		atomic_add(&thread->page_faults_allowed, 1);
4263 }
4264 
4265 
4266 void
4267 forbid_page_faults(void)
4268 {
4269 	Thread* thread = thread_get_current_thread();
4270 	if (thread != NULL)
4271 		atomic_add(&thread->page_faults_allowed, -1);
4272 }
4273 
4274 
4275 status_t
4276 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4277 	bool isUser, addr_t* newIP)
4278 {
4279 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4280 		faultAddress));
4281 
4282 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4283 
4284 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4285 	VMAddressSpace* addressSpace = NULL;
4286 
4287 	status_t status = B_OK;
4288 	*newIP = 0;
4289 	atomic_add((int32*)&sPageFaults, 1);
4290 
4291 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4292 		addressSpace = VMAddressSpace::GetKernel();
4293 	} else if (IS_USER_ADDRESS(pageAddress)) {
4294 		addressSpace = VMAddressSpace::GetCurrent();
4295 		if (addressSpace == NULL) {
4296 			if (!isUser) {
4297 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4298 					"memory!\n");
4299 				status = B_BAD_ADDRESS;
4300 				TPF(PageFaultError(-1,
4301 					VMPageFaultTracing
4302 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4303 			} else {
4304 				// XXX weird state.
4305 				panic("vm_page_fault: non kernel thread accessing user memory "
4306 					"that doesn't exist!\n");
4307 				status = B_BAD_ADDRESS;
4308 			}
4309 		}
4310 	} else {
4311 		// the hit was probably in the 64k DMZ between kernel and user space
4312 		// this keeps a user space thread from passing a buffer that crosses
4313 		// into kernel space
4314 		status = B_BAD_ADDRESS;
4315 		TPF(PageFaultError(-1,
4316 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4317 	}
4318 
4319 	if (status == B_OK) {
4320 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4321 			isUser, NULL);
4322 	}
4323 
4324 	if (status < B_OK) {
4325 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4326 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4327 			strerror(status), address, faultAddress, isWrite, isUser,
4328 			thread_get_current_thread_id());
4329 		if (!isUser) {
4330 			Thread* thread = thread_get_current_thread();
4331 			if (thread != NULL && thread->fault_handler != 0) {
				// this will cause the arch dependent page fault handler to
4333 				// modify the IP on the interrupt frame or whatever to return
4334 				// to this address
4335 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4336 			} else {
4337 				// unhandled page fault in the kernel
4338 				panic("vm_page_fault: unhandled page fault in kernel space at "
4339 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4340 			}
4341 		} else {
4342 			Thread* thread = thread_get_current_thread();
4343 
4344 #ifdef TRACE_FAULTS
4345 			VMArea* area = NULL;
4346 			if (addressSpace != NULL) {
4347 				addressSpace->ReadLock();
4348 				area = addressSpace->LookupArea(faultAddress);
4349 			}
4350 
4351 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4352 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4353 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4354 				thread->team->Name(), thread->team->id,
4355 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4356 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4357 					area->Base() : 0x0));
4358 
4359 			if (addressSpace != NULL)
4360 				addressSpace->ReadUnlock();
4361 #endif
4362 
4363 			// If the thread has a signal handler for SIGSEGV, we simply
4364 			// send it the signal. Otherwise we notify the user debugger
4365 			// first.
4366 			struct sigaction action;
4367 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4368 					&& action.sa_handler != SIG_DFL
4369 					&& action.sa_handler != SIG_IGN)
4370 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4371 					SIGSEGV)) {
4372 				Signal signal(SIGSEGV,
4373 					status == B_PERMISSION_DENIED
4374 						? SEGV_ACCERR : SEGV_MAPERR,
4375 					EFAULT, thread->team->id);
4376 				signal.SetAddress((void*)address);
4377 				send_signal_to_thread(thread, signal, 0);
4378 			}
4379 		}
4380 	}
4381 
4382 	if (addressSpace != NULL)
4383 		addressSpace->Put();
4384 
4385 	return B_HANDLED_INTERRUPT;
4386 }
4387 
4388 
4389 struct PageFaultContext {
4390 	AddressSpaceReadLocker	addressSpaceLocker;
4391 	VMCacheChainLocker		cacheChainLocker;
4392 
4393 	VMTranslationMap*		map;
4394 	VMCache*				topCache;
4395 	off_t					cacheOffset;
4396 	vm_page_reservation		reservation;
4397 	bool					isWrite;
4398 
4399 	// return values
4400 	vm_page*				page;
4401 	bool					restart;
4402 	bool					pageAllocated;
4403 
4404 
4405 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4406 		:
4407 		addressSpaceLocker(addressSpace, true),
4408 		map(addressSpace->TranslationMap()),
4409 		isWrite(isWrite)
4410 	{
4411 	}
4412 
4413 	~PageFaultContext()
4414 	{
4415 		UnlockAll();
4416 		vm_page_unreserve_pages(&reservation);
4417 	}
4418 
4419 	void Prepare(VMCache* topCache, off_t cacheOffset)
4420 	{
4421 		this->topCache = topCache;
4422 		this->cacheOffset = cacheOffset;
4423 		page = NULL;
4424 		restart = false;
4425 		pageAllocated = false;
4426 
4427 		cacheChainLocker.SetTo(topCache);
4428 	}
4429 
4430 	void UnlockAll(VMCache* exceptCache = NULL)
4431 	{
4432 		topCache = NULL;
4433 		addressSpaceLocker.Unlock();
4434 		cacheChainLocker.Unlock(exceptCache);
4435 	}
4436 };
4437 
4438 
4439 /*!	Gets the page that should be mapped into the area.
4440 	Returns an error code other than \c B_OK, if the page couldn't be found or
4441 	paged in. The locking state of the address space and the caches is undefined
4442 	in that case.
	Returns \c B_OK with \c context.restart set to \c true, if the function
4444 	had to unlock the address space and all caches and is supposed to be called
4445 	again.
4446 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4447 	found. It is returned in \c context.page. The address space will still be
4448 	locked as well as all caches starting from the top cache to at least the
4449 	cache the page lives in.
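
	The expected caller pattern is a retry loop; a simplified sketch of what
	vm_soft_fault() below does:

		while (true) {
			context.Prepare(topCache, cacheOffset);
			if (fault_get_page(context) != B_OK)
				break;
			if (!context.restart)
				break;
		}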
4450 */
4451 static status_t
4452 fault_get_page(PageFaultContext& context)
4453 {
4454 	VMCache* cache = context.topCache;
4455 	VMCache* lastCache = NULL;
4456 	vm_page* page = NULL;
4457 
4458 	while (cache != NULL) {
4459 		// We already hold the lock of the cache at this point.
4460 
4461 		lastCache = cache;
4462 
4463 		page = cache->LookupPage(context.cacheOffset);
4464 		if (page != NULL && page->busy) {
			// the page is busy -- wait for it to become unbusy
4466 			context.UnlockAll(cache);
4467 			cache->ReleaseRefLocked();
4468 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4469 
4470 			// restart the whole process
4471 			context.restart = true;
4472 			return B_OK;
4473 		}
4474 
4475 		if (page != NULL)
4476 			break;
4477 
4478 		// The current cache does not contain the page we're looking for.
4479 
4480 		// see if the backing store has it
4481 		if (cache->HasPage(context.cacheOffset)) {
4482 			// insert a fresh page and mark it busy -- we're going to read it in
4483 			page = vm_page_allocate_page(&context.reservation,
4484 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4485 			cache->InsertPage(page, context.cacheOffset);
4486 
4487 			// We need to unlock all caches and the address space while reading
4488 			// the page in. Keep a reference to the cache around.
4489 			cache->AcquireRefLocked();
4490 			context.UnlockAll();
4491 
4492 			// read the page in
4493 			generic_io_vec vec;
4494 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4495 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4496 
4497 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4498 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4499 
4500 			cache->Lock();
4501 
4502 			if (status < B_OK) {
4503 				// on error remove and free the page
4504 				dprintf("reading page from cache %p returned: %s!\n",
4505 					cache, strerror(status));
4506 
4507 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4508 				cache->RemovePage(page);
4509 				vm_page_set_state(page, PAGE_STATE_FREE);
4510 
4511 				cache->ReleaseRefAndUnlock();
4512 				return status;
4513 			}
4514 
4515 			// mark the page unbusy again
4516 			cache->MarkPageUnbusy(page);
4517 
4518 			DEBUG_PAGE_ACCESS_END(page);
4519 
4520 			// Since we needed to unlock everything temporarily, the area
4521 			// situation might have changed. So we need to restart the whole
4522 			// process.
4523 			cache->ReleaseRefAndUnlock();
4524 			context.restart = true;
4525 			return B_OK;
4526 		}
4527 
4528 		cache = context.cacheChainLocker.LockSourceCache();
4529 	}
4530 
4531 	if (page == NULL) {
		// There was no adequate page; determine the cache for a clean one.
		// Read-only pages go into the deepest cache; only the topmost cache
		// may have direct write access.
4535 		cache = context.isWrite ? context.topCache : lastCache;
4536 
4537 		// allocate a clean page
4538 		page = vm_page_allocate_page(&context.reservation,
4539 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4540 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4541 			page->physical_page_number));
4542 
4543 		// insert the new page into our cache
4544 		cache->InsertPage(page, context.cacheOffset);
4545 		context.pageAllocated = true;
4546 	} else if (page->Cache() != context.topCache && context.isWrite) {
4547 		// We have a page that has the data we want, but in the wrong cache
4548 		// object so we need to copy it and stick it into the top cache.
4549 		vm_page* sourcePage = page;
4550 
4551 		// TODO: If memory is low, it might be a good idea to steal the page
4552 		// from our source cache -- if possible, that is.
4553 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4554 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4555 
4556 		// To not needlessly kill concurrency we unlock all caches but the top
4557 		// one while copying the page. Lacking another mechanism to ensure that
4558 		// the source page doesn't disappear, we mark it busy.
4559 		sourcePage->busy = true;
4560 		context.cacheChainLocker.UnlockKeepRefs(true);
4561 
4562 		// copy the page
4563 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4564 			sourcePage->physical_page_number * B_PAGE_SIZE);
4565 
4566 		context.cacheChainLocker.RelockCaches(true);
4567 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4568 
4569 		// insert the new page into our cache
4570 		context.topCache->InsertPage(page, context.cacheOffset);
4571 		context.pageAllocated = true;
4572 	} else
4573 		DEBUG_PAGE_ACCESS_START(page);
4574 
4575 	context.page = page;
4576 	return B_OK;
4577 }
4578 
4579 
4580 /*!	Makes sure the address in the given address space is mapped.
4581 
4582 	\param addressSpace The address space.
4583 	\param originalAddress The address. Doesn't need to be page aligned.
4584 	\param isWrite If \c true the address shall be write-accessible.
4585 	\param isUser If \c true the access is requested by a userland team.
4586 	\param wirePage On success, if non \c NULL, the wired count of the page
4587 		mapped at the given address is incremented and the page is returned
4588 		via this parameter.
4589 	\return \c B_OK on success, another error code otherwise.
4590 */
4591 static status_t
4592 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4593 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4594 {
4595 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4596 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4597 		originalAddress, isWrite, isUser));
4598 
4599 	PageFaultContext context(addressSpace, isWrite);
4600 
4601 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4602 	status_t status = B_OK;
4603 
4604 	addressSpace->IncrementFaultCount();
4605 
4606 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4607 	// the pages upfront makes sure we don't have any cache locked, so that the
4608 	// page daemon/thief can do their job without problems.
4609 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4610 		originalAddress);
4611 	context.addressSpaceLocker.Unlock();
4612 	vm_page_reserve_pages(&context.reservation, reservePages,
4613 		addressSpace == VMAddressSpace::Kernel()
4614 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4615 
4616 	while (true) {
4617 		context.addressSpaceLocker.Lock();
4618 
4619 		// get the area the fault was in
4620 		VMArea* area = addressSpace->LookupArea(address);
4621 		if (area == NULL) {
4622 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4623 				"space\n", originalAddress);
4624 			TPF(PageFaultError(-1,
4625 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4626 			status = B_BAD_ADDRESS;
4627 			break;
4628 		}
4629 
4630 		// check permissions
4631 		uint32 protection = get_area_page_protection(area, address);
4632 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4633 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4634 				area->id, (void*)originalAddress);
4635 			TPF(PageFaultError(area->id,
4636 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4637 			status = B_PERMISSION_DENIED;
4638 			break;
4639 		}
4640 		if (isWrite && (protection
4641 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4642 			dprintf("write access attempted on write-protected area 0x%"
4643 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4644 			TPF(PageFaultError(area->id,
4645 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4646 			status = B_PERMISSION_DENIED;
4647 			break;
4648 		} else if (isExecute && (protection
4649 				& (B_EXECUTE_AREA
4650 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4651 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4652 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4653 			TPF(PageFaultError(area->id,
4654 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4655 			status = B_PERMISSION_DENIED;
4656 			break;
4657 		} else if (!isWrite && !isExecute && (protection
4658 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4659 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4660 				" at %p\n", area->id, (void*)originalAddress);
4661 			TPF(PageFaultError(area->id,
4662 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4663 			status = B_PERMISSION_DENIED;
4664 			break;
4665 		}
4666 
4667 		// We have the area, it was a valid access, so let's try to resolve the
4668 		// page fault now.
4669 		// At first, the top most cache from the area is investigated.
4670 
4671 		context.Prepare(vm_area_get_locked_cache(area),
4672 			address - area->Base() + area->cache_offset);
4673 
4674 		// See if this cache has a fault handler -- this will do all the work
4675 		// for us.
4676 		{
4677 			// Note, since the page fault is resolved with interrupts enabled,
4678 			// the fault handler could be called more than once for the same
4679 			// reason -- the store must take this into account.
4680 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4681 			if (status != B_BAD_HANDLER)
4682 				break;
4683 		}
4684 
4685 		// The top most cache has no fault handler, so let's see if the cache or
4686 		// its sources already have the page we're searching for (we're going
4687 		// from top to bottom).
4688 		status = fault_get_page(context);
4689 		if (status != B_OK) {
4690 			TPF(PageFaultError(area->id, status));
4691 			break;
4692 		}
4693 
4694 		if (context.restart)
4695 			continue;
4696 
4697 		// All went fine, all there is left to do is to map the page into the
4698 		// address space.
4699 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4700 			context.page));
4701 
4702 		// If the page doesn't reside in the area's cache, we need to make sure
4703 		// it's mapped in read-only, so that we cannot overwrite someone else's
4704 		// data (copy-on-write)
4705 		uint32 newProtection = protection;
4706 		if (context.page->Cache() != context.topCache && !isWrite)
4707 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4708 
4709 		bool unmapPage = false;
4710 		bool mapPage = true;
4711 
4712 		// check whether there's already a page mapped at the address
4713 		context.map->Lock();
4714 
4715 		phys_addr_t physicalAddress;
4716 		uint32 flags;
4717 		vm_page* mappedPage = NULL;
4718 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4719 			&& (flags & PAGE_PRESENT) != 0
4720 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4721 				!= NULL) {
4722 			// Yep there's already a page. If it's ours, we can simply adjust
4723 			// its protection. Otherwise we have to unmap it.
4724 			if (mappedPage == context.page) {
4725 				context.map->ProtectPage(area, address, newProtection);
4726 					// Note: We assume that ProtectPage() is atomic (i.e.
4727 					// the page isn't temporarily unmapped), otherwise we'd have
4728 					// to make sure it isn't wired.
4729 				mapPage = false;
4730 			} else
4731 				unmapPage = true;
4732 		}
4733 
4734 		context.map->Unlock();
4735 
4736 		if (unmapPage) {
4737 			// If the page is wired, we can't unmap it. Wait until it is unwired
4738 			// again and restart. Note that the page cannot be wired for
			// writing, since it isn't in the topmost cache. So we can safely
4740 			// ignore ranges wired for writing (our own and other concurrent
4741 			// wiring attempts in progress) and in fact have to do that to avoid
4742 			// a deadlock.
4743 			VMAreaUnwiredWaiter waiter;
4744 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4745 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4746 				// unlock everything and wait
4747 				if (context.pageAllocated) {
4748 					// ... but since we allocated a page and inserted it into
4749 					// the top cache, remove and free it first. Otherwise we'd
4750 					// have a page from a lower cache mapped while an upper
4751 					// cache has a page that would shadow it.
4752 					context.topCache->RemovePage(context.page);
4753 					vm_page_free_etc(context.topCache, context.page,
4754 						&context.reservation);
4755 				} else
4756 					DEBUG_PAGE_ACCESS_END(context.page);
4757 
4758 				context.UnlockAll();
4759 				waiter.waitEntry.Wait();
4760 				continue;
4761 			}
4762 
4763 			// Note: The mapped page is a page of a lower cache. We are
			// guaranteed to have that cache locked, our new page is a copy of
4765 			// that page, and the page is not busy. The logic for that guarantee
4766 			// is as follows: Since the page is mapped, it must live in the top
4767 			// cache (ruled out above) or any of its lower caches, and there is
4768 			// (was before the new page was inserted) no other page in any
4769 			// cache between the top cache and the page's cache (otherwise that
4770 			// would be mapped instead). That in turn means that our algorithm
4771 			// must have found it and therefore it cannot be busy either.
4772 			DEBUG_PAGE_ACCESS_START(mappedPage);
4773 			unmap_page(area, address);
4774 			DEBUG_PAGE_ACCESS_END(mappedPage);
4775 		}
4776 
4777 		if (mapPage) {
4778 			if (map_page(area, context.page, address, newProtection,
4779 					&context.reservation) != B_OK) {
				// Mapping can only fail when the page mapping object couldn't
				// be allocated. Save for the missing mapping, everything is
4782 				// fine, though. If this was a regular page fault, we'll simply
4783 				// leave and probably fault again. To make sure we'll have more
4784 				// luck then, we ensure that the minimum object reserve is
4785 				// available.
4786 				DEBUG_PAGE_ACCESS_END(context.page);
4787 
4788 				context.UnlockAll();
4789 
4790 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4791 						!= B_OK) {
4792 					// Apparently the situation is serious. Let's get ourselves
4793 					// killed.
4794 					status = B_NO_MEMORY;
4795 				} else if (wirePage != NULL) {
4796 					// The caller expects us to wire the page. Since
4797 					// object_cache_reserve() succeeded, we should now be able
4798 					// to allocate a mapping structure. Restart.
4799 					continue;
4800 				}
4801 
4802 				break;
4803 			}
4804 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4805 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4806 
4807 		// also wire the page, if requested
4808 		if (wirePage != NULL && status == B_OK) {
4809 			increment_page_wired_count(context.page);
4810 			*wirePage = context.page;
4811 		}
4812 
4813 		DEBUG_PAGE_ACCESS_END(context.page);
4814 
4815 		break;
4816 	}
4817 
4818 	return status;
4819 }
4820 
4821 
4822 status_t
4823 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4824 {
4825 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4826 }
4827 
4828 status_t
4829 vm_put_physical_page(addr_t vaddr, void* handle)
4830 {
4831 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4832 }
4833 
4834 
4835 status_t
4836 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4837 	void** _handle)
4838 {
4839 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4840 }
4841 
4842 status_t
4843 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4844 {
4845 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4846 }
4847 
4848 
4849 status_t
4850 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4851 {
4852 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4853 }
4854 
4855 status_t
4856 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4857 {
4858 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4859 }
4860 
4861 
4862 void
4863 vm_get_info(system_info* info)
4864 {
4865 	swap_get_info(info);
4866 
4867 	MutexLocker locker(sAvailableMemoryLock);
4868 	info->needed_memory = sNeededMemory;
4869 	info->free_memory = sAvailableMemory;
4870 }
4871 
4872 
4873 uint32
4874 vm_num_page_faults(void)
4875 {
4876 	return sPageFaults;
4877 }
4878 
4879 
4880 off_t
4881 vm_available_memory(void)
4882 {
4883 	MutexLocker locker(sAvailableMemoryLock);
4884 	return sAvailableMemory;
4885 }
4886 
4887 
4888 off_t
4889 vm_available_not_needed_memory(void)
4890 {
4891 	MutexLocker locker(sAvailableMemoryLock);
4892 	return sAvailableMemory - sNeededMemory;
4893 }
4894 
4895 
4896 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4897 	debugger.
4898 */
4899 off_t
4900 vm_available_not_needed_memory_debug(void)
4901 {
4902 	return sAvailableMemory - sNeededMemory;
4903 }
4904 
4905 
4906 size_t
4907 vm_kernel_address_space_left(void)
4908 {
4909 	return VMAddressSpace::Kernel()->FreeSpace();
4910 }
4911 
4912 
4913 void
4914 vm_unreserve_memory(size_t amount)
4915 {
4916 	mutex_lock(&sAvailableMemoryLock);
4917 
4918 	sAvailableMemory += amount;
4919 
4920 	mutex_unlock(&sAvailableMemoryLock);
4921 }
4922 
4923 
4924 status_t
4925 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4926 {
4927 	size_t reserve = kMemoryReserveForPriority[priority];
4928 
4929 	MutexLocker locker(sAvailableMemoryLock);
4930 
4931 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4932 
4933 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4934 		sAvailableMemory -= amount;
4935 		return B_OK;
4936 	}
4937 
4938 	if (timeout <= 0)
4939 		return B_NO_MEMORY;
4940 
4941 	// turn timeout into an absolute timeout
4942 	timeout += system_time();
4943 
4944 	// loop until we've got the memory or the timeout occurs
4945 	do {
4946 		sNeededMemory += amount;
4947 
4948 		// call the low resource manager
4949 		locker.Unlock();
4950 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4951 			B_ABSOLUTE_TIMEOUT, timeout);
4952 		locker.Lock();
4953 
4954 		sNeededMemory -= amount;
4955 
4956 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4957 			sAvailableMemory -= amount;
4958 			return B_OK;
4959 		}
4960 	} while (timeout > system_time());
4961 
4962 	return B_NO_MEMORY;
4963 }
4964 
4965 
4966 status_t
4967 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4968 {
4969 	// NOTE: The caller is responsible for synchronizing calls to this function!
4970 
4971 	AddressSpaceReadLocker locker;
4972 	VMArea* area;
4973 	status_t status = locker.SetFromArea(id, area);
4974 	if (status != B_OK)
4975 		return status;
4976 
4977 	// nothing to do, if the type doesn't change
4978 	uint32 oldType = area->MemoryType();
4979 	if (type == oldType)
4980 		return B_OK;
4981 
4982 	// set the memory type of the area and the mapped pages
4983 	VMTranslationMap* map = area->address_space->TranslationMap();
4984 	map->Lock();
4985 	area->SetMemoryType(type);
4986 	map->ProtectArea(area, area->protection);
4987 	map->Unlock();
4988 
4989 	// set the physical memory type
4990 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4991 	if (error != B_OK) {
4992 		// reset the memory type of the area and the mapped pages
4993 		map->Lock();
4994 		area->SetMemoryType(oldType);
4995 		map->ProtectArea(area, area->protection);
4996 		map->Unlock();
4997 		return error;
4998 	}
4999 
	return B_OK;
}
5003 
5004 
5005 /*!	This function enforces some protection properties:
5006 	 - kernel areas must be W^X (after kernel startup)
5007 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5008 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5009 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
5010 	   and B_KERNEL_WRITE_AREA.
5011 */
5012 static void
5013 fix_protection(uint32* protection)
5014 {
5015 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5016 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5017 			|| (*protection & B_WRITE_AREA) != 0)
5018 		&& !gKernelStartup)
5019 		panic("kernel areas cannot be both writable and executable!");
5020 
5021 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5022 		if ((*protection & B_USER_PROTECTION) == 0
5023 			|| (*protection & B_WRITE_AREA) != 0)
5024 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5025 		else
5026 			*protection |= B_KERNEL_READ_AREA;
5027 	}
5028 }
5029 
5030 
5031 static void
5032 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5033 {
5034 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5035 	info->area = area->id;
5036 	info->address = (void*)area->Base();
5037 	info->size = area->Size();
5038 	info->protection = area->protection;
5039 	info->lock = B_FULL_LOCK;
5040 	info->team = area->address_space->ID();
5041 	info->copy_count = 0;
5042 	info->in_count = 0;
5043 	info->out_count = 0;
5044 		// TODO: retrieve real values here!
5045 
5046 	VMCache* cache = vm_area_get_locked_cache(area);
5047 
5048 	// Note, this is a simplification; the cache could be larger than this area
5049 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5050 
5051 	vm_area_put_locked_cache(cache);
5052 }
5053 
5054 
5055 static status_t
5056 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5057 {
5058 	// is newSize a multiple of B_PAGE_SIZE?
5059 	if (newSize & (B_PAGE_SIZE - 1))
5060 		return B_BAD_VALUE;
5061 
5062 	// lock all affected address spaces and the cache
5063 	VMArea* area;
5064 	VMCache* cache;
5065 
5066 	MultiAddressSpaceLocker locker;
5067 	AreaCacheLocker cacheLocker;
5068 
5069 	status_t status;
5070 	size_t oldSize;
5071 	bool anyKernelArea;
5072 	bool restart;
5073 
5074 	do {
5075 		anyKernelArea = false;
5076 		restart = false;
5077 
5078 		locker.Unset();
5079 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5080 		if (status != B_OK)
5081 			return status;
5082 		cacheLocker.SetTo(cache, true);	// already locked
5083 
5084 		// enforce restrictions
5085 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5086 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5087 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5088 				"resize kernel area %" B_PRId32 " (%s)\n",
5089 				team_get_current_team_id(), areaID, area->name);
5090 			return B_NOT_ALLOWED;
5091 		}
5092 		// TODO: Enforce all restrictions (team, etc.)!
5093 
5094 		oldSize = area->Size();
5095 		if (newSize == oldSize)
5096 			return B_OK;
5097 
5098 		if (cache->type != CACHE_TYPE_RAM)
5099 			return B_NOT_ALLOWED;
5100 
5101 		if (oldSize < newSize) {
5102 			// We need to check if all areas of this cache can be resized.
5103 			for (VMArea* current = cache->areas; current != NULL;
5104 					current = current->cache_next) {
5105 				if (!current->address_space->CanResizeArea(current, newSize))
5106 					return B_ERROR;
5107 				anyKernelArea
5108 					|= current->address_space == VMAddressSpace::Kernel();
5109 			}
5110 		} else {
5111 			// We're shrinking the areas, so we must make sure the affected
5112 			// ranges are not wired.
5113 			for (VMArea* current = cache->areas; current != NULL;
5114 					current = current->cache_next) {
5115 				anyKernelArea
5116 					|= current->address_space == VMAddressSpace::Kernel();
5117 
5118 				if (wait_if_area_range_is_wired(current,
5119 						current->Base() + newSize, oldSize - newSize, &locker,
5120 						&cacheLocker)) {
5121 					restart = true;
5122 					break;
5123 				}
5124 			}
5125 		}
5126 	} while (restart);
5127 
5128 	// Okay, looks good so far, so let's do it
5129 
5130 	int priority = kernel && anyKernelArea
5131 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5132 	uint32 allocationFlags = kernel && anyKernelArea
5133 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5134 
5135 	if (oldSize < newSize) {
5136 		// Growing the cache can fail, so we do it first.
5137 		status = cache->Resize(cache->virtual_base + newSize, priority);
5138 		if (status != B_OK)
5139 			return status;
5140 	}
5141 
5142 	for (VMArea* current = cache->areas; current != NULL;
5143 			current = current->cache_next) {
5144 		status = current->address_space->ResizeArea(current, newSize,
5145 			allocationFlags);
5146 		if (status != B_OK)
5147 			break;
5148 
5149 		// We also need to unmap all pages beyond the new size, if the area has
5150 		// shrunk
5151 		if (newSize < oldSize) {
5152 			VMCacheChainLocker cacheChainLocker(cache);
5153 			cacheChainLocker.LockAllSourceCaches();
5154 
5155 			unmap_pages(current, current->Base() + newSize,
5156 				oldSize - newSize);
5157 
5158 			cacheChainLocker.Unlock(cache);
5159 		}
5160 	}
5161 
5162 	if (status == B_OK) {
5163 		// Shrink or grow individual page protections if in use.
5164 		if (area->page_protections != NULL) {
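			// Page protections are stored as one 4 bit entry per page, i.e.
			// two pages per byte -- hence the (pageCount + 1) / 2 sizing below
			// and the "<< 4" when filling the upper nibbles.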
5165 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5166 			uint8* newProtections
5167 				= (uint8*)realloc(area->page_protections, bytes);
5168 			if (newProtections == NULL)
5169 				status = B_NO_MEMORY;
5170 			else {
5171 				area->page_protections = newProtections;
5172 
5173 				if (oldSize < newSize) {
5174 					// init the additional page protections to that of the area
5175 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5176 					uint32 areaProtection = area->protection
5177 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5178 					memset(area->page_protections + offset,
5179 						areaProtection | (areaProtection << 4), bytes - offset);
5180 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5181 						uint8& entry = area->page_protections[offset - 1];
5182 						entry = (entry & 0x0f) | (areaProtection << 4);
5183 					}
5184 				}
5185 			}
5186 		}
5187 	}
5188 
5189 	// shrinking the cache can't fail, so we do it now
5190 	if (status == B_OK && newSize < oldSize)
5191 		status = cache->Resize(cache->virtual_base + newSize, priority);
5192 
5193 	if (status != B_OK) {
5194 		// Something failed -- resize the areas back to their original size.
5195 		// This can fail, too, in which case we're seriously screwed.
5196 		for (VMArea* current = cache->areas; current != NULL;
5197 				current = current->cache_next) {
5198 			if (current->address_space->ResizeArea(current, oldSize,
5199 					allocationFlags) != B_OK) {
				panic("vm_resize_area(): Failed and unable to restore the "
					"original state.");
5202 			}
5203 		}
5204 
5205 		cache->Resize(cache->virtual_base + oldSize, priority);
5206 	}
5207 
5208 	// TODO: we must honour the lock restrictions of this area
5209 	return status;
5210 }
5211 
5212 
5213 status_t
5214 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5215 {
5216 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5217 }
5218 
5219 
5220 status_t
5221 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5222 {
5223 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5224 }
5225 
5226 
5227 status_t
5228 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5229 	bool user)
5230 {
5231 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5232 }
5233 
5234 
5235 void
5236 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5237 {
5238 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5239 }
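

// A hypothetical usage sketch (not part of the original vm.cpp): how a kernel
// component might use the physical-memory helpers above, assuming it already
// knows a valid physical page address (e.g. obtained via vm_wire_page()
// further below). example_fill_physical_page() is a made-up helper name.
static status_t
example_fill_physical_page(phys_addr_t physicalPageAddress,
	const void* pattern, size_t patternSize)
{
	if (patternSize > B_PAGE_SIZE)
		return B_BAD_VALUE;

	// clear the whole page, ...
	status_t status = vm_memset_physical(physicalPageAddress, 0, B_PAGE_SIZE);
	if (status != B_OK)
		return status;

	// ... then copy the kernel buffer to its start
	return vm_memcpy_to_physical(physicalPageAddress, pattern, patternSize,
		false);
}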
5240 
5241 
5242 /*!	Copies a range of memory directly from/to a page that might not be mapped
5243 	at the moment.
5244 
5245 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5246 	walks through the respective area's cache chain to find the physical page
5247 	and copies from/to it directly.
5248 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5249 	must not cross a page boundary.
5250 
5251 	\param teamID The team ID identifying the address space \a unsafeMemory is
5252 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5253 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5254 		is passed, the address space of the thread returned by
5255 		debug_get_debugged_thread() is used.
5256 	\param unsafeMemory The start of the unsafe memory range to be copied
5257 		from/to.
5258 	\param buffer A safely accessible kernel buffer to be copied from/to.
5259 	\param size The number of bytes to be copied.
5260 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5261 		\a unsafeMemory, otherwise the other way around.
5262 */
5263 status_t
5264 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5265 	size_t size, bool copyToUnsafe)
5266 {
5267 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5268 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5269 		return B_BAD_VALUE;
5270 	}
5271 
5272 	// get the address space for the debugged thread
5273 	VMAddressSpace* addressSpace;
5274 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5275 		addressSpace = VMAddressSpace::Kernel();
5276 	} else if (teamID == B_CURRENT_TEAM) {
5277 		Thread* thread = debug_get_debugged_thread();
5278 		if (thread == NULL || thread->team == NULL)
5279 			return B_BAD_ADDRESS;
5280 
5281 		addressSpace = thread->team->address_space;
5282 	} else
5283 		addressSpace = VMAddressSpace::DebugGet(teamID);
5284 
5285 	if (addressSpace == NULL)
5286 		return B_BAD_ADDRESS;
5287 
5288 	// get the area
5289 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5290 	if (area == NULL)
5291 		return B_BAD_ADDRESS;
5292 
5293 	// search the page
5294 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5295 		+ area->cache_offset;
5296 	VMCache* cache = area->cache;
5297 	vm_page* page = NULL;
5298 	while (cache != NULL) {
5299 		page = cache->DebugLookupPage(cacheOffset);
5300 		if (page != NULL)
5301 			break;
5302 
5303 		// Page not found in this cache -- if it is paged out, we must not try
5304 		// to get it from lower caches.
5305 		if (cache->DebugHasPage(cacheOffset))
5306 			break;
5307 
5308 		cache = cache->source;
5309 	}
5310 
5311 	if (page == NULL)
5312 		return B_UNSUPPORTED;
5313 
5314 	// copy from/to physical memory
5315 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5316 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5317 
5318 	if (copyToUnsafe) {
5319 		if (page->Cache() != area->cache)
5320 			return B_UNSUPPORTED;
5321 
5322 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5323 	}
5324 
5325 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5326 }
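

// A hypothetical usage sketch (not part of the original vm.cpp): reading a
// small amount of memory from the currently debugged thread's team with the
// function above, e.g. from a kernel debugger command. example_debug_peek()
// is a made-up helper name.
static status_t
example_debug_peek(void* unsafeAddress, void* buffer, size_t size)
{
	// the range must not cross a page boundary -- clamp it
	size_t pageRemainder = B_PAGE_SIZE - (addr_t)unsafeAddress % B_PAGE_SIZE;
	if (size > pageRemainder)
		size = pageRemainder;

	return vm_debug_copy_page_memory(B_CURRENT_TEAM, unsafeAddress, buffer,
		size, false);
}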
5327 
5328 
5329 static inline bool
5330 validate_user_range(const void* addr, size_t size)
5331 {
5332 	addr_t address = (addr_t)addr;
5333 
5334 	// Check for overflows on all addresses.
5335 	if ((address + size) < address)
5336 		return false;
5337 
5338 	// Validate that the address does not cross the kernel/user boundary.
5339 	if (IS_USER_ADDRESS(address))
5340 		return IS_USER_ADDRESS(address + size);
5341 	else
5342 		return !IS_USER_ADDRESS(address + size);
5343 }
5344 
5345 
5346 //	#pragma mark - kernel public API
5347 
5348 
5349 status_t
5350 user_memcpy(void* to, const void* from, size_t size)
5351 {
5352 	if (!validate_user_range(to, size) || !validate_user_range(from, size))
5353 		return B_BAD_ADDRESS;
5354 
5355 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5356 		return B_BAD_ADDRESS;
5357 
5358 	return B_OK;
5359 }
5360 
5361 
5362 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5363 	the string in \a to, NULL-terminating the result.
5364 
5365 	\param to Pointer to the destination C-string.
5366 	\param from Pointer to the source C-string.
5367 	\param size Size in bytes of the string buffer pointed to by \a to.
5368 
5369 	\return strlen(\a from).
5370 */
5371 ssize_t
5372 user_strlcpy(char* to, const char* from, size_t size)
5373 {
5374 	if (to == NULL && size != 0)
5375 		return B_BAD_VALUE;
5376 	if (from == NULL)
5377 		return B_BAD_ADDRESS;
5378 
5379 	// Protect the source address from overflows.
5380 	size_t maxSize = size;
5381 	if ((addr_t)from + maxSize < (addr_t)from)
5382 		maxSize -= (addr_t)from + maxSize;
5383 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5384 		maxSize = USER_TOP - (addr_t)from;
5385 
5386 	if (!validate_user_range(to, maxSize))
5387 		return B_BAD_ADDRESS;
5388 
5389 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5390 	if (result < 0)
5391 		return result;
5392 
5393 	// If we hit the address overflow boundary, fail.
5394 	if ((size_t)result >= maxSize && maxSize < size)
5395 		return B_BAD_ADDRESS;
5396 
5397 	return result;
5398 }
5399 
5400 
5401 status_t
5402 user_memset(void* s, char c, size_t count)
5403 {
5404 	if (!validate_user_range(s, count))
5405 		return B_BAD_ADDRESS;
5406 
5407 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5408 		return B_BAD_ADDRESS;
5409 
5410 	return B_OK;
5411 }
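

// A hypothetical usage sketch (not part of the original vm.cpp): the usual
// copy-in pattern for syscall parameters, mirroring the _user_*() functions
// further below. example_user_set_name() and its parameters are made up.
static status_t
example_user_set_name(const char* userName, void* userAddress)
{
	char name[B_OS_NAME_LENGTH];
	void* address;

	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
		return B_BAD_ADDRESS;

	// only the kernel-side copies are used from here on
	dprintf("example_user_set_name: \"%s\" at %p\n", name, address);
	return B_OK;
}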
5412 
5413 
5414 /*!	Wires a single page at the given address.
5415 
5416 	\param team The team whose address space the address belongs to. Also
5417 		supports \c B_CURRENT_TEAM. If the given address is a kernel address, the
5418 		parameter is ignored.
5419 	\param address The virtual address to wire down. Does not need to
5420 		be page aligned.
5421 	\param writable If \c true the page shall be writable.
5422 	\param info On success the info is filled in, among other things
5423 		containing the physical address the given virtual one translates to.
5424 	\return \c B_OK, when the page could be wired, another error code otherwise.
5425 */
5426 status_t
5427 vm_wire_page(team_id team, addr_t address, bool writable,
5428 	VMPageWiringInfo* info)
5429 {
5430 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5431 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5432 
5433 	// compute the page protection that is required
5434 	bool isUser = IS_USER_ADDRESS(address);
5435 	uint32 requiredProtection = PAGE_PRESENT
5436 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5437 	if (writable)
5438 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5439 
5440 	// get and read lock the address space
5441 	VMAddressSpace* addressSpace = NULL;
5442 	if (isUser) {
5443 		if (team == B_CURRENT_TEAM)
5444 			addressSpace = VMAddressSpace::GetCurrent();
5445 		else
5446 			addressSpace = VMAddressSpace::Get(team);
5447 	} else
5448 		addressSpace = VMAddressSpace::GetKernel();
5449 	if (addressSpace == NULL)
5450 		return B_ERROR;
5451 
5452 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5453 
5454 	VMTranslationMap* map = addressSpace->TranslationMap();
5455 	status_t error = B_OK;
5456 
5457 	// get the area
5458 	VMArea* area = addressSpace->LookupArea(pageAddress);
5459 	if (area == NULL) {
5460 		addressSpace->Put();
5461 		return B_BAD_ADDRESS;
5462 	}
5463 
5464 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5465 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5466 
5467 	// mark the area range wired
5468 	area->Wire(&info->range);
5469 
5470 	// Lock the area's cache chain and the translation map. Needed to look
5471 	// up the page and play with its wired count.
5472 	cacheChainLocker.LockAllSourceCaches();
5473 	map->Lock();
5474 
5475 	phys_addr_t physicalAddress;
5476 	uint32 flags;
5477 	vm_page* page;
5478 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5479 		&& (flags & requiredProtection) == requiredProtection
5480 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5481 			!= NULL) {
5482 		// Already mapped with the correct permissions -- just increment
5483 		// the page's wired count.
5484 		increment_page_wired_count(page);
5485 
5486 		map->Unlock();
5487 		cacheChainLocker.Unlock();
5488 		addressSpaceLocker.Unlock();
5489 	} else {
5490 		// Let vm_soft_fault() map the page for us, if possible. We need
5491 		// to fully unlock to avoid deadlocks. Since we have already
5492 		// wired the area itself, nothing disturbing will happen with it
5493 		// in the meantime.
5494 		map->Unlock();
5495 		cacheChainLocker.Unlock();
5496 		addressSpaceLocker.Unlock();
5497 
5498 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5499 			isUser, &page);
5500 
5501 		if (error != B_OK) {
5502 			// The page could not be mapped -- clean up.
5503 			VMCache* cache = vm_area_get_locked_cache(area);
5504 			area->Unwire(&info->range);
5505 			cache->ReleaseRefAndUnlock();
5506 			addressSpace->Put();
5507 			return error;
5508 		}
5509 	}
5510 
5511 	info->physicalAddress
5512 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5513 			+ address % B_PAGE_SIZE;
5514 	info->page = page;
5515 
5516 	return B_OK;
5517 }
5518 
5519 
5520 /*!	Unwires a single page previously wired via vm_wire_page().
5521 
5522 	\param info The same object passed to vm_wire_page() before.
5523 */
5524 void
5525 vm_unwire_page(VMPageWiringInfo* info)
5526 {
5527 	// lock the address space
5528 	VMArea* area = info->range.area;
5529 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5530 		// takes over our reference
5531 
5532 	// lock the top cache
5533 	VMCache* cache = vm_area_get_locked_cache(area);
5534 	VMCacheChainLocker cacheChainLocker(cache);
5535 
5536 	if (info->page->Cache() != cache) {
5537 		// The page is not in the top cache, so we lock the whole cache chain
5538 		// before touching the page's wired count.
5539 		cacheChainLocker.LockAllSourceCaches();
5540 	}
5541 
5542 	decrement_page_wired_count(info->page);
5543 
5544 	// remove the wired range from the area
5545 	area->Unwire(&info->range);
5546 
5547 	cacheChainLocker.Unlock();
5548 }
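

// A hypothetical usage sketch (not part of the original vm.cpp): wiring a
// single page of the current team, using its physical address, and unwiring
// it again. example_with_wired_page() is a made-up helper name.
static status_t
example_with_wired_page(addr_t userAddress)
{
	VMPageWiringInfo info;
	status_t status = vm_wire_page(B_CURRENT_TEAM, userAddress, false, &info);
	if (status != B_OK)
		return status;

	// While wired, the page cannot be paged out, so its physical address
	// stays valid until vm_unwire_page() is called.
	dprintf("page at %#" B_PRIxADDR " -> physical %#" B_PRIxPHYSADDR "\n",
		userAddress, info.physicalAddress);

	vm_unwire_page(&info);
	return B_OK;
}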
5549 
5550 
5551 /*!	Wires down the given address range in the specified team's address space.
5552 
5553 	If successful the function
5554 	- acquires a reference to the specified team's address space,
5555 	- adds respective wired ranges to all areas that intersect with the given
5556 	  address range,
5557 	- makes sure all pages in the given address range are mapped with the
5558 	  requested access permissions and increments their wired count.
5559 
5560 	It fails when \a team doesn't specify a valid address space, when any part
5561 	of the specified address range is not covered by areas, when the concerned
5562 	areas don't allow mapping with the requested permissions, or when mapping
5563 	failed for another reason.
5564 
5565 	When successful the call must be balanced by an unlock_memory_etc() call with
5566 	the exact same parameters.
5567 
5568 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5569 		supported.
5570 	\param address The start of the address range to be wired.
5571 	\param numBytes The size of the address range to be wired.
5572 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5573 		requests that the range must be wired writable ("read from device
5574 		into memory").
5575 	\return \c B_OK on success, another error code otherwise.
5576 */
5577 status_t
5578 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5579 {
5580 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5581 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5582 
5583 	// compute the page protection that is required
5584 	bool isUser = IS_USER_ADDRESS(address);
5585 	bool writable = (flags & B_READ_DEVICE) == 0;
5586 	uint32 requiredProtection = PAGE_PRESENT
5587 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5588 	if (writable)
5589 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5590 
5591 	uint32 mallocFlags = isUser
5592 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5593 
5594 	// get and read lock the address space
5595 	VMAddressSpace* addressSpace = NULL;
5596 	if (isUser) {
5597 		if (team == B_CURRENT_TEAM)
5598 			addressSpace = VMAddressSpace::GetCurrent();
5599 		else
5600 			addressSpace = VMAddressSpace::Get(team);
5601 	} else
5602 		addressSpace = VMAddressSpace::GetKernel();
5603 	if (addressSpace == NULL)
5604 		return B_ERROR;
5605 
5606 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5607 		// We get a new address space reference here. The one we got above will
5608 		// be freed by unlock_memory_etc().
5609 
5610 	VMTranslationMap* map = addressSpace->TranslationMap();
5611 	status_t error = B_OK;
5612 
5613 	// iterate through all concerned areas
5614 	addr_t nextAddress = lockBaseAddress;
5615 	while (nextAddress != lockEndAddress) {
5616 		// get the next area
5617 		VMArea* area = addressSpace->LookupArea(nextAddress);
5618 		if (area == NULL) {
5619 			error = B_BAD_ADDRESS;
5620 			break;
5621 		}
5622 
5623 		addr_t areaStart = nextAddress;
5624 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5625 
5626 		// allocate the wired range (do that before locking the cache to avoid
5627 		// deadlocks)
5628 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5629 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5630 		if (range == NULL) {
5631 			error = B_NO_MEMORY;
5632 			break;
5633 		}
5634 
5635 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5636 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5637 
5638 		// mark the area range wired
5639 		area->Wire(range);
5640 
5641 		// Depending on the area cache type and the wiring, we may not need to
5642 		// look at the individual pages.
5643 		if (area->cache_type == CACHE_TYPE_NULL
5644 			|| area->cache_type == CACHE_TYPE_DEVICE
5645 			|| area->wiring == B_FULL_LOCK
5646 			|| area->wiring == B_CONTIGUOUS) {
5647 			nextAddress = areaEnd;
5648 			continue;
5649 		}
5650 
5651 		// Lock the area's cache chain and the translation map. Needed to look
5652 		// up pages and play with their wired count.
5653 		cacheChainLocker.LockAllSourceCaches();
5654 		map->Lock();
5655 
5656 		// iterate through the pages and wire them
5657 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5658 			phys_addr_t physicalAddress;
5659 			uint32 flags;
5660 
5661 			vm_page* page;
5662 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5663 				&& (flags & requiredProtection) == requiredProtection
5664 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5665 					!= NULL) {
5666 				// Already mapped with the correct permissions -- just increment
5667 				// the page's wired count.
5668 				increment_page_wired_count(page);
5669 			} else {
5670 				// Let vm_soft_fault() map the page for us, if possible. We need
5671 				// to fully unlock to avoid deadlocks. Since we have already
5672 				// wired the area itself, nothing disturbing will happen with it
5673 				// in the meantime.
5674 				map->Unlock();
5675 				cacheChainLocker.Unlock();
5676 				addressSpaceLocker.Unlock();
5677 
5678 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5679 					false, isUser, &page);
5680 
5681 				addressSpaceLocker.Lock();
5682 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5683 				cacheChainLocker.LockAllSourceCaches();
5684 				map->Lock();
5685 			}
5686 
5687 			if (error != B_OK)
5688 				break;
5689 		}
5690 
5691 		map->Unlock();
5692 
5693 		if (error == B_OK) {
5694 			cacheChainLocker.Unlock();
5695 		} else {
5696 			// An error occurred, so abort right here. If the current address
5697 			// is the first in this area, unwire the area, since we won't get
5698 			// to it when reverting what we've done so far.
5699 			if (nextAddress == areaStart) {
5700 				area->Unwire(range);
5701 				cacheChainLocker.Unlock();
5702 				range->~VMAreaWiredRange();
5703 				free_etc(range, mallocFlags);
5704 			} else
5705 				cacheChainLocker.Unlock();
5706 
5707 			break;
5708 		}
5709 	}
5710 
5711 	if (error != B_OK) {
5712 		// An error occurred, so unwire all that we've already wired. Note that
5713 		// even if not a single page was wired, unlock_memory_etc() is called
5714 		// to put the address space reference.
5715 		addressSpaceLocker.Unlock();
5716 		unlock_memory_etc(team, (void*)lockBaseAddress,
5717 			nextAddress - lockBaseAddress, flags);
5718 	}
5719 
5720 	return error;
5721 }
5722 
5723 
5724 status_t
5725 lock_memory(void* address, size_t numBytes, uint32 flags)
5726 {
5727 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5728 }
5729 
5730 
5731 /*!	Unwires an address range previously wired with lock_memory_etc().
5732 
5733 	Note that a call to this function must balance a previous lock_memory_etc()
5734 	call with exactly the same parameters.
5735 */
5736 status_t
5737 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5738 {
5739 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5740 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5741 
5742 	// compute the page protection that is required
5743 	bool isUser = IS_USER_ADDRESS(address);
5744 	bool writable = (flags & B_READ_DEVICE) == 0;
5745 	uint32 requiredProtection = PAGE_PRESENT
5746 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5747 	if (writable)
5748 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5749 
5750 	uint32 mallocFlags = isUser
5751 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5752 
5753 	// get and read lock the address space
5754 	VMAddressSpace* addressSpace = NULL;
5755 	if (isUser) {
5756 		if (team == B_CURRENT_TEAM)
5757 			addressSpace = VMAddressSpace::GetCurrent();
5758 		else
5759 			addressSpace = VMAddressSpace::Get(team);
5760 	} else
5761 		addressSpace = VMAddressSpace::GetKernel();
5762 	if (addressSpace == NULL)
5763 		return B_ERROR;
5764 
5765 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5766 		// Take over the address space reference. We don't unlock until we're
5767 		// done.
5768 
5769 	VMTranslationMap* map = addressSpace->TranslationMap();
5770 	status_t error = B_OK;
5771 
5772 	// iterate through all concerned areas
5773 	addr_t nextAddress = lockBaseAddress;
5774 	while (nextAddress != lockEndAddress) {
5775 		// get the next area
5776 		VMArea* area = addressSpace->LookupArea(nextAddress);
5777 		if (area == NULL) {
5778 			error = B_BAD_ADDRESS;
5779 			break;
5780 		}
5781 
5782 		addr_t areaStart = nextAddress;
5783 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5784 
5785 		// Lock the area's top cache. This is a requirement for
5786 		// VMArea::Unwire().
5787 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5788 
5789 		// Depending on the area cache type and the wiring, we may not need to
5790 		// look at the individual pages.
5791 		if (area->cache_type == CACHE_TYPE_NULL
5792 			|| area->cache_type == CACHE_TYPE_DEVICE
5793 			|| area->wiring == B_FULL_LOCK
5794 			|| area->wiring == B_CONTIGUOUS) {
5795 			// unwire the range (to avoid deadlocks we delete the range after
5796 			// unlocking the cache)
5797 			nextAddress = areaEnd;
5798 			VMAreaWiredRange* range = area->Unwire(areaStart,
5799 				areaEnd - areaStart, writable);
5800 			cacheChainLocker.Unlock();
5801 			if (range != NULL) {
5802 				range->~VMAreaWiredRange();
5803 				free_etc(range, mallocFlags);
5804 			}
5805 			continue;
5806 		}
5807 
5808 		// Lock the area's cache chain and the translation map. Needed to look
5809 		// up pages and play with their wired count.
5810 		cacheChainLocker.LockAllSourceCaches();
5811 		map->Lock();
5812 
5813 		// iterate through the pages and unwire them
5814 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5815 			phys_addr_t physicalAddress;
5816 			uint32 flags;
5817 
5818 			vm_page* page;
5819 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5820 				&& (flags & PAGE_PRESENT) != 0
5821 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5822 					!= NULL) {
5823 				// The page is still mapped -- just decrement its wired
5824 				// count.
5825 				decrement_page_wired_count(page);
5826 			} else {
5827 				panic("unlock_memory_etc(): Failed to unwire page: address "
5828 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5829 					nextAddress);
5830 				error = B_BAD_VALUE;
5831 				break;
5832 			}
5833 		}
5834 
5835 		map->Unlock();
5836 
5837 		// All pages are unwired. Remove the area's wired range as well (to
5838 		// avoid deadlocks we delete the range after unlocking the cache).
5839 		VMAreaWiredRange* range = area->Unwire(areaStart,
5840 			areaEnd - areaStart, writable);
5841 
5842 		cacheChainLocker.Unlock();
5843 
5844 		if (range != NULL) {
5845 			range->~VMAreaWiredRange();
5846 			free_etc(range, mallocFlags);
5847 		}
5848 
5849 		if (error != B_OK)
5850 			break;
5851 	}
5852 
5853 	// get rid of the address space reference lock_memory_etc() acquired
5854 	addressSpace->Put();
5855 
5856 	return error;
5857 }
5858 
5859 
5860 status_t
5861 unlock_memory(void* address, size_t numBytes, uint32 flags)
5862 {
5863 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5864 }
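

// A hypothetical usage sketch (not part of the original vm.cpp): the typical
// driver pattern of wiring a user buffer around a transfer.
// example_wire_user_buffer() is a made-up helper; a real driver would build a
// scatter/gather list and start the I/O between the two calls.
static status_t
example_wire_user_buffer(void* buffer, size_t length, bool deviceWrites)
{
	// B_READ_DEVICE means "read from device into memory", i.e. the range
	// must be wired writable.
	uint32 flags = deviceWrites ? B_READ_DEVICE : 0;

	status_t status = lock_memory(buffer, length, flags);
	if (status != B_OK)
		return status;

	// ... set up and perform the transfer here ...

	// must be balanced with the exact same parameters
	return unlock_memory(buffer, length, flags);
}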
5865 
5866 
5867 /*!	Similar to get_memory_map(), but also allows specifying the address space
5868 	for the memory in question and has saner semantics.
5869 	Returns \c B_OK when the complete range could be translated or
5870 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5871 	case the actual number of entries is written to \c *_numEntries. Any other
5872 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5873 	in this case.
5874 */
5875 status_t
5876 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5877 	physical_entry* table, uint32* _numEntries)
5878 {
5879 	uint32 numEntries = *_numEntries;
5880 	*_numEntries = 0;
5881 
5882 	VMAddressSpace* addressSpace;
5883 	addr_t virtualAddress = (addr_t)address;
5884 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5885 	phys_addr_t physicalAddress;
5886 	status_t status = B_OK;
5887 	int32 index = -1;
5888 	addr_t offset = 0;
5889 	bool interrupts = are_interrupts_enabled();
5890 
5891 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5892 		"entries)\n", team, address, numBytes, numEntries));
5893 
5894 	if (numEntries == 0 || numBytes == 0)
5895 		return B_BAD_VALUE;
5896 
5897 	// in which address space is the address to be found?
5898 	if (IS_USER_ADDRESS(virtualAddress)) {
5899 		if (team == B_CURRENT_TEAM)
5900 			addressSpace = VMAddressSpace::GetCurrent();
5901 		else
5902 			addressSpace = VMAddressSpace::Get(team);
5903 	} else
5904 		addressSpace = VMAddressSpace::GetKernel();
5905 
5906 	if (addressSpace == NULL)
5907 		return B_ERROR;
5908 
5909 	VMTranslationMap* map = addressSpace->TranslationMap();
5910 
5911 	if (interrupts)
5912 		map->Lock();
5913 
5914 	while (offset < numBytes) {
5915 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5916 		uint32 flags;
5917 
5918 		if (interrupts) {
5919 			status = map->Query((addr_t)address + offset, &physicalAddress,
5920 				&flags);
5921 		} else {
5922 			status = map->QueryInterrupt((addr_t)address + offset,
5923 				&physicalAddress, &flags);
5924 		}
5925 		if (status < B_OK)
5926 			break;
5927 		if ((flags & PAGE_PRESENT) == 0) {
5928 			panic("get_memory_map() called on unmapped memory!");
5929 			return B_BAD_ADDRESS;
5930 		}
5931 
5932 		if (index < 0 && pageOffset > 0) {
5933 			physicalAddress += pageOffset;
5934 			if (bytes > B_PAGE_SIZE - pageOffset)
5935 				bytes = B_PAGE_SIZE - pageOffset;
5936 		}
5937 
5938 		// need to switch to the next physical_entry?
5939 		if (index < 0 || table[index].address
5940 				!= physicalAddress - table[index].size) {
5941 			if ((uint32)++index + 1 > numEntries) {
5942 				// table too small
5943 				break;
5944 			}
5945 			table[index].address = physicalAddress;
5946 			table[index].size = bytes;
5947 		} else {
5948 			// the page is contiguous with the current entry -- just extend it
5949 			table[index].size += bytes;
5950 		}
5951 
5952 		offset += bytes;
5953 	}
5954 
5955 	if (interrupts)
5956 		map->Unlock();
5957 
5958 	if (status != B_OK)
5959 		return status;
5960 
5961 	if ((uint32)index + 1 > numEntries) {
5962 		*_numEntries = index;
5963 		return B_BUFFER_OVERFLOW;
5964 	}
5965 
5966 	*_numEntries = index + 1;
5967 	return B_OK;
5968 }
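

// A hypothetical usage sketch (not part of the original vm.cpp): translating
// a (previously wired) buffer of the current team into physical runs via
// get_memory_map_etc(). The fixed-size table and
// example_build_scatter_gather() are made up.
static status_t
example_build_scatter_gather(const void* buffer, size_t length)
{
	physical_entry table[8];
	uint32 numEntries = 8;

	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
		table, &numEntries);
	if (status != B_OK && status != B_BUFFER_OVERFLOW)
		return status;

	// On B_BUFFER_OVERFLOW the returned entries are still valid and cover a
	// prefix of the buffer; a driver could process them and then continue.
	for (uint32 i = 0; i < numEntries; i++) {
		dprintf("run %" B_PRIu32 ": address %#" B_PRIxPHYSADDR ", %" B_PRIu64
			" bytes\n", i, table[i].address, (uint64)table[i].size);
	}

	return status;
}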
5969 
5970 
5971 /*!	According to the BeBook, this function should always succeed.
5972 	This is no longer the case.
5973 */
5974 extern "C" int32
5975 __get_memory_map_haiku(const void* address, size_t numBytes,
5976 	physical_entry* table, int32 numEntries)
5977 {
5978 	uint32 entriesRead = numEntries;
5979 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5980 		table, &entriesRead);
5981 	if (error != B_OK)
5982 		return error;
5983 
5984 	// close the entry list
5985 
5986 	// if it's only one entry, we will silently accept the missing ending
5987 	if (numEntries == 1)
5988 		return B_OK;
5989 
5990 	if (entriesRead + 1 > (uint32)numEntries)
5991 		return B_BUFFER_OVERFLOW;
5992 
5993 	table[entriesRead].address = 0;
5994 	table[entriesRead].size = 0;
5995 
5996 	return B_OK;
5997 }
5998 
5999 
6000 area_id
6001 area_for(void* address)
6002 {
6003 	return vm_area_for((addr_t)address, true);
6004 }
6005 
6006 
6007 area_id
6008 find_area(const char* name)
6009 {
6010 	return VMAreaHash::Find(name);
6011 }
6012 
6013 
6014 status_t
6015 _get_area_info(area_id id, area_info* info, size_t size)
6016 {
6017 	if (size != sizeof(area_info) || info == NULL)
6018 		return B_BAD_VALUE;
6019 
6020 	AddressSpaceReadLocker locker;
6021 	VMArea* area;
6022 	status_t status = locker.SetFromArea(id, area);
6023 	if (status != B_OK)
6024 		return status;
6025 
6026 	fill_area_info(area, info, size);
6027 	return B_OK;
6028 }
6029 
6030 
6031 status_t
6032 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6033 {
6034 	addr_t nextBase = *(addr_t*)cookie;
6035 
6036 	// we're already through the list
6037 	if (nextBase == (addr_t)-1)
6038 		return B_ENTRY_NOT_FOUND;
6039 
6040 	if (team == B_CURRENT_TEAM)
6041 		team = team_get_current_team_id();
6042 
6043 	AddressSpaceReadLocker locker(team);
6044 	if (!locker.IsLocked())
6045 		return B_BAD_TEAM_ID;
6046 
6047 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6048 	if (area == NULL) {
6049 		nextBase = (addr_t)-1;
6050 		return B_ENTRY_NOT_FOUND;
6051 	}
6052 
6053 	fill_area_info(area, info, size);
6054 	*cookie = (ssize_t)(area->Base() + 1);
6055 
6056 	return B_OK;
6057 }
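

// A hypothetical usage sketch (not part of the original vm.cpp): walking all
// areas of a team with the cookie-based iteration above. example_dump_areas()
// is a made-up debugging helper.
static void
example_dump_areas(team_id team)
{
	ssize_t cookie = 0;
	area_info info;

	while (_get_next_area_info(team, &cookie, &info, sizeof(info)) == B_OK) {
		dprintf("area %" B_PRId32 " \"%s\": %p, %lu bytes\n", info.area,
			info.name, info.address, (unsigned long)info.size);
	}
}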
6058 
6059 
6060 status_t
6061 set_area_protection(area_id area, uint32 newProtection)
6062 {
6063 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6064 		newProtection, true);
6065 }
6066 
6067 
6068 status_t
6069 resize_area(area_id areaID, size_t newSize)
6070 {
6071 	return vm_resize_area(areaID, newSize, true);
6072 }
6073 
6074 
6075 /*!	Transfers the specified area to a new team. The caller must be the owner
6076 	of the area.
6077 */
6078 area_id
6079 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6080 	bool kernel)
6081 {
6082 	area_info info;
6083 	status_t status = get_area_info(id, &info);
6084 	if (status != B_OK)
6085 		return status;
6086 
6087 	if (info.team != thread_get_current_thread()->team->id)
6088 		return B_PERMISSION_DENIED;
6089 
6090 	// We need to mark the area cloneable so the following operations work.
6091 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6092 	if (status != B_OK)
6093 		return status;
6094 
6095 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6096 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6097 	if (clonedArea < 0)
6098 		return clonedArea;
6099 
6100 	status = vm_delete_area(info.team, id, kernel);
6101 	if (status != B_OK) {
6102 		vm_delete_area(target, clonedArea, kernel);
6103 		return status;
6104 	}
6105 
6106 	// Now we can reset the protection to whatever it was before.
6107 	set_area_protection(clonedArea, info.protection);
6108 
6109 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6110 
6111 	return clonedArea;
6112 }
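

// A hypothetical usage sketch (not part of the original vm.cpp): handing an
// area owned by the current team over to another team via transfer_area()
// above. On success the returned area ID belongs to \a targetTeam and the
// original ID is gone. example_hand_over_area() is a made-up helper.
static area_id
example_hand_over_area(area_id area, team_id targetTeam)
{
	void* address = NULL;
	return transfer_area(area, &address, B_ANY_ADDRESS, targetTeam, true);
}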
6113 
6114 
6115 extern "C" area_id
6116 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6117 	size_t numBytes, uint32 addressSpec, uint32 protection,
6118 	void** _virtualAddress)
6119 {
6120 	if (!arch_vm_supports_protection(protection))
6121 		return B_NOT_SUPPORTED;
6122 
6123 	fix_protection(&protection);
6124 
6125 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6126 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6127 		false);
6128 }
6129 
6130 
6131 area_id
6132 clone_area(const char* name, void** _address, uint32 addressSpec,
6133 	uint32 protection, area_id source)
6134 {
6135 	if ((protection & B_KERNEL_PROTECTION) == 0)
6136 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6137 
6138 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6139 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6140 }
6141 
6142 
6143 area_id
6144 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6145 	uint32 protection, uint32 flags, uint32 guardSize,
6146 	const virtual_address_restrictions* virtualAddressRestrictions,
6147 	const physical_address_restrictions* physicalAddressRestrictions,
6148 	void** _address)
6149 {
6150 	fix_protection(&protection);
6151 
6152 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6153 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6154 		true, _address);
6155 }
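

// A hypothetical usage sketch (not part of the original vm.cpp): creating a
// fully locked kernel area with create_area_etc(), filling in the restriction
// structures the same way __create_area_haiku() below does.
// example_create_buffer_area() is a made-up helper.
static area_id
example_create_buffer_area(const char* name, size_t size, void** _address)
{
	size = ROUNDUP(size, B_PAGE_SIZE);

	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
	physical_address_restrictions physicalRestrictions = {};

	return create_area_etc(VMAddressSpace::KernelID(), name, size,
		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
		&virtualRestrictions, &physicalRestrictions, _address);
}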
6156 
6157 
6158 extern "C" area_id
6159 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6160 	size_t size, uint32 lock, uint32 protection)
6161 {
6162 	fix_protection(&protection);
6163 
6164 	virtual_address_restrictions virtualRestrictions = {};
6165 	virtualRestrictions.address = *_address;
6166 	virtualRestrictions.address_specification = addressSpec;
6167 	physical_address_restrictions physicalRestrictions = {};
6168 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6169 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6170 		true, _address);
6171 }
6172 
6173 
6174 status_t
6175 delete_area(area_id area)
6176 {
6177 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6178 }
6179 
6180 
6181 //	#pragma mark - Userland syscalls
6182 
6183 
6184 status_t
6185 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6186 	addr_t size)
6187 {
6188 	// filter out some unavailable values (for userland)
6189 	switch (addressSpec) {
6190 		case B_ANY_KERNEL_ADDRESS:
6191 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6192 			return B_BAD_VALUE;
6193 	}
6194 
6195 	addr_t address;
6196 
6197 	if (!IS_USER_ADDRESS(userAddress)
6198 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6199 		return B_BAD_ADDRESS;
6200 
6201 	status_t status = vm_reserve_address_range(
6202 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6203 		RESERVED_AVOID_BASE);
6204 	if (status != B_OK)
6205 		return status;
6206 
6207 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6208 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6209 			(void*)address, size);
6210 		return B_BAD_ADDRESS;
6211 	}
6212 
6213 	return B_OK;
6214 }
6215 
6216 
6217 status_t
6218 _user_unreserve_address_range(addr_t address, addr_t size)
6219 {
6220 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6221 		(void*)address, size);
6222 }
6223 
6224 
6225 area_id
6226 _user_area_for(void* address)
6227 {
6228 	return vm_area_for((addr_t)address, false);
6229 }
6230 
6231 
6232 area_id
6233 _user_find_area(const char* userName)
6234 {
6235 	char name[B_OS_NAME_LENGTH];
6236 
6237 	if (!IS_USER_ADDRESS(userName)
6238 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6239 		return B_BAD_ADDRESS;
6240 
6241 	return find_area(name);
6242 }
6243 
6244 
6245 status_t
6246 _user_get_area_info(area_id area, area_info* userInfo)
6247 {
6248 	if (!IS_USER_ADDRESS(userInfo))
6249 		return B_BAD_ADDRESS;
6250 
6251 	area_info info;
6252 	status_t status = get_area_info(area, &info);
6253 	if (status < B_OK)
6254 		return status;
6255 
6256 	// TODO: do we want to prevent userland from seeing kernel protections?
6257 	//info.protection &= B_USER_PROTECTION;
6258 
6259 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6260 		return B_BAD_ADDRESS;
6261 
6262 	return status;
6263 }
6264 
6265 
6266 status_t
6267 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6268 {
6269 	ssize_t cookie;
6270 
6271 	if (!IS_USER_ADDRESS(userCookie)
6272 		|| !IS_USER_ADDRESS(userInfo)
6273 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6274 		return B_BAD_ADDRESS;
6275 
6276 	area_info info;
6277 	status_t status = _get_next_area_info(team, &cookie, &info,
6278 		sizeof(area_info));
6279 	if (status != B_OK)
6280 		return status;
6281 
6282 	//info.protection &= B_USER_PROTECTION;
6283 
6284 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6285 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6286 		return B_BAD_ADDRESS;
6287 
6288 	return status;
6289 }
6290 
6291 
6292 status_t
6293 _user_set_area_protection(area_id area, uint32 newProtection)
6294 {
6295 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6296 		return B_BAD_VALUE;
6297 
6298 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6299 		newProtection, false);
6300 }
6301 
6302 
6303 status_t
6304 _user_resize_area(area_id area, size_t newSize)
6305 {
6306 	// TODO: Since we restrict deleting of areas to those owned by the team,
6307 	// we should also do that for resizing (check other functions, too).
6308 	return vm_resize_area(area, newSize, false);
6309 }
6310 
6311 
6312 area_id
6313 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6314 	team_id target)
6315 {
6316 	// filter out some unavailable values (for userland)
6317 	switch (addressSpec) {
6318 		case B_ANY_KERNEL_ADDRESS:
6319 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6320 			return B_BAD_VALUE;
6321 	}
6322 
6323 	void* address;
6324 	if (!IS_USER_ADDRESS(userAddress)
6325 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6326 		return B_BAD_ADDRESS;
6327 
6328 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6329 	if (newArea < B_OK)
6330 		return newArea;
6331 
6332 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6333 		return B_BAD_ADDRESS;
6334 
6335 	return newArea;
6336 }
6337 
6338 
6339 area_id
6340 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6341 	uint32 protection, area_id sourceArea)
6342 {
6343 	char name[B_OS_NAME_LENGTH];
6344 	void* address;
6345 
6346 	// filter out some unavailable values (for userland)
6347 	switch (addressSpec) {
6348 		case B_ANY_KERNEL_ADDRESS:
6349 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6350 			return B_BAD_VALUE;
6351 	}
6352 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6353 		return B_BAD_VALUE;
6354 
6355 	if (!IS_USER_ADDRESS(userName)
6356 		|| !IS_USER_ADDRESS(userAddress)
6357 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6358 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6359 		return B_BAD_ADDRESS;
6360 
6361 	fix_protection(&protection);
6362 
6363 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6364 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6365 		false);
6366 	if (clonedArea < B_OK)
6367 		return clonedArea;
6368 
6369 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6370 		delete_area(clonedArea);
6371 		return B_BAD_ADDRESS;
6372 	}
6373 
6374 	return clonedArea;
6375 }
6376 
6377 
6378 area_id
6379 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6380 	size_t size, uint32 lock, uint32 protection)
6381 {
6382 	char name[B_OS_NAME_LENGTH];
6383 	void* address;
6384 
6385 	// filter out some unavailable values (for userland)
6386 	switch (addressSpec) {
6387 		case B_ANY_KERNEL_ADDRESS:
6388 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6389 			return B_BAD_VALUE;
6390 	}
6391 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6392 		return B_BAD_VALUE;
6393 
6394 	if (!IS_USER_ADDRESS(userName)
6395 		|| !IS_USER_ADDRESS(userAddress)
6396 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6397 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6398 		return B_BAD_ADDRESS;
6399 
6400 	if (addressSpec == B_EXACT_ADDRESS
6401 		&& IS_KERNEL_ADDRESS(address))
6402 		return B_BAD_VALUE;
6403 
6404 	if (addressSpec == B_ANY_ADDRESS)
6405 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6406 	if (addressSpec == B_BASE_ADDRESS)
6407 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6408 
6409 	fix_protection(&protection);
6410 
6411 	virtual_address_restrictions virtualRestrictions = {};
6412 	virtualRestrictions.address = address;
6413 	virtualRestrictions.address_specification = addressSpec;
6414 	physical_address_restrictions physicalRestrictions = {};
6415 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6416 		size, lock, protection, 0, 0, &virtualRestrictions,
6417 		&physicalRestrictions, false, &address);
6418 
6419 	if (area >= B_OK
6420 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6421 		delete_area(area);
6422 		return B_BAD_ADDRESS;
6423 	}
6424 
6425 	return area;
6426 }
6427 
6428 
6429 status_t
6430 _user_delete_area(area_id area)
6431 {
6432 	// Unlike the BeOS implementation, you can now only delete areas
6433 	// that you have created yourself from userland.
6434 	// The documentation to delete_area() explicitly states that this
6435 	// will be restricted in the future, and so it will.
6436 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6437 }
6438 
6439 
6440 // TODO: create a BeOS style call for this!
6441 
6442 area_id
6443 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6444 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6445 	int fd, off_t offset)
6446 {
6447 	char name[B_OS_NAME_LENGTH];
6448 	void* address;
6449 	area_id area;
6450 
6451 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6452 		return B_BAD_VALUE;
6453 
6454 	fix_protection(&protection);
6455 
6456 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6457 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6458 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6459 		return B_BAD_ADDRESS;
6460 
6461 	if (addressSpec == B_EXACT_ADDRESS) {
6462 		if ((addr_t)address + size < (addr_t)address
6463 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6464 			return B_BAD_VALUE;
6465 		}
6466 		if (!IS_USER_ADDRESS(address)
6467 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6468 			return B_BAD_ADDRESS;
6469 		}
6470 	}
6471 
6472 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6473 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6474 		false);
6475 	if (area < B_OK)
6476 		return area;
6477 
6478 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6479 		return B_BAD_ADDRESS;
6480 
6481 	return area;
6482 }
6483 
6484 
6485 status_t
6486 _user_unmap_memory(void* _address, size_t size)
6487 {
6488 	addr_t address = (addr_t)_address;
6489 
6490 	// check params
6491 	if (size == 0 || (addr_t)address + size < (addr_t)address
6492 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6493 		return B_BAD_VALUE;
6494 	}
6495 
6496 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6497 		return B_BAD_ADDRESS;
6498 
6499 	// Write lock the address space and ensure the address range is not wired.
6500 	AddressSpaceWriteLocker locker;
6501 	do {
6502 		status_t status = locker.SetTo(team_get_current_team_id());
6503 		if (status != B_OK)
6504 			return status;
6505 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6506 			size, &locker));
6507 
6508 	// unmap
6509 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6510 }
6511 
6512 
6513 status_t
6514 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6515 {
6516 	// check address range
6517 	addr_t address = (addr_t)_address;
6518 	size = PAGE_ALIGN(size);
6519 
6520 	if ((address % B_PAGE_SIZE) != 0)
6521 		return B_BAD_VALUE;
6522 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6523 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6524 		// weird error code required by POSIX
6525 		return ENOMEM;
6526 	}
6527 
6528 	// extend and check protection
6529 	if ((protection & ~B_USER_PROTECTION) != 0)
6530 		return B_BAD_VALUE;
6531 
6532 	fix_protection(&protection);
6533 
6534 	// We need to write lock the address space, since we're going to play with
6535 	// the areas. Also make sure that none of the areas is wired and that we're
6536 	// actually allowed to change the protection.
6537 	AddressSpaceWriteLocker locker;
6538 
6539 	bool restart;
6540 	do {
6541 		restart = false;
6542 
6543 		status_t status = locker.SetTo(team_get_current_team_id());
6544 		if (status != B_OK)
6545 			return status;
6546 
6547 		// First round: Check whether the whole range is covered by areas and we
6548 		// are allowed to modify them.
6549 		addr_t currentAddress = address;
6550 		size_t sizeLeft = size;
6551 		while (sizeLeft > 0) {
6552 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6553 			if (area == NULL)
6554 				return B_NO_MEMORY;
6555 
6556 			if ((area->protection & B_KERNEL_AREA) != 0)
6557 				return B_NOT_ALLOWED;
6558 
6559 			// TODO: For (shared) mapped files we should check whether the new
6560 			// protections are compatible with the file permissions. We don't
6561 			// have a way to do that yet, though.
6562 
6563 			addr_t offset = currentAddress - area->Base();
6564 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6565 
6566 			AreaCacheLocker cacheLocker(area);
6567 
6568 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6569 					&locker, &cacheLocker)) {
6570 				restart = true;
6571 				break;
6572 			}
6573 
6574 			cacheLocker.Unlock();
6575 
6576 			currentAddress += rangeSize;
6577 			sizeLeft -= rangeSize;
6578 		}
6579 	} while (restart);
6580 
6581 	// Second round: If the protections differ from that of the area, create a
6582 	// page protection array and re-map mapped pages.
6583 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6584 	addr_t currentAddress = address;
6585 	size_t sizeLeft = size;
6586 	while (sizeLeft > 0) {
6587 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6588 		if (area == NULL)
6589 			return B_NO_MEMORY;
6590 
6591 		addr_t offset = currentAddress - area->Base();
6592 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6593 
6594 		currentAddress += rangeSize;
6595 		sizeLeft -= rangeSize;
6596 
6597 		if (area->page_protections == NULL) {
6598 			if (area->protection == protection)
6599 				continue;
6600 
6601 			status_t status = allocate_area_page_protections(area);
6602 			if (status != B_OK)
6603 				return status;
6604 		}
6605 
6606 		// We need to lock the complete cache chain, since we potentially unmap
6607 		// pages of lower caches.
6608 		VMCache* topCache = vm_area_get_locked_cache(area);
6609 		VMCacheChainLocker cacheChainLocker(topCache);
6610 		cacheChainLocker.LockAllSourceCaches();
6611 
6612 		for (addr_t pageAddress = area->Base() + offset;
6613 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6614 			map->Lock();
6615 
6616 			set_area_page_protection(area, pageAddress, protection);
6617 
6618 			phys_addr_t physicalAddress;
6619 			uint32 flags;
6620 
6621 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6622 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6623 				map->Unlock();
6624 				continue;
6625 			}
6626 
6627 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6628 			if (page == NULL) {
6629 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6630 					"\n", area, physicalAddress);
6631 				map->Unlock();
6632 				return B_ERROR;
6633 			}
6634 
6635 			// If the page is not in the topmost cache and write access is
6636 			// requested, we have to unmap it. Otherwise we can re-map it with
6637 			// the new protection.
6638 			bool unmapPage = page->Cache() != topCache
6639 				&& (protection & B_WRITE_AREA) != 0;
6640 
6641 			if (!unmapPage)
6642 				map->ProtectPage(area, pageAddress, protection);
6643 
6644 			map->Unlock();
6645 
6646 			if (unmapPage) {
6647 				DEBUG_PAGE_ACCESS_START(page);
6648 				unmap_page(area, pageAddress);
6649 				DEBUG_PAGE_ACCESS_END(page);
6650 			}
6651 		}
6652 	}
6653 
6654 	return B_OK;
6655 }
6656 
6657 
6658 status_t
6659 _user_sync_memory(void* _address, size_t size, uint32 flags)
6660 {
6661 	addr_t address = (addr_t)_address;
6662 	size = PAGE_ALIGN(size);
6663 
6664 	// check params
6665 	if ((address % B_PAGE_SIZE) != 0)
6666 		return B_BAD_VALUE;
6667 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6668 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6669 		// weird error code required by POSIX
6670 		return ENOMEM;
6671 	}
6672 
6673 	bool writeSync = (flags & MS_SYNC) != 0;
6674 	bool writeAsync = (flags & MS_ASYNC) != 0;
6675 	if (writeSync && writeAsync)
6676 		return B_BAD_VALUE;
6677 
6678 	if (size == 0 || (!writeSync && !writeAsync))
6679 		return B_OK;
6680 
6681 	// iterate through the range and sync all concerned areas
6682 	while (size > 0) {
6683 		// read lock the address space
6684 		AddressSpaceReadLocker locker;
6685 		status_t error = locker.SetTo(team_get_current_team_id());
6686 		if (error != B_OK)
6687 			return error;
6688 
6689 		// get the first area
6690 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6691 		if (area == NULL)
6692 			return B_NO_MEMORY;
6693 
6694 		uint32 offset = address - area->Base();
6695 		size_t rangeSize = min_c(area->Size() - offset, size);
6696 		offset += area->cache_offset;
6697 
6698 		// lock the cache
6699 		AreaCacheLocker cacheLocker(area);
6700 		if (!cacheLocker)
6701 			return B_BAD_VALUE;
6702 		VMCache* cache = area->cache;
6703 
6704 		locker.Unlock();
6705 
6706 		uint32 firstPage = offset >> PAGE_SHIFT;
6707 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6708 
6709 		// write the pages
6710 		if (cache->type == CACHE_TYPE_VNODE) {
6711 			if (writeSync) {
6712 				// synchronous
6713 				error = vm_page_write_modified_page_range(cache, firstPage,
6714 					endPage);
6715 				if (error != B_OK)
6716 					return error;
6717 			} else {
6718 				// asynchronous
6719 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6720 				// TODO: This is probably not quite what is supposed to happen.
6721 				// Especially when a lot has to be written, it might take ages
6722 				// until it really hits the disk.
6723 			}
6724 		}
6725 
6726 		address += rangeSize;
6727 		size -= rangeSize;
6728 	}
6729 
6730 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6731 	// synchronize multiple mappings of the same file. In our VM they never get
6732 	// out of sync, though, so we don't have to do anything.
6733 
6734 	return B_OK;
6735 }
6736 
6737 
6738 status_t
6739 _user_memory_advice(void* address, size_t size, uint32 advice)
6740 {
6741 	// TODO: Implement!
6742 	return B_OK;
6743 }
6744 
6745 
6746 status_t
6747 _user_get_memory_properties(team_id teamID, const void* address,
6748 	uint32* _protected, uint32* _lock)
6749 {
6750 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6751 		return B_BAD_ADDRESS;
6752 
6753 	AddressSpaceReadLocker locker;
6754 	status_t error = locker.SetTo(teamID);
6755 	if (error != B_OK)
6756 		return error;
6757 
6758 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6759 	if (area == NULL)
6760 		return B_NO_MEMORY;
6761 
6762 
6763 	uint32 protection = area->protection;
6764 	if (area->page_protections != NULL)
6765 		protection = get_area_page_protection(area, (addr_t)address);
6766 
6767 	uint32 wiring = area->wiring;
6768 
6769 	locker.Unlock();
6770 
6771 	error = user_memcpy(_protected, &protection, sizeof(protection));
6772 	if (error != B_OK)
6773 		return error;
6774 
6775 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6776 
6777 	return error;
6778 }
6779 
6780 
6781 // #pragma mark -- compatibility
6782 
6783 
6784 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6785 
6786 
6787 struct physical_entry_beos {
6788 	uint32	address;
6789 	uint32	size;
6790 };
6791 
6792 
6793 /*!	The physical_entry structure has changed. We need to translate it to the
6794 	old one.
6795 */
6796 extern "C" int32
6797 __get_memory_map_beos(const void* _address, size_t numBytes,
6798 	physical_entry_beos* table, int32 numEntries)
6799 {
6800 	if (numEntries <= 0)
6801 		return B_BAD_VALUE;
6802 
6803 	const uint8* address = (const uint8*)_address;
6804 
6805 	int32 count = 0;
6806 	while (numBytes > 0 && count < numEntries) {
6807 		physical_entry entry;
6808 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6809 		if (result < 0) {
6810 			if (result != B_BUFFER_OVERFLOW)
6811 				return result;
6812 		}
6813 
6814 		if (entry.address >= (phys_addr_t)1 << 32) {
6815 			panic("get_memory_map(): Address is greater than 4 GB!");
6816 			return B_ERROR;
6817 		}
6818 
6819 		table[count].address = entry.address;
6820 		table[count++].size = entry.size;
6821 
6822 		address += entry.size;
6823 		numBytes -= entry.size;
6824 	}
6825 
6826 	// null-terminate the table, if possible
6827 	if (count < numEntries) {
6828 		table[count].address = 0;
6829 		table[count].size = 0;
6830 	}
6831 
6832 	return B_OK;
6833 }
6834 
6835 
6836 /*!	The type of the \a physicalAddress parameter has changed from void* to
6837 	phys_addr_t.
6838 */
6839 extern "C" area_id
6840 __map_physical_memory_beos(const char* name, void* physicalAddress,
6841 	size_t numBytes, uint32 addressSpec, uint32 protection,
6842 	void** _virtualAddress)
6843 {
6844 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6845 		addressSpec, protection, _virtualAddress);
6846 }
6847 
6848 
6849 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6850 	we meddle with the \a lock parameter to force 32-bit physical memory.
6851 */
6852 extern "C" area_id
6853 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6854 	size_t size, uint32 lock, uint32 protection)
6855 {
6856 	switch (lock) {
6857 		case B_NO_LOCK:
6858 			break;
6859 		case B_FULL_LOCK:
6860 		case B_LAZY_LOCK:
6861 			lock = B_32_BIT_FULL_LOCK;
6862 			break;
6863 		case B_CONTIGUOUS:
6864 			lock = B_32_BIT_CONTIGUOUS;
6865 			break;
6866 	}
6867 
6868 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6869 		protection);
6870 }
6871 
6872 
6873 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6874 	"BASE");
6875 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6876 	"map_physical_memory@", "BASE");
6877 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6878 	"BASE");
6879 
6880 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6881 	"get_memory_map@@", "1_ALPHA3");
6882 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6883 	"map_physical_memory@@", "1_ALPHA3");
6884 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6885 	"1_ALPHA3");
6886 
6887 
6888 #else
6889 
6890 
6891 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6892 	"get_memory_map@@", "BASE");
6893 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6894 	"map_physical_memory@@", "BASE");
6895 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6896 	"BASE");
6897 
6898 
6899 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6900