xref: /haiku/src/system/kernel/vm/vm.cpp (revision dd2a1e350b303b855a50fd64e6cb55618be1ae6a)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/BitUtils.h>
51 #include <util/ThreadAutoLock.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_priv.h>
54 #include <vm/VMAddressSpace.h>
55 #include <vm/VMArea.h>
56 #include <vm/VMCache.h>
57 
58 #include "VMAddressSpaceLocking.h"
59 #include "VMAnonymousCache.h"
60 #include "VMAnonymousNoSwapCache.h"
61 #include "IORequest.h"
62 
63 
64 //#define TRACE_VM
65 //#define TRACE_FAULTS
66 #ifdef TRACE_VM
67 #	define TRACE(x) dprintf x
68 #else
69 #	define TRACE(x) ;
70 #endif
71 #ifdef TRACE_FAULTS
72 #	define FTRACE(x) dprintf x
73 #else
74 #	define FTRACE(x) ;
75 #endif
76 
77 
78 namespace {
79 
80 class AreaCacheLocking {
81 public:
82 	inline bool Lock(VMCache* lockable)
83 	{
84 		return false;
85 	}
86 
87 	inline void Unlock(VMCache* lockable)
88 	{
89 		vm_area_put_locked_cache(lockable);
90 	}
91 };
92 
93 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
94 public:
95 	inline AreaCacheLocker(VMCache* cache = NULL)
96 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
97 	{
98 	}
99 
100 	inline AreaCacheLocker(VMArea* area)
101 		: AutoLocker<VMCache, AreaCacheLocking>()
102 	{
103 		SetTo(area);
104 	}
105 
106 	inline void SetTo(VMCache* cache, bool alreadyLocked)
107 	{
108 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
109 	}
110 
111 	inline void SetTo(VMArea* area)
112 	{
113 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
114 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
115 	}
116 };
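
// Editorial note: a minimal usage sketch (not part of the original source).
// Constructing the locker from a VMArea locks the area's cache via
// vm_area_get_locked_cache() and puts it again when the locker is destroyed,
// as wait_if_address_range_is_wired() does further down:
//
//	AreaCacheLocker cacheLocker(area);
//		// area->cache is now locked and referenced
//	...
//		// the cache is put/unlocked when cacheLocker goes out of scope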
117 
118 
119 class VMCacheChainLocker {
120 public:
121 	VMCacheChainLocker()
122 		:
123 		fTopCache(NULL),
124 		fBottomCache(NULL)
125 	{
126 	}
127 
128 	VMCacheChainLocker(VMCache* topCache)
129 		:
130 		fTopCache(topCache),
131 		fBottomCache(topCache)
132 	{
133 	}
134 
135 	~VMCacheChainLocker()
136 	{
137 		Unlock();
138 	}
139 
140 	void SetTo(VMCache* topCache)
141 	{
142 		fTopCache = topCache;
143 		fBottomCache = topCache;
144 
145 		if (topCache != NULL)
146 			topCache->SetUserData(NULL);
147 	}
148 
149 	VMCache* LockSourceCache()
150 	{
151 		if (fBottomCache == NULL || fBottomCache->source == NULL)
152 			return NULL;
153 
154 		VMCache* previousCache = fBottomCache;
155 
156 		fBottomCache = fBottomCache->source;
157 		fBottomCache->Lock();
158 		fBottomCache->AcquireRefLocked();
159 		fBottomCache->SetUserData(previousCache);
160 
161 		return fBottomCache;
162 	}
163 
164 	void LockAllSourceCaches()
165 	{
166 		while (LockSourceCache() != NULL) {
167 		}
168 	}
169 
170 	void Unlock(VMCache* exceptCache = NULL)
171 	{
172 		if (fTopCache == NULL)
173 			return;
174 
175 		// Unlock caches in source -> consumer direction. This is important to
176 		// avoid double-locking and a reversal of locking order in case a cache
177 		// is eligible for merging.
178 		VMCache* cache = fBottomCache;
179 		while (cache != NULL) {
180 			VMCache* nextCache = (VMCache*)cache->UserData();
181 			if (cache != exceptCache)
182 				cache->ReleaseRefAndUnlock(cache != fTopCache);
183 
184 			if (cache == fTopCache)
185 				break;
186 
187 			cache = nextCache;
188 		}
189 
190 		fTopCache = NULL;
191 		fBottomCache = NULL;
192 	}
193 
194 	void UnlockKeepRefs(bool keepTopCacheLocked)
195 	{
196 		if (fTopCache == NULL)
197 			return;
198 
199 		VMCache* nextCache = fBottomCache;
200 		VMCache* cache = NULL;
201 
202 		while (keepTopCacheLocked
203 				? nextCache != fTopCache : cache != fTopCache) {
204 			cache = nextCache;
205 			nextCache = (VMCache*)cache->UserData();
206 			cache->Unlock(cache != fTopCache);
207 		}
208 	}
209 
210 	void RelockCaches(bool topCacheLocked)
211 	{
212 		if (fTopCache == NULL)
213 			return;
214 
215 		VMCache* nextCache = fTopCache;
216 		VMCache* cache = NULL;
217 		if (topCacheLocked) {
218 			cache = nextCache;
219 			nextCache = cache->source;
220 		}
221 
222 		while (cache != fBottomCache && nextCache != NULL) {
223 			VMCache* consumer = cache;
224 			cache = nextCache;
225 			nextCache = cache->source;
226 			cache->Lock();
227 			cache->SetUserData(consumer);
228 		}
229 	}
230 
231 private:
232 	VMCache*	fTopCache;
233 	VMCache*	fBottomCache;
234 };
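
// Editorial note: an illustrative sketch (not part of the original source)
// of how this locker is used by cut_area() and discard_area_range() below --
// lock the area's top cache first, then walk and lock the source chain:
//
//	VMCache* cache = vm_area_get_locked_cache(area);
//	VMCacheChainLocker cacheChainLocker(cache);
//	cacheChainLocker.LockAllSourceCaches();
//	...	// the whole chain, top cache down to the bottom-most source, is locked
//	// the destructor (or an explicit Unlock()) releases the chain again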
235 
236 } // namespace
237 
238 
239 // The memory reserve that an allocation of a given priority must not touch.
240 static const size_t kMemoryReserveForPriority[] = {
241 	VM_MEMORY_RESERVE_USER,		// user
242 	VM_MEMORY_RESERVE_SYSTEM,	// system
243 	0							// VIP
244 };
245 
246 
247 ObjectCache* gPageMappingsObjectCache;
248 
249 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
250 
251 static off_t sAvailableMemory;
252 static off_t sNeededMemory;
253 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
254 static uint32 sPageFaults;
255 
256 static VMPhysicalPageMapper* sPhysicalPageMapper;
257 
258 #if DEBUG_CACHE_LIST
259 
260 struct cache_info {
261 	VMCache*	cache;
262 	addr_t		page_count;
263 	addr_t		committed;
264 };
265 
266 static const int kCacheInfoTableCount = 100 * 1024;
267 static cache_info* sCacheInfoTable;
268 
269 #endif	// DEBUG_CACHE_LIST
270 
271 
272 // function declarations
273 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
274 	bool addressSpaceCleanup);
275 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
276 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
277 static status_t map_backing_store(VMAddressSpace* addressSpace,
278 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
279 	int protection, int protectionMax, int mapping, uint32 flags,
280 	const virtual_address_restrictions* addressRestrictions, bool kernel,
281 	VMArea** _area, void** _virtualAddress);
282 static void fix_protection(uint32* protection);
283 
284 
285 //	#pragma mark -
286 
287 
288 #if VM_PAGE_FAULT_TRACING
289 
290 namespace VMPageFaultTracing {
291 
292 class PageFaultStart : public AbstractTraceEntry {
293 public:
294 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
295 		:
296 		fAddress(address),
297 		fPC(pc),
298 		fWrite(write),
299 		fUser(user)
300 	{
301 		Initialized();
302 	}
303 
304 	virtual void AddDump(TraceOutput& out)
305 	{
306 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
307 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
308 	}
309 
310 private:
311 	addr_t	fAddress;
312 	addr_t	fPC;
313 	bool	fWrite;
314 	bool	fUser;
315 };
316 
317 
318 // page fault errors
319 enum {
320 	PAGE_FAULT_ERROR_NO_AREA		= 0,
321 	PAGE_FAULT_ERROR_KERNEL_ONLY,
322 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
323 	PAGE_FAULT_ERROR_READ_PROTECTED,
324 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
325 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
326 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
327 };
328 
329 
330 class PageFaultError : public AbstractTraceEntry {
331 public:
332 	PageFaultError(area_id area, status_t error)
333 		:
334 		fArea(area),
335 		fError(error)
336 	{
337 		Initialized();
338 	}
339 
340 	virtual void AddDump(TraceOutput& out)
341 	{
342 		switch (fError) {
343 			case PAGE_FAULT_ERROR_NO_AREA:
344 				out.Print("page fault error: no area");
345 				break;
346 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
347 				out.Print("page fault error: area: %ld, kernel only", fArea);
348 				break;
349 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
350 				out.Print("page fault error: area: %ld, write protected",
351 					fArea);
352 				break;
353 			case PAGE_FAULT_ERROR_READ_PROTECTED:
354 				out.Print("page fault error: area: %ld, read protected", fArea);
355 				break;
356 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
357 				out.Print("page fault error: area: %ld, execute protected",
358 					fArea);
359 				break;
360 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
361 				out.Print("page fault error: kernel touching bad user memory");
362 				break;
363 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
364 				out.Print("page fault error: no address space");
365 				break;
366 			default:
367 				out.Print("page fault error: area: %ld, error: %s", fArea,
368 					strerror(fError));
369 				break;
370 		}
371 	}
372 
373 private:
374 	area_id		fArea;
375 	status_t	fError;
376 };
377 
378 
379 class PageFaultDone : public AbstractTraceEntry {
380 public:
381 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
382 			vm_page* page)
383 		:
384 		fArea(area),
385 		fTopCache(topCache),
386 		fCache(cache),
387 		fPage(page)
388 	{
389 		Initialized();
390 	}
391 
392 	virtual void AddDump(TraceOutput& out)
393 	{
394 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
395 			"page: %p", fArea, fTopCache, fCache, fPage);
396 	}
397 
398 private:
399 	area_id		fArea;
400 	VMCache*	fTopCache;
401 	VMCache*	fCache;
402 	vm_page*	fPage;
403 };
404 
405 }	// namespace VMPageFaultTracing
406 
407 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
408 #else
409 #	define TPF(x) ;
410 #endif	// VM_PAGE_FAULT_TRACING
411 
412 
413 //	#pragma mark -
414 
415 
416 /*!	The page's cache must be locked.
417 */
418 static inline void
419 increment_page_wired_count(vm_page* page)
420 {
421 	if (!page->IsMapped())
422 		atomic_add(&gMappedPagesCount, 1);
423 	page->IncrementWiredCount();
424 }
425 
426 
427 /*!	The page's cache must be locked.
428 */
429 static inline void
430 decrement_page_wired_count(vm_page* page)
431 {
432 	page->DecrementWiredCount();
433 	if (!page->IsMapped())
434 		atomic_add(&gMappedPagesCount, -1);
435 }
436 
437 
438 static inline addr_t
439 virtual_page_address(VMArea* area, vm_page* page)
440 {
441 	return area->Base()
442 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
443 }
444 
445 
446 static inline bool
447 is_page_in_area(VMArea* area, vm_page* page)
448 {
449 	off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT);
450 	return pageCacheOffsetBytes >= area->cache_offset
451 		&& pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size();
452 }
453 
454 
455 //! You need to have the address space locked when calling this function
456 static VMArea*
457 lookup_area(VMAddressSpace* addressSpace, area_id id)
458 {
459 	VMAreas::ReadLock();
460 
461 	VMArea* area = VMAreas::LookupLocked(id);
462 	if (area != NULL && area->address_space != addressSpace)
463 		area = NULL;
464 
465 	VMAreas::ReadUnlock();
466 
467 	return area;
468 }
469 
470 
471 static inline size_t
472 area_page_protections_size(size_t areaSize)
473 {
474 	// In the page protections we store only the three user protections,
475 	// so we use 4 bits per page.
476 	return (areaSize / B_PAGE_SIZE + 1) / 2;
477 }
478 
479 
480 static status_t
481 allocate_area_page_protections(VMArea* area)
482 {
483 	size_t bytes = area_page_protections_size(area->Size());
484 	area->page_protections = (uint8*)malloc_etc(bytes,
485 		area->address_space == VMAddressSpace::Kernel()
486 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
487 	if (area->page_protections == NULL)
488 		return B_NO_MEMORY;
489 
490 	// init the page protections for all pages to that of the area
491 	uint32 areaProtection = area->protection
492 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
493 	memset(area->page_protections, areaProtection | (areaProtection << 4),
494 		bytes);
495 	return B_OK;
496 }
497 
498 
499 static inline void
500 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
501 {
502 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
503 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
504 	uint8& entry = area->page_protections[pageIndex / 2];
505 	if (pageIndex % 2 == 0)
506 		entry = (entry & 0xf0) | protection;
507 	else
508 		entry = (entry & 0x0f) | (protection << 4);
509 }
510 
511 
512 static inline uint32
513 get_area_page_protection(VMArea* area, addr_t pageAddress)
514 {
515 	if (area->page_protections == NULL)
516 		return area->protection;
517 
518 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
519 	uint32 protection = area->page_protections[pageIndex / 2];
520 	if (pageIndex % 2 == 0)
521 		protection &= 0x0f;
522 	else
523 		protection >>= 4;
524 
525 	uint32 kernelProtection = 0;
526 	if ((protection & B_READ_AREA) != 0)
527 		kernelProtection |= B_KERNEL_READ_AREA;
528 	if ((protection & B_WRITE_AREA) != 0)
529 		kernelProtection |= B_KERNEL_WRITE_AREA;
530 
531 	// If this is a kernel area we return only the kernel flags.
532 	if (area->address_space == VMAddressSpace::Kernel())
533 		return kernelProtection;
534 
535 	return protection | kernelProtection;
536 }
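
// Editorial note: an illustrative example (not part of the original source)
// of the 4-bits-per-page packing used above, assuming
// allocate_area_page_protections() has been called for the area. Two
// consecutive pages share one byte of page_protections; the even page uses
// the low nibble, the odd page the high nibble:
//
//	set_area_page_protection(area, area->Base(), B_READ_AREA);
//	set_area_page_protection(area, area->Base() + B_PAGE_SIZE,
//		B_READ_AREA | B_WRITE_AREA);
//	// area->page_protections[0] now holds both entries:
//	// low nibble = B_READ_AREA, high nibble = B_READ_AREA | B_WRITE_AREA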
537 
538 
539 static inline uint8*
540 realloc_page_protections(uint8* pageProtections, size_t areaSize,
541 	uint32 allocationFlags)
542 {
543 	size_t bytes = area_page_protections_size(areaSize);
544 	return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags);
545 }
546 
547 
548 /*!	The caller must have reserved enough pages that the translation map
549 	implementation might need to map this page.
550 	The page's cache must be locked.
551 */
552 static status_t
553 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
554 	vm_page_reservation* reservation)
555 {
556 	VMTranslationMap* map = area->address_space->TranslationMap();
557 
558 	bool wasMapped = page->IsMapped();
559 
560 	if (area->wiring == B_NO_LOCK) {
561 		DEBUG_PAGE_ACCESS_CHECK(page);
562 
563 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
564 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
565 			gPageMappingsObjectCache,
566 			CACHE_DONT_WAIT_FOR_MEMORY
567 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
568 		if (mapping == NULL)
569 			return B_NO_MEMORY;
570 
571 		mapping->page = page;
572 		mapping->area = area;
573 
574 		map->Lock();
575 
576 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
577 			area->MemoryType(), reservation);
578 
579 		// insert mapping into lists
580 		if (!page->IsMapped())
581 			atomic_add(&gMappedPagesCount, 1);
582 
583 		page->mappings.Add(mapping);
584 		area->mappings.Add(mapping);
585 
586 		map->Unlock();
587 	} else {
588 		DEBUG_PAGE_ACCESS_CHECK(page);
589 
590 		map->Lock();
591 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
592 			area->MemoryType(), reservation);
593 		map->Unlock();
594 
595 		increment_page_wired_count(page);
596 	}
597 
598 	if (!wasMapped) {
599 		// The page is mapped now, so it must not remain in the cached queue.
600 		// It also makes sense to move it from the inactive to the active queue,
601 		// since otherwise the page daemon wouldn't keep track of it (in idle
602 		// mode) -- if the page isn't touched, it will be deactivated after a
603 		// full iteration through the queue at the latest.
604 		if (page->State() == PAGE_STATE_CACHED
605 				|| page->State() == PAGE_STATE_INACTIVE) {
606 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
607 		}
608 	}
609 
610 	return B_OK;
611 }
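
// Editorial note: an illustrative sketch (not part of the original source) of
// the calling convention map_page() expects, mirroring the B_FULL_LOCK path
// in vm_create_anonymous_area() below -- pages for the translation map are
// reserved first and the page's cache is locked:
//
//	vm_page_reservation reservation;
//	vm_page_reserve_pages(&reservation, reservedPages, priority);
//	cache->Lock();
//	vm_page* page = vm_page_allocate_page(&reservation, PAGE_STATE_WIRED);
//	cache->InsertPage(page, offset);
//	map_page(area, page, address, protection, &reservation);
//	DEBUG_PAGE_ACCESS_END(page);
//	cache->Unlock();
//	vm_page_unreserve_pages(&reservation);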
612 
613 
614 /*!	The caller must hold the lock of the page's cache when calling this
615 	function.
616 */
617 static inline bool
618 unmap_page(VMArea* area, addr_t virtualAddress)
619 {
620 	return area->address_space->TranslationMap()->UnmapPage(area,
621 		virtualAddress, true);
622 }
623 
624 
625 /*!	The caller must hold the locks of all mapped pages' caches when calling
626 	this function.
627 */
628 static inline void
629 unmap_pages(VMArea* area, addr_t base, size_t size)
630 {
631 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
632 }
633 
634 
635 static inline bool
636 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
637 {
638 	if (address < area->Base()) {
639 		offset = area->Base() - address;
640 		if (offset >= size)
641 			return false;
642 
643 		address = area->Base();
644 		size -= offset;
645 		offset = 0;
646 		if (size > area->Size())
647 			size = area->Size();
648 
649 		return true;
650 	}
651 
652 	offset = address - area->Base();
653 	if (offset >= area->Size())
654 		return false;
655 
656 	if (size >= area->Size() - offset)
657 		size = area->Size() - offset;
658 
659 	return true;
660 }
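
// Editorial note: a worked example (not part of the original source). For an
// area with Base() == 0x10000 and Size() == 0x4000, a range starting below
// the area is clamped to the overlapping part:
//
//	addr_t address = 0xf000, size = 0x3000, offset;
//	intersect_area(area, address, size, offset);
//	// -> returns true with address == 0x10000, size == 0x2000, offset == 0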
661 
662 
663 /*!	Cuts a piece out of an area. If the given cut range covers the complete
664 	area, it is deleted. If it covers the beginning or the end, the area is
665 	resized accordingly. If the range covers some part in the middle of the
666 	area, it is split in two; in this case the second area is returned via
667 	\a _secondArea (the variable is left untouched in the other cases).
668 	The address space must be write locked.
669 	The caller must ensure that no part of the given range is wired.
670 */
671 static status_t
672 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
673 	addr_t size, VMArea** _secondArea, bool kernel)
674 {
675 	addr_t offset;
676 	if (!intersect_area(area, address, size, offset))
677 		return B_OK;
678 
679 	// Is the area fully covered?
680 	if (address == area->Base() && size == area->Size()) {
681 		delete_area(addressSpace, area, false);
682 		return B_OK;
683 	}
684 
685 	int priority;
686 	uint32 allocationFlags;
687 	if (addressSpace == VMAddressSpace::Kernel()) {
688 		priority = VM_PRIORITY_SYSTEM;
689 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
690 			| HEAP_DONT_LOCK_KERNEL_SPACE;
691 	} else {
692 		priority = VM_PRIORITY_USER;
693 		allocationFlags = 0;
694 	}
695 
696 	VMCache* cache = vm_area_get_locked_cache(area);
697 	VMCacheChainLocker cacheChainLocker(cache);
698 	cacheChainLocker.LockAllSourceCaches();
699 
700 	// If no one else uses the area's cache and it's an anonymous cache, we can
701 	// resize or split it, too.
702 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
703 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
704 
705 	const addr_t oldSize = area->Size();
706 
707 	// Cut the end only?
708 	if (offset > 0 && size == area->Size() - offset) {
709 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
710 			allocationFlags);
711 		if (error != B_OK)
712 			return error;
713 
714 		if (area->page_protections != NULL) {
715 			uint8* newProtections = realloc_page_protections(
716 				area->page_protections, area->Size(), allocationFlags);
717 
718 			if (newProtections == NULL) {
719 				addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
720 				return B_NO_MEMORY;
721 			}
722 
723 			area->page_protections = newProtections;
724 		}
725 
726 		// unmap pages
727 		unmap_pages(area, address, size);
728 
729 		if (onlyCacheUser) {
730 			// Since VMCache::Resize() can temporarily drop the lock, we must
731 			// unlock all lower caches to prevent locking order inversion.
732 			cacheChainLocker.Unlock(cache);
733 			cache->Resize(cache->virtual_base + offset, priority);
734 			cache->ReleaseRefAndUnlock();
735 		}
736 
737 		return B_OK;
738 	}
739 
740 	// Cut the beginning only?
741 	if (area->Base() == address) {
742 		uint8* newProtections = NULL;
743 		if (area->page_protections != NULL) {
744 			// Allocate all memory before shifting as the shift might lose some
745 			// bits.
746 			newProtections = realloc_page_protections(NULL, area->Size(),
747 				allocationFlags);
748 
749 			if (newProtections == NULL)
750 				return B_NO_MEMORY;
751 		}
752 
753 		// resize the area
754 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
755 			allocationFlags);
756 		if (error != B_OK) {
757 			if (newProtections != NULL)
758 				free_etc(newProtections, allocationFlags);
759 			return error;
760 		}
761 
762 		if (area->page_protections != NULL) {
763 			size_t oldBytes = area_page_protections_size(oldSize);
764 			ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE;
765 			bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4));
766 
767 			size_t bytes = area_page_protections_size(area->Size());
768 			memcpy(newProtections, area->page_protections, bytes);
769 			free_etc(area->page_protections, allocationFlags);
770 			area->page_protections = newProtections;
771 		}
772 
773 		// unmap pages
774 		unmap_pages(area, address, size);
775 
776 		if (onlyCacheUser) {
777 			// Since VMCache::Rebase() can temporarily drop the lock, we must
778 			// unlock all lower caches to prevent locking order inversion.
779 			cacheChainLocker.Unlock(cache);
780 			cache->Rebase(cache->virtual_base + size, priority);
781 			cache->ReleaseRefAndUnlock();
782 		}
783 		area->cache_offset += size;
784 
785 		return B_OK;
786 	}
787 
788 	// The tough part -- cut a piece out of the middle of the area.
789 	// We do that by shrinking the area to the beginning section and creating a
790 	// new area for the end section.
791 	addr_t firstNewSize = offset;
792 	addr_t secondBase = address + size;
793 	addr_t secondSize = area->Size() - offset - size;
794 
795 	// unmap pages
796 	unmap_pages(area, address, area->Size() - firstNewSize);
797 
798 	// resize the area
799 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
800 		allocationFlags);
801 	if (error != B_OK)
802 		return error;
803 
804 	uint8* areaNewProtections = NULL;
805 	uint8* secondAreaNewProtections = NULL;
806 
807 	// Try to allocate the new memory before making any hard-to-reverse
808 	// changes.
809 	if (area->page_protections != NULL) {
810 		areaNewProtections = realloc_page_protections(NULL, area->Size(),
811 			allocationFlags);
812 		secondAreaNewProtections = realloc_page_protections(NULL, secondSize,
813 			allocationFlags);
814 
815 		if (areaNewProtections == NULL || secondAreaNewProtections == NULL) {
816 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
817 			free_etc(areaNewProtections, allocationFlags);
818 			free_etc(secondAreaNewProtections, allocationFlags);
819 			return B_NO_MEMORY;
820 		}
821 	}
822 
823 	virtual_address_restrictions addressRestrictions = {};
824 	addressRestrictions.address = (void*)secondBase;
825 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
826 	VMArea* secondArea;
827 
828 	if (onlyCacheUser) {
829 		// Create a new cache for the second area.
830 		VMCache* secondCache;
831 		error = VMCacheFactory::CreateAnonymousCache(secondCache,
832 			area->protection & B_OVERCOMMITTING_AREA, 0, 0,
833 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
834 		if (error != B_OK) {
835 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
836 			free_etc(areaNewProtections, allocationFlags);
837 			free_etc(secondAreaNewProtections, allocationFlags);
838 			return error;
839 		}
840 
841 		secondCache->Lock();
842 		secondCache->temporary = cache->temporary;
843 		secondCache->virtual_base = area->cache_offset;
844 		secondCache->virtual_end = area->cache_offset + secondSize;
845 
846 		// Transfer the concerned pages from the first cache.
847 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
848 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
849 			area->cache_offset);
850 
851 		if (error == B_OK) {
852 			// Since VMCache::Resize() can temporarily drop the lock, we must
853 			// unlock all lower caches to prevent locking order inversion.
854 			cacheChainLocker.Unlock(cache);
855 			cache->Resize(cache->virtual_base + firstNewSize, priority);
856 			// Don't unlock the cache yet because we might have to resize it
857 			// back.
858 
859 			// Map the second area.
860 			error = map_backing_store(addressSpace, secondCache,
861 				area->cache_offset, area->name, secondSize, area->wiring,
862 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
863 				&addressRestrictions, kernel, &secondArea, NULL);
864 		}
865 
866 		if (error != B_OK) {
867 			// Restore the original cache.
868 			cache->Resize(cache->virtual_base + oldSize, priority);
869 
870 			// Move the pages back.
871 			status_t readoptStatus = cache->Adopt(secondCache,
872 				area->cache_offset, secondSize, adoptOffset);
873 			if (readoptStatus != B_OK) {
874 				// Some (swap) pages have not been moved back and will be lost
875 				// once the second cache is deleted.
876 				panic("failed to restore cache range: %s",
877 					strerror(readoptStatus));
878 
879 				// TODO: Handle out of memory cases by freeing memory and
880 				// retrying.
881 			}
882 
883 			cache->ReleaseRefAndUnlock();
884 			secondCache->ReleaseRefAndUnlock();
885 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
886 			free_etc(areaNewProtections, allocationFlags);
887 			free_etc(secondAreaNewProtections, allocationFlags);
888 			return error;
889 		}
890 
891 		// Now we can unlock it.
892 		cache->ReleaseRefAndUnlock();
893 		secondCache->Unlock();
894 	} else {
895 		error = map_backing_store(addressSpace, cache, area->cache_offset
896 			+ (secondBase - area->Base()),
897 			area->name, secondSize, area->wiring, area->protection,
898 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
899 			&addressRestrictions, kernel, &secondArea, NULL);
900 		if (error != B_OK) {
901 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
902 			free_etc(areaNewProtections, allocationFlags);
903 			free_etc(secondAreaNewProtections, allocationFlags);
904 			return error;
905 		}
906 		// We need a cache reference for the new area.
907 		cache->AcquireRefLocked();
908 	}
909 
910 	if (area->page_protections != NULL) {
911 		// Copy the protection bits of the first area.
912 		size_t areaBytes = area_page_protections_size(area->Size());
913 		memcpy(areaNewProtections, area->page_protections, areaBytes);
914 		uint8* areaOldProtections = area->page_protections;
915 		area->page_protections = areaNewProtections;
916 
917 		// Shift the protection bits of the second area to the start of
918 		// the old array.
919 		size_t oldBytes = area_page_protections_size(oldSize);
920 		addr_t secondAreaOffset = secondBase - area->Base();
921 		ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE;
922 		bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4));
923 
924 		// Copy the protection bits of the second area.
925 		size_t secondAreaBytes = area_page_protections_size(secondSize);
926 		memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes);
927 		secondArea->page_protections = secondAreaNewProtections;
928 
929 		// We don't need this anymore.
930 		free_etc(areaOldProtections, allocationFlags);
931 
932 		// Set the correct page protections for the second area.
933 		VMTranslationMap* map = addressSpace->TranslationMap();
934 		map->Lock();
935 		for (VMCachePagesTree::Iterator it
936 				= secondArea->cache->pages.GetIterator();
937 				vm_page* page = it.Next();) {
938 			if (is_page_in_area(secondArea, page)) {
939 				addr_t address = virtual_page_address(secondArea, page);
940 				uint32 pageProtection
941 					= get_area_page_protection(secondArea, address);
942 				map->ProtectPage(secondArea, address, pageProtection);
943 			}
944 		}
945 		map->Unlock();
946 	}
947 
948 	if (_secondArea != NULL)
949 		*_secondArea = secondArea;
950 
951 	return B_OK;
952 }
953 
954 
955 /*!	Deletes or cuts all areas in the given address range.
956 	The address space must be write-locked.
957 	The caller must ensure that no part of the given range is wired.
958 */
959 static status_t
960 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
961 	bool kernel)
962 {
963 	size = PAGE_ALIGN(size);
964 
965 	// Check whether the caller is allowed to modify the concerned areas.
966 	if (!kernel) {
967 		for (VMAddressSpace::AreaRangeIterator it
968 				= addressSpace->GetAreaRangeIterator(address, size);
969 			VMArea* area = it.Next();) {
970 
971 			if ((area->protection & B_KERNEL_AREA) != 0) {
972 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
973 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
974 					team_get_current_team_id(), area->id, area->name);
975 				return B_NOT_ALLOWED;
976 			}
977 		}
978 	}
979 
980 	for (VMAddressSpace::AreaRangeIterator it
981 			= addressSpace->GetAreaRangeIterator(address, size);
982 		VMArea* area = it.Next();) {
983 
984 		status_t error = cut_area(addressSpace, area, address, size, NULL,
985 			kernel);
986 		if (error != B_OK)
987 			return error;
988 			// Failing after already messing with areas is ugly, but we
989 			// can't do anything about it.
990 	}
991 
992 	return B_OK;
993 }
994 
995 
996 static status_t
997 discard_area_range(VMArea* area, addr_t address, addr_t size)
998 {
999 	addr_t offset;
1000 	if (!intersect_area(area, address, size, offset))
1001 		return B_OK;
1002 
1003 	// If someone else uses the area's cache or it's not an anonymous cache, we
1004 	// can't discard.
1005 	VMCache* cache = vm_area_get_locked_cache(area);
1006 	if (cache->areas != area || area->cache_next != NULL
1007 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
1008 		return B_OK;
1009 	}
1010 
1011 	VMCacheChainLocker cacheChainLocker(cache);
1012 	cacheChainLocker.LockAllSourceCaches();
1013 
1014 	unmap_pages(area, address, size);
1015 
1016 	// Since VMCache::Discard() can temporarily drop the lock, we must
1017 	// unlock all lower caches to prevent locking order inversion.
1018 	cacheChainLocker.Unlock(cache);
1019 	cache->Discard(cache->virtual_base + offset, size);
1020 	cache->ReleaseRefAndUnlock();
1021 
1022 	return B_OK;
1023 }
1024 
1025 
1026 static status_t
1027 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1028 	bool kernel)
1029 {
1030 	for (VMAddressSpace::AreaRangeIterator it
1031 		= addressSpace->GetAreaRangeIterator(address, size);
1032 			VMArea* area = it.Next();) {
1033 		status_t error = discard_area_range(area, address, size);
1034 		if (error != B_OK)
1035 			return error;
1036 	}
1037 
1038 	return B_OK;
1039 }
1040 
1041 
1042 /*! You need to hold the lock of the cache and the write lock of the address
1043 	space when calling this function.
1044 	Note that in case of error your cache will be temporarily unlocked.
1045 	If \a addressRestrictions specifies \c B_EXACT_ADDRESS and the
1046 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
1047 	that no part of the specified address range (base \c *_virtualAddress, size
1048 	\a size) is wired. The cache will also be temporarily unlocked.
1049 */
1050 static status_t
1051 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
1052 	const char* areaName, addr_t size, int wiring, int protection,
1053 	int protectionMax, int mapping,
1054 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
1055 	bool kernel, VMArea** _area, void** _virtualAddress)
1056 {
1057 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
1058 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
1059 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
1060 		addressSpace, cache, addressRestrictions->address, offset, size,
1061 		addressRestrictions->address_specification, wiring, protection,
1062 		protectionMax, _area, areaName));
1063 	cache->AssertLocked();
1064 
1065 	if (size == 0) {
1066 #if KDEBUG
1067 		panic("map_backing_store(): called with size=0 for area '%s'!",
1068 			areaName);
1069 #endif
1070 		return B_BAD_VALUE;
1071 	}
1072 	if (offset < 0)
1073 		return B_BAD_VALUE;
1074 
1075 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
1076 		| HEAP_DONT_LOCK_KERNEL_SPACE;
1077 	int priority;
1078 	if (addressSpace != VMAddressSpace::Kernel()) {
1079 		priority = VM_PRIORITY_USER;
1080 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
1081 		priority = VM_PRIORITY_VIP;
1082 		allocationFlags |= HEAP_PRIORITY_VIP;
1083 	} else
1084 		priority = VM_PRIORITY_SYSTEM;
1085 
1086 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
1087 		allocationFlags);
1088 	if (area == NULL)
1089 		return B_NO_MEMORY;
1090 	if (mapping != REGION_PRIVATE_MAP)
1091 		area->protection_max = protectionMax & B_USER_PROTECTION;
1092 
1093 	status_t status;
1094 
1095 	// if this is a private map, we need to create a new cache
1096 	// to handle the private copies of pages as they are written to
1097 	VMCache* sourceCache = cache;
1098 	if (mapping == REGION_PRIVATE_MAP) {
1099 		VMCache* newCache;
1100 
1101 		// create an anonymous cache
1102 		status = VMCacheFactory::CreateAnonymousCache(newCache,
1103 			(protection & B_STACK_AREA) != 0
1104 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
1105 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
1106 		if (status != B_OK)
1107 			goto err1;
1108 
1109 		newCache->Lock();
1110 		newCache->temporary = 1;
1111 		newCache->virtual_base = offset;
1112 		newCache->virtual_end = offset + size;
1113 
1114 		cache->AddConsumer(newCache);
1115 
1116 		cache = newCache;
1117 	}
1118 
1119 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
1120 		status = cache->SetMinimalCommitment(size, priority);
1121 		if (status != B_OK)
1122 			goto err2;
1123 	}
1124 
1125 	// check to see if this address space has entered DELETE state
1126 	if (addressSpace->IsBeingDeleted()) {
1127 		// someone is trying to delete this address space now, so we can't
1128 		// insert the area -- we have to back out
1129 		status = B_BAD_TEAM_ID;
1130 		goto err2;
1131 	}
1132 
1133 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1134 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1135 		// temporarily unlock the current cache since it might be mapped to
1136 		// some existing area, and unmap_address_range also needs to lock that
1137 		// cache to delete the area.
1138 		cache->Unlock();
1139 		status = unmap_address_range(addressSpace,
1140 			(addr_t)addressRestrictions->address, size, kernel);
1141 		cache->Lock();
1142 		if (status != B_OK)
1143 			goto err2;
1144 	}
1145 
1146 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1147 		allocationFlags, _virtualAddress);
1148 	if (status == B_NO_MEMORY
1149 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1150 		// Due to how many locks are held, we cannot wait here for space to be
1151 		// freed up, but we can at least notify the low_resource handler.
1152 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1153 	}
1154 	if (status != B_OK)
1155 		goto err2;
1156 
1157 	// attach the cache to the area
1158 	area->cache = cache;
1159 	area->cache_offset = offset;
1160 
1161 	// point the cache back to the area
1162 	cache->InsertAreaLocked(area);
1163 	if (mapping == REGION_PRIVATE_MAP)
1164 		cache->Unlock();
1165 
1166 	// insert the area in the global areas map
1167 	VMAreas::Insert(area);
1168 
1169 	// grab a ref to the address space (the area holds this)
1170 	addressSpace->Get();
1171 
1172 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1173 //		cache, sourceCache, areaName, area);
1174 
1175 	*_area = area;
1176 	return B_OK;
1177 
1178 err2:
1179 	if (mapping == REGION_PRIVATE_MAP) {
1180 		// We created this cache, so we must delete it again. Note that we
1181 		// need to temporarily unlock the source cache or we'll otherwise
1182 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1183 		sourceCache->Unlock();
1184 		cache->ReleaseRefAndUnlock();
1185 		sourceCache->Lock();
1186 	}
1187 err1:
1188 	addressSpace->DeleteArea(area, allocationFlags);
1189 	return status;
1190 }
1191 
1192 
1193 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1194 	  locker1, locker2).
1195 */
1196 template<typename LockerType1, typename LockerType2>
1197 static inline bool
1198 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1199 {
1200 	area->cache->AssertLocked();
1201 
1202 	VMAreaUnwiredWaiter waiter;
1203 	if (!area->AddWaiterIfWired(&waiter))
1204 		return false;
1205 
1206 	// unlock everything and wait
1207 	if (locker1 != NULL)
1208 		locker1->Unlock();
1209 	if (locker2 != NULL)
1210 		locker2->Unlock();
1211 
1212 	waiter.waitEntry.Wait();
1213 
1214 	return true;
1215 }
1216 
1217 
1218 /*!	Checks whether the given area has any wired ranges intersecting with the
1219 	specified range and waits, if so.
1220 
1221 	When it has to wait, the function calls \c Unlock() on both \a locker1
1222 	and \a locker2, if given.
1223 	The area's top cache must be locked and must be unlocked as a side effect
1224 	of calling \c Unlock() on either \a locker1 or \a locker2.
1225 
1226 	If the function does not have to wait it does not modify or unlock any
1227 	object.
1228 
1229 	\param area The area to be checked.
1230 	\param base The base address of the range to check.
1231 	\param size The size of the address range to check.
1232 	\param locker1 An object to be unlocked before starting to wait (may
1233 		be \c NULL).
1234 	\param locker2 An object to be unlocked before starting to wait (may
1235 		be \c NULL).
1236 	\return \c true, if the function had to wait, \c false otherwise.
1237 */
1238 template<typename LockerType1, typename LockerType2>
1239 static inline bool
1240 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1241 	LockerType1* locker1, LockerType2* locker2)
1242 {
1243 	area->cache->AssertLocked();
1244 
1245 	VMAreaUnwiredWaiter waiter;
1246 	if (!area->AddWaiterIfWired(&waiter, base, size))
1247 		return false;
1248 
1249 	// unlock everything and wait
1250 	if (locker1 != NULL)
1251 		locker1->Unlock();
1252 	if (locker2 != NULL)
1253 		locker2->Unlock();
1254 
1255 	waiter.waitEntry.Wait();
1256 
1257 	return true;
1258 }
1259 
1260 
1261 /*!	Checks whether the given address space has any wired ranges intersecting
1262 	with the specified range and waits, if so.
1263 
1264 	Similar to wait_if_area_range_is_wired(), with the following differences:
1265 	- All areas intersecting with the range are checked (respectively all until
1266 	  one is found that contains a wired range intersecting with the given
1267 	  range).
1268 	- The given address space must at least be read-locked and must be unlocked
1269 	  when \c Unlock() is called on \a locker.
1270 	- None of the areas' caches are allowed to be locked.
1271 */
1272 template<typename LockerType>
1273 static inline bool
1274 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1275 	size_t size, LockerType* locker)
1276 {
1277 	for (VMAddressSpace::AreaRangeIterator it
1278 		= addressSpace->GetAreaRangeIterator(base, size);
1279 			VMArea* area = it.Next();) {
1280 
1281 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1282 
1283 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1284 			return true;
1285 	}
1286 
1287 	return false;
1288 }
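
// Editorial note: an illustrative sketch (not part of the original source) of
// the retry pattern these helpers are meant for, as used by
// vm_create_anonymous_area() below -- after waiting, the caller re-acquires
// its lock and checks again:
//
//	do {
//		status = locker.SetTo(team);
//		if (status != B_OK)
//			return status;
//		addressSpace = locker.AddressSpace();
//	} while (wait_if_address_range_is_wired(addressSpace, base, size,
//		&locker));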
1289 
1290 
1291 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1292 	It must be called in a situation where the kernel address space may be
1293 	locked.
1294 */
1295 status_t
1296 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1297 {
1298 	AddressSpaceReadLocker locker;
1299 	VMArea* area;
1300 	status_t status = locker.SetFromArea(id, area);
1301 	if (status != B_OK)
1302 		return status;
1303 
1304 	if (area->page_protections == NULL) {
1305 		status = allocate_area_page_protections(area);
1306 		if (status != B_OK)
1307 			return status;
1308 	}
1309 
1310 	*cookie = (void*)area;
1311 	return B_OK;
1312 }
1313 
1314 
1315 /*!	This is a debug helper function that can only be used with very specific
1316 	use cases.
1317 	Sets protection for the given address range to the protection specified.
1318 	If \a protection is 0 then the involved pages will be marked non-present
1319 	in the translation map to cause a fault on access. The pages aren't
1320 	actually unmapped however so that they can be marked present again with
1321 	additional calls to this function. For this to work the area must be
1322 	fully locked in memory so that the pages aren't otherwise touched.
1323 	This function does not lock the kernel address space and needs to be
1324 	supplied with a \a cookie retrieved from a successful call to
1325 	vm_prepare_kernel_area_debug_protection().
1326 */
1327 status_t
1328 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1329 	uint32 protection)
1330 {
1331 	// check address range
1332 	addr_t address = (addr_t)_address;
1333 	size = PAGE_ALIGN(size);
1334 
1335 	if ((address % B_PAGE_SIZE) != 0
1336 		|| (addr_t)address + size < (addr_t)address
1337 		|| !IS_KERNEL_ADDRESS(address)
1338 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1339 		return B_BAD_VALUE;
1340 	}
1341 
1342 	// Translate the kernel protection to user protection as we only store that.
1343 	if ((protection & B_KERNEL_READ_AREA) != 0)
1344 		protection |= B_READ_AREA;
1345 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1346 		protection |= B_WRITE_AREA;
1347 
1348 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1349 	VMTranslationMap* map = addressSpace->TranslationMap();
1350 	VMArea* area = (VMArea*)cookie;
1351 
1352 	addr_t offset = address - area->Base();
1353 	if (area->Size() - offset < size) {
1354 		panic("protect range not fully within supplied area");
1355 		return B_BAD_VALUE;
1356 	}
1357 
1358 	if (area->page_protections == NULL) {
1359 		panic("area has no page protections");
1360 		return B_BAD_VALUE;
1361 	}
1362 
1363 	// Invalidate the mapping entries so any access to them will fault or
1364 	// restore the mapping entries unchanged so that lookup will succeed again.
1365 	map->Lock();
1366 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1367 	map->Unlock();
1368 
1369 	// And set the proper page protections so that the fault case will actually
1370 	// fail and not simply try to map a new page.
1371 	for (addr_t pageAddress = address; pageAddress < address + size;
1372 			pageAddress += B_PAGE_SIZE) {
1373 		set_area_page_protection(area, pageAddress, protection);
1374 	}
1375 
1376 	return B_OK;
1377 }
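
// Editorial note: an illustrative sketch (not part of the original source);
// "areaID" and "pageAddress" are placeholder names. A debugging user first
// obtains a cookie for the (fully locked) area and can then toggle a page
// between trapping and accessible:
//
//	void* cookie;
//	if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
//		// make any access to the page fault ...
//		vm_set_kernel_area_debug_protection(cookie, pageAddress, B_PAGE_SIZE,
//			0);
//		// ... and later make it accessible again
//		vm_set_kernel_area_debug_protection(cookie, pageAddress, B_PAGE_SIZE,
//			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	}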
1378 
1379 
1380 status_t
1381 vm_block_address_range(const char* name, void* address, addr_t size)
1382 {
1383 	if (!arch_vm_supports_protection(0))
1384 		return B_NOT_SUPPORTED;
1385 
1386 	AddressSpaceWriteLocker locker;
1387 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1388 	if (status != B_OK)
1389 		return status;
1390 
1391 	VMAddressSpace* addressSpace = locker.AddressSpace();
1392 
1393 	// create an anonymous cache
1394 	VMCache* cache;
1395 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1396 		VM_PRIORITY_SYSTEM);
1397 	if (status != B_OK)
1398 		return status;
1399 
1400 	cache->temporary = 1;
1401 	cache->virtual_end = size;
1402 	cache->Lock();
1403 
1404 	VMArea* area;
1405 	virtual_address_restrictions addressRestrictions = {};
1406 	addressRestrictions.address = address;
1407 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1408 	status = map_backing_store(addressSpace, cache, 0, name, size,
1409 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
1410 		true, &area, NULL);
1411 	if (status != B_OK) {
1412 		cache->ReleaseRefAndUnlock();
1413 		return status;
1414 	}
1415 
1416 	cache->Unlock();
1417 	area->cache_type = CACHE_TYPE_RAM;
1418 	return area->id;
1419 }
1420 
1421 
1422 status_t
1423 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1424 {
1425 	AddressSpaceWriteLocker locker(team);
1426 	if (!locker.IsLocked())
1427 		return B_BAD_TEAM_ID;
1428 
1429 	VMAddressSpace* addressSpace = locker.AddressSpace();
1430 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1431 		addressSpace == VMAddressSpace::Kernel()
1432 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1433 }
1434 
1435 
1436 status_t
1437 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1438 	addr_t size, uint32 flags)
1439 {
1440 	if (size == 0)
1441 		return B_BAD_VALUE;
1442 
1443 	AddressSpaceWriteLocker locker(team);
1444 	if (!locker.IsLocked())
1445 		return B_BAD_TEAM_ID;
1446 
1447 	virtual_address_restrictions addressRestrictions = {};
1448 	addressRestrictions.address = *_address;
1449 	addressRestrictions.address_specification = addressSpec;
1450 	VMAddressSpace* addressSpace = locker.AddressSpace();
1451 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1452 		addressSpace == VMAddressSpace::Kernel()
1453 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1454 		_address);
1455 }
1456 
1457 
1458 area_id
1459 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1460 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1461 	const virtual_address_restrictions* virtualAddressRestrictions,
1462 	const physical_address_restrictions* physicalAddressRestrictions,
1463 	bool kernel, void** _address)
1464 {
1465 	VMArea* area;
1466 	VMCache* cache;
1467 	vm_page* page = NULL;
1468 	bool isStack = (protection & B_STACK_AREA) != 0;
1469 	page_num_t guardPages;
1470 	bool canOvercommit = false;
1471 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1472 		? VM_PAGE_ALLOC_CLEAR : 0;
1473 
1474 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1475 		team, name, size));
1476 
1477 	size = PAGE_ALIGN(size);
1478 	guardSize = PAGE_ALIGN(guardSize);
1479 	guardPages = guardSize / B_PAGE_SIZE;
1480 
1481 	if (size == 0 || size < guardSize)
1482 		return B_BAD_VALUE;
1483 	if (!arch_vm_supports_protection(protection))
1484 		return B_NOT_SUPPORTED;
1485 
1486 	if (team == B_CURRENT_TEAM)
1487 		team = VMAddressSpace::CurrentID();
1488 	if (team < 0)
1489 		return B_BAD_TEAM_ID;
1490 
1491 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1492 		canOvercommit = true;
1493 
1494 #ifdef DEBUG_KERNEL_STACKS
1495 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1496 		isStack = true;
1497 #endif
1498 
1499 	// check parameters
1500 	switch (virtualAddressRestrictions->address_specification) {
1501 		case B_ANY_ADDRESS:
1502 		case B_EXACT_ADDRESS:
1503 		case B_BASE_ADDRESS:
1504 		case B_ANY_KERNEL_ADDRESS:
1505 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1506 		case B_RANDOMIZED_ANY_ADDRESS:
1507 		case B_RANDOMIZED_BASE_ADDRESS:
1508 			break;
1509 
1510 		default:
1511 			return B_BAD_VALUE;
1512 	}
1513 
1514 	// If low or high physical address restrictions are given, we force
1515 	// B_CONTIGUOUS wiring, since only then we'll use
1516 	// vm_page_allocate_page_run() which deals with those restrictions.
1517 	if (physicalAddressRestrictions->low_address != 0
1518 		|| physicalAddressRestrictions->high_address != 0) {
1519 		wiring = B_CONTIGUOUS;
1520 	}
1521 
1522 	physical_address_restrictions stackPhysicalRestrictions;
1523 	bool doReserveMemory = false;
1524 	switch (wiring) {
1525 		case B_NO_LOCK:
1526 			break;
1527 		case B_FULL_LOCK:
1528 		case B_LAZY_LOCK:
1529 		case B_CONTIGUOUS:
1530 			doReserveMemory = true;
1531 			break;
1532 		case B_ALREADY_WIRED:
1533 			break;
1534 		case B_LOMEM:
1535 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1536 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1537 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1538 			wiring = B_CONTIGUOUS;
1539 			doReserveMemory = true;
1540 			break;
1541 		case B_32_BIT_FULL_LOCK:
1542 			if (B_HAIKU_PHYSICAL_BITS <= 32
1543 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1544 				wiring = B_FULL_LOCK;
1545 				doReserveMemory = true;
1546 				break;
1547 			}
1548 			// TODO: We don't really support this mode efficiently. Just fall
1549 			// through for now ...
1550 		case B_32_BIT_CONTIGUOUS:
1551 			#if B_HAIKU_PHYSICAL_BITS > 32
1552 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1553 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1554 					stackPhysicalRestrictions.high_address
1555 						= (phys_addr_t)1 << 32;
1556 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1557 				}
1558 			#endif
1559 			wiring = B_CONTIGUOUS;
1560 			doReserveMemory = true;
1561 			break;
1562 		default:
1563 			return B_BAD_VALUE;
1564 	}
1565 
1566 	// Optimization: For a single-page contiguous allocation without low/high
1567 	// memory restrictions, B_FULL_LOCK wiring suffices.
1568 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1569 		&& physicalAddressRestrictions->low_address == 0
1570 		&& physicalAddressRestrictions->high_address == 0) {
1571 		wiring = B_FULL_LOCK;
1572 	}
1573 
1574 	// For full lock or contiguous areas we're also going to map the pages and
1575 	// thus need to reserve pages for the mapping backend upfront.
1576 	addr_t reservedMapPages = 0;
1577 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1578 		AddressSpaceWriteLocker locker;
1579 		status_t status = locker.SetTo(team);
1580 		if (status != B_OK)
1581 			return status;
1582 
1583 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1584 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1585 	}
1586 
1587 	int priority;
1588 	if (team != VMAddressSpace::KernelID())
1589 		priority = VM_PRIORITY_USER;
1590 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1591 		priority = VM_PRIORITY_VIP;
1592 	else
1593 		priority = VM_PRIORITY_SYSTEM;
1594 
1595 	// Reserve memory before acquiring the address space lock. This reduces the
1596 	// chances of failure, since while holding the write lock to the address
1597 	// space (if it is the kernel address space, that is), the low memory handler
1598 	// won't be able to free anything for us.
1599 	addr_t reservedMemory = 0;
1600 	if (doReserveMemory) {
1601 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1602 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1603 			return B_NO_MEMORY;
1604 		reservedMemory = size;
1605 		// TODO: We don't reserve the memory for the pages for the page
1606 		// directories/tables. We actually need to, since we currently don't
1607 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1608 		// there are actually fewer physical pages than there should be, which
1609 		// can get the VM into trouble in low memory situations.
1610 	}
1611 
1612 	AddressSpaceWriteLocker locker;
1613 	VMAddressSpace* addressSpace;
1614 	status_t status;
1615 
1616 	// For full lock areas reserve the pages before locking the address
1617 	// space. E.g. block caches can't release their memory while we hold the
1618 	// address space lock.
1619 	page_num_t reservedPages = reservedMapPages;
1620 	if (wiring == B_FULL_LOCK)
1621 		reservedPages += size / B_PAGE_SIZE;
1622 
1623 	vm_page_reservation reservation;
1624 	if (reservedPages > 0) {
1625 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1626 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1627 					priority)) {
1628 				reservedPages = 0;
1629 				status = B_WOULD_BLOCK;
1630 				goto err0;
1631 			}
1632 		} else
1633 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1634 	}
1635 
1636 	if (wiring == B_CONTIGUOUS) {
1637 		// we try to allocate the page run here upfront as this may easily
1638 		// fail for obvious reasons
1639 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1640 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1641 		if (page == NULL) {
1642 			status = B_NO_MEMORY;
1643 			goto err0;
1644 		}
1645 	}
1646 
1647 	// Lock the address space and, if B_EXACT_ADDRESS and
1648 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1649 	// is not wired.
1650 	do {
1651 		status = locker.SetTo(team);
1652 		if (status != B_OK)
1653 			goto err1;
1654 
1655 		addressSpace = locker.AddressSpace();
1656 	} while (virtualAddressRestrictions->address_specification
1657 			== B_EXACT_ADDRESS
1658 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1659 		&& wait_if_address_range_is_wired(addressSpace,
1660 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1661 
1662 	// create an anonymous cache
1663 	// if it's a stack, make sure that at least two pages are available
1664 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1665 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1666 		wiring == B_NO_LOCK, priority);
1667 	if (status != B_OK)
1668 		goto err1;
1669 
1670 	cache->temporary = 1;
1671 	cache->virtual_end = size;
1672 	cache->committed_size = reservedMemory;
1673 		// TODO: This should be done via a method.
1674 	reservedMemory = 0;
1675 
1676 	cache->Lock();
1677 
1678 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1679 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1680 		virtualAddressRestrictions, kernel, &area, _address);
1681 
1682 	if (status != B_OK) {
1683 		cache->ReleaseRefAndUnlock();
1684 		goto err1;
1685 	}
1686 
1687 	locker.DegradeToReadLock();
1688 
1689 	switch (wiring) {
1690 		case B_NO_LOCK:
1691 		case B_LAZY_LOCK:
1692 			// do nothing - the pages are mapped in as needed
1693 			break;
1694 
1695 		case B_FULL_LOCK:
1696 		{
1697 			// Allocate and map all pages for this area
1698 
1699 			off_t offset = 0;
1700 			for (addr_t address = area->Base();
1701 					address < area->Base() + (area->Size() - 1);
1702 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1703 #ifdef DEBUG_KERNEL_STACKS
1704 #	ifdef STACK_GROWS_DOWNWARDS
1705 				if (isStack && address < area->Base()
1706 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1707 #	else
1708 				if (isStack && address >= area->Base() + area->Size()
1709 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1710 #	endif
1711 					continue;
1712 #endif
1713 				vm_page* page = vm_page_allocate_page(&reservation,
1714 					PAGE_STATE_WIRED | pageAllocFlags);
1715 				cache->InsertPage(page, offset);
1716 				map_page(area, page, address, protection, &reservation);
1717 
1718 				DEBUG_PAGE_ACCESS_END(page);
1719 			}
1720 
1721 			break;
1722 		}
1723 
1724 		case B_ALREADY_WIRED:
1725 		{
1726 			// The pages should already be mapped. This is only really useful
1727 			// during boot time. Find the appropriate vm_page objects and stick
1728 			// them in the cache object.
1729 			VMTranslationMap* map = addressSpace->TranslationMap();
1730 			off_t offset = 0;
1731 
1732 			if (!gKernelStartup)
1733 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1734 
1735 			map->Lock();
1736 
1737 			for (addr_t virtualAddress = area->Base();
1738 					virtualAddress < area->Base() + (area->Size() - 1);
1739 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1740 				phys_addr_t physicalAddress;
1741 				uint32 flags;
1742 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1743 				if (status < B_OK) {
1744 					panic("looking up mapping failed for va 0x%lx\n",
1745 						virtualAddress);
1746 				}
1747 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1748 				if (page == NULL) {
1749 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1750 						"\n", physicalAddress);
1751 				}
1752 
1753 				DEBUG_PAGE_ACCESS_START(page);
1754 
1755 				cache->InsertPage(page, offset);
1756 				increment_page_wired_count(page);
1757 				vm_page_set_state(page, PAGE_STATE_WIRED);
1758 				page->busy = false;
1759 
1760 				DEBUG_PAGE_ACCESS_END(page);
1761 			}
1762 
1763 			map->Unlock();
1764 			break;
1765 		}
1766 
1767 		case B_CONTIGUOUS:
1768 		{
1769 			// We have already allocated our contiguous page run, so we can now
1770 			// just map them in the address space
1771 			VMTranslationMap* map = addressSpace->TranslationMap();
1772 			phys_addr_t physicalAddress
1773 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1774 			addr_t virtualAddress = area->Base();
1775 			off_t offset = 0;
1776 
1777 			map->Lock();
1778 
1779 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1780 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1781 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1782 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1783 				if (page == NULL)
1784 					panic("couldn't lookup physical page just allocated\n");
1785 
1786 				status = map->Map(virtualAddress, physicalAddress, protection,
1787 					area->MemoryType(), &reservation);
1788 				if (status < B_OK)
1789 					panic("couldn't map physical page in page run\n");
1790 
1791 				cache->InsertPage(page, offset);
1792 				increment_page_wired_count(page);
1793 
1794 				DEBUG_PAGE_ACCESS_END(page);
1795 			}
1796 
1797 			map->Unlock();
1798 			break;
1799 		}
1800 
1801 		default:
1802 			break;
1803 	}
1804 
1805 	cache->Unlock();
1806 
1807 	if (reservedPages > 0)
1808 		vm_page_unreserve_pages(&reservation);
1809 
1810 	TRACE(("vm_create_anonymous_area: done\n"));
1811 
1812 	area->cache_type = CACHE_TYPE_RAM;
1813 	return area->id;
1814 
1815 err1:
1816 	if (wiring == B_CONTIGUOUS) {
1817 		// we had reserved the area space upfront...
1818 		phys_addr_t pageNumber = page->physical_page_number;
1819 		int32 i;
1820 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1821 			page = vm_lookup_page(pageNumber);
1822 			if (page == NULL)
1823 				panic("couldn't lookup physical page just allocated\n");
1824 
1825 			vm_page_set_state(page, PAGE_STATE_FREE);
1826 		}
1827 	}
1828 
1829 err0:
1830 	if (reservedPages > 0)
1831 		vm_page_unreserve_pages(&reservation);
1832 	if (reservedMemory > 0)
1833 		vm_unreserve_memory(reservedMemory);
1834 
1835 	return status;
1836 }
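
// Illustrative sketch (not part of the original source): how a kernel caller
// might invoke vm_create_anonymous_area(), mirroring the call that
// _vm_map_file() below makes for anonymous (fd < 0) mappings. The area name
// and size are placeholders.
//
//	void* address = NULL;
//	virtual_address_restrictions virtualRestrictions = {};
//	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
//	physical_address_restrictions physicalRestrictions = {};
//	area_id area = vm_create_anonymous_area(VMAddressSpace::KernelID(),
//		"example buffer", B_PAGE_SIZE * 4, B_NO_LOCK,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
//		&virtualRestrictions, &physicalRestrictions, true, &address);
//	if (area < 0)
//		return area;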
1837 
1838 
1839 area_id
1840 vm_map_physical_memory(team_id team, const char* name, void** _address,
1841 	uint32 addressSpec, addr_t size, uint32 protection,
1842 	phys_addr_t physicalAddress, bool alreadyWired)
1843 {
1844 	VMArea* area;
1845 	VMCache* cache;
1846 	addr_t mapOffset;
1847 
1848 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1849 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1850 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1851 		addressSpec, size, protection, physicalAddress));
1852 
1853 	if (!arch_vm_supports_protection(protection))
1854 		return B_NOT_SUPPORTED;
1855 
1856 	AddressSpaceWriteLocker locker(team);
1857 	if (!locker.IsLocked())
1858 		return B_BAD_TEAM_ID;
1859 
1860 	// if the physical address is not page aligned,
1861 	// move the area down so that it starts on a page boundary
1862 	mapOffset = physicalAddress % B_PAGE_SIZE;
1863 	size += mapOffset;
1864 	physicalAddress -= mapOffset;
1865 
1866 	size = PAGE_ALIGN(size);
1867 
1868 	// create a device cache
1869 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1870 	if (status != B_OK)
1871 		return status;
1872 
1873 	cache->virtual_end = size;
1874 
1875 	cache->Lock();
1876 
1877 	virtual_address_restrictions addressRestrictions = {};
1878 	addressRestrictions.address = *_address;
1879 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1880 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1881 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1882 		true, &area, _address);
1883 
1884 	if (status < B_OK)
1885 		cache->ReleaseRefLocked();
1886 
1887 	cache->Unlock();
1888 
1889 	if (status == B_OK) {
1890 		// set requested memory type -- use uncached, if not given
1891 		uint32 memoryType = addressSpec & B_MTR_MASK;
1892 		if (memoryType == 0)
1893 			memoryType = B_MTR_UC;
1894 
1895 		area->SetMemoryType(memoryType);
1896 
1897 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1898 		if (status != B_OK)
1899 			delete_area(locker.AddressSpace(), area, false);
1900 	}
1901 
1902 	if (status != B_OK)
1903 		return status;
1904 
1905 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1906 
1907 	if (alreadyWired) {
1908 		// The area is already mapped, but possibly not with the right
1909 		// memory type.
1910 		map->Lock();
1911 		map->ProtectArea(area, area->protection);
1912 		map->Unlock();
1913 	} else {
1914 		// Map the area completely.
1915 
1916 		// reserve pages needed for the mapping
1917 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1918 			area->Base() + (size - 1));
1919 		vm_page_reservation reservation;
1920 		vm_page_reserve_pages(&reservation, reservePages,
1921 			team == VMAddressSpace::KernelID()
1922 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1923 
1924 		map->Lock();
1925 
1926 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1927 			map->Map(area->Base() + offset, physicalAddress + offset,
1928 				protection, area->MemoryType(), &reservation);
1929 		}
1930 
1931 		map->Unlock();
1932 
1933 		vm_page_unreserve_pages(&reservation);
1934 	}
1935 
1936 	// modify the pointer returned to be offset back into the new area
1937 	// the same way the physical address in was offset
1938 	*_address = (void*)((addr_t)*_address + mapOffset);
1939 
1940 	area->cache_type = CACHE_TYPE_DEVICE;
1941 	return area->id;
1942 }
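
// Illustrative sketch (not part of the original source): mapping a
// memory-mapped I/O range with vm_map_physical_memory(). The physical base
// address is a placeholder; since no B_MTR_* flag is ORed into the address
// specification, the memory type defaults to uncached (B_MTR_UC) as above.
//
//	void* registers = NULL;
//	area_id area = vm_map_physical_memory(VMAddressSpace::KernelID(),
//		"example mmio", &registers, B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, physicalBase, false);
//	if (area < 0)
//		return area;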
1943 
1944 
1945 /*!	Don't use!
1946 	TODO: This function was introduced to map physical page vecs to
1947 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1948 	use a device cache and does not track vm_page::wired_count!
1949 */
1950 area_id
1951 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1952 	uint32 addressSpec, addr_t* _size, uint32 protection,
1953 	struct generic_io_vec* vecs, uint32 vecCount)
1954 {
1955 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1956 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1957 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1958 		addressSpec, _size, protection, vecs, vecCount));
1959 
1960 	if (!arch_vm_supports_protection(protection)
1961 		|| (addressSpec & B_MTR_MASK) != 0) {
1962 		return B_NOT_SUPPORTED;
1963 	}
1964 
1965 	AddressSpaceWriteLocker locker(team);
1966 	if (!locker.IsLocked())
1967 		return B_BAD_TEAM_ID;
1968 
1969 	if (vecCount == 0)
1970 		return B_BAD_VALUE;
1971 
1972 	addr_t size = 0;
1973 	for (uint32 i = 0; i < vecCount; i++) {
1974 		if (vecs[i].base % B_PAGE_SIZE != 0
1975 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1976 			return B_BAD_VALUE;
1977 		}
1978 
1979 		size += vecs[i].length;
1980 	}
1981 
1982 	// create a device cache
1983 	VMCache* cache;
1984 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1985 	if (result != B_OK)
1986 		return result;
1987 
1988 	cache->virtual_end = size;
1989 
1990 	cache->Lock();
1991 
1992 	VMArea* area;
1993 	virtual_address_restrictions addressRestrictions = {};
1994 	addressRestrictions.address = *_address;
1995 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1996 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1997 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
1998 		&addressRestrictions, true, &area, _address);
1999 
2000 	if (result != B_OK)
2001 		cache->ReleaseRefLocked();
2002 
2003 	cache->Unlock();
2004 
2005 	if (result != B_OK)
2006 		return result;
2007 
2008 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2009 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
2010 		area->Base() + (size - 1));
2011 
2012 	vm_page_reservation reservation;
2013 	vm_page_reserve_pages(&reservation, reservePages,
2014 			team == VMAddressSpace::KernelID()
2015 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2016 	map->Lock();
2017 
2018 	uint32 vecIndex = 0;
2019 	size_t vecOffset = 0;
2020 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2021 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
2022 			vecOffset = 0;
2023 			vecIndex++;
2024 		}
2025 
2026 		if (vecIndex >= vecCount)
2027 			break;
2028 
2029 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
2030 			protection, area->MemoryType(), &reservation);
2031 
2032 		vecOffset += B_PAGE_SIZE;
2033 	}
2034 
2035 	map->Unlock();
2036 	vm_page_unreserve_pages(&reservation);
2037 
2038 	if (_size != NULL)
2039 		*_size = size;
2040 
2041 	area->cache_type = CACHE_TYPE_DEVICE;
2042 	return area->id;
2043 }
2044 
2045 
2046 area_id
2047 vm_create_null_area(team_id team, const char* name, void** address,
2048 	uint32 addressSpec, addr_t size, uint32 flags)
2049 {
2050 	size = PAGE_ALIGN(size);
2051 
2052 	// Lock the address space and, if B_EXACT_ADDRESS and
2053 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
2054 	// is not wired.
2055 	AddressSpaceWriteLocker locker;
2056 	do {
2057 		if (locker.SetTo(team) != B_OK)
2058 			return B_BAD_TEAM_ID;
2059 	} while (addressSpec == B_EXACT_ADDRESS
2060 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
2061 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2062 			(addr_t)*address, size, &locker));
2063 
2064 	// create a null cache
2065 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
2066 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
2067 	VMCache* cache;
2068 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
2069 	if (status != B_OK)
2070 		return status;
2071 
2072 	cache->temporary = 1;
2073 	cache->virtual_end = size;
2074 
2075 	cache->Lock();
2076 
2077 	VMArea* area;
2078 	virtual_address_restrictions addressRestrictions = {};
2079 	addressRestrictions.address = *address;
2080 	addressRestrictions.address_specification = addressSpec;
2081 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
2082 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
2083 		REGION_NO_PRIVATE_MAP, flags,
2084 		&addressRestrictions, true, &area, address);
2085 
2086 	if (status < B_OK) {
2087 		cache->ReleaseRefAndUnlock();
2088 		return status;
2089 	}
2090 
2091 	cache->Unlock();
2092 
2093 	area->cache_type = CACHE_TYPE_NULL;
2094 	return area->id;
2095 }
2096 
2097 
2098 /*!	Creates the vnode cache for the specified \a vnode.
2099 	The vnode has to be marked busy when calling this function.
2100 */
2101 status_t
2102 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
2103 {
2104 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2105 }
2106 
2107 
2108 /*!	\a cache must be locked. The area's address space must be read-locked.
2109 */
2110 static void
2111 pre_map_area_pages(VMArea* area, VMCache* cache,
2112 	vm_page_reservation* reservation)
2113 {
2114 	addr_t baseAddress = area->Base();
2115 	addr_t cacheOffset = area->cache_offset;
2116 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2117 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
2118 
2119 	for (VMCachePagesTree::Iterator it
2120 				= cache->pages.GetIterator(firstPage, true, true);
2121 			vm_page* page = it.Next();) {
2122 		if (page->cache_offset >= endPage)
2123 			break;
2124 
2125 		// skip busy and inactive pages
2126 		if (page->busy || page->usage_count == 0)
2127 			continue;
2128 
2129 		DEBUG_PAGE_ACCESS_START(page);
2130 		map_page(area, page,
2131 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2132 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2133 		DEBUG_PAGE_ACCESS_END(page);
2134 	}
2135 }
2136 
2137 
2138 /*!	Will map the file specified by \a fd to an area in memory.
2139 	The file will be mirrored beginning at the specified \a offset. The
2140 	\a offset and \a size arguments have to be page aligned.
2141 */
2142 static area_id
2143 _vm_map_file(team_id team, const char* name, void** _address,
2144 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2145 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2146 {
2147 	// TODO: for binary files, we want to make sure that they map a snapshot
2148 	//	of the file at mapping time, i.e. later changes should not make it
2149 	//	into the mapped copy -- this will need quite some changes to be done
2150 	//	in a nice way
2151 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2152 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2153 
2154 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2155 	size = PAGE_ALIGN(size);
2156 
2157 	if (mapping == REGION_NO_PRIVATE_MAP)
2158 		protection |= B_SHARED_AREA;
2159 	if (addressSpec != B_EXACT_ADDRESS)
2160 		unmapAddressRange = false;
2161 
2162 	uint32 mappingFlags = 0;
2163 	if (unmapAddressRange)
2164 		mappingFlags |= CREATE_AREA_UNMAP_ADDRESS_RANGE;
2165 
2166 	if (fd < 0) {
2167 		virtual_address_restrictions virtualRestrictions = {};
2168 		virtualRestrictions.address = *_address;
2169 		virtualRestrictions.address_specification = addressSpec;
2170 		physical_address_restrictions physicalRestrictions = {};
2171 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2172 			mappingFlags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2173 			_address);
2174 	}
2175 
2176 	// get the open flags of the FD
2177 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2178 	if (descriptor == NULL)
2179 		return EBADF;
2180 	int32 openMode = descriptor->open_mode;
2181 	put_fd(descriptor);
2182 
2183 	// The FD must be open for reading in any case. For a shared mapping with
2184 	// write access, the FD must additionally be open for writing.
2185 	if ((openMode & O_ACCMODE) == O_WRONLY
2186 		|| (mapping == REGION_NO_PRIVATE_MAP
2187 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2188 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2189 		return EACCES;
2190 	}
2191 
2192 	uint32 protectionMax = 0;
2193 	if (mapping == REGION_NO_PRIVATE_MAP) {
2194 		if ((openMode & O_ACCMODE) == O_RDWR)
2195 			protectionMax = protection | B_USER_PROTECTION;
2196 		else
2197 			protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA);
2198 	} else if (mapping == REGION_PRIVATE_MAP) {
2199 		// For privately mapped read-only regions, skip committing memory.
2200 		// (If protections are changed later on, memory will be committed then.)
2201 		if ((protection & B_WRITE_AREA) == 0)
2202 			mappingFlags |= CREATE_AREA_DONT_COMMIT_MEMORY;
2203 	}
2204 
2205 	// get the vnode for the object, this also grabs a ref to it
2206 	struct vnode* vnode = NULL;
2207 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2208 	if (status < B_OK)
2209 		return status;
2210 	VnodePutter vnodePutter(vnode);
2211 
2212 	// If we're going to pre-map pages, we need to reserve the pages needed by
2213 	// the mapping backend upfront.
2214 	page_num_t reservedPreMapPages = 0;
2215 	vm_page_reservation reservation;
2216 	if ((protection & B_READ_AREA) != 0) {
2217 		AddressSpaceWriteLocker locker;
2218 		status = locker.SetTo(team);
2219 		if (status != B_OK)
2220 			return status;
2221 
2222 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2223 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2224 
2225 		locker.Unlock();
2226 
2227 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2228 			team == VMAddressSpace::KernelID()
2229 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2230 	}
2231 
2232 	struct PageUnreserver {
2233 		PageUnreserver(vm_page_reservation* reservation)
2234 			:
2235 			fReservation(reservation)
2236 		{
2237 		}
2238 
2239 		~PageUnreserver()
2240 		{
2241 			if (fReservation != NULL)
2242 				vm_page_unreserve_pages(fReservation);
2243 		}
2244 
2245 		vm_page_reservation* fReservation;
2246 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2247 
2248 	// Lock the address space and, if the specified address range shall be
2249 	// unmapped, ensure it is not wired.
2250 	AddressSpaceWriteLocker locker;
2251 	do {
2252 		if (locker.SetTo(team) != B_OK)
2253 			return B_BAD_TEAM_ID;
2254 	} while (unmapAddressRange
2255 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2256 			(addr_t)*_address, size, &locker));
2257 
2258 	// TODO: this only works for file systems that use the file cache
2259 	VMCache* cache;
2260 	status = vfs_get_vnode_cache(vnode, &cache, false);
2261 	if (status < B_OK)
2262 		return status;
2263 
2264 	cache->Lock();
2265 
2266 	VMArea* area;
2267 	virtual_address_restrictions addressRestrictions = {};
2268 	addressRestrictions.address = *_address;
2269 	addressRestrictions.address_specification = addressSpec;
2270 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2271 		0, protection, protectionMax, mapping, mappingFlags,
2272 		&addressRestrictions, kernel, &area, _address);
2273 
2274 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2275 		// map_backing_store() cannot know we no longer need the ref
2276 		cache->ReleaseRefLocked();
2277 	}
2278 
2279 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2280 		pre_map_area_pages(area, cache, &reservation);
2281 
2282 	cache->Unlock();
2283 
2284 	if (status == B_OK) {
2285 		// TODO: this probably deserves a smarter solution, ie. don't always
2286 		// prefetch stuff, and also, probably don't trigger it at this place.
2287 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2288 			// prefetches at max 10 MB starting from "offset"
2289 	}
2290 
2291 	if (status != B_OK)
2292 		return status;
2293 
2294 	area->cache_type = CACHE_TYPE_VNODE;
2295 	return area->id;
2296 }
2297 
2298 
2299 area_id
2300 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2301 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2302 	int fd, off_t offset)
2303 {
2304 	if (!arch_vm_supports_protection(protection))
2305 		return B_NOT_SUPPORTED;
2306 
2307 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2308 		mapping, unmapAddressRange, fd, offset, true);
2309 }
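
// Illustrative sketch (not part of the original source): a kernel-side
// private mapping of an already opened file via vm_map_file(). The file
// descriptor and size are placeholders; offset and size have to be page
// aligned, as documented for _vm_map_file() above.
//
//	void* address = NULL;
//	area_id area = vm_map_file(VMAddressSpace::KernelID(), "mapped file",
//		&address, B_ANY_KERNEL_ADDRESS, fileSize, B_KERNEL_READ_AREA,
//		REGION_PRIVATE_MAP, false, fd, 0);
//	if (area < 0)
//		return area;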
2310 
2311 
2312 VMCache*
2313 vm_area_get_locked_cache(VMArea* area)
2314 {
2315 	rw_lock_read_lock(&sAreaCacheLock);
2316 
2317 	while (true) {
2318 		VMCache* cache = area->cache;
2319 
2320 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2321 			// cache has been deleted
2322 			rw_lock_read_lock(&sAreaCacheLock);
2323 			continue;
2324 		}
2325 
2326 		rw_lock_read_lock(&sAreaCacheLock);
2327 
2328 		if (cache == area->cache) {
2329 			cache->AcquireRefLocked();
2330 			rw_lock_read_unlock(&sAreaCacheLock);
2331 			return cache;
2332 		}
2333 
2334 		// the cache changed in the meantime
2335 		cache->Unlock();
2336 	}
2337 }
2338 
2339 
2340 void
2341 vm_area_put_locked_cache(VMCache* cache)
2342 {
2343 	cache->ReleaseRefAndUnlock();
2344 }
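
// Illustrative usage pattern (not part of the original source) for the two
// helpers above: vm_area_get_locked_cache() returns the area's cache locked
// and with an extra reference, both of which vm_area_put_locked_cache()
// releases again.
//
//	VMCache* cache = vm_area_get_locked_cache(area);
//	// ... inspect or modify the cache while it is locked ...
//	vm_area_put_locked_cache(cache);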
2345 
2346 
2347 area_id
2348 vm_clone_area(team_id team, const char* name, void** address,
2349 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2350 	bool kernel)
2351 {
2352 	VMArea* newArea = NULL;
2353 	VMArea* sourceArea;
2354 
2355 	// Check whether the source area exists and is cloneable. If so, mark it
2356 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2357 	{
2358 		AddressSpaceWriteLocker locker;
2359 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2360 		if (status != B_OK)
2361 			return status;
2362 
2363 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2364 			return B_NOT_ALLOWED;
2365 
2366 		sourceArea->protection |= B_SHARED_AREA;
2367 		protection |= B_SHARED_AREA;
2368 	}
2369 
2370 	// Now lock both address spaces and actually do the cloning.
2371 
2372 	MultiAddressSpaceLocker locker;
2373 	VMAddressSpace* sourceAddressSpace;
2374 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2375 	if (status != B_OK)
2376 		return status;
2377 
2378 	VMAddressSpace* targetAddressSpace;
2379 	status = locker.AddTeam(team, true, &targetAddressSpace);
2380 	if (status != B_OK)
2381 		return status;
2382 
2383 	status = locker.Lock();
2384 	if (status != B_OK)
2385 		return status;
2386 
2387 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2388 	if (sourceArea == NULL)
2389 		return B_BAD_VALUE;
2390 
2391 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2392 		return B_NOT_ALLOWED;
2393 
2394 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2395 
2396 	if (!kernel && sourceAddressSpace != targetAddressSpace
2397 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2398 #if KDEBUG
2399 		Team* team = thread_get_current_thread()->team;
2400 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2401 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2402 #endif
2403 		status = B_NOT_ALLOWED;
2404 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2405 		status = B_NOT_ALLOWED;
2406 	} else {
2407 		virtual_address_restrictions addressRestrictions = {};
2408 		addressRestrictions.address = *address;
2409 		addressRestrictions.address_specification = addressSpec;
2410 		status = map_backing_store(targetAddressSpace, cache,
2411 			sourceArea->cache_offset, name, sourceArea->Size(),
2412 			sourceArea->wiring, protection, sourceArea->protection_max,
2413 			mapping, 0, &addressRestrictions,
2414 			kernel, &newArea, address);
2415 	}
2416 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2417 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2418 		// to create a new cache, and has therefore already acquired a reference
2419 		// to the source cache - but otherwise it has no idea that we need
2420 		// one.
2421 		cache->AcquireRefLocked();
2422 	}
2423 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2424 		// we need to map in everything at this point
2425 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2426 			// we don't have actual pages to map but a physical area
2427 			VMTranslationMap* map
2428 				= sourceArea->address_space->TranslationMap();
2429 			map->Lock();
2430 
2431 			phys_addr_t physicalAddress;
2432 			uint32 oldProtection;
2433 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2434 
2435 			map->Unlock();
2436 
2437 			map = targetAddressSpace->TranslationMap();
2438 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2439 				newArea->Base() + (newArea->Size() - 1));
2440 
2441 			vm_page_reservation reservation;
2442 			vm_page_reserve_pages(&reservation, reservePages,
2443 				targetAddressSpace == VMAddressSpace::Kernel()
2444 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2445 			map->Lock();
2446 
2447 			for (addr_t offset = 0; offset < newArea->Size();
2448 					offset += B_PAGE_SIZE) {
2449 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2450 					protection, newArea->MemoryType(), &reservation);
2451 			}
2452 
2453 			map->Unlock();
2454 			vm_page_unreserve_pages(&reservation);
2455 		} else {
2456 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2457 			size_t reservePages = map->MaxPagesNeededToMap(
2458 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2459 			vm_page_reservation reservation;
2460 			vm_page_reserve_pages(&reservation, reservePages,
2461 				targetAddressSpace == VMAddressSpace::Kernel()
2462 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2463 
2464 			// map in all pages from source
2465 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2466 					vm_page* page  = it.Next();) {
2467 				if (!page->busy) {
2468 					DEBUG_PAGE_ACCESS_START(page);
2469 					map_page(newArea, page,
2470 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2471 							- newArea->cache_offset),
2472 						protection, &reservation);
2473 					DEBUG_PAGE_ACCESS_END(page);
2474 				}
2475 			}
2476 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2477 			// ensuring that!
2478 
2479 			vm_page_unreserve_pages(&reservation);
2480 		}
2481 	}
2482 	if (status == B_OK)
2483 		newArea->cache_type = sourceArea->cache_type;
2484 
2485 	vm_area_put_locked_cache(cache);
2486 
2487 	if (status < B_OK)
2488 		return status;
2489 
2490 	return newArea->id;
2491 }
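
// Illustrative sketch (not part of the original source): cloning a shared
// area into another team with vm_clone_area(). The source area ID and target
// team are placeholders.
//
//	void* address = NULL;
//	area_id clone = vm_clone_area(targetTeam, "cloned area", &address,
//		B_ANY_ADDRESS, B_READ_AREA | B_WRITE_AREA, REGION_NO_PRIVATE_MAP,
//		sourceAreaID, false);
//	if (clone < 0)
//		return clone;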
2492 
2493 
2494 /*!	Deletes the specified area of the given address space.
2495 
2496 	The address space must be write-locked.
2497 	The caller must ensure that the area does not have any wired ranges.
2498 
2499 	\param addressSpace The address space containing the area.
2500 	\param area The area to be deleted.
2501 	\param deletingAddressSpace \c true, if the address space is in the process
2502 		of being deleted.
2503 */
2504 static void
2505 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2506 	bool deletingAddressSpace)
2507 {
2508 	ASSERT(!area->IsWired());
2509 
2510 	VMAreas::Remove(area);
2511 
2512 	// At this point the area is removed from the global hash table, but
2513 	// still exists in the area list.
2514 
2515 	// Unmap the virtual address space the area occupied.
2516 	{
2517 		// We need to lock the complete cache chain.
2518 		VMCache* topCache = vm_area_get_locked_cache(area);
2519 		VMCacheChainLocker cacheChainLocker(topCache);
2520 		cacheChainLocker.LockAllSourceCaches();
2521 
2522 		// If the area's top cache is a temporary cache and the area is the only
2523 		// one referencing it (besides us currently holding a second reference),
2524 		// the unmapping code doesn't need to care about preserving the accessed
2525 		// and dirty flags of the top cache page mappings.
2526 		bool ignoreTopCachePageFlags
2527 			= topCache->temporary && topCache->RefCount() == 2;
2528 
2529 		area->address_space->TranslationMap()->UnmapArea(area,
2530 			deletingAddressSpace, ignoreTopCachePageFlags);
2531 	}
2532 
2533 	if (!area->cache->temporary)
2534 		area->cache->WriteModified();
2535 
2536 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2537 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2538 
2539 	arch_vm_unset_memory_type(area);
2540 	addressSpace->RemoveArea(area, allocationFlags);
2541 	addressSpace->Put();
2542 
2543 	area->cache->RemoveArea(area);
2544 	area->cache->ReleaseRef();
2545 
2546 	addressSpace->DeleteArea(area, allocationFlags);
2547 }
2548 
2549 
2550 status_t
2551 vm_delete_area(team_id team, area_id id, bool kernel)
2552 {
2553 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2554 		team, id));
2555 
2556 	// lock the address space and make sure the area isn't wired
2557 	AddressSpaceWriteLocker locker;
2558 	VMArea* area;
2559 	AreaCacheLocker cacheLocker;
2560 
2561 	do {
2562 		status_t status = locker.SetFromArea(team, id, area);
2563 		if (status != B_OK)
2564 			return status;
2565 
2566 		cacheLocker.SetTo(area);
2567 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2568 
2569 	cacheLocker.Unlock();
2570 
2571 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2572 		return B_NOT_ALLOWED;
2573 
2574 	delete_area(locker.AddressSpace(), area, false);
2575 	return B_OK;
2576 }
2577 
2578 
2579 /*!	Creates a new cache on top of the given cache, moves all areas from
2580 	the old cache to the new one, and changes the protection of all affected
2581 	areas' pages to read-only. If requested, wired pages are moved up to the
2582 	new cache and copies are added to the old cache in their place.
2583 	Preconditions:
2584 	- The given cache must be locked.
2585 	- All of the cache's areas' address spaces must be read locked.
2586 	- Either the cache must not have any wired ranges or a page reservation for
2587 	  all wired pages must be provided, so they can be copied.
2588 
2589 	\param lowerCache The cache on top of which a new cache shall be created.
2590 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2591 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2592 		has wired page. The wired pages are copied in this case.
2593 */
2594 static status_t
2595 vm_copy_on_write_area(VMCache* lowerCache,
2596 	vm_page_reservation* wiredPagesReservation)
2597 {
2598 	VMCache* upperCache;
2599 
2600 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2601 
2602 	// We need to separate the cache from its areas. The cache goes one level
2603 	// deeper and we create a new cache in between.
2604 
2605 	// create an anonymous cache
2606 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2607 		lowerCache->GuardSize() / B_PAGE_SIZE,
2608 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2609 		VM_PRIORITY_USER);
2610 	if (status != B_OK)
2611 		return status;
2612 
2613 	upperCache->Lock();
2614 
2615 	upperCache->temporary = 1;
2616 	upperCache->virtual_base = lowerCache->virtual_base;
2617 	upperCache->virtual_end = lowerCache->virtual_end;
2618 
2619 	// transfer the lower cache areas to the upper cache
2620 	rw_lock_write_lock(&sAreaCacheLock);
2621 	upperCache->TransferAreas(lowerCache);
2622 	rw_lock_write_unlock(&sAreaCacheLock);
2623 
2624 	lowerCache->AddConsumer(upperCache);
2625 
2626 	// We now need to remap all pages from all of the cache's areas read-only,
2627 	// so that a copy will be created on next write access. If there are wired
2628 	// pages, we keep their protection, move them to the upper cache and create
2629 	// copies for the lower cache.
2630 	if (wiredPagesReservation != NULL) {
2631 		// We need to handle wired pages -- iterate through the cache's pages.
2632 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2633 				vm_page* page = it.Next();) {
2634 			if (page->WiredCount() > 0) {
2635 				// allocate a new page and copy the wired one
2636 				vm_page* copiedPage = vm_page_allocate_page(
2637 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2638 
2639 				vm_memcpy_physical_page(
2640 					copiedPage->physical_page_number * B_PAGE_SIZE,
2641 					page->physical_page_number * B_PAGE_SIZE);
2642 
2643 				// move the wired page to the upper cache (note: removing is OK
2644 				// with the SplayTree iterator) and insert the copy
2645 				upperCache->MovePage(page);
2646 				lowerCache->InsertPage(copiedPage,
2647 					page->cache_offset * B_PAGE_SIZE);
2648 
2649 				DEBUG_PAGE_ACCESS_END(copiedPage);
2650 			} else {
2651 				// Change the protection of this page in all areas.
2652 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2653 						tempArea = tempArea->cache_next) {
2654 					if (!is_page_in_area(tempArea, page))
2655 						continue;
2656 
2657 					// The area must be readable in the same way it was
2658 					// previously writable.
2659 					addr_t address = virtual_page_address(tempArea, page);
2660 					uint32 protection = 0;
2661 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2662 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2663 						protection |= B_KERNEL_READ_AREA;
2664 					if ((pageProtection & B_READ_AREA) != 0)
2665 						protection |= B_READ_AREA;
2666 
2667 					VMTranslationMap* map
2668 						= tempArea->address_space->TranslationMap();
2669 					map->Lock();
2670 					map->ProtectPage(tempArea, address, protection);
2671 					map->Unlock();
2672 				}
2673 			}
2674 		}
2675 	} else {
2676 		ASSERT(lowerCache->WiredPagesCount() == 0);
2677 
2678 		// just change the protection of all areas
2679 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2680 				tempArea = tempArea->cache_next) {
2681 			if (tempArea->page_protections != NULL) {
2682 				// Change the protection of all pages in this area.
2683 				VMTranslationMap* map = tempArea->address_space->TranslationMap();
2684 				map->Lock();
2685 				for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2686 					vm_page* page = it.Next();) {
2687 					if (!is_page_in_area(tempArea, page))
2688 						continue;
2689 
2690 					// The area must be readable in the same way it was
2691 					// previously writable.
2692 					addr_t address = virtual_page_address(tempArea, page);
2693 					uint32 protection = 0;
2694 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2695 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2696 						protection |= B_KERNEL_READ_AREA;
2697 					if ((pageProtection & B_READ_AREA) != 0)
2698 						protection |= B_READ_AREA;
2699 
2700 					map->ProtectPage(tempArea, address, protection);
2701 				}
2702 				map->Unlock();
2703 				continue;
2704 			}
2705 			// The area must be readable in the same way it was previously
2706 			// writable.
2707 			uint32 protection = 0;
2708 			if ((tempArea->protection & B_KERNEL_READ_AREA) != 0)
2709 				protection |= B_KERNEL_READ_AREA;
2710 			if ((tempArea->protection & B_READ_AREA) != 0)
2711 				protection |= B_READ_AREA;
2712 
2713 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2714 			map->Lock();
2715 			map->ProtectArea(tempArea, protection);
2716 			map->Unlock();
2717 		}
2718 	}
2719 
2720 	vm_area_put_locked_cache(upperCache);
2721 
2722 	return B_OK;
2723 }
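
// Illustrative cache-chain diagram (not part of the original source) of the
// transformation performed by vm_copy_on_write_area(), as described in its
// documentation above:
//
//	before:   area(s) ---> lowerCache ---> ...
//
//	after:    area(s) ---> upperCache (new, temporary)
//	                            |
//	                         source
//	                            v
//	                       lowerCache ---> ...
//
// The remaining lower-cache pages are remapped read-only (wired pages are
// moved up and replaced by copies), so the next write access faults and a
// private copy is created in the upper cache.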
2724 
2725 
2726 area_id
2727 vm_copy_area(team_id team, const char* name, void** _address,
2728 	uint32 addressSpec, area_id sourceID)
2729 {
2730 	// Do the locking: target address space, all address spaces associated with
2731 	// the source cache, and the cache itself.
2732 	MultiAddressSpaceLocker locker;
2733 	VMAddressSpace* targetAddressSpace;
2734 	VMCache* cache;
2735 	VMArea* source;
2736 	AreaCacheLocker cacheLocker;
2737 	status_t status;
2738 	bool sharedArea;
2739 
2740 	page_num_t wiredPages = 0;
2741 	vm_page_reservation wiredPagesReservation;
2742 
2743 	bool restart;
2744 	do {
2745 		restart = false;
2746 
2747 		locker.Unset();
2748 		status = locker.AddTeam(team, true, &targetAddressSpace);
2749 		if (status == B_OK) {
2750 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2751 				&cache);
2752 		}
2753 		if (status != B_OK)
2754 			return status;
2755 
2756 		cacheLocker.SetTo(cache, true);	// already locked
2757 
2758 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2759 
2760 		page_num_t oldWiredPages = wiredPages;
2761 		wiredPages = 0;
2762 
2763 		// If the source area isn't shared, count the number of wired pages in
2764 		// the cache and reserve as many pages.
2765 		if (!sharedArea) {
2766 			wiredPages = cache->WiredPagesCount();
2767 
2768 			if (wiredPages > oldWiredPages) {
2769 				cacheLocker.Unlock();
2770 				locker.Unlock();
2771 
2772 				if (oldWiredPages > 0)
2773 					vm_page_unreserve_pages(&wiredPagesReservation);
2774 
2775 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2776 					VM_PRIORITY_USER);
2777 
2778 				restart = true;
2779 			}
2780 		} else if (oldWiredPages > 0)
2781 			vm_page_unreserve_pages(&wiredPagesReservation);
2782 	} while (restart);
2783 
2784 	// unreserve pages later
2785 	struct PagesUnreserver {
2786 		PagesUnreserver(vm_page_reservation* reservation)
2787 			:
2788 			fReservation(reservation)
2789 		{
2790 		}
2791 
2792 		~PagesUnreserver()
2793 		{
2794 			if (fReservation != NULL)
2795 				vm_page_unreserve_pages(fReservation);
2796 		}
2797 
2798 	private:
2799 		vm_page_reservation*	fReservation;
2800 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2801 
2802 	bool writableCopy
2803 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2804 	uint8* targetPageProtections = NULL;
2805 
2806 	if (source->page_protections != NULL) {
2807 		size_t bytes = area_page_protections_size(source->Size());
2808 		targetPageProtections = (uint8*)malloc_etc(bytes,
2809 			(source->address_space == VMAddressSpace::Kernel()
2810 					|| targetAddressSpace == VMAddressSpace::Kernel())
2811 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2812 		if (targetPageProtections == NULL)
2813 			return B_NO_MEMORY;
2814 
2815 		memcpy(targetPageProtections, source->page_protections, bytes);
2816 
2817 		if (!writableCopy) {
2818 			for (size_t i = 0; i < bytes; i++) {
2819 				if ((targetPageProtections[i]
2820 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2821 					writableCopy = true;
2822 					break;
2823 				}
2824 			}
2825 		}
2826 	}
2827 
2828 	if (addressSpec == B_CLONE_ADDRESS) {
2829 		addressSpec = B_EXACT_ADDRESS;
2830 		*_address = (void*)source->Base();
2831 	}
2832 
2833 	// First, create a cache on top of the source area, or, if this is a
2834 	// shared area, use the existing one.
2835 
2836 	VMArea* target;
2837 	virtual_address_restrictions addressRestrictions = {};
2838 	addressRestrictions.address = *_address;
2839 	addressRestrictions.address_specification = addressSpec;
2840 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2841 		name, source->Size(), source->wiring, source->protection,
2842 		source->protection_max,
2843 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2844 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2845 		&addressRestrictions, true, &target, _address);
2846 	if (status < B_OK) {
2847 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2848 		return status;
2849 	}
2850 
2851 	if (targetPageProtections != NULL)
2852 		target->page_protections = targetPageProtections;
2853 
2854 	if (sharedArea) {
2855 		// The new area uses the old area's cache, but map_backing_store()
2856 		// hasn't acquired a ref. So we have to do that now.
2857 		cache->AcquireRefLocked();
2858 	}
2859 
2860 	// If the source area is writable, we need to move it one layer up as well
2861 
2862 	if (!sharedArea) {
2863 		if (writableCopy) {
2864 			// TODO: do something more useful if this fails!
2865 			if (vm_copy_on_write_area(cache,
2866 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2867 				panic("vm_copy_on_write_area() failed!\n");
2868 			}
2869 		}
2870 	}
2871 
2872 	// we return the ID of the newly created area
2873 	return target->id;
2874 }
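
// Illustrative sketch (not part of the original source): duplicating an area
// with vm_copy_area(). For a non-shared writable source this triggers the
// copy-on-write setup above. The source area ID is a placeholder.
//
//	void* address = NULL;
//	area_id copy = vm_copy_area(VMAddressSpace::KernelID(), "area copy",
//		&address, B_ANY_KERNEL_ADDRESS, sourceAreaID);
//	if (copy < 0)
//		return copy;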
2875 
2876 
2877 status_t
2878 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2879 	bool kernel)
2880 {
2881 	fix_protection(&newProtection);
2882 
2883 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2884 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2885 
2886 	if (!arch_vm_supports_protection(newProtection))
2887 		return B_NOT_SUPPORTED;
2888 
2889 	bool becomesWritable
2890 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2891 
2892 	// lock address spaces and cache
2893 	MultiAddressSpaceLocker locker;
2894 	VMCache* cache;
2895 	VMArea* area;
2896 	status_t status;
2897 	AreaCacheLocker cacheLocker;
2898 	bool isWritable;
2899 
2900 	bool restart;
2901 	do {
2902 		restart = false;
2903 
2904 		locker.Unset();
2905 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2906 		if (status != B_OK)
2907 			return status;
2908 
2909 		cacheLocker.SetTo(cache, true);	// already locked
2910 
2911 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2912 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2913 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2914 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2915 				" (%s)\n", team, newProtection, areaID, area->name);
2916 			return B_NOT_ALLOWED;
2917 		}
2918 		if (!kernel && area->protection_max != 0
2919 			&& (newProtection & area->protection_max)
2920 				!= (newProtection & B_USER_PROTECTION)) {
2921 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2922 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
2923 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2924 				area->protection_max, areaID, area->name);
2925 			return B_NOT_ALLOWED;
2926 		}
2927 
2928 		if (team != VMAddressSpace::KernelID()
2929 			&& area->address_space->ID() != team) {
2930 			// unless you're the kernel, you are only allowed to set
2931 			// the protection of your own areas
2932 			return B_NOT_ALLOWED;
2933 		}
2934 
2935 		if (area->protection == newProtection)
2936 			return B_OK;
2937 
2938 		isWritable
2939 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2940 
2941 		// Make sure the area (or, if we're going to call
2942 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2943 		// wired ranges.
2944 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2945 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2946 					otherArea = otherArea->cache_next) {
2947 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2948 					restart = true;
2949 					break;
2950 				}
2951 			}
2952 		} else {
2953 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2954 				restart = true;
2955 		}
2956 	} while (restart);
2957 
2958 	bool changePageProtection = true;
2959 	bool changeTopCachePagesOnly = false;
2960 
2961 	if (isWritable && !becomesWritable) {
2962 		// writable -> !writable
2963 
2964 		if (cache->source != NULL && cache->temporary) {
2965 			if (cache->CountWritableAreas(area) == 0) {
2966 				// Since this cache is now backed by the pages of its source
2967 				// cache, we can change its commitment to take only those pages
2968 				// into account that really are in this cache.
2969 
2970 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2971 					team == VMAddressSpace::KernelID()
2972 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2973 
2974 				// TODO: we may be able to join with our source cache, if
2975 				// count == 0
2976 			}
2977 		}
2978 
2979 		// If only the writability changes, we can just remap the pages of the
2980 		// top cache, since the pages of lower caches are mapped read-only
2981 		// anyway. That's only advantageous, though, if the number of pages in
2982 		// the cache is significantly smaller than the number of pages in the
2983 		// area.
2984 		if (newProtection
2985 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2986 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2987 			changeTopCachePagesOnly = true;
2988 		}
2989 	} else if (!isWritable && becomesWritable) {
2990 		// !writable -> writable
2991 
2992 		if (!cache->consumers.IsEmpty()) {
2993 			// There are consumers -- we have to insert a new cache. Fortunately
2994 			// vm_copy_on_write_area() does everything that's needed.
2995 			changePageProtection = false;
2996 			status = vm_copy_on_write_area(cache, NULL);
2997 		} else {
2998 			// No consumers, so we don't need to insert a new one.
2999 			if (cache->source != NULL && cache->temporary) {
3000 				// the cache's commitment must contain all possible pages
3001 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
3002 					team == VMAddressSpace::KernelID()
3003 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3004 			}
3005 
3006 			if (status == B_OK && cache->source != NULL) {
3007 				// There's a source cache, hence we can't just change all pages'
3008 				// protection or we might allow writing into pages belonging to
3009 				// a lower cache.
3010 				changeTopCachePagesOnly = true;
3011 			}
3012 		}
3013 	} else {
3014 		// we don't have anything special to do in all other cases
3015 	}
3016 
3017 	if (status == B_OK) {
3018 		// remap existing pages in this cache
3019 		if (changePageProtection) {
3020 			VMTranslationMap* map = area->address_space->TranslationMap();
3021 			map->Lock();
3022 
3023 			if (changeTopCachePagesOnly) {
3024 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
3025 				page_num_t lastPageOffset
3026 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
3027 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3028 						vm_page* page = it.Next();) {
3029 					if (page->cache_offset >= firstPageOffset
3030 						&& page->cache_offset <= lastPageOffset) {
3031 						addr_t address = virtual_page_address(area, page);
3032 						map->ProtectPage(area, address, newProtection);
3033 					}
3034 				}
3035 			} else
3036 				map->ProtectArea(area, newProtection);
3037 
3038 			map->Unlock();
3039 		}
3040 
3041 		area->protection = newProtection;
3042 	}
3043 
3044 	return status;
3045 }
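
// Illustrative sketch (not part of the original source): revoking write
// access from an area with vm_set_area_protection(). The team and area IDs
// are placeholders; passing false for the kernel argument applies the user
// permission checks above.
//
//	status_t status = vm_set_area_protection(team, areaID,
//		B_READ_AREA | B_KERNEL_READ_AREA, false);
//	if (status != B_OK)
//		return status;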
3046 
3047 
3048 status_t
3049 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
3050 {
3051 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
3052 	if (addressSpace == NULL)
3053 		return B_BAD_TEAM_ID;
3054 
3055 	VMTranslationMap* map = addressSpace->TranslationMap();
3056 
3057 	map->Lock();
3058 	uint32 dummyFlags;
3059 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
3060 	map->Unlock();
3061 
3062 	addressSpace->Put();
3063 	return status;
3064 }
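
// Illustrative sketch (not part of the original source): translating a
// virtual address to its currently mapped physical address with
// vm_get_page_mapping(). The virtual address is a placeholder.
//
//	phys_addr_t physicalAddress;
//	status_t status = vm_get_page_mapping(VMAddressSpace::KernelID(),
//		virtualAddress, &physicalAddress);
//	if (status == B_OK)
//		dprintf("mapped to %#" B_PRIxPHYSADDR "\n", physicalAddress);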
3065 
3066 
3067 /*!	The page's cache must be locked.
3068 */
3069 bool
3070 vm_test_map_modification(vm_page* page)
3071 {
3072 	if (page->modified)
3073 		return true;
3074 
3075 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3076 	vm_page_mapping* mapping;
3077 	while ((mapping = iterator.Next()) != NULL) {
3078 		VMArea* area = mapping->area;
3079 		VMTranslationMap* map = area->address_space->TranslationMap();
3080 
3081 		phys_addr_t physicalAddress;
3082 		uint32 flags;
3083 		map->Lock();
3084 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
3085 		map->Unlock();
3086 
3087 		if ((flags & PAGE_MODIFIED) != 0)
3088 			return true;
3089 	}
3090 
3091 	return false;
3092 }
3093 
3094 
3095 /*!	The page's cache must be locked.
3096 */
3097 void
3098 vm_clear_map_flags(vm_page* page, uint32 flags)
3099 {
3100 	if ((flags & PAGE_ACCESSED) != 0)
3101 		page->accessed = false;
3102 	if ((flags & PAGE_MODIFIED) != 0)
3103 		page->modified = false;
3104 
3105 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3106 	vm_page_mapping* mapping;
3107 	while ((mapping = iterator.Next()) != NULL) {
3108 		VMArea* area = mapping->area;
3109 		VMTranslationMap* map = area->address_space->TranslationMap();
3110 
3111 		map->Lock();
3112 		map->ClearFlags(virtual_page_address(area, page), flags);
3113 		map->Unlock();
3114 	}
3115 }
3116 
3117 
3118 /*!	Removes all mappings from a page.
3119 	After you've called this function, the page is unmapped from memory and
3120 	the page's \c accessed and \c modified flags have been updated according
3121 	to the state of the mappings.
3122 	The page's cache must be locked.
3123 */
3124 void
3125 vm_remove_all_page_mappings(vm_page* page)
3126 {
3127 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3128 		VMArea* area = mapping->area;
3129 		VMTranslationMap* map = area->address_space->TranslationMap();
3130 		addr_t address = virtual_page_address(area, page);
3131 		map->UnmapPage(area, address, false);
3132 	}
3133 }
3134 
3135 
3136 int32
3137 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
3138 {
3139 	int32 count = 0;
3140 
3141 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3142 	vm_page_mapping* mapping;
3143 	while ((mapping = iterator.Next()) != NULL) {
3144 		VMArea* area = mapping->area;
3145 		VMTranslationMap* map = area->address_space->TranslationMap();
3146 
3147 		bool modified;
3148 		if (map->ClearAccessedAndModified(area,
3149 				virtual_page_address(area, page), false, modified)) {
3150 			count++;
3151 		}
3152 
3153 		page->modified |= modified;
3154 	}
3155 
3156 
3157 	if (page->accessed) {
3158 		count++;
3159 		page->accessed = false;
3160 	}
3161 
3162 	return count;
3163 }
3164 
3165 
3166 /*!	Removes all mappings of a page and/or clears the accessed bits of the
3167 	mappings.
3168 	The function iterates through the page mappings and removes them until
3169 	encountering one that has been accessed. From then on it will continue to
3170 	iterate, but only clear the accessed flag of the mapping. The page's
3171 	\c modified bit will be updated accordingly, the \c accessed bit will be
3172 	cleared.
3173 	\return The number of mapping accessed bits encountered, including the
3174 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3175 		of the page have been removed.
3176 */
3177 int32
3178 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3179 {
3180 	ASSERT(page->WiredCount() == 0);
3181 
3182 	if (page->accessed)
3183 		return vm_clear_page_mapping_accessed_flags(page);
3184 
3185 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3186 		VMArea* area = mapping->area;
3187 		VMTranslationMap* map = area->address_space->TranslationMap();
3188 		addr_t address = virtual_page_address(area, page);
3189 		bool modified = false;
3190 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3191 			page->accessed = true;
3192 			page->modified |= modified;
3193 			return vm_clear_page_mapping_accessed_flags(page);
3194 		}
3195 		page->modified |= modified;
3196 	}
3197 
3198 	return 0;
3199 }
3200 
3201 
3202 static int
3203 display_mem(int argc, char** argv)
3204 {
3205 	bool physical = false;
3206 	addr_t copyAddress;
3207 	int32 displayWidth;
3208 	int32 itemSize;
3209 	int32 num = -1;
3210 	addr_t address;
3211 	int i = 1, j;
3212 
3213 	if (argc > 1 && argv[1][0] == '-') {
3214 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3215 			physical = true;
3216 			i++;
3217 		} else
3218 			i = 99;
3219 	}
3220 
3221 	if (argc < i + 1 || argc > i + 2) {
3222 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3223 			"\tdl - 8 bytes\n"
3224 			"\tdw - 4 bytes\n"
3225 			"\tds - 2 bytes\n"
3226 			"\tdb - 1 byte\n"
3227 			"\tstring - a whole string\n"
3228 			"  -p or --physical only allows memory from a single page to be "
3229 			"displayed.\n");
3230 		return 0;
3231 	}
3232 
3233 	address = parse_expression(argv[i]);
3234 
3235 	if (argc > i + 1)
3236 		num = parse_expression(argv[i + 1]);
3237 
3238 	// build the format string
3239 	if (strcmp(argv[0], "db") == 0) {
3240 		itemSize = 1;
3241 		displayWidth = 16;
3242 	} else if (strcmp(argv[0], "ds") == 0) {
3243 		itemSize = 2;
3244 		displayWidth = 8;
3245 	} else if (strcmp(argv[0], "dw") == 0) {
3246 		itemSize = 4;
3247 		displayWidth = 4;
3248 	} else if (strcmp(argv[0], "dl") == 0) {
3249 		itemSize = 8;
3250 		displayWidth = 2;
3251 	} else if (strcmp(argv[0], "string") == 0) {
3252 		itemSize = 1;
3253 		displayWidth = -1;
3254 	} else {
3255 		kprintf("display_mem called in an invalid way!\n");
3256 		return 0;
3257 	}
3258 
3259 	if (num <= 0)
3260 		num = displayWidth;
3261 
3262 	void* physicalPageHandle = NULL;
3263 
3264 	if (physical) {
3265 		int32 offset = address & (B_PAGE_SIZE - 1);
3266 		if (num * itemSize + offset > B_PAGE_SIZE) {
3267 			num = (B_PAGE_SIZE - offset) / itemSize;
3268 			kprintf("NOTE: number of bytes has been cut to page size\n");
3269 		}
3270 
3271 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3272 
3273 		if (vm_get_physical_page_debug(address, &copyAddress,
3274 				&physicalPageHandle) != B_OK) {
3275 			kprintf("getting the hardware page failed.");
3276 			return 0;
3277 		}
3278 
3279 		address += offset;
3280 		copyAddress += offset;
3281 	} else
3282 		copyAddress = address;
3283 
3284 	if (!strcmp(argv[0], "string")) {
3285 		kprintf("%p \"", (char*)copyAddress);
3286 
3287 		// string mode
3288 		for (i = 0; true; i++) {
3289 			char c;
3290 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3291 					!= B_OK
3292 				|| c == '\0') {
3293 				break;
3294 			}
3295 
3296 			if (c == '\n')
3297 				kprintf("\\n");
3298 			else if (c == '\t')
3299 				kprintf("\\t");
3300 			else {
3301 				if (!isprint(c))
3302 					c = '.';
3303 
3304 				kprintf("%c", c);
3305 			}
3306 		}
3307 
3308 		kprintf("\"\n");
3309 	} else {
3310 		// number mode
3311 		for (i = 0; i < num; i++) {
3312 			uint64 value;
3313 
3314 			if ((i % displayWidth) == 0) {
3315 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3316 				if (i != 0)
3317 					kprintf("\n");
3318 
3319 				kprintf("[0x%lx]  ", address + i * itemSize);
3320 
3321 				for (j = 0; j < displayed; j++) {
3322 					char c;
3323 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3324 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3325 						displayed = j;
3326 						break;
3327 					}
3328 					if (!isprint(c))
3329 						c = '.';
3330 
3331 					kprintf("%c", c);
3332 				}
3333 				if (num > displayWidth) {
3334 					// make sure the spacing in the last line is correct
3335 					for (j = displayed; j < displayWidth * itemSize; j++)
3336 						kprintf(" ");
3337 				}
3338 				kprintf("  ");
3339 			}
3340 
3341 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3342 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3343 				kprintf("read fault");
3344 				break;
3345 			}
3346 
3347 			switch (itemSize) {
3348 				case 1:
3349 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3350 					break;
3351 				case 2:
3352 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3353 					break;
3354 				case 4:
3355 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3356 					break;
3357 				case 8:
3358 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3359 					break;
3360 			}
3361 		}
3362 
3363 		kprintf("\n");
3364 	}
3365 
3366 	if (physical) {
3367 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3368 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3369 	}
3370 	return 0;
3371 }
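
// Example KDL invocations of the commands handled by display_mem() (not part
// of the original source; addresses are placeholders, see the usage text
// above):
//
//	dw 0x80001000 8         - dump eight 4-byte values at a virtual address
//	db -p 0x0009f000 16     - dump 16 bytes from a single physical page
//	string 0x80001000       - print the NUL-terminated string at that address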
3372 
3373 
3374 static void
3375 dump_cache_tree_recursively(VMCache* cache, int level,
3376 	VMCache* highlightCache)
3377 {
3378 	// print this cache
3379 	for (int i = 0; i < level; i++)
3380 		kprintf("  ");
3381 	if (cache == highlightCache)
3382 		kprintf("%p <--\n", cache);
3383 	else
3384 		kprintf("%p\n", cache);
3385 
3386 	// recursively print its consumers
3387 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3388 			VMCache* consumer = it.Next();) {
3389 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3390 	}
3391 }
3392 
3393 
3394 static int
3395 dump_cache_tree(int argc, char** argv)
3396 {
3397 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3398 		kprintf("usage: %s <address>\n", argv[0]);
3399 		return 0;
3400 	}
3401 
3402 	addr_t address = parse_expression(argv[1]);
3403 	if (address == 0)
3404 		return 0;
3405 
3406 	VMCache* cache = (VMCache*)address;
3407 	VMCache* root = cache;
3408 
3409 	// find the root cache (the transitive source)
3410 	while (root->source != NULL)
3411 		root = root->source;
3412 
3413 	dump_cache_tree_recursively(root, 0, cache);
3414 
3415 	return 0;
3416 }
3417 
3418 
3419 const char*
3420 vm_cache_type_to_string(int32 type)
3421 {
3422 	switch (type) {
3423 		case CACHE_TYPE_RAM:
3424 			return "RAM";
3425 		case CACHE_TYPE_DEVICE:
3426 			return "device";
3427 		case CACHE_TYPE_VNODE:
3428 			return "vnode";
3429 		case CACHE_TYPE_NULL:
3430 			return "null";
3431 
3432 		default:
3433 			return "unknown";
3434 	}
3435 }
3436 
3437 
3438 #if DEBUG_CACHE_LIST
3439 
3440 static void
3441 update_cache_info_recursively(VMCache* cache, cache_info& info)
3442 {
3443 	info.page_count += cache->page_count;
3444 	if (cache->type == CACHE_TYPE_RAM)
3445 		info.committed += cache->committed_size;
3446 
3447 	// recurse
3448 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3449 			VMCache* consumer = it.Next();) {
3450 		update_cache_info_recursively(consumer, info);
3451 	}
3452 }
3453 
3454 
3455 static int
3456 cache_info_compare_page_count(const void* _a, const void* _b)
3457 {
3458 	const cache_info* a = (const cache_info*)_a;
3459 	const cache_info* b = (const cache_info*)_b;
3460 	if (a->page_count == b->page_count)
3461 		return 0;
3462 	return a->page_count < b->page_count ? 1 : -1;
3463 }
3464 
3465 
3466 static int
3467 cache_info_compare_committed(const void* _a, const void* _b)
3468 {
3469 	const cache_info* a = (const cache_info*)_a;
3470 	const cache_info* b = (const cache_info*)_b;
3471 	if (a->committed == b->committed)
3472 		return 0;
3473 	return a->committed < b->committed ? 1 : -1;
3474 }
3475 
3476 
3477 static void
3478 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3479 {
3480 	for (int i = 0; i < level; i++)
3481 		kprintf("  ");
3482 
3483 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3484 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3485 		cache->virtual_base, cache->virtual_end, cache->page_count);
3486 
3487 	if (level == 0)
3488 		kprintf("/%lu", info.page_count);
3489 
3490 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3491 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3492 
3493 		if (level == 0)
3494 			kprintf("/%lu", info.committed);
3495 	}
3496 
3497 	// areas
3498 	if (cache->areas != NULL) {
3499 		VMArea* area = cache->areas;
3500 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3501 			area->name, area->address_space->ID());
3502 
3503 		while (area->cache_next != NULL) {
3504 			area = area->cache_next;
3505 			kprintf(", %" B_PRId32, area->id);
3506 		}
3507 	}
3508 
3509 	kputs("\n");
3510 
3511 	// recurse
3512 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3513 			VMCache* consumer = it.Next();) {
3514 		dump_caches_recursively(consumer, info, level + 1);
3515 	}
3516 }
3517 
3518 
3519 static int
3520 dump_caches(int argc, char** argv)
3521 {
3522 	if (sCacheInfoTable == NULL) {
3523 		kprintf("No cache info table!\n");
3524 		return 0;
3525 	}
3526 
3527 	bool sortByPageCount = true;
3528 
3529 	for (int32 i = 1; i < argc; i++) {
3530 		if (strcmp(argv[i], "-c") == 0) {
3531 			sortByPageCount = false;
3532 		} else {
3533 			print_debugger_command_usage(argv[0]);
3534 			return 0;
3535 		}
3536 	}
3537 
3538 	uint32 totalCount = 0;
3539 	uint32 rootCount = 0;
3540 	off_t totalCommitted = 0;
3541 	page_num_t totalPages = 0;
3542 
3543 	VMCache* cache = gDebugCacheList;
3544 	while (cache) {
3545 		totalCount++;
3546 		if (cache->source == NULL) {
3547 			cache_info stackInfo;
3548 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3549 				? sCacheInfoTable[rootCount] : stackInfo;
3550 			rootCount++;
3551 			info.cache = cache;
3552 			info.page_count = 0;
3553 			info.committed = 0;
3554 			update_cache_info_recursively(cache, info);
3555 			totalCommitted += info.committed;
3556 			totalPages += info.page_count;
3557 		}
3558 
3559 		cache = cache->debug_next;
3560 	}
3561 
3562 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3563 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3564 			sortByPageCount
3565 				? &cache_info_compare_page_count
3566 				: &cache_info_compare_committed);
3567 	}
3568 
3569 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3570 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3571 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3572 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3573 			"page count" : "committed size");
3574 
3575 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3576 		for (uint32 i = 0; i < rootCount; i++) {
3577 			cache_info& info = sCacheInfoTable[i];
3578 			dump_caches_recursively(info.cache, info, 0);
3579 		}
3580 	} else
3581 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3582 
3583 	return 0;
3584 }
3585 
3586 #endif	// DEBUG_CACHE_LIST
3587 
3588 
3589 static int
3590 dump_cache(int argc, char** argv)
3591 {
3592 	VMCache* cache;
3593 	bool showPages = false;
3594 	int i = 1;
3595 
3596 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3597 		kprintf("usage: %s [-ps] <address>\n"
3598 			"  if -p is specified, all pages are shown; if -s is used,\n"
3599 			"  only the cache info is shown.\n", argv[0]);
3600 		return 0;
3601 	}
3602 	while (argv[i][0] == '-') {
3603 		char* arg = argv[i] + 1;
3604 		while (arg[0]) {
3605 			if (arg[0] == 'p')
3606 				showPages = true;
3607 			arg++;
3608 		}
3609 		i++;
3610 	}
3611 	if (argv[i] == NULL) {
3612 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3613 		return 0;
3614 	}
3615 
3616 	addr_t address = parse_expression(argv[i]);
3617 	if (address == 0)
3618 		return 0;
3619 
3620 	cache = (VMCache*)address;
3621 
3622 	cache->Dump(showPages);
3623 
3624 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3625 
3626 	return 0;
3627 }
3628 
3629 
3630 static void
3631 dump_area_struct(VMArea* area, bool mappings)
3632 {
3633 	kprintf("AREA: %p\n", area);
3634 	kprintf("name:\t\t'%s'\n", area->name);
3635 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3636 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3637 	kprintf("base:\t\t0x%lx\n", area->Base());
3638 	kprintf("size:\t\t0x%lx\n", area->Size());
3639 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3640 	kprintf("page_protection:\t%p\n", area->page_protections);
3641 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3642 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3643 	kprintf("cache:\t\t%p\n", area->cache);
3644 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3645 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3646 	kprintf("cache_next:\t%p\n", area->cache_next);
3647 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3648 
3649 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3650 	if (mappings) {
3651 		kprintf("page mappings:\n");
3652 		while (iterator.HasNext()) {
3653 			vm_page_mapping* mapping = iterator.Next();
3654 			kprintf("  %p", mapping->page);
3655 		}
3656 		kprintf("\n");
3657 	} else {
3658 		uint32 count = 0;
3659 		while (iterator.Next() != NULL) {
3660 			count++;
3661 		}
3662 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3663 	}
3664 }
3665 
3666 
3667 static int
3668 dump_area(int argc, char** argv)
3669 {
3670 	bool mappings = false;
3671 	bool found = false;
3672 	int32 index = 1;
3673 	VMArea* area;
3674 	addr_t num;
3675 
3676 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3677 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3678 			"All areas matching the given id/address/name are listed. You can\n"
3679 			"restrict the check to a specific attribute by prefixing the\n"
3680 			"specifier with one of the id/contains/address/name keywords.\n"
3681 			"-m shows the area's mappings as well.\n");
3682 		return 0;
3683 	}
3684 
3685 	if (!strcmp(argv[1], "-m")) {
3686 		mappings = true;
3687 		index++;
3688 	}
3689 
3690 	int32 mode = 0xf;
3691 	if (!strcmp(argv[index], "id"))
3692 		mode = 1;
3693 	else if (!strcmp(argv[index], "contains"))
3694 		mode = 2;
3695 	else if (!strcmp(argv[index], "name"))
3696 		mode = 4;
3697 	else if (!strcmp(argv[index], "address"))
3698 		mode = 0;
3699 	if (mode != 0xf)
3700 		index++;
3701 
3702 	if (index >= argc) {
3703 		kprintf("No area specifier given.\n");
3704 		return 0;
3705 	}
3706 
3707 	num = parse_expression(argv[index]);
3708 
3709 	if (mode == 0) {
3710 		dump_area_struct((struct VMArea*)num, mappings);
3711 	} else {
3712 		// walk through the area list, looking for the arguments as a name
3713 
3714 		VMAreasTree::Iterator it = VMAreas::GetIterator();
3715 		while ((area = it.Next()) != NULL) {
3716 			if (((mode & 4) != 0
3717 					&& !strcmp(argv[index], area->name))
3718 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3719 					|| (((mode & 2) != 0 && area->Base() <= num
3720 						&& area->Base() + area->Size() > num))))) {
3721 				dump_area_struct(area, mappings);
3722 				found = true;
3723 			}
3724 		}
3725 
3726 		if (!found)
3727 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3728 	}
3729 
3730 	return 0;
3731 }
3732 
3733 
3734 static int
3735 dump_area_list(int argc, char** argv)
3736 {
3737 	VMArea* area;
3738 	const char* name = NULL;
3739 	int32 id = 0;
3740 
3741 	if (argc > 1) {
3742 		id = parse_expression(argv[1]);
3743 		if (id == 0)
3744 			name = argv[1];
3745 	}
3746 
3747 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3748 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3749 		B_PRINTF_POINTER_WIDTH, "size");
3750 
3751 	VMAreasTree::Iterator it = VMAreas::GetIterator();
3752 	while ((area = it.Next()) != NULL) {
3753 		if ((id != 0 && area->address_space->ID() != id)
3754 			|| (name != NULL && strstr(area->name, name) == NULL))
3755 			continue;
3756 
3757 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3758 			area->id, (void*)area->Base(), (void*)area->Size(),
3759 			area->protection, area->wiring, area->name);
3760 	}
3761 	return 0;
3762 }
3763 
3764 
3765 static int
3766 dump_available_memory(int argc, char** argv)
3767 {
3768 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3769 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3770 	return 0;
3771 }
3772 
3773 
3774 static int
3775 dump_mapping_info(int argc, char** argv)
3776 {
3777 	bool reverseLookup = false;
3778 	bool pageLookup = false;
3779 
3780 	int argi = 1;
3781 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3782 		const char* arg = argv[argi];
3783 		if (strcmp(arg, "-r") == 0) {
3784 			reverseLookup = true;
3785 		} else if (strcmp(arg, "-p") == 0) {
3786 			reverseLookup = true;
3787 			pageLookup = true;
3788 		} else {
3789 			print_debugger_command_usage(argv[0]);
3790 			return 0;
3791 		}
3792 	}
3793 
3794 	// We need at least one argument, the address. Optionally a thread ID can be
3795 	// specified.
3796 	if (argi >= argc || argi + 2 < argc) {
3797 		print_debugger_command_usage(argv[0]);
3798 		return 0;
3799 	}
3800 
3801 	uint64 addressValue;
3802 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3803 		return 0;
3804 
3805 	Team* team = NULL;
3806 	if (argi < argc) {
3807 		uint64 threadID;
3808 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3809 			return 0;
3810 
3811 		Thread* thread = Thread::GetDebug(threadID);
3812 		if (thread == NULL) {
3813 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3814 			return 0;
3815 		}
3816 
3817 		team = thread->team;
3818 	}
3819 
3820 	if (reverseLookup) {
3821 		phys_addr_t physicalAddress;
3822 		if (pageLookup) {
3823 			vm_page* page = (vm_page*)(addr_t)addressValue;
3824 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3825 		} else {
3826 			physicalAddress = (phys_addr_t)addressValue;
3827 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3828 		}
3829 
3830 		kprintf("    Team     Virtual Address      Area\n");
3831 		kprintf("--------------------------------------\n");
3832 
3833 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3834 			Callback()
3835 				:
3836 				fAddressSpace(NULL)
3837 			{
3838 			}
3839 
3840 			void SetAddressSpace(VMAddressSpace* addressSpace)
3841 			{
3842 				fAddressSpace = addressSpace;
3843 			}
3844 
3845 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3846 			{
3847 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3848 					virtualAddress);
3849 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3850 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3851 				else
3852 					kprintf("\n");
3853 				return false;
3854 			}
3855 
3856 		private:
3857 			VMAddressSpace*	fAddressSpace;
3858 		} callback;
3859 
3860 		if (team != NULL) {
3861 			// team specified -- get its address space
3862 			VMAddressSpace* addressSpace = team->address_space;
3863 			if (addressSpace == NULL) {
3864 				kprintf("Failed to get address space!\n");
3865 				return 0;
3866 			}
3867 
3868 			callback.SetAddressSpace(addressSpace);
3869 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3870 				physicalAddress, callback);
3871 		} else {
3872 			// no team specified -- iterate through all address spaces
3873 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3874 				addressSpace != NULL;
3875 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3876 				callback.SetAddressSpace(addressSpace);
3877 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3878 					physicalAddress, callback);
3879 			}
3880 		}
3881 	} else {
3882 		// get the address space
3883 		addr_t virtualAddress = (addr_t)addressValue;
3884 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3885 		VMAddressSpace* addressSpace;
3886 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3887 			addressSpace = VMAddressSpace::Kernel();
3888 		} else if (team != NULL) {
3889 			addressSpace = team->address_space;
3890 		} else {
3891 			Thread* thread = debug_get_debugged_thread();
3892 			if (thread == NULL || thread->team == NULL) {
3893 				kprintf("Failed to get team!\n");
3894 				return 0;
3895 			}
3896 
3897 			addressSpace = thread->team->address_space;
3898 		}
3899 
3900 		if (addressSpace == NULL) {
3901 			kprintf("Failed to get address space!\n");
3902 			return 0;
3903 		}
3904 
3905 		// let the translation map implementation do the job
3906 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3907 	}
3908 
3909 	return 0;
3910 }
3911 
3912 
3913 /*!	Deletes all areas and reserved regions in the given address space.
3914 
3915 	The caller must ensure that none of the areas has any wired ranges.
3916 
3917 	\param addressSpace The address space.
3918 	\param deletingAddressSpace \c true, if the address space is in the process
3919 		of being deleted.
3920 */
3921 void
3922 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3923 {
3924 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3925 		addressSpace->ID()));
3926 
3927 	addressSpace->WriteLock();
3928 
3929 	// remove all reserved areas in this address space
3930 	addressSpace->UnreserveAllAddressRanges(0);
3931 
3932 	// delete all the areas in this address space
3933 	while (VMArea* area = addressSpace->FirstArea()) {
3934 		ASSERT(!area->IsWired());
3935 		delete_area(addressSpace, area, deletingAddressSpace);
3936 	}
3937 
3938 	addressSpace->WriteUnlock();
3939 }
3940 
3941 
3942 static area_id
3943 vm_area_for(addr_t address, bool kernel)
3944 {
3945 	team_id team;
3946 	if (IS_USER_ADDRESS(address)) {
3947 		// we try the user team address space, if any
3948 		team = VMAddressSpace::CurrentID();
3949 		if (team < 0)
3950 			return team;
3951 	} else
3952 		team = VMAddressSpace::KernelID();
3953 
3954 	AddressSpaceReadLocker locker(team);
3955 	if (!locker.IsLocked())
3956 		return B_BAD_TEAM_ID;
3957 
3958 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3959 	if (area != NULL) {
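		// Hide areas a userland caller may not access at all: a kernel-only
		// area without any user protection is reported as if it didn't exist.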
3960 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0
3961 				&& (area->protection & B_KERNEL_AREA) != 0)
3962 			return B_ERROR;
3963 
3964 		return area->id;
3965 	}
3966 
3967 	return B_ERROR;
3968 }
3969 
3970 
3971 /*!	Frees physical pages that were used during the boot process.
3972 	\a end is inclusive.
3973 */
3974 static void
3975 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3976 {
3977 	// free all physical pages in the specified range
3978 
3979 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3980 		phys_addr_t physicalAddress;
3981 		uint32 flags;
3982 
3983 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3984 			&& (flags & PAGE_PRESENT) != 0) {
3985 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3986 			if (page != NULL && page->State() != PAGE_STATE_FREE
3987 					&& page->State() != PAGE_STATE_CLEAR
3988 					&& page->State() != PAGE_STATE_UNUSED) {
3989 				DEBUG_PAGE_ACCESS_START(page);
3990 				vm_page_set_state(page, PAGE_STATE_FREE);
3991 			}
3992 		}
3993 	}
3994 
3995 	// unmap the memory
3996 	map->Unmap(start, end);
3997 }
3998 
3999 
4000 void
4001 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
4002 {
4003 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
4004 	addr_t end = start + (size - 1);
4005 	addr_t lastEnd = start;
4006 
4007 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
4008 		(void*)start, (void*)end));
4009 
4010 	// The areas are sorted in virtual address space order, so
4011 	// we just have to find the holes between them that fall
4012 	// into the area we should dispose
4013 
4014 	map->Lock();
4015 
4016 	for (VMAddressSpace::AreaIterator it
4017 				= VMAddressSpace::Kernel()->GetAreaIterator();
4018 			VMArea* area = it.Next();) {
4019 		addr_t areaStart = area->Base();
4020 		addr_t areaEnd = areaStart + (area->Size() - 1);
4021 
4022 		if (areaEnd < start)
4023 			continue;
4024 
4025 		if (areaStart > end) {
4026 			// we are done, the area is already beyond what we have to free
4027 			break;
4028 		}
4029 
4030 		if (areaStart > lastEnd) {
4031 			// this is something we can free
4032 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
4033 				(void*)areaStart));
4034 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
4035 		}
4036 
4037 		if (areaEnd >= end) {
4038 			lastEnd = areaEnd;
4039 				// no +1 to prevent potential overflow
4040 			break;
4041 		}
4042 
4043 		lastEnd = areaEnd + 1;
4044 	}
4045 
4046 	if (lastEnd < end) {
4047 		// we can also get rid of some space at the end of the area
4048 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
4049 			(void*)end));
4050 		unmap_and_free_physical_pages(map, lastEnd, end);
4051 	}
4052 
4053 	map->Unlock();
4054 }
4055 
4056 
4057 static void
4058 create_preloaded_image_areas(struct preloaded_image* _image)
4059 {
4060 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
4061 	char name[B_OS_NAME_LENGTH];
4062 	void* address;
4063 	int32 length;
4064 
4065 	// use file name to create a good area name
4066 	char* fileName = strrchr(image->name, '/');
4067 	if (fileName == NULL)
4068 		fileName = image->name;
4069 	else
4070 		fileName++;
4071 
4072 	length = strlen(fileName);
4073 	// make sure there is enough space for the suffix
4074 	if (length > 25)
4075 		length = 25;
4076 
4077 	memcpy(name, fileName, length);
4078 	strcpy(name + length, "_text");
4079 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
4080 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4081 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
4082 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4083 		// this will later be remapped read-only/executable by the
4084 		// ELF initialization code
4085 
4086 	strcpy(name + length, "_data");
4087 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
4088 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4089 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
4090 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4091 }
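
/*	For example (illustrative image name): a preloaded image named
	"kernel_x86_64" ends up with the areas "kernel_x86_64_text" and
	"kernel_x86_64_data", covering its page-aligned text and data regions.
*/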
4092 
4093 
4094 /*!	Frees all kernel arguments areas previously created from the kernel_args
4095 	structure. Any boot loader resources contained in those arguments must not
4096 	be accessed anymore past this point.
4097 */
4098 void
4099 vm_free_kernel_args(kernel_args* args)
4100 {
4101 	uint32 i;
4102 
4103 	TRACE(("vm_free_kernel_args()\n"));
4104 
4105 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
4106 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
4107 		if (area >= B_OK)
4108 			delete_area(area);
4109 	}
4110 }
4111 
4112 
4113 static void
4114 allocate_kernel_args(kernel_args* args)
4115 {
4116 	TRACE(("allocate_kernel_args()\n"));
4117 
4118 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
4119 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
4120 
4121 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
4122 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
4123 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4124 	}
4125 }
4126 
4127 
4128 static void
4129 unreserve_boot_loader_ranges(kernel_args* args)
4130 {
4131 	TRACE(("unreserve_boot_loader_ranges()\n"));
4132 
4133 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4134 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
4135 			(void*)(addr_t)args->virtual_allocated_range[i].start,
4136 			args->virtual_allocated_range[i].size);
4137 	}
4138 }
4139 
4140 
4141 static void
4142 reserve_boot_loader_ranges(kernel_args* args)
4143 {
4144 	TRACE(("reserve_boot_loader_ranges()\n"));
4145 
4146 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4147 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
4148 
4149 		// If the address is not a kernel address, we just skip it. The
4150 		// architecture specific code has to deal with it.
4151 		if (!IS_KERNEL_ADDRESS(address)) {
4152 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
4153 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
4154 			continue;
4155 		}
4156 
4157 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
4158 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4159 		if (status < B_OK)
4160 			panic("could not reserve boot loader ranges\n");
4161 	}
4162 }
4163 
4164 
4165 static addr_t
4166 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
4167 {
4168 	size = PAGE_ALIGN(size);
4169 
4170 	// find a slot in the virtual allocation addr range
4171 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
4172 		// check to see if the space between this one and the last is big enough
4173 		addr_t rangeStart = args->virtual_allocated_range[i].start;
4174 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4175 			+ args->virtual_allocated_range[i - 1].size;
4176 
4177 		addr_t base = alignment > 0
4178 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4179 
4180 		if (base >= KERNEL_BASE && base < rangeStart
4181 				&& rangeStart - base >= size) {
4182 			args->virtual_allocated_range[i - 1].size
4183 				+= base + size - previousRangeEnd;
4184 			return base;
4185 		}
4186 	}
4187 
4188 	// we didn't find one between allocation ranges. This is OK;
4189 	// see if there's a gap after the last one.
4190 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4191 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4192 		+ args->virtual_allocated_range[lastEntryIndex].size;
4193 	addr_t base = alignment > 0
4194 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4195 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4196 		args->virtual_allocated_range[lastEntryIndex].size
4197 			+= base + size - lastRangeEnd;
4198 		return base;
4199 	}
4200 
4201 	// see if there's a gap before the first one
4202 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4203 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4204 		base = rangeStart - size;
4205 		if (alignment > 0)
4206 			base = ROUNDDOWN(base, alignment);
4207 
4208 		if (base >= KERNEL_BASE) {
4209 			args->virtual_allocated_range[0].start = base;
4210 			args->virtual_allocated_range[0].size += rangeStart - base;
4211 			return base;
4212 		}
4213 	}
4214 
4215 	return 0;
4216 }
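
/*	For example (illustrative numbers): if virtual_allocated_range[0] covers
	[KERNEL_BASE, KERNEL_BASE + 0x100000) and virtual_allocated_range[1] starts
	at KERNEL_BASE + 0x400000, a request for 0x2000 bytes without alignment is
	satisfied from the gap in between: the function returns
	KERNEL_BASE + 0x100000 and grows the first range by 0x2000 bytes, so the
	allocation stays accounted for in the kernel_args bookkeeping.
*/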
4217 
4218 
4219 static bool
4220 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4221 {
4222 	// TODO: horrible brute-force method of determining if the page can be
4223 	// allocated
4224 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4225 		if (address >= args->physical_memory_range[i].start
4226 			&& address < args->physical_memory_range[i].start
4227 				+ args->physical_memory_range[i].size)
4228 			return true;
4229 	}
4230 	return false;
4231 }
4232 
4233 
4234 page_num_t
4235 vm_allocate_early_physical_page(kernel_args* args)
4236 {
4237 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4238 		phys_addr_t nextPage;
4239 
4240 		nextPage = args->physical_allocated_range[i].start
4241 			+ args->physical_allocated_range[i].size;
4242 		// see if the page right after this allocated paddr run can be allocated
4243 		if (i + 1 < args->num_physical_allocated_ranges
4244 			&& args->physical_allocated_range[i + 1].size != 0) {
4245 			// see if the next page will collide with the next allocated range
4246 			if (nextPage >= args->physical_allocated_range[i+1].start)
4247 				continue;
4248 		}
4249 		// see if the next physical page fits in the memory block
4250 		if (is_page_in_physical_memory_range(args, nextPage)) {
4251 			// we got one!
4252 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4253 			return nextPage / B_PAGE_SIZE;
4254 		}
4255 	}
4256 
4257 	// Expanding upwards didn't work, try going downwards.
4258 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4259 		phys_addr_t nextPage;
4260 
4261 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4262 		// see if the page right before this allocated paddr run can be allocated
4263 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4264 			// see if the page will collide with the previous allocated range
4265 			if (nextPage < args->physical_allocated_range[i-1].start
4266 				+ args->physical_allocated_range[i-1].size)
4267 				continue;
4268 		}
4269 		// see if the next physical page fits in the memory block
4270 		if (is_page_in_physical_memory_range(args, nextPage)) {
4271 			// we got one!
4272 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4273 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4274 			return nextPage / B_PAGE_SIZE;
4275 		}
4276 	}
4277 
4278 	return 0;
4279 		// could not allocate a block
4280 }
4281 
4282 
4283 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4284 	allocate some pages before the VM is completely up.
4285 */
4286 addr_t
4287 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4288 	uint32 attributes, addr_t alignment)
4289 {
4290 	if (physicalSize > virtualSize)
4291 		physicalSize = virtualSize;
4292 
4293 	// find the vaddr to allocate at
4294 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4295 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4296 	if (virtualBase == 0) {
4297 		panic("vm_allocate_early: could not allocate virtual address\n");
4298 		return 0;
4299 	}
4300 
4301 	// map the pages
4302 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4303 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4304 		if (physicalAddress == 0)
4305 			panic("error allocating early page!\n");
4306 
4307 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4308 
4309 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4310 			physicalAddress * B_PAGE_SIZE, attributes,
4311 			&vm_allocate_early_physical_page);
4312 	}
4313 
4314 	return virtualBase;
4315 }
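
/*	Typical use during early boot (cf. the heap setup in vm_init() below):

		addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);

	This maps heapSize bytes worth of freshly allocated physical pages at a
	newly chosen kernel virtual address and returns that address.
*/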
4316 
4317 
4318 /*!	The main entry point to initialize the VM. */
4319 status_t
4320 vm_init(kernel_args* args)
4321 {
4322 	struct preloaded_image* image;
4323 	void* address;
4324 	status_t err = 0;
4325 	uint32 i;
4326 
4327 	TRACE(("vm_init: entry\n"));
4328 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4329 	err = arch_vm_init(args);
4330 
4331 	// initialize some globals
4332 	vm_page_init_num_pages(args);
4333 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4334 
4335 	slab_init(args);
4336 
4337 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4338 	off_t heapSize = INITIAL_HEAP_SIZE;
4339 	// try to accommodate low memory systems
4340 	while (heapSize > sAvailableMemory / 8)
4341 		heapSize /= 2;
4342 	if (heapSize < 1024 * 1024)
4343 		panic("vm_init: go buy some RAM please.");
4344 
4345 	// map in the new heap and initialize it
4346 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4347 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4348 	TRACE(("heap at 0x%lx\n", heapBase));
4349 	heap_init(heapBase, heapSize);
4350 #endif
4351 
4352 	// initialize the free page list and physical page mapper
4353 	vm_page_init(args);
4354 
4355 	// initialize the cache allocators
4356 	vm_cache_init(args);
4357 
4358 	{
4359 		status_t error = VMAreas::Init();
4360 		if (error != B_OK)
4361 			panic("vm_init: error initializing areas map\n");
4362 	}
4363 
4364 	VMAddressSpace::Init();
4365 	reserve_boot_loader_ranges(args);
4366 
4367 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4368 	heap_init_post_area();
4369 #endif
4370 
4371 	// Do any further initialization that the architecture dependent layers may
4372 	// need now
4373 	arch_vm_translation_map_init_post_area(args);
4374 	arch_vm_init_post_area(args);
4375 	vm_page_init_post_area(args);
4376 	slab_init_post_area();
4377 
4378 	// allocate areas to represent stuff that already exists
4379 
4380 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4381 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4382 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4383 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4384 #endif
4385 
4386 	allocate_kernel_args(args);
4387 
4388 	create_preloaded_image_areas(args->kernel_image);
4389 
4390 	// allocate areas for preloaded images
4391 	for (image = args->preloaded_images; image != NULL; image = image->next)
4392 		create_preloaded_image_areas(image);
4393 
4394 	// allocate kernel stacks
4395 	for (i = 0; i < args->num_cpus; i++) {
4396 		char name[64];
4397 
4398 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4399 		address = (void*)args->cpu_kstack[i].start;
4400 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4401 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4402 	}
4403 
4404 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4405 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4406 
4407 #if PARANOID_KERNEL_MALLOC
4408 	vm_block_address_range("uninitialized heap memory",
4409 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4410 #endif
4411 #if PARANOID_KERNEL_FREE
4412 	vm_block_address_range("freed heap memory",
4413 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4414 #endif
4415 
4416 	// create the object cache for the page mappings
4417 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4418 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4419 		NULL, NULL);
4420 	if (gPageMappingsObjectCache == NULL)
4421 		panic("failed to create page mappings object cache");
4422 
4423 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4424 
4425 #if DEBUG_CACHE_LIST
4426 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4427 		virtual_address_restrictions virtualRestrictions = {};
4428 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4429 		physical_address_restrictions physicalRestrictions = {};
4430 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4431 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4432 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4433 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4434 			&physicalRestrictions, (void**)&sCacheInfoTable);
4435 	}
4436 #endif	// DEBUG_CACHE_LIST
4437 
4438 	// add some debugger commands
4439 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4440 	add_debugger_command("area", &dump_area,
4441 		"Dump info about a particular area");
4442 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4443 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4444 #if DEBUG_CACHE_LIST
4445 	if (sCacheInfoTable != NULL) {
4446 		add_debugger_command_etc("caches", &dump_caches,
4447 			"List all VMCache trees",
4448 			"[ \"-c\" ]\n"
4449 			"All cache trees are listed sorted in decreasing order by number "
4450 				"of\n"
4451 			"used pages or, if \"-c\" is specified, by size of committed "
4452 				"memory.\n",
4453 			0);
4454 	}
4455 #endif
4456 	add_debugger_command("avail", &dump_available_memory,
4457 		"Dump available memory");
4458 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4459 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4460 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4461 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4462 	add_debugger_command("string", &display_mem, "dump strings");
4463 
4464 	add_debugger_command_etc("mapping", &dump_mapping_info,
4465 		"Print address mapping information",
4466 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4467 		"Prints low-level page mapping information for a given address. If\n"
4468 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4469 		"address that is looked up in the translation map of the current\n"
4470 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4471 		"\"-r\" is specified, <address> is a physical address that is\n"
4472 		"searched in the translation map of all teams, respectively the team\n"
4473 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4474 		"<address> is the address of a vm_page structure. The behavior is\n"
4475 		"equivalent to specifying \"-r\" with the physical address of that\n"
4476 		"page.\n",
4477 		0);
4478 
4479 	TRACE(("vm_init: exit\n"));
4480 
4481 	vm_cache_init_post_heap();
4482 
4483 	return err;
4484 }
4485 
4486 
4487 status_t
4488 vm_init_post_sem(kernel_args* args)
4489 {
4490 	// This frees all unused boot loader resources and makes their space available
4491 	// again
4492 	arch_vm_init_end(args);
4493 	unreserve_boot_loader_ranges(args);
4494 
4495 	// fill in all of the semaphores that were not allocated before
4496 	// since we're still single threaded and only the kernel address space
4497 	// exists, it isn't that hard to find all of the ones we need to create
4498 
4499 	arch_vm_translation_map_init_post_sem(args);
4500 
4501 	slab_init_post_sem();
4502 
4503 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4504 	heap_init_post_sem();
4505 #endif
4506 
4507 	return B_OK;
4508 }
4509 
4510 
4511 status_t
4512 vm_init_post_thread(kernel_args* args)
4513 {
4514 	vm_page_init_post_thread(args);
4515 	slab_init_post_thread();
4516 	return heap_init_post_thread();
4517 }
4518 
4519 
4520 status_t
4521 vm_init_post_modules(kernel_args* args)
4522 {
4523 	return arch_vm_init_post_modules(args);
4524 }
4525 
4526 
4527 void
4528 permit_page_faults(void)
4529 {
4530 	Thread* thread = thread_get_current_thread();
4531 	if (thread != NULL)
4532 		atomic_add(&thread->page_faults_allowed, 1);
4533 }
4534 
4535 
4536 void
4537 forbid_page_faults(void)
4538 {
4539 	Thread* thread = thread_get_current_thread();
4540 	if (thread != NULL)
4541 		atomic_add(&thread->page_faults_allowed, -1);
4542 }
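
/*	These two only adjust the calling thread's page_faults_allowed counter.
	Code that deliberately touches possibly unmapped memory typically brackets
	the access like this (illustrative sketch; the accessor function is a
	placeholder):

		permit_page_faults();
		status_t error = access_that_may_fault();
		forbid_page_faults();
*/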
4543 
4544 
4545 status_t
4546 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4547 	bool isUser, addr_t* newIP)
4548 {
4549 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4550 		faultAddress));
4551 
4552 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4553 
4554 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4555 	VMAddressSpace* addressSpace = NULL;
4556 
4557 	status_t status = B_OK;
4558 	*newIP = 0;
4559 	atomic_add((int32*)&sPageFaults, 1);
4560 
4561 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4562 		addressSpace = VMAddressSpace::GetKernel();
4563 	} else if (IS_USER_ADDRESS(pageAddress)) {
4564 		addressSpace = VMAddressSpace::GetCurrent();
4565 		if (addressSpace == NULL) {
4566 			if (!isUser) {
4567 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4568 					"memory!\n");
4569 				status = B_BAD_ADDRESS;
4570 				TPF(PageFaultError(-1,
4571 					VMPageFaultTracing
4572 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4573 			} else {
4574 				// XXX weird state.
4575 				panic("vm_page_fault: non kernel thread accessing user memory "
4576 					"that doesn't exist!\n");
4577 				status = B_BAD_ADDRESS;
4578 			}
4579 		}
4580 	} else {
4581 		// the hit was probably in the 64k DMZ between kernel and user space
4582 		// this keeps a user space thread from passing a buffer that crosses
4583 		// into kernel space
4584 		status = B_BAD_ADDRESS;
4585 		TPF(PageFaultError(-1,
4586 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4587 	}
4588 
4589 	if (status == B_OK) {
4590 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4591 			isUser, NULL);
4592 	}
4593 
4594 	if (status < B_OK) {
4595 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4596 			"0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n",
4597 			strerror(status), address, faultAddress, isWrite, isUser, isExecute,
4598 			thread_get_current_thread_id());
4599 		if (!isUser) {
4600 			Thread* thread = thread_get_current_thread();
4601 			if (thread != NULL && thread->fault_handler != 0) {
4602 				// this will cause the arch dependent page fault handler to
4603 				// modify the IP on the interrupt frame or whatever to return
4604 				// to this address
4605 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4606 			} else {
4607 				// unhandled page fault in the kernel
4608 				panic("vm_page_fault: unhandled page fault in kernel space at "
4609 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4610 			}
4611 		} else {
4612 			Thread* thread = thread_get_current_thread();
4613 
4614 #ifdef TRACE_FAULTS
4615 			VMArea* area = NULL;
4616 			if (addressSpace != NULL) {
4617 				addressSpace->ReadLock();
4618 				area = addressSpace->LookupArea(faultAddress);
4619 			}
4620 
4621 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4622 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4623 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4624 				thread->team->Name(), thread->team->id,
4625 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4626 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4627 					area->Base() : 0x0));
4628 
4629 			if (addressSpace != NULL)
4630 				addressSpace->ReadUnlock();
4631 #endif
4632 
4633 			// If the thread has a signal handler for SIGSEGV, we simply
4634 			// send it the signal. Otherwise we notify the user debugger
4635 			// first.
4636 			struct sigaction action;
4637 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4638 					&& action.sa_handler != SIG_DFL
4639 					&& action.sa_handler != SIG_IGN)
4640 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4641 					SIGSEGV)) {
4642 				Signal signal(SIGSEGV,
4643 					status == B_PERMISSION_DENIED
4644 						? SEGV_ACCERR : SEGV_MAPERR,
4645 					EFAULT, thread->team->id);
4646 				signal.SetAddress((void*)address);
4647 				send_signal_to_thread(thread, signal, 0);
4648 			}
4649 		}
4650 	}
4651 
4652 	if (addressSpace != NULL)
4653 		addressSpace->Put();
4654 
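	// One way or another the fault has been dealt with: it was resolved, the
	// offending userland thread has been signalled, or the kernel has panicked.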
4655 	return B_HANDLED_INTERRUPT;
4656 }
4657 
4658 
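/*!	Bundles the state needed while resolving a single page fault: the locked
	address space, the locked cache chain starting at the area's top cache, the
	page reservation, and the outcome of the lookup. The destructor unlocks
	everything and returns any unused reserved pages.
*/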
4659 struct PageFaultContext {
4660 	AddressSpaceReadLocker	addressSpaceLocker;
4661 	VMCacheChainLocker		cacheChainLocker;
4662 
4663 	VMTranslationMap*		map;
4664 	VMCache*				topCache;
4665 	off_t					cacheOffset;
4666 	vm_page_reservation		reservation;
4667 	bool					isWrite;
4668 
4669 	// return values
4670 	vm_page*				page;
4671 	bool					restart;
4672 	bool					pageAllocated;
4673 
4674 
4675 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4676 		:
4677 		addressSpaceLocker(addressSpace, true),
4678 		map(addressSpace->TranslationMap()),
4679 		isWrite(isWrite)
4680 	{
4681 	}
4682 
4683 	~PageFaultContext()
4684 	{
4685 		UnlockAll();
4686 		vm_page_unreserve_pages(&reservation);
4687 	}
4688 
4689 	void Prepare(VMCache* topCache, off_t cacheOffset)
4690 	{
4691 		this->topCache = topCache;
4692 		this->cacheOffset = cacheOffset;
4693 		page = NULL;
4694 		restart = false;
4695 		pageAllocated = false;
4696 
4697 		cacheChainLocker.SetTo(topCache);
4698 	}
4699 
4700 	void UnlockAll(VMCache* exceptCache = NULL)
4701 	{
4702 		topCache = NULL;
4703 		addressSpaceLocker.Unlock();
4704 		cacheChainLocker.Unlock(exceptCache);
4705 	}
4706 };
4707 
4708 
4709 /*!	Gets the page that should be mapped into the area.
4710 	Returns an error code other than \c B_OK, if the page couldn't be found or
4711 	paged in. The locking state of the address space and the caches is undefined
4712 	in that case.
4713 	Returns \c B_OK with \c context.restart set to \c true, if the function
4714 	had to unlock the address space and all caches and is supposed to be called
4715 	again.
4716 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4717 	found. It is returned in \c context.page. The address space will still be
4718 	locked as well as all caches starting from the top cache to at least the
4719 	cache the page lives in.
4720 */
4721 static status_t
4722 fault_get_page(PageFaultContext& context)
4723 {
4724 	VMCache* cache = context.topCache;
4725 	VMCache* lastCache = NULL;
4726 	vm_page* page = NULL;
4727 
4728 	while (cache != NULL) {
4729 		// We already hold the lock of the cache at this point.
4730 
4731 		lastCache = cache;
4732 
4733 		page = cache->LookupPage(context.cacheOffset);
4734 		if (page != NULL && page->busy) {
4735 			// the page is busy -- wait for it to become unbusy
4736 			context.UnlockAll(cache);
4737 			cache->ReleaseRefLocked();
4738 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4739 
4740 			// restart the whole process
4741 			context.restart = true;
4742 			return B_OK;
4743 		}
4744 
4745 		if (page != NULL)
4746 			break;
4747 
4748 		// The current cache does not contain the page we're looking for.
4749 
4750 		// see if the backing store has it
4751 		if (cache->HasPage(context.cacheOffset)) {
4752 			// insert a fresh page and mark it busy -- we're going to read it in
4753 			page = vm_page_allocate_page(&context.reservation,
4754 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4755 			cache->InsertPage(page, context.cacheOffset);
4756 
4757 			// We need to unlock all caches and the address space while reading
4758 			// the page in. Keep a reference to the cache around.
4759 			cache->AcquireRefLocked();
4760 			context.UnlockAll();
4761 
4762 			// read the page in
4763 			generic_io_vec vec;
4764 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4765 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4766 
4767 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4768 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4769 
4770 			cache->Lock();
4771 
4772 			if (status < B_OK) {
4773 				// on error remove and free the page
4774 				dprintf("reading page from cache %p returned: %s!\n",
4775 					cache, strerror(status));
4776 
4777 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4778 				cache->RemovePage(page);
4779 				vm_page_set_state(page, PAGE_STATE_FREE);
4780 
4781 				cache->ReleaseRefAndUnlock();
4782 				return status;
4783 			}
4784 
4785 			// mark the page unbusy again
4786 			cache->MarkPageUnbusy(page);
4787 
4788 			DEBUG_PAGE_ACCESS_END(page);
4789 
4790 			// Since we needed to unlock everything temporarily, the area
4791 			// situation might have changed. So we need to restart the whole
4792 			// process.
4793 			cache->ReleaseRefAndUnlock();
4794 			context.restart = true;
4795 			return B_OK;
4796 		}
4797 
4798 		cache = context.cacheChainLocker.LockSourceCache();
4799 	}
4800 
4801 	if (page == NULL) {
4802 		// There was no adequate page, determine the cache for a clean one.
4803 		// Read-only pages come in the deepest cache, only the top most cache
4804 		// may have direct write access.
4805 		cache = context.isWrite ? context.topCache : lastCache;
4806 
4807 		// allocate a clean page
4808 		page = vm_page_allocate_page(&context.reservation,
4809 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4810 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4811 			page->physical_page_number));
4812 
4813 		// insert the new page into our cache
4814 		cache->InsertPage(page, context.cacheOffset);
4815 		context.pageAllocated = true;
4816 	} else if (page->Cache() != context.topCache && context.isWrite) {
4817 		// We have a page that has the data we want, but in the wrong cache
4818 		// object so we need to copy it and stick it into the top cache.
4819 		vm_page* sourcePage = page;
4820 
4821 		// TODO: If memory is low, it might be a good idea to steal the page
4822 		// from our source cache -- if possible, that is.
4823 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4824 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4825 
4826 		// To not needlessly kill concurrency we unlock all caches but the top
4827 		// one while copying the page. Lacking another mechanism to ensure that
4828 		// the source page doesn't disappear, we mark it busy.
4829 		sourcePage->busy = true;
4830 		context.cacheChainLocker.UnlockKeepRefs(true);
4831 
4832 		// copy the page
4833 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4834 			sourcePage->physical_page_number * B_PAGE_SIZE);
4835 
4836 		context.cacheChainLocker.RelockCaches(true);
4837 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4838 
4839 		// insert the new page into our cache
4840 		context.topCache->InsertPage(page, context.cacheOffset);
4841 		context.pageAllocated = true;
4842 	} else
4843 		DEBUG_PAGE_ACCESS_START(page);
4844 
4845 	context.page = page;
4846 	return B_OK;
4847 }
4848 
4849 
4850 /*!	Makes sure the address in the given address space is mapped.
4851 
4852 	\param addressSpace The address space.
4853 	\param originalAddress The address. Doesn't need to be page aligned.
4854 	\param isWrite If \c true the address shall be write-accessible.
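	\param isExecute If \c true the address shall be executable.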
4855 	\param isUser If \c true the access is requested by a userland team.
4856 	\param wirePage On success, if non \c NULL, the wired count of the page
4857 		mapped at the given address is incremented and the page is returned
4858 		via this parameter.
4859 	\return \c B_OK on success, another error code otherwise.
4860 */
4861 static status_t
4862 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4863 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4864 {
4865 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4866 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4867 		originalAddress, isWrite, isUser));
4868 
4869 	PageFaultContext context(addressSpace, isWrite);
4870 
4871 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4872 	status_t status = B_OK;
4873 
4874 	addressSpace->IncrementFaultCount();
4875 
4876 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4877 	// the pages upfront makes sure we don't have any cache locked, so that the
4878 	// page daemon/thief can do their job without problems.
4879 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4880 		originalAddress);
4881 	context.addressSpaceLocker.Unlock();
4882 	vm_page_reserve_pages(&context.reservation, reservePages,
4883 		addressSpace == VMAddressSpace::Kernel()
4884 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4885 
4886 	while (true) {
4887 		context.addressSpaceLocker.Lock();
4888 
4889 		// get the area the fault was in
4890 		VMArea* area = addressSpace->LookupArea(address);
4891 		if (area == NULL) {
4892 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4893 				"space\n", originalAddress);
4894 			TPF(PageFaultError(-1,
4895 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4896 			status = B_BAD_ADDRESS;
4897 			break;
4898 		}
4899 
4900 		// check permissions
4901 		uint32 protection = get_area_page_protection(area, address);
4902 		if (isUser && (protection & B_USER_PROTECTION) == 0
4903 				&& (area->protection & B_KERNEL_AREA) != 0) {
4904 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4905 				area->id, (void*)originalAddress);
4906 			TPF(PageFaultError(area->id,
4907 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4908 			status = B_PERMISSION_DENIED;
4909 			break;
4910 		}
4911 		if (isWrite && (protection
4912 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4913 			dprintf("write access attempted on write-protected area 0x%"
4914 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4915 			TPF(PageFaultError(area->id,
4916 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4917 			status = B_PERMISSION_DENIED;
4918 			break;
4919 		} else if (isExecute && (protection
4920 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4921 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4922 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4923 			TPF(PageFaultError(area->id,
4924 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4925 			status = B_PERMISSION_DENIED;
4926 			break;
4927 		} else if (!isWrite && !isExecute && (protection
4928 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4929 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4930 				" at %p\n", area->id, (void*)originalAddress);
4931 			TPF(PageFaultError(area->id,
4932 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4933 			status = B_PERMISSION_DENIED;
4934 			break;
4935 		}
4936 
4937 		// We have the area, it was a valid access, so let's try to resolve the
4938 		// page fault now.
4939 		// At first, the top most cache from the area is investigated.
4940 
4941 		context.Prepare(vm_area_get_locked_cache(area),
4942 			address - area->Base() + area->cache_offset);
4943 
4944 		// See if this cache has a fault handler -- this will do all the work
4945 		// for us.
4946 		{
4947 			// Note, since the page fault is resolved with interrupts enabled,
4948 			// the fault handler could be called more than once for the same
4949 			// reason -- the store must take this into account.
4950 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4951 			if (status != B_BAD_HANDLER)
4952 				break;
4953 		}
4954 
4955 		// The top most cache has no fault handler, so let's see if the cache or
4956 		// its sources already have the page we're searching for (we're going
4957 		// from top to bottom).
4958 		status = fault_get_page(context);
4959 		if (status != B_OK) {
4960 			TPF(PageFaultError(area->id, status));
4961 			break;
4962 		}
4963 
4964 		if (context.restart)
4965 			continue;
4966 
4967 		// All went fine, all there is left to do is to map the page into the
4968 		// address space.
4969 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4970 			context.page));
4971 
4972 		// If the page doesn't reside in the area's cache, we need to make sure
4973 		// it's mapped in read-only, so that we cannot overwrite someone else's
4974 		// data (copy-on-write)
4975 		uint32 newProtection = protection;
4976 		if (context.page->Cache() != context.topCache && !isWrite)
4977 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4978 
4979 		bool unmapPage = false;
4980 		bool mapPage = true;
4981 
4982 		// check whether there's already a page mapped at the address
4983 		context.map->Lock();
4984 
4985 		phys_addr_t physicalAddress;
4986 		uint32 flags;
4987 		vm_page* mappedPage = NULL;
4988 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4989 			&& (flags & PAGE_PRESENT) != 0
4990 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4991 				!= NULL) {
4992 			// Yep there's already a page. If it's ours, we can simply adjust
4993 			// its protection. Otherwise we have to unmap it.
4994 			if (mappedPage == context.page) {
4995 				context.map->ProtectPage(area, address, newProtection);
4996 					// Note: We assume that ProtectPage() is atomic (i.e.
4997 					// the page isn't temporarily unmapped), otherwise we'd have
4998 					// to make sure it isn't wired.
4999 				mapPage = false;
5000 			} else
5001 				unmapPage = true;
5002 		}
5003 
5004 		context.map->Unlock();
5005 
5006 		if (unmapPage) {
5007 			// If the page is wired, we can't unmap it. Wait until it is unwired
5008 			// again and restart. Note that the page cannot be wired for
5009 			// writing, since it isn't in the topmost cache. So we can safely
5010 			// ignore ranges wired for writing (our own and other concurrent
5011 			// wiring attempts in progress) and in fact have to do that to avoid
5012 			// a deadlock.
5013 			VMAreaUnwiredWaiter waiter;
5014 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
5015 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
5016 				// unlock everything and wait
5017 				if (context.pageAllocated) {
5018 					// ... but since we allocated a page and inserted it into
5019 					// the top cache, remove and free it first. Otherwise we'd
5020 					// have a page from a lower cache mapped while an upper
5021 					// cache has a page that would shadow it.
5022 					context.topCache->RemovePage(context.page);
5023 					vm_page_free_etc(context.topCache, context.page,
5024 						&context.reservation);
5025 				} else
5026 					DEBUG_PAGE_ACCESS_END(context.page);
5027 
5028 				context.UnlockAll();
5029 				waiter.waitEntry.Wait();
5030 				continue;
5031 			}
5032 
5033 			// Note: The mapped page is a page of a lower cache. We are
5034 			// guaranteed to have that cache locked, our new page is a copy of
5035 			// that page, and the page is not busy. The logic for that guarantee
5036 			// is as follows: Since the page is mapped, it must live in the top
5037 			// cache (ruled out above) or any of its lower caches, and there is
5038 			// (was before the new page was inserted) no other page in any
5039 			// cache between the top cache and the page's cache (otherwise that
5040 			// would be mapped instead). That in turn means that our algorithm
5041 			// must have found it and therefore it cannot be busy either.
5042 			DEBUG_PAGE_ACCESS_START(mappedPage);
5043 			unmap_page(area, address);
5044 			DEBUG_PAGE_ACCESS_END(mappedPage);
5045 		}
5046 
5047 		if (mapPage) {
5048 			if (map_page(area, context.page, address, newProtection,
5049 					&context.reservation) != B_OK) {
5050 				// Mapping can only fail when the page mapping object couldn't
5051 				// be allocated. Save for the missing mapping everything is
5052 				// fine, though. If this was a regular page fault, we'll simply
5053 				// leave and probably fault again. To make sure we'll have more
5054 				// luck then, we ensure that the minimum object reserve is
5055 				// available.
5056 				DEBUG_PAGE_ACCESS_END(context.page);
5057 
5058 				context.UnlockAll();
5059 
5060 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
5061 						!= B_OK) {
5062 					// Apparently the situation is serious. Let's get ourselves
5063 					// killed.
5064 					status = B_NO_MEMORY;
5065 				} else if (wirePage != NULL) {
5066 					// The caller expects us to wire the page. Since
5067 					// object_cache_reserve() succeeded, we should now be able
5068 					// to allocate a mapping structure. Restart.
5069 					continue;
5070 				}
5071 
5072 				break;
5073 			}
5074 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
5075 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
5076 
5077 		// also wire the page, if requested
5078 		if (wirePage != NULL && status == B_OK) {
5079 			increment_page_wired_count(context.page);
5080 			*wirePage = context.page;
5081 		}
5082 
5083 		DEBUG_PAGE_ACCESS_END(context.page);
5084 
5085 		break;
5086 	}
5087 
5088 	return status;
5089 }
5090 
5091 
5092 status_t
5093 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
5094 {
5095 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
5096 }
5097 
5098 status_t
5099 vm_put_physical_page(addr_t vaddr, void* handle)
5100 {
5101 	return sPhysicalPageMapper->PutPage(vaddr, handle);
5102 }
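
/*	Typical usage (illustrative sketch): map a physical page into the kernel
	address space, access it through the returned virtual address, and release
	the mapping again:

		addr_t virtualAddress;
		void* handle;
		if (vm_get_physical_page(physicalAddress, &virtualAddress, &handle)
				== B_OK) {
			// ... access the page via virtualAddress ...
			vm_put_physical_page(virtualAddress, handle);
		}
*/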
5103 
5104 
5105 status_t
5106 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
5107 	void** _handle)
5108 {
5109 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
5110 }
5111 
5112 status_t
5113 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
5114 {
5115 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
5116 }
5117 
5118 
5119 status_t
5120 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
5121 {
5122 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
5123 }
5124 
5125 status_t
5126 vm_put_physical_page_debug(addr_t vaddr, void* handle)
5127 {
5128 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
5129 }
5130 
5131 
5132 void
5133 vm_get_info(system_info* info)
5134 {
5135 	swap_get_info(info);
5136 
5137 	MutexLocker locker(sAvailableMemoryLock);
5138 	info->needed_memory = sNeededMemory;
5139 	info->free_memory = sAvailableMemory;
5140 }
5141 
5142 
5143 uint32
5144 vm_num_page_faults(void)
5145 {
5146 	return sPageFaults;
5147 }
5148 
5149 
5150 off_t
5151 vm_available_memory(void)
5152 {
5153 	MutexLocker locker(sAvailableMemoryLock);
5154 	return sAvailableMemory;
5155 }
5156 
5157 
5158 off_t
5159 vm_available_not_needed_memory(void)
5160 {
5161 	MutexLocker locker(sAvailableMemoryLock);
5162 	return sAvailableMemory - sNeededMemory;
5163 }
5164 
5165 
5166 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
5167 	debugger.
5168 */
5169 off_t
5170 vm_available_not_needed_memory_debug(void)
5171 {
5172 	return sAvailableMemory - sNeededMemory;
5173 }
5174 
5175 
5176 size_t
5177 vm_kernel_address_space_left(void)
5178 {
5179 	return VMAddressSpace::Kernel()->FreeSpace();
5180 }
5181 
5182 
5183 void
5184 vm_unreserve_memory(size_t amount)
5185 {
5186 	mutex_lock(&sAvailableMemoryLock);
5187 
5188 	sAvailableMemory += amount;
5189 
5190 	mutex_unlock(&sAvailableMemoryLock);
5191 }
5192 
5193 
5194 status_t
5195 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5196 {
5197 	size_t reserve = kMemoryReserveForPriority[priority];
5198 
5199 	MutexLocker locker(sAvailableMemoryLock);
5200 
5201 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5202 
5203 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5204 		sAvailableMemory -= amount;
5205 		return B_OK;
5206 	}
5207 
5208 	if (amount >= (vm_page_num_pages() * B_PAGE_SIZE)) {
5209 		// Do not wait for something that will never happen.
5210 		return B_NO_MEMORY;
5211 	}
5212 
5213 	if (timeout <= 0)
5214 		return B_NO_MEMORY;
5215 
5216 	// turn timeout into an absolute timeout
5217 	timeout += system_time();
5218 
5219 	// loop until we've got the memory or the timeout occurs
5220 	do {
5221 		sNeededMemory += amount;
5222 
5223 		// call the low resource manager
5224 		locker.Unlock();
5225 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5226 			B_ABSOLUTE_TIMEOUT, timeout);
5227 		locker.Lock();
5228 
5229 		sNeededMemory -= amount;
5230 
5231 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5232 			sAvailableMemory -= amount;
5233 			return B_OK;
5234 		}
5235 	} while (timeout > system_time());
5236 
5237 	return B_NO_MEMORY;
5238 }
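

/*	A minimal usage sketch (the amount, priority and timeout below are
	arbitrary): callers reserve memory before committing it and give the
	reservation back via vm_unreserve_memory() when it is no longer needed.

		if (vm_try_reserve_memory(16 * B_PAGE_SIZE, VM_PRIORITY_USER,
				1000000) != B_OK)
			return B_NO_MEMORY;
		// ... commit/allocate the pages backed by this reservation ...
		vm_unreserve_memory(16 * B_PAGE_SIZE);
*/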
5239 
5240 
5241 status_t
5242 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5243 {
5244 	// NOTE: The caller is responsible for synchronizing calls to this function!
5245 
5246 	AddressSpaceReadLocker locker;
5247 	VMArea* area;
5248 	status_t status = locker.SetFromArea(id, area);
5249 	if (status != B_OK)
5250 		return status;
5251 
5252 	// nothing to do, if the type doesn't change
5253 	uint32 oldType = area->MemoryType();
5254 	if (type == oldType)
5255 		return B_OK;
5256 
5257 	// set the memory type of the area and the mapped pages
5258 	VMTranslationMap* map = area->address_space->TranslationMap();
5259 	map->Lock();
5260 	area->SetMemoryType(type);
5261 	map->ProtectArea(area, area->protection);
5262 	map->Unlock();
5263 
5264 	// set the physical memory type
5265 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5266 	if (error != B_OK) {
5267 		// reset the memory type of the area and the mapped pages
5268 		map->Lock();
5269 		area->SetMemoryType(oldType);
5270 		map->ProtectArea(area, area->protection);
5271 		map->Unlock();
5272 		return error;
5273 	}
5274 
5275 	return B_OK;
5277 }
5278 
5279 
5280 /*!	This function enforces some protection properties:
5281 	 - kernel areas must be W^X (after kernel startup)
5282 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5283 	 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5284 */
5285 static void
5286 fix_protection(uint32* protection)
5287 {
5288 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5289 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5290 			|| (*protection & B_WRITE_AREA) != 0)
5291 		&& !gKernelStartup)
5292 		panic("kernel areas cannot be both writable and executable!");
5293 
5294 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5295 		if ((*protection & B_WRITE_AREA) != 0)
5296 			*protection |= B_KERNEL_WRITE_AREA;
5297 		if ((*protection & B_READ_AREA) != 0)
5298 			*protection |= B_KERNEL_READ_AREA;
5299 	}
5300 }
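

/*	For illustration only (a sketch): a plain userland request is widened so
	the kernel can access the area as well, while a writable and executable
	kernel protection trips the W^X panic once kernel startup has finished.

		uint32 protection = B_READ_AREA | B_WRITE_AREA;
		fix_protection(&protection);
		// protection == B_READ_AREA | B_WRITE_AREA
		//	| B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA

		uint32 bad = B_KERNEL_EXECUTE_AREA | B_KERNEL_WRITE_AREA;
		fix_protection(&bad);	// panics (W^X violation)
*/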
5301 
5302 
5303 static void
5304 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5305 {
5306 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5307 	info->area = area->id;
5308 	info->address = (void*)area->Base();
5309 	info->size = area->Size();
5310 	info->protection = area->protection;
5311 	info->lock = area->wiring;
5312 	info->team = area->address_space->ID();
5313 	info->copy_count = 0;
5314 	info->in_count = 0;
5315 	info->out_count = 0;
5316 		// TODO: retrieve real values here!
5317 
5318 	VMCache* cache = vm_area_get_locked_cache(area);
5319 
5320 	// Note, this is a simplification; the cache could be larger than this area
5321 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5322 
5323 	vm_area_put_locked_cache(cache);
5324 }
5325 
5326 
5327 static status_t
5328 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5329 {
5330 	// is newSize a multiple of B_PAGE_SIZE?
5331 	if (newSize & (B_PAGE_SIZE - 1))
5332 		return B_BAD_VALUE;
5333 
5334 	// lock all affected address spaces and the cache
5335 	VMArea* area;
5336 	VMCache* cache;
5337 
5338 	MultiAddressSpaceLocker locker;
5339 	AreaCacheLocker cacheLocker;
5340 
5341 	status_t status;
5342 	size_t oldSize;
5343 	bool anyKernelArea;
5344 	bool restart;
5345 
5346 	do {
5347 		anyKernelArea = false;
5348 		restart = false;
5349 
5350 		locker.Unset();
5351 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5352 		if (status != B_OK)
5353 			return status;
5354 		cacheLocker.SetTo(cache, true);	// already locked
5355 
5356 		// enforce restrictions
5357 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5358 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5359 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5360 				"resize kernel area %" B_PRId32 " (%s)\n",
5361 				team_get_current_team_id(), areaID, area->name);
5362 			return B_NOT_ALLOWED;
5363 		}
5364 		// TODO: Enforce all restrictions (team, etc.)!
5365 
5366 		oldSize = area->Size();
5367 		if (newSize == oldSize)
5368 			return B_OK;
5369 
5370 		if (cache->type != CACHE_TYPE_RAM)
5371 			return B_NOT_ALLOWED;
5372 
5373 		if (oldSize < newSize) {
5374 			// We need to check if all areas of this cache can be resized.
5375 			for (VMArea* current = cache->areas; current != NULL;
5376 					current = current->cache_next) {
5377 				if (!current->address_space->CanResizeArea(current, newSize))
5378 					return B_ERROR;
5379 				anyKernelArea
5380 					|= current->address_space == VMAddressSpace::Kernel();
5381 			}
5382 		} else {
5383 			// We're shrinking the areas, so we must make sure the affected
5384 			// ranges are not wired.
5385 			for (VMArea* current = cache->areas; current != NULL;
5386 					current = current->cache_next) {
5387 				anyKernelArea
5388 					|= current->address_space == VMAddressSpace::Kernel();
5389 
5390 				if (wait_if_area_range_is_wired(current,
5391 						current->Base() + newSize, oldSize - newSize, &locker,
5392 						&cacheLocker)) {
5393 					restart = true;
5394 					break;
5395 				}
5396 			}
5397 		}
5398 	} while (restart);
5399 
5400 	// Okay, looks good so far, so let's do it
5401 
5402 	int priority = kernel && anyKernelArea
5403 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5404 	uint32 allocationFlags = kernel && anyKernelArea
5405 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5406 
5407 	if (oldSize < newSize) {
5408 		// Growing the cache can fail, so we do it first.
5409 		status = cache->Resize(cache->virtual_base + newSize, priority);
5410 		if (status != B_OK)
5411 			return status;
5412 	}
5413 
5414 	for (VMArea* current = cache->areas; current != NULL;
5415 			current = current->cache_next) {
5416 		status = current->address_space->ResizeArea(current, newSize,
5417 			allocationFlags);
5418 		if (status != B_OK)
5419 			break;
5420 
5421 		// We also need to unmap all pages beyond the new size, if the area has
5422 		// shrunk
5423 		if (newSize < oldSize) {
5424 			VMCacheChainLocker cacheChainLocker(cache);
5425 			cacheChainLocker.LockAllSourceCaches();
5426 
5427 			unmap_pages(current, current->Base() + newSize,
5428 				oldSize - newSize);
5429 
5430 			cacheChainLocker.Unlock(cache);
5431 		}
5432 	}
5433 
5434 	if (status == B_OK) {
5435 		// Shrink or grow individual page protections if in use.
5436 		if (area->page_protections != NULL) {
5437 			size_t bytes = area_page_protections_size(newSize);
5438 			uint8* newProtections
5439 				= (uint8*)realloc(area->page_protections, bytes);
5440 			if (newProtections == NULL)
5441 				status = B_NO_MEMORY;
5442 			else {
5443 				area->page_protections = newProtections;
5444 
5445 				if (oldSize < newSize) {
5446 					// init the additional page protections to that of the area
5447 					uint32 offset = area_page_protections_size(oldSize);
5448 					uint32 areaProtection = area->protection
5449 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5450 					memset(area->page_protections + offset,
5451 						areaProtection | (areaProtection << 4), bytes - offset);
5452 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5453 						uint8& entry = area->page_protections[offset - 1];
5454 						entry = (entry & 0x0f) | (areaProtection << 4);
5455 					}
5456 				}
5457 			}
5458 		}
5459 	}
5460 
5461 	// shrinking the cache can't fail, so we do it now
5462 	if (status == B_OK && newSize < oldSize)
5463 		status = cache->Resize(cache->virtual_base + newSize, priority);
5464 
5465 	if (status != B_OK) {
5466 		// Something failed -- resize the areas back to their original size.
5467 		// This can fail, too, in which case we're seriously screwed.
5468 		for (VMArea* current = cache->areas; current != NULL;
5469 				current = current->cache_next) {
5470 			if (current->address_space->ResizeArea(current, oldSize,
5471 					allocationFlags) != B_OK) {
5472 				panic("vm_resize_area(): Failed and unable to restore "
5473 					"original state.");
5474 			}
5475 		}
5476 
5477 		cache->Resize(cache->virtual_base + oldSize, priority);
5478 	}
5479 
5480 	// TODO: we must honour the lock restrictions of this area
5481 	return status;
5482 }
5483 
5484 
5485 status_t
5486 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5487 {
5488 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5489 }
5490 
5491 
5492 status_t
5493 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5494 {
5495 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5496 }
5497 
5498 
5499 status_t
5500 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5501 	bool user)
5502 {
5503 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5504 }
5505 
5506 
5507 void
5508 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5509 {
5510 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5511 }
5512 
5513 
5514 /*!	Copies a range of memory directly from/to a page that might not be mapped
5515 	at the moment.
5516 
5517 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5518 	walks through the respective area's cache chain to find the physical page
5519 	and copies from/to it directly.
5520 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5521 	must not cross a page boundary.
5522 
5523 	\param teamID The team ID identifying the address space \a unsafeMemory is
5524 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5525 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5526 		is passed, the address space of the thread returned by
5527 		debug_get_debugged_thread() is used.
5528 	\param unsafeMemory The start of the unsafe memory range to be copied
5529 		from/to.
5530 	\param buffer A safely accessible kernel buffer to be copied from/to.
5531 	\param size The number of bytes to be copied.
5532 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5533 		\a unsafeMemory, the other way around otherwise.
5534 */
5535 status_t
5536 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5537 	size_t size, bool copyToUnsafe)
5538 {
5539 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5540 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5541 		return B_BAD_VALUE;
5542 	}
5543 
5544 	// get the address space for the debugged thread
5545 	VMAddressSpace* addressSpace;
5546 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5547 		addressSpace = VMAddressSpace::Kernel();
5548 	} else if (teamID == B_CURRENT_TEAM) {
5549 		Thread* thread = debug_get_debugged_thread();
5550 		if (thread == NULL || thread->team == NULL)
5551 			return B_BAD_ADDRESS;
5552 
5553 		addressSpace = thread->team->address_space;
5554 	} else
5555 		addressSpace = VMAddressSpace::DebugGet(teamID);
5556 
5557 	if (addressSpace == NULL)
5558 		return B_BAD_ADDRESS;
5559 
5560 	// get the area
5561 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5562 	if (area == NULL)
5563 		return B_BAD_ADDRESS;
5564 
5565 	// search the page
5566 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5567 		+ area->cache_offset;
5568 	VMCache* cache = area->cache;
5569 	vm_page* page = NULL;
5570 	while (cache != NULL) {
5571 		page = cache->DebugLookupPage(cacheOffset);
5572 		if (page != NULL)
5573 			break;
5574 
5575 		// Page not found in this cache -- if it is paged out, we must not try
5576 		// to get it from lower caches.
5577 		if (cache->DebugHasPage(cacheOffset))
5578 			break;
5579 
5580 		cache = cache->source;
5581 	}
5582 
5583 	if (page == NULL)
5584 		return B_UNSUPPORTED;
5585 
5586 	// copy from/to physical memory
5587 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5588 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5589 
5590 	if (copyToUnsafe) {
5591 		if (page->Cache() != area->cache)
5592 			return B_UNSUPPORTED;
5593 
5594 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5595 	}
5596 
5597 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5598 }
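

/*	Sketch of a kernel debugger caller (teamID and address are assumed to
	come from a KDL command's arguments): reading four bytes from a possibly
	unmapped page without going through the fault path.

		uint32 value;
		if (vm_debug_copy_page_memory(teamID, (void*)address, &value,
				sizeof(value), false) == B_OK)
			kprintf("%#" B_PRIx32 "\n", value);
*/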
5599 
5600 
5601 /** Validate that a memory range is either fully in kernel space, or fully in
5602  *  userspace */
5603 static inline bool
5604 validate_memory_range(const void* addr, size_t size)
5605 {
5606 	addr_t address = (addr_t)addr;
5607 
5608 	// Check for overflows on all addresses.
5609 	if ((address + size) < address)
5610 		return false;
5611 
5612 	// Validate that the address range does not cross the kernel/user boundary.
5613 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5614 }
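

/*	For example (addresses are illustrative): a range that would wrap around
	the address space or straddle the user/kernel boundary is rejected.

		validate_memory_range((void*)~(addr_t)0, 2);
			// false: address + size overflows
		validate_memory_range((void*)(USER_TOP - 0xf), B_PAGE_SIZE);
			// false: starts in userland but ends beyond USER_TOP
*/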
5615 
5616 
5617 //	#pragma mark - kernel public API
5618 
5619 
5620 status_t
5621 user_memcpy(void* to, const void* from, size_t size)
5622 {
5623 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5624 		return B_BAD_ADDRESS;
5625 
5626 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5627 		return B_BAD_ADDRESS;
5628 
5629 	return B_OK;
5630 }
5631 
5632 
5633 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5634 	the string in \a to, NULL-terminating the result.
5635 
5636 	\param to Pointer to the destination C-string.
5637 	\param from Pointer to the source C-string.
5638 	\param size Size in bytes of the string buffer pointed to by \a to.
5639 
5640 	\return strlen(\a from).
5641 */
5642 ssize_t
5643 user_strlcpy(char* to, const char* from, size_t size)
5644 {
5645 	if (to == NULL && size != 0)
5646 		return B_BAD_VALUE;
5647 	if (from == NULL)
5648 		return B_BAD_ADDRESS;
5649 
5650 	// Protect the source address from overflows.
5651 	size_t maxSize = size;
5652 	if ((addr_t)from + maxSize < (addr_t)from)
5653 		maxSize -= (addr_t)from + maxSize;
5654 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5655 		maxSize = USER_TOP - (addr_t)from;
5656 
5657 	if (!validate_memory_range(to, maxSize))
5658 		return B_BAD_ADDRESS;
5659 
5660 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5661 	if (result < 0)
5662 		return result;
5663 
5664 	// If we hit the address overflow boundary, fail.
5665 	if ((size_t)result >= maxSize && maxSize < size)
5666 		return B_BAD_ADDRESS;
5667 
5668 	return result;
5669 }
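

/*	The usual calling pattern (a sketch; userName stands for a userland
	pointer): both a fault and a truncated copy are treated as errors.

		char name[B_OS_NAME_LENGTH];
		ssize_t length = user_strlcpy(name, userName, sizeof(name));
		if (length < B_OK)
			return B_BAD_ADDRESS;
		if (length >= (ssize_t)sizeof(name))
			return B_NAME_TOO_LONG;
*/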
5670 
5671 
5672 status_t
5673 user_memset(void* s, char c, size_t count)
5674 {
5675 	if (!validate_memory_range(s, count))
5676 		return B_BAD_ADDRESS;
5677 
5678 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5679 		return B_BAD_ADDRESS;
5680 
5681 	return B_OK;
5682 }
5683 
5684 
5685 /*!	Wires a single page at the given address.
5686 
5687 	\param team The team whose address space the address belongs to. Supports
5688 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5689 		parameter is ignored.
5690 	\param address The virtual address to wire down. Does not need to
5691 		be page aligned.
5692 	\param writable If \c true the page shall be writable.
5693 	\param info On success the info is filled in, among other things
5694 		containing the physical address the given virtual one translates to.
5695 	\return \c B_OK, when the page could be wired, another error code otherwise.
5696 */
5697 status_t
5698 vm_wire_page(team_id team, addr_t address, bool writable,
5699 	VMPageWiringInfo* info)
5700 {
5701 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5702 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5703 
5704 	// compute the page protection that is required
5705 	bool isUser = IS_USER_ADDRESS(address);
5706 	uint32 requiredProtection = PAGE_PRESENT
5707 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5708 	if (writable)
5709 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5710 
5711 	// get and read lock the address space
5712 	VMAddressSpace* addressSpace = NULL;
5713 	if (isUser) {
5714 		if (team == B_CURRENT_TEAM)
5715 			addressSpace = VMAddressSpace::GetCurrent();
5716 		else
5717 			addressSpace = VMAddressSpace::Get(team);
5718 	} else
5719 		addressSpace = VMAddressSpace::GetKernel();
5720 	if (addressSpace == NULL)
5721 		return B_ERROR;
5722 
5723 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5724 
5725 	VMTranslationMap* map = addressSpace->TranslationMap();
5726 	status_t error = B_OK;
5727 
5728 	// get the area
5729 	VMArea* area = addressSpace->LookupArea(pageAddress);
5730 	if (area == NULL) {
5731 		addressSpace->Put();
5732 		return B_BAD_ADDRESS;
5733 	}
5734 
5735 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5736 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5737 
5738 	// mark the area range wired
5739 	area->Wire(&info->range);
5740 
5741 	// Lock the area's cache chain and the translation map. Needed to look
5742 	// up the page and play with its wired count.
5743 	cacheChainLocker.LockAllSourceCaches();
5744 	map->Lock();
5745 
5746 	phys_addr_t physicalAddress;
5747 	uint32 flags;
5748 	vm_page* page;
5749 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5750 		&& (flags & requiredProtection) == requiredProtection
5751 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5752 			!= NULL) {
5753 		// Already mapped with the correct permissions -- just increment
5754 		// the page's wired count.
5755 		increment_page_wired_count(page);
5756 
5757 		map->Unlock();
5758 		cacheChainLocker.Unlock();
5759 		addressSpaceLocker.Unlock();
5760 	} else {
5761 		// Let vm_soft_fault() map the page for us, if possible. We need
5762 		// to fully unlock to avoid deadlocks. Since we have already
5763 		// wired the area itself, nothing disturbing will happen with it
5764 		// in the meantime.
5765 		map->Unlock();
5766 		cacheChainLocker.Unlock();
5767 		addressSpaceLocker.Unlock();
5768 
5769 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5770 			isUser, &page);
5771 
5772 		if (error != B_OK) {
5773 			// The page could not be mapped -- clean up.
5774 			VMCache* cache = vm_area_get_locked_cache(area);
5775 			area->Unwire(&info->range);
5776 			cache->ReleaseRefAndUnlock();
5777 			addressSpace->Put();
5778 			return error;
5779 		}
5780 	}
5781 
5782 	info->physicalAddress
5783 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5784 			+ address % B_PAGE_SIZE;
5785 	info->page = page;
5786 
5787 	return B_OK;
5788 }
5789 
5790 
5791 /*!	Unwires a single page previously wired via vm_wire_page().
5792 
5793 	\param info The same object passed to vm_wire_page() before.
5794 */
5795 void
5796 vm_unwire_page(VMPageWiringInfo* info)
5797 {
5798 	// lock the address space
5799 	VMArea* area = info->range.area;
5800 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5801 		// takes over our reference
5802 
5803 	// lock the top cache
5804 	VMCache* cache = vm_area_get_locked_cache(area);
5805 	VMCacheChainLocker cacheChainLocker(cache);
5806 
5807 	if (info->page->Cache() != cache) {
5808 		// The page is not in the top cache, so we lock the whole cache chain
5809 		// before touching the page's wired count.
5810 		cacheChainLocker.LockAllSourceCaches();
5811 	}
5812 
5813 	decrement_page_wired_count(info->page);
5814 
5815 	// remove the wired range from the area
5816 	area->Unwire(&info->range);
5817 
5818 	cacheChainLocker.Unlock();
5819 }
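

/*	Wiring sketch (illustrative; "buffer" stands for some address the caller
	obtained elsewhere): wire one page, use the resulting physical address,
	then unwire it with the very same info object.

		VMPageWiringInfo info;
		status_t status = vm_wire_page(B_CURRENT_TEAM, (addr_t)buffer, true,
			&info);
		if (status != B_OK)
			return status;
		// info.physicalAddress and info.page remain valid, and the page
		// stays resident, until:
		vm_unwire_page(&info);
*/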
5820 
5821 
5822 /*!	Wires down the given address range in the specified team's address space.
5823 
5824 	If successful the function
5825 	- acquires a reference to the specified team's address space,
5826 	- adds respective wired ranges to all areas that intersect with the given
5827 	  address range,
5828 	- makes sure all pages in the given address range are mapped with the
5829 	  requested access permissions and increments their wired count.
5830 
5831 	It fails when \a team doesn't specify a valid address space, when any part
5832 	of the specified address range is not covered by areas, when the concerned
5833 	areas don't allow mapping with the requested permissions, or when mapping
5834 	failed for another reason.
5835 
5836 	When successful the call must be balanced by an unlock_memory_etc() call
5837 	with the exact same parameters.
5838 
5839 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
5840 		is supported.
5841 	\param address The start of the address range to be wired.
5842 	\param numBytes The size of the address range to be wired.
5843 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5844 		requests that the range must be wired writable ("read from device
5845 		into memory").
5846 	\return \c B_OK on success, another error code otherwise.
5847 */
5848 status_t
5849 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5850 {
5851 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5852 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5853 
5854 	// compute the page protection that is required
5855 	bool isUser = IS_USER_ADDRESS(address);
5856 	bool writable = (flags & B_READ_DEVICE) == 0;
5857 	uint32 requiredProtection = PAGE_PRESENT
5858 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5859 	if (writable)
5860 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5861 
5862 	uint32 mallocFlags = isUser
5863 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5864 
5865 	// get and read lock the address space
5866 	VMAddressSpace* addressSpace = NULL;
5867 	if (isUser) {
5868 		if (team == B_CURRENT_TEAM)
5869 			addressSpace = VMAddressSpace::GetCurrent();
5870 		else
5871 			addressSpace = VMAddressSpace::Get(team);
5872 	} else
5873 		addressSpace = VMAddressSpace::GetKernel();
5874 	if (addressSpace == NULL)
5875 		return B_ERROR;
5876 
5877 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5878 		// We get a new address space reference here. The one we got above will
5879 		// be freed by unlock_memory_etc().
5880 
5881 	VMTranslationMap* map = addressSpace->TranslationMap();
5882 	status_t error = B_OK;
5883 
5884 	// iterate through all concerned areas
5885 	addr_t nextAddress = lockBaseAddress;
5886 	while (nextAddress != lockEndAddress) {
5887 		// get the next area
5888 		VMArea* area = addressSpace->LookupArea(nextAddress);
5889 		if (area == NULL) {
5890 			error = B_BAD_ADDRESS;
5891 			break;
5892 		}
5893 
5894 		addr_t areaStart = nextAddress;
5895 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5896 
5897 		// allocate the wired range (do that before locking the cache to avoid
5898 		// deadlocks)
5899 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5900 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5901 		if (range == NULL) {
5902 			error = B_NO_MEMORY;
5903 			break;
5904 		}
5905 
5906 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5907 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5908 
5909 		// mark the area range wired
5910 		area->Wire(range);
5911 
5912 		// Depending on the area cache type and the wiring, we may not need to
5913 		// look at the individual pages.
5914 		if (area->cache_type == CACHE_TYPE_NULL
5915 			|| area->cache_type == CACHE_TYPE_DEVICE
5916 			|| area->wiring == B_FULL_LOCK
5917 			|| area->wiring == B_CONTIGUOUS) {
5918 			nextAddress = areaEnd;
5919 			continue;
5920 		}
5921 
5922 		// Lock the area's cache chain and the translation map. Needed to look
5923 		// up pages and play with their wired count.
5924 		cacheChainLocker.LockAllSourceCaches();
5925 		map->Lock();
5926 
5927 		// iterate through the pages and wire them
5928 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5929 			phys_addr_t physicalAddress;
5930 			uint32 flags;
5931 
5932 			vm_page* page;
5933 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5934 				&& (flags & requiredProtection) == requiredProtection
5935 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5936 					!= NULL) {
5937 				// Already mapped with the correct permissions -- just increment
5938 				// the page's wired count.
5939 				increment_page_wired_count(page);
5940 			} else {
5941 				// Let vm_soft_fault() map the page for us, if possible. We need
5942 				// to fully unlock to avoid deadlocks. Since we have already
5943 				// wired the area itself, nothing disturbing will happen with it
5944 				// in the meantime.
5945 				map->Unlock();
5946 				cacheChainLocker.Unlock();
5947 				addressSpaceLocker.Unlock();
5948 
5949 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5950 					false, isUser, &page);
5951 
5952 				addressSpaceLocker.Lock();
5953 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5954 				cacheChainLocker.LockAllSourceCaches();
5955 				map->Lock();
5956 			}
5957 
5958 			if (error != B_OK)
5959 				break;
5960 		}
5961 
5962 		map->Unlock();
5963 
5964 		if (error == B_OK) {
5965 			cacheChainLocker.Unlock();
5966 		} else {
5967 			// An error occurred, so abort right here. If the current address
5968 			// is the first in this area, unwire the area, since we won't get
5969 			// to it when reverting what we've done so far.
5970 			if (nextAddress == areaStart) {
5971 				area->Unwire(range);
5972 				cacheChainLocker.Unlock();
5973 				range->~VMAreaWiredRange();
5974 				free_etc(range, mallocFlags);
5975 			} else
5976 				cacheChainLocker.Unlock();
5977 
5978 			break;
5979 		}
5980 	}
5981 
5982 	if (error != B_OK) {
5983 		// An error occurred, so unwire all that we've already wired. Note that
5984 		// even if not a single page was wired, unlock_memory_etc() is called
5985 		// to put the address space reference.
5986 		addressSpaceLocker.Unlock();
5987 		unlock_memory_etc(team, (void*)lockBaseAddress,
5988 			nextAddress - lockBaseAddress, flags);
5989 	}
5990 
5991 	return error;
5992 }
5993 
5994 
5995 status_t
5996 lock_memory(void* address, size_t numBytes, uint32 flags)
5997 {
5998 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5999 }
6000 
6001 
6002 /*!	Unwires an address range previously wired with lock_memory_etc().
6003 
6004 	Note that a call to this function must balance a previous lock_memory_etc()
6005 	call with exactly the same parameters.
6006 */
6007 status_t
6008 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
6009 {
6010 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
6011 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
6012 
6013 	// compute the page protection that is required
6014 	bool isUser = IS_USER_ADDRESS(address);
6015 	bool writable = (flags & B_READ_DEVICE) == 0;
6016 	uint32 requiredProtection = PAGE_PRESENT
6017 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
6018 	if (writable)
6019 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
6020 
6021 	uint32 mallocFlags = isUser
6022 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
6023 
6024 	// get and read lock the address space
6025 	VMAddressSpace* addressSpace = NULL;
6026 	if (isUser) {
6027 		if (team == B_CURRENT_TEAM)
6028 			addressSpace = VMAddressSpace::GetCurrent();
6029 		else
6030 			addressSpace = VMAddressSpace::Get(team);
6031 	} else
6032 		addressSpace = VMAddressSpace::GetKernel();
6033 	if (addressSpace == NULL)
6034 		return B_ERROR;
6035 
6036 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
6037 		// Take over the address space reference. We don't unlock until we're
6038 		// done.
6039 
6040 	VMTranslationMap* map = addressSpace->TranslationMap();
6041 	status_t error = B_OK;
6042 
6043 	// iterate through all concerned areas
6044 	addr_t nextAddress = lockBaseAddress;
6045 	while (nextAddress != lockEndAddress) {
6046 		// get the next area
6047 		VMArea* area = addressSpace->LookupArea(nextAddress);
6048 		if (area == NULL) {
6049 			error = B_BAD_ADDRESS;
6050 			break;
6051 		}
6052 
6053 		addr_t areaStart = nextAddress;
6054 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
6055 
6056 		// Lock the area's top cache. This is a requirement for
6057 		// VMArea::Unwire().
6058 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6059 
6060 		// Depending on the area cache type and the wiring, we may not need to
6061 		// look at the individual pages.
6062 		if (area->cache_type == CACHE_TYPE_NULL
6063 			|| area->cache_type == CACHE_TYPE_DEVICE
6064 			|| area->wiring == B_FULL_LOCK
6065 			|| area->wiring == B_CONTIGUOUS) {
6066 			// unwire the range (to avoid deadlocks we delete the range after
6067 			// unlocking the cache)
6068 			nextAddress = areaEnd;
6069 			VMAreaWiredRange* range = area->Unwire(areaStart,
6070 				areaEnd - areaStart, writable);
6071 			cacheChainLocker.Unlock();
6072 			if (range != NULL) {
6073 				range->~VMAreaWiredRange();
6074 				free_etc(range, mallocFlags);
6075 			}
6076 			continue;
6077 		}
6078 
6079 		// Lock the area's cache chain and the translation map. Needed to look
6080 		// up pages and play with their wired count.
6081 		cacheChainLocker.LockAllSourceCaches();
6082 		map->Lock();
6083 
6084 		// iterate through the pages and unwire them
6085 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
6086 			phys_addr_t physicalAddress;
6087 			uint32 flags;
6088 
6089 			vm_page* page;
6090 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
6091 				&& (flags & PAGE_PRESENT) != 0
6092 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
6093 					!= NULL) {
6094 				// The page is still mapped -- just decrement
6095 				// its wired count.
6096 				decrement_page_wired_count(page);
6097 			} else {
6098 				panic("unlock_memory_etc(): Failed to unwire page: address "
6099 					"space %p, address: %#" B_PRIxADDR, addressSpace,
6100 					nextAddress);
6101 				error = B_BAD_VALUE;
6102 				break;
6103 			}
6104 		}
6105 
6106 		map->Unlock();
6107 
6108 		// All pages are unwired. Remove the area's wired range as well (to
6109 		// avoid deadlocks we delete the range after unlocking the cache).
6110 		VMAreaWiredRange* range = area->Unwire(areaStart,
6111 			areaEnd - areaStart, writable);
6112 
6113 		cacheChainLocker.Unlock();
6114 
6115 		if (range != NULL) {
6116 			range->~VMAreaWiredRange();
6117 			free_etc(range, mallocFlags);
6118 		}
6119 
6120 		if (error != B_OK)
6121 			break;
6122 	}
6123 
6124 	// get rid of the address space reference lock_memory_etc() acquired
6125 	addressSpace->Put();
6126 
6127 	return error;
6128 }
6129 
6130 
6131 status_t
6132 unlock_memory(void* address, size_t numBytes, uint32 flags)
6133 {
6134 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
6135 }
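

/*	Pairing sketch (team, buffer, length and flags are placeholders): every
	successful lock_memory_etc() must later be balanced by an
	unlock_memory_etc() call with exactly the same parameters.

		status_t status = lock_memory_etc(team, buffer, length, flags);
		if (status != B_OK)
			return status;
		// ... the range is wired; program the device / access the pages ...
		unlock_memory_etc(team, buffer, length, flags);
*/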
6136 
6137 
6138 /*!	Similar to get_memory_map(), but also allows specifying the address space
6139 	for the memory in question and has saner semantics.
6140 	Returns \c B_OK when the complete range could be translated or
6141 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
6142 	case the actual number of entries is written to \c *_numEntries. Any other
6143 	error case indicates complete failure; \c *_numEntries will be set to \c 0
6144 	in this case.
6145 */
6146 status_t
6147 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
6148 	physical_entry* table, uint32* _numEntries)
6149 {
6150 	uint32 numEntries = *_numEntries;
6151 	*_numEntries = 0;
6152 
6153 	VMAddressSpace* addressSpace;
6154 	addr_t virtualAddress = (addr_t)address;
6155 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
6156 	phys_addr_t physicalAddress;
6157 	status_t status = B_OK;
6158 	int32 index = -1;
6159 	addr_t offset = 0;
6160 	bool interrupts = are_interrupts_enabled();
6161 
6162 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6163 		"entries)\n", team, address, numBytes, numEntries));
6164 
6165 	if (numEntries == 0 || numBytes == 0)
6166 		return B_BAD_VALUE;
6167 
6168 	// in which address space is the address to be found?
6169 	if (IS_USER_ADDRESS(virtualAddress)) {
6170 		if (team == B_CURRENT_TEAM)
6171 			addressSpace = VMAddressSpace::GetCurrent();
6172 		else
6173 			addressSpace = VMAddressSpace::Get(team);
6174 	} else
6175 		addressSpace = VMAddressSpace::GetKernel();
6176 
6177 	if (addressSpace == NULL)
6178 		return B_ERROR;
6179 
6180 	VMTranslationMap* map = addressSpace->TranslationMap();
6181 
6182 	if (interrupts)
6183 		map->Lock();
6184 
6185 	while (offset < numBytes) {
6186 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6187 		uint32 flags;
6188 
6189 		if (interrupts) {
6190 			status = map->Query((addr_t)address + offset, &physicalAddress,
6191 				&flags);
6192 		} else {
6193 			status = map->QueryInterrupt((addr_t)address + offset,
6194 				&physicalAddress, &flags);
6195 		}
6196 		if (status < B_OK)
6197 			break;
6198 		if ((flags & PAGE_PRESENT) == 0) {
6199 			panic("get_memory_map() called on unmapped memory!");
6200 			return B_BAD_ADDRESS;
6201 		}
6202 
6203 		if (index < 0 && pageOffset > 0) {
6204 			physicalAddress += pageOffset;
6205 			if (bytes > B_PAGE_SIZE - pageOffset)
6206 				bytes = B_PAGE_SIZE - pageOffset;
6207 		}
6208 
6209 		// need to switch to the next physical_entry?
6210 		if (index < 0 || table[index].address
6211 				!= physicalAddress - table[index].size) {
6212 			if ((uint32)++index + 1 > numEntries) {
6213 				// table too small
6214 				break;
6215 			}
6216 			table[index].address = physicalAddress;
6217 			table[index].size = bytes;
6218 		} else {
6219 			// page is contiguous with the current entry
6220 			table[index].size += bytes;
6221 		}
6222 
6223 		offset += bytes;
6224 	}
6225 
6226 	if (interrupts)
6227 		map->Unlock();
6228 
6229 	if (status != B_OK)
6230 		return status;
6231 
6232 	if ((uint32)index + 1 > numEntries) {
6233 		*_numEntries = index;
6234 		return B_BUFFER_OVERFLOW;
6235 	}
6236 
6237 	*_numEntries = index + 1;
6238 	return B_OK;
6239 }
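

/*	Sketch (buffer and length are placeholders): translating a virtual range
	into a small scatter/gather list and noticing that the table was too
	small.

		physical_entry entries[8];
		uint32 count = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
			entries, &count);
		if (status != B_OK && status != B_BUFFER_OVERFLOW)
			return status;
		// entries[0 .. count - 1] describe physically contiguous runs; with
		// B_BUFFER_OVERFLOW only part of the range was translated
*/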
6240 
6241 
6242 /*!	According to the BeBook, this function should always succeed.
6243 	This is no longer the case.
6244 */
6245 extern "C" int32
6246 __get_memory_map_haiku(const void* address, size_t numBytes,
6247 	physical_entry* table, int32 numEntries)
6248 {
6249 	uint32 entriesRead = numEntries;
6250 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6251 		table, &entriesRead);
6252 	if (error != B_OK)
6253 		return error;
6254 
6255 	// close the entry list
6256 
6257 	// if it's only one entry, we will silently accept the missing ending
6258 	if (numEntries == 1)
6259 		return B_OK;
6260 
6261 	if (entriesRead + 1 > (uint32)numEntries)
6262 		return B_BUFFER_OVERFLOW;
6263 
6264 	table[entriesRead].address = 0;
6265 	table[entriesRead].size = 0;
6266 
6267 	return B_OK;
6268 }
6269 
6270 
6271 area_id
6272 area_for(void* address)
6273 {
6274 	return vm_area_for((addr_t)address, true);
6275 }
6276 
6277 
6278 area_id
6279 find_area(const char* name)
6280 {
6281 	return VMAreas::Find(name);
6282 }
6283 
6284 
6285 status_t
6286 _get_area_info(area_id id, area_info* info, size_t size)
6287 {
6288 	if (size != sizeof(area_info) || info == NULL)
6289 		return B_BAD_VALUE;
6290 
6291 	AddressSpaceReadLocker locker;
6292 	VMArea* area;
6293 	status_t status = locker.SetFromArea(id, area);
6294 	if (status != B_OK)
6295 		return status;
6296 
6297 	fill_area_info(area, info, size);
6298 	return B_OK;
6299 }
6300 
6301 
6302 status_t
6303 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6304 {
6305 	addr_t nextBase = *(addr_t*)cookie;
6306 
6307 	// we're already through the list
6308 	if (nextBase == (addr_t)-1)
6309 		return B_ENTRY_NOT_FOUND;
6310 
6311 	if (team == B_CURRENT_TEAM)
6312 		team = team_get_current_team_id();
6313 
6314 	AddressSpaceReadLocker locker(team);
6315 	if (!locker.IsLocked())
6316 		return B_BAD_TEAM_ID;
6317 
6318 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6319 	if (area == NULL) {
6320 		nextBase = (addr_t)-1;
6321 		return B_ENTRY_NOT_FOUND;
6322 	}
6323 
6324 	fill_area_info(area, info, size);
6325 	*cookie = (ssize_t)(area->Base() + 1);
6326 
6327 	return B_OK;
6328 }
6329 
6330 
6331 status_t
6332 set_area_protection(area_id area, uint32 newProtection)
6333 {
6334 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6335 		newProtection, true);
6336 }
6337 
6338 
6339 status_t
6340 resize_area(area_id areaID, size_t newSize)
6341 {
6342 	return vm_resize_area(areaID, newSize, true);
6343 }
6344 
6345 
6346 /*!	Transfers the specified area to a new team. The caller must be the owner
6347 	of the area.
6348 */
6349 area_id
6350 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6351 	bool kernel)
6352 {
6353 	area_info info;
6354 	status_t status = get_area_info(id, &info);
6355 	if (status != B_OK)
6356 		return status;
6357 
6358 	if (info.team != thread_get_current_thread()->team->id)
6359 		return B_PERMISSION_DENIED;
6360 
6361 	// We need to mark the area cloneable so the following operations work.
6362 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6363 	if (status != B_OK)
6364 		return status;
6365 
6366 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6367 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6368 	if (clonedArea < 0)
6369 		return clonedArea;
6370 
6371 	status = vm_delete_area(info.team, id, kernel);
6372 	if (status != B_OK) {
6373 		vm_delete_area(target, clonedArea, kernel);
6374 		return status;
6375 	}
6376 
6377 	// Now we can reset the protection to whatever it was before.
6378 	set_area_protection(clonedArea, info.protection);
6379 
6380 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6381 
6382 	return clonedArea;
6383 }
6384 
6385 
6386 extern "C" area_id
6387 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6388 	size_t numBytes, uint32 addressSpec, uint32 protection,
6389 	void** _virtualAddress)
6390 {
6391 	if (!arch_vm_supports_protection(protection))
6392 		return B_NOT_SUPPORTED;
6393 
6394 	fix_protection(&protection);
6395 
6396 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6397 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6398 		false);
6399 }
6400 
6401 
6402 area_id
6403 clone_area(const char* name, void** _address, uint32 addressSpec,
6404 	uint32 protection, area_id source)
6405 {
6406 	if ((protection & B_KERNEL_PROTECTION) == 0)
6407 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6408 
6409 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6410 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6411 }
6412 
6413 
6414 area_id
6415 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6416 	uint32 protection, uint32 flags, uint32 guardSize,
6417 	const virtual_address_restrictions* virtualAddressRestrictions,
6418 	const physical_address_restrictions* physicalAddressRestrictions,
6419 	void** _address)
6420 {
6421 	fix_protection(&protection);
6422 
6423 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6424 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6425 		true, _address);
6426 }
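

/*	A minimal kernel-side sketch (name, size and protection are arbitrary):
	the two restriction structures take the place of the address/addressSpec
	pair used by the plain create_area() interface.

		virtual_address_restrictions virtualRestrictions = {};
		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
		physical_address_restrictions physicalRestrictions = {};

		void* address;
		area_id area = create_area_etc(VMAddressSpace::KernelID(),
			"example buffer", 16 * B_PAGE_SIZE, B_FULL_LOCK,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
			&virtualRestrictions, &physicalRestrictions, &address);
*/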
6427 
6428 
6429 extern "C" area_id
6430 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6431 	size_t size, uint32 lock, uint32 protection)
6432 {
6433 	fix_protection(&protection);
6434 
6435 	virtual_address_restrictions virtualRestrictions = {};
6436 	virtualRestrictions.address = *_address;
6437 	virtualRestrictions.address_specification = addressSpec;
6438 	physical_address_restrictions physicalRestrictions = {};
6439 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6440 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6441 		true, _address);
6442 }
6443 
6444 
6445 status_t
6446 delete_area(area_id area)
6447 {
6448 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6449 }
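

/*	From the kernel's point of view the public interface boils down to
	(a sketch; name and size are arbitrary):

		void* address;
		area_id area = create_area("example buffer", &address,
			B_ANY_KERNEL_ADDRESS, 4 * B_PAGE_SIZE, B_FULL_LOCK,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		if (area < B_OK)
			return area;
		// ... use the page-aligned memory at address ...
		delete_area(area);
*/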
6450 
6451 
6452 //	#pragma mark - Userland syscalls
6453 
6454 
6455 status_t
6456 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6457 	addr_t size)
6458 {
6459 	// filter out some unavailable values (for userland)
6460 	switch (addressSpec) {
6461 		case B_ANY_KERNEL_ADDRESS:
6462 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6463 			return B_BAD_VALUE;
6464 	}
6465 
6466 	addr_t address;
6467 
6468 	if (!IS_USER_ADDRESS(userAddress)
6469 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6470 		return B_BAD_ADDRESS;
6471 
6472 	status_t status = vm_reserve_address_range(
6473 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6474 		RESERVED_AVOID_BASE);
6475 	if (status != B_OK)
6476 		return status;
6477 
6478 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6479 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6480 			(void*)address, size);
6481 		return B_BAD_ADDRESS;
6482 	}
6483 
6484 	return B_OK;
6485 }
6486 
6487 
6488 status_t
6489 _user_unreserve_address_range(addr_t address, addr_t size)
6490 {
6491 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6492 		(void*)address, size);
6493 }
6494 
6495 
6496 area_id
6497 _user_area_for(void* address)
6498 {
6499 	return vm_area_for((addr_t)address, false);
6500 }
6501 
6502 
6503 area_id
6504 _user_find_area(const char* userName)
6505 {
6506 	char name[B_OS_NAME_LENGTH];
6507 
6508 	if (!IS_USER_ADDRESS(userName)
6509 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6510 		return B_BAD_ADDRESS;
6511 
6512 	return find_area(name);
6513 }
6514 
6515 
6516 status_t
6517 _user_get_area_info(area_id area, area_info* userInfo)
6518 {
6519 	if (!IS_USER_ADDRESS(userInfo))
6520 		return B_BAD_ADDRESS;
6521 
6522 	area_info info;
6523 	status_t status = get_area_info(area, &info);
6524 	if (status < B_OK)
6525 		return status;
6526 
6527 	// TODO: do we want to prevent userland from seeing kernel protections?
6528 	//info.protection &= B_USER_PROTECTION;
6529 
6530 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6531 		return B_BAD_ADDRESS;
6532 
6533 	return status;
6534 }
6535 
6536 
6537 status_t
6538 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6539 {
6540 	ssize_t cookie;
6541 
6542 	if (!IS_USER_ADDRESS(userCookie)
6543 		|| !IS_USER_ADDRESS(userInfo)
6544 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6545 		return B_BAD_ADDRESS;
6546 
6547 	area_info info;
6548 	status_t status = _get_next_area_info(team, &cookie, &info,
6549 		sizeof(area_info));
6550 	if (status != B_OK)
6551 		return status;
6552 
6553 	//info.protection &= B_USER_PROTECTION;
6554 
6555 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6556 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6557 		return B_BAD_ADDRESS;
6558 
6559 	return status;
6560 }
6561 
6562 
6563 status_t
6564 _user_set_area_protection(area_id area, uint32 newProtection)
6565 {
6566 	if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0)
6567 		return B_BAD_VALUE;
6568 
6569 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6570 		newProtection, false);
6571 }
6572 
6573 
6574 status_t
6575 _user_resize_area(area_id area, size_t newSize)
6576 {
6577 	// TODO: Since we restrict deleting of areas to those owned by the team,
6578 	// we should also do that for resizing (check other functions, too).
6579 	return vm_resize_area(area, newSize, false);
6580 }
6581 
6582 
6583 area_id
6584 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6585 	team_id target)
6586 {
6587 	// filter out some unavailable values (for userland)
6588 	switch (addressSpec) {
6589 		case B_ANY_KERNEL_ADDRESS:
6590 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6591 			return B_BAD_VALUE;
6592 	}
6593 
6594 	void* address;
6595 	if (!IS_USER_ADDRESS(userAddress)
6596 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6597 		return B_BAD_ADDRESS;
6598 
6599 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6600 	if (newArea < B_OK)
6601 		return newArea;
6602 
6603 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6604 		return B_BAD_ADDRESS;
6605 
6606 	return newArea;
6607 }
6608 
6609 
6610 area_id
6611 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6612 	uint32 protection, area_id sourceArea)
6613 {
6614 	char name[B_OS_NAME_LENGTH];
6615 	void* address;
6616 
6617 	// filter out some unavailable values (for userland)
6618 	switch (addressSpec) {
6619 		case B_ANY_KERNEL_ADDRESS:
6620 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6621 			return B_BAD_VALUE;
6622 	}
6623 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6624 		return B_BAD_VALUE;
6625 
6626 	if (!IS_USER_ADDRESS(userName)
6627 		|| !IS_USER_ADDRESS(userAddress)
6628 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6629 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6630 		return B_BAD_ADDRESS;
6631 
6632 	fix_protection(&protection);
6633 
6634 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6635 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6636 		false);
6637 	if (clonedArea < B_OK)
6638 		return clonedArea;
6639 
6640 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6641 		delete_area(clonedArea);
6642 		return B_BAD_ADDRESS;
6643 	}
6644 
6645 	return clonedArea;
6646 }
6647 
6648 
6649 area_id
6650 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6651 	size_t size, uint32 lock, uint32 protection)
6652 {
6653 	char name[B_OS_NAME_LENGTH];
6654 	void* address;
6655 
6656 	// filter out some unavailable values (for userland)
6657 	switch (addressSpec) {
6658 		case B_ANY_KERNEL_ADDRESS:
6659 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6660 			return B_BAD_VALUE;
6661 	}
6662 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6663 		return B_BAD_VALUE;
6664 
6665 	if (!IS_USER_ADDRESS(userName)
6666 		|| !IS_USER_ADDRESS(userAddress)
6667 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6668 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6669 		return B_BAD_ADDRESS;
6670 
6671 	if (addressSpec == B_EXACT_ADDRESS
6672 		&& IS_KERNEL_ADDRESS(address))
6673 		return B_BAD_VALUE;
6674 
6675 	if (addressSpec == B_ANY_ADDRESS)
6676 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6677 	if (addressSpec == B_BASE_ADDRESS)
6678 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6679 
6680 	fix_protection(&protection);
6681 
6682 	virtual_address_restrictions virtualRestrictions = {};
6683 	virtualRestrictions.address = address;
6684 	virtualRestrictions.address_specification = addressSpec;
6685 	physical_address_restrictions physicalRestrictions = {};
6686 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6687 		size, lock, protection, 0, 0, &virtualRestrictions,
6688 		&physicalRestrictions, false, &address);
6689 
6690 	if (area >= B_OK
6691 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6692 		delete_area(area);
6693 		return B_BAD_ADDRESS;
6694 	}
6695 
6696 	return area;
6697 }
6698 
6699 
6700 status_t
6701 _user_delete_area(area_id area)
6702 {
6703 	// Unlike the BeOS implementation, you can now only delete areas
6704 	// that you have created yourself from userland.
6705 	// The documentation to delete_area() explicitly states that this
6706 	// will be restricted in the future, and so it will.
6707 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6708 }
6709 
6710 
6711 // TODO: create a BeOS style call for this!
6712 
6713 area_id
6714 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6715 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6716 	int fd, off_t offset)
6717 {
6718 	char name[B_OS_NAME_LENGTH];
6719 	void* address;
6720 	area_id area;
6721 
6722 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6723 		return B_BAD_VALUE;
6724 
6725 	fix_protection(&protection);
6726 
6727 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6728 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6729 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6730 		return B_BAD_ADDRESS;
6731 
6732 	if (addressSpec == B_EXACT_ADDRESS) {
6733 		if ((addr_t)address + size < (addr_t)address
6734 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6735 			return B_BAD_VALUE;
6736 		}
6737 		if (!IS_USER_ADDRESS(address)
6738 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6739 			return B_BAD_ADDRESS;
6740 		}
6741 	}
6742 
6743 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6744 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6745 		false);
6746 	if (area < B_OK)
6747 		return area;
6748 
6749 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6750 		return B_BAD_ADDRESS;
6751 
6752 	return area;
6753 }
6754 
6755 
6756 status_t
6757 _user_unmap_memory(void* _address, size_t size)
6758 {
6759 	addr_t address = (addr_t)_address;
6760 
6761 	// check params
6762 	if (size == 0 || (addr_t)address + size < (addr_t)address
6763 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6764 		return B_BAD_VALUE;
6765 	}
6766 
6767 	if (!IS_USER_ADDRESS(address)
6768 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6769 		return B_BAD_ADDRESS;
6770 	}
6771 
6772 	// Write lock the address space and ensure the address range is not wired.
6773 	AddressSpaceWriteLocker locker;
6774 	do {
6775 		status_t status = locker.SetTo(team_get_current_team_id());
6776 		if (status != B_OK)
6777 			return status;
6778 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6779 			size, &locker));
6780 
6781 	// unmap
6782 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6783 }
6784 
6785 
6786 status_t
6787 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6788 {
6789 	// check address range
6790 	addr_t address = (addr_t)_address;
6791 	size = PAGE_ALIGN(size);
6792 
6793 	if ((address % B_PAGE_SIZE) != 0)
6794 		return B_BAD_VALUE;
6795 	if (!is_user_address_range(_address, size)) {
6796 		// weird error code required by POSIX
6797 		return ENOMEM;
6798 	}
6799 
6800 	// extend and check protection
6801 	if ((protection & ~B_USER_PROTECTION) != 0)
6802 		return B_BAD_VALUE;
6803 
6804 	fix_protection(&protection);
6805 
6806 	// We need to write lock the address space, since we're going to play with
6807 	// the areas. Also make sure that none of the areas is wired and that we're
6808 	// actually allowed to change the protection.
6809 	AddressSpaceWriteLocker locker;
6810 
6811 	bool restart;
6812 	do {
6813 		restart = false;
6814 
6815 		status_t status = locker.SetTo(team_get_current_team_id());
6816 		if (status != B_OK)
6817 			return status;
6818 
6819 		// First round: Check whether the whole range is covered by areas and we
6820 		// are allowed to modify them.
6821 		addr_t currentAddress = address;
6822 		size_t sizeLeft = size;
6823 		while (sizeLeft > 0) {
6824 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6825 			if (area == NULL)
6826 				return B_NO_MEMORY;
6827 
6828 			if ((area->protection & B_KERNEL_AREA) != 0)
6829 				return B_NOT_ALLOWED;
6830 			if (area->protection_max != 0
6831 				&& (protection & area->protection_max) != (protection & B_USER_PROTECTION)) {
6832 				return B_NOT_ALLOWED;
6833 			}
6834 
6835 			addr_t offset = currentAddress - area->Base();
6836 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6837 
6838 			AreaCacheLocker cacheLocker(area);
6839 
6840 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6841 					&locker, &cacheLocker)) {
6842 				restart = true;
6843 				break;
6844 			}
6845 
6846 			cacheLocker.Unlock();
6847 
6848 			currentAddress += rangeSize;
6849 			sizeLeft -= rangeSize;
6850 		}
6851 	} while (restart);
6852 
6853 	// Second round: If the protections differ from that of the area, create a
6854 	// page protection array and re-map mapped pages.
6855 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6856 	addr_t currentAddress = address;
6857 	size_t sizeLeft = size;
6858 	while (sizeLeft > 0) {
6859 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6860 		if (area == NULL)
6861 			return B_NO_MEMORY;
6862 
6863 		addr_t offset = currentAddress - area->Base();
6864 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6865 
6866 		currentAddress += rangeSize;
6867 		sizeLeft -= rangeSize;
6868 
6869 		if (area->page_protections == NULL) {
6870 			if (area->protection == protection)
6871 				continue;
6872 			if (offset == 0 && rangeSize == area->Size()) {
6873 				// The whole area is covered: let set_area_protection handle it.
6874 				status_t status = vm_set_area_protection(area->address_space->ID(),
6875 					area->id, protection, false);
6876 				if (status != B_OK)
6877 					return status;
6878 				continue;
6879 			}
6880 
6881 			status_t status = allocate_area_page_protections(area);
6882 			if (status != B_OK)
6883 				return status;
6884 		}
6885 
6886 		// We need to lock the complete cache chain, since we potentially unmap
6887 		// pages of lower caches.
6888 		VMCache* topCache = vm_area_get_locked_cache(area);
6889 		VMCacheChainLocker cacheChainLocker(topCache);
6890 		cacheChainLocker.LockAllSourceCaches();
6891 
6892 		// Adjust the committed size, if necessary.
6893 		if (topCache->source != NULL && topCache->temporary) {
6894 			const bool becomesWritable = (protection & B_WRITE_AREA) != 0;
6895 			ssize_t commitmentChange = 0;
6896 			for (addr_t pageAddress = area->Base() + offset;
6897 					pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6898 				if (topCache->LookupPage(pageAddress) != NULL) {
6899 					// This page should already be accounted for in the commitment.
6900 					continue;
6901 				}
6902 
6903 				const bool isWritable
6904 					= (get_area_page_protection(area, pageAddress) & B_WRITE_AREA) != 0;
6905 
6906 				if (becomesWritable && !isWritable)
6907 					commitmentChange += B_PAGE_SIZE;
6908 				else if (!becomesWritable && isWritable)
6909 					commitmentChange -= B_PAGE_SIZE;
6910 			}
6911 
6912 			if (commitmentChange != 0) {
6913 				const off_t newCommitment = topCache->committed_size + commitmentChange;
6914 				ASSERT(newCommitment <= (topCache->virtual_end - topCache->virtual_base));
6915 				status_t status = topCache->Commit(newCommitment, VM_PRIORITY_USER);
6916 				if (status != B_OK)
6917 					return status;
6918 			}
6919 		}
6920 
6921 		for (addr_t pageAddress = area->Base() + offset;
6922 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6923 			map->Lock();
6924 
6925 			set_area_page_protection(area, pageAddress, protection);
6926 
6927 			phys_addr_t physicalAddress;
6928 			uint32 flags;
6929 
6930 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6931 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6932 				map->Unlock();
6933 				continue;
6934 			}
6935 
6936 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6937 			if (page == NULL) {
6938 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6939 					"\n", area, physicalAddress);
6940 				map->Unlock();
6941 				return B_ERROR;
6942 			}
6943 
6944 			// If the page is not in the topmost cache and write access is
6945 			// requested, we have to unmap it. Otherwise we can re-map it with
6946 			// the new protection.
6947 			bool unmapPage = page->Cache() != topCache
6948 				&& (protection & B_WRITE_AREA) != 0;
6949 
6950 			if (!unmapPage)
6951 				map->ProtectPage(area, pageAddress, protection);
6952 
6953 			map->Unlock();
6954 
6955 			if (unmapPage) {
6956 				DEBUG_PAGE_ACCESS_START(page);
6957 				unmap_page(area, pageAddress);
6958 				DEBUG_PAGE_ACCESS_END(page);
6959 			}
6960 		}
6961 	}
6962 
6963 	return B_OK;
6964 }
6965 
6966 
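/*!	Backs the msync() POSIX call: writes modified pages of the vnode-backed
	mappings in the given range back to disk. With MS_SYNC the pages are
	written synchronously, with MS_ASYNC they are merely scheduled for
	writing. MS_INVALIDATE needs no work here, since multiple mappings of a
	file never get out of sync in this VM (see the note at the end).
*/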
6967 status_t
6968 _user_sync_memory(void* _address, size_t size, uint32 flags)
6969 {
6970 	addr_t address = (addr_t)_address;
6971 	size = PAGE_ALIGN(size);
6972 
6973 	// check params
6974 	if ((address % B_PAGE_SIZE) != 0)
6975 		return B_BAD_VALUE;
6976 	if (!is_user_address_range(_address, size)) {
6977 		// weird error code required by POSIX
6978 		return ENOMEM;
6979 	}
6980 
6981 	bool writeSync = (flags & MS_SYNC) != 0;
6982 	bool writeAsync = (flags & MS_ASYNC) != 0;
6983 	if (writeSync && writeAsync)
6984 		return B_BAD_VALUE;
6985 
6986 	if (size == 0 || (!writeSync && !writeAsync))
6987 		return B_OK;
6988 
6989 	// iterate through the range and sync all concerned areas
6990 	while (size > 0) {
6991 		// read lock the address space
6992 		AddressSpaceReadLocker locker;
6993 		status_t error = locker.SetTo(team_get_current_team_id());
6994 		if (error != B_OK)
6995 			return error;
6996 
6997 		// get the first area
6998 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6999 		if (area == NULL)
7000 			return B_NO_MEMORY;
7001 
7002 		uint32 offset = address - area->Base();
7003 		size_t rangeSize = min_c(area->Size() - offset, size);
7004 		offset += area->cache_offset;
7005 
7006 		// lock the cache
7007 		AreaCacheLocker cacheLocker(area);
7008 		if (!cacheLocker)
7009 			return B_BAD_VALUE;
7010 		VMCache* cache = area->cache;
7011 
7012 		locker.Unlock();
7013 
7014 		uint32 firstPage = offset >> PAGE_SHIFT;
7015 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
7016 
7017 		// write the pages
7018 		if (cache->type == CACHE_TYPE_VNODE) {
7019 			if (writeSync) {
7020 				// synchronous
7021 				error = vm_page_write_modified_page_range(cache, firstPage,
7022 					endPage);
7023 				if (error != B_OK)
7024 					return error;
7025 			} else {
7026 				// asynchronous
7027 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
7028 				// TODO: This is probably not quite what is supposed to happen.
7029 				// Especially when a lot has to be written, it might take ages
7030 				// until it really hits the disk.
7031 			}
7032 		}
7033 
7034 		address += rangeSize;
7035 		size -= rangeSize;
7036 	}
7037 
7038 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
7039 	// synchronize multiple mappings of the same file. In our VM they never get
7040 	// out of sync, though, so we don't have to do anything.
7041 
7042 	return B_OK;
7043 }
7044 
7045 
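/*!	Backs madvise(). Currently only MADV_FREE has an effect: it discards the
	pages in the given range (after waiting for any wired sub-ranges to become
	unwired). The remaining advice values are accepted but not yet implemented.
*/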
7046 status_t
7047 _user_memory_advice(void* _address, size_t size, uint32 advice)
7048 {
7049 	addr_t address = (addr_t)_address;
7050 	if ((address % B_PAGE_SIZE) != 0)
7051 		return B_BAD_VALUE;
7052 
7053 	size = PAGE_ALIGN(size);
7054 	if (!is_user_address_range(_address, size)) {
7055 		// weird error code required by POSIX
7056 		return B_NO_MEMORY;
7057 	}
7058 
7059 	switch (advice) {
7060 		case MADV_NORMAL:
7061 		case MADV_SEQUENTIAL:
7062 		case MADV_RANDOM:
7063 		case MADV_WILLNEED:
7064 		case MADV_DONTNEED:
7065 			// TODO: Implement!
7066 			break;
7067 
7068 		case MADV_FREE:
7069 		{
7070 			AddressSpaceWriteLocker locker;
7071 			do {
7072 				status_t status = locker.SetTo(team_get_current_team_id());
7073 				if (status != B_OK)
7074 					return status;
7075 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
7076 					address, size, &locker));
7077 
7078 			discard_address_range(locker.AddressSpace(), address, size, false);
7079 			break;
7080 		}
7081 
7082 		default:
7083 			return B_BAD_VALUE;
7084 	}
7085 
7086 	return B_OK;
7087 }
7088 
7089 
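/*!	Returns the effective page protection and the wiring type of the area
	containing \a address in the address space of team \a teamID, copied to
	the userland buffers \a _protected and \a _lock.
*/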
7090 status_t
7091 _user_get_memory_properties(team_id teamID, const void* address,
7092 	uint32* _protected, uint32* _lock)
7093 {
7094 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
7095 		return B_BAD_ADDRESS;
7096 
7097 	AddressSpaceReadLocker locker;
7098 	status_t error = locker.SetTo(teamID);
7099 	if (error != B_OK)
7100 		return error;
7101 
7102 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
7103 	if (area == NULL)
7104 		return B_NO_MEMORY;
7105 
7106 	uint32 protection = get_area_page_protection(area, (addr_t)address);
7107 	uint32 wiring = area->wiring;
7108 
7109 	locker.Unlock();
7110 
7111 	error = user_memcpy(_protected, &protection, sizeof(protection));
7112 	if (error != B_OK)
7113 		return error;
7114 
7115 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
7116 
7117 	return error;
7118 }
7119 
7120 
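/*!	Common backend for _user_mlock() and _user_munlock(): walks all areas
	covering the given range and marks the pages of their anonymous caches as
	non-swappable (\a swappable == false) or swappable again. The memory is
	temporarily locked via lock_memory_etc() while the cache chain is
	manipulated; areas backed by other cache types yield EINVAL. Without swap
	support this is a no-op.
*/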
7121 static status_t
7122 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
7123 {
7124 #if ENABLE_SWAP_SUPPORT
7125 	// check address range
7126 	addr_t address = (addr_t)_address;
7127 	size = PAGE_ALIGN(size);
7128 
7129 	if ((address % B_PAGE_SIZE) != 0)
7130 		return EINVAL;
7131 	if (!is_user_address_range(_address, size))
7132 		return EINVAL;
7133 
7134 	const addr_t endAddress = address + size;
7135 
7136 	AddressSpaceReadLocker addressSpaceLocker;
7137 	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
7138 	if (error != B_OK)
7139 		return error;
7140 	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
7141 
7142 	// iterate through all concerned areas
7143 	addr_t nextAddress = address;
7144 	while (nextAddress != endAddress) {
7145 		// get the next area
7146 		VMArea* area = addressSpace->LookupArea(nextAddress);
7147 		if (area == NULL) {
7148 			error = B_BAD_ADDRESS;
7149 			break;
7150 		}
7151 
7152 		const addr_t areaStart = nextAddress;
7153 		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
7154 		nextAddress = areaEnd;
7155 
7156 		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7157 		if (error != B_OK) {
7158 			// We don't need to unset or reset things on failure.
7159 			break;
7160 		}
7161 
7162 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
7163 		VMAnonymousCache* anonCache = NULL;
7164 		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
7165 			// This memory can never be swapped anyway. Nothing to do.
7166 		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
7167 			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
7168 				areaEnd - areaStart, swappable);
7169 		} else {
7170 			// Some other cache type? We cannot affect anything here.
7171 			error = EINVAL;
7172 		}
7173 
7174 		cacheChainLocker.Unlock();
7175 
7176 		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7177 		if (error != B_OK)
7178 			break;
7179 	}
7180 
7181 	return error;
7182 #else
7183 	// No swap support? Nothing to do.
7184 	return B_OK;
7185 #endif
7186 }
7187 
7188 
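/*!	Backs mlock(): prevents the pages in the given range from being swapped
	out.
*/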
7189 status_t
7190 _user_mlock(const void* _address, size_t size)
7191 {
7192 	return user_set_memory_swappable(_address, size, false);
7193 }
7194 
7195 
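/*!	Backs munlock(): allows the pages in the given range to be swapped out
	again.
*/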
7196 status_t
7197 _user_munlock(const void* _address, size_t size)
7198 {
7199 	// TODO: B_SHARED_AREAs need to be handled a bit differently:
7200 	// if multiple clones of an area had mlock() called on them,
7201 	// munlock() must also be called on all of them to actually unlock.
7202 	// (At present, the first munlock() will unlock all.)
7203 	// TODO: fork() should automatically unlock memory in the child.
7204 	return user_set_memory_swappable(_address, size, true);
7205 }
7206 
7207 
7208 // #pragma mark -- compatibility
7209 
7210 
7211 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7212 
7213 
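// The old physical_entry layout with 32-bit address and size fields, as
// expected by binaries built against the pre-Haiku (BeOS) API.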
7214 struct physical_entry_beos {
7215 	uint32	address;
7216 	uint32	size;
7217 };
7218 
7219 
7220 /*!	The physical_entry structure has changed. We need to translate it to the
7221 	old one.
7222 */
7223 extern "C" int32
7224 __get_memory_map_beos(const void* _address, size_t numBytes,
7225 	physical_entry_beos* table, int32 numEntries)
7226 {
7227 	if (numEntries <= 0)
7228 		return B_BAD_VALUE;
7229 
7230 	const uint8* address = (const uint8*)_address;
7231 
7232 	int32 count = 0;
7233 	while (numBytes > 0 && count < numEntries) {
7234 		physical_entry entry;
7235 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
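		// B_BUFFER_OVERFLOW only means that the range spans more than the one
		// entry we asked for; the entry we got is still valid, so carry on.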
7236 		if (result < 0) {
7237 			if (result != B_BUFFER_OVERFLOW)
7238 				return result;
7239 		}
7240 
7241 		if (entry.address >= (phys_addr_t)1 << 32) {
7242 			panic("get_memory_map(): Address is greater than 4 GB!");
7243 			return B_ERROR;
7244 		}
7245 
7246 		table[count].address = entry.address;
7247 		table[count++].size = entry.size;
7248 
7249 		address += entry.size;
7250 		numBytes -= entry.size;
7251 	}
7252 
7253 	// null-terminate the table, if possible
7254 	if (count < numEntries) {
7255 		table[count].address = 0;
7256 		table[count].size = 0;
7257 	}
7258 
7259 	return B_OK;
7260 }
7261 
7262 
7263 /*!	The type of the \a physicalAddress parameter has changed from void* to
7264 	phys_addr_t.
7265 */
7266 extern "C" area_id
7267 __map_physical_memory_beos(const char* name, void* physicalAddress,
7268 	size_t numBytes, uint32 addressSpec, uint32 protection,
7269 	void** _virtualAddress)
7270 {
7271 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7272 		addressSpec, protection, _virtualAddress);
7273 }
7274 
7275 
7276 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7277 	we meddle with the \a lock parameter to force 32 bit.
7278 */
7279 extern "C" area_id
7280 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7281 	size_t size, uint32 lock, uint32 protection)
7282 {
7283 	switch (lock) {
7284 		case B_NO_LOCK:
7285 			break;
7286 		case B_FULL_LOCK:
7287 		case B_LAZY_LOCK:
7288 			lock = B_32_BIT_FULL_LOCK;
7289 			break;
7290 		case B_CONTIGUOUS:
7291 			lock = B_32_BIT_CONTIGUOUS;
7292 			break;
7293 	}
7294 
7295 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7296 		protection);
7297 }
7298 
7299 
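// On 32-bit x86 with a 64-bit phys_addr_t, old binaries bind to the
// BeOS-compatible wrappers via the original "BASE" symbol version, while the
// current implementations are exported as the default ("@@") versions under
// "1_ALPHA3".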
7300 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7301 	"BASE");
7302 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7303 	"map_physical_memory@", "BASE");
7304 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7305 	"BASE");
7306 
7307 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7308 	"get_memory_map@@", "1_ALPHA3");
7309 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7310 	"map_physical_memory@@", "1_ALPHA3");
7311 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7312 	"1_ALPHA3");
7313 
7314 
7315 #else
7316 
7317 
7318 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7319 	"get_memory_map@@", "BASE");
7320 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7321 	"map_physical_memory@@", "BASE");
7322 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7323 	"BASE");
7324 
7325 
7326 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7327