xref: /haiku/src/system/kernel/vm/vm.cpp (revision 19a1dd49cff7ec6a6467cbe2f7006b88931c37d8)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 namespace {
77 
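/*!	Locking policy used by AreaCacheLocker below: the cache is expected to be
	locked already (see vm_area_get_locked_cache()), so Lock() isn't supported;
	Unlock() releases the reference and the lock via
	vm_area_put_locked_cache().
*/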
78 class AreaCacheLocking {
79 public:
80 	inline bool Lock(VMCache* lockable)
81 	{
82 		return false;
83 	}
84 
85 	inline void Unlock(VMCache* lockable)
86 	{
87 		vm_area_put_locked_cache(lockable);
88 	}
89 };
90 
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 	inline AreaCacheLocker(VMCache* cache = NULL)
94 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95 	{
96 	}
97 
98 	inline AreaCacheLocker(VMArea* area)
99 		: AutoLocker<VMCache, AreaCacheLocking>()
100 	{
101 		SetTo(area);
102 	}
103 
104 	inline void SetTo(VMCache* cache, bool alreadyLocked)
105 	{
106 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107 	}
108 
109 	inline void SetTo(VMArea* area)
110 	{
111 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113 	}
114 };
115 
116 
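/*!	Locks a chain of caches from the topmost (consumer) cache down towards its
	sources. Each additionally locked source cache gets a reference, and the
	previously locked consumer is remembered in the cache's user data, so that
	Unlock() can release the chain in source -> consumer order again.

	Illustrative usage sketch -- this mirrors how cut_area() and
	discard_area_range() below use the class, it is not additional API:

		VMCache* cache = vm_area_get_locked_cache(area);
		VMCacheChainLocker cacheChainLocker(cache);
		cacheChainLocker.LockAllSourceCaches();
		// ... work on the locked chain ...
		// the destructor (or an explicit Unlock()) unlocks everything again
*/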
117 class VMCacheChainLocker {
118 public:
119 	VMCacheChainLocker()
120 		:
121 		fTopCache(NULL),
122 		fBottomCache(NULL)
123 	{
124 	}
125 
126 	VMCacheChainLocker(VMCache* topCache)
127 		:
128 		fTopCache(topCache),
129 		fBottomCache(topCache)
130 	{
131 	}
132 
133 	~VMCacheChainLocker()
134 	{
135 		Unlock();
136 	}
137 
138 	void SetTo(VMCache* topCache)
139 	{
140 		fTopCache = topCache;
141 		fBottomCache = topCache;
142 
143 		if (topCache != NULL)
144 			topCache->SetUserData(NULL);
145 	}
146 
147 	VMCache* LockSourceCache()
148 	{
149 		if (fBottomCache == NULL || fBottomCache->source == NULL)
150 			return NULL;
151 
152 		VMCache* previousCache = fBottomCache;
153 
154 		fBottomCache = fBottomCache->source;
155 		fBottomCache->Lock();
156 		fBottomCache->AcquireRefLocked();
157 		fBottomCache->SetUserData(previousCache);
158 
159 		return fBottomCache;
160 	}
161 
162 	void LockAllSourceCaches()
163 	{
164 		while (LockSourceCache() != NULL) {
165 		}
166 	}
167 
168 	void Unlock(VMCache* exceptCache = NULL)
169 	{
170 		if (fTopCache == NULL)
171 			return;
172 
173 		// Unlock caches in source -> consumer direction. This is important to
174 		// avoid double-locking and a reversal of locking order in case a cache
175 		// is eligible for merging.
176 		VMCache* cache = fBottomCache;
177 		while (cache != NULL) {
178 			VMCache* nextCache = (VMCache*)cache->UserData();
179 			if (cache != exceptCache)
180 				cache->ReleaseRefAndUnlock(cache != fTopCache);
181 
182 			if (cache == fTopCache)
183 				break;
184 
185 			cache = nextCache;
186 		}
187 
188 		fTopCache = NULL;
189 		fBottomCache = NULL;
190 	}
191 
192 	void UnlockKeepRefs(bool keepTopCacheLocked)
193 	{
194 		if (fTopCache == NULL)
195 			return;
196 
197 		VMCache* nextCache = fBottomCache;
198 		VMCache* cache = NULL;
199 
200 		while (keepTopCacheLocked
201 				? nextCache != fTopCache : cache != fTopCache) {
202 			cache = nextCache;
203 			nextCache = (VMCache*)cache->UserData();
204 			cache->Unlock(cache != fTopCache);
205 		}
206 	}
207 
208 	void RelockCaches(bool topCacheLocked)
209 	{
210 		if (fTopCache == NULL)
211 			return;
212 
213 		VMCache* nextCache = fTopCache;
214 		VMCache* cache = NULL;
215 		if (topCacheLocked) {
216 			cache = nextCache;
217 			nextCache = cache->source;
218 		}
219 
220 		while (cache != fBottomCache && nextCache != NULL) {
221 			VMCache* consumer = cache;
222 			cache = nextCache;
223 			nextCache = cache->source;
224 			cache->Lock();
225 			cache->SetUserData(consumer);
226 		}
227 	}
228 
229 private:
230 	VMCache*	fTopCache;
231 	VMCache*	fBottomCache;
232 };
233 
234 } // namespace
235 
236 
237 // The memory reserve an allocation of a given priority must not touch.
238 static const size_t kMemoryReserveForPriority[] = {
239 	VM_MEMORY_RESERVE_USER,		// user
240 	VM_MEMORY_RESERVE_SYSTEM,	// system
241 	0							// VIP
242 };
243 
244 
245 ObjectCache* gPageMappingsObjectCache;
246 
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248 
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
253 
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
255 
256 #if DEBUG_CACHE_LIST
257 
258 struct cache_info {
259 	VMCache*	cache;
260 	addr_t		page_count;
261 	addr_t		committed;
262 };
263 
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
266 
267 #endif	// DEBUG_CACHE_LIST
268 
269 
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 	bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 	int protection, int mapping, uint32 flags,
278 	const virtual_address_restrictions* addressRestrictions, bool kernel,
279 	VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
281 
282 
283 //	#pragma mark -
284 
285 
286 #if VM_PAGE_FAULT_TRACING
287 
288 namespace VMPageFaultTracing {
289 
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293 		:
294 		fAddress(address),
295 		fPC(pc),
296 		fWrite(write),
297 		fUser(user)
298 	{
299 		Initialized();
300 	}
301 
302 	virtual void AddDump(TraceOutput& out)
303 	{
304 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306 	}
307 
308 private:
309 	addr_t	fAddress;
310 	addr_t	fPC;
311 	bool	fWrite;
312 	bool	fUser;
313 };
314 
315 
316 // page fault errors
317 enum {
318 	PAGE_FAULT_ERROR_NO_AREA		= 0,
319 	PAGE_FAULT_ERROR_KERNEL_ONLY,
320 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 	PAGE_FAULT_ERROR_READ_PROTECTED,
322 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325 };
326 
327 
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 	PageFaultError(area_id area, status_t error)
331 		:
332 		fArea(area),
333 		fError(error)
334 	{
335 		Initialized();
336 	}
337 
338 	virtual void AddDump(TraceOutput& out)
339 	{
340 		switch (fError) {
341 			case PAGE_FAULT_ERROR_NO_AREA:
342 				out.Print("page fault error: no area");
343 				break;
344 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 				out.Print("page fault error: area: %ld, kernel only", fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 				out.Print("page fault error: area: %ld, write protected",
349 					fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_READ_PROTECTED:
352 				out.Print("page fault error: area: %ld, read protected", fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 				out.Print("page fault error: area: %ld, execute protected",
356 					fArea);
357 				break;
358 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 				out.Print("page fault error: kernel touching bad user memory");
360 				break;
361 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 				out.Print("page fault error: no address space");
363 				break;
364 			default:
365 				out.Print("page fault error: area: %ld, error: %s", fArea,
366 					strerror(fError));
367 				break;
368 		}
369 	}
370 
371 private:
372 	area_id		fArea;
373 	status_t	fError;
374 };
375 
376 
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 			vm_page* page)
381 		:
382 		fArea(area),
383 		fTopCache(topCache),
384 		fCache(cache),
385 		fPage(page)
386 	{
387 		Initialized();
388 	}
389 
390 	virtual void AddDump(TraceOutput& out)
391 	{
392 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 			"page: %p", fArea, fTopCache, fCache, fPage);
394 	}
395 
396 private:
397 	area_id		fArea;
398 	VMCache*	fTopCache;
399 	VMCache*	fCache;
400 	vm_page*	fPage;
401 };
402 
403 }	// namespace VMPageFaultTracing
404 
405 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 #	define TPF(x) ;
408 #endif	// VM_PAGE_FAULT_TRACING
409 
410 
411 //	#pragma mark -
412 
413 
414 /*!	The page's cache must be locked.
415 */
416 static inline void
417 increment_page_wired_count(vm_page* page)
418 {
419 	if (!page->IsMapped())
420 		atomic_add(&gMappedPagesCount, 1);
421 	page->IncrementWiredCount();
422 }
423 
424 
425 /*!	The page's cache must be locked.
426 */
427 static inline void
428 decrement_page_wired_count(vm_page* page)
429 {
430 	page->DecrementWiredCount();
431 	if (!page->IsMapped())
432 		atomic_add(&gMappedPagesCount, -1);
433 }
434 
435 
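/*!	Returns the virtual address within \a area at which \a page is mapped,
	derived from the page's cache offset relative to the area's cache offset.
*/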
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
438 {
439 	return area->Base()
440 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441 }
442 
443 
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
447 {
448 	VMAreaHash::ReadLock();
449 
450 	VMArea* area = VMAreaHash::LookupLocked(id);
451 	if (area != NULL && area->address_space != addressSpace)
452 		area = NULL;
453 
454 	VMAreaHash::ReadUnlock();
455 
456 	return area;
457 }
458 
459 
460 static status_t
461 allocate_area_page_protections(VMArea* area)
462 {
463 	// In the page protections we store only the three user protections,
464 	// so we use 4 bits per page.
465 	size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 	area->page_protections = (uint8*)malloc_etc(bytes,
467 		HEAP_DONT_LOCK_KERNEL_SPACE);
468 	if (area->page_protections == NULL)
469 		return B_NO_MEMORY;
470 
471 	// init the page protections for all pages to that of the area
472 	uint32 areaProtection = area->protection
473 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 	memset(area->page_protections, areaProtection | (areaProtection << 4),
475 		bytes);
476 	return B_OK;
477 }
478 
479 
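/*!	Stores the per-page protection for the page at \a pageAddress. Two pages
	share one byte of the page_protections array: pages with an even index use
	the low nibble, pages with an odd index the high nibble.
*/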
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482 {
483 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 	uint8& entry = area->page_protections[pageIndex / 2];
486 	if (pageIndex % 2 == 0)
487 		entry = (entry & 0xf0) | protection;
488 	else
489 		entry = (entry & 0x0f) | (protection << 4);
490 }
491 
492 
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
495 {
496 	if (area->page_protections == NULL)
497 		return area->protection;
498 
499 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 	uint32 protection = area->page_protections[pageIndex / 2];
501 	if (pageIndex % 2 == 0)
502 		protection &= 0x0f;
503 	else
504 		protection >>= 4;
505 
506 	// If this is a kernel area we translate the user flags to kernel flags.
507 	if (area->address_space == VMAddressSpace::Kernel()) {
508 		uint32 kernelProtection = 0;
509 		if ((protection & B_READ_AREA) != 0)
510 			kernelProtection |= B_KERNEL_READ_AREA;
511 		if ((protection & B_WRITE_AREA) != 0)
512 			kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 		return kernelProtection;
515 	}
516 
517 	return protection | B_KERNEL_READ_AREA
518 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519 }
520 
521 
522 /*!	The caller must have reserved enough pages that the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue,
575 		// since otherwise the page daemon wouldn't keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
586 
587 
588 /*!	The caller must hold the lock of the page's cache.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	The caller must hold the lock of all mapped pages' caches.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
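/*!	Clips the range given by \a address and \a size against \a area. On
	return \a address and \a size describe the intersection and \a offset its
	offset relative to the area's base. Returns \c false if the range doesn't
	intersect the area at all.
*/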
609 static inline bool
610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
611 {
612 	if (address < area->Base()) {
613 		offset = area->Base() - address;
614 		if (offset >= size)
615 			return false;
616 
617 		address = area->Base();
618 		size -= offset;
619 		offset = 0;
620 		if (size > area->Size())
621 			size = area->Size();
622 
623 		return true;
624 	}
625 
626 	offset = address - area->Base();
627 	if (offset >= area->Size())
628 		return false;
629 
630 	if (size >= area->Size() - offset)
631 		size = area->Size() - offset;
632 
633 	return true;
634 }
635 
636 
637 /*!	Cuts a piece out of an area. If the given cut range covers the complete
638 	area, it is deleted. If it covers the beginning or the end, the area is
639 	resized accordingly. If the range covers some part in the middle of the
640 	area, it is split in two; in this case the second area is returned via
641 	\a _secondArea (the variable is left untouched in the other cases).
642 	The address space must be write locked.
643 	The caller must ensure that no part of the given range is wired.
644 */
645 static status_t
646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
647 	addr_t size, VMArea** _secondArea, bool kernel)
648 {
649 	addr_t offset;
650 	if (!intersect_area(area, address, size, offset))
651 		return B_OK;
652 
653 	// Is the area fully covered?
654 	if (address == area->Base() && size == area->Size()) {
655 		delete_area(addressSpace, area, false);
656 		return B_OK;
657 	}
658 
659 	int priority;
660 	uint32 allocationFlags;
661 	if (addressSpace == VMAddressSpace::Kernel()) {
662 		priority = VM_PRIORITY_SYSTEM;
663 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
664 			| HEAP_DONT_LOCK_KERNEL_SPACE;
665 	} else {
666 		priority = VM_PRIORITY_USER;
667 		allocationFlags = 0;
668 	}
669 
670 	VMCache* cache = vm_area_get_locked_cache(area);
671 	VMCacheChainLocker cacheChainLocker(cache);
672 	cacheChainLocker.LockAllSourceCaches();
673 
674 	// If no one else uses the area's cache and it's an anonymous cache, we can
675 	// resize or split it, too.
676 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
677 		&& cache->consumers.IsEmpty() && cache->type == CACHE_TYPE_RAM;
678 
679 	// Cut the end only?
680 	if (offset > 0 && size == area->Size() - offset) {
681 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
682 			allocationFlags);
683 		if (error != B_OK)
684 			return error;
685 
686 		// unmap pages
687 		unmap_pages(area, address, size);
688 
689 		if (onlyCacheUser) {
690 			// Since VMCache::Resize() can temporarily drop the lock, we must
691 			// unlock all lower caches to prevent locking order inversion.
692 			cacheChainLocker.Unlock(cache);
693 			cache->Resize(cache->virtual_base + offset, priority);
694 			cache->ReleaseRefAndUnlock();
695 		}
696 
697 		return B_OK;
698 	}
699 
700 	// Cut the beginning only?
701 	if (area->Base() == address) {
702 		// resize the area
703 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
704 			allocationFlags);
705 		if (error != B_OK)
706 			return error;
707 
708 		// unmap pages
709 		unmap_pages(area, address, size);
710 
711 		if (onlyCacheUser) {
712 			// Since VMCache::Rebase() can temporarily drop the lock, we must
713 			// unlock all lower caches to prevent locking order inversion.
714 			cacheChainLocker.Unlock(cache);
715 			cache->Rebase(cache->virtual_base + size, priority);
716 			cache->ReleaseRefAndUnlock();
717 		}
718 		area->cache_offset += size;
719 
720 		return B_OK;
721 	}
722 
723 	// The tough part -- cut a piece out of the middle of the area.
724 	// We do that by shrinking the area to the beginning section and creating a
725 	// new area for the end section.
726 	addr_t firstNewSize = offset;
727 	addr_t secondBase = address + size;
728 	addr_t secondSize = area->Size() - offset - size;
729 
730 	// unmap pages
731 	unmap_pages(area, address, area->Size() - firstNewSize);
732 
733 	// resize the area
734 	addr_t oldSize = area->Size();
735 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
736 		allocationFlags);
737 	if (error != B_OK)
738 		return error;
739 
740 	virtual_address_restrictions addressRestrictions = {};
741 	addressRestrictions.address = (void*)secondBase;
742 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
743 	VMArea* secondArea;
744 
745 	if (onlyCacheUser) {
746 		// Create a new cache for the second area.
747 		VMCache* secondCache;
748 		error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0,
749 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
750 		if (error != B_OK) {
751 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
752 			return error;
753 		}
754 
755 		secondCache->Lock();
756 		secondCache->temporary = cache->temporary;
757 		secondCache->virtual_base = area->cache_offset;
758 		secondCache->virtual_end = area->cache_offset + secondSize;
759 
760 		// Transfer the concerned pages from the first cache.
761 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
762 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
763 			area->cache_offset);
764 
765 		if (error == B_OK) {
766 			// Since VMCache::Resize() can temporarily drop the lock, we must
767 			// unlock all lower caches to prevent locking order inversion.
768 			cacheChainLocker.Unlock(cache);
769 			cache->Resize(cache->virtual_base + firstNewSize, priority);
770 			// Don't unlock the cache yet because we might have to resize it
771 			// back.
772 
773 			// Map the second area.
774 			error = map_backing_store(addressSpace, secondCache,
775 				area->cache_offset, area->name, secondSize, area->wiring,
776 				area->protection, REGION_NO_PRIVATE_MAP, 0,
777 				&addressRestrictions, kernel, &secondArea, NULL);
778 		}
779 
780 		if (error != B_OK) {
781 			// Restore the original cache.
782 			cache->Resize(cache->virtual_base + oldSize, priority);
783 
784 			// Move the pages back.
785 			status_t readoptStatus = cache->Adopt(secondCache,
786 				area->cache_offset, secondSize, adoptOffset);
787 			if (readoptStatus != B_OK) {
788 				// Some (swap) pages have not been moved back and will be lost
789 				// once the second cache is deleted.
790 				panic("failed to restore cache range: %s",
791 					strerror(readoptStatus));
792 
793 				// TODO: Handle out of memory cases by freeing memory and
794 				// retrying.
795 			}
796 
797 			cache->ReleaseRefAndUnlock();
798 			secondCache->ReleaseRefAndUnlock();
799 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
800 			return error;
801 		}
802 
803 		// Now we can unlock it.
804 		cache->ReleaseRefAndUnlock();
805 		secondCache->Unlock();
806 	} else {
807 		error = map_backing_store(addressSpace, cache, area->cache_offset
808 			+ (secondBase - area->Base()),
809 			area->name, secondSize, area->wiring, area->protection,
810 			REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, kernel, &secondArea,
811 			NULL);
812 		if (error != B_OK) {
813 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
814 			return error;
815 		}
816 		// We need a cache reference for the new area.
817 		cache->AcquireRefLocked();
818 	}
819 
820 	if (_secondArea != NULL)
821 		*_secondArea = secondArea;
822 
823 	return B_OK;
824 }
825 
826 
827 /*!	Deletes or cuts all areas in the given address range.
828 	The address space must be write-locked.
829 	The caller must ensure that no part of the given range is wired.
830 */
831 static status_t
832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
833 	bool kernel)
834 {
835 	size = PAGE_ALIGN(size);
836 
837 	// Check whether the caller is allowed to modify the concerned areas.
838 	if (!kernel) {
839 		for (VMAddressSpace::AreaRangeIterator it
840 				= addressSpace->GetAreaRangeIterator(address, size);
841 			VMArea* area = it.Next();) {
842 
843 			if ((area->protection & B_KERNEL_AREA) != 0) {
844 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
845 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
846 					team_get_current_team_id(), area->id, area->name);
847 				return B_NOT_ALLOWED;
848 			}
849 		}
850 	}
851 
852 	for (VMAddressSpace::AreaRangeIterator it
853 			= addressSpace->GetAreaRangeIterator(address, size);
854 		VMArea* area = it.Next();) {
855 
856 		status_t error = cut_area(addressSpace, area, address, size, NULL,
857 			kernel);
858 		if (error != B_OK)
859 			return error;
860 			// Failing after already messing with areas is ugly, but we
861 			// can't do anything about it.
862 	}
863 
864 	return B_OK;
865 }
866 
867 
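/*!	Discards the pages of the given address range from the area's cache,
	provided the area is the sole user of an anonymous (RAM) cache.
*/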
868 static status_t
869 discard_area_range(VMArea* area, addr_t address, addr_t size)
870 {
871 	addr_t offset;
872 	if (!intersect_area(area, address, size, offset))
873 		return B_OK;
874 
875 	// If someone else uses the area's cache or it's not an anonymous cache, we
876 	// can't discard.
877 	VMCache* cache = vm_area_get_locked_cache(area);
878 	if (cache->areas != area || area->cache_next != NULL
879 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
880 		return B_OK;
881 	}
882 
883 	VMCacheChainLocker cacheChainLocker(cache);
884 	cacheChainLocker.LockAllSourceCaches();
885 
886 	unmap_pages(area, address, size);
887 
888 	// Since VMCache::Discard() can temporarily drop the lock, we must
889 	// unlock all lower caches to prevent locking order inversion.
890 	cacheChainLocker.Unlock(cache);
891 	cache->Discard(cache->virtual_base + offset, size);
892 	cache->ReleaseRefAndUnlock();
893 
894 	return B_OK;
895 }
896 
897 
898 static status_t
899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
900 	bool kernel)
901 {
902 	for (VMAddressSpace::AreaRangeIterator it
903 		= addressSpace->GetAreaRangeIterator(address, size);
904 			VMArea* area = it.Next();) {
905 		status_t error = discard_area_range(area, address, size);
906 		if (error != B_OK)
907 			return error;
908 	}
909 
910 	return B_OK;
911 }
912 
913 
914 /*! You need to hold the lock of the cache and the write lock of the address
915 	space when calling this function.
916 	Note that in case of error the cache will be temporarily unlocked.
917 	If \a addressSpec is \c B_EXACT_ADDRESS and the
918 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
919 	that no part of the specified address range (base \c *_virtualAddress, size
920 	\a size) is wired.
921 */
922 static status_t
923 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
924 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
925 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
926 	bool kernel, VMArea** _area, void** _virtualAddress)
927 {
928 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
929 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
930 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
931 		addressRestrictions->address, offset, size,
932 		addressRestrictions->address_specification, wiring, protection,
933 		_area, areaName));
934 	cache->AssertLocked();
935 
936 	if (size == 0) {
937 #if KDEBUG
938 		panic("map_backing_store(): called with size=0 for area '%s'!",
939 			areaName);
940 #endif
941 		return B_BAD_VALUE;
942 	}
943 
944 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
945 		| HEAP_DONT_LOCK_KERNEL_SPACE;
946 	int priority;
947 	if (addressSpace != VMAddressSpace::Kernel()) {
948 		priority = VM_PRIORITY_USER;
949 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
950 		priority = VM_PRIORITY_VIP;
951 		allocationFlags |= HEAP_PRIORITY_VIP;
952 	} else
953 		priority = VM_PRIORITY_SYSTEM;
954 
955 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
956 		allocationFlags);
957 	if (area == NULL)
958 		return B_NO_MEMORY;
959 
960 	status_t status;
961 
962 	// if this is a private map, we need to create a new cache
963 	// to handle the private copies of pages as they are written to
964 	VMCache* sourceCache = cache;
965 	if (mapping == REGION_PRIVATE_MAP) {
966 		VMCache* newCache;
967 
968 		// create an anonymous cache
969 		status = VMCacheFactory::CreateAnonymousCache(newCache,
970 			(protection & B_STACK_AREA) != 0
971 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
972 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
973 		if (status != B_OK)
974 			goto err1;
975 
976 		newCache->Lock();
977 		newCache->temporary = 1;
978 		newCache->virtual_base = offset;
979 		newCache->virtual_end = offset + size;
980 
981 		cache->AddConsumer(newCache);
982 
983 		cache = newCache;
984 	}
985 
986 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
987 		status = cache->SetMinimalCommitment(size, priority);
988 		if (status != B_OK)
989 			goto err2;
990 	}
991 
992 	// check to see if this address space has entered DELETE state
993 	if (addressSpace->IsBeingDeleted()) {
994 		// okay, someone is trying to delete this address space now, so we can't
995 		// insert the area, so back out
996 		status = B_BAD_TEAM_ID;
997 		goto err2;
998 	}
999 
1000 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1001 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1002 		status = unmap_address_range(addressSpace,
1003 			(addr_t)addressRestrictions->address, size, kernel);
1004 		if (status != B_OK)
1005 			goto err2;
1006 	}
1007 
1008 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1009 		allocationFlags, _virtualAddress);
1010 	if (status == B_NO_MEMORY
1011 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1012 		// TODO: At present, there is no way to notify the low_resource monitor
1013 		// that kernel address space is fragmented, nor does it check for this
1014 		// automatically. Due to how many locks are held, we cannot wait here
1015 		// for space to be freed up, but it would be good to at least notify
1016 		// that we tried and failed to allocate some amount.
1017 	}
1018 	if (status != B_OK)
1019 		goto err2;
1020 
1021 	// attach the cache to the area
1022 	area->cache = cache;
1023 	area->cache_offset = offset;
1024 
1025 	// point the cache back to the area
1026 	cache->InsertAreaLocked(area);
1027 	if (mapping == REGION_PRIVATE_MAP)
1028 		cache->Unlock();
1029 
1030 	// insert the area in the global area hash table
1031 	VMAreaHash::Insert(area);
1032 
1033 	// grab a ref to the address space (the area holds this)
1034 	addressSpace->Get();
1035 
1036 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1037 //		cache, sourceCache, areaName, area);
1038 
1039 	*_area = area;
1040 	return B_OK;
1041 
1042 err2:
1043 	if (mapping == REGION_PRIVATE_MAP) {
1044 		// We created this cache, so we must delete it again. Note that we
1045 		// need to temporarily unlock the source cache or we'll otherwise
1046 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1047 		sourceCache->Unlock();
1048 		cache->ReleaseRefAndUnlock();
1049 		sourceCache->Lock();
1050 	}
1051 err1:
1052 	addressSpace->DeleteArea(area, allocationFlags);
1053 	return status;
1054 }
1055 
1056 
1057 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1058 	  locker1, locker2).
1059 */
1060 template<typename LockerType1, typename LockerType2>
1061 static inline bool
1062 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1063 {
1064 	area->cache->AssertLocked();
1065 
1066 	VMAreaUnwiredWaiter waiter;
1067 	if (!area->AddWaiterIfWired(&waiter))
1068 		return false;
1069 
1070 	// unlock everything and wait
1071 	if (locker1 != NULL)
1072 		locker1->Unlock();
1073 	if (locker2 != NULL)
1074 		locker2->Unlock();
1075 
1076 	waiter.waitEntry.Wait();
1077 
1078 	return true;
1079 }
1080 
1081 
1082 /*!	Checks whether the given area has any wired ranges intersecting with the
1083 	specified range and waits, if so.
1084 
1085 	When it has to wait, the function calls \c Unlock() on both \a locker1
1086 	and \a locker2, if given.
1087 	The area's top cache must be locked and must be unlocked as a side effect
1088 	of calling \c Unlock() on either \a locker1 or \a locker2.
1089 
1090 	If the function does not have to wait it does not modify or unlock any
1091 	object.
1092 
1093 	\param area The area to be checked.
1094 	\param base The base address of the range to check.
1095 	\param size The size of the address range to check.
1096 	\param locker1 An object to be unlocked before starting to wait (may
1097 		be \c NULL).
1098 	\param locker2 An object to be unlocked before starting to wait (may
1099 		be \c NULL).
1100 	\return \c true, if the function had to wait, \c false otherwise.
1101 */
1102 template<typename LockerType1, typename LockerType2>
1103 static inline bool
1104 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1105 	LockerType1* locker1, LockerType2* locker2)
1106 {
1107 	area->cache->AssertLocked();
1108 
1109 	VMAreaUnwiredWaiter waiter;
1110 	if (!area->AddWaiterIfWired(&waiter, base, size))
1111 		return false;
1112 
1113 	// unlock everything and wait
1114 	if (locker1 != NULL)
1115 		locker1->Unlock();
1116 	if (locker2 != NULL)
1117 		locker2->Unlock();
1118 
1119 	waiter.waitEntry.Wait();
1120 
1121 	return true;
1122 }
1123 
1124 
1125 /*!	Checks whether the given address space has any wired ranges intersecting
1126 	with the specified range and waits, if so.
1127 
1128 	Similar to wait_if_area_range_is_wired(), with the following differences:
1129 	- All areas intersecting with the range are checked (respectively all until
1130 	  one is found that contains a wired range intersecting with the given
1131 	  range).
1132 	- The given address space must at least be read-locked and must be unlocked
1133 	  when \c Unlock() is called on \a locker.
1134 	- None of the areas' caches are allowed to be locked.
1135 */
1136 template<typename LockerType>
1137 static inline bool
1138 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1139 	size_t size, LockerType* locker)
1140 {
1141 	for (VMAddressSpace::AreaRangeIterator it
1142 		= addressSpace->GetAreaRangeIterator(base, size);
1143 			VMArea* area = it.Next();) {
1144 
1145 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1146 
1147 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1148 			return true;
1149 	}
1150 
1151 	return false;
1152 }
1153 
1154 
1155 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1156 	It must be called in a situation where the kernel address space may be
1157 	locked.
1158 */
1159 status_t
1160 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1161 {
1162 	AddressSpaceReadLocker locker;
1163 	VMArea* area;
1164 	status_t status = locker.SetFromArea(id, area);
1165 	if (status != B_OK)
1166 		return status;
1167 
1168 	if (area->page_protections == NULL) {
1169 		status = allocate_area_page_protections(area);
1170 		if (status != B_OK)
1171 			return status;
1172 	}
1173 
1174 	*cookie = (void*)area;
1175 	return B_OK;
1176 }
1177 
1178 
1179 /*!	This is a debug helper function that can only be used in very specific
1180 	use cases.
1181 	Sets protection for the given address range to the protection specified.
1182 	If \a protection is 0 then the involved pages will be marked non-present
1183 	in the translation map to cause a fault on access. The pages aren't
1184 	actually unmapped however so that they can be marked present again with
1185 	additional calls to this function. For this to work the area must be
1186 	fully locked in memory so that the pages aren't otherwise touched.
1187 	This function does not lock the kernel address space and needs to be
1188 	supplied with a \a cookie retrieved from a successful call to
1189 	vm_prepare_kernel_area_debug_protection().
1190 */
1191 status_t
1192 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1193 	uint32 protection)
1194 {
1195 	// check address range
1196 	addr_t address = (addr_t)_address;
1197 	size = PAGE_ALIGN(size);
1198 
1199 	if ((address % B_PAGE_SIZE) != 0
1200 		|| (addr_t)address + size < (addr_t)address
1201 		|| !IS_KERNEL_ADDRESS(address)
1202 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1203 		return B_BAD_VALUE;
1204 	}
1205 
1206 	// Translate the kernel protection to user protection as we only store that.
1207 	if ((protection & B_KERNEL_READ_AREA) != 0)
1208 		protection |= B_READ_AREA;
1209 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1210 		protection |= B_WRITE_AREA;
1211 
1212 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1213 	VMTranslationMap* map = addressSpace->TranslationMap();
1214 	VMArea* area = (VMArea*)cookie;
1215 
1216 	addr_t offset = address - area->Base();
1217 	if (area->Size() - offset < size) {
1218 		panic("protect range not fully within supplied area");
1219 		return B_BAD_VALUE;
1220 	}
1221 
1222 	if (area->page_protections == NULL) {
1223 		panic("area has no page protections");
1224 		return B_BAD_VALUE;
1225 	}
1226 
1227 	// Invalidate the mapping entries so any access to them will fault or
1228 	// restore the mapping entries unchanged so that lookups will succeed again.
1229 	map->Lock();
1230 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1231 	map->Unlock();
1232 
1233 	// And set the proper page protections so that the fault case will actually
1234 	// fail and not simply try to map a new page.
1235 	for (addr_t pageAddress = address; pageAddress < address + size;
1236 			pageAddress += B_PAGE_SIZE) {
1237 		set_area_page_protection(area, pageAddress, protection);
1238 	}
1239 
1240 	return B_OK;
1241 }
1242 
1243 
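/*!	Creates an inaccessible (protection 0) kernel area covering the given
	address range, backed by an empty anonymous cache, so that the range can't
	be used for anything else. Returns the area ID on success.
*/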
1244 status_t
1245 vm_block_address_range(const char* name, void* address, addr_t size)
1246 {
1247 	if (!arch_vm_supports_protection(0))
1248 		return B_NOT_SUPPORTED;
1249 
1250 	AddressSpaceWriteLocker locker;
1251 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1252 	if (status != B_OK)
1253 		return status;
1254 
1255 	VMAddressSpace* addressSpace = locker.AddressSpace();
1256 
1257 	// create an anonymous cache
1258 	VMCache* cache;
1259 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1260 		VM_PRIORITY_SYSTEM);
1261 	if (status != B_OK)
1262 		return status;
1263 
1264 	cache->temporary = 1;
1265 	cache->virtual_end = size;
1266 	cache->Lock();
1267 
1268 	VMArea* area;
1269 	virtual_address_restrictions addressRestrictions = {};
1270 	addressRestrictions.address = address;
1271 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1272 	status = map_backing_store(addressSpace, cache, 0, name, size,
1273 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1274 		true, &area, NULL);
1275 	if (status != B_OK) {
1276 		cache->ReleaseRefAndUnlock();
1277 		return status;
1278 	}
1279 
1280 	cache->Unlock();
1281 	area->cache_type = CACHE_TYPE_RAM;
1282 	return area->id;
1283 }
1284 
1285 
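//!	Releases an address range reservation previously made with
//	vm_reserve_address_range().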
1286 status_t
1287 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1288 {
1289 	AddressSpaceWriteLocker locker(team);
1290 	if (!locker.IsLocked())
1291 		return B_BAD_TEAM_ID;
1292 
1293 	VMAddressSpace* addressSpace = locker.AddressSpace();
1294 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1295 		addressSpace == VMAddressSpace::Kernel()
1296 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1297 }
1298 
1299 
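//!	Reserves a range of addresses in the given team's address space, so that
//	it won't be claimed by ordinary area allocations until it is unreserved.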
1300 status_t
1301 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1302 	addr_t size, uint32 flags)
1303 {
1304 	if (size == 0)
1305 		return B_BAD_VALUE;
1306 
1307 	AddressSpaceWriteLocker locker(team);
1308 	if (!locker.IsLocked())
1309 		return B_BAD_TEAM_ID;
1310 
1311 	virtual_address_restrictions addressRestrictions = {};
1312 	addressRestrictions.address = *_address;
1313 	addressRestrictions.address_specification = addressSpec;
1314 	VMAddressSpace* addressSpace = locker.AddressSpace();
1315 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1316 		addressSpace == VMAddressSpace::Kernel()
1317 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1318 		_address);
1319 }
1320 
1321 
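/*!	Creates an area backed by an anonymous (RAM) cache. Depending on \a wiring
	the pages are allocated and mapped upfront (B_FULL_LOCK, B_CONTIGUOUS),
	adopted from an existing boot-time mapping (B_ALREADY_WIRED), or faulted
	in on demand (B_NO_LOCK, B_LAZY_LOCK). Returns the ID of the new area on
	success, an error code otherwise.

	Rough usage sketch -- the parameter values are illustrative only:

		virtual_address_restrictions virtualRestrictions = {};
		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
		physical_address_restrictions physicalRestrictions = {};
		void* address;
		area_id id = vm_create_anonymous_area(VMAddressSpace::KernelID(),
			"some buffer", 4 * B_PAGE_SIZE, B_FULL_LOCK,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
			&virtualRestrictions, &physicalRestrictions, true, &address);
*/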
1322 area_id
1323 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1324 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1325 	const virtual_address_restrictions* virtualAddressRestrictions,
1326 	const physical_address_restrictions* physicalAddressRestrictions,
1327 	bool kernel, void** _address)
1328 {
1329 	VMArea* area;
1330 	VMCache* cache;
1331 	vm_page* page = NULL;
1332 	bool isStack = (protection & B_STACK_AREA) != 0;
1333 	page_num_t guardPages;
1334 	bool canOvercommit = false;
1335 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1336 		? VM_PAGE_ALLOC_CLEAR : 0;
1337 
1338 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1339 		team, name, size));
1340 
1341 	size = PAGE_ALIGN(size);
1342 	guardSize = PAGE_ALIGN(guardSize);
1343 	guardPages = guardSize / B_PAGE_SIZE;
1344 
1345 	if (size == 0 || size < guardSize)
1346 		return B_BAD_VALUE;
1347 	if (!arch_vm_supports_protection(protection))
1348 		return B_NOT_SUPPORTED;
1349 
1350 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1351 		canOvercommit = true;
1352 
1353 #ifdef DEBUG_KERNEL_STACKS
1354 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1355 		isStack = true;
1356 #endif
1357 
1358 	// check parameters
1359 	switch (virtualAddressRestrictions->address_specification) {
1360 		case B_ANY_ADDRESS:
1361 		case B_EXACT_ADDRESS:
1362 		case B_BASE_ADDRESS:
1363 		case B_ANY_KERNEL_ADDRESS:
1364 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1365 		case B_RANDOMIZED_ANY_ADDRESS:
1366 		case B_RANDOMIZED_BASE_ADDRESS:
1367 			break;
1368 
1369 		default:
1370 			return B_BAD_VALUE;
1371 	}
1372 
1373 	// If low or high physical address restrictions are given, we force
1374 	// B_CONTIGUOUS wiring, since only then we'll use
1375 	// vm_page_allocate_page_run() which deals with those restrictions.
1376 	if (physicalAddressRestrictions->low_address != 0
1377 		|| physicalAddressRestrictions->high_address != 0) {
1378 		wiring = B_CONTIGUOUS;
1379 	}
1380 
1381 	physical_address_restrictions stackPhysicalRestrictions;
1382 	bool doReserveMemory = false;
1383 	switch (wiring) {
1384 		case B_NO_LOCK:
1385 			break;
1386 		case B_FULL_LOCK:
1387 		case B_LAZY_LOCK:
1388 		case B_CONTIGUOUS:
1389 			doReserveMemory = true;
1390 			break;
1391 		case B_ALREADY_WIRED:
1392 			break;
1393 		case B_LOMEM:
1394 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1395 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1396 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1397 			wiring = B_CONTIGUOUS;
1398 			doReserveMemory = true;
1399 			break;
1400 		case B_32_BIT_FULL_LOCK:
1401 			if (B_HAIKU_PHYSICAL_BITS <= 32
1402 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1403 				wiring = B_FULL_LOCK;
1404 				doReserveMemory = true;
1405 				break;
1406 			}
1407 			// TODO: We don't really support this mode efficiently. Just fall
1408 			// through for now ...
1409 		case B_32_BIT_CONTIGUOUS:
1410 			#if B_HAIKU_PHYSICAL_BITS > 32
1411 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1412 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1413 					stackPhysicalRestrictions.high_address
1414 						= (phys_addr_t)1 << 32;
1415 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1416 				}
1417 			#endif
1418 			wiring = B_CONTIGUOUS;
1419 			doReserveMemory = true;
1420 			break;
1421 		default:
1422 			return B_BAD_VALUE;
1423 	}
1424 
1425 	// Optimization: For a single-page contiguous allocation without low/high
1426 	// memory restriction B_FULL_LOCK wiring suffices.
1427 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1428 		&& physicalAddressRestrictions->low_address == 0
1429 		&& physicalAddressRestrictions->high_address == 0) {
1430 		wiring = B_FULL_LOCK;
1431 	}
1432 
1433 	// For full lock or contiguous areas we're also going to map the pages and
1434 	// thus need to reserve pages for the mapping backend upfront.
1435 	addr_t reservedMapPages = 0;
1436 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1437 		AddressSpaceWriteLocker locker;
1438 		status_t status = locker.SetTo(team);
1439 		if (status != B_OK)
1440 			return status;
1441 
1442 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1443 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1444 	}
1445 
1446 	int priority;
1447 	if (team != VMAddressSpace::KernelID())
1448 		priority = VM_PRIORITY_USER;
1449 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1450 		priority = VM_PRIORITY_VIP;
1451 	else
1452 		priority = VM_PRIORITY_SYSTEM;
1453 
1454 	// Reserve memory before acquiring the address space lock. This reduces the
1455 	// chances of failure, since while holding the write lock to the address
1456 	// space (if it is the kernel address space that is), the low memory handler
1457 	// won't be able to free anything for us.
1458 	addr_t reservedMemory = 0;
1459 	if (doReserveMemory) {
1460 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1461 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1462 			return B_NO_MEMORY;
1463 		reservedMemory = size;
1464 		// TODO: We don't reserve the memory for the pages for the page
1465 		// directories/tables. We actually need to, since we currently don't
1466 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1467 		// there are actually fewer physical pages than there should be, which
1468 		// can get the VM into trouble in low memory situations.
1469 	}
1470 
1471 	AddressSpaceWriteLocker locker;
1472 	VMAddressSpace* addressSpace;
1473 	status_t status;
1474 
1475 	// For full lock areas reserve the pages before locking the address
1476 	// space. E.g. block caches can't release their memory while we hold the
1477 	// address space lock.
1478 	page_num_t reservedPages = reservedMapPages;
1479 	if (wiring == B_FULL_LOCK)
1480 		reservedPages += size / B_PAGE_SIZE;
1481 
1482 	vm_page_reservation reservation;
1483 	if (reservedPages > 0) {
1484 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1485 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1486 					priority)) {
1487 				reservedPages = 0;
1488 				status = B_WOULD_BLOCK;
1489 				goto err0;
1490 			}
1491 		} else
1492 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1493 	}
1494 
1495 	if (wiring == B_CONTIGUOUS) {
1496 		// we try to allocate the page run here upfront as this may easily
1497 		// fail for obvious reasons
1498 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1499 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1500 		if (page == NULL) {
1501 			status = B_NO_MEMORY;
1502 			goto err0;
1503 		}
1504 	}
1505 
1506 	// Lock the address space and, if B_EXACT_ADDRESS and
1507 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1508 	// is not wired.
1509 	do {
1510 		status = locker.SetTo(team);
1511 		if (status != B_OK)
1512 			goto err1;
1513 
1514 		addressSpace = locker.AddressSpace();
1515 	} while (virtualAddressRestrictions->address_specification
1516 			== B_EXACT_ADDRESS
1517 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1518 		&& wait_if_address_range_is_wired(addressSpace,
1519 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1520 
1521 	// create an anonymous cache
1522 	// if it's a stack, make sure that at least two pages are available
1523 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1524 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1525 		wiring == B_NO_LOCK, priority);
1526 	if (status != B_OK)
1527 		goto err1;
1528 
1529 	cache->temporary = 1;
1530 	cache->virtual_end = size;
1531 	cache->committed_size = reservedMemory;
1532 		// TODO: This should be done via a method.
1533 	reservedMemory = 0;
1534 
1535 	cache->Lock();
1536 
1537 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1538 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1539 		kernel, &area, _address);
1540 
1541 	if (status != B_OK) {
1542 		cache->ReleaseRefAndUnlock();
1543 		goto err1;
1544 	}
1545 
1546 	locker.DegradeToReadLock();
1547 
1548 	switch (wiring) {
1549 		case B_NO_LOCK:
1550 		case B_LAZY_LOCK:
1551 			// do nothing - the pages are mapped in as needed
1552 			break;
1553 
1554 		case B_FULL_LOCK:
1555 		{
1556 			// Allocate and map all pages for this area
1557 
1558 			off_t offset = 0;
1559 			for (addr_t address = area->Base();
1560 					address < area->Base() + (area->Size() - 1);
1561 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1562 #ifdef DEBUG_KERNEL_STACKS
1563 #	ifdef STACK_GROWS_DOWNWARDS
1564 				if (isStack && address < area->Base()
1565 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1566 #	else
1567 				if (isStack && address >= area->Base() + area->Size()
1568 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1569 #	endif
1570 					continue;
1571 #endif
1572 				vm_page* page = vm_page_allocate_page(&reservation,
1573 					PAGE_STATE_WIRED | pageAllocFlags);
1574 				cache->InsertPage(page, offset);
1575 				map_page(area, page, address, protection, &reservation);
1576 
1577 				DEBUG_PAGE_ACCESS_END(page);
1578 			}
1579 
1580 			break;
1581 		}
1582 
1583 		case B_ALREADY_WIRED:
1584 		{
1585 			// The pages should already be mapped. This is only really useful
1586 			// during boot time. Find the appropriate vm_page objects and stick
1587 			// them in the cache object.
1588 			VMTranslationMap* map = addressSpace->TranslationMap();
1589 			off_t offset = 0;
1590 
1591 			if (!gKernelStartup)
1592 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1593 
1594 			map->Lock();
1595 
1596 			for (addr_t virtualAddress = area->Base();
1597 					virtualAddress < area->Base() + (area->Size() - 1);
1598 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1599 				phys_addr_t physicalAddress;
1600 				uint32 flags;
1601 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1602 				if (status < B_OK) {
1603 					panic("looking up mapping failed for va 0x%lx\n",
1604 						virtualAddress);
1605 				}
1606 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1607 				if (page == NULL) {
1608 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1609 						"\n", physicalAddress);
1610 				}
1611 
1612 				DEBUG_PAGE_ACCESS_START(page);
1613 
1614 				cache->InsertPage(page, offset);
1615 				increment_page_wired_count(page);
1616 				vm_page_set_state(page, PAGE_STATE_WIRED);
1617 				page->busy = false;
1618 
1619 				DEBUG_PAGE_ACCESS_END(page);
1620 			}
1621 
1622 			map->Unlock();
1623 			break;
1624 		}
1625 
1626 		case B_CONTIGUOUS:
1627 		{
1628 			// We have already allocated our contiguous page run, so we can now
1629 			// just map them in the address space
1630 			VMTranslationMap* map = addressSpace->TranslationMap();
1631 			phys_addr_t physicalAddress
1632 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1633 			addr_t virtualAddress = area->Base();
1634 			off_t offset = 0;
1635 
1636 			map->Lock();
1637 
1638 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1639 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1640 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1641 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1642 				if (page == NULL)
1643 					panic("couldn't lookup physical page just allocated\n");
1644 
1645 				status = map->Map(virtualAddress, physicalAddress, protection,
1646 					area->MemoryType(), &reservation);
1647 				if (status < B_OK)
1648 					panic("couldn't map physical page in page run\n");
1649 
1650 				cache->InsertPage(page, offset);
1651 				increment_page_wired_count(page);
1652 
1653 				DEBUG_PAGE_ACCESS_END(page);
1654 			}
1655 
1656 			map->Unlock();
1657 			break;
1658 		}
1659 
1660 		default:
1661 			break;
1662 	}
1663 
1664 	cache->Unlock();
1665 
1666 	if (reservedPages > 0)
1667 		vm_page_unreserve_pages(&reservation);
1668 
1669 	TRACE(("vm_create_anonymous_area: done\n"));
1670 
1671 	area->cache_type = CACHE_TYPE_RAM;
1672 	return area->id;
1673 
1674 err1:
1675 	if (wiring == B_CONTIGUOUS) {
1676 		// we had allocated the contiguous page run upfront -- free the pages again
1677 		phys_addr_t pageNumber = page->physical_page_number;
1678 		int32 i;
1679 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1680 			page = vm_lookup_page(pageNumber);
1681 			if (page == NULL)
1682 				panic("couldn't lookup physical page just allocated\n");
1683 
1684 			vm_page_set_state(page, PAGE_STATE_FREE);
1685 		}
1686 	}
1687 
1688 err0:
1689 	if (reservedPages > 0)
1690 		vm_page_unreserve_pages(&reservation);
1691 	if (reservedMemory > 0)
1692 		vm_unreserve_memory(reservedMemory);
1693 
1694 	return status;
1695 }
1696 
1697 
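/*!	Maps the given physical address range into the team's address space as a
	fully locked area backed by a device cache. If \a physicalAddress is not
	page aligned, the mapping is extended downwards to the page boundary and
	the returned \a *_address points at the corresponding offset within the
	area. Returns the area ID on success.
*/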
1698 area_id
1699 vm_map_physical_memory(team_id team, const char* name, void** _address,
1700 	uint32 addressSpec, addr_t size, uint32 protection,
1701 	phys_addr_t physicalAddress, bool alreadyWired)
1702 {
1703 	VMArea* area;
1704 	VMCache* cache;
1705 	addr_t mapOffset;
1706 
1707 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1708 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1709 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1710 		addressSpec, size, protection, physicalAddress));
1711 
1712 	if (!arch_vm_supports_protection(protection))
1713 		return B_NOT_SUPPORTED;
1714 
1715 	AddressSpaceWriteLocker locker(team);
1716 	if (!locker.IsLocked())
1717 		return B_BAD_TEAM_ID;
1718 
1719 	// if the physical address is not page-aligned,
1720 	// move the actual area down to align on a page boundary
1721 	mapOffset = physicalAddress % B_PAGE_SIZE;
1722 	size += mapOffset;
1723 	physicalAddress -= mapOffset;
1724 
1725 	size = PAGE_ALIGN(size);
1726 
1727 	// create a device cache
1728 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1729 	if (status != B_OK)
1730 		return status;
1731 
1732 	cache->virtual_end = size;
1733 
1734 	cache->Lock();
1735 
1736 	virtual_address_restrictions addressRestrictions = {};
1737 	addressRestrictions.address = *_address;
1738 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1739 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1740 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1741 		true, &area, _address);
1742 
1743 	if (status < B_OK)
1744 		cache->ReleaseRefLocked();
1745 
1746 	cache->Unlock();
1747 
1748 	if (status == B_OK) {
1749 		// set requested memory type -- use uncached, if not given
1750 		uint32 memoryType = addressSpec & B_MTR_MASK;
1751 		if (memoryType == 0)
1752 			memoryType = B_MTR_UC;
1753 
1754 		area->SetMemoryType(memoryType);
1755 
1756 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1757 		if (status != B_OK)
1758 			delete_area(locker.AddressSpace(), area, false);
1759 	}
1760 
1761 	if (status != B_OK)
1762 		return status;
1763 
1764 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1765 
1766 	if (alreadyWired) {
1767 		// The area is already mapped, but possibly not with the right
1768 		// memory type.
1769 		map->Lock();
1770 		map->ProtectArea(area, area->protection);
1771 		map->Unlock();
1772 	} else {
1773 		// Map the area completely.
1774 
1775 		// reserve pages needed for the mapping
1776 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1777 			area->Base() + (size - 1));
1778 		vm_page_reservation reservation;
1779 		vm_page_reserve_pages(&reservation, reservePages,
1780 			team == VMAddressSpace::KernelID()
1781 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1782 
1783 		map->Lock();
1784 
1785 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1786 			map->Map(area->Base() + offset, physicalAddress + offset,
1787 				protection, area->MemoryType(), &reservation);
1788 		}
1789 
1790 		map->Unlock();
1791 
1792 		vm_page_unreserve_pages(&reservation);
1793 	}
1794 
1795 	// modify the pointer returned to be offset back into the new area
1796 	// the same way the passed-in physical address was offset
1797 	*_address = (void*)((addr_t)*_address + mapOffset);
1798 
1799 	area->cache_type = CACHE_TYPE_DEVICE;
1800 	return area->id;
1801 }
1802 
1803 
1804 /*!	Don't use!
1805 	TODO: This function was introduced to map physical page vecs to
1806 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1807 	use a device cache and does not track vm_page::wired_count!
1808 */
1809 area_id
1810 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1811 	uint32 addressSpec, addr_t* _size, uint32 protection,
1812 	struct generic_io_vec* vecs, uint32 vecCount)
1813 {
1814 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1815 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1816 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1817 		addressSpec, _size, protection, vecs, vecCount));
1818 
1819 	if (!arch_vm_supports_protection(protection)
1820 		|| (addressSpec & B_MTR_MASK) != 0) {
1821 		return B_NOT_SUPPORTED;
1822 	}
1823 
1824 	AddressSpaceWriteLocker locker(team);
1825 	if (!locker.IsLocked())
1826 		return B_BAD_TEAM_ID;
1827 
1828 	if (vecCount == 0)
1829 		return B_BAD_VALUE;
1830 
1831 	addr_t size = 0;
1832 	for (uint32 i = 0; i < vecCount; i++) {
1833 		if (vecs[i].base % B_PAGE_SIZE != 0
1834 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1835 			return B_BAD_VALUE;
1836 		}
1837 
1838 		size += vecs[i].length;
1839 	}
1840 
1841 	// create a device cache
1842 	VMCache* cache;
1843 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1844 	if (result != B_OK)
1845 		return result;
1846 
1847 	cache->virtual_end = size;
1848 
1849 	cache->Lock();
1850 
1851 	VMArea* area;
1852 	virtual_address_restrictions addressRestrictions = {};
1853 	addressRestrictions.address = *_address;
1854 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1855 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1856 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1857 		&addressRestrictions, true, &area, _address);
1858 
1859 	if (result != B_OK)
1860 		cache->ReleaseRefLocked();
1861 
1862 	cache->Unlock();
1863 
1864 	if (result != B_OK)
1865 		return result;
1866 
1867 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1868 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1869 		area->Base() + (size - 1));
1870 
1871 	vm_page_reservation reservation;
1872 	vm_page_reserve_pages(&reservation, reservePages,
1873 			team == VMAddressSpace::KernelID()
1874 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1875 	map->Lock();
1876 
1877 	uint32 vecIndex = 0;
1878 	size_t vecOffset = 0;
1879 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
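		// Advance to the I/O vec that contains the current offset, skipping
		// vecs that have been fully consumed (or have zero length).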
1880 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1881 			vecOffset = 0;
1882 			vecIndex++;
1883 		}
1884 
1885 		if (vecIndex >= vecCount)
1886 			break;
1887 
1888 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1889 			protection, area->MemoryType(), &reservation);
1890 
1891 		vecOffset += B_PAGE_SIZE;
1892 	}
1893 
1894 	map->Unlock();
1895 	vm_page_unreserve_pages(&reservation);
1896 
1897 	if (_size != NULL)
1898 		*_size = size;
1899 
1900 	area->cache_type = CACHE_TYPE_DEVICE;
1901 	return area->id;
1902 }
1903 
1904 
1905 area_id
1906 vm_create_null_area(team_id team, const char* name, void** address,
1907 	uint32 addressSpec, addr_t size, uint32 flags)
1908 {
1909 	size = PAGE_ALIGN(size);
1910 
1911 	// Lock the address space and, if B_EXACT_ADDRESS and
1912 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1913 	// is not wired.
1914 	AddressSpaceWriteLocker locker;
1915 	do {
1916 		if (locker.SetTo(team) != B_OK)
1917 			return B_BAD_TEAM_ID;
1918 	} while (addressSpec == B_EXACT_ADDRESS
1919 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1920 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1921 			(addr_t)*address, size, &locker));
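	// If the range was wired, wait_if_address_range_is_wired() returns true
	// after having waited (dropping the lock meanwhile), so the loop above
	// re-locks the address space and re-checks until the range is unwired.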
1922 
1923 	// create a null cache
1924 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1925 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1926 	VMCache* cache;
1927 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1928 	if (status != B_OK)
1929 		return status;
1930 
1931 	cache->temporary = 1;
1932 	cache->virtual_end = size;
1933 
1934 	cache->Lock();
1935 
1936 	VMArea* area;
1937 	virtual_address_restrictions addressRestrictions = {};
1938 	addressRestrictions.address = *address;
1939 	addressRestrictions.address_specification = addressSpec;
1940 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1941 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1942 		&addressRestrictions, true, &area, address);
1943 
1944 	if (status < B_OK) {
1945 		cache->ReleaseRefAndUnlock();
1946 		return status;
1947 	}
1948 
1949 	cache->Unlock();
1950 
1951 	area->cache_type = CACHE_TYPE_NULL;
1952 	return area->id;
1953 }
1954 
1955 
1956 /*!	Creates the vnode cache for the specified \a vnode.
1957 	The vnode has to be marked busy when calling this function.
1958 */
1959 status_t
1960 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1961 {
1962 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1963 }
1964 
1965 
1966 /*!	\a cache must be locked. The area's address space must be read-locked.
1967 */
1968 static void
1969 pre_map_area_pages(VMArea* area, VMCache* cache,
1970 	vm_page_reservation* reservation)
1971 {
1972 	addr_t baseAddress = area->Base();
1973 	addr_t cacheOffset = area->cache_offset;
1974 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1975 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1976 
1977 	for (VMCachePagesTree::Iterator it
1978 				= cache->pages.GetIterator(firstPage, true, true);
1979 			vm_page* page = it.Next();) {
1980 		if (page->cache_offset >= endPage)
1981 			break;
1982 
1983 		// skip busy and inactive pages
1984 		if (page->busy || page->usage_count == 0)
1985 			continue;
1986 
1987 		DEBUG_PAGE_ACCESS_START(page);
1988 		map_page(area, page,
1989 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1990 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1991 		DEBUG_PAGE_ACCESS_END(page);
1992 	}
1993 }
1994 
1995 
1996 /*!	Will map the file specified by \a fd to an area in memory.
1997 	The file will be mirrored beginning at the specified \a offset. The
1998 	\a offset and \a size arguments have to be page aligned.
1999 */
2000 static area_id
2001 _vm_map_file(team_id team, const char* name, void** _address,
2002 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2003 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2004 {
2005 	// TODO: for binary files, we want to make sure that mappings get the
2006 	//	file's contents as of mapping time, i.e. later changes should not
2007 	//	make it into the mapped copy -- this will need quite some changes
2008 	//	to be done in a nice way
2009 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2010 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2011 
2012 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2013 	size = PAGE_ALIGN(size);
2014 
2015 	if (mapping == REGION_NO_PRIVATE_MAP)
2016 		protection |= B_SHARED_AREA;
2017 	if (addressSpec != B_EXACT_ADDRESS)
2018 		unmapAddressRange = false;
2019 
2020 	if (fd < 0) {
2021 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
2022 		virtual_address_restrictions virtualRestrictions = {};
2023 		virtualRestrictions.address = *_address;
2024 		virtualRestrictions.address_specification = addressSpec;
2025 		physical_address_restrictions physicalRestrictions = {};
2026 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2027 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2028 			_address);
2029 	}
2030 
2031 	// get the open flags of the FD
2032 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2033 	if (descriptor == NULL)
2034 		return EBADF;
2035 	int32 openMode = descriptor->open_mode;
2036 	put_fd(descriptor);
2037 
2038 	// The FD must be open for reading in any case. For a shared mapping with
2039 	// write access, the FD additionally must be open for writing.
2040 	if ((openMode & O_ACCMODE) == O_WRONLY
2041 		|| (mapping == REGION_NO_PRIVATE_MAP
2042 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2043 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2044 		return EACCES;
2045 	}
2046 
2047 	// get the vnode for the object, this also grabs a ref to it
2048 	struct vnode* vnode = NULL;
2049 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2050 	if (status < B_OK)
2051 		return status;
2052 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
2053 
2054 	// If we're going to pre-map pages, we need to reserve the pages needed by
2055 	// the mapping backend upfront.
2056 	page_num_t reservedPreMapPages = 0;
2057 	vm_page_reservation reservation;
2058 	if ((protection & B_READ_AREA) != 0) {
2059 		AddressSpaceWriteLocker locker;
2060 		status = locker.SetTo(team);
2061 		if (status != B_OK)
2062 			return status;
2063 
2064 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2065 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2066 
2067 		locker.Unlock();
2068 
2069 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2070 			team == VMAddressSpace::KernelID()
2071 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2072 	}
2073 
2074 	struct PageUnreserver {
2075 		PageUnreserver(vm_page_reservation* reservation)
2076 			:
2077 			fReservation(reservation)
2078 		{
2079 		}
2080 
2081 		~PageUnreserver()
2082 		{
2083 			if (fReservation != NULL)
2084 				vm_page_unreserve_pages(fReservation);
2085 		}
2086 
2087 		vm_page_reservation* fReservation;
2088 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2089 
2090 	// Lock the address space and, if the specified address range shall be
2091 	// unmapped, ensure it is not wired.
2092 	AddressSpaceWriteLocker locker;
2093 	do {
2094 		if (locker.SetTo(team) != B_OK)
2095 			return B_BAD_TEAM_ID;
2096 	} while (unmapAddressRange
2097 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2098 			(addr_t)*_address, size, &locker));
2099 
2100 	// TODO: this only works for file systems that use the file cache
2101 	VMCache* cache;
2102 	status = vfs_get_vnode_cache(vnode, &cache, false);
2103 	if (status < B_OK)
2104 		return status;
2105 
2106 	cache->Lock();
2107 
2108 	VMArea* area;
2109 	virtual_address_restrictions addressRestrictions = {};
2110 	addressRestrictions.address = *_address;
2111 	addressRestrictions.address_specification = addressSpec;
2112 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2113 		0, protection, mapping,
2114 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
2115 		&addressRestrictions, kernel, &area, _address);
2116 
2117 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2118 		// map_backing_store() cannot know we no longer need the ref
2119 		cache->ReleaseRefLocked();
2120 	}
2121 
2122 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2123 		pre_map_area_pages(area, cache, &reservation);
2124 
2125 	cache->Unlock();
2126 
2127 	if (status == B_OK) {
2128 		// TODO: this probably deserves a smarter solution, ie. don't always
2129 		// prefetch stuff, and also, probably don't trigger it at this place.
2130 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2131 			// prefetches at max 10 MB starting from "offset"
2132 	}
2133 
2134 	if (status != B_OK)
2135 		return status;
2136 
2137 	area->cache_type = CACHE_TYPE_VNODE;
2138 	return area->id;
2139 }
2140 
2141 
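// Illustrative usage sketch for vm_map_file() -- the file descriptor "fd",
// area name and size below are hypothetical:
//
//	void* base = NULL;
//	area_id mapArea = vm_map_file(VMAddressSpace::KernelID(), "some mapping",
//		&base, B_ANY_KERNEL_ADDRESS, 64 * 1024, B_KERNEL_READ_AREA,
//		REGION_PRIVATE_MAP, false, fd, 0);
//	// maps the first 64 KiB of the already opened file read-only; mapArea < 0
//	// indicates an error such as EBADF or EACCES.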
2142 area_id
2143 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2144 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2145 	int fd, off_t offset)
2146 {
2147 	if (!arch_vm_supports_protection(protection))
2148 		return B_NOT_SUPPORTED;
2149 
2150 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2151 		mapping, unmapAddressRange, fd, offset, true);
2152 }
2153 
2154 
2155 VMCache*
2156 vm_area_get_locked_cache(VMArea* area)
2157 {
2158 	rw_lock_read_lock(&sAreaCacheLock);
2159 
2160 	while (true) {
2161 		VMCache* cache = area->cache;
2162 
2163 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2164 			// cache has been deleted
2165 			rw_lock_read_lock(&sAreaCacheLock);
2166 			continue;
2167 		}
2168 
2169 		rw_lock_read_lock(&sAreaCacheLock);
2170 
2171 		if (cache == area->cache) {
2172 			cache->AcquireRefLocked();
2173 			rw_lock_read_unlock(&sAreaCacheLock);
2174 			return cache;
2175 		}
2176 
2177 		// the cache changed in the meantime
2178 		cache->Unlock();
2179 	}
2180 }
2181 
2182 
2183 void
2184 vm_area_put_locked_cache(VMCache* cache)
2185 {
2186 	cache->ReleaseRefAndUnlock();
2187 }
2188 
2189 
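// Illustrative usage sketch for vm_clone_area() -- targetTeam and sourceArea
// are hypothetical values supplied by the caller:
//
//	void* clonedBase = NULL;
//	area_id clone = vm_clone_area(targetTeam, "some clone", &clonedBase,
//		B_ANY_ADDRESS, B_READ_AREA | B_WRITE_AREA, REGION_NO_PRIVATE_MAP,
//		sourceArea, false);
//	// Clones the source area into targetTeam's address space, sharing the
//	// underlying cache; a userland caller cloning from another team needs the
//	// source to be marked B_CLONEABLE_AREA.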
2190 area_id
2191 vm_clone_area(team_id team, const char* name, void** address,
2192 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2193 	bool kernel)
2194 {
2195 	VMArea* newArea = NULL;
2196 	VMArea* sourceArea;
2197 
2198 	// Check whether the source area exists and is cloneable. If so, mark it
2199 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2200 	{
2201 		AddressSpaceWriteLocker locker;
2202 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2203 		if (status != B_OK)
2204 			return status;
2205 
2206 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2207 			return B_NOT_ALLOWED;
2208 
2209 		sourceArea->protection |= B_SHARED_AREA;
2210 		protection |= B_SHARED_AREA;
2211 	}
2212 
2213 	// Now lock both address spaces and actually do the cloning.
2214 
2215 	MultiAddressSpaceLocker locker;
2216 	VMAddressSpace* sourceAddressSpace;
2217 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2218 	if (status != B_OK)
2219 		return status;
2220 
2221 	VMAddressSpace* targetAddressSpace;
2222 	status = locker.AddTeam(team, true, &targetAddressSpace);
2223 	if (status != B_OK)
2224 		return status;
2225 
2226 	status = locker.Lock();
2227 	if (status != B_OK)
2228 		return status;
2229 
2230 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2231 	if (sourceArea == NULL)
2232 		return B_BAD_VALUE;
2233 
2234 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2235 		return B_NOT_ALLOWED;
2236 
2237 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2238 
2239 	if (!kernel && sourceAddressSpace != targetAddressSpace
2240 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2241 #if KDEBUG
2242 		Team* team = thread_get_current_thread()->team;
2243 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2244 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2245 #endif
2246 		status = B_NOT_ALLOWED;
2247 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2248 		status = B_NOT_ALLOWED;
2249 	} else {
2250 		virtual_address_restrictions addressRestrictions = {};
2251 		addressRestrictions.address = *address;
2252 		addressRestrictions.address_specification = addressSpec;
2253 		status = map_backing_store(targetAddressSpace, cache,
2254 			sourceArea->cache_offset, name, sourceArea->Size(),
2255 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2256 			kernel, &newArea, address);
2257 	}
2258 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2259 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2260 		// to create a new cache, and has therefore already acquired a reference
2261 		// to the source cache - but otherwise it has no idea that we need
2262 		// one.
2263 		cache->AcquireRefLocked();
2264 	}
2265 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2266 		// we need to map in everything at this point
2267 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2268 			// we don't have actual pages to map but a physical area
2269 			VMTranslationMap* map
2270 				= sourceArea->address_space->TranslationMap();
2271 			map->Lock();
2272 
2273 			phys_addr_t physicalAddress;
2274 			uint32 oldProtection;
2275 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2276 
2277 			map->Unlock();
2278 
2279 			map = targetAddressSpace->TranslationMap();
2280 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2281 				newArea->Base() + (newArea->Size() - 1));
2282 
2283 			vm_page_reservation reservation;
2284 			vm_page_reserve_pages(&reservation, reservePages,
2285 				targetAddressSpace == VMAddressSpace::Kernel()
2286 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2287 			map->Lock();
2288 
2289 			for (addr_t offset = 0; offset < newArea->Size();
2290 					offset += B_PAGE_SIZE) {
2291 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2292 					protection, newArea->MemoryType(), &reservation);
2293 			}
2294 
2295 			map->Unlock();
2296 			vm_page_unreserve_pages(&reservation);
2297 		} else {
2298 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2299 			size_t reservePages = map->MaxPagesNeededToMap(
2300 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2301 			vm_page_reservation reservation;
2302 			vm_page_reserve_pages(&reservation, reservePages,
2303 				targetAddressSpace == VMAddressSpace::Kernel()
2304 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2305 
2306 			// map in all pages from source
2307 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2308 					vm_page* page  = it.Next();) {
2309 				if (!page->busy) {
2310 					DEBUG_PAGE_ACCESS_START(page);
2311 					map_page(newArea, page,
2312 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2313 							- newArea->cache_offset),
2314 						protection, &reservation);
2315 					DEBUG_PAGE_ACCESS_END(page);
2316 				}
2317 			}
2318 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2319 			// ensuring that!
2320 
2321 			vm_page_unreserve_pages(&reservation);
2322 		}
2323 	}
2324 	if (status == B_OK)
2325 		newArea->cache_type = sourceArea->cache_type;
2326 
2327 	vm_area_put_locked_cache(cache);
2328 
2329 	if (status < B_OK)
2330 		return status;
2331 
2332 	return newArea->id;
2333 }
2334 
2335 
2336 /*!	Deletes the specified area of the given address space.
2337 
2338 	The address space must be write-locked.
2339 	The caller must ensure that the area does not have any wired ranges.
2340 
2341 	\param addressSpace The address space containing the area.
2342 	\param area The area to be deleted.
2343 	\param deletingAddressSpace \c true, if the address space is in the process
2344 		of being deleted.
2345 */
2346 static void
2347 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2348 	bool deletingAddressSpace)
2349 {
2350 	ASSERT(!area->IsWired());
2351 
2352 	VMAreaHash::Remove(area);
2353 
2354 	// At this point the area is removed from the global hash table, but
2355 	// still exists in the area list.
2356 
2357 	// Unmap the virtual address space the area occupied.
2358 	{
2359 		// We need to lock the complete cache chain.
2360 		VMCache* topCache = vm_area_get_locked_cache(area);
2361 		VMCacheChainLocker cacheChainLocker(topCache);
2362 		cacheChainLocker.LockAllSourceCaches();
2363 
2364 		// If the area's top cache is a temporary cache and the area is the only
2365 		// one referencing it (besides us currently holding a second reference),
2366 		// the unmapping code doesn't need to care about preserving the accessed
2367 		// and dirty flags of the top cache page mappings.
2368 		bool ignoreTopCachePageFlags
2369 			= topCache->temporary && topCache->RefCount() == 2;
2370 
2371 		area->address_space->TranslationMap()->UnmapArea(area,
2372 			deletingAddressSpace, ignoreTopCachePageFlags);
2373 	}
2374 
2375 	if (!area->cache->temporary)
2376 		area->cache->WriteModified();
2377 
2378 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2379 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2380 
2381 	arch_vm_unset_memory_type(area);
2382 	addressSpace->RemoveArea(area, allocationFlags);
2383 	addressSpace->Put();
2384 
2385 	area->cache->RemoveArea(area);
2386 	area->cache->ReleaseRef();
2387 
2388 	addressSpace->DeleteArea(area, allocationFlags);
2389 }
2390 
2391 
2392 status_t
2393 vm_delete_area(team_id team, area_id id, bool kernel)
2394 {
2395 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2396 		team, id));
2397 
2398 	// lock the address space and make sure the area isn't wired
2399 	AddressSpaceWriteLocker locker;
2400 	VMArea* area;
2401 	AreaCacheLocker cacheLocker;
2402 
2403 	do {
2404 		status_t status = locker.SetFromArea(team, id, area);
2405 		if (status != B_OK)
2406 			return status;
2407 
2408 		cacheLocker.SetTo(area);
2409 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2410 
2411 	cacheLocker.Unlock();
2412 
2413 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2414 		return B_NOT_ALLOWED;
2415 
2416 	delete_area(locker.AddressSpace(), area, false);
2417 	return B_OK;
2418 }
2419 
2420 
2421 /*!	Creates a new cache on top of the given cache, moves all areas from
2422 	the old cache to the new one, and changes the protection of all affected
2423 	areas' pages to read-only. If requested, wired pages are moved up to the
2424 	new cache and copies are added to the old cache in their place.
2425 	Preconditions:
2426 	- The given cache must be locked.
2427 	- All of the cache's areas' address spaces must be read locked.
2428 	- Either the cache must not have any wired ranges or a page reservation for
2429 	  all wired pages must be provided, so they can be copied.
2430 
2431 	\param lowerCache The cache on top of which a new cache shall be created.
2432 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2433 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2434 		has wired pages. The wired pages are copied in this case.
2435 */
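// Cache chain sketch (illustrative):
//
//	before:  area(s) --> lowerCache --> ... --> source
//	after:   area(s) --> upperCache --> lowerCache --> ... --> source
//
// Pages that stay in lowerCache end up mapped read-only, so the next write
// access faults and a private copy is created in upperCache.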
2436 static status_t
2437 vm_copy_on_write_area(VMCache* lowerCache,
2438 	vm_page_reservation* wiredPagesReservation)
2439 {
2440 	VMCache* upperCache;
2441 
2442 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2443 
2444 	// We need to separate the cache from its areas. The cache goes one level
2445 	// deeper and we create a new cache in between.
2446 
2447 	// create an anonymous cache
2448 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2449 		lowerCache->GuardSize() / B_PAGE_SIZE,
2450 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2451 		VM_PRIORITY_USER);
2452 	if (status != B_OK)
2453 		return status;
2454 
2455 	upperCache->Lock();
2456 
2457 	upperCache->temporary = 1;
2458 	upperCache->virtual_base = lowerCache->virtual_base;
2459 	upperCache->virtual_end = lowerCache->virtual_end;
2460 
2461 	// transfer the lower cache areas to the upper cache
2462 	rw_lock_write_lock(&sAreaCacheLock);
2463 	upperCache->TransferAreas(lowerCache);
2464 	rw_lock_write_unlock(&sAreaCacheLock);
2465 
2466 	lowerCache->AddConsumer(upperCache);
2467 
2468 	// We now need to remap all pages from all of the cache's areas read-only,
2469 	// so that a copy will be created on next write access. If there are wired
2470 	// pages, we keep their protection, move them to the upper cache and create
2471 	// copies for the lower cache.
2472 	if (wiredPagesReservation != NULL) {
2473 		// We need to handle wired pages -- iterate through the cache's pages.
2474 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2475 				vm_page* page = it.Next();) {
2476 			if (page->WiredCount() > 0) {
2477 				// allocate a new page and copy the wired one
2478 				vm_page* copiedPage = vm_page_allocate_page(
2479 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2480 
2481 				vm_memcpy_physical_page(
2482 					copiedPage->physical_page_number * B_PAGE_SIZE,
2483 					page->physical_page_number * B_PAGE_SIZE);
2484 
2485 				// move the wired page to the upper cache (note: removing is OK
2486 				// with the SplayTree iterator) and insert the copy
2487 				upperCache->MovePage(page);
2488 				lowerCache->InsertPage(copiedPage,
2489 					page->cache_offset * B_PAGE_SIZE);
2490 
2491 				DEBUG_PAGE_ACCESS_END(copiedPage);
2492 			} else {
2493 				// Change the protection of this page in all areas.
2494 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2495 						tempArea = tempArea->cache_next) {
2496 					// The mapping must remain readable to the same extent
2497 					// the area was readable before; write access is dropped.
2498 					uint32 protection = B_KERNEL_READ_AREA;
2499 					if ((tempArea->protection & B_READ_AREA) != 0)
2500 						protection |= B_READ_AREA;
2501 
2502 					VMTranslationMap* map
2503 						= tempArea->address_space->TranslationMap();
2504 					map->Lock();
2505 					map->ProtectPage(tempArea,
2506 						virtual_page_address(tempArea, page), protection);
2507 					map->Unlock();
2508 				}
2509 			}
2510 		}
2511 	} else {
2512 		ASSERT(lowerCache->WiredPagesCount() == 0);
2513 
2514 		// just change the protection of all areas
2515 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2516 				tempArea = tempArea->cache_next) {
2517 			// The area must remain readable to the same extent it was readable
2518 			// before; write access is dropped.
2519 			uint32 protection = B_KERNEL_READ_AREA;
2520 			if ((tempArea->protection & B_READ_AREA) != 0)
2521 				protection |= B_READ_AREA;
2522 
2523 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2524 			map->Lock();
2525 			map->ProtectArea(tempArea, protection);
2526 			map->Unlock();
2527 		}
2528 	}
2529 
2530 	vm_area_put_locked_cache(upperCache);
2531 
2532 	return B_OK;
2533 }
2534 
2535 
2536 area_id
2537 vm_copy_area(team_id team, const char* name, void** _address,
2538 	uint32 addressSpec, area_id sourceID)
2539 {
2540 	// Do the locking: target address space, all address spaces associated with
2541 	// the source cache, and the cache itself.
2542 	MultiAddressSpaceLocker locker;
2543 	VMAddressSpace* targetAddressSpace;
2544 	VMCache* cache;
2545 	VMArea* source;
2546 	AreaCacheLocker cacheLocker;
2547 	status_t status;
2548 	bool sharedArea;
2549 
2550 	page_num_t wiredPages = 0;
2551 	vm_page_reservation wiredPagesReservation;
2552 
2553 	bool restart;
2554 	do {
2555 		restart = false;
2556 
2557 		locker.Unset();
2558 		status = locker.AddTeam(team, true, &targetAddressSpace);
2559 		if (status == B_OK) {
2560 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2561 				&cache);
2562 		}
2563 		if (status != B_OK)
2564 			return status;
2565 
2566 		cacheLocker.SetTo(cache, true);	// already locked
2567 
2568 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2569 
2570 		page_num_t oldWiredPages = wiredPages;
2571 		wiredPages = 0;
2572 
2573 		// If the source area isn't shared, count the number of wired pages in
2574 		// the cache and reserve as many pages.
2575 		if (!sharedArea) {
2576 			wiredPages = cache->WiredPagesCount();
2577 
2578 			if (wiredPages > oldWiredPages) {
2579 				cacheLocker.Unlock();
2580 				locker.Unlock();
2581 
2582 				if (oldWiredPages > 0)
2583 					vm_page_unreserve_pages(&wiredPagesReservation);
2584 
2585 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2586 					VM_PRIORITY_USER);
2587 
2588 				restart = true;
2589 			}
2590 		} else if (oldWiredPages > 0)
2591 			vm_page_unreserve_pages(&wiredPagesReservation);
2592 	} while (restart);
2593 
2594 	// unreserve pages later
2595 	struct PagesUnreserver {
2596 		PagesUnreserver(vm_page_reservation* reservation)
2597 			:
2598 			fReservation(reservation)
2599 		{
2600 		}
2601 
2602 		~PagesUnreserver()
2603 		{
2604 			if (fReservation != NULL)
2605 				vm_page_unreserve_pages(fReservation);
2606 		}
2607 
2608 	private:
2609 		vm_page_reservation*	fReservation;
2610 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2611 
2612 	bool writableCopy
2613 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2614 	uint8* targetPageProtections = NULL;
2615 
2616 	if (source->page_protections != NULL) {
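		// Page protections are stored as 4 bits per page, i.e. two pages per
		// byte, hence the (pageCount + 1) / 2 sizing below.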
2617 		size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2;
2618 		targetPageProtections = (uint8*)malloc_etc(bytes,
2619 			HEAP_DONT_LOCK_KERNEL_SPACE);
2620 		if (targetPageProtections == NULL)
2621 			return B_NO_MEMORY;
2622 
2623 		memcpy(targetPageProtections, source->page_protections, bytes);
2624 
2625 		if (!writableCopy) {
2626 			for (size_t i = 0; i < bytes; i++) {
2627 				if ((targetPageProtections[i]
2628 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2629 					writableCopy = true;
2630 					break;
2631 				}
2632 			}
2633 		}
2634 	}
2635 
2636 	if (addressSpec == B_CLONE_ADDRESS) {
2637 		addressSpec = B_EXACT_ADDRESS;
2638 		*_address = (void*)source->Base();
2639 	}
2640 
2641 	// First, create a cache on top of the source area, or reuse the existing
2642 	// one if this is a shared area.
2643 
2644 	VMArea* target;
2645 	virtual_address_restrictions addressRestrictions = {};
2646 	addressRestrictions.address = *_address;
2647 	addressRestrictions.address_specification = addressSpec;
2648 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2649 		name, source->Size(), source->wiring, source->protection,
2650 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2651 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2652 		&addressRestrictions, true, &target, _address);
2653 	if (status < B_OK) {
2654 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2655 		return status;
2656 	}
2657 
2658 	if (targetPageProtections != NULL)
2659 		target->page_protections = targetPageProtections;
2660 
2661 	if (sharedArea) {
2662 		// The new area uses the old area's cache, but map_backing_store()
2663 		// hasn't acquired a ref. So we have to do that now.
2664 		cache->AcquireRefLocked();
2665 	}
2666 
2667 	// If the source area is writable, we need to move it one layer up as well
2668 
2669 	if (!sharedArea) {
2670 		if (writableCopy) {
2671 			// TODO: do something more useful if this fails!
2672 			if (vm_copy_on_write_area(cache,
2673 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2674 				panic("vm_copy_on_write_area() failed!\n");
2675 			}
2676 		}
2677 	}
2678 
2679 	// we return the ID of the newly created area
2680 	return target->id;
2681 }
2682 
2683 
2684 status_t
2685 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2686 	bool kernel)
2687 {
2688 	fix_protection(&newProtection);
2689 
2690 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2691 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2692 
2693 	if (!arch_vm_supports_protection(newProtection))
2694 		return B_NOT_SUPPORTED;
2695 
2696 	bool becomesWritable
2697 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2698 
2699 	// lock address spaces and cache
2700 	MultiAddressSpaceLocker locker;
2701 	VMCache* cache;
2702 	VMArea* area;
2703 	status_t status;
2704 	AreaCacheLocker cacheLocker;
2705 	bool isWritable;
2706 
2707 	bool restart;
2708 	do {
2709 		restart = false;
2710 
2711 		locker.Unset();
2712 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2713 		if (status != B_OK)
2714 			return status;
2715 
2716 		cacheLocker.SetTo(cache, true);	// already locked
2717 
2718 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2719 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2720 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2721 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2722 				" (%s)\n", team, newProtection, areaID, area->name);
2723 			return B_NOT_ALLOWED;
2724 		}
2725 
2726 		if (area->protection == newProtection)
2727 			return B_OK;
2728 
2729 		if (team != VMAddressSpace::KernelID()
2730 			&& area->address_space->ID() != team) {
2731 			// unless you're the kernel, you are only allowed to set
2732 			// the protection of your own areas
2733 			return B_NOT_ALLOWED;
2734 		}
2735 
2736 		isWritable
2737 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2738 
2739 		// Make sure the area (or, if we're going to call
2740 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2741 		// wired ranges.
2742 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2743 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2744 					otherArea = otherArea->cache_next) {
2745 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2746 					restart = true;
2747 					break;
2748 				}
2749 			}
2750 		} else {
2751 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2752 				restart = true;
2753 		}
2754 	} while (restart);
2755 
2756 	bool changePageProtection = true;
2757 	bool changeTopCachePagesOnly = false;
2758 
2759 	if (isWritable && !becomesWritable) {
2760 		// writable -> !writable
2761 
2762 		if (cache->source != NULL && cache->temporary) {
2763 			if (cache->CountWritableAreas(area) == 0) {
2764 				// Since this cache can now rely on the pages of its source
2765 				// cache, we can shrink its commitment to cover only the pages
2766 				// that are actually in this cache.
2767 
2768 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2769 					team == VMAddressSpace::KernelID()
2770 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2771 
2772 				// TODO: we may be able to join with our source cache, if
2773 				// count == 0
2774 			}
2775 		}
2776 
2777 		// If only the writability changes, we can just remap the pages of the
2778 		// top cache, since the pages of lower caches are mapped read-only
2779 		// anyway. That's only advantageous if the number of pages in the cache
2780 		// is significantly smaller than the number of pages in the area,
2781 		// though.
2782 		if (newProtection
2783 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2784 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2785 			changeTopCachePagesOnly = true;
2786 		}
2787 	} else if (!isWritable && becomesWritable) {
2788 		// !writable -> writable
2789 
2790 		if (!cache->consumers.IsEmpty()) {
2791 			// There are consumers -- we have to insert a new cache. Fortunately
2792 			// vm_copy_on_write_area() does everything that's needed.
2793 			changePageProtection = false;
2794 			status = vm_copy_on_write_area(cache, NULL);
2795 		} else {
2796 			// No consumers, so we don't need to insert a new one.
2797 			if (cache->source != NULL && cache->temporary) {
2798 				// the cache's commitment must contain all possible pages
2799 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2800 					team == VMAddressSpace::KernelID()
2801 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2802 			}
2803 
2804 			if (status == B_OK && cache->source != NULL) {
2805 				// There's a source cache, hence we can't just change all pages'
2806 				// protection or we might allow writing into pages belonging to
2807 				// a lower cache.
2808 				changeTopCachePagesOnly = true;
2809 			}
2810 		}
2811 	} else {
2812 		// we don't have anything special to do in all other cases
2813 	}
2814 
2815 	if (status == B_OK) {
2816 		// remap existing pages in this cache
2817 		if (changePageProtection) {
2818 			VMTranslationMap* map = area->address_space->TranslationMap();
2819 			map->Lock();
2820 
2821 			if (changeTopCachePagesOnly) {
2822 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2823 				page_num_t lastPageOffset
2824 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2825 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2826 						vm_page* page = it.Next();) {
2827 					if (page->cache_offset >= firstPageOffset
2828 						&& page->cache_offset < lastPageOffset) {
2829 						addr_t address = virtual_page_address(area, page);
2830 						map->ProtectPage(area, address, newProtection);
2831 					}
2832 				}
2833 			} else
2834 				map->ProtectArea(area, newProtection);
2835 
2836 			map->Unlock();
2837 		}
2838 
2839 		area->protection = newProtection;
2840 	}
2841 
2842 	return status;
2843 }
2844 
2845 
2846 status_t
2847 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2848 {
2849 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2850 	if (addressSpace == NULL)
2851 		return B_BAD_TEAM_ID;
2852 
2853 	VMTranslationMap* map = addressSpace->TranslationMap();
2854 
2855 	map->Lock();
2856 	uint32 dummyFlags;
2857 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2858 	map->Unlock();
2859 
2860 	addressSpace->Put();
2861 	return status;
2862 }
2863 
2864 
2865 /*!	The page's cache must be locked.
2866 */
2867 bool
2868 vm_test_map_modification(vm_page* page)
2869 {
2870 	if (page->modified)
2871 		return true;
2872 
2873 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2874 	vm_page_mapping* mapping;
2875 	while ((mapping = iterator.Next()) != NULL) {
2876 		VMArea* area = mapping->area;
2877 		VMTranslationMap* map = area->address_space->TranslationMap();
2878 
2879 		phys_addr_t physicalAddress;
2880 		uint32 flags;
2881 		map->Lock();
2882 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2883 		map->Unlock();
2884 
2885 		if ((flags & PAGE_MODIFIED) != 0)
2886 			return true;
2887 	}
2888 
2889 	return false;
2890 }
2891 
2892 
2893 /*!	The page's cache must be locked.
2894 */
2895 void
2896 vm_clear_map_flags(vm_page* page, uint32 flags)
2897 {
2898 	if ((flags & PAGE_ACCESSED) != 0)
2899 		page->accessed = false;
2900 	if ((flags & PAGE_MODIFIED) != 0)
2901 		page->modified = false;
2902 
2903 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2904 	vm_page_mapping* mapping;
2905 	while ((mapping = iterator.Next()) != NULL) {
2906 		VMArea* area = mapping->area;
2907 		VMTranslationMap* map = area->address_space->TranslationMap();
2908 
2909 		map->Lock();
2910 		map->ClearFlags(virtual_page_address(area, page), flags);
2911 		map->Unlock();
2912 	}
2913 }
2914 
2915 
2916 /*!	Removes all mappings from a page.
2917 	After you've called this function, the page is unmapped from memory and
2918 	the page's \c accessed and \c modified flags have been updated according
2919 	to the state of the mappings.
2920 	The page's cache must be locked.
2921 */
2922 void
2923 vm_remove_all_page_mappings(vm_page* page)
2924 {
2925 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2926 		VMArea* area = mapping->area;
2927 		VMTranslationMap* map = area->address_space->TranslationMap();
2928 		addr_t address = virtual_page_address(area, page);
2929 		map->UnmapPage(area, address, false);
2930 	}
2931 }
2932 
2933 
2934 int32
2935 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2936 {
2937 	int32 count = 0;
2938 
2939 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2940 	vm_page_mapping* mapping;
2941 	while ((mapping = iterator.Next()) != NULL) {
2942 		VMArea* area = mapping->area;
2943 		VMTranslationMap* map = area->address_space->TranslationMap();
2944 
2945 		bool modified;
2946 		if (map->ClearAccessedAndModified(area,
2947 				virtual_page_address(area, page), false, modified)) {
2948 			count++;
2949 		}
2950 
2951 		page->modified |= modified;
2952 	}
2953 
2955 	if (page->accessed) {
2956 		count++;
2957 		page->accessed = false;
2958 	}
2959 
2960 	return count;
2961 }
2962 
2963 
2964 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2965 	mappings.
2966 	The function iterates through the page mappings and removes them until
2967 	encountering one that has been accessed. From then on it will continue to
2968 	iterate, but only clear the accessed flag of the mapping. The page's
2969 	\c modified bit will be updated accordingly, the \c accessed bit will be
2970 	cleared.
2971 	\return The number of mapping accessed bits encountered, including the
2972 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2973 		of the page have been removed.
2974 */
2975 int32
2976 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2977 {
2978 	ASSERT(page->WiredCount() == 0);
2979 
2980 	if (page->accessed)
2981 		return vm_clear_page_mapping_accessed_flags(page);
2982 
2983 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2984 		VMArea* area = mapping->area;
2985 		VMTranslationMap* map = area->address_space->TranslationMap();
2986 		addr_t address = virtual_page_address(area, page);
2987 		bool modified = false;
2988 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2989 			page->accessed = true;
2990 			page->modified |= modified;
2991 			return vm_clear_page_mapping_accessed_flags(page);
2992 		}
2993 		page->modified |= modified;
2994 	}
2995 
2996 	return 0;
2997 }
2998 
2999 
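// KDL helper backing the dl/dw/ds/db/string debugger commands. Illustrative
// invocation (hypothetical address): "dw 0x80001000 8" dumps eight 32-bit
// values starting at that virtual address; with -p/--physical the address is
// treated as a physical address and at most one page is shown.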
3000 static int
3001 display_mem(int argc, char** argv)
3002 {
3003 	bool physical = false;
3004 	addr_t copyAddress;
3005 	int32 displayWidth;
3006 	int32 itemSize;
3007 	int32 num = -1;
3008 	addr_t address;
3009 	int i = 1, j;
3010 
3011 	if (argc > 1 && argv[1][0] == '-') {
3012 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3013 			physical = true;
3014 			i++;
3015 		} else
3016 			i = 99;
3017 	}
3018 
3019 	if (argc < i + 1 || argc > i + 2) {
3020 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3021 			"\tdl - 8 bytes\n"
3022 			"\tdw - 4 bytes\n"
3023 			"\tds - 2 bytes\n"
3024 			"\tdb - 1 byte\n"
3025 			"\tstring - a whole string\n"
3026 			"  -p or --physical only allows memory from a single page to be "
3027 			"displayed.\n");
3028 		return 0;
3029 	}
3030 
3031 	address = parse_expression(argv[i]);
3032 
3033 	if (argc > i + 1)
3034 		num = parse_expression(argv[i + 1]);
3035 
3036 	// build the format string
3037 	if (strcmp(argv[0], "db") == 0) {
3038 		itemSize = 1;
3039 		displayWidth = 16;
3040 	} else if (strcmp(argv[0], "ds") == 0) {
3041 		itemSize = 2;
3042 		displayWidth = 8;
3043 	} else if (strcmp(argv[0], "dw") == 0) {
3044 		itemSize = 4;
3045 		displayWidth = 4;
3046 	} else if (strcmp(argv[0], "dl") == 0) {
3047 		itemSize = 8;
3048 		displayWidth = 2;
3049 	} else if (strcmp(argv[0], "string") == 0) {
3050 		itemSize = 1;
3051 		displayWidth = -1;
3052 	} else {
3053 		kprintf("display_mem called in an invalid way!\n");
3054 		return 0;
3055 	}
3056 
3057 	if (num <= 0)
3058 		num = displayWidth;
3059 
3060 	void* physicalPageHandle = NULL;
3061 
3062 	if (physical) {
3063 		int32 offset = address & (B_PAGE_SIZE - 1);
3064 		if (num * itemSize + offset > B_PAGE_SIZE) {
3065 			num = (B_PAGE_SIZE - offset) / itemSize;
3066 			kprintf("NOTE: number of bytes has been cut to page size\n");
3067 		}
3068 
3069 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3070 
3071 		if (vm_get_physical_page_debug(address, &copyAddress,
3072 				&physicalPageHandle) != B_OK) {
3073 			kprintf("getting the hardware page failed.\n");
3074 			return 0;
3075 		}
3076 
3077 		address += offset;
3078 		copyAddress += offset;
3079 	} else
3080 		copyAddress = address;
3081 
3082 	if (!strcmp(argv[0], "string")) {
3083 		kprintf("%p \"", (char*)copyAddress);
3084 
3085 		// string mode
3086 		for (i = 0; true; i++) {
3087 			char c;
3088 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3089 					!= B_OK
3090 				|| c == '\0') {
3091 				break;
3092 			}
3093 
3094 			if (c == '\n')
3095 				kprintf("\\n");
3096 			else if (c == '\t')
3097 				kprintf("\\t");
3098 			else {
3099 				if (!isprint(c))
3100 					c = '.';
3101 
3102 				kprintf("%c", c);
3103 			}
3104 		}
3105 
3106 		kprintf("\"\n");
3107 	} else {
3108 		// number mode
3109 		for (i = 0; i < num; i++) {
3110 			uint64 value;
3111 
3112 			if ((i % displayWidth) == 0) {
3113 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3114 				if (i != 0)
3115 					kprintf("\n");
3116 
3117 				kprintf("[0x%lx]  ", address + i * itemSize);
3118 
3119 				for (j = 0; j < displayed; j++) {
3120 					char c;
3121 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3122 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3123 						displayed = j;
3124 						break;
3125 					}
3126 					if (!isprint(c))
3127 						c = '.';
3128 
3129 					kprintf("%c", c);
3130 				}
3131 				if (num > displayWidth) {
3132 					// make sure the spacing in the last line is correct
3133 					for (j = displayed; j < displayWidth * itemSize; j++)
3134 						kprintf(" ");
3135 				}
3136 				kprintf("  ");
3137 			}
3138 
3139 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3140 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3141 				kprintf("read fault");
3142 				break;
3143 			}
3144 
3145 			switch (itemSize) {
3146 				case 1:
3147 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3148 					break;
3149 				case 2:
3150 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3151 					break;
3152 				case 4:
3153 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3154 					break;
3155 				case 8:
3156 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3157 					break;
3158 			}
3159 		}
3160 
3161 		kprintf("\n");
3162 	}
3163 
3164 	if (physical) {
3165 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3166 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3167 	}
3168 	return 0;
3169 }
3170 
3171 
3172 static void
3173 dump_cache_tree_recursively(VMCache* cache, int level,
3174 	VMCache* highlightCache)
3175 {
3176 	// print this cache
3177 	for (int i = 0; i < level; i++)
3178 		kprintf("  ");
3179 	if (cache == highlightCache)
3180 		kprintf("%p <--\n", cache);
3181 	else
3182 		kprintf("%p\n", cache);
3183 
3184 	// recursively print its consumers
3185 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3186 			VMCache* consumer = it.Next();) {
3187 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3188 	}
3189 }
3190 
3191 
3192 static int
3193 dump_cache_tree(int argc, char** argv)
3194 {
3195 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3196 		kprintf("usage: %s <address>\n", argv[0]);
3197 		return 0;
3198 	}
3199 
3200 	addr_t address = parse_expression(argv[1]);
3201 	if (address == 0)
3202 		return 0;
3203 
3204 	VMCache* cache = (VMCache*)address;
3205 	VMCache* root = cache;
3206 
3207 	// find the root cache (the transitive source)
3208 	while (root->source != NULL)
3209 		root = root->source;
3210 
3211 	dump_cache_tree_recursively(root, 0, cache);
3212 
3213 	return 0;
3214 }
3215 
3216 
3217 const char*
3218 vm_cache_type_to_string(int32 type)
3219 {
3220 	switch (type) {
3221 		case CACHE_TYPE_RAM:
3222 			return "RAM";
3223 		case CACHE_TYPE_DEVICE:
3224 			return "device";
3225 		case CACHE_TYPE_VNODE:
3226 			return "vnode";
3227 		case CACHE_TYPE_NULL:
3228 			return "null";
3229 
3230 		default:
3231 			return "unknown";
3232 	}
3233 }
3234 
3235 
3236 #if DEBUG_CACHE_LIST
3237 
3238 static void
3239 update_cache_info_recursively(VMCache* cache, cache_info& info)
3240 {
3241 	info.page_count += cache->page_count;
3242 	if (cache->type == CACHE_TYPE_RAM)
3243 		info.committed += cache->committed_size;
3244 
3245 	// recurse
3246 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3247 			VMCache* consumer = it.Next();) {
3248 		update_cache_info_recursively(consumer, info);
3249 	}
3250 }
3251 
3252 
3253 static int
3254 cache_info_compare_page_count(const void* _a, const void* _b)
3255 {
3256 	const cache_info* a = (const cache_info*)_a;
3257 	const cache_info* b = (const cache_info*)_b;
3258 	if (a->page_count == b->page_count)
3259 		return 0;
3260 	return a->page_count < b->page_count ? 1 : -1;
3261 }
3262 
3263 
3264 static int
3265 cache_info_compare_committed(const void* _a, const void* _b)
3266 {
3267 	const cache_info* a = (const cache_info*)_a;
3268 	const cache_info* b = (const cache_info*)_b;
3269 	if (a->committed == b->committed)
3270 		return 0;
3271 	return a->committed < b->committed ? 1 : -1;
3272 }
3273 
3274 
3275 static void
3276 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3277 {
3278 	for (int i = 0; i < level; i++)
3279 		kprintf("  ");
3280 
3281 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3282 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3283 		cache->virtual_base, cache->virtual_end, cache->page_count);
3284 
3285 	if (level == 0)
3286 		kprintf("/%lu", info.page_count);
3287 
3288 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3289 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3290 
3291 		if (level == 0)
3292 			kprintf("/%lu", info.committed);
3293 	}
3294 
3295 	// areas
3296 	if (cache->areas != NULL) {
3297 		VMArea* area = cache->areas;
3298 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3299 			area->name, area->address_space->ID());
3300 
3301 		while (area->cache_next != NULL) {
3302 			area = area->cache_next;
3303 			kprintf(", %" B_PRId32, area->id);
3304 		}
3305 	}
3306 
3307 	kputs("\n");
3308 
3309 	// recurse
3310 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3311 			VMCache* consumer = it.Next();) {
3312 		dump_caches_recursively(consumer, info, level + 1);
3313 	}
3314 }
3315 
3316 
3317 static int
3318 dump_caches(int argc, char** argv)
3319 {
3320 	if (sCacheInfoTable == NULL) {
3321 		kprintf("No cache info table!\n");
3322 		return 0;
3323 	}
3324 
3325 	bool sortByPageCount = true;
3326 
3327 	for (int32 i = 1; i < argc; i++) {
3328 		if (strcmp(argv[i], "-c") == 0) {
3329 			sortByPageCount = false;
3330 		} else {
3331 			print_debugger_command_usage(argv[0]);
3332 			return 0;
3333 		}
3334 	}
3335 
3336 	uint32 totalCount = 0;
3337 	uint32 rootCount = 0;
3338 	off_t totalCommitted = 0;
3339 	page_num_t totalPages = 0;
3340 
3341 	VMCache* cache = gDebugCacheList;
3342 	while (cache) {
3343 		totalCount++;
3344 		if (cache->source == NULL) {
3345 			cache_info stackInfo;
3346 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3347 				? sCacheInfoTable[rootCount] : stackInfo;
3348 			rootCount++;
3349 			info.cache = cache;
3350 			info.page_count = 0;
3351 			info.committed = 0;
3352 			update_cache_info_recursively(cache, info);
3353 			totalCommitted += info.committed;
3354 			totalPages += info.page_count;
3355 		}
3356 
3357 		cache = cache->debug_next;
3358 	}
3359 
3360 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3361 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3362 			sortByPageCount
3363 				? &cache_info_compare_page_count
3364 				: &cache_info_compare_committed);
3365 	}
3366 
3367 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3368 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3369 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3370 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3371 			"page count" : "committed size");
3372 
3373 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3374 		for (uint32 i = 0; i < rootCount; i++) {
3375 			cache_info& info = sCacheInfoTable[i];
3376 			dump_caches_recursively(info.cache, info, 0);
3377 		}
3378 	} else
3379 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3380 
3381 	return 0;
3382 }
3383 
3384 #endif	// DEBUG_CACHE_LIST
3385 
3386 
3387 static int
3388 dump_cache(int argc, char** argv)
3389 {
3390 	VMCache* cache;
3391 	bool showPages = false;
3392 	int i = 1;
3393 
3394 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3395 		kprintf("usage: %s [-ps] <address>\n"
3396 			"  if -p is specified, all pages are shown; if -s is used,\n"
3397 			"  only the cache info is shown.\n", argv[0]);
3398 		return 0;
3399 	}
3400 	while (argv[i][0] == '-') {
3401 		char* arg = argv[i] + 1;
3402 		while (arg[0]) {
3403 			if (arg[0] == 'p')
3404 				showPages = true;
3405 			arg++;
3406 		}
3407 		i++;
3408 	}
3409 	if (argv[i] == NULL) {
3410 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3411 		return 0;
3412 	}
3413 
3414 	addr_t address = parse_expression(argv[i]);
3415 	if (address == 0)
3416 		return 0;
3417 
3418 	cache = (VMCache*)address;
3419 
3420 	cache->Dump(showPages);
3421 
3422 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3423 
3424 	return 0;
3425 }
3426 
3427 
3428 static void
3429 dump_area_struct(VMArea* area, bool mappings)
3430 {
3431 	kprintf("AREA: %p\n", area);
3432 	kprintf("name:\t\t'%s'\n", area->name);
3433 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3434 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3435 	kprintf("base:\t\t0x%lx\n", area->Base());
3436 	kprintf("size:\t\t0x%lx\n", area->Size());
3437 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3438 	kprintf("page_protection:%p\n", area->page_protections);
3439 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3440 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3441 	kprintf("cache:\t\t%p\n", area->cache);
3442 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3443 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3444 	kprintf("cache_next:\t%p\n", area->cache_next);
3445 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3446 
3447 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3448 	if (mappings) {
3449 		kprintf("page mappings:\n");
3450 		while (iterator.HasNext()) {
3451 			vm_page_mapping* mapping = iterator.Next();
3452 			kprintf("  %p", mapping->page);
3453 		}
3454 		kprintf("\n");
3455 	} else {
3456 		uint32 count = 0;
3457 		while (iterator.Next() != NULL) {
3458 			count++;
3459 		}
3460 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3461 	}
3462 }
3463 
3464 
3465 static int
3466 dump_area(int argc, char** argv)
3467 {
3468 	bool mappings = false;
3469 	bool found = false;
3470 	int32 index = 1;
3471 	VMArea* area;
3472 	addr_t num;
3473 
3474 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3475 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3476 			"All areas matching either id/address/name are listed. You can\n"
3477 			"restrict the match to a specific attribute by prefixing the specifier\n"
3478 			"with the id/contains/address/name keywords.\n"
3479 			"-m shows the area's mappings as well.\n");
3480 		return 0;
3481 	}
3482 
3483 	if (!strcmp(argv[1], "-m")) {
3484 		mappings = true;
3485 		index++;
3486 	}
3487 
3488 	int32 mode = 0xf;
3489 	if (!strcmp(argv[index], "id"))
3490 		mode = 1;
3491 	else if (!strcmp(argv[index], "contains"))
3492 		mode = 2;
3493 	else if (!strcmp(argv[index], "name"))
3494 		mode = 4;
3495 	else if (!strcmp(argv[index], "address"))
3496 		mode = 0;
3497 	if (mode != 0xf)
3498 		index++;
3499 
3500 	if (index >= argc) {
3501 		kprintf("No area specifier given.\n");
3502 		return 0;
3503 	}
3504 
3505 	num = parse_expression(argv[index]);
3506 
3507 	if (mode == 0) {
3508 		dump_area_struct((struct VMArea*)num, mappings);
3509 	} else {
3510 		// walk through the area list, looking for the arguments as a name
3511 
3512 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3513 		while ((area = it.Next()) != NULL) {
3514 			if (((mode & 4) != 0
3515 					&& !strcmp(argv[index], area->name))
3516 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3517 					|| (((mode & 2) != 0 && area->Base() <= num
3518 						&& area->Base() + area->Size() > num))))) {
3519 				dump_area_struct(area, mappings);
3520 				found = true;
3521 			}
3522 		}
3523 
3524 		if (!found)
3525 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3526 	}
3527 
3528 	return 0;
3529 }
3530 
3531 
3532 static int
3533 dump_area_list(int argc, char** argv)
3534 {
3535 	VMArea* area;
3536 	const char* name = NULL;
3537 	int32 id = 0;
3538 
3539 	if (argc > 1) {
3540 		id = parse_expression(argv[1]);
3541 		if (id == 0)
3542 			name = argv[1];
3543 	}
3544 
3545 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3546 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3547 		B_PRINTF_POINTER_WIDTH, "size");
3548 
3549 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3550 	while ((area = it.Next()) != NULL) {
3551 		if ((id != 0 && area->address_space->ID() != id)
3552 			|| (name != NULL && strstr(area->name, name) == NULL))
3553 			continue;
3554 
3555 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3556 			area->id, (void*)area->Base(), (void*)area->Size(),
3557 			area->protection, area->wiring, area->name);
3558 	}
3559 	return 0;
3560 }
3561 
3562 
3563 static int
3564 dump_available_memory(int argc, char** argv)
3565 {
3566 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3567 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3568 	return 0;
3569 }
3570 
3571 
3572 static int
3573 dump_mapping_info(int argc, char** argv)
3574 {
3575 	bool reverseLookup = false;
3576 	bool pageLookup = false;
3577 
3578 	int argi = 1;
3579 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3580 		const char* arg = argv[argi];
3581 		if (strcmp(arg, "-r") == 0) {
3582 			reverseLookup = true;
3583 		} else if (strcmp(arg, "-p") == 0) {
3584 			reverseLookup = true;
3585 			pageLookup = true;
3586 		} else {
3587 			print_debugger_command_usage(argv[0]);
3588 			return 0;
3589 		}
3590 	}
3591 
3592 	// We need at least one argument, the address. Optionally a thread ID can be
3593 	// specified.
3594 	if (argi >= argc || argi + 2 < argc) {
3595 		print_debugger_command_usage(argv[0]);
3596 		return 0;
3597 	}
3598 
3599 	uint64 addressValue;
3600 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3601 		return 0;
3602 
3603 	Team* team = NULL;
3604 	if (argi < argc) {
3605 		uint64 threadID;
3606 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3607 			return 0;
3608 
3609 		Thread* thread = Thread::GetDebug(threadID);
3610 		if (thread == NULL) {
3611 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3612 			return 0;
3613 		}
3614 
3615 		team = thread->team;
3616 	}
3617 
3618 	if (reverseLookup) {
3619 		phys_addr_t physicalAddress;
3620 		if (pageLookup) {
3621 			vm_page* page = (vm_page*)(addr_t)addressValue;
3622 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3623 		} else {
3624 			physicalAddress = (phys_addr_t)addressValue;
3625 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3626 		}
3627 
3628 		kprintf("    Team     Virtual Address      Area\n");
3629 		kprintf("--------------------------------------\n");
3630 
3631 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3632 			Callback()
3633 				:
3634 				fAddressSpace(NULL)
3635 			{
3636 			}
3637 
3638 			void SetAddressSpace(VMAddressSpace* addressSpace)
3639 			{
3640 				fAddressSpace = addressSpace;
3641 			}
3642 
3643 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3644 			{
3645 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3646 					virtualAddress);
3647 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3648 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3649 				else
3650 					kprintf("\n");
3651 				return false;
3652 			}
3653 
3654 		private:
3655 			VMAddressSpace*	fAddressSpace;
3656 		} callback;
3657 
3658 		if (team != NULL) {
3659 			// team specified -- get its address space
3660 			VMAddressSpace* addressSpace = team->address_space;
3661 			if (addressSpace == NULL) {
3662 				kprintf("Failed to get address space!\n");
3663 				return 0;
3664 			}
3665 
3666 			callback.SetAddressSpace(addressSpace);
3667 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3668 				physicalAddress, callback);
3669 		} else {
3670 			// no team specified -- iterate through all address spaces
3671 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3672 				addressSpace != NULL;
3673 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3674 				callback.SetAddressSpace(addressSpace);
3675 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3676 					physicalAddress, callback);
3677 			}
3678 		}
3679 	} else {
3680 		// get the address space
3681 		addr_t virtualAddress = (addr_t)addressValue;
3682 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3683 		VMAddressSpace* addressSpace;
3684 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3685 			addressSpace = VMAddressSpace::Kernel();
3686 		} else if (team != NULL) {
3687 			addressSpace = team->address_space;
3688 		} else {
3689 			Thread* thread = debug_get_debugged_thread();
3690 			if (thread == NULL || thread->team == NULL) {
3691 				kprintf("Failed to get team!\n");
3692 				return 0;
3693 			}
3694 
3695 			addressSpace = thread->team->address_space;
3696 		}
3697 
3698 		if (addressSpace == NULL) {
3699 			kprintf("Failed to get address space!\n");
3700 			return 0;
3701 		}
3702 
3703 		// let the translation map implementation do the job
3704 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3705 	}
3706 
3707 	return 0;
3708 }
3709 
3710 
3711 /*!	Deletes all areas and reserved regions in the given address space.
3712 
3713 	The caller must ensure that none of the areas has any wired ranges.
3714 
3715 	\param addressSpace The address space.
3716 	\param deletingAddressSpace \c true, if the address space is in the process
3717 		of being deleted.
3718 */
3719 void
3720 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3721 {
3722 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3723 		addressSpace->ID()));
3724 
3725 	addressSpace->WriteLock();
3726 
3727 	// remove all reserved areas in this address space
3728 	addressSpace->UnreserveAllAddressRanges(0);
3729 
3730 	// delete all the areas in this address space
3731 	while (VMArea* area = addressSpace->FirstArea()) {
3732 		ASSERT(!area->IsWired());
3733 		delete_area(addressSpace, area, deletingAddressSpace);
3734 	}
3735 
3736 	addressSpace->WriteUnlock();
3737 }
3738 
3739 
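/*!	Looks up the area covering \a address in the calling team's address space
	(or in the kernel address space for kernel addresses) and returns its ID.
	Unless \a kernel is \c true, areas that are neither user-readable nor
	user-writable are not revealed.
*/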
3740 static area_id
3741 vm_area_for(addr_t address, bool kernel)
3742 {
3743 	team_id team;
3744 	if (IS_USER_ADDRESS(address)) {
3745 		// we try the user team address space, if any
3746 		team = VMAddressSpace::CurrentID();
3747 		if (team < 0)
3748 			return team;
3749 	} else
3750 		team = VMAddressSpace::KernelID();
3751 
3752 	AddressSpaceReadLocker locker(team);
3753 	if (!locker.IsLocked())
3754 		return B_BAD_TEAM_ID;
3755 
3756 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3757 	if (area != NULL) {
3758 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3759 			return B_ERROR;
3760 
3761 		return area->id;
3762 	}
3763 
3764 	return B_ERROR;
3765 }
3766 
3767 
3768 /*!	Frees physical pages that were used during the boot process.
3769 	\a end is inclusive.
3770 */
3771 static void
3772 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3773 {
3774 	// free all physical pages in the specified range
3775 
3776 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3777 		phys_addr_t physicalAddress;
3778 		uint32 flags;
3779 
3780 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3781 			&& (flags & PAGE_PRESENT) != 0) {
3782 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3783 			if (page != NULL && page->State() != PAGE_STATE_FREE
3784 					&& page->State() != PAGE_STATE_CLEAR
3785 					&& page->State() != PAGE_STATE_UNUSED) {
3786 				DEBUG_PAGE_ACCESS_START(page);
3787 				vm_page_set_state(page, PAGE_STATE_FREE);
3788 			}
3789 		}
3790 	}
3791 
3792 	// unmap the memory
3793 	map->Unmap(start, end);
3794 }
3795 
3796 
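/*!	Frees and unmaps the physical pages in the given kernel virtual range that
	are no longer covered by any area -- memory the boot loader had mapped but
	the kernel does not need anymore.
*/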
3797 void
3798 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3799 {
3800 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3801 	addr_t end = start + (size - 1);
3802 	addr_t lastEnd = start;
3803 
3804 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3805 		(void*)start, (void*)end));
3806 
3807 	// The areas are sorted in virtual address space order, so
3808 	// we just have to find the holes between them that fall
3809 	// into the range we should dispose of
3810 
3811 	map->Lock();
3812 
3813 	for (VMAddressSpace::AreaIterator it
3814 				= VMAddressSpace::Kernel()->GetAreaIterator();
3815 			VMArea* area = it.Next();) {
3816 		addr_t areaStart = area->Base();
3817 		addr_t areaEnd = areaStart + (area->Size() - 1);
3818 
3819 		if (areaEnd < start)
3820 			continue;
3821 
3822 		if (areaStart > end) {
3823 			// we are done, the area is already beyond what we have to free
3824 			break;
3825 		}
3826 
3827 		if (areaStart > lastEnd) {
3828 			// this is something we can free
3829 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3830 				(void*)areaStart));
3831 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3832 		}
3833 
3834 		if (areaEnd >= end) {
3835 			lastEnd = areaEnd;
3836 				// no +1 to prevent potential overflow
3837 			break;
3838 		}
3839 
3840 		lastEnd = areaEnd + 1;
3841 	}
3842 
3843 	if (lastEnd < end) {
3844 		// we can also get rid of some space at the end of the area
3845 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3846 			(void*)end));
3847 		unmap_and_free_physical_pages(map, lastEnd, end);
3848 	}
3849 
3850 	map->Unlock();
3851 }
3852 
3853 
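/*!	Creates the "_text" and "_data" areas for the given preloaded (ELF) image.
	The area names are derived from the image's file name.
*/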
3854 static void
3855 create_preloaded_image_areas(struct preloaded_image* _image)
3856 {
3857 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3858 	char name[B_OS_NAME_LENGTH];
3859 	void* address;
3860 	int32 length;
3861 
3862 	// use file name to create a good area name
3863 	char* fileName = strrchr(image->name, '/');
3864 	if (fileName == NULL)
3865 		fileName = image->name;
3866 	else
3867 		fileName++;
3868 
3869 	length = strlen(fileName);
3870 	// make sure there is enough space for the suffix
3871 	if (length > 25)
3872 		length = 25;
3873 
3874 	memcpy(name, fileName, length);
3875 	strcpy(name + length, "_text");
3876 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3877 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3878 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3879 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3880 		// this will later be remapped read-only/executable by the
3881 		// ELF initialization code
3882 
3883 	strcpy(name + length, "_data");
3884 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3885 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3886 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3887 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3888 }
3889 
3890 
3891 /*!	Frees all previously kernel arguments areas from the kernel_args structure.
3892 /*!	Frees all areas previously created for the kernel arguments in the
3893 	kernel_args structure. Any boot loader resources contained in those
3894 	arguments must not be accessed anymore past this point.
3895 void
3896 vm_free_kernel_args(kernel_args* args)
3897 {
3898 	uint32 i;
3899 
3900 	TRACE(("vm_free_kernel_args()\n"));
3901 
3902 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3903 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3904 		if (area >= B_OK)
3905 			delete_area(area);
3906 	}
3907 }
3908 
3909 
3910 static void
3911 allocate_kernel_args(kernel_args* args)
3912 {
3913 	TRACE(("allocate_kernel_args()\n"));
3914 
3915 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3916 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3917 
3918 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3919 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3920 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3921 	}
3922 }
3923 
3924 
3925 static void
3926 unreserve_boot_loader_ranges(kernel_args* args)
3927 {
3928 	TRACE(("unreserve_boot_loader_ranges()\n"));
3929 
3930 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3931 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3932 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3933 			args->virtual_allocated_range[i].size);
3934 	}
3935 }
3936 
3937 
3938 static void
3939 reserve_boot_loader_ranges(kernel_args* args)
3940 {
3941 	TRACE(("reserve_boot_loader_ranges()\n"));
3942 
3943 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3944 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3945 
3946 		// If the address is not a kernel address, we just skip it. The
3947 		// architecture-specific code has to deal with it.
3948 		if (!IS_KERNEL_ADDRESS(address)) {
3949 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3950 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3951 			continue;
3952 		}
3953 
3954 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3955 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3956 		if (status < B_OK)
3957 			panic("could not reserve boot loader ranges\n");
3958 	}
3959 }
3960 
3961 
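/*!	Finds a \a size bytes large gap in the kernel_args' virtual allocated
	ranges, optionally aligned to \a alignment, and reserves it by extending
	the adjacent range entry. Returns the base address of the reserved range,
	or 0 if no suitable gap was found. Only used before the VM is up.
*/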
3962 static addr_t
3963 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3964 {
3965 	size = PAGE_ALIGN(size);
3966 
3967 	// find a slot in the virtual allocation addr range
3968 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3969 		// check to see if the space between this one and the last is big enough
3970 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3971 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3972 			+ args->virtual_allocated_range[i - 1].size;
3973 
3974 		addr_t base = alignment > 0
3975 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3976 
3977 		if (base >= KERNEL_BASE && base < rangeStart
3978 				&& rangeStart - base >= size) {
3979 			args->virtual_allocated_range[i - 1].size
3980 				+= base + size - previousRangeEnd;
3981 			return base;
3982 		}
3983 	}
3984 
3985 	// we didn't find a gap between the allocation ranges; that's OK.
3986 	// see if there's a gap after the last one
3987 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3988 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3989 		+ args->virtual_allocated_range[lastEntryIndex].size;
3990 	addr_t base = alignment > 0
3991 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3992 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3993 		args->virtual_allocated_range[lastEntryIndex].size
3994 			+= base + size - lastRangeEnd;
3995 		return base;
3996 	}
3997 
3998 	// see if there's a gap before the first one
3999 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4000 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4001 		base = rangeStart - size;
4002 		if (alignment > 0)
4003 			base = ROUNDDOWN(base, alignment);
4004 
4005 		if (base >= KERNEL_BASE) {
4006 			args->virtual_allocated_range[0].start = base;
4007 			args->virtual_allocated_range[0].size += rangeStart - base;
4008 			return base;
4009 		}
4010 	}
4011 
4012 	return 0;
4013 }
4014 
4015 
4016 static bool
4017 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4018 {
4019 	// TODO: horrible brute-force method of determining if the page can be
4020 	// allocated
4021 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4022 		if (address >= args->physical_memory_range[i].start
4023 			&& address < args->physical_memory_range[i].start
4024 				+ args->physical_memory_range[i].size)
4025 			return true;
4026 	}
4027 	return false;
4028 }
4029 
4030 
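/*!	Allocates a single physical page by growing one of the kernel_args'
	physical allocated ranges, first trying to extend a range upwards, then
	downwards. Returns the physical page number, or 0 if no page could be
	allocated. Only used before the VM is up.
*/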
4031 page_num_t
4032 vm_allocate_early_physical_page(kernel_args* args)
4033 {
4034 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4035 		phys_addr_t nextPage;
4036 
4037 		nextPage = args->physical_allocated_range[i].start
4038 			+ args->physical_allocated_range[i].size;
4039 		// see if the page right after this allocated paddr run can be allocated
4040 		if (i + 1 < args->num_physical_allocated_ranges
4041 			&& args->physical_allocated_range[i + 1].size != 0) {
4042 			// see if the next page will collide with the next allocated range
4043 			if (nextPage >= args->physical_allocated_range[i+1].start)
4044 				continue;
4045 		}
4046 		// see if the next physical page fits in the memory block
4047 		if (is_page_in_physical_memory_range(args, nextPage)) {
4048 			// we got one!
4049 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4050 			return nextPage / B_PAGE_SIZE;
4051 		}
4052 	}
4053 
4054 	// Expanding upwards didn't work, try going downwards.
4055 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4056 		phys_addr_t nextPage;
4057 
4058 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4059 		// see if the page right before this allocated paddr run can be allocated
4060 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4061 			// see if the page will collide with the previous allocated range
4062 			if (nextPage < args->physical_allocated_range[i-1].start
4063 				+ args->physical_allocated_range[i-1].size)
4064 				continue;
4065 		}
4066 		// see if the next physical page fits in the memory block
4067 		if (is_page_in_physical_memory_range(args, nextPage)) {
4068 			// we got one!
4069 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4070 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4071 			return nextPage / B_PAGE_SIZE;
4072 		}
4073 	}
4074 
4075 	return 0;
4076 		// could not allocate a block
4077 }
4078 
4079 
4080 /*!	Uses the kernel_args' physical and virtual memory ranges to allocate
4081 	some pages before the VM is completely up.
4082 */
4083 addr_t
4084 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4085 	uint32 attributes, addr_t alignment)
4086 {
4087 	if (physicalSize > virtualSize)
4088 		physicalSize = virtualSize;
4089 
4090 	// find the vaddr to allocate at
4091 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4092 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4093 	if (virtualBase == 0) {
4094 		panic("vm_allocate_early: could not allocate virtual address\n");
4095 		return 0;
4096 	}
4097 
4098 	// map the pages
4099 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4100 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4101 		if (physicalAddress == 0)
4102 			panic("error allocating early page!\n");
4103 
4104 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4105 
4106 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4107 			physicalAddress * B_PAGE_SIZE, attributes,
4108 			&vm_allocate_early_physical_page);
4109 	}
4110 
4111 	return virtualBase;
4112 }
4113 
4114 
4115 /*!	The main entry point to initialize the VM. */
4116 status_t
4117 vm_init(kernel_args* args)
4118 {
4119 	struct preloaded_image* image;
4120 	void* address;
4121 	status_t err = 0;
4122 	uint32 i;
4123 
4124 	TRACE(("vm_init: entry\n"));
4125 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4126 	err = arch_vm_init(args);
4127 
4128 	// initialize some globals
4129 	vm_page_init_num_pages(args);
4130 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4131 
4132 	slab_init(args);
4133 
4134 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4135 	off_t heapSize = INITIAL_HEAP_SIZE;
4136 	// try to accommodate low-memory systems
4137 	while (heapSize > sAvailableMemory / 8)
4138 		heapSize /= 2;
4139 	if (heapSize < 1024 * 1024)
4140 		panic("vm_init: go buy some RAM please.");
4141 
4142 	// map in the new heap and initialize it
4143 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4144 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4145 	TRACE(("heap at 0x%lx\n", heapBase));
4146 	heap_init(heapBase, heapSize);
4147 #endif
4148 
4149 	// initialize the free page list and physical page mapper
4150 	vm_page_init(args);
4151 
4152 	// initialize the cache allocators
4153 	vm_cache_init(args);
4154 
4155 	{
4156 		status_t error = VMAreaHash::Init();
4157 		if (error != B_OK)
4158 			panic("vm_init: error initializing area hash table\n");
4159 	}
4160 
4161 	VMAddressSpace::Init();
4162 	reserve_boot_loader_ranges(args);
4163 
4164 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4165 	heap_init_post_area();
4166 #endif
4167 
4168 	// Do any further initialization that the architecture-dependent layers may
4169 	// need now
4170 	arch_vm_translation_map_init_post_area(args);
4171 	arch_vm_init_post_area(args);
4172 	vm_page_init_post_area(args);
4173 	slab_init_post_area();
4174 
4175 	// allocate areas to represent memory that is already in use
4176 
4177 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4178 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4179 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4180 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4181 #endif
4182 
4183 	allocate_kernel_args(args);
4184 
4185 	create_preloaded_image_areas(args->kernel_image);
4186 
4187 	// allocate areas for preloaded images
4188 	for (image = args->preloaded_images; image != NULL; image = image->next)
4189 		create_preloaded_image_areas(image);
4190 
4191 	// allocate kernel stacks
4192 	for (i = 0; i < args->num_cpus; i++) {
4193 		char name[64];
4194 
4195 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4196 		address = (void*)args->cpu_kstack[i].start;
4197 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4198 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4199 	}
4200 
4201 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4202 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4203 
4204 #if PARANOID_KERNEL_MALLOC
4205 	vm_block_address_range("uninitialized heap memory",
4206 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4207 #endif
4208 #if PARANOID_KERNEL_FREE
4209 	vm_block_address_range("freed heap memory",
4210 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4211 #endif
4212 
4213 	// create the object cache for the page mappings
4214 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4215 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4216 		NULL, NULL);
4217 	if (gPageMappingsObjectCache == NULL)
4218 		panic("failed to create page mappings object cache");
4219 
4220 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4221 
4222 #if DEBUG_CACHE_LIST
4223 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4224 		virtual_address_restrictions virtualRestrictions = {};
4225 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4226 		physical_address_restrictions physicalRestrictions = {};
4227 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4228 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4229 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4230 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4231 			&physicalRestrictions, (void**)&sCacheInfoTable);
4232 	}
4233 #endif	// DEBUG_CACHE_LIST
4234 
4235 	// add some debugger commands
4236 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4237 	add_debugger_command("area", &dump_area,
4238 		"Dump info about a particular area");
4239 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4240 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4241 #if DEBUG_CACHE_LIST
4242 	if (sCacheInfoTable != NULL) {
4243 		add_debugger_command_etc("caches", &dump_caches,
4244 			"List all VMCache trees",
4245 			"[ \"-c\" ]\n"
4246 			"All cache trees are listed sorted in decreasing order by number "
4247 				"of\n"
4248 			"used pages or, if \"-c\" is specified, by size of committed "
4249 				"memory.\n",
4250 			0);
4251 	}
4252 #endif
4253 	add_debugger_command("avail", &dump_available_memory,
4254 		"Dump available memory");
4255 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4256 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4257 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4258 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4259 	add_debugger_command("string", &display_mem, "dump strings");
4260 
4261 	add_debugger_command_etc("mapping", &dump_mapping_info,
4262 		"Print address mapping information",
4263 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4264 		"Prints low-level page mapping information for a given address. If\n"
4265 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4266 		"address that is looked up in the translation map of the current\n"
4267 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4268 		"\"-r\" is specified, <address> is a physical address that is\n"
4269 		"searched in the translation map of all teams, respectively the team\n"
4270 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4271 		"<address> is the address of a vm_page structure. The behavior is\n"
4272 		"equivalent to specifying \"-r\" with the physical address of that\n"
4273 		"page.\n",
4274 		0);
4275 
4276 	TRACE(("vm_init: exit\n"));
4277 
4278 	vm_cache_init_post_heap();
4279 
4280 	return err;
4281 }
4282 
4283 
4284 status_t
4285 vm_init_post_sem(kernel_args* args)
4286 {
4287 	// This frees all unused boot loader resources and makes their space
4288 	// available again.
4289 	arch_vm_init_end(args);
4290 	unreserve_boot_loader_ranges(args);
4291 
4292 	// Fill in all of the semaphores that were not allocated before.
4293 	// Since we're still single-threaded and only the kernel address space
4294 	// exists, it isn't that hard to find all of the ones we need to create.
4295 
4296 	arch_vm_translation_map_init_post_sem(args);
4297 
4298 	slab_init_post_sem();
4299 
4300 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4301 	heap_init_post_sem();
4302 #endif
4303 
4304 	return B_OK;
4305 }
4306 
4307 
4308 status_t
4309 vm_init_post_thread(kernel_args* args)
4310 {
4311 	vm_page_init_post_thread(args);
4312 	slab_init_post_thread();
4313 	return heap_init_post_thread();
4314 }
4315 
4316 
4317 status_t
4318 vm_init_post_modules(kernel_args* args)
4319 {
4320 	return arch_vm_init_post_modules(args);
4321 }
4322 
4323 
4324 void
4325 permit_page_faults(void)
4326 {
4327 	Thread* thread = thread_get_current_thread();
4328 	if (thread != NULL)
4329 		atomic_add(&thread->page_faults_allowed, 1);
4330 }
4331 
4332 
4333 void
4334 forbid_page_faults(void)
4335 {
4336 	Thread* thread = thread_get_current_thread();
4337 	if (thread != NULL)
4338 		atomic_add(&thread->page_faults_allowed, -1);
4339 }
4340 
4341 
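/*!	Resolves a page fault at \a address; \a faultAddress is the instruction
	pointer at the time of the fault. If the fault cannot be resolved and
	occurred in kernel mode while a fault handler is installed for the current
	thread, \a *newIP is set to that handler's address so the architecture
	specific code can adjust the interrupt frame; otherwise it is set to 0.
*/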
4342 status_t
4343 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4344 	bool isUser, addr_t* newIP)
4345 {
4346 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4347 		faultAddress));
4348 
4349 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4350 
4351 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4352 	VMAddressSpace* addressSpace = NULL;
4353 
4354 	status_t status = B_OK;
4355 	*newIP = 0;
4356 	atomic_add((int32*)&sPageFaults, 1);
4357 
4358 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4359 		addressSpace = VMAddressSpace::GetKernel();
4360 	} else if (IS_USER_ADDRESS(pageAddress)) {
4361 		addressSpace = VMAddressSpace::GetCurrent();
4362 		if (addressSpace == NULL) {
4363 			if (!isUser) {
4364 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4365 					"memory!\n");
4366 				status = B_BAD_ADDRESS;
4367 				TPF(PageFaultError(-1,
4368 					VMPageFaultTracing
4369 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4370 			} else {
4371 				// XXX weird state.
4372 				panic("vm_page_fault: non kernel thread accessing user memory "
4373 					"that doesn't exist!\n");
4374 				status = B_BAD_ADDRESS;
4375 			}
4376 		}
4377 	} else {
4378 		// The hit was probably in the 64k DMZ between kernel and user space;
4379 		// this keeps a user space thread from passing a buffer that crosses
4380 		// into kernel space.
4381 		status = B_BAD_ADDRESS;
4382 		TPF(PageFaultError(-1,
4383 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4384 	}
4385 
4386 	if (status == B_OK) {
4387 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4388 			isUser, NULL);
4389 	}
4390 
4391 	if (status < B_OK) {
4392 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4393 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4394 			strerror(status), address, faultAddress, isWrite, isUser,
4395 			thread_get_current_thread_id());
4396 		if (!isUser) {
4397 			Thread* thread = thread_get_current_thread();
4398 			if (thread != NULL && thread->fault_handler != 0) {
4399 				// this will cause the arch-dependent page fault handler to
4400 				// modify the IP on the interrupt frame or whatever to return
4401 				// to this address
4402 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4403 			} else {
4404 				// unhandled page fault in the kernel
4405 				panic("vm_page_fault: unhandled page fault in kernel space at "
4406 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4407 			}
4408 		} else {
4409 			Thread* thread = thread_get_current_thread();
4410 
4411 #ifdef TRACE_FAULTS
4412 			VMArea* area = NULL;
4413 			if (addressSpace != NULL) {
4414 				addressSpace->ReadLock();
4415 				area = addressSpace->LookupArea(faultAddress);
4416 			}
4417 
4418 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4419 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4420 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4421 				thread->team->Name(), thread->team->id,
4422 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4423 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4424 					area->Base() : 0x0));
4425 
4426 			if (addressSpace != NULL)
4427 				addressSpace->ReadUnlock();
4428 #endif
4429 
4430 			// If the thread has a signal handler for SIGSEGV, we simply
4431 			// send it the signal. Otherwise we notify the user debugger
4432 			// first.
4433 			struct sigaction action;
4434 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4435 					&& action.sa_handler != SIG_DFL
4436 					&& action.sa_handler != SIG_IGN)
4437 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4438 					SIGSEGV)) {
4439 				Signal signal(SIGSEGV,
4440 					status == B_PERMISSION_DENIED
4441 						? SEGV_ACCERR : SEGV_MAPERR,
4442 					EFAULT, thread->team->id);
4443 				signal.SetAddress((void*)address);
4444 				send_signal_to_thread(thread, signal, 0);
4445 			}
4446 		}
4447 	}
4448 
4449 	if (addressSpace != NULL)
4450 		addressSpace->Put();
4451 
4452 	return B_HANDLED_INTERRUPT;
4453 }
4454 
4455 
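/*!	Bundles the state needed while resolving a single page fault: the locked
	address space, the locked cache chain, and the upfront page reservation,
	which is released automatically on destruction.
*/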
4456 struct PageFaultContext {
4457 	AddressSpaceReadLocker	addressSpaceLocker;
4458 	VMCacheChainLocker		cacheChainLocker;
4459 
4460 	VMTranslationMap*		map;
4461 	VMCache*				topCache;
4462 	off_t					cacheOffset;
4463 	vm_page_reservation		reservation;
4464 	bool					isWrite;
4465 
4466 	// return values
4467 	vm_page*				page;
4468 	bool					restart;
4469 	bool					pageAllocated;
4470 
4471 
4472 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4473 		:
4474 		addressSpaceLocker(addressSpace, true),
4475 		map(addressSpace->TranslationMap()),
4476 		isWrite(isWrite)
4477 	{
4478 	}
4479 
4480 	~PageFaultContext()
4481 	{
4482 		UnlockAll();
4483 		vm_page_unreserve_pages(&reservation);
4484 	}
4485 
4486 	void Prepare(VMCache* topCache, off_t cacheOffset)
4487 	{
4488 		this->topCache = topCache;
4489 		this->cacheOffset = cacheOffset;
4490 		page = NULL;
4491 		restart = false;
4492 		pageAllocated = false;
4493 
4494 		cacheChainLocker.SetTo(topCache);
4495 	}
4496 
4497 	void UnlockAll(VMCache* exceptCache = NULL)
4498 	{
4499 		topCache = NULL;
4500 		addressSpaceLocker.Unlock();
4501 		cacheChainLocker.Unlock(exceptCache);
4502 	}
4503 };
4504 
4505 
4506 /*!	Gets the page that should be mapped into the area.
4507 	Returns an error code other than \c B_OK, if the page couldn't be found or
4508 	paged in. The locking state of the address space and the caches is undefined
4509 	in that case.
4510 	Returns \c B_OK with \c context.restart set to \c true, if the function
4511 	had to unlock the address space and all caches and is supposed to be called
4512 	again.
4513 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4514 	found. It is returned in \c context.page. The address space will still be
4515 	locked as well as all caches starting from the top cache to at least the
4516 	cache the page lives in.
4517 */
4518 static status_t
4519 fault_get_page(PageFaultContext& context)
4520 {
4521 	VMCache* cache = context.topCache;
4522 	VMCache* lastCache = NULL;
4523 	vm_page* page = NULL;
4524 
4525 	while (cache != NULL) {
4526 		// We already hold the lock of the cache at this point.
4527 
4528 		lastCache = cache;
4529 
4530 		page = cache->LookupPage(context.cacheOffset);
4531 		if (page != NULL && page->busy) {
4532 			// the page is busy -- wait for it to become unbusy
4533 			context.UnlockAll(cache);
4534 			cache->ReleaseRefLocked();
4535 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4536 
4537 			// restart the whole process
4538 			context.restart = true;
4539 			return B_OK;
4540 		}
4541 
4542 		if (page != NULL)
4543 			break;
4544 
4545 		// The current cache does not contain the page we're looking for.
4546 
4547 		// see if the backing store has it
4548 		if (cache->HasPage(context.cacheOffset)) {
4549 			// insert a fresh page and mark it busy -- we're going to read it in
4550 			page = vm_page_allocate_page(&context.reservation,
4551 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4552 			cache->InsertPage(page, context.cacheOffset);
4553 
4554 			// We need to unlock all caches and the address space while reading
4555 			// the page in. Keep a reference to the cache around.
4556 			cache->AcquireRefLocked();
4557 			context.UnlockAll();
4558 
4559 			// read the page in
4560 			generic_io_vec vec;
4561 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4562 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4563 
4564 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4565 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4566 
4567 			cache->Lock();
4568 
4569 			if (status < B_OK) {
4570 				// on error remove and free the page
4571 				dprintf("reading page from cache %p returned: %s!\n",
4572 					cache, strerror(status));
4573 
4574 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4575 				cache->RemovePage(page);
4576 				vm_page_set_state(page, PAGE_STATE_FREE);
4577 
4578 				cache->ReleaseRefAndUnlock();
4579 				return status;
4580 			}
4581 
4582 			// mark the page unbusy again
4583 			cache->MarkPageUnbusy(page);
4584 
4585 			DEBUG_PAGE_ACCESS_END(page);
4586 
4587 			// Since we needed to unlock everything temporarily, the area
4588 			// situation might have changed. So we need to restart the whole
4589 			// process.
4590 			cache->ReleaseRefAndUnlock();
4591 			context.restart = true;
4592 			return B_OK;
4593 		}
4594 
4595 		cache = context.cacheChainLocker.LockSourceCache();
4596 	}
4597 
4598 	if (page == NULL) {
4599 		// There was no adequate page; determine the cache for a clean one.
4600 		// Read-only pages go into the deepest cache, only the topmost cache
4601 		// may have direct write access.
4602 		cache = context.isWrite ? context.topCache : lastCache;
4603 
4604 		// allocate a clean page
4605 		page = vm_page_allocate_page(&context.reservation,
4606 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4607 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4608 			page->physical_page_number));
4609 
4610 		// insert the new page into our cache
4611 		cache->InsertPage(page, context.cacheOffset);
4612 		context.pageAllocated = true;
4613 	} else if (page->Cache() != context.topCache && context.isWrite) {
4614 		// We have a page that has the data we want, but in the wrong cache
4615 		// object so we need to copy it and stick it into the top cache.
4616 		vm_page* sourcePage = page;
4617 
4618 		// TODO: If memory is low, it might be a good idea to steal the page
4619 		// from our source cache -- if possible, that is.
4620 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4621 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4622 
4623 		// To not needlessly kill concurrency we unlock all caches but the top
4624 		// one while copying the page. Lacking another mechanism to ensure that
4625 		// the source page doesn't disappear, we mark it busy.
4626 		sourcePage->busy = true;
4627 		context.cacheChainLocker.UnlockKeepRefs(true);
4628 
4629 		// copy the page
4630 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4631 			sourcePage->physical_page_number * B_PAGE_SIZE);
4632 
4633 		context.cacheChainLocker.RelockCaches(true);
4634 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4635 
4636 		// insert the new page into our cache
4637 		context.topCache->InsertPage(page, context.cacheOffset);
4638 		context.pageAllocated = true;
4639 	} else
4640 		DEBUG_PAGE_ACCESS_START(page);
4641 
4642 	context.page = page;
4643 	return B_OK;
4644 }
4645 
4646 
4647 /*!	Makes sure the address in the given address space is mapped.
4648 
4649 	\param addressSpace The address space.
4650 	\param originalAddress The address. Doesn't need to be page aligned.
4651 	\param isWrite If \c true the address shall be write-accessible.
4652 	\param isUser If \c true the access is requested by a userland team.
4653 	\param wirePage On success, if non \c NULL, the wired count of the page
4654 		mapped at the given address is incremented and the page is returned
4655 		via this parameter.
4656 	\return \c B_OK on success, another error code otherwise.
4657 */
4658 static status_t
4659 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4660 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4661 {
4662 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4663 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4664 		originalAddress, isWrite, isUser));
4665 
4666 	PageFaultContext context(addressSpace, isWrite);
4667 
4668 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4669 	status_t status = B_OK;
4670 
4671 	addressSpace->IncrementFaultCount();
4672 
4673 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4674 	// the pages upfront makes sure we don't have any cache locked, so that the
4675 	// page daemon/thief can do their job without problems.
4676 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4677 		originalAddress);
4678 	context.addressSpaceLocker.Unlock();
4679 	vm_page_reserve_pages(&context.reservation, reservePages,
4680 		addressSpace == VMAddressSpace::Kernel()
4681 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4682 
4683 	while (true) {
4684 		context.addressSpaceLocker.Lock();
4685 
4686 		// get the area the fault was in
4687 		VMArea* area = addressSpace->LookupArea(address);
4688 		if (area == NULL) {
4689 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4690 				"space\n", originalAddress);
4691 			TPF(PageFaultError(-1,
4692 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4693 			status = B_BAD_ADDRESS;
4694 			break;
4695 		}
4696 
4697 		// check permissions
4698 		uint32 protection = get_area_page_protection(area, address);
4699 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4700 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4701 				area->id, (void*)originalAddress);
4702 			TPF(PageFaultError(area->id,
4703 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4704 			status = B_PERMISSION_DENIED;
4705 			break;
4706 		}
4707 		if (isWrite && (protection
4708 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4709 			dprintf("write access attempted on write-protected area 0x%"
4710 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4711 			TPF(PageFaultError(area->id,
4712 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4713 			status = B_PERMISSION_DENIED;
4714 			break;
4715 		} else if (isExecute && (protection
4716 				& (B_EXECUTE_AREA
4717 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4718 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4719 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4720 			TPF(PageFaultError(area->id,
4721 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4722 			status = B_PERMISSION_DENIED;
4723 			break;
4724 		} else if (!isWrite && !isExecute && (protection
4725 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4726 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4727 				" at %p\n", area->id, (void*)originalAddress);
4728 			TPF(PageFaultError(area->id,
4729 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4730 			status = B_PERMISSION_DENIED;
4731 			break;
4732 		}
4733 
4734 		// We have the area, it was a valid access, so let's try to resolve the
4735 		// page fault now.
4736 		// At first, the top most cache from the area is investigated.
4737 
4738 		context.Prepare(vm_area_get_locked_cache(area),
4739 			address - area->Base() + area->cache_offset);
4740 
4741 		// See if this cache has a fault handler -- this will do all the work
4742 		// for us.
4743 		{
4744 			// Note, since the page fault is resolved with interrupts enabled,
4745 			// the fault handler could be called more than once for the same
4746 			// reason -- the store must take this into account.
4747 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4748 			if (status != B_BAD_HANDLER)
4749 				break;
4750 		}
4751 
4752 		// The top most cache has no fault handler, so let's see if the cache or
4753 		// its sources already have the page we're searching for (we're going
4754 		// from top to bottom).
4755 		status = fault_get_page(context);
4756 		if (status != B_OK) {
4757 			TPF(PageFaultError(area->id, status));
4758 			break;
4759 		}
4760 
4761 		if (context.restart)
4762 			continue;
4763 
4764 		// All went fine, all there is left to do is to map the page into the
4765 		// address space.
4766 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4767 			context.page));
4768 
4769 		// If the page doesn't reside in the area's cache, we need to make sure
4770 		// it's mapped read-only, so that we cannot overwrite someone else's
4771 		// data (copy-on-write)
4772 		uint32 newProtection = protection;
4773 		if (context.page->Cache() != context.topCache && !isWrite)
4774 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4775 
4776 		bool unmapPage = false;
4777 		bool mapPage = true;
4778 
4779 		// check whether there's already a page mapped at the address
4780 		context.map->Lock();
4781 
4782 		phys_addr_t physicalAddress;
4783 		uint32 flags;
4784 		vm_page* mappedPage = NULL;
4785 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4786 			&& (flags & PAGE_PRESENT) != 0
4787 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4788 				!= NULL) {
4789 			// Yep there's already a page. If it's ours, we can simply adjust
4790 			// its protection. Otherwise we have to unmap it.
4791 			if (mappedPage == context.page) {
4792 				context.map->ProtectPage(area, address, newProtection);
4793 					// Note: We assume that ProtectPage() is atomic (i.e.
4794 					// the page isn't temporarily unmapped), otherwise we'd have
4795 					// to make sure it isn't wired.
4796 				mapPage = false;
4797 			} else
4798 				unmapPage = true;
4799 		}
4800 
4801 		context.map->Unlock();
4802 
4803 		if (unmapPage) {
4804 			// If the page is wired, we can't unmap it. Wait until it is unwired
4805 			// again and restart. Note that the page cannot be wired for
4806 			// writing, since it isn't in the topmost cache. So we can safely
4807 			// ignore ranges wired for writing (our own and other concurrent
4808 			// wiring attempts in progress) and in fact have to do that to avoid
4809 			// a deadlock.
4810 			VMAreaUnwiredWaiter waiter;
4811 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4812 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4813 				// unlock everything and wait
4814 				if (context.pageAllocated) {
4815 					// ... but since we allocated a page and inserted it into
4816 					// the top cache, remove and free it first. Otherwise we'd
4817 					// have a page from a lower cache mapped while an upper
4818 					// cache has a page that would shadow it.
4819 					context.topCache->RemovePage(context.page);
4820 					vm_page_free_etc(context.topCache, context.page,
4821 						&context.reservation);
4822 				} else
4823 					DEBUG_PAGE_ACCESS_END(context.page);
4824 
4825 				context.UnlockAll();
4826 				waiter.waitEntry.Wait();
4827 				continue;
4828 			}
4829 
4830 			// Note: The mapped page is a page of a lower cache. We are
4831 			// guaranteed to have that cache locked, our new page is a copy of
4832 			// that page, and the page is not busy. The logic for that guarantee
4833 			// is as follows: Since the page is mapped, it must live in the top
4834 			// cache (ruled out above) or any of its lower caches, and there is
4835 			// (was before the new page was inserted) no other page in any
4836 			// cache between the top cache and the page's cache (otherwise that
4837 			// would be mapped instead). That in turn means that our algorithm
4838 			// must have found it and therefore it cannot be busy either.
4839 			DEBUG_PAGE_ACCESS_START(mappedPage);
4840 			unmap_page(area, address);
4841 			DEBUG_PAGE_ACCESS_END(mappedPage);
4842 		}
4843 
4844 		if (mapPage) {
4845 			if (map_page(area, context.page, address, newProtection,
4846 					&context.reservation) != B_OK) {
4847 				// Mapping can only fail when the page mapping object couldn't
4848 				// be allocated. Save for the missing mapping everything is
4849 				// fine, though. If this was a regular page fault, we'll simply
4850 				// leave and probably fault again. To make sure we'll have more
4851 				// luck then, we ensure that the minimum object reserve is
4852 				// available.
4853 				DEBUG_PAGE_ACCESS_END(context.page);
4854 
4855 				context.UnlockAll();
4856 
4857 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4858 						!= B_OK) {
4859 					// Apparently the situation is serious. Let's get ourselves
4860 					// killed.
4861 					status = B_NO_MEMORY;
4862 				} else if (wirePage != NULL) {
4863 					// The caller expects us to wire the page. Since
4864 					// object_cache_reserve() succeeded, we should now be able
4865 					// to allocate a mapping structure. Restart.
4866 					continue;
4867 				}
4868 
4869 				break;
4870 			}
4871 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4872 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4873 
4874 		// also wire the page, if requested
4875 		if (wirePage != NULL && status == B_OK) {
4876 			increment_page_wired_count(context.page);
4877 			*wirePage = context.page;
4878 		}
4879 
4880 		DEBUG_PAGE_ACCESS_END(context.page);
4881 
4882 		break;
4883 	}
4884 
4885 	return status;
4886 }
4887 
4888 
4889 status_t
4890 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4891 {
4892 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4893 }
4894 
4895 status_t
4896 vm_put_physical_page(addr_t vaddr, void* handle)
4897 {
4898 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4899 }
4900 
4901 
4902 status_t
4903 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4904 	void** _handle)
4905 {
4906 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4907 }
4908 
4909 status_t
4910 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4911 {
4912 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4913 }
4914 
4915 
4916 status_t
4917 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4918 {
4919 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4920 }
4921 
4922 status_t
4923 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4924 {
4925 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4926 }
4927 
4928 
4929 void
4930 vm_get_info(system_info* info)
4931 {
4932 	swap_get_info(info);
4933 
4934 	MutexLocker locker(sAvailableMemoryLock);
4935 	info->needed_memory = sNeededMemory;
4936 	info->free_memory = sAvailableMemory;
4937 }
4938 
4939 
4940 uint32
4941 vm_num_page_faults(void)
4942 {
4943 	return sPageFaults;
4944 }
4945 
4946 
4947 off_t
4948 vm_available_memory(void)
4949 {
4950 	MutexLocker locker(sAvailableMemoryLock);
4951 	return sAvailableMemory;
4952 }
4953 
4954 
4955 off_t
4956 vm_available_not_needed_memory(void)
4957 {
4958 	MutexLocker locker(sAvailableMemoryLock);
4959 	return sAvailableMemory - sNeededMemory;
4960 }
4961 
4962 
4963 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4964 	debugger.
4965 */
4966 off_t
4967 vm_available_not_needed_memory_debug(void)
4968 {
4969 	return sAvailableMemory - sNeededMemory;
4970 }
4971 
4972 
4973 size_t
4974 vm_kernel_address_space_left(void)
4975 {
4976 	return VMAddressSpace::Kernel()->FreeSpace();
4977 }
4978 
4979 
4980 void
4981 vm_unreserve_memory(size_t amount)
4982 {
4983 	mutex_lock(&sAvailableMemoryLock);
4984 
4985 	sAvailableMemory += amount;
4986 
4987 	mutex_unlock(&sAvailableMemoryLock);
4988 }
4989 
4990 
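/*!	Tries to reserve \a amount bytes of the available memory while leaving the
	memory reserve associated with \a priority untouched. If that isn't
	possible immediately, the low resource manager is notified and the
	function retries until the relative \a timeout (in microseconds) has
	expired, e.g. vm_try_reserve_memory(size, VM_PRIORITY_USER, 1000000)
	waits up to one second. Returns \c B_OK on success, \c B_NO_MEMORY
	otherwise.
*/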
4991 status_t
4992 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4993 {
4994 	size_t reserve = kMemoryReserveForPriority[priority];
4995 
4996 	MutexLocker locker(sAvailableMemoryLock);
4997 
4998 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4999 
5000 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5001 		sAvailableMemory -= amount;
5002 		return B_OK;
5003 	}
5004 
5005 	if (timeout <= 0)
5006 		return B_NO_MEMORY;
5007 
5008 	// turn timeout into an absolute timeout
5009 	timeout += system_time();
5010 
5011 	// loop until we've got the memory or the timeout occurs
5012 	do {
5013 		sNeededMemory += amount;
5014 
5015 		// call the low resource manager
5016 		locker.Unlock();
5017 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5018 			B_ABSOLUTE_TIMEOUT, timeout);
5019 		locker.Lock();
5020 
5021 		sNeededMemory -= amount;
5022 
5023 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5024 			sAvailableMemory -= amount;
5025 			return B_OK;
5026 		}
5027 	} while (timeout > system_time());
5028 
5029 	return B_NO_MEMORY;
5030 }
5031 
5032 
5033 status_t
5034 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5035 {
5036 	// NOTE: The caller is responsible for synchronizing calls to this function!
5037 
5038 	AddressSpaceReadLocker locker;
5039 	VMArea* area;
5040 	status_t status = locker.SetFromArea(id, area);
5041 	if (status != B_OK)
5042 		return status;
5043 
5044 	// nothing to do, if the type doesn't change
5045 	uint32 oldType = area->MemoryType();
5046 	if (type == oldType)
5047 		return B_OK;
5048 
5049 	// set the memory type of the area and the mapped pages
5050 	VMTranslationMap* map = area->address_space->TranslationMap();
5051 	map->Lock();
5052 	area->SetMemoryType(type);
5053 	map->ProtectArea(area, area->protection);
5054 	map->Unlock();
5055 
5056 	// set the physical memory type
5057 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
5058 	if (error != B_OK) {
5059 		// reset the memory type of the area and the mapped pages
5060 		map->Lock();
5061 		area->SetMemoryType(oldType);
5062 		map->ProtectArea(area, area->protection);
5063 		map->Unlock();
5064 		return error;
5065 	}
5066 
5067 	return B_OK;
5069 }
5070 
5071 
5072 /*!	This function enforces some protection properties:
5073 	 - kernel areas must be W^X (after kernel startup)
5074 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5075 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5076 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
5077 	   and B_KERNEL_WRITE_AREA.
5078 */
5079 static void
5080 fix_protection(uint32* protection)
5081 {
5082 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5083 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5084 			|| (*protection & B_WRITE_AREA) != 0)
5085 		&& !gKernelStartup)
5086 		panic("kernel areas cannot be both writable and executable!");
5087 
5088 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5089 		if ((*protection & B_USER_PROTECTION) == 0
5090 			|| (*protection & B_WRITE_AREA) != 0)
5091 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5092 		else
5093 			*protection |= B_KERNEL_READ_AREA;
5094 	}
5095 }
5096 
5097 
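/*!	Fills in \a info with the properties of \a area. The copy/in/out counters
	are not tracked yet and are reported as 0; the RAM size is approximated by
	the page count of the area's cache.
*/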
5098 static void
5099 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5100 {
5101 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5102 	info->area = area->id;
5103 	info->address = (void*)area->Base();
5104 	info->size = area->Size();
5105 	info->protection = area->protection;
5106 	info->lock = B_FULL_LOCK;
5107 	info->team = area->address_space->ID();
5108 	info->copy_count = 0;
5109 	info->in_count = 0;
5110 	info->out_count = 0;
5111 		// TODO: retrieve real values here!
5112 
5113 	VMCache* cache = vm_area_get_locked_cache(area);
5114 
5115 	// Note, this is a simplification; the cache could be larger than this area
5116 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5117 
5118 	vm_area_put_locked_cache(cache);
5119 }
5120 
5121 
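/*!	Resizes all areas attached to the cache of the area \a areaID to
	\a newSize. Only areas backed by a RAM cache can be resized; when
	shrinking, the truncated ranges must not be wired. Unless \a kernel is
	\c true, kernel areas and areas with \c B_KERNEL_AREA protection are
	refused.
*/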
5122 static status_t
5123 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5124 {
5125 	// is newSize a multiple of B_PAGE_SIZE?
5126 	if (newSize & (B_PAGE_SIZE - 1))
5127 		return B_BAD_VALUE;
5128 
5129 	// lock all affected address spaces and the cache
5130 	VMArea* area;
5131 	VMCache* cache;
5132 
5133 	MultiAddressSpaceLocker locker;
5134 	AreaCacheLocker cacheLocker;
5135 
5136 	status_t status;
5137 	size_t oldSize;
5138 	bool anyKernelArea;
5139 	bool restart;
5140 
5141 	do {
5142 		anyKernelArea = false;
5143 		restart = false;
5144 
5145 		locker.Unset();
5146 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5147 		if (status != B_OK)
5148 			return status;
5149 		cacheLocker.SetTo(cache, true);	// already locked
5150 
5151 		// enforce restrictions
5152 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5153 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5154 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5155 				"resize kernel area %" B_PRId32 " (%s)\n",
5156 				team_get_current_team_id(), areaID, area->name);
5157 			return B_NOT_ALLOWED;
5158 		}
5159 		// TODO: Enforce all restrictions (team, etc.)!
5160 
5161 		oldSize = area->Size();
5162 		if (newSize == oldSize)
5163 			return B_OK;
5164 
5165 		if (cache->type != CACHE_TYPE_RAM)
5166 			return B_NOT_ALLOWED;
5167 
5168 		if (oldSize < newSize) {
5169 			// We need to check if all areas of this cache can be resized.
5170 			for (VMArea* current = cache->areas; current != NULL;
5171 					current = current->cache_next) {
5172 				if (!current->address_space->CanResizeArea(current, newSize))
5173 					return B_ERROR;
5174 				anyKernelArea
5175 					|= current->address_space == VMAddressSpace::Kernel();
5176 			}
5177 		} else {
5178 			// We're shrinking the areas, so we must make sure the affected
5179 			// ranges are not wired.
5180 			for (VMArea* current = cache->areas; current != NULL;
5181 					current = current->cache_next) {
5182 				anyKernelArea
5183 					|= current->address_space == VMAddressSpace::Kernel();
5184 
5185 				if (wait_if_area_range_is_wired(current,
5186 						current->Base() + newSize, oldSize - newSize, &locker,
5187 						&cacheLocker)) {
5188 					restart = true;
5189 					break;
5190 				}
5191 			}
5192 		}
5193 	} while (restart);
5194 
5195 	// Okay, looks good so far, so let's do it
5196 
5197 	int priority = kernel && anyKernelArea
5198 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5199 	uint32 allocationFlags = kernel && anyKernelArea
5200 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5201 
5202 	if (oldSize < newSize) {
5203 		// Growing the cache can fail, so we do it first.
5204 		status = cache->Resize(cache->virtual_base + newSize, priority);
5205 		if (status != B_OK)
5206 			return status;
5207 	}
5208 
5209 	for (VMArea* current = cache->areas; current != NULL;
5210 			current = current->cache_next) {
5211 		status = current->address_space->ResizeArea(current, newSize,
5212 			allocationFlags);
5213 		if (status != B_OK)
5214 			break;
5215 
5216 		// We also need to unmap all pages beyond the new size, if the area has
5217 		// shrunk
5218 		if (newSize < oldSize) {
5219 			VMCacheChainLocker cacheChainLocker(cache);
5220 			cacheChainLocker.LockAllSourceCaches();
5221 
5222 			unmap_pages(current, current->Base() + newSize,
5223 				oldSize - newSize);
5224 
5225 			cacheChainLocker.Unlock(cache);
5226 		}
5227 	}
5228 
5229 	if (status == B_OK) {
5230 		// Shrink or grow individual page protections if in use.
5231 		if (area->page_protections != NULL) {
5232 			size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5233 			uint8* newProtections
5234 				= (uint8*)realloc(area->page_protections, bytes);
5235 			if (newProtections == NULL)
5236 				status = B_NO_MEMORY;
5237 			else {
5238 				area->page_protections = newProtections;
5239 
5240 				if (oldSize < newSize) {
5241 					// init the additional page protections to that of the area
5242 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5243 					uint32 areaProtection = area->protection
5244 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5245 					memset(area->page_protections + offset,
5246 						areaProtection | (areaProtection << 4), bytes - offset);
5247 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5248 						uint8& entry = area->page_protections[offset - 1];
5249 						entry = (entry & 0x0f) | (areaProtection << 4);
5250 					}
5251 				}
5252 			}
5253 		}
5254 	}
5255 
5256 	// shrinking the cache can't fail, so we do it now
5257 	if (status == B_OK && newSize < oldSize)
5258 		status = cache->Resize(cache->virtual_base + newSize, priority);
5259 
5260 	if (status != B_OK) {
5261 		// Something failed -- resize the areas back to their original size.
5262 		// This can fail, too, in which case we're seriously screwed.
5263 		for (VMArea* current = cache->areas; current != NULL;
5264 				current = current->cache_next) {
5265 			if (current->address_space->ResizeArea(current, oldSize,
5266 					allocationFlags) != B_OK) {
5267 				panic("vm_resize_area(): Failed and unable to restore the "
5268 					"original state.");
5269 			}
5270 		}
5271 
5272 		cache->Resize(cache->virtual_base + oldSize, priority);
5273 	}
5274 
5275 	// TODO: we must honour the lock restrictions of this area
5276 	return status;
5277 }
5278 
5279 
5280 status_t
5281 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5282 {
5283 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5284 }
5285 
5286 
5287 status_t
5288 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5289 {
5290 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5291 }
5292 
5293 
5294 status_t
5295 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5296 	bool user)
5297 {
5298 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5299 }
5300 
5301 
5302 void
5303 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5304 {
5305 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5306 }
5307 
5308 
5309 /*!	Copies a range of memory directly from/to a page that might not be mapped
5310 	at the moment.
5311 
5312 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5313 	walks through the respective area's cache chain to find the physical page
5314 	and copies from/to it directly.
5315 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5316 	must not cross a page boundary.
5317 
5318 	\param teamID The team ID identifying the address space \a unsafeMemory is
5319 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5320 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5321 		is passed, the address space of the thread returned by
5322 		debug_get_debugged_thread() is used.
5323 	\param unsafeMemory The start of the unsafe memory range to be copied
5324 		from/to.
5325 	\param buffer A safely accessible kernel buffer to be copied from/to.
5326 	\param size The number of bytes to be copied.
5327 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5328 		\a unsafeMemory, the other way around otherwise.
5329 */
5330 status_t
5331 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5332 	size_t size, bool copyToUnsafe)
5333 {
5334 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5335 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5336 		return B_BAD_VALUE;
5337 	}
5338 
5339 	// get the address space for the debugged thread
5340 	VMAddressSpace* addressSpace;
5341 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5342 		addressSpace = VMAddressSpace::Kernel();
5343 	} else if (teamID == B_CURRENT_TEAM) {
5344 		Thread* thread = debug_get_debugged_thread();
5345 		if (thread == NULL || thread->team == NULL)
5346 			return B_BAD_ADDRESS;
5347 
5348 		addressSpace = thread->team->address_space;
5349 	} else
5350 		addressSpace = VMAddressSpace::DebugGet(teamID);
5351 
5352 	if (addressSpace == NULL)
5353 		return B_BAD_ADDRESS;
5354 
5355 	// get the area
5356 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5357 	if (area == NULL)
5358 		return B_BAD_ADDRESS;
5359 
5360 	// search the page
5361 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5362 		+ area->cache_offset;
5363 	VMCache* cache = area->cache;
5364 	vm_page* page = NULL;
5365 	while (cache != NULL) {
5366 		page = cache->DebugLookupPage(cacheOffset);
5367 		if (page != NULL)
5368 			break;
5369 
5370 		// Page not found in this cache -- if it is paged out, we must not try
5371 		// to get it from lower caches.
5372 		if (cache->DebugHasPage(cacheOffset))
5373 			break;
5374 
5375 		cache = cache->source;
5376 	}
5377 
5378 	if (page == NULL)
5379 		return B_UNSUPPORTED;
5380 
5381 	// copy from/to physical memory
5382 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5383 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5384 
5385 	if (copyToUnsafe) {
5386 		if (page->Cache() != area->cache)
5387 			return B_UNSUPPORTED;
5388 
5389 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5390 	}
5391 
5392 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5393 }
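
// Usage sketch (illustrative comment only, not compiled): how a kernel
// debugger command might read a 32 bit value from a debugged team via
// vm_debug_copy_page_memory(). "unsafePointer" is a hypothetical address
// taken from the debugged team; the copied range must not cross a page
// boundary, as documented above.
//
//	uint32 value;
//	status_t status = vm_debug_copy_page_memory(B_CURRENT_TEAM, unsafePointer,
//		&value, sizeof(value), false);
//	if (status == B_OK)
//		kprintf("value: %#" B_PRIx32 "\n", value);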
5394 
5395 
5396 static inline bool
5397 validate_user_range(const void* addr, size_t size)
5398 {
5399 	addr_t address = (addr_t)addr;
5400 
5401 	// Check for overflows on all addresses.
5402 	if ((address + size) < address)
5403 		return false;
5404 
5405 	// Validate that the address does not cross the kernel/user boundary.
5406 	if (IS_USER_ADDRESS(address))
5407 		return IS_USER_ADDRESS(address + size);
5408 	else
5409 		return !IS_USER_ADDRESS(address + size);
5410 }
5411 
5412 
5413 //	#pragma mark - kernel public API
5414 
5415 
5416 status_t
5417 user_memcpy(void* to, const void* from, size_t size)
5418 {
5419 	if (!validate_user_range(to, size) || !validate_user_range(from, size))
5420 		return B_BAD_ADDRESS;
5421 
5422 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5423 		return B_BAD_ADDRESS;
5424 
5425 	return B_OK;
5426 }
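
// Usage sketch (illustrative comment only, not compiled): the copy-in
// pattern used by the syscalls further down in this file. "userInfo" and
// the my_info type are hypothetical placeholders for a userland pointer
// and the structure it points to.
//
//	my_info info;
//	if (!IS_USER_ADDRESS(userInfo)
//		|| user_memcpy(&info, userInfo, sizeof(info)) != B_OK)
//		return B_BAD_ADDRESS;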
5427 
5428 
5429 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5430 	the string in \a to, NULL-terminating the result.
5431 
5432 	\param to Pointer to the destination C-string.
5433 	\param from Pointer to the source C-string.
5434 	\param size Size in bytes of the string buffer pointed to by \a to.
5435 
5436 	\return strlen(\a from) on success, or a negative error code on failure.
5437 */
5438 ssize_t
5439 user_strlcpy(char* to, const char* from, size_t size)
5440 {
5441 	if (to == NULL && size != 0)
5442 		return B_BAD_VALUE;
5443 	if (from == NULL)
5444 		return B_BAD_ADDRESS;
5445 
5446 	// Protect the source address from overflows.
5447 	size_t maxSize = size;
5448 	if ((addr_t)from + maxSize < (addr_t)from)
5449 		maxSize -= (addr_t)from + maxSize;
5450 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5451 		maxSize = USER_TOP - (addr_t)from;
5452 
5453 	if (!validate_user_range(to, maxSize))
5454 		return B_BAD_ADDRESS;
5455 
5456 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5457 	if (result < 0)
5458 		return result;
5459 
5460 	// If we hit the address overflow boundary, fail.
5461 	if ((size_t)result >= maxSize && maxSize < size)
5462 		return B_BAD_ADDRESS;
5463 
5464 	return result;
5465 }
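
// Usage sketch (illustrative comment only, not compiled): copying a name
// from userland and detecting truncation via the strlen()-style return
// value documented above. "userName" is a hypothetical userland pointer.
//
//	char name[B_OS_NAME_LENGTH];
//	ssize_t length = user_strlcpy(name, userName, sizeof(name));
//	if (length < 0)
//		return B_BAD_ADDRESS;
//	if (length >= (ssize_t)sizeof(name))
//		return B_NAME_TOO_LONG;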
5466 
5467 
5468 status_t
5469 user_memset(void* s, char c, size_t count)
5470 {
5471 	if (!validate_user_range(s, count))
5472 		return B_BAD_ADDRESS;
5473 
5474 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5475 		return B_BAD_ADDRESS;
5476 
5477 	return B_OK;
5478 }
5479 
5480 
5481 /*!	Wires a single page at the given address.
5482 
5483 	\param team The team whose address space the address belongs to. Supports
5484 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5485 		parameter is ignored.
5486 	\param address The virtual address to wire down. Does not need to
5487 		be page aligned.
5488 	\param writable If \c true the page shall be writable.
5489 	\param info On success the info is filled in, among other things
5490 		containing the physical address the given virtual one translates to.
5491 	\return \c B_OK if the page could be wired, another error code otherwise.
5492 */
5493 status_t
5494 vm_wire_page(team_id team, addr_t address, bool writable,
5495 	VMPageWiringInfo* info)
5496 {
5497 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5498 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5499 
5500 	// compute the page protection that is required
5501 	bool isUser = IS_USER_ADDRESS(address);
5502 	uint32 requiredProtection = PAGE_PRESENT
5503 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5504 	if (writable)
5505 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5506 
5507 	// get and read lock the address space
5508 	VMAddressSpace* addressSpace = NULL;
5509 	if (isUser) {
5510 		if (team == B_CURRENT_TEAM)
5511 			addressSpace = VMAddressSpace::GetCurrent();
5512 		else
5513 			addressSpace = VMAddressSpace::Get(team);
5514 	} else
5515 		addressSpace = VMAddressSpace::GetKernel();
5516 	if (addressSpace == NULL)
5517 		return B_ERROR;
5518 
5519 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5520 
5521 	VMTranslationMap* map = addressSpace->TranslationMap();
5522 	status_t error = B_OK;
5523 
5524 	// get the area
5525 	VMArea* area = addressSpace->LookupArea(pageAddress);
5526 	if (area == NULL) {
5527 		addressSpace->Put();
5528 		return B_BAD_ADDRESS;
5529 	}
5530 
5531 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5532 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5533 
5534 	// mark the area range wired
5535 	area->Wire(&info->range);
5536 
5537 	// Lock the area's cache chain and the translation map. Needed to look
5538 	// up the page and play with its wired count.
5539 	cacheChainLocker.LockAllSourceCaches();
5540 	map->Lock();
5541 
5542 	phys_addr_t physicalAddress;
5543 	uint32 flags;
5544 	vm_page* page;
5545 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5546 		&& (flags & requiredProtection) == requiredProtection
5547 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5548 			!= NULL) {
5549 		// Already mapped with the correct permissions -- just increment
5550 		// the page's wired count.
5551 		increment_page_wired_count(page);
5552 
5553 		map->Unlock();
5554 		cacheChainLocker.Unlock();
5555 		addressSpaceLocker.Unlock();
5556 	} else {
5557 		// Let vm_soft_fault() map the page for us, if possible. We need
5558 		// to fully unlock to avoid deadlocks. Since we have already
5559 		// wired the area itself, nothing disturbing will happen with it
5560 		// in the meantime.
5561 		map->Unlock();
5562 		cacheChainLocker.Unlock();
5563 		addressSpaceLocker.Unlock();
5564 
5565 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5566 			isUser, &page);
5567 
5568 		if (error != B_OK) {
5569 			// The page could not be mapped -- clean up.
5570 			VMCache* cache = vm_area_get_locked_cache(area);
5571 			area->Unwire(&info->range);
5572 			cache->ReleaseRefAndUnlock();
5573 			addressSpace->Put();
5574 			return error;
5575 		}
5576 	}
5577 
5578 	info->physicalAddress
5579 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5580 			+ address % B_PAGE_SIZE;
5581 	info->page = page;
5582 
5583 	return B_OK;
5584 }
5585 
5586 
5587 /*!	Unwires a single page previously wired via vm_wire_page().
5588 
5589 	\param info The same object passed to vm_wire_page() before.
5590 */
5591 void
5592 vm_unwire_page(VMPageWiringInfo* info)
5593 {
5594 	// lock the address space
5595 	VMArea* area = info->range.area;
5596 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5597 		// takes over our reference
5598 
5599 	// lock the top cache
5600 	VMCache* cache = vm_area_get_locked_cache(area);
5601 	VMCacheChainLocker cacheChainLocker(cache);
5602 
5603 	if (info->page->Cache() != cache) {
5604 		// The page is not in the top cache, so we lock the whole cache chain
5605 		// before touching the page's wired count.
5606 		cacheChainLocker.LockAllSourceCaches();
5607 	}
5608 
5609 	decrement_page_wired_count(info->page);
5610 
5611 	// remove the wired range from the area
5612 	area->Unwire(&info->range);
5613 
5614 	cacheChainLocker.Unlock();
5615 }
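
// Usage sketch (illustrative comment only, not compiled): wiring a single
// userland page, using the physical address it translates to, and releasing
// the wiring again. "userBuffer" and do_physical_io() are hypothetical.
//
//	VMPageWiringInfo info;
//	status_t status = vm_wire_page(B_CURRENT_TEAM, (addr_t)userBuffer, true,
//		&info);
//	if (status != B_OK)
//		return status;
//
//	do_physical_io(info.physicalAddress);
//		// the page cannot be paged out while it is wired
//
//	vm_unwire_page(&info);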
5616 
5617 
5618 /*!	Wires down the given address range in the specified team's address space.
5619 
5620 	If successful the function
5621 	- acquires a reference to the specified team's address space,
5622 	- adds respective wired ranges to all areas that intersect with the given
5623 	  address range,
5624 	- makes sure all pages in the given address range are mapped with the
5625 	  requested access permissions and increments their wired count.
5626 
5627 	It fails when \a team doesn't specify a valid address space, when any part
5628 	of the specified address range is not covered by areas, when the concerned
5629 	areas don't allow mapping with the requested permissions, or when mapping
5630 	failed for another reason.
5631 
5632 	When successful the call must be balanced by an unlock_memory_etc() call
5633 	with the exact same parameters.
5634 
5635 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5636 		supported.
5637 	\param address The start of the address range to be wired.
5638 	\param numBytes The size of the address range to be wired.
5639 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5640 		requests that the range must be wired writable ("read from device
5641 		into memory").
5642 	\return \c B_OK on success, another error code otherwise.
5643 */
5644 status_t
5645 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5646 {
5647 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5648 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5649 
5650 	// compute the page protection that is required
5651 	bool isUser = IS_USER_ADDRESS(address);
5652 	bool writable = (flags & B_READ_DEVICE) == 0;
5653 	uint32 requiredProtection = PAGE_PRESENT
5654 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5655 	if (writable)
5656 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5657 
5658 	uint32 mallocFlags = isUser
5659 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5660 
5661 	// get and read lock the address space
5662 	VMAddressSpace* addressSpace = NULL;
5663 	if (isUser) {
5664 		if (team == B_CURRENT_TEAM)
5665 			addressSpace = VMAddressSpace::GetCurrent();
5666 		else
5667 			addressSpace = VMAddressSpace::Get(team);
5668 	} else
5669 		addressSpace = VMAddressSpace::GetKernel();
5670 	if (addressSpace == NULL)
5671 		return B_ERROR;
5672 
5673 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5674 		// We get a new address space reference here. The one we got above will
5675 		// be freed by unlock_memory_etc().
5676 
5677 	VMTranslationMap* map = addressSpace->TranslationMap();
5678 	status_t error = B_OK;
5679 
5680 	// iterate through all concerned areas
5681 	addr_t nextAddress = lockBaseAddress;
5682 	while (nextAddress != lockEndAddress) {
5683 		// get the next area
5684 		VMArea* area = addressSpace->LookupArea(nextAddress);
5685 		if (area == NULL) {
5686 			error = B_BAD_ADDRESS;
5687 			break;
5688 		}
5689 
5690 		addr_t areaStart = nextAddress;
5691 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5692 
5693 		// allocate the wired range (do that before locking the cache to avoid
5694 		// deadlocks)
5695 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5696 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5697 		if (range == NULL) {
5698 			error = B_NO_MEMORY;
5699 			break;
5700 		}
5701 
5702 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5703 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5704 
5705 		// mark the area range wired
5706 		area->Wire(range);
5707 
5708 		// Depending on the area cache type and the wiring, we may not need to
5709 		// look at the individual pages.
5710 		if (area->cache_type == CACHE_TYPE_NULL
5711 			|| area->cache_type == CACHE_TYPE_DEVICE
5712 			|| area->wiring == B_FULL_LOCK
5713 			|| area->wiring == B_CONTIGUOUS) {
5714 			nextAddress = areaEnd;
5715 			continue;
5716 		}
5717 
5718 		// Lock the area's cache chain and the translation map. Needed to look
5719 		// up pages and play with their wired count.
5720 		cacheChainLocker.LockAllSourceCaches();
5721 		map->Lock();
5722 
5723 		// iterate through the pages and wire them
5724 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5725 			phys_addr_t physicalAddress;
5726 			uint32 flags;
5727 
5728 			vm_page* page;
5729 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5730 				&& (flags & requiredProtection) == requiredProtection
5731 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5732 					!= NULL) {
5733 				// Already mapped with the correct permissions -- just increment
5734 				// the page's wired count.
5735 				increment_page_wired_count(page);
5736 			} else {
5737 				// Let vm_soft_fault() map the page for us, if possible. We need
5738 				// to fully unlock to avoid deadlocks. Since we have already
5739 				// wired the area itself, nothing disturbing will happen with it
5740 				// in the meantime.
5741 				map->Unlock();
5742 				cacheChainLocker.Unlock();
5743 				addressSpaceLocker.Unlock();
5744 
5745 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5746 					false, isUser, &page);
5747 
5748 				addressSpaceLocker.Lock();
5749 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5750 				cacheChainLocker.LockAllSourceCaches();
5751 				map->Lock();
5752 			}
5753 
5754 			if (error != B_OK)
5755 				break;
5756 		}
5757 
5758 		map->Unlock();
5759 
5760 		if (error == B_OK) {
5761 			cacheChainLocker.Unlock();
5762 		} else {
5763 			// An error occurred, so abort right here. If the current address
5764 			// is the first in this area, unwire the area, since we won't get
5765 			// to it when reverting what we've done so far.
5766 			if (nextAddress == areaStart) {
5767 				area->Unwire(range);
5768 				cacheChainLocker.Unlock();
5769 				range->~VMAreaWiredRange();
5770 				free_etc(range, mallocFlags);
5771 			} else
5772 				cacheChainLocker.Unlock();
5773 
5774 			break;
5775 		}
5776 	}
5777 
5778 	if (error != B_OK) {
5779 		// An error occurred, so unwire all that we've already wired. Note that
5780 		// even if not a single page was wired, unlock_memory_etc() is called
5781 		// to put the address space reference.
5782 		addressSpaceLocker.Unlock();
5783 		unlock_memory_etc(team, (void*)lockBaseAddress,
5784 			nextAddress - lockBaseAddress, flags);
5785 	}
5786 
5787 	return error;
5788 }
5789 
5790 
5791 status_t
5792 lock_memory(void* address, size_t numBytes, uint32 flags)
5793 {
5794 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5795 }
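
// Usage sketch (illustrative comment only, not compiled): wiring a userland
// buffer for the duration of an I/O transfer. As documented above, the
// unlock call must balance the lock call with the exact same parameters.
// "buffer", "length", and do_transfer() are hypothetical.
//
//	status_t status = lock_memory(buffer, length, 0);
//	if (status != B_OK)
//		return status;
//
//	status = do_transfer(buffer, length);
//
//	unlock_memory(buffer, length, 0);
//	return status;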
5796 
5797 
5798 /*!	Unwires an address range previously wired with lock_memory_etc().
5799 
5800 	Note that a call to this function must balance a previous lock_memory_etc()
5801 	call with exactly the same parameters.
5802 */
5803 status_t
5804 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5805 {
5806 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5807 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5808 
5809 	// compute the page protection that is required
5810 	bool isUser = IS_USER_ADDRESS(address);
5811 	bool writable = (flags & B_READ_DEVICE) == 0;
5812 	uint32 requiredProtection = PAGE_PRESENT
5813 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5814 	if (writable)
5815 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5816 
5817 	uint32 mallocFlags = isUser
5818 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5819 
5820 	// get and read lock the address space
5821 	VMAddressSpace* addressSpace = NULL;
5822 	if (isUser) {
5823 		if (team == B_CURRENT_TEAM)
5824 			addressSpace = VMAddressSpace::GetCurrent();
5825 		else
5826 			addressSpace = VMAddressSpace::Get(team);
5827 	} else
5828 		addressSpace = VMAddressSpace::GetKernel();
5829 	if (addressSpace == NULL)
5830 		return B_ERROR;
5831 
5832 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5833 		// Take over the address space reference. We don't unlock until we're
5834 		// done.
5835 
5836 	VMTranslationMap* map = addressSpace->TranslationMap();
5837 	status_t error = B_OK;
5838 
5839 	// iterate through all concerned areas
5840 	addr_t nextAddress = lockBaseAddress;
5841 	while (nextAddress != lockEndAddress) {
5842 		// get the next area
5843 		VMArea* area = addressSpace->LookupArea(nextAddress);
5844 		if (area == NULL) {
5845 			error = B_BAD_ADDRESS;
5846 			break;
5847 		}
5848 
5849 		addr_t areaStart = nextAddress;
5850 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5851 
5852 		// Lock the area's top cache. This is a requirement for
5853 		// VMArea::Unwire().
5854 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5855 
5856 		// Depending on the area cache type and the wiring, we may not need to
5857 		// look at the individual pages.
5858 		if (area->cache_type == CACHE_TYPE_NULL
5859 			|| area->cache_type == CACHE_TYPE_DEVICE
5860 			|| area->wiring == B_FULL_LOCK
5861 			|| area->wiring == B_CONTIGUOUS) {
5862 			// unwire the range (to avoid deadlocks we delete the range after
5863 			// unlocking the cache)
5864 			nextAddress = areaEnd;
5865 			VMAreaWiredRange* range = area->Unwire(areaStart,
5866 				areaEnd - areaStart, writable);
5867 			cacheChainLocker.Unlock();
5868 			if (range != NULL) {
5869 				range->~VMAreaWiredRange();
5870 				free_etc(range, mallocFlags);
5871 			}
5872 			continue;
5873 		}
5874 
5875 		// Lock the area's cache chain and the translation map. Needed to look
5876 		// up pages and play with their wired count.
5877 		cacheChainLocker.LockAllSourceCaches();
5878 		map->Lock();
5879 
5880 		// iterate through the pages and unwire them
5881 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5882 			phys_addr_t physicalAddress;
5883 			uint32 flags;
5884 
5885 			vm_page* page;
5886 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5887 				&& (flags & PAGE_PRESENT) != 0
5888 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5889 					!= NULL) {
5890 				// The page is still mapped -- just decrement
5891 				// its wired count.
5892 				decrement_page_wired_count(page);
5893 			} else {
5894 				panic("unlock_memory_etc(): Failed to unwire page: address "
5895 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5896 					nextAddress);
5897 				error = B_BAD_VALUE;
5898 				break;
5899 			}
5900 		}
5901 
5902 		map->Unlock();
5903 
5904 		// All pages are unwired. Remove the area's wired range as well (to
5905 		// avoid deadlocks we delete the range after unlocking the cache).
5906 		VMAreaWiredRange* range = area->Unwire(areaStart,
5907 			areaEnd - areaStart, writable);
5908 
5909 		cacheChainLocker.Unlock();
5910 
5911 		if (range != NULL) {
5912 			range->~VMAreaWiredRange();
5913 			free_etc(range, mallocFlags);
5914 		}
5915 
5916 		if (error != B_OK)
5917 			break;
5918 	}
5919 
5920 	// get rid of the address space reference lock_memory_etc() acquired
5921 	addressSpace->Put();
5922 
5923 	return error;
5924 }
5925 
5926 
5927 status_t
5928 unlock_memory(void* address, size_t numBytes, uint32 flags)
5929 {
5930 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5931 }
5932 
5933 
5934 /*!	Similar to get_memory_map(), but also allows to specify the address space
5935 	for the memory in question and has saner semantics.
5936 	Returns \c B_OK when the complete range could be translated or
5937 	\c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5938 	case the actual number of entries is written to \c *_numEntries. Any other
5939 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5940 	in this case.
5941 */
5942 status_t
5943 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5944 	physical_entry* table, uint32* _numEntries)
5945 {
5946 	uint32 numEntries = *_numEntries;
5947 	*_numEntries = 0;
5948 
5949 	VMAddressSpace* addressSpace;
5950 	addr_t virtualAddress = (addr_t)address;
5951 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5952 	phys_addr_t physicalAddress;
5953 	status_t status = B_OK;
5954 	int32 index = -1;
5955 	addr_t offset = 0;
5956 	bool interrupts = are_interrupts_enabled();
5957 
5958 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5959 		"entries)\n", team, address, numBytes, numEntries));
5960 
5961 	if (numEntries == 0 || numBytes == 0)
5962 		return B_BAD_VALUE;
5963 
5964 	// in which address space is the address to be found?
5965 	if (IS_USER_ADDRESS(virtualAddress)) {
5966 		if (team == B_CURRENT_TEAM)
5967 			addressSpace = VMAddressSpace::GetCurrent();
5968 		else
5969 			addressSpace = VMAddressSpace::Get(team);
5970 	} else
5971 		addressSpace = VMAddressSpace::GetKernel();
5972 
5973 	if (addressSpace == NULL)
5974 		return B_ERROR;
5975 
5976 	VMTranslationMap* map = addressSpace->TranslationMap();
5977 
5978 	if (interrupts)
5979 		map->Lock();
5980 
5981 	while (offset < numBytes) {
5982 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5983 		uint32 flags;
5984 
5985 		if (interrupts) {
5986 			status = map->Query((addr_t)address + offset, &physicalAddress,
5987 				&flags);
5988 		} else {
5989 			status = map->QueryInterrupt((addr_t)address + offset,
5990 				&physicalAddress, &flags);
5991 		}
5992 		if (status < B_OK)
5993 			break;
5994 		if ((flags & PAGE_PRESENT) == 0) {
5995 			panic("get_memory_map() called on unmapped memory!");
5996 			return B_BAD_ADDRESS;
5997 		}
5998 
5999 		if (index < 0 && pageOffset > 0) {
6000 			physicalAddress += pageOffset;
6001 			if (bytes > B_PAGE_SIZE - pageOffset)
6002 				bytes = B_PAGE_SIZE - pageOffset;
6003 		}
6004 
6005 		// need to switch to the next physical_entry?
6006 		if (index < 0 || table[index].address
6007 				!= physicalAddress - table[index].size) {
6008 			if ((uint32)++index + 1 > numEntries) {
6009 				// table too small
6010 				break;
6011 			}
6012 			table[index].address = physicalAddress;
6013 			table[index].size = bytes;
6014 		} else {
6015 			// the page fits into the current entry
6016 			table[index].size += bytes;
6017 		}
6018 
6019 		offset += bytes;
6020 	}
6021 
6022 	if (interrupts)
6023 		map->Unlock();
6024 
6025 	if (status != B_OK)
6026 		return status;
6027 
6028 	if ((uint32)index + 1 > numEntries) {
6029 		*_numEntries = index;
6030 		return B_BUFFER_OVERFLOW;
6031 	}
6032 
6033 	*_numEntries = index + 1;
6034 	return B_OK;
6035 }
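
// Usage sketch (illustrative comment only, not compiled): translating a
// (previously wired) buffer into a list of physical runs, as a driver
// building a scatter/gather table might do. "buffer" and "length" are
// hypothetical; the table size is arbitrary.
//
//	physical_entry table[8];
//	uint32 entries = 8;
//	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
//		table, &entries);
//	if (status != B_OK && status != B_BUFFER_OVERFLOW)
//		return status;
//	for (uint32 i = 0; i < entries; i++) {
//		dprintf("run %" B_PRIu32 ": %#" B_PRIxPHYSADDR ", %" B_PRIu64
//			" bytes\n", i, table[i].address, (uint64)table[i].size);
//	}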
6036 
6037 
6038 /*!	According to the BeBook, this function should always succeed.
6039 	This is no longer the case.
6040 */
6041 extern "C" int32
6042 __get_memory_map_haiku(const void* address, size_t numBytes,
6043 	physical_entry* table, int32 numEntries)
6044 {
6045 	uint32 entriesRead = numEntries;
6046 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6047 		table, &entriesRead);
6048 	if (error != B_OK)
6049 		return error;
6050 
6051 	// close the entry list
6052 
6053 	// if it's only one entry, we will silently accept the missing ending
6054 	if (numEntries == 1)
6055 		return B_OK;
6056 
6057 	if (entriesRead + 1 > (uint32)numEntries)
6058 		return B_BUFFER_OVERFLOW;
6059 
6060 	table[entriesRead].address = 0;
6061 	table[entriesRead].size = 0;
6062 
6063 	return B_OK;
6064 }
6065 
6066 
6067 area_id
6068 area_for(void* address)
6069 {
6070 	return vm_area_for((addr_t)address, true);
6071 }
6072 
6073 
6074 area_id
6075 find_area(const char* name)
6076 {
6077 	return VMAreaHash::Find(name);
6078 }
6079 
6080 
6081 status_t
6082 _get_area_info(area_id id, area_info* info, size_t size)
6083 {
6084 	if (size != sizeof(area_info) || info == NULL)
6085 		return B_BAD_VALUE;
6086 
6087 	AddressSpaceReadLocker locker;
6088 	VMArea* area;
6089 	status_t status = locker.SetFromArea(id, area);
6090 	if (status != B_OK)
6091 		return status;
6092 
6093 	fill_area_info(area, info, size);
6094 	return B_OK;
6095 }
6096 
6097 
6098 status_t
6099 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6100 {
6101 	addr_t nextBase = *(addr_t*)cookie;
6102 
6103 	// we're already through the list
6104 	if (nextBase == (addr_t)-1)
6105 		return B_ENTRY_NOT_FOUND;
6106 
6107 	if (team == B_CURRENT_TEAM)
6108 		team = team_get_current_team_id();
6109 
6110 	AddressSpaceReadLocker locker(team);
6111 	if (!locker.IsLocked())
6112 		return B_BAD_TEAM_ID;
6113 
6114 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6115 	if (area == NULL) {
6116 		nextBase = (addr_t)-1;
6117 		return B_ENTRY_NOT_FOUND;
6118 	}
6119 
6120 	fill_area_info(area, info, size);
6121 	*cookie = (ssize_t)(area->Base() + 1);
6122 
6123 	return B_OK;
6124 }
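
// Usage sketch (illustrative comment only, not compiled): iterating over all
// areas of a team with the cookie protocol implemented above. "team" is a
// hypothetical team ID; the cookie must start at 0.
//
//	ssize_t cookie = 0;
//	area_info info;
//	while (_get_next_area_info(team, &cookie, &info, sizeof(info)) == B_OK) {
//		dprintf("area %" B_PRId32 " (%s): %p, %" B_PRIuSIZE " bytes\n",
//			info.area, info.name, info.address, info.size);
//	}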
6125 
6126 
6127 status_t
6128 set_area_protection(area_id area, uint32 newProtection)
6129 {
6130 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6131 		newProtection, true);
6132 }
6133 
6134 
6135 status_t
6136 resize_area(area_id areaID, size_t newSize)
6137 {
6138 	return vm_resize_area(areaID, newSize, true);
6139 }
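
// Usage sketch (illustrative comment only, not compiled): growing a kernel
// area. As vm_resize_area() checks, the new size must be a multiple of
// B_PAGE_SIZE. The area name and sizes are arbitrary examples.
//
//	void* address;
//	area_id area = create_area("scratch buffer", &address,
//		B_ANY_KERNEL_ADDRESS, 4 * B_PAGE_SIZE, B_FULL_LOCK,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	if (area >= 0 && resize_area(area, 8 * B_PAGE_SIZE) == B_OK) {
//		// the mapping at "address" now covers eight pages
//	}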
6140 
6141 
6142 /*!	Transfers the specified area to a new team. The caller must be the owner
6143 	of the area.
6144 */
6145 area_id
6146 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6147 	bool kernel)
6148 {
6149 	area_info info;
6150 	status_t status = get_area_info(id, &info);
6151 	if (status != B_OK)
6152 		return status;
6153 
6154 	if (info.team != thread_get_current_thread()->team->id)
6155 		return B_PERMISSION_DENIED;
6156 
6157 	// We need to mark the area cloneable so the following operations work.
6158 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6159 	if (status != B_OK)
6160 		return status;
6161 
6162 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6163 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6164 	if (clonedArea < 0)
6165 		return clonedArea;
6166 
6167 	status = vm_delete_area(info.team, id, kernel);
6168 	if (status != B_OK) {
6169 		vm_delete_area(target, clonedArea, kernel);
6170 		return status;
6171 	}
6172 
6173 	// Now we can reset the protection to whatever it was before.
6174 	set_area_protection(clonedArea, info.protection);
6175 
6176 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6177 
6178 	return clonedArea;
6179 }
6180 
6181 
6182 extern "C" area_id
6183 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6184 	size_t numBytes, uint32 addressSpec, uint32 protection,
6185 	void** _virtualAddress)
6186 {
6187 	if (!arch_vm_supports_protection(protection))
6188 		return B_NOT_SUPPORTED;
6189 
6190 	fix_protection(&protection);
6191 
6192 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6193 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6194 		false);
6195 }
6196 
6197 
6198 area_id
6199 clone_area(const char* name, void** _address, uint32 addressSpec,
6200 	uint32 protection, area_id source)
6201 {
6202 	if ((protection & B_KERNEL_PROTECTION) == 0)
6203 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6204 
6205 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6206 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6207 }
6208 
6209 
6210 area_id
6211 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6212 	uint32 protection, uint32 flags, uint32 guardSize,
6213 	const virtual_address_restrictions* virtualAddressRestrictions,
6214 	const physical_address_restrictions* physicalAddressRestrictions,
6215 	void** _address)
6216 {
6217 	fix_protection(&protection);
6218 
6219 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6220 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6221 		true, _address);
6222 }
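
// Usage sketch (illustrative comment only, not compiled): using the
// restriction structures to create a physically contiguous kernel buffer
// below 4 GB. The name, size, and the restriction field values are
// assumptions for illustration.
//
//	virtual_address_restrictions virtualRestrictions = {};
//	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
//	physical_address_restrictions physicalRestrictions = {};
//	physicalRestrictions.high_address = (phys_addr_t)1 << 32;
//	void* address;
//	area_id area = create_area_etc(VMAddressSpace::KernelID(), "dma buffer",
//		16 * B_PAGE_SIZE, B_CONTIGUOUS,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
//		&physicalRestrictions, &address);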
6223 
6224 
6225 extern "C" area_id
6226 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6227 	size_t size, uint32 lock, uint32 protection)
6228 {
6229 	fix_protection(&protection);
6230 
6231 	virtual_address_restrictions virtualRestrictions = {};
6232 	virtualRestrictions.address = *_address;
6233 	virtualRestrictions.address_specification = addressSpec;
6234 	physical_address_restrictions physicalRestrictions = {};
6235 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6236 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6237 		true, _address);
6238 }
6239 
6240 
6241 status_t
6242 delete_area(area_id area)
6243 {
6244 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6245 }
6246 
6247 
6248 //	#pragma mark - Userland syscalls
6249 
6250 
6251 status_t
6252 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6253 	addr_t size)
6254 {
6255 	// filter out some unavailable values (for userland)
6256 	switch (addressSpec) {
6257 		case B_ANY_KERNEL_ADDRESS:
6258 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6259 			return B_BAD_VALUE;
6260 	}
6261 
6262 	addr_t address;
6263 
6264 	if (!IS_USER_ADDRESS(userAddress)
6265 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6266 		return B_BAD_ADDRESS;
6267 
6268 	status_t status = vm_reserve_address_range(
6269 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6270 		RESERVED_AVOID_BASE);
6271 	if (status != B_OK)
6272 		return status;
6273 
6274 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6275 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6276 			(void*)address, size);
6277 		return B_BAD_ADDRESS;
6278 	}
6279 
6280 	return B_OK;
6281 }
6282 
6283 
6284 status_t
6285 _user_unreserve_address_range(addr_t address, addr_t size)
6286 {
6287 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6288 		(void*)address, size);
6289 }
6290 
6291 
6292 area_id
6293 _user_area_for(void* address)
6294 {
6295 	return vm_area_for((addr_t)address, false);
6296 }
6297 
6298 
6299 area_id
6300 _user_find_area(const char* userName)
6301 {
6302 	char name[B_OS_NAME_LENGTH];
6303 
6304 	if (!IS_USER_ADDRESS(userName)
6305 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6306 		return B_BAD_ADDRESS;
6307 
6308 	return find_area(name);
6309 }
6310 
6311 
6312 status_t
6313 _user_get_area_info(area_id area, area_info* userInfo)
6314 {
6315 	if (!IS_USER_ADDRESS(userInfo))
6316 		return B_BAD_ADDRESS;
6317 
6318 	area_info info;
6319 	status_t status = get_area_info(area, &info);
6320 	if (status < B_OK)
6321 		return status;
6322 
6323 	// TODO: do we want to prevent userland from seeing kernel protections?
6324 	//info.protection &= B_USER_PROTECTION;
6325 
6326 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6327 		return B_BAD_ADDRESS;
6328 
6329 	return status;
6330 }
6331 
6332 
6333 status_t
6334 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6335 {
6336 	ssize_t cookie;
6337 
6338 	if (!IS_USER_ADDRESS(userCookie)
6339 		|| !IS_USER_ADDRESS(userInfo)
6340 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6341 		return B_BAD_ADDRESS;
6342 
6343 	area_info info;
6344 	status_t status = _get_next_area_info(team, &cookie, &info,
6345 		sizeof(area_info));
6346 	if (status != B_OK)
6347 		return status;
6348 
6349 	//info.protection &= B_USER_PROTECTION;
6350 
6351 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6352 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6353 		return B_BAD_ADDRESS;
6354 
6355 	return status;
6356 }
6357 
6358 
6359 status_t
6360 _user_set_area_protection(area_id area, uint32 newProtection)
6361 {
6362 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6363 		return B_BAD_VALUE;
6364 
6365 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6366 		newProtection, false);
6367 }
6368 
6369 
6370 status_t
6371 _user_resize_area(area_id area, size_t newSize)
6372 {
6373 	// TODO: Since we restrict deleting of areas to those owned by the team,
6374 	// we should also do that for resizing (check other functions, too).
6375 	return vm_resize_area(area, newSize, false);
6376 }
6377 
6378 
6379 area_id
6380 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6381 	team_id target)
6382 {
6383 	// filter out some unavailable values (for userland)
6384 	switch (addressSpec) {
6385 		case B_ANY_KERNEL_ADDRESS:
6386 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6387 			return B_BAD_VALUE;
6388 	}
6389 
6390 	void* address;
6391 	if (!IS_USER_ADDRESS(userAddress)
6392 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6393 		return B_BAD_ADDRESS;
6394 
6395 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6396 	if (newArea < B_OK)
6397 		return newArea;
6398 
6399 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6400 		return B_BAD_ADDRESS;
6401 
6402 	return newArea;
6403 }
6404 
6405 
6406 area_id
6407 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6408 	uint32 protection, area_id sourceArea)
6409 {
6410 	char name[B_OS_NAME_LENGTH];
6411 	void* address;
6412 
6413 	// filter out some unavailable values (for userland)
6414 	switch (addressSpec) {
6415 		case B_ANY_KERNEL_ADDRESS:
6416 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6417 			return B_BAD_VALUE;
6418 	}
6419 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6420 		return B_BAD_VALUE;
6421 
6422 	if (!IS_USER_ADDRESS(userName)
6423 		|| !IS_USER_ADDRESS(userAddress)
6424 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6425 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6426 		return B_BAD_ADDRESS;
6427 
6428 	fix_protection(&protection);
6429 
6430 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6431 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6432 		false);
6433 	if (clonedArea < B_OK)
6434 		return clonedArea;
6435 
6436 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6437 		delete_area(clonedArea);
6438 		return B_BAD_ADDRESS;
6439 	}
6440 
6441 	return clonedArea;
6442 }
6443 
6444 
6445 area_id
6446 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6447 	size_t size, uint32 lock, uint32 protection)
6448 {
6449 	char name[B_OS_NAME_LENGTH];
6450 	void* address;
6451 
6452 	// filter out some unavailable values (for userland)
6453 	switch (addressSpec) {
6454 		case B_ANY_KERNEL_ADDRESS:
6455 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6456 			return B_BAD_VALUE;
6457 	}
6458 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6459 		return B_BAD_VALUE;
6460 
6461 	if (!IS_USER_ADDRESS(userName)
6462 		|| !IS_USER_ADDRESS(userAddress)
6463 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6464 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6465 		return B_BAD_ADDRESS;
6466 
6467 	if (addressSpec == B_EXACT_ADDRESS
6468 		&& IS_KERNEL_ADDRESS(address))
6469 		return B_BAD_VALUE;
6470 
6471 	if (addressSpec == B_ANY_ADDRESS)
6472 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6473 	if (addressSpec == B_BASE_ADDRESS)
6474 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6475 
6476 	fix_protection(&protection);
6477 
6478 	virtual_address_restrictions virtualRestrictions = {};
6479 	virtualRestrictions.address = address;
6480 	virtualRestrictions.address_specification = addressSpec;
6481 	physical_address_restrictions physicalRestrictions = {};
6482 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6483 		size, lock, protection, 0, 0, &virtualRestrictions,
6484 		&physicalRestrictions, false, &address);
6485 
6486 	if (area >= B_OK
6487 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6488 		delete_area(area);
6489 		return B_BAD_ADDRESS;
6490 	}
6491 
6492 	return area;
6493 }
6494 
6495 
6496 status_t
6497 _user_delete_area(area_id area)
6498 {
6499 	// Unlike the BeOS implementation, you can now only delete areas
6500 	// that you have created yourself from userland.
6501 	// The documentation of delete_area() explicitly states that this
6502 	// will be restricted in the future, and so it will.
6503 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6504 }
6505 
6506 
6507 // TODO: create a BeOS style call for this!
6508 
6509 area_id
6510 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6511 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6512 	int fd, off_t offset)
6513 {
6514 	char name[B_OS_NAME_LENGTH];
6515 	void* address;
6516 	area_id area;
6517 
6518 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6519 		return B_BAD_VALUE;
6520 
6521 	fix_protection(&protection);
6522 
6523 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6524 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6525 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6526 		return B_BAD_ADDRESS;
6527 
6528 	if (addressSpec == B_EXACT_ADDRESS) {
6529 		if ((addr_t)address + size < (addr_t)address
6530 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6531 			return B_BAD_VALUE;
6532 		}
6533 		if (!IS_USER_ADDRESS(address)
6534 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6535 			return B_BAD_ADDRESS;
6536 		}
6537 	}
6538 
6539 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6540 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6541 		false);
6542 	if (area < B_OK)
6543 		return area;
6544 
6545 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6546 		return B_BAD_ADDRESS;
6547 
6548 	return area;
6549 }
6550 
6551 
6552 status_t
6553 _user_unmap_memory(void* _address, size_t size)
6554 {
6555 	addr_t address = (addr_t)_address;
6556 
6557 	// check params
6558 	if (size == 0 || (addr_t)address + size < (addr_t)address
6559 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6560 		return B_BAD_VALUE;
6561 	}
6562 
6563 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6564 		return B_BAD_ADDRESS;
6565 
6566 	// Write lock the address space and ensure the address range is not wired.
6567 	AddressSpaceWriteLocker locker;
6568 	do {
6569 		status_t status = locker.SetTo(team_get_current_team_id());
6570 		if (status != B_OK)
6571 			return status;
6572 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6573 			size, &locker));
6574 
6575 	// unmap
6576 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6577 }
6578 
6579 
6580 status_t
6581 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6582 {
6583 	// check address range
6584 	addr_t address = (addr_t)_address;
6585 	size = PAGE_ALIGN(size);
6586 
6587 	if ((address % B_PAGE_SIZE) != 0)
6588 		return B_BAD_VALUE;
6589 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6590 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6591 		// weird error code required by POSIX
6592 		return ENOMEM;
6593 	}
6594 
6595 	// extend and check protection
6596 	if ((protection & ~B_USER_PROTECTION) != 0)
6597 		return B_BAD_VALUE;
6598 
6599 	fix_protection(&protection);
6600 
6601 	// We need to write lock the address space, since we're going to play with
6602 	// the areas. Also make sure that none of the areas is wired and that we're
6603 	// actually allowed to change the protection.
6604 	AddressSpaceWriteLocker locker;
6605 
6606 	bool restart;
6607 	do {
6608 		restart = false;
6609 
6610 		status_t status = locker.SetTo(team_get_current_team_id());
6611 		if (status != B_OK)
6612 			return status;
6613 
6614 		// First round: Check whether the whole range is covered by areas and we
6615 		// are allowed to modify them.
6616 		addr_t currentAddress = address;
6617 		size_t sizeLeft = size;
6618 		while (sizeLeft > 0) {
6619 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6620 			if (area == NULL)
6621 				return B_NO_MEMORY;
6622 
6623 			if ((area->protection & B_KERNEL_AREA) != 0)
6624 				return B_NOT_ALLOWED;
6625 
6626 			// TODO: For (shared) mapped files we should check whether the new
6627 			// protections are compatible with the file permissions. We don't
6628 			// have a way to do that yet, though.
6629 
6630 			addr_t offset = currentAddress - area->Base();
6631 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6632 
6633 			AreaCacheLocker cacheLocker(area);
6634 
6635 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6636 					&locker, &cacheLocker)) {
6637 				restart = true;
6638 				break;
6639 			}
6640 
6641 			cacheLocker.Unlock();
6642 
6643 			currentAddress += rangeSize;
6644 			sizeLeft -= rangeSize;
6645 		}
6646 	} while (restart);
6647 
6648 	// Second round: If the protections differ from that of the area, create a
6649 	// page protection array and re-map mapped pages.
6650 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6651 	addr_t currentAddress = address;
6652 	size_t sizeLeft = size;
6653 	while (sizeLeft > 0) {
6654 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6655 		if (area == NULL)
6656 			return B_NO_MEMORY;
6657 
6658 		addr_t offset = currentAddress - area->Base();
6659 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6660 
6661 		currentAddress += rangeSize;
6662 		sizeLeft -= rangeSize;
6663 
6664 		if (area->page_protections == NULL) {
6665 			if (area->protection == protection)
6666 				continue;
6667 
6668 			status_t status = allocate_area_page_protections(area);
6669 			if (status != B_OK)
6670 				return status;
6671 		}
6672 
6673 		// We need to lock the complete cache chain, since we potentially unmap
6674 		// pages of lower caches.
6675 		VMCache* topCache = vm_area_get_locked_cache(area);
6676 		VMCacheChainLocker cacheChainLocker(topCache);
6677 		cacheChainLocker.LockAllSourceCaches();
6678 
6679 		for (addr_t pageAddress = area->Base() + offset;
6680 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6681 			map->Lock();
6682 
6683 			set_area_page_protection(area, pageAddress, protection);
6684 
6685 			phys_addr_t physicalAddress;
6686 			uint32 flags;
6687 
6688 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6689 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6690 				map->Unlock();
6691 				continue;
6692 			}
6693 
6694 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6695 			if (page == NULL) {
6696 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6697 					"\n", area, physicalAddress);
6698 				map->Unlock();
6699 				return B_ERROR;
6700 			}
6701 
6702 			// If the page is not in the topmost cache and write access is
6703 			// requested, we have to unmap it. Otherwise we can re-map it with
6704 			// the new protection.
6705 			bool unmapPage = page->Cache() != topCache
6706 				&& (protection & B_WRITE_AREA) != 0;
6707 
6708 			if (!unmapPage)
6709 				map->ProtectPage(area, pageAddress, protection);
6710 
6711 			map->Unlock();
6712 
6713 			if (unmapPage) {
6714 				DEBUG_PAGE_ACCESS_START(page);
6715 				unmap_page(area, pageAddress);
6716 				DEBUG_PAGE_ACCESS_END(page);
6717 			}
6718 		}
6719 	}
6720 
6721 	return B_OK;
6722 }
6723 
6724 
6725 status_t
6726 _user_sync_memory(void* _address, size_t size, uint32 flags)
6727 {
6728 	addr_t address = (addr_t)_address;
6729 	size = PAGE_ALIGN(size);
6730 
6731 	// check params
6732 	if ((address % B_PAGE_SIZE) != 0)
6733 		return B_BAD_VALUE;
6734 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6735 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6736 		// weird error code required by POSIX
6737 		return ENOMEM;
6738 	}
6739 
6740 	bool writeSync = (flags & MS_SYNC) != 0;
6741 	bool writeAsync = (flags & MS_ASYNC) != 0;
6742 	if (writeSync && writeAsync)
6743 		return B_BAD_VALUE;
6744 
6745 	if (size == 0 || (!writeSync && !writeAsync))
6746 		return B_OK;
6747 
6748 	// iterate through the range and sync all concerned areas
6749 	while (size > 0) {
6750 		// read lock the address space
6751 		AddressSpaceReadLocker locker;
6752 		status_t error = locker.SetTo(team_get_current_team_id());
6753 		if (error != B_OK)
6754 			return error;
6755 
6756 		// get the first area
6757 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6758 		if (area == NULL)
6759 			return B_NO_MEMORY;
6760 
6761 		uint32 offset = address - area->Base();
6762 		size_t rangeSize = min_c(area->Size() - offset, size);
6763 		offset += area->cache_offset;
6764 
6765 		// lock the cache
6766 		AreaCacheLocker cacheLocker(area);
6767 		if (!cacheLocker)
6768 			return B_BAD_VALUE;
6769 		VMCache* cache = area->cache;
6770 
6771 		locker.Unlock();
6772 
6773 		uint32 firstPage = offset >> PAGE_SHIFT;
6774 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6775 
6776 		// write the pages
6777 		if (cache->type == CACHE_TYPE_VNODE) {
6778 			if (writeSync) {
6779 				// synchronous
6780 				error = vm_page_write_modified_page_range(cache, firstPage,
6781 					endPage);
6782 				if (error != B_OK)
6783 					return error;
6784 			} else {
6785 				// asynchronous
6786 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6787 				// TODO: This is probably not quite what is supposed to happen.
6788 				// Especially when a lot has to be written, it might take ages
6789 				// until it really hits the disk.
6790 			}
6791 		}
6792 
6793 		address += rangeSize;
6794 		size -= rangeSize;
6795 	}
6796 
6797 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
6798 	// synchronize multiple mappings of the same file. In our VM they never get
6799 	// out of sync, though, so we don't have to do anything.
6800 
6801 	return B_OK;
6802 }
6803 
6804 
6805 status_t
6806 _user_memory_advice(void* _address, size_t size, uint32 advice)
6807 {
6808 	addr_t address = (addr_t)_address;
6809 	if ((address % B_PAGE_SIZE) != 0)
6810 		return B_BAD_VALUE;
6811 
6812 	size = PAGE_ALIGN(size);
6813 	if (address + size < address || !IS_USER_ADDRESS(address)
6814 		|| !IS_USER_ADDRESS(address + size)) {
6815 		// weird error code required by POSIX
6816 		return B_NO_MEMORY;
6817 	}
6818 
6819 	switch (advice) {
6820 		case MADV_NORMAL:
6821 		case MADV_SEQUENTIAL:
6822 		case MADV_RANDOM:
6823 		case MADV_WILLNEED:
6824 		case MADV_DONTNEED:
6825 			// TODO: Implement!
6826 			break;
6827 
6828 		case MADV_FREE:
6829 		{
6830 			AddressSpaceWriteLocker locker;
6831 			do {
6832 				status_t status = locker.SetTo(team_get_current_team_id());
6833 				if (status != B_OK)
6834 					return status;
6835 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
6836 					address, size, &locker));
6837 
6838 			discard_address_range(locker.AddressSpace(), address, size, false);
6839 			break;
6840 		}
6841 
6842 		default:
6843 			return B_BAD_VALUE;
6844 	}
6845 
6846 	return B_OK;
6847 }
6848 
6849 
6850 status_t
6851 _user_get_memory_properties(team_id teamID, const void* address,
6852 	uint32* _protected, uint32* _lock)
6853 {
6854 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6855 		return B_BAD_ADDRESS;
6856 
6857 	AddressSpaceReadLocker locker;
6858 	status_t error = locker.SetTo(teamID);
6859 	if (error != B_OK)
6860 		return error;
6861 
6862 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6863 	if (area == NULL)
6864 		return B_NO_MEMORY;
6865 
6866 
6867 	uint32 protection = area->protection;
6868 	if (area->page_protections != NULL)
6869 		protection = get_area_page_protection(area, (addr_t)address);
6870 
6871 	uint32 wiring = area->wiring;
6872 
6873 	locker.Unlock();
6874 
6875 	error = user_memcpy(_protected, &protection, sizeof(protection));
6876 	if (error != B_OK)
6877 		return error;
6878 
6879 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6880 
6881 	return error;
6882 }
6883 
6884 
6885 // #pragma mark -- compatibility
6886 
6887 
6888 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6889 
6890 
6891 struct physical_entry_beos {
6892 	uint32	address;
6893 	uint32	size;
6894 };
6895 
6896 
6897 /*!	The physical_entry structure has changed. We need to translate it to the
6898 	old one.
6899 */
6900 extern "C" int32
6901 __get_memory_map_beos(const void* _address, size_t numBytes,
6902 	physical_entry_beos* table, int32 numEntries)
6903 {
6904 	if (numEntries <= 0)
6905 		return B_BAD_VALUE;
6906 
6907 	const uint8* address = (const uint8*)_address;
6908 
6909 	int32 count = 0;
6910 	while (numBytes > 0 && count < numEntries) {
6911 		physical_entry entry;
6912 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6913 		if (result < 0) {
6914 			if (result != B_BUFFER_OVERFLOW)
6915 				return result;
6916 		}
6917 
6918 		if (entry.address >= (phys_addr_t)1 << 32) {
6919 			panic("get_memory_map(): Address is greater than 4 GB!");
6920 			return B_ERROR;
6921 		}
6922 
6923 		table[count].address = entry.address;
6924 		table[count++].size = entry.size;
6925 
6926 		address += entry.size;
6927 		numBytes -= entry.size;
6928 	}
6929 
6930 	// null-terminate the table, if possible
6931 	if (count < numEntries) {
6932 		table[count].address = 0;
6933 		table[count].size = 0;
6934 	}
6935 
6936 	return B_OK;
6937 }
6938 
6939 
6940 /*!	The type of the \a physicalAddress parameter has changed from void* to
6941 	phys_addr_t.
6942 */
6943 extern "C" area_id
6944 __map_physical_memory_beos(const char* name, void* physicalAddress,
6945 	size_t numBytes, uint32 addressSpec, uint32 protection,
6946 	void** _virtualAddress)
6947 {
6948 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6949 		addressSpec, protection, _virtualAddress);
6950 }
6951 
6952 
6953 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6954 	we meddle with the \a lock parameter to force 32 bit.
6955 */
6956 extern "C" area_id
6957 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6958 	size_t size, uint32 lock, uint32 protection)
6959 {
6960 	switch (lock) {
6961 		case B_NO_LOCK:
6962 			break;
6963 		case B_FULL_LOCK:
6964 		case B_LAZY_LOCK:
6965 			lock = B_32_BIT_FULL_LOCK;
6966 			break;
6967 		case B_CONTIGUOUS:
6968 			lock = B_32_BIT_CONTIGUOUS;
6969 			break;
6970 	}
6971 
6972 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6973 		protection);
6974 }
6975 
6976 
6977 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6978 	"BASE");
6979 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6980 	"map_physical_memory@", "BASE");
6981 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6982 	"BASE");
6983 
6984 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6985 	"get_memory_map@@", "1_ALPHA3");
6986 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6987 	"map_physical_memory@@", "1_ALPHA3");
6988 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6989 	"1_ALPHA3");
6990 
6991 
6992 #else
6993 
6994 
6995 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6996 	"get_memory_map@@", "BASE");
6997 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6998 	"map_physical_memory@@", "BASE");
6999 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7000 	"BASE");
7001 
7002 
7003 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7004