xref: /haiku/src/system/kernel/vm/vm.cpp (revision 0d452c8f34013b611a54c746a71c05e28796eae2)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <boot/elf.h>
31 #include <boot/stage2.h>
32 #include <condition_variable.h>
33 #include <console.h>
34 #include <debug.h>
35 #include <file_cache.h>
36 #include <fs/fd.h>
37 #include <heap.h>
38 #include <kernel.h>
39 #include <int.h>
40 #include <lock.h>
41 #include <low_resource_manager.h>
42 #include <slab/Slab.h>
43 #include <smp.h>
44 #include <system_info.h>
45 #include <thread.h>
46 #include <team.h>
47 #include <tracing.h>
48 #include <util/AutoLock.h>
49 #include <util/khash.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "IORequest.h"
59 
60 
61 //#define TRACE_VM
62 //#define TRACE_FAULTS
63 #ifdef TRACE_VM
64 #	define TRACE(x) dprintf x
65 #else
66 #	define TRACE(x) ;
67 #endif
68 #ifdef TRACE_FAULTS
69 #	define FTRACE(x) dprintf x
70 #else
71 #	define FTRACE(x) ;
72 #endif
73 
74 
75 class AreaCacheLocking {
76 public:
77 	inline bool Lock(VMCache* lockable)
78 	{
79 		return false;
80 	}
81 
82 	inline void Unlock(VMCache* lockable)
83 	{
84 		vm_area_put_locked_cache(lockable);
85 	}
86 };
87 
88 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
89 public:
90 	inline AreaCacheLocker(VMCache* cache = NULL)
91 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
92 	{
93 	}
94 
95 	inline AreaCacheLocker(VMArea* area)
96 		: AutoLocker<VMCache, AreaCacheLocking>()
97 	{
98 		SetTo(area);
99 	}
100 
101 	inline void SetTo(VMCache* cache, bool alreadyLocked)
102 	{
103 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
104 	}
105 
106 	inline void SetTo(VMArea* area)
107 	{
108 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
109 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
110 	}
111 };
112 
113 
114 class VMCacheChainLocker {
115 public:
116 	VMCacheChainLocker()
117 		:
118 		fTopCache(NULL),
119 		fBottomCache(NULL)
120 	{
121 	}
122 
123 	VMCacheChainLocker(VMCache* topCache)
124 		:
125 		fTopCache(topCache),
126 		fBottomCache(topCache)
127 	{
128 	}
129 
130 	~VMCacheChainLocker()
131 	{
132 		Unlock();
133 	}
134 
135 	void SetTo(VMCache* topCache)
136 	{
137 		fTopCache = topCache;
138 		fBottomCache = topCache;
139 
140 		if (topCache != NULL)
141 			topCache->SetUserData(NULL);
142 	}
143 
144 	VMCache* LockSourceCache()
145 	{
146 		if (fBottomCache == NULL || fBottomCache->source == NULL)
147 			return NULL;
148 
149 		VMCache* previousCache = fBottomCache;
150 
151 		fBottomCache = fBottomCache->source;
152 		fBottomCache->Lock();
153 		fBottomCache->AcquireRefLocked();
154 		fBottomCache->SetUserData(previousCache);
155 
156 		return fBottomCache;
157 	}
158 
159 	void LockAllSourceCaches()
160 	{
161 		while (LockSourceCache() != NULL) {
162 		}
163 	}
164 
165 	void Unlock(VMCache* exceptCache = NULL)
166 	{
167 		if (fTopCache == NULL)
168 			return;
169 
170 		// Unlock caches in source -> consumer direction. This is important to
171 		// avoid double-locking and a reversal of locking order in case a cache
172 		// is eligible for merging.
173 		VMCache* cache = fBottomCache;
174 		while (cache != NULL) {
175 			VMCache* nextCache = (VMCache*)cache->UserData();
176 			if (cache != exceptCache)
177 				cache->ReleaseRefAndUnlock(cache != fTopCache);
178 
179 			if (cache == fTopCache)
180 				break;
181 
182 			cache = nextCache;
183 		}
184 
185 		fTopCache = NULL;
186 		fBottomCache = NULL;
187 	}
188 
189 	void UnlockKeepRefs(bool keepTopCacheLocked)
190 	{
191 		if (fTopCache == NULL)
192 			return;
193 
194 		VMCache* nextCache = fBottomCache;
195 		VMCache* cache = NULL;
196 
197 		while (keepTopCacheLocked
198 				? nextCache != fTopCache : cache != fTopCache) {
199 			cache = nextCache;
200 			nextCache = (VMCache*)cache->UserData();
201 			cache->Unlock(cache != fTopCache);
202 		}
203 	}
204 
205 	void RelockCaches(bool topCacheLocked)
206 	{
207 		if (fTopCache == NULL)
208 			return;
209 
210 		VMCache* nextCache = fTopCache;
211 		VMCache* cache = NULL;
212 		if (topCacheLocked) {
213 			cache = nextCache;
214 			nextCache = cache->source;
215 		}
216 
217 		while (cache != fBottomCache && nextCache != NULL) {
218 			VMCache* consumer = cache;
219 			cache = nextCache;
220 			nextCache = cache->source;
221 			cache->Lock();
222 			cache->SetUserData(consumer);
223 		}
224 	}
225 
226 private:
227 	VMCache*	fTopCache;
228 	VMCache*	fBottomCache;
229 };
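
// A minimal usage sketch for VMCacheChainLocker (illustrative only -- compare
// the real call sites such as cut_area() further down in this file):
//
//     VMCache* cache = vm_area_get_locked_cache(area);
//     VMCacheChainLocker cacheChainLocker(cache);
//     cacheChainLocker.LockAllSourceCaches();
//     // ... work on the fully locked cache chain ...
//     // the destructor unlocks the chain in source -> consumer order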
230 
231 
232 // The memory reserve an allocation of a certain priority must not touch.
233 static const size_t kMemoryReserveForPriority[] = {
234 	VM_MEMORY_RESERVE_USER,		// user
235 	VM_MEMORY_RESERVE_SYSTEM,	// system
236 	0							// VIP
237 };
238 
239 
240 ObjectCache* gPageMappingsObjectCache;
241 
242 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
243 
244 static off_t sAvailableMemory;
245 static off_t sNeededMemory;
246 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
247 static uint32 sPageFaults;
248 
249 static VMPhysicalPageMapper* sPhysicalPageMapper;
250 
251 #if DEBUG_CACHE_LIST
252 
253 struct cache_info {
254 	VMCache*	cache;
255 	addr_t		page_count;
256 	addr_t		committed;
257 };
258 
259 static const int kCacheInfoTableCount = 100 * 1024;
260 static cache_info* sCacheInfoTable;
261 
262 #endif	// DEBUG_CACHE_LIST
263 
264 
265 // function declarations
266 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
267 	bool addressSpaceCleanup);
268 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
269 	bool isWrite, bool isUser, vm_page** wirePage,
270 	VMAreaWiredRange* wiredRange = NULL);
271 static status_t map_backing_store(VMAddressSpace* addressSpace,
272 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
273 	int protection, int mapping, uint32 flags,
274 	const virtual_address_restrictions* addressRestrictions, bool kernel,
275 	VMArea** _area, void** _virtualAddress);
276 
277 
278 //	#pragma mark -
279 
280 
281 #if VM_PAGE_FAULT_TRACING
282 
283 namespace VMPageFaultTracing {
284 
285 class PageFaultStart : public AbstractTraceEntry {
286 public:
287 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
288 		:
289 		fAddress(address),
290 		fPC(pc),
291 		fWrite(write),
292 		fUser(user)
293 	{
294 		Initialized();
295 	}
296 
297 	virtual void AddDump(TraceOutput& out)
298 	{
299 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
300 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
301 	}
302 
303 private:
304 	addr_t	fAddress;
305 	addr_t	fPC;
306 	bool	fWrite;
307 	bool	fUser;
308 };
309 
310 
311 // page fault errors
312 enum {
313 	PAGE_FAULT_ERROR_NO_AREA		= 0,
314 	PAGE_FAULT_ERROR_KERNEL_ONLY,
315 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
316 	PAGE_FAULT_ERROR_READ_PROTECTED,
317 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
318 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
319 };
320 
321 
322 class PageFaultError : public AbstractTraceEntry {
323 public:
324 	PageFaultError(area_id area, status_t error)
325 		:
326 		fArea(area),
327 		fError(error)
328 	{
329 		Initialized();
330 	}
331 
332 	virtual void AddDump(TraceOutput& out)
333 	{
334 		switch (fError) {
335 			case PAGE_FAULT_ERROR_NO_AREA:
336 				out.Print("page fault error: no area");
337 				break;
338 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
339 				out.Print("page fault error: area: %ld, kernel only", fArea);
340 				break;
341 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
342 				out.Print("page fault error: area: %ld, write protected",
343 					fArea);
344 				break;
345 			case PAGE_FAULT_ERROR_READ_PROTECTED:
346 				out.Print("page fault error: area: %ld, read protected", fArea);
347 				break;
348 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
349 				out.Print("page fault error: kernel touching bad user memory");
350 				break;
351 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
352 				out.Print("page fault error: no address space");
353 				break;
354 			default:
355 				out.Print("page fault error: area: %ld, error: %s", fArea,
356 					strerror(fError));
357 				break;
358 		}
359 	}
360 
361 private:
362 	area_id		fArea;
363 	status_t	fError;
364 };
365 
366 
367 class PageFaultDone : public AbstractTraceEntry {
368 public:
369 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
370 			vm_page* page)
371 		:
372 		fArea(area),
373 		fTopCache(topCache),
374 		fCache(cache),
375 		fPage(page)
376 	{
377 		Initialized();
378 	}
379 
380 	virtual void AddDump(TraceOutput& out)
381 	{
382 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
383 			"page: %p", fArea, fTopCache, fCache, fPage);
384 	}
385 
386 private:
387 	area_id		fArea;
388 	VMCache*	fTopCache;
389 	VMCache*	fCache;
390 	vm_page*	fPage;
391 };
392 
393 }	// namespace VMPageFaultTracing
394 
395 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
396 #else
397 #	define TPF(x) ;
398 #endif	// VM_PAGE_FAULT_TRACING
399 
400 
401 //	#pragma mark -
402 
403 
404 /*!	The page's cache must be locked.
405 */
406 static inline void
407 increment_page_wired_count(vm_page* page)
408 {
409 	if (!page->IsMapped())
410 		atomic_add(&gMappedPagesCount, 1);
411 	page->IncrementWiredCount();
412 }
413 
414 
415 /*!	The page's cache must be locked.
416 */
417 static inline void
418 decrement_page_wired_count(vm_page* page)
419 {
420 	page->DecrementWiredCount();
421 	if (!page->IsMapped())
422 		atomic_add(&gMappedPagesCount, -1);
423 }
424 
425 
426 static inline addr_t
427 virtual_page_address(VMArea* area, vm_page* page)
428 {
429 	return area->Base()
430 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
431 }
432 
433 
434 //! You need to have the address space locked when calling this function
435 static VMArea*
436 lookup_area(VMAddressSpace* addressSpace, area_id id)
437 {
438 	VMAreaHash::ReadLock();
439 
440 	VMArea* area = VMAreaHash::LookupLocked(id);
441 	if (area != NULL && area->address_space != addressSpace)
442 		area = NULL;
443 
444 	VMAreaHash::ReadUnlock();
445 
446 	return area;
447 }
448 
449 
450 static inline void
451 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
452 {
453 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
454 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
455 	uint8& entry = area->page_protections[pageIndex / 2];
456 	if (pageIndex % 2 == 0)
457 		entry = (entry & 0xf0) | protection;
458 	else
459 		entry = (entry & 0x0f) | (protection << 4);
460 }
461 
462 
463 static inline uint32
464 get_area_page_protection(VMArea* area, addr_t pageAddress)
465 {
466 	if (area->page_protections == NULL)
467 		return area->protection;
468 
469 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
470 	uint32 protection = area->page_protections[pageIndex / 2];
471 	if (pageIndex % 2 == 0)
472 		protection &= 0x0f;
473 	else
474 		protection >>= 4;
475 
476 	return protection | B_KERNEL_READ_AREA
477 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
478 }
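
// Worked example of the nibble packing used by the two helpers above (a
// sketch with illustrative values): page_protections stores two pages per
// byte -- the even page index in the low nibble, the odd one in the high
// nibble. For pageIndex == 5 the relevant byte is page_protections[2]; if
// that byte is 0x3f, the shift by 4 yields 0x3, i.e. B_READ_AREA
// | B_WRITE_AREA (assuming the usual 1 and 2 bit values) for that page,
// before the kernel protection bits are ORed in.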
479 
480 
481 /*!	The caller must have reserved enough pages that the translation map
482 	implementation might need to map this page.
483 	The page's cache must be locked.
484 */
485 static status_t
486 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
487 	vm_page_reservation* reservation)
488 {
489 	VMTranslationMap* map = area->address_space->TranslationMap();
490 
491 	bool wasMapped = page->IsMapped();
492 
493 	if (area->wiring == B_NO_LOCK) {
494 		DEBUG_PAGE_ACCESS_CHECK(page);
495 
496 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
497 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
498 			gPageMappingsObjectCache,
499 			CACHE_DONT_WAIT_FOR_MEMORY
500 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
501 		if (mapping == NULL)
502 			return B_NO_MEMORY;
503 
504 		mapping->page = page;
505 		mapping->area = area;
506 
507 		map->Lock();
508 
509 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
510 			area->MemoryType(), reservation);
511 
512 		// insert mapping into lists
513 		if (!page->IsMapped())
514 			atomic_add(&gMappedPagesCount, 1);
515 
516 		page->mappings.Add(mapping);
517 		area->mappings.Add(mapping);
518 
519 		map->Unlock();
520 	} else {
521 		DEBUG_PAGE_ACCESS_CHECK(page);
522 
523 		map->Lock();
524 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
525 			area->MemoryType(), reservation);
526 		map->Unlock();
527 
528 		increment_page_wired_count(page);
529 	}
530 
531 	if (!wasMapped) {
532 		// The page is mapped now, so it must not remain in the cached queue. It
533 		// also makes sense to move it from the inactive to the active queue,
534 		// since otherwise the page daemon wouldn't keep track of it (in idle
535 		// mode) -- if the page isn't touched, it will be deactivated after a
536 		// full iteration through the queue at the latest.
537 		if (page->State() == PAGE_STATE_CACHED
538 				|| page->State() == PAGE_STATE_INACTIVE) {
539 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
540 		}
541 	}
542 
543 	return B_OK;
544 }
545 
546 
547 /*!	The caller must hold the lock of the page's cache when calling this
548 	function.
549 */
550 static inline bool
551 unmap_page(VMArea* area, addr_t virtualAddress)
552 {
553 	return area->address_space->TranslationMap()->UnmapPage(area,
554 		virtualAddress, true);
555 }
556 
557 
558 /*!	The caller must hold the locks of all mapped pages' caches when calling
559 	this function.
560 */
561 static inline void
562 unmap_pages(VMArea* area, addr_t base, size_t size)
563 {
564 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
565 }
566 
567 
568 /*!	Cuts a piece out of an area. If the given cut range covers the complete
569 	area, it is deleted. If it covers the beginning or the end, the area is
570 	resized accordingly. If the range covers some part in the middle of the
571 	area, it is split in two; in this case the second area is returned via
572 	\a _secondArea (the variable is left untouched in the other cases).
573 	The address space must be write locked.
574 	The caller must ensure that no part of the given range is wired.
575 */
576 static status_t
577 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
578 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
579 {
580 	// Does the cut range intersect with the area at all?
581 	addr_t areaLast = area->Base() + (area->Size() - 1);
582 	if (area->Base() > lastAddress || areaLast < address)
583 		return B_OK;
584 
585 	// Is the area fully covered?
586 	if (area->Base() >= address && areaLast <= lastAddress) {
587 		delete_area(addressSpace, area, false);
588 		return B_OK;
589 	}
590 
591 	int priority;
592 	uint32 allocationFlags;
593 	if (addressSpace == VMAddressSpace::Kernel()) {
594 		priority = VM_PRIORITY_SYSTEM;
595 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
596 			| HEAP_DONT_LOCK_KERNEL_SPACE;
597 	} else {
598 		priority = VM_PRIORITY_USER;
599 		allocationFlags = 0;
600 	}
601 
602 	VMCache* cache = vm_area_get_locked_cache(area);
603 	VMCacheChainLocker cacheChainLocker(cache);
604 	cacheChainLocker.LockAllSourceCaches();
605 
606 	// Cut the end only?
607 	if (areaLast <= lastAddress) {
608 		size_t oldSize = area->Size();
609 		size_t newSize = address - area->Base();
610 
611 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
612 			allocationFlags);
613 		if (error != B_OK)
614 			return error;
615 
616 		// unmap pages
617 		unmap_pages(area, address, oldSize - newSize);
618 
619 		// If no one else uses the area's cache, we can resize it, too.
620 		if (cache->areas == area && area->cache_next == NULL
621 			&& cache->consumers.IsEmpty()
622 			&& cache->type == CACHE_TYPE_RAM) {
623 			// Since VMCache::Resize() can temporarily drop the lock, we must
624 			// unlock all lower caches to prevent locking order inversion.
625 			cacheChainLocker.Unlock(cache);
626 			cache->Resize(cache->virtual_base + newSize, priority);
627 			cache->ReleaseRefAndUnlock();
628 		}
629 
630 		return B_OK;
631 	}
632 
633 	// Cut the beginning only?
634 	if (area->Base() >= address) {
635 		addr_t oldBase = area->Base();
636 		addr_t newBase = lastAddress + 1;
637 		size_t newSize = areaLast - lastAddress;
638 
639 		// unmap pages
640 		unmap_pages(area, oldBase, newBase - oldBase);
641 
642 		// resize the area
643 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
644 			allocationFlags);
645 		if (error != B_OK)
646 			return error;
647 
648 		// TODO: If no one else uses the area's cache, we should resize it, too!
649 
650 		area->cache_offset += newBase - oldBase;
651 
652 		return B_OK;
653 	}
654 
655 	// The tough part -- cut a piece out of the middle of the area.
656 	// We do that by shrinking the area to the beginning section and creating a
657 	// new area for the end section.
658 
659 	addr_t firstNewSize = address - area->Base();
660 	addr_t secondBase = lastAddress + 1;
661 	addr_t secondSize = areaLast - lastAddress;
662 
663 	// unmap pages
664 	unmap_pages(area, address, area->Size() - firstNewSize);
665 
666 	// resize the area
667 	addr_t oldSize = area->Size();
668 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
669 		allocationFlags);
670 	if (error != B_OK)
671 		return error;
672 
673 	// TODO: If no one else uses the area's cache, we might want to create a
674 	// new cache for the second area, transfer the concerned pages from the
675 	// first cache to it and resize the first cache.
676 
677 	// map the second area
678 	virtual_address_restrictions addressRestrictions = {};
679 	addressRestrictions.address = (void*)secondBase;
680 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
681 	VMArea* secondArea;
682 	error = map_backing_store(addressSpace, cache,
683 		area->cache_offset + (secondBase - area->Base()), area->name,
684 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
685 		&addressRestrictions, kernel, &secondArea, NULL);
686 	if (error != B_OK) {
687 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
688 		return error;
689 	}
690 
691 	// We need a cache reference for the new area.
692 	cache->AcquireRefLocked();
693 
694 	if (_secondArea != NULL)
695 		*_secondArea = secondArea;
696 
697 	return B_OK;
698 }
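
// The cut_area() cases above, sketched for a hypothetical area and cut range
// (the layout is illustrative only):
//
//     range covers the whole area  -> delete_area()
//     range covers the tail        -> ShrinkAreaTail() + unmap_pages()
//     range covers the head        -> unmap_pages() + ShrinkAreaHead(),
//                                     cache_offset is adjusted
//     range lies in the middle     -> ShrinkAreaTail() for the first part and
//                                     map_backing_store() to create the second
//                                     area on the same cache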
699 
700 
701 /*!	Deletes all areas in the given address range.
702 	The address space must be write-locked.
703 	The caller must ensure that no part of the given range is wired.
704 */
705 static status_t
706 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
707 	bool kernel)
708 {
709 	size = PAGE_ALIGN(size);
710 	addr_t lastAddress = address + (size - 1);
711 
712 	// Check whether the caller is allowed to modify the concerned areas.
713 	if (!kernel) {
714 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
715 				VMArea* area = it.Next();) {
716 			addr_t areaLast = area->Base() + (area->Size() - 1);
717 			if (area->Base() < lastAddress && address < areaLast) {
718 				if ((area->protection & B_KERNEL_AREA) != 0)
719 					return B_NOT_ALLOWED;
720 			}
721 		}
722 	}
723 
724 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
725 			VMArea* area = it.Next();) {
726 		addr_t areaLast = area->Base() + (area->Size() - 1);
727 		if (area->Base() < lastAddress && address < areaLast) {
728 			status_t error = cut_area(addressSpace, area, address,
729 				lastAddress, NULL, kernel);
730 			if (error != B_OK)
731 				return error;
732 				// Failing after already messing with areas is ugly, but we
733 				// can't do anything about it.
734 		}
735 	}
736 
737 	return B_OK;
738 }
739 
740 
741 /*! You need to hold the lock of the cache and the write lock of the address
742 	space when calling this function.
743 	Note that in case of error, your cache will be temporarily unlocked.
744 	If \a addressSpec is \c B_EXACT_ADDRESS and the
745 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
746 	that no part of the specified address range (base \c *_virtualAddress, size
747 	\a size) is wired.
748 */
749 static status_t
750 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
751 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
752 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
753 	bool kernel, VMArea** _area, void** _virtualAddress)
754 {
755 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%Lx, "
756 		"size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName "
757 		"'%s'\n", addressSpace, cache, addressRestrictions->address, offset,
758 		size, addressRestrictions->address_specification, wiring, protection,
759 		_area, areaName));
760 	cache->AssertLocked();
761 
762 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
763 		| HEAP_DONT_LOCK_KERNEL_SPACE;
764 	int priority;
765 	if (addressSpace != VMAddressSpace::Kernel()) {
766 		priority = VM_PRIORITY_USER;
767 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
768 		priority = VM_PRIORITY_VIP;
769 		allocationFlags |= HEAP_PRIORITY_VIP;
770 	} else
771 		priority = VM_PRIORITY_SYSTEM;
772 
773 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
774 		allocationFlags);
775 	if (area == NULL)
776 		return B_NO_MEMORY;
777 
778 	status_t status;
779 
780 	// if this is a private map, we need to create a new cache
781 	// to handle the private copies of pages as they are written to
782 	VMCache* sourceCache = cache;
783 	if (mapping == REGION_PRIVATE_MAP) {
784 		VMCache* newCache;
785 
786 		// create an anonymous cache
787 		bool isStack = (protection & B_STACK_AREA) != 0;
788 		status = VMCacheFactory::CreateAnonymousCache(newCache,
789 			isStack || (protection & B_OVERCOMMITTING_AREA) != 0, 0,
790 			isStack ? USER_STACK_GUARD_PAGES : 0, true, VM_PRIORITY_USER);
791 		if (status != B_OK)
792 			goto err1;
793 
794 		newCache->Lock();
795 		newCache->temporary = 1;
796 		newCache->virtual_base = offset;
797 		newCache->virtual_end = offset + size;
798 
799 		cache->AddConsumer(newCache);
800 
801 		cache = newCache;
802 	}
803 
804 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
805 		status = cache->SetMinimalCommitment(size, priority);
806 		if (status != B_OK)
807 			goto err2;
808 	}
809 
810 	// check to see if this address space has entered DELETE state
811 	if (addressSpace->IsBeingDeleted()) {
812 		// okay, someone is trying to delete this address space now, so we can't
813 		// insert the area, so back out
814 		status = B_BAD_TEAM_ID;
815 		goto err2;
816 	}
817 
818 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
819 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
820 		status = unmap_address_range(addressSpace,
821 			(addr_t)addressRestrictions->address, size, kernel);
822 		if (status != B_OK)
823 			goto err2;
824 	}
825 
826 	status = addressSpace->InsertArea(area, size, addressRestrictions,
827 		allocationFlags, _virtualAddress);
828 	if (status != B_OK) {
829 		// TODO: wait and try again once this is working in the backend
830 #if 0
831 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
832 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
833 				0, 0);
834 		}
835 #endif
836 		goto err2;
837 	}
838 
839 	// attach the cache to the area
840 	area->cache = cache;
841 	area->cache_offset = offset;
842 
843 	// point the cache back to the area
844 	cache->InsertAreaLocked(area);
845 	if (mapping == REGION_PRIVATE_MAP)
846 		cache->Unlock();
847 
848 	// insert the area in the global area hash table
849 	VMAreaHash::Insert(area);
850 
851 	// grab a ref to the address space (the area holds this)
852 	addressSpace->Get();
853 
854 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
855 //		cache, sourceCache, areaName, area);
856 
857 	*_area = area;
858 	return B_OK;
859 
860 err2:
861 	if (mapping == REGION_PRIVATE_MAP) {
862 		// We created this cache, so we must delete it again. Note that we
863 		// need to temporarily unlock the source cache or we'll otherwise
864 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
865 		sourceCache->Unlock();
866 		cache->ReleaseRefAndUnlock();
867 		sourceCache->Lock();
868 	}
869 err1:
870 	addressSpace->DeleteArea(area, allocationFlags);
871 	return status;
872 }
873 
874 
875 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
876 	  locker1, locker2).
877 */
878 template<typename LockerType1, typename LockerType2>
879 static inline bool
880 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
881 {
882 	area->cache->AssertLocked();
883 
884 	VMAreaUnwiredWaiter waiter;
885 	if (!area->AddWaiterIfWired(&waiter))
886 		return false;
887 
888 	// unlock everything and wait
889 	if (locker1 != NULL)
890 		locker1->Unlock();
891 	if (locker2 != NULL)
892 		locker2->Unlock();
893 
894 	waiter.waitEntry.Wait();
895 
896 	return true;
897 }
898 
899 
900 /*!	Checks whether the given area has any wired ranges intersecting with the
901 	specified range and waits, if so.
902 
903 	When it has to wait, the function calls \c Unlock() on both \a locker1
904 	and \a locker2, if given.
905 	The area's top cache must be locked and must be unlocked as a side effect
906 	of calling \c Unlock() on either \a locker1 or \a locker2.
907 
908 	If the function does not have to wait it does not modify or unlock any
909 	object.
910 
911 	\param area The area to be checked.
912 	\param base The base address of the range to check.
913 	\param size The size of the address range to check.
914 	\param locker1 An object to be unlocked before starting to wait (may
915 		be \c NULL).
916 	\param locker2 An object to be unlocked before starting to wait (may
917 		be \c NULL).
918 	\return \c true, if the function had to wait, \c false otherwise.
919 */
920 template<typename LockerType1, typename LockerType2>
921 static inline bool
922 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
923 	LockerType1* locker1, LockerType2* locker2)
924 {
925 	area->cache->AssertLocked();
926 
927 	VMAreaUnwiredWaiter waiter;
928 	if (!area->AddWaiterIfWired(&waiter, base, size))
929 		return false;
930 
931 	// unlock everything and wait
932 	if (locker1 != NULL)
933 		locker1->Unlock();
934 	if (locker2 != NULL)
935 		locker2->Unlock();
936 
937 	waiter.waitEntry.Wait();
938 
939 	return true;
940 }
941 
942 
943 /*!	Checks whether the given address space has any wired ranges intersecting
944 	with the specified range and waits, if so.
945 
946 	Similar to wait_if_area_range_is_wired(), with the following differences:
947 	- All areas intersecting with the range are checked (respectively all until
948 	  one is found that contains a wired range intersecting with the given
949 	  range).
950 	- The given address space must at least be read-locked and must be unlocked
951 	  when \c Unlock() is called on \a locker.
952 	- None of the areas' caches are allowed to be locked.
953 */
954 template<typename LockerType>
955 static inline bool
956 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
957 	size_t size, LockerType* locker)
958 {
959 	addr_t end = base + size - 1;
960 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
961 			VMArea* area = it.Next();) {
962 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
963 		if (area->Base() > end)
964 			return false;
965 
966 		if (base >= area->Base() + area->Size() - 1)
967 			continue;
968 
969 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
970 
971 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
972 			return true;
973 	}
974 
975 	return false;
976 }
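
// A minimal sketch of the retry loop callers typically build around this
// helper (compare vm_create_null_area() below); team, address and size are
// illustrative:
//
//     AddressSpaceWriteLocker locker;
//     do {
//         if (locker.SetTo(team) != B_OK)
//             return B_BAD_TEAM_ID;
//     } while (wait_if_address_range_is_wired(locker.AddressSpace(),
//         (addr_t)address, size, &locker));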
977 
978 
979 status_t
980 vm_block_address_range(const char* name, void* address, addr_t size)
981 {
982 	if (!arch_vm_supports_protection(0))
983 		return B_NOT_SUPPORTED;
984 
985 	AddressSpaceWriteLocker locker;
986 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
987 	if (status != B_OK)
988 		return status;
989 
990 	VMAddressSpace* addressSpace = locker.AddressSpace();
991 
992 	// create an anonymous cache
993 	VMCache* cache;
994 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
995 		VM_PRIORITY_SYSTEM);
996 	if (status != B_OK)
997 		return status;
998 
999 	cache->temporary = 1;
1000 	cache->virtual_end = size;
1001 	cache->Lock();
1002 
1003 	VMArea* area;
1004 	virtual_address_restrictions addressRestrictions = {};
1005 	addressRestrictions.address = address;
1006 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1007 	status = map_backing_store(addressSpace, cache, 0, name, size,
1008 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1009 		true, &area, NULL);
1010 	if (status != B_OK) {
1011 		cache->ReleaseRefAndUnlock();
1012 		return status;
1013 	}
1014 
1015 	cache->Unlock();
1016 	area->cache_type = CACHE_TYPE_RAM;
1017 	return area->id;
1018 }
1019 
1020 
1021 status_t
1022 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1023 {
1024 	AddressSpaceWriteLocker locker(team);
1025 	if (!locker.IsLocked())
1026 		return B_BAD_TEAM_ID;
1027 
1028 	VMAddressSpace* addressSpace = locker.AddressSpace();
1029 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1030 		addressSpace == VMAddressSpace::Kernel()
1031 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1032 }
1033 
1034 
1035 status_t
1036 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1037 	addr_t size, uint32 flags)
1038 {
1039 	if (size == 0)
1040 		return B_BAD_VALUE;
1041 
1042 	AddressSpaceWriteLocker locker(team);
1043 	if (!locker.IsLocked())
1044 		return B_BAD_TEAM_ID;
1045 
1046 	virtual_address_restrictions addressRestrictions = {};
1047 	addressRestrictions.address = *_address;
1048 	addressRestrictions.address_specification = addressSpec;
1049 	VMAddressSpace* addressSpace = locker.AddressSpace();
1050 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1051 		addressSpace == VMAddressSpace::Kernel()
1052 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1053 		_address);
1054 }
1055 
1056 
1057 area_id
1058 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1059 	uint32 wiring, uint32 protection, uint32 flags,
1060 	const virtual_address_restrictions* virtualAddressRestrictions,
1061 	const physical_address_restrictions* physicalAddressRestrictions,
1062 	bool kernel, void** _address)
1063 {
1064 	VMArea* area;
1065 	VMCache* cache;
1066 	vm_page* page = NULL;
1067 	bool isStack = (protection & B_STACK_AREA) != 0;
1068 	page_num_t guardPages;
1069 	bool canOvercommit = false;
1070 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1071 		? VM_PAGE_ALLOC_CLEAR : 0;
1072 
1073 	TRACE(("create_anonymous_area [%ld] %s: size 0x%lx\n", team, name, size));
1074 
1075 	size = PAGE_ALIGN(size);
1076 
1077 	if (size == 0)
1078 		return B_BAD_VALUE;
1079 	if (!arch_vm_supports_protection(protection))
1080 		return B_NOT_SUPPORTED;
1081 
1082 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1083 		canOvercommit = true;
1084 
1085 #ifdef DEBUG_KERNEL_STACKS
1086 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1087 		isStack = true;
1088 #endif
1089 
1090 	// check parameters
1091 	switch (virtualAddressRestrictions->address_specification) {
1092 		case B_ANY_ADDRESS:
1093 		case B_EXACT_ADDRESS:
1094 		case B_BASE_ADDRESS:
1095 		case B_ANY_KERNEL_ADDRESS:
1096 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1097 			break;
1098 
1099 		default:
1100 			return B_BAD_VALUE;
1101 	}
1102 
1103 	// If low or high physical address restrictions are given, we force
1104 	// B_CONTIGUOUS wiring, since only then will we use
1105 	// vm_page_allocate_page_run() which deals with those restrictions.
1106 	if (physicalAddressRestrictions->low_address != 0
1107 		|| physicalAddressRestrictions->high_address != 0) {
1108 		wiring = B_CONTIGUOUS;
1109 	}
1110 
1111 	physical_address_restrictions stackPhysicalRestrictions;
1112 	bool doReserveMemory = false;
1113 	switch (wiring) {
1114 		case B_NO_LOCK:
1115 			break;
1116 		case B_FULL_LOCK:
1117 		case B_LAZY_LOCK:
1118 		case B_CONTIGUOUS:
1119 			doReserveMemory = true;
1120 			break;
1121 		case B_ALREADY_WIRED:
1122 			break;
1123 		case B_LOMEM:
1124 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1125 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1126 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1127 			wiring = B_CONTIGUOUS;
1128 			doReserveMemory = true;
1129 			break;
1130 		case B_32_BIT_FULL_LOCK:
1131 			if (B_HAIKU_PHYSICAL_BITS <= 32
1132 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1133 				wiring = B_FULL_LOCK;
1134 				doReserveMemory = true;
1135 				break;
1136 			}
1137 			// TODO: We don't really support this mode efficiently. Just fall
1138 			// through for now ...
1139 		case B_32_BIT_CONTIGUOUS:
1140 			#if B_HAIKU_PHYSICAL_BITS > 32
1141 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1142 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1143 					stackPhysicalRestrictions.high_address
1144 						= (phys_addr_t)1 << 32;
1145 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1146 				}
1147 			#endif
1148 			wiring = B_CONTIGUOUS;
1149 			doReserveMemory = true;
1150 			break;
1151 		default:
1152 			return B_BAD_VALUE;
1153 	}
1154 
1155 	// Optimization: For a single-page contiguous allocation without low/high
1156 	// memory restriction, B_FULL_LOCK wiring suffices.
1157 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1158 		&& physicalAddressRestrictions->low_address == 0
1159 		&& physicalAddressRestrictions->high_address == 0) {
1160 		wiring = B_FULL_LOCK;
1161 	}
1162 
1163 	// For full lock or contiguous areas we're also going to map the pages and
1164 	// thus need to reserve pages for the mapping backend upfront.
1165 	addr_t reservedMapPages = 0;
1166 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1167 		AddressSpaceWriteLocker locker;
1168 		status_t status = locker.SetTo(team);
1169 		if (status != B_OK)
1170 			return status;
1171 
1172 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1173 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1174 	}
1175 
1176 	int priority;
1177 	if (team != VMAddressSpace::KernelID())
1178 		priority = VM_PRIORITY_USER;
1179 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1180 		priority = VM_PRIORITY_VIP;
1181 	else
1182 		priority = VM_PRIORITY_SYSTEM;
1183 
1184 	// Reserve memory before acquiring the address space lock. This reduces the
1185 	// chances of failure, since while holding the write lock to the address
1186 	// space (if it is the kernel address space that is), the low memory handler
1187 	// won't be able to free anything for us.
1188 	addr_t reservedMemory = 0;
1189 	if (doReserveMemory) {
1190 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1191 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1192 			return B_NO_MEMORY;
1193 		reservedMemory = size;
1194 		// TODO: We don't reserve the memory for the pages for the page
1195 		// directories/tables. We actually need to, since we currently don't
1196 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1197 		// there are actually fewer physical pages than there should be, which
1198 		// can get the VM into trouble in low memory situations.
1199 	}
1200 
1201 	AddressSpaceWriteLocker locker;
1202 	VMAddressSpace* addressSpace;
1203 	status_t status;
1204 
1205 	// For full lock areas reserve the pages before locking the address
1206 	// space. E.g. block caches can't release their memory while we hold the
1207 	// address space lock.
1208 	page_num_t reservedPages = reservedMapPages;
1209 	if (wiring == B_FULL_LOCK)
1210 		reservedPages += size / B_PAGE_SIZE;
1211 
1212 	vm_page_reservation reservation;
1213 	if (reservedPages > 0) {
1214 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1215 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1216 					priority)) {
1217 				reservedPages = 0;
1218 				status = B_WOULD_BLOCK;
1219 				goto err0;
1220 			}
1221 		} else
1222 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1223 	}
1224 
1225 	if (wiring == B_CONTIGUOUS) {
1226 		// we try to allocate the page run here upfront as this may easily
1227 		// fail for obvious reasons
1228 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1229 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1230 		if (page == NULL) {
1231 			status = B_NO_MEMORY;
1232 			goto err0;
1233 		}
1234 	}
1235 
1236 	// Lock the address space and, if B_EXACT_ADDRESS and
1237 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1238 	// is not wired.
1239 	do {
1240 		status = locker.SetTo(team);
1241 		if (status != B_OK)
1242 			goto err1;
1243 
1244 		addressSpace = locker.AddressSpace();
1245 	} while (virtualAddressRestrictions->address_specification
1246 			== B_EXACT_ADDRESS
1247 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1248 		&& wait_if_address_range_is_wired(addressSpace,
1249 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1250 
1251 	// create an anonymous cache
1252 	// if it's a stack, make sure that two pages are available at least
1253 	// if it's a stack, make sure that at least two pages are available
1254 		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
1255 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1256 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1257 		wiring == B_NO_LOCK, priority);
1258 	if (status != B_OK)
1259 		goto err1;
1260 
1261 	cache->temporary = 1;
1262 	cache->virtual_end = size;
1263 	cache->committed_size = reservedMemory;
1264 		// TODO: This should be done via a method.
1265 	reservedMemory = 0;
1266 
1267 	cache->Lock();
1268 
1269 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1270 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1271 		kernel, &area, _address);
1272 
1273 	if (status != B_OK) {
1274 		cache->ReleaseRefAndUnlock();
1275 		goto err1;
1276 	}
1277 
1278 	locker.DegradeToReadLock();
1279 
1280 	switch (wiring) {
1281 		case B_NO_LOCK:
1282 		case B_LAZY_LOCK:
1283 			// do nothing - the pages are mapped in as needed
1284 			break;
1285 
1286 		case B_FULL_LOCK:
1287 		{
1288 			// Allocate and map all pages for this area
1289 
1290 			off_t offset = 0;
1291 			for (addr_t address = area->Base();
1292 					address < area->Base() + (area->Size() - 1);
1293 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1294 #ifdef DEBUG_KERNEL_STACKS
1295 #	ifdef STACK_GROWS_DOWNWARDS
1296 				if (isStack && address < area->Base()
1297 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1298 #	else
1299 				if (isStack && address >= area->Base() + area->Size()
1300 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1301 #	endif
1302 					continue;
1303 #endif
1304 				vm_page* page = vm_page_allocate_page(&reservation,
1305 					PAGE_STATE_WIRED | pageAllocFlags);
1306 				cache->InsertPage(page, offset);
1307 				map_page(area, page, address, protection, &reservation);
1308 
1309 				DEBUG_PAGE_ACCESS_END(page);
1310 			}
1311 
1312 			break;
1313 		}
1314 
1315 		case B_ALREADY_WIRED:
1316 		{
1317 			// The pages should already be mapped. This is only really useful
1318 			// during boot time. Find the appropriate vm_page objects and stick
1319 			// them in the cache object.
1320 			VMTranslationMap* map = addressSpace->TranslationMap();
1321 			off_t offset = 0;
1322 
1323 			if (!gKernelStartup)
1324 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1325 
1326 			map->Lock();
1327 
1328 			for (addr_t virtualAddress = area->Base();
1329 					virtualAddress < area->Base() + (area->Size() - 1);
1330 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1331 				phys_addr_t physicalAddress;
1332 				uint32 flags;
1333 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1334 				if (status < B_OK) {
1335 					panic("looking up mapping failed for va 0x%lx\n",
1336 						virtualAddress);
1337 				}
1338 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1339 				if (page == NULL) {
1340 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1341 						"\n", physicalAddress);
1342 				}
1343 
1344 				DEBUG_PAGE_ACCESS_START(page);
1345 
1346 				cache->InsertPage(page, offset);
1347 				increment_page_wired_count(page);
1348 				vm_page_set_state(page, PAGE_STATE_WIRED);
1349 				page->busy = false;
1350 
1351 				DEBUG_PAGE_ACCESS_END(page);
1352 			}
1353 
1354 			map->Unlock();
1355 			break;
1356 		}
1357 
1358 		case B_CONTIGUOUS:
1359 		{
1360 			// We have already allocated our contiguous page run, so we can now
1361 			// just map them in the address space
1362 			VMTranslationMap* map = addressSpace->TranslationMap();
1363 			phys_addr_t physicalAddress
1364 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1365 			addr_t virtualAddress = area->Base();
1366 			off_t offset = 0;
1367 
1368 			map->Lock();
1369 
1370 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1371 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1372 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1373 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1374 				if (page == NULL)
1375 					panic("couldn't lookup physical page just allocated\n");
1376 
1377 				status = map->Map(virtualAddress, physicalAddress, protection,
1378 					area->MemoryType(), &reservation);
1379 				if (status < B_OK)
1380 					panic("couldn't map physical page in page run\n");
1381 
1382 				cache->InsertPage(page, offset);
1383 				increment_page_wired_count(page);
1384 
1385 				DEBUG_PAGE_ACCESS_END(page);
1386 			}
1387 
1388 			map->Unlock();
1389 			break;
1390 		}
1391 
1392 		default:
1393 			break;
1394 	}
1395 
1396 	cache->Unlock();
1397 
1398 	if (reservedPages > 0)
1399 		vm_page_unreserve_pages(&reservation);
1400 
1401 	TRACE(("vm_create_anonymous_area: done\n"));
1402 
1403 	area->cache_type = CACHE_TYPE_RAM;
1404 	return area->id;
1405 
1406 err1:
1407 	if (wiring == B_CONTIGUOUS) {
1408 		// we had allocated the page run upfront...
1409 		phys_addr_t pageNumber = page->physical_page_number;
1410 		int32 i;
1411 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1412 			page = vm_lookup_page(pageNumber);
1413 			if (page == NULL)
1414 				panic("couldn't lookup physical page just allocated\n");
1415 
1416 			vm_page_set_state(page, PAGE_STATE_FREE);
1417 		}
1418 	}
1419 
1420 err0:
1421 	if (reservedPages > 0)
1422 		vm_page_unreserve_pages(&reservation);
1423 	if (reservedMemory > 0)
1424 		vm_unreserve_memory(reservedMemory);
1425 
1426 	return status;
1427 }
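
// A minimal call sketch for vm_create_anonymous_area(), following the
// anonymous-mapping path of _vm_map_file() below; the name, size and
// protection values are illustrative:
//
//     void* address = NULL;
//     virtual_address_restrictions virtualRestrictions = {};
//     virtualRestrictions.address_specification = B_ANY_ADDRESS;
//     physical_address_restrictions physicalRestrictions = {};
//     area_id id = vm_create_anonymous_area(team, "sketch area", B_PAGE_SIZE,
//         B_NO_LOCK, B_READ_AREA | B_WRITE_AREA, 0, &virtualRestrictions,
//         &physicalRestrictions, false, &address);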
1428 
1429 
1430 area_id
1431 vm_map_physical_memory(team_id team, const char* name, void** _address,
1432 	uint32 addressSpec, addr_t size, uint32 protection,
1433 	phys_addr_t physicalAddress, bool alreadyWired)
1434 {
1435 	VMArea* area;
1436 	VMCache* cache;
1437 	addr_t mapOffset;
1438 
1439 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
1440 		"spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team,
1441 		name, *_address, addressSpec, size, protection, physicalAddress));
1442 
1443 	if (!arch_vm_supports_protection(protection))
1444 		return B_NOT_SUPPORTED;
1445 
1446 	AddressSpaceWriteLocker locker(team);
1447 	if (!locker.IsLocked())
1448 		return B_BAD_TEAM_ID;
1449 
1450 	// if the physical address is not page aligned,
1451 	// move the actual area down to align on a page boundary
1452 	mapOffset = physicalAddress % B_PAGE_SIZE;
1453 	size += mapOffset;
1454 	physicalAddress -= mapOffset;
1455 
1456 	size = PAGE_ALIGN(size);
1457 
1458 	// create a device cache
1459 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1460 	if (status != B_OK)
1461 		return status;
1462 
1463 	cache->virtual_end = size;
1464 
1465 	cache->Lock();
1466 
1467 	virtual_address_restrictions addressRestrictions = {};
1468 	addressRestrictions.address = *_address;
1469 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1470 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1471 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1472 		true, &area, _address);
1473 
1474 	if (status < B_OK)
1475 		cache->ReleaseRefLocked();
1476 
1477 	cache->Unlock();
1478 
1479 	if (status == B_OK) {
1480 		// set requested memory type -- use uncached, if not given
1481 		uint32 memoryType = addressSpec & B_MTR_MASK;
1482 		if (memoryType == 0)
1483 			memoryType = B_MTR_UC;
1484 
1485 		area->SetMemoryType(memoryType);
1486 
1487 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1488 		if (status != B_OK)
1489 			delete_area(locker.AddressSpace(), area, false);
1490 	}
1491 
1492 	if (status != B_OK)
1493 		return status;
1494 
1495 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1496 
1497 	if (alreadyWired) {
1498 		// The area is already mapped, but possibly not with the right
1499 		// memory type.
1500 		map->Lock();
1501 		map->ProtectArea(area, area->protection);
1502 		map->Unlock();
1503 	} else {
1504 		// Map the area completely.
1505 
1506 		// reserve pages needed for the mapping
1507 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1508 			area->Base() + (size - 1));
1509 		vm_page_reservation reservation;
1510 		vm_page_reserve_pages(&reservation, reservePages,
1511 			team == VMAddressSpace::KernelID()
1512 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1513 
1514 		map->Lock();
1515 
1516 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1517 			map->Map(area->Base() + offset, physicalAddress + offset,
1518 				protection, area->MemoryType(), &reservation);
1519 		}
1520 
1521 		map->Unlock();
1522 
1523 		vm_page_unreserve_pages(&reservation);
1524 	}
1525 
1526 	// modify the pointer returned to be offset back into the new area
1527 	// the same way the incoming physical address was offset
1528 	*_address = (void*)((addr_t)*_address + mapOffset);
1529 
1530 	area->cache_type = CACHE_TYPE_DEVICE;
1531 	return area->id;
1532 }
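
// Worked example of the alignment handling above (illustrative numbers,
// assuming a 4 KiB page size): for physicalAddress 0x10010 and size 0x100,
// mapOffset is 0x10, so the area actually maps the page at 0x10000, and the
// returned *_address points 0x10 bytes into the area -- i.e. it still
// corresponds to the requested physical address 0x10010.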
1533 
1534 
1535 /*!	Don't use!
1536 	TODO: This function was introduced to map physical page vecs to
1537 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1538 	use a device cache and does not track vm_page::wired_count!
1539 */
1540 area_id
1541 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1542 	uint32 addressSpec, addr_t* _size, uint32 protection,
1543 	struct generic_io_vec* vecs, uint32 vecCount)
1544 {
1545 	TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, "
1546 		"spec = %ld, _size = %p, protection = %ld, vecs = %p, "
1547 		"vecCount = %ld)\n", team, name, *_address, addressSpec, _size,
1548 		protection, vecs, vecCount));
1549 
1550 	if (!arch_vm_supports_protection(protection)
1551 		|| (addressSpec & B_MTR_MASK) != 0) {
1552 		return B_NOT_SUPPORTED;
1553 	}
1554 
1555 	AddressSpaceWriteLocker locker(team);
1556 	if (!locker.IsLocked())
1557 		return B_BAD_TEAM_ID;
1558 
1559 	if (vecCount == 0)
1560 		return B_BAD_VALUE;
1561 
1562 	addr_t size = 0;
1563 	for (uint32 i = 0; i < vecCount; i++) {
1564 		if (vecs[i].base % B_PAGE_SIZE != 0
1565 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1566 			return B_BAD_VALUE;
1567 		}
1568 
1569 		size += vecs[i].length;
1570 	}
1571 
1572 	// create a device cache
1573 	VMCache* cache;
1574 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1575 	if (result != B_OK)
1576 		return result;
1577 
1578 	cache->virtual_end = size;
1579 
1580 	cache->Lock();
1581 
1582 	VMArea* area;
1583 	virtual_address_restrictions addressRestrictions = {};
1584 	addressRestrictions.address = *_address;
1585 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1586 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1587 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1588 		&addressRestrictions, true, &area, _address);
1589 
1590 	if (result != B_OK)
1591 		cache->ReleaseRefLocked();
1592 
1593 	cache->Unlock();
1594 
1595 	if (result != B_OK)
1596 		return result;
1597 
1598 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1599 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1600 		area->Base() + (size - 1));
1601 
1602 	vm_page_reservation reservation;
1603 	vm_page_reserve_pages(&reservation, reservePages,
1604 			team == VMAddressSpace::KernelID()
1605 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1606 	map->Lock();
1607 
1608 	uint32 vecIndex = 0;
1609 	size_t vecOffset = 0;
1610 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1611 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1612 			vecOffset = 0;
1613 			vecIndex++;
1614 		}
1615 
1616 		if (vecIndex >= vecCount)
1617 			break;
1618 
1619 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1620 			protection, area->MemoryType(), &reservation);
1621 
1622 		vecOffset += B_PAGE_SIZE;
1623 	}
1624 
1625 	map->Unlock();
1626 	vm_page_unreserve_pages(&reservation);
1627 
1628 	if (_size != NULL)
1629 		*_size = size;
1630 
1631 	area->cache_type = CACHE_TYPE_DEVICE;
1632 	return area->id;
1633 }
1634 
1635 
1636 area_id
1637 vm_create_null_area(team_id team, const char* name, void** address,
1638 	uint32 addressSpec, addr_t size, uint32 flags)
1639 {
1640 	size = PAGE_ALIGN(size);
1641 
1642 	// Lock the address space and, if B_EXACT_ADDRESS and
1643 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1644 	// is not wired.
1645 	AddressSpaceWriteLocker locker;
1646 	do {
1647 		if (locker.SetTo(team) != B_OK)
1648 			return B_BAD_TEAM_ID;
1649 	} while (addressSpec == B_EXACT_ADDRESS
1650 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1651 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1652 			(addr_t)*address, size, &locker));
1653 
1654 	// create a null cache
1655 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1656 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1657 	VMCache* cache;
1658 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1659 	if (status != B_OK)
1660 		return status;
1661 
1662 	cache->temporary = 1;
1663 	cache->virtual_end = size;
1664 
1665 	cache->Lock();
1666 
1667 	VMArea* area;
1668 	virtual_address_restrictions addressRestrictions = {};
1669 	addressRestrictions.address = *address;
1670 	addressRestrictions.address_specification = addressSpec;
1671 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1672 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1673 		&addressRestrictions, true, &area, address);
1674 
1675 	if (status < B_OK) {
1676 		cache->ReleaseRefAndUnlock();
1677 		return status;
1678 	}
1679 
1680 	cache->Unlock();
1681 
1682 	area->cache_type = CACHE_TYPE_NULL;
1683 	return area->id;
1684 }
1685 
1686 
1687 /*!	Creates the vnode cache for the specified \a vnode.
1688 	The vnode has to be marked busy when calling this function.
1689 */
1690 status_t
1691 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1692 {
1693 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1694 }
1695 
1696 
1697 /*!	\a cache must be locked. The area's address space must be read-locked.
1698 */
1699 static void
1700 pre_map_area_pages(VMArea* area, VMCache* cache,
1701 	vm_page_reservation* reservation)
1702 {
1703 	addr_t baseAddress = area->Base();
1704 	addr_t cacheOffset = area->cache_offset;
1705 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1706 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1707 
1708 	for (VMCachePagesTree::Iterator it
1709 				= cache->pages.GetIterator(firstPage, true, true);
1710 			vm_page* page = it.Next();) {
1711 		if (page->cache_offset >= endPage)
1712 			break;
1713 
1714 		// skip busy and inactive pages
1715 		if (page->busy || page->usage_count == 0)
1716 			continue;
1717 
1718 		DEBUG_PAGE_ACCESS_START(page);
1719 		map_page(area, page,
1720 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1721 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1722 		DEBUG_PAGE_ACCESS_END(page);
1723 	}
1724 }
1725 
1726 
1727 /*!	Will map the file specified by \a fd to an area in memory.
1728 	The file will be mirrored beginning at the specified \a offset. The
1729 	\a offset and \a size arguments have to be page aligned.
1730 */
1731 static area_id
1732 _vm_map_file(team_id team, const char* name, void** _address,
1733 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1734 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1735 {
1736 	// TODO: for binary files, we want to make sure that they get the
1737 	//	copy of a file at a given time, ie. later changes should not
1738 	//	make it into the mapped copy -- this will need quite some changes
1739 	//	to be done in a nice way
1740 	TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n",
1741 		fd, offset, size, mapping));
1742 
1743 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1744 	size = PAGE_ALIGN(size);
1745 
1746 	if (mapping == REGION_NO_PRIVATE_MAP)
1747 		protection |= B_SHARED_AREA;
1748 	if (addressSpec != B_EXACT_ADDRESS)
1749 		unmapAddressRange = false;
1750 
1751 	if (fd < 0) {
1752 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1753 		virtual_address_restrictions virtualRestrictions = {};
1754 		virtualRestrictions.address = *_address;
1755 		virtualRestrictions.address_specification = addressSpec;
1756 		physical_address_restrictions physicalRestrictions = {};
1757 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1758 			flags, &virtualRestrictions, &physicalRestrictions, kernel,
1759 			_address);
1760 	}
1761 
1762 	// get the open flags of the FD
1763 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1764 	if (descriptor == NULL)
1765 		return EBADF;
1766 	int32 openMode = descriptor->open_mode;
1767 	put_fd(descriptor);
1768 
1769 	// The FD must be open for reading at any rate. For a shared mapping with
1770 	// write access, the FD must additionally be open for writing.
1771 	if ((openMode & O_ACCMODE) == O_WRONLY
1772 		|| (mapping == REGION_NO_PRIVATE_MAP
1773 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1774 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1775 		return EACCES;
1776 	}
1777 
1778 	// get the vnode for the object, this also grabs a ref to it
1779 	struct vnode* vnode = NULL;
1780 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1781 	if (status < B_OK)
1782 		return status;
1783 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1784 
1785 	// If we're going to pre-map pages, we need to reserve the pages needed by
1786 	// the mapping backend upfront.
1787 	page_num_t reservedPreMapPages = 0;
1788 	vm_page_reservation reservation;
1789 	if ((protection & B_READ_AREA) != 0) {
1790 		AddressSpaceWriteLocker locker;
1791 		status = locker.SetTo(team);
1792 		if (status != B_OK)
1793 			return status;
1794 
1795 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1796 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1797 
1798 		locker.Unlock();
1799 
1800 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1801 			team == VMAddressSpace::KernelID()
1802 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1803 	}
1804 
1805 	struct PageUnreserver {
1806 		PageUnreserver(vm_page_reservation* reservation)
1807 			:
1808 			fReservation(reservation)
1809 		{
1810 		}
1811 
1812 		~PageUnreserver()
1813 		{
1814 			if (fReservation != NULL)
1815 				vm_page_unreserve_pages(fReservation);
1816 		}
1817 
1818 		vm_page_reservation* fReservation;
1819 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1820 
1821 	// Lock the address space and, if the specified address range shall be
1822 	// unmapped, ensure it is not wired.
1823 	AddressSpaceWriteLocker locker;
1824 	do {
1825 		if (locker.SetTo(team) != B_OK)
1826 			return B_BAD_TEAM_ID;
1827 	} while (unmapAddressRange
1828 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1829 			(addr_t)*_address, size, &locker));
1830 
1831 	// TODO: this only works for file systems that use the file cache
1832 	VMCache* cache;
1833 	status = vfs_get_vnode_cache(vnode, &cache, false);
1834 	if (status < B_OK)
1835 		return status;
1836 
1837 	cache->Lock();
1838 
1839 	VMArea* area;
1840 	virtual_address_restrictions addressRestrictions = {};
1841 	addressRestrictions.address = *_address;
1842 	addressRestrictions.address_specification = addressSpec;
1843 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1844 		0, protection, mapping,
1845 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1846 		&addressRestrictions, kernel, &area, _address);
1847 
1848 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1849 		// map_backing_store() cannot know we no longer need the ref
1850 		cache->ReleaseRefLocked();
1851 	}
1852 
1853 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1854 		pre_map_area_pages(area, cache, &reservation);
1855 
1856 	cache->Unlock();
1857 
1858 	if (status == B_OK) {
1859 		// TODO: this probably deserves a smarter solution, i.e. don't always
1860 		// prefetch, and probably don't trigger the prefetch at this place.
1861 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1862 			// prefetches at max 10 MB starting from "offset"
1863 	}
1864 
1865 	if (status != B_OK)
1866 		return status;
1867 
1868 	area->cache_type = CACHE_TYPE_VNODE;
1869 	return area->id;
1870 }
1871 
1872 
1873 area_id
1874 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
1875 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
1876 	int fd, off_t offset)
1877 {
1878 	if (!arch_vm_supports_protection(protection))
1879 		return B_NOT_SUPPORTED;
1880 
1881 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
1882 		mapping, unmapAddressRange, fd, offset, true);
1883 }
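

// Illustrative usage sketch (not part of the original source): how a kernel
// component might map the first 64 KB of a file read-only into the kernel
// address space via vm_map_file(). The descriptor `fd` is assumed to have
// been opened elsewhere in the kernel's I/O context; offset and size are
// page aligned, as required by _vm_map_file().
#if 0
static area_id
example_map_file_read_only(int fd)
{
	void* address = NULL;
	return vm_map_file(VMAddressSpace::KernelID(), "example file map",
		&address, B_ANY_KERNEL_ADDRESS, 64 * 1024, B_KERNEL_READ_AREA,
		REGION_NO_PRIVATE_MAP, false, fd, 0);
}
#endif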
1884 
1885 
1886 VMCache*
1887 vm_area_get_locked_cache(VMArea* area)
1888 {
1889 	rw_lock_read_lock(&sAreaCacheLock);
1890 
1891 	while (true) {
1892 		VMCache* cache = area->cache;
1893 
1894 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
1895 			// cache has been deleted
1896 			rw_lock_read_lock(&sAreaCacheLock);
1897 			continue;
1898 		}
1899 
1900 		rw_lock_read_lock(&sAreaCacheLock);
1901 
1902 		if (cache == area->cache) {
1903 			cache->AcquireRefLocked();
1904 			rw_lock_read_unlock(&sAreaCacheLock);
1905 			return cache;
1906 		}
1907 
1908 		// the cache changed in the meantime
1909 		cache->Unlock();
1910 	}
1911 }
1912 
1913 
1914 void
1915 vm_area_put_locked_cache(VMCache* cache)
1916 {
1917 	cache->ReleaseRefAndUnlock();
1918 }
1919 
1920 
1921 area_id
1922 vm_clone_area(team_id team, const char* name, void** address,
1923 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
1924 	bool kernel)
1925 {
1926 	VMArea* newArea = NULL;
1927 	VMArea* sourceArea;
1928 
1929 	// Check whether the source area exists and is cloneable. If so, mark it
1930 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
1931 	{
1932 		AddressSpaceWriteLocker locker;
1933 		status_t status = locker.SetFromArea(sourceID, sourceArea);
1934 		if (status != B_OK)
1935 			return status;
1936 
1937 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
1938 			return B_NOT_ALLOWED;
1939 
1940 		sourceArea->protection |= B_SHARED_AREA;
1941 		protection |= B_SHARED_AREA;
1942 	}
1943 
1944 	// Now lock both address spaces and actually do the cloning.
1945 
1946 	MultiAddressSpaceLocker locker;
1947 	VMAddressSpace* sourceAddressSpace;
1948 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
1949 	if (status != B_OK)
1950 		return status;
1951 
1952 	VMAddressSpace* targetAddressSpace;
1953 	status = locker.AddTeam(team, true, &targetAddressSpace);
1954 	if (status != B_OK)
1955 		return status;
1956 
1957 	status = locker.Lock();
1958 	if (status != B_OK)
1959 		return status;
1960 
1961 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
1962 	if (sourceArea == NULL)
1963 		return B_BAD_VALUE;
1964 
1965 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
1966 		return B_NOT_ALLOWED;
1967 
1968 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
1969 
1970 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
1971 	//	have been adapted. Maybe it should be part of the kernel settings,
1972 	//	anyway (so that old drivers can always work).
1973 #if 0
1974 	if (sourceArea->aspace == VMAddressSpace::Kernel()
1975 		&& addressSpace != VMAddressSpace::Kernel()
1976 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
1977 		// kernel areas must not be cloned in userland, unless explicitly
1978 		// declared user-cloneable upon construction
1979 		status = B_NOT_ALLOWED;
1980 	} else
1981 #endif
1982 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
1983 		status = B_NOT_ALLOWED;
1984 	else {
1985 		virtual_address_restrictions addressRestrictions = {};
1986 		addressRestrictions.address = *address;
1987 		addressRestrictions.address_specification = addressSpec;
1988 		status = map_backing_store(targetAddressSpace, cache,
1989 			sourceArea->cache_offset, name, sourceArea->Size(),
1990 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
1991 			kernel, &newArea, address);
1992 	}
1993 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
1994 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
1995 		// to create a new cache, and has therefore already acquired a reference
1996 		// to the source cache - but otherwise it has no idea that we need
1997 		// one.
1998 		cache->AcquireRefLocked();
1999 	}
2000 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2001 		// we need to map in everything at this point
2002 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2003 			// we don't have actual pages to map but a physical area
2004 			VMTranslationMap* map
2005 				= sourceArea->address_space->TranslationMap();
2006 			map->Lock();
2007 
2008 			phys_addr_t physicalAddress;
2009 			uint32 oldProtection;
2010 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2011 
2012 			map->Unlock();
2013 
2014 			map = targetAddressSpace->TranslationMap();
2015 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2016 				newArea->Base() + (newArea->Size() - 1));
2017 
2018 			vm_page_reservation reservation;
2019 			vm_page_reserve_pages(&reservation, reservePages,
2020 				targetAddressSpace == VMAddressSpace::Kernel()
2021 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2022 			map->Lock();
2023 
2024 			for (addr_t offset = 0; offset < newArea->Size();
2025 					offset += B_PAGE_SIZE) {
2026 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2027 					protection, newArea->MemoryType(), &reservation);
2028 			}
2029 
2030 			map->Unlock();
2031 			vm_page_unreserve_pages(&reservation);
2032 		} else {
2033 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2034 			size_t reservePages = map->MaxPagesNeededToMap(
2035 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2036 			vm_page_reservation reservation;
2037 			vm_page_reserve_pages(&reservation, reservePages,
2038 				targetAddressSpace == VMAddressSpace::Kernel()
2039 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2040 
2041 			// map in all pages from source
2042 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2043 					vm_page* page  = it.Next();) {
2044 				if (!page->busy) {
2045 					DEBUG_PAGE_ACCESS_START(page);
2046 					map_page(newArea, page,
2047 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2048 							- newArea->cache_offset),
2049 						protection, &reservation);
2050 					DEBUG_PAGE_ACCESS_END(page);
2051 				}
2052 			}
2053 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2054 			// ensuring that!
2055 
2056 			vm_page_unreserve_pages(&reservation);
2057 		}
2058 	}
2059 	if (status == B_OK)
2060 		newArea->cache_type = sourceArea->cache_type;
2061 
2062 	vm_area_put_locked_cache(cache);
2063 
2064 	if (status < B_OK)
2065 		return status;
2066 
2067 	return newArea->id;
2068 }
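

// Illustrative usage sketch (not part of the original source): cloning an
// existing area -- e.g. one published by a driver -- into the kernel address
// space with shared read/write access. `sourceID` is assumed to be a valid
// area ID obtained elsewhere.
#if 0
static area_id
example_clone_area_into_kernel(area_id sourceID)
{
	void* address = NULL;
	return vm_clone_area(VMAddressSpace::KernelID(), "example clone",
		&address, B_ANY_KERNEL_ADDRESS,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, REGION_NO_PRIVATE_MAP,
		sourceID, true);
}
#endif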
2069 
2070 
2071 /*!	Deletes the specified area of the given address space.
2072 
2073 	The address space must be write-locked.
2074 	The caller must ensure that the area does not have any wired ranges.
2075 
2076 	\param addressSpace The address space containing the area.
2077 	\param area The area to be deleted.
2078 	\param deletingAddressSpace \c true, if the address space is in the process
2079 		of being deleted.
2080 */
2081 static void
2082 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2083 	bool deletingAddressSpace)
2084 {
2085 	ASSERT(!area->IsWired());
2086 
2087 	VMAreaHash::Remove(area);
2088 
2089 	// At this point the area is removed from the global hash table, but
2090 	// still exists in the area list.
2091 
2092 	// Unmap the virtual address space the area occupied.
2093 	{
2094 		// We need to lock the complete cache chain.
2095 		VMCache* topCache = vm_area_get_locked_cache(area);
2096 		VMCacheChainLocker cacheChainLocker(topCache);
2097 		cacheChainLocker.LockAllSourceCaches();
2098 
2099 		// If the area's top cache is a temporary cache and the area is the only
2100 		// one referencing it (besides us currently holding a second reference),
2101 		// the unmapping code doesn't need to care about preserving the accessed
2102 		// and dirty flags of the top cache page mappings.
2103 		bool ignoreTopCachePageFlags
2104 			= topCache->temporary && topCache->RefCount() == 2;
2105 
2106 		area->address_space->TranslationMap()->UnmapArea(area,
2107 			deletingAddressSpace, ignoreTopCachePageFlags);
2108 	}
2109 
2110 	if (!area->cache->temporary)
2111 		area->cache->WriteModified();
2112 
2113 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2114 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2115 
2116 	arch_vm_unset_memory_type(area);
2117 	addressSpace->RemoveArea(area, allocationFlags);
2118 	addressSpace->Put();
2119 
2120 	area->cache->RemoveArea(area);
2121 	area->cache->ReleaseRef();
2122 
2123 	addressSpace->DeleteArea(area, allocationFlags);
2124 }
2125 
2126 
2127 status_t
2128 vm_delete_area(team_id team, area_id id, bool kernel)
2129 {
2130 	TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id));
2131 
2132 	// lock the address space and make sure the area isn't wired
2133 	AddressSpaceWriteLocker locker;
2134 	VMArea* area;
2135 	AreaCacheLocker cacheLocker;
2136 
2137 	do {
2138 		status_t status = locker.SetFromArea(team, id, area);
2139 		if (status != B_OK)
2140 			return status;
2141 
2142 		cacheLocker.SetTo(area);
2143 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2144 
2145 	cacheLocker.Unlock();
2146 
2147 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2148 		return B_NOT_ALLOWED;
2149 
2150 	delete_area(locker.AddressSpace(), area, false);
2151 	return B_OK;
2152 }
2153 
2154 
2155 /*!	Creates a new cache on top of the given cache, moves all areas from
2156 	the old cache to the new one, and changes the protection of all affected
2157 	areas' pages to read-only. If requested, wired pages are moved up to the
2158 	new cache and copies are added to the old cache in their place.
2159 	Preconditions:
2160 	- The given cache must be locked.
2161 	- All of the cache's areas' address spaces must be read locked.
2162 	- Either the cache must not have any wired ranges or a page reservation for
2163 	  all wired pages must be provided, so they can be copied.
2164 
2165 	\param lowerCache The cache on top of which a new cache shall be created.
2166 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2167 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2168 		has wired page. The wired pages are copied in this case.
2169 */
2170 static status_t
2171 vm_copy_on_write_area(VMCache* lowerCache,
2172 	vm_page_reservation* wiredPagesReservation)
2173 {
2174 	VMCache* upperCache;
2175 
2176 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2177 
2178 	// We need to separate the cache from its areas. The cache goes one level
2179 	// deeper and we create a new cache inbetween.
2180 
2181 	// create an anonymous cache
2182 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2183 		0, true, VM_PRIORITY_USER);
2184 	if (status != B_OK)
2185 		return status;
2186 
2187 	upperCache->Lock();
2188 
2189 	upperCache->temporary = 1;
2190 	upperCache->virtual_base = lowerCache->virtual_base;
2191 	upperCache->virtual_end = lowerCache->virtual_end;
2192 
2193 	// transfer the lower cache areas to the upper cache
2194 	rw_lock_write_lock(&sAreaCacheLock);
2195 	upperCache->TransferAreas(lowerCache);
2196 	rw_lock_write_unlock(&sAreaCacheLock);
2197 
2198 	lowerCache->AddConsumer(upperCache);
2199 
2200 	// We now need to remap all pages from all of the cache's areas read-only,
2201 	// so that a copy will be created on next write access. If there are wired
2202 	// pages, we keep their protection, move them to the upper cache and create
2203 	// copies for the lower cache.
2204 	if (wiredPagesReservation != NULL) {
2205 		// We need to handle wired pages -- iterate through the cache's pages.
2206 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2207 				vm_page* page = it.Next();) {
2208 			if (page->WiredCount() > 0) {
2209 				// allocate a new page and copy the wired one
2210 				vm_page* copiedPage = vm_page_allocate_page(
2211 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2212 
2213 				vm_memcpy_physical_page(
2214 					copiedPage->physical_page_number * B_PAGE_SIZE,
2215 					page->physical_page_number * B_PAGE_SIZE);
2216 
2217 				// move the wired page to the upper cache (note: removing is OK
2218 				// with the SplayTree iterator) and insert the copy
2219 				upperCache->MovePage(page);
2220 				lowerCache->InsertPage(copiedPage,
2221 					page->cache_offset * B_PAGE_SIZE);
2222 
2223 				DEBUG_PAGE_ACCESS_END(copiedPage);
2224 			} else {
2225 				// Change the protection of this page in all areas.
2226 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2227 						tempArea = tempArea->cache_next) {
2228 					// The area must be readable in the same way it was
2229 					// previously writable.
2230 					uint32 protection = B_KERNEL_READ_AREA;
2231 					if ((tempArea->protection & B_READ_AREA) != 0)
2232 						protection |= B_READ_AREA;
2233 
2234 					VMTranslationMap* map
2235 						= tempArea->address_space->TranslationMap();
2236 					map->Lock();
2237 					map->ProtectPage(tempArea,
2238 						virtual_page_address(tempArea, page), protection);
2239 					map->Unlock();
2240 				}
2241 			}
2242 		}
2243 	} else {
2244 		// just change the protection of all areas
2245 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2246 				tempArea = tempArea->cache_next) {
2247 			// The area must be readable in the same way it was previously
2248 			// writable.
2249 			uint32 protection = B_KERNEL_READ_AREA;
2250 			if ((tempArea->protection & B_READ_AREA) != 0)
2251 				protection |= B_READ_AREA;
2252 
2253 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2254 			map->Lock();
2255 			map->ProtectArea(tempArea, protection);
2256 			map->Unlock();
2257 		}
2258 	}
2259 
2260 	vm_area_put_locked_cache(upperCache);
2261 
2262 	return B_OK;
2263 }
2264 
2265 
2266 area_id
2267 vm_copy_area(team_id team, const char* name, void** _address,
2268 	uint32 addressSpec, uint32 protection, area_id sourceID)
2269 {
2270 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2271 
2272 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2273 		// set the same protection for the kernel as for userland
2274 		protection |= B_KERNEL_READ_AREA;
2275 		if (writableCopy)
2276 			protection |= B_KERNEL_WRITE_AREA;
2277 	}
2278 
2279 	// Do the locking: target address space, all address spaces associated with
2280 	// the source cache, and the cache itself.
2281 	MultiAddressSpaceLocker locker;
2282 	VMAddressSpace* targetAddressSpace;
2283 	VMCache* cache;
2284 	VMArea* source;
2285 	AreaCacheLocker cacheLocker;
2286 	status_t status;
2287 	bool sharedArea;
2288 
2289 	page_num_t wiredPages = 0;
2290 	vm_page_reservation wiredPagesReservation;
2291 
2292 	bool restart;
2293 	do {
2294 		restart = false;
2295 
2296 		locker.Unset();
2297 		status = locker.AddTeam(team, true, &targetAddressSpace);
2298 		if (status == B_OK) {
2299 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2300 				&cache);
2301 		}
2302 		if (status != B_OK)
2303 			return status;
2304 
2305 		cacheLocker.SetTo(cache, true);	// already locked
2306 
2307 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2308 
2309 		page_num_t oldWiredPages = wiredPages;
2310 		wiredPages = 0;
2311 
2312 		// If the source area isn't shared, count the number of wired pages in
2313 		// the cache and reserve as many pages.
2314 		if (!sharedArea) {
2315 			wiredPages = cache->WiredPagesCount();
2316 
2317 			if (wiredPages > oldWiredPages) {
2318 				cacheLocker.Unlock();
2319 				locker.Unlock();
2320 
2321 				if (oldWiredPages > 0)
2322 					vm_page_unreserve_pages(&wiredPagesReservation);
2323 
2324 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2325 					VM_PRIORITY_USER);
2326 
2327 				restart = true;
2328 			}
2329 		} else if (oldWiredPages > 0)
2330 			vm_page_unreserve_pages(&wiredPagesReservation);
2331 	} while (restart);
2332 
2333 	// unreserve pages later
2334 	struct PagesUnreserver {
2335 		PagesUnreserver(vm_page_reservation* reservation)
2336 			:
2337 			fReservation(reservation)
2338 		{
2339 		}
2340 
2341 		~PagesUnreserver()
2342 		{
2343 			if (fReservation != NULL)
2344 				vm_page_unreserve_pages(fReservation);
2345 		}
2346 
2347 	private:
2348 		vm_page_reservation*	fReservation;
2349 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2350 
2351 	if (addressSpec == B_CLONE_ADDRESS) {
2352 		addressSpec = B_EXACT_ADDRESS;
2353 		*_address = (void*)source->Base();
2354 	}
2355 
2356 	// First, create a cache on top of the source area, or use the existing
2357 	// one if this is a shared area.
2358 
2359 	VMArea* target;
2360 	virtual_address_restrictions addressRestrictions = {};
2361 	addressRestrictions.address = *_address;
2362 	addressRestrictions.address_specification = addressSpec;
2363 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2364 		name, source->Size(), source->wiring, protection,
2365 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2366 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2367 		&addressRestrictions, true, &target, _address);
2368 	if (status < B_OK)
2369 		return status;
2370 
2371 	if (sharedArea) {
2372 		// The new area uses the old area's cache, but map_backing_store()
2373 		// hasn't acquired a ref. So we have to do that now.
2374 		cache->AcquireRefLocked();
2375 	}
2376 
2377 	// If the source area is writable, we need to move it one layer up as well
2378 
2379 	if (!sharedArea) {
2380 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2381 			// TODO: do something more useful if this fails!
2382 			if (vm_copy_on_write_area(cache,
2383 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2384 				panic("vm_copy_on_write_area() failed!\n");
2385 			}
2386 		}
2387 	}
2388 
2389 	// we return the ID of the newly created area
2390 	return target->id;
2391 }
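

// Illustrative usage sketch (not part of the original source): creating a
// private, writable copy of an existing area in the kernel address space.
// `sourceID` is assumed to be valid; unless the source is a shared area, the
// copy-on-write machinery above takes care of sharing unmodified pages.
#if 0
static area_id
example_copy_area_writable(area_id sourceID)
{
	void* address = NULL;
	return vm_copy_area(VMAddressSpace::KernelID(), "example copy", &address,
		B_ANY_KERNEL_ADDRESS, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
		sourceID);
}
#endif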
2392 
2393 
2394 static status_t
2395 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2396 	bool kernel)
2397 {
2398 	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = "
2399 		"%#lx)\n", team, areaID, newProtection));
2400 
2401 	if (!arch_vm_supports_protection(newProtection))
2402 		return B_NOT_SUPPORTED;
2403 
2404 	bool becomesWritable
2405 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2406 
2407 	// lock address spaces and cache
2408 	MultiAddressSpaceLocker locker;
2409 	VMCache* cache;
2410 	VMArea* area;
2411 	status_t status;
2412 	AreaCacheLocker cacheLocker;
2413 	bool isWritable;
2414 
2415 	bool restart;
2416 	do {
2417 		restart = false;
2418 
2419 		locker.Unset();
2420 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2421 		if (status != B_OK)
2422 			return status;
2423 
2424 		cacheLocker.SetTo(cache, true);	// already locked
2425 
2426 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2427 			return B_NOT_ALLOWED;
2428 
2429 		if (area->protection == newProtection)
2430 			return B_OK;
2431 
2432 		if (team != VMAddressSpace::KernelID()
2433 			&& area->address_space->ID() != team) {
2434 			// unless you're the kernel, you are only allowed to set
2435 			// the protection of your own areas
2436 			return B_NOT_ALLOWED;
2437 		}
2438 
2439 		isWritable
2440 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2441 
2442 		// Make sure the area (or, if we're going to call
2443 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2444 		// wired ranges.
2445 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2446 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2447 					otherArea = otherArea->cache_next) {
2448 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2449 					restart = true;
2450 					break;
2451 				}
2452 			}
2453 		} else {
2454 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2455 				restart = true;
2456 		}
2457 	} while (restart);
2458 
2459 	bool changePageProtection = true;
2460 	bool changeTopCachePagesOnly = false;
2461 
2462 	if (isWritable && !becomesWritable) {
2463 		// writable -> !writable
2464 
2465 		if (cache->source != NULL && cache->temporary) {
2466 			if (cache->CountWritableAreas(area) == 0) {
2467 				// Since this cache is now backed by the pages in its source
2468 				// cache, we can change the cache's commitment to take only
2469 				// those pages into account that really are in this cache.
2470 
2471 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2472 					team == VMAddressSpace::KernelID()
2473 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2474 
2475 				// TODO: we may be able to join with our source cache, if
2476 				// count == 0
2477 			}
2478 		}
2479 
2480 		// If only the writability changes, we can just remap the pages of the
2481 		// top cache, since the pages of lower caches are mapped read-only
2482 		// anyway. That's only advantageous if the number of pages in the cache
2483 		// is significantly smaller than the number of pages in the area,
2484 		// though.
2485 		if (newProtection
2486 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2487 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2488 			changeTopCachePagesOnly = true;
2489 		}
2490 	} else if (!isWritable && becomesWritable) {
2491 		// !writable -> writable
2492 
2493 		if (!cache->consumers.IsEmpty()) {
2494 			// There are consumers -- we have to insert a new cache. Fortunately
2495 			// vm_copy_on_write_area() does everything that's needed.
2496 			changePageProtection = false;
2497 			status = vm_copy_on_write_area(cache, NULL);
2498 		} else {
2499 			// No consumers, so we don't need to insert a new one.
2500 			if (cache->source != NULL && cache->temporary) {
2501 				// the cache's commitment must contain all possible pages
2502 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2503 					team == VMAddressSpace::KernelID()
2504 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2505 			}
2506 
2507 			if (status == B_OK && cache->source != NULL) {
2508 				// There's a source cache, hence we can't just change all pages'
2509 				// protection or we might allow writing into pages belonging to
2510 				// a lower cache.
2511 				changeTopCachePagesOnly = true;
2512 			}
2513 		}
2514 	} else {
2515 		// we don't have anything special to do in all other cases
2516 	}
2517 
2518 	if (status == B_OK) {
2519 		// remap existing pages in this cache
2520 		if (changePageProtection) {
2521 			VMTranslationMap* map = area->address_space->TranslationMap();
2522 			map->Lock();
2523 
2524 			if (changeTopCachePagesOnly) {
2525 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2526 				page_num_t lastPageOffset
2527 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2528 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2529 						vm_page* page = it.Next();) {
2530 					if (page->cache_offset >= firstPageOffset
2531 						&& page->cache_offset < lastPageOffset) {
2532 						addr_t address = virtual_page_address(area, page);
2533 						map->ProtectPage(area, address, newProtection);
2534 					}
2535 				}
2536 			} else
2537 				map->ProtectArea(area, newProtection);
2538 
2539 			map->Unlock();
2540 		}
2541 
2542 		area->protection = newProtection;
2543 	}
2544 
2545 	return status;
2546 }
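

// Illustrative usage sketch (not part of the original source): revoking
// write access from a kernel area after its contents have been initialized.
// `id` is assumed to be a valid area belonging to the kernel team.
#if 0
static status_t
example_make_area_read_only(area_id id)
{
	return vm_set_area_protection(VMAddressSpace::KernelID(), id,
		B_KERNEL_READ_AREA, true);
}
#endif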
2547 
2548 
2549 status_t
2550 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2551 {
2552 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2553 	if (addressSpace == NULL)
2554 		return B_BAD_TEAM_ID;
2555 
2556 	VMTranslationMap* map = addressSpace->TranslationMap();
2557 
2558 	map->Lock();
2559 	uint32 dummyFlags;
2560 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2561 	map->Unlock();
2562 
2563 	addressSpace->Put();
2564 	return status;
2565 }
2566 
2567 
2568 /*!	The page's cache must be locked.
2569 */
2570 bool
2571 vm_test_map_modification(vm_page* page)
2572 {
2573 	if (page->modified)
2574 		return true;
2575 
2576 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2577 	vm_page_mapping* mapping;
2578 	while ((mapping = iterator.Next()) != NULL) {
2579 		VMArea* area = mapping->area;
2580 		VMTranslationMap* map = area->address_space->TranslationMap();
2581 
2582 		phys_addr_t physicalAddress;
2583 		uint32 flags;
2584 		map->Lock();
2585 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2586 		map->Unlock();
2587 
2588 		if ((flags & PAGE_MODIFIED) != 0)
2589 			return true;
2590 	}
2591 
2592 	return false;
2593 }
2594 
2595 
2596 /*!	The page's cache must be locked.
2597 */
2598 void
2599 vm_clear_map_flags(vm_page* page, uint32 flags)
2600 {
2601 	if ((flags & PAGE_ACCESSED) != 0)
2602 		page->accessed = false;
2603 	if ((flags & PAGE_MODIFIED) != 0)
2604 		page->modified = false;
2605 
2606 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2607 	vm_page_mapping* mapping;
2608 	while ((mapping = iterator.Next()) != NULL) {
2609 		VMArea* area = mapping->area;
2610 		VMTranslationMap* map = area->address_space->TranslationMap();
2611 
2612 		map->Lock();
2613 		map->ClearFlags(virtual_page_address(area, page), flags);
2614 		map->Unlock();
2615 	}
2616 }
2617 
2618 
2619 /*!	Removes all mappings from a page.
2620 	After you've called this function, the page is unmapped from memory and
2621 	the page's \c accessed and \c modified flags have been updated according
2622 	to the state of the mappings.
2623 	The page's cache must be locked.
2624 */
2625 void
2626 vm_remove_all_page_mappings(vm_page* page)
2627 {
2628 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2629 		VMArea* area = mapping->area;
2630 		VMTranslationMap* map = area->address_space->TranslationMap();
2631 		addr_t address = virtual_page_address(area, page);
2632 		map->UnmapPage(area, address, false);
2633 	}
2634 }
2635 
2636 
2637 int32
2638 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2639 {
2640 	int32 count = 0;
2641 
2642 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2643 	vm_page_mapping* mapping;
2644 	while ((mapping = iterator.Next()) != NULL) {
2645 		VMArea* area = mapping->area;
2646 		VMTranslationMap* map = area->address_space->TranslationMap();
2647 
2648 		bool modified;
2649 		if (map->ClearAccessedAndModified(area,
2650 				virtual_page_address(area, page), false, modified)) {
2651 			count++;
2652 		}
2653 
2654 		page->modified |= modified;
2655 	}
2656 
2657 
2658 	if (page->accessed) {
2659 		count++;
2660 		page->accessed = false;
2661 	}
2662 
2663 	return count;
2664 }
2665 
2666 
2667 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2668 	mappings.
2669 	The function iterates through the page mappings and removes them until
2670 	encountering one that has been accessed. From then on it will continue to
2671 	iterate, but only clear the accessed flag of the mapping. The page's
2672 	\c modified bit will be updated accordingly, the \c accessed bit will be
2673 	cleared.
2674 	\return The number of mapping accessed bits encountered, including the
2675 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2676 		of the page have been removed.
2677 */
2678 int32
2679 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2680 {
2681 	ASSERT(page->WiredCount() == 0);
2682 
2683 	if (page->accessed)
2684 		return vm_clear_page_mapping_accessed_flags(page);
2685 
2686 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2687 		VMArea* area = mapping->area;
2688 		VMTranslationMap* map = area->address_space->TranslationMap();
2689 		addr_t address = virtual_page_address(area, page);
2690 		bool modified = false;
2691 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2692 			page->accessed = true;
2693 			page->modified |= modified;
2694 			return vm_clear_page_mapping_accessed_flags(page);
2695 		}
2696 		page->modified |= modified;
2697 	}
2698 
2699 	return 0;
2700 }
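

// Illustrative usage sketch (not part of the original source): how a page
// reclaiming loop might use the return value above. A result of 0 means all
// mappings were removed without encountering an accessed bit, so the page is
// a candidate for freeing; otherwise it was recently used and should stay.
// The page's cache is assumed to be locked, as required by the function.
#if 0
static bool
example_page_is_reclaimable(vm_page* page)
{
	if (page->WiredCount() > 0)
		return false;

	return vm_remove_all_page_mappings_if_unaccessed(page) == 0;
}
#endif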
2701 
2702 
2703 static int
2704 display_mem(int argc, char** argv)
2705 {
2706 	bool physical = false;
2707 	addr_t copyAddress;
2708 	int32 displayWidth;
2709 	int32 itemSize;
2710 	int32 num = -1;
2711 	addr_t address;
2712 	int i = 1, j;
2713 
2714 	if (argc > 1 && argv[1][0] == '-') {
2715 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2716 			physical = true;
2717 			i++;
2718 		} else
2719 			i = 99;
2720 	}
2721 
2722 	if (argc < i + 1 || argc > i + 2) {
2723 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2724 			"\tdl - 8 bytes\n"
2725 			"\tdw - 4 bytes\n"
2726 			"\tds - 2 bytes\n"
2727 			"\tdb - 1 byte\n"
2728 			"\tstring - a whole string\n"
2729 			"  -p or --physical only allows memory from a single page to be "
2730 			"displayed.\n");
2731 		return 0;
2732 	}
2733 
2734 	address = parse_expression(argv[i]);
2735 
2736 	if (argc > i + 1)
2737 		num = parse_expression(argv[i + 1]);
2738 
2739 	// build the format string
2740 	if (strcmp(argv[0], "db") == 0) {
2741 		itemSize = 1;
2742 		displayWidth = 16;
2743 	} else if (strcmp(argv[0], "ds") == 0) {
2744 		itemSize = 2;
2745 		displayWidth = 8;
2746 	} else if (strcmp(argv[0], "dw") == 0) {
2747 		itemSize = 4;
2748 		displayWidth = 4;
2749 	} else if (strcmp(argv[0], "dl") == 0) {
2750 		itemSize = 8;
2751 		displayWidth = 2;
2752 	} else if (strcmp(argv[0], "string") == 0) {
2753 		itemSize = 1;
2754 		displayWidth = -1;
2755 	} else {
2756 		kprintf("display_mem called in an invalid way!\n");
2757 		return 0;
2758 	}
2759 
2760 	if (num <= 0)
2761 		num = displayWidth;
2762 
2763 	void* physicalPageHandle = NULL;
2764 
2765 	if (physical) {
2766 		int32 offset = address & (B_PAGE_SIZE - 1);
2767 		if (num * itemSize + offset > B_PAGE_SIZE) {
2768 			num = (B_PAGE_SIZE - offset) / itemSize;
2769 			kprintf("NOTE: number of bytes has been cut to page size\n");
2770 		}
2771 
2772 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2773 
2774 		if (vm_get_physical_page_debug(address, &copyAddress,
2775 				&physicalPageHandle) != B_OK) {
2776 			kprintf("getting the hardware page failed.");
2777 			return 0;
2778 		}
2779 
2780 		address += offset;
2781 		copyAddress += offset;
2782 	} else
2783 		copyAddress = address;
2784 
2785 	if (!strcmp(argv[0], "string")) {
2786 		kprintf("%p \"", (char*)copyAddress);
2787 
2788 		// string mode
2789 		for (i = 0; true; i++) {
2790 			char c;
2791 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2792 					!= B_OK
2793 				|| c == '\0') {
2794 				break;
2795 			}
2796 
2797 			if (c == '\n')
2798 				kprintf("\\n");
2799 			else if (c == '\t')
2800 				kprintf("\\t");
2801 			else {
2802 				if (!isprint(c))
2803 					c = '.';
2804 
2805 				kprintf("%c", c);
2806 			}
2807 		}
2808 
2809 		kprintf("\"\n");
2810 	} else {
2811 		// number mode
2812 		for (i = 0; i < num; i++) {
2813 			uint32 value;
2814 
2815 			if ((i % displayWidth) == 0) {
2816 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2817 				if (i != 0)
2818 					kprintf("\n");
2819 
2820 				kprintf("[0x%lx]  ", address + i * itemSize);
2821 
2822 				for (j = 0; j < displayed; j++) {
2823 					char c;
2824 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2825 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2826 						displayed = j;
2827 						break;
2828 					}
2829 					if (!isprint(c))
2830 						c = '.';
2831 
2832 					kprintf("%c", c);
2833 				}
2834 				if (num > displayWidth) {
2835 					// make sure the spacing in the last line is correct
2836 					for (j = displayed; j < displayWidth * itemSize; j++)
2837 						kprintf(" ");
2838 				}
2839 				kprintf("  ");
2840 			}
2841 
2842 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2843 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2844 				kprintf("read fault");
2845 				break;
2846 			}
2847 
2848 			switch (itemSize) {
2849 				case 1:
2850 					kprintf(" %02x", *(uint8*)&value);
2851 					break;
2852 				case 2:
2853 					kprintf(" %04x", *(uint16*)&value);
2854 					break;
2855 				case 4:
2856 					kprintf(" %08lx", *(uint32*)&value);
2857 					break;
2858 				case 8:
2859 					kprintf(" %016Lx", *(uint64*)&value);
2860 					break;
2861 			}
2862 		}
2863 
2864 		kprintf("\n");
2865 	}
2866 
2867 	if (physical) {
2868 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
2869 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
2870 	}
2871 	return 0;
2872 }
2873 
2874 
2875 static void
2876 dump_cache_tree_recursively(VMCache* cache, int level,
2877 	VMCache* highlightCache)
2878 {
2879 	// print this cache
2880 	for (int i = 0; i < level; i++)
2881 		kprintf("  ");
2882 	if (cache == highlightCache)
2883 		kprintf("%p <--\n", cache);
2884 	else
2885 		kprintf("%p\n", cache);
2886 
2887 	// recursively print its consumers
2888 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
2889 			VMCache* consumer = it.Next();) {
2890 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
2891 	}
2892 }
2893 
2894 
2895 static int
2896 dump_cache_tree(int argc, char** argv)
2897 {
2898 	if (argc != 2 || !strcmp(argv[1], "--help")) {
2899 		kprintf("usage: %s <address>\n", argv[0]);
2900 		return 0;
2901 	}
2902 
2903 	addr_t address = parse_expression(argv[1]);
2904 	if (address == 0)
2905 		return 0;
2906 
2907 	VMCache* cache = (VMCache*)address;
2908 	VMCache* root = cache;
2909 
2910 	// find the root cache (the transitive source)
2911 	while (root->source != NULL)
2912 		root = root->source;
2913 
2914 	dump_cache_tree_recursively(root, 0, cache);
2915 
2916 	return 0;
2917 }
2918 
2919 
2920 const char*
2921 vm_cache_type_to_string(int32 type)
2922 {
2923 	switch (type) {
2924 		case CACHE_TYPE_RAM:
2925 			return "RAM";
2926 		case CACHE_TYPE_DEVICE:
2927 			return "device";
2928 		case CACHE_TYPE_VNODE:
2929 			return "vnode";
2930 		case CACHE_TYPE_NULL:
2931 			return "null";
2932 
2933 		default:
2934 			return "unknown";
2935 	}
2936 }
2937 
2938 
2939 #if DEBUG_CACHE_LIST
2940 
2941 static void
2942 update_cache_info_recursively(VMCache* cache, cache_info& info)
2943 {
2944 	info.page_count += cache->page_count;
2945 	if (cache->type == CACHE_TYPE_RAM)
2946 		info.committed += cache->committed_size;
2947 
2948 	// recurse
2949 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
2950 			VMCache* consumer = it.Next();) {
2951 		update_cache_info_recursively(consumer, info);
2952 	}
2953 }
2954 
2955 
2956 static int
2957 cache_info_compare_page_count(const void* _a, const void* _b)
2958 {
2959 	const cache_info* a = (const cache_info*)_a;
2960 	const cache_info* b = (const cache_info*)_b;
2961 	if (a->page_count == b->page_count)
2962 		return 0;
2963 	return a->page_count < b->page_count ? 1 : -1;
2964 }
2965 
2966 
2967 static int
2968 cache_info_compare_committed(const void* _a, const void* _b)
2969 {
2970 	const cache_info* a = (const cache_info*)_a;
2971 	const cache_info* b = (const cache_info*)_b;
2972 	if (a->committed == b->committed)
2973 		return 0;
2974 	return a->committed < b->committed ? 1 : -1;
2975 }
2976 
2977 
2978 static void
2979 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
2980 {
2981 	for (int i = 0; i < level; i++)
2982 		kprintf("  ");
2983 
2984 	kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache,
2985 		vm_cache_type_to_string(cache->type), cache->virtual_base,
2986 		cache->virtual_end, cache->page_count);
2987 
2988 	if (level == 0)
2989 		kprintf("/%lu", info.page_count);
2990 
2991 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
2992 		kprintf(", committed: %lld", cache->committed_size);
2993 
2994 		if (level == 0)
2995 			kprintf("/%lu", info.committed);
2996 	}
2997 
2998 	// areas
2999 	if (cache->areas != NULL) {
3000 		VMArea* area = cache->areas;
3001 		kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name,
3002 			area->address_space->ID());
3003 
3004 		while (area->cache_next != NULL) {
3005 			area = area->cache_next;
3006 			kprintf(", %ld", area->id);
3007 		}
3008 	}
3009 
3010 	kputs("\n");
3011 
3012 	// recurse
3013 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3014 			VMCache* consumer = it.Next();) {
3015 		dump_caches_recursively(consumer, info, level + 1);
3016 	}
3017 }
3018 
3019 
3020 static int
3021 dump_caches(int argc, char** argv)
3022 {
3023 	if (sCacheInfoTable == NULL) {
3024 		kprintf("No cache info table!\n");
3025 		return 0;
3026 	}
3027 
3028 	bool sortByPageCount = true;
3029 
3030 	for (int32 i = 1; i < argc; i++) {
3031 		if (strcmp(argv[i], "-c") == 0) {
3032 			sortByPageCount = false;
3033 		} else {
3034 			print_debugger_command_usage(argv[0]);
3035 			return 0;
3036 		}
3037 	}
3038 
3039 	uint32 totalCount = 0;
3040 	uint32 rootCount = 0;
3041 	off_t totalCommitted = 0;
3042 	page_num_t totalPages = 0;
3043 
3044 	VMCache* cache = gDebugCacheList;
3045 	while (cache) {
3046 		totalCount++;
3047 		if (cache->source == NULL) {
3048 			cache_info stackInfo;
3049 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3050 				? sCacheInfoTable[rootCount] : stackInfo;
3051 			rootCount++;
3052 			info.cache = cache;
3053 			info.page_count = 0;
3054 			info.committed = 0;
3055 			update_cache_info_recursively(cache, info);
3056 			totalCommitted += info.committed;
3057 			totalPages += info.page_count;
3058 		}
3059 
3060 		cache = cache->debug_next;
3061 	}
3062 
3063 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3064 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3065 			sortByPageCount
3066 				? &cache_info_compare_page_count
3067 				: &cache_info_compare_committed);
3068 	}
3069 
3070 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3071 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3072 	kprintf("%lu caches (%lu root caches), sorted by %s per cache "
3073 		"tree...\n\n", totalCount, rootCount,
3074 		sortByPageCount ? "page count" : "committed size");
3075 
3076 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3077 		for (uint32 i = 0; i < rootCount; i++) {
3078 			cache_info& info = sCacheInfoTable[i];
3079 			dump_caches_recursively(info.cache, info, 0);
3080 		}
3081 	} else
3082 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3083 
3084 	return 0;
3085 }
3086 
3087 #endif	// DEBUG_CACHE_LIST
3088 
3089 
3090 static int
3091 dump_cache(int argc, char** argv)
3092 {
3093 	VMCache* cache;
3094 	bool showPages = false;
3095 	int i = 1;
3096 
3097 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3098 		kprintf("usage: %s [-ps] <address>\n"
3099 			"  if -p is specified, all pages are shown, if -s is used\n"
3100 			"  only the cache info is shown respectively.\n", argv[0]);
3101 		return 0;
3102 	}
3103 	while (argv[i][0] == '-') {
3104 		char* arg = argv[i] + 1;
3105 		while (arg[0]) {
3106 			if (arg[0] == 'p')
3107 				showPages = true;
3108 			arg++;
3109 		}
3110 		i++;
3111 	}
3112 	if (argv[i] == NULL) {
3113 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3114 		return 0;
3115 	}
3116 
3117 	addr_t address = parse_expression(argv[i]);
3118 	if (address == 0)
3119 		return 0;
3120 
3121 	cache = (VMCache*)address;
3122 
3123 	cache->Dump(showPages);
3124 
3125 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3126 
3127 	return 0;
3128 }
3129 
3130 
3131 static void
3132 dump_area_struct(VMArea* area, bool mappings)
3133 {
3134 	kprintf("AREA: %p\n", area);
3135 	kprintf("name:\t\t'%s'\n", area->name);
3136 	kprintf("owner:\t\t0x%lx\n", area->address_space->ID());
3137 	kprintf("id:\t\t0x%lx\n", area->id);
3138 	kprintf("base:\t\t0x%lx\n", area->Base());
3139 	kprintf("size:\t\t0x%lx\n", area->Size());
3140 	kprintf("protection:\t0x%lx\n", area->protection);
3141 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3142 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3143 	kprintf("cache:\t\t%p\n", area->cache);
3144 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3145 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
3146 	kprintf("cache_next:\t%p\n", area->cache_next);
3147 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3148 
3149 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3150 	if (mappings) {
3151 		kprintf("page mappings:\n");
3152 		while (iterator.HasNext()) {
3153 			vm_page_mapping* mapping = iterator.Next();
3154 			kprintf("  %p", mapping->page);
3155 		}
3156 		kprintf("\n");
3157 	} else {
3158 		uint32 count = 0;
3159 		while (iterator.Next() != NULL) {
3160 			count++;
3161 		}
3162 		kprintf("page mappings:\t%lu\n", count);
3163 	}
3164 }
3165 
3166 
3167 static int
3168 dump_area(int argc, char** argv)
3169 {
3170 	bool mappings = false;
3171 	bool found = false;
3172 	int32 index = 1;
3173 	VMArea* area;
3174 	addr_t num;
3175 
3176 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3177 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3178 			"All areas matching either id/address/name are listed. You can\n"
3179 			"force to check only a specific item by prefixing the specifier\n"
3180 			"with the id/contains/address/name keywords.\n"
3181 			"-m shows the area's mappings as well.\n");
3182 		return 0;
3183 	}
3184 
3185 	if (!strcmp(argv[1], "-m")) {
3186 		mappings = true;
3187 		index++;
3188 	}
3189 
3190 	int32 mode = 0xf;
3191 	if (!strcmp(argv[index], "id"))
3192 		mode = 1;
3193 	else if (!strcmp(argv[index], "contains"))
3194 		mode = 2;
3195 	else if (!strcmp(argv[index], "name"))
3196 		mode = 4;
3197 	else if (!strcmp(argv[index], "address"))
3198 		mode = 0;
3199 	if (mode != 0xf)
3200 		index++;
3201 
3202 	if (index >= argc) {
3203 		kprintf("No area specifier given.\n");
3204 		return 0;
3205 	}
3206 
3207 	num = parse_expression(argv[index]);
3208 
3209 	if (mode == 0) {
3210 		dump_area_struct((struct VMArea*)num, mappings);
3211 	} else {
3212 		// walk through the area list, looking for the arguments as a name
3213 
3214 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3215 		while ((area = it.Next()) != NULL) {
3216 			if (((mode & 4) != 0 && area->name != NULL
3217 					&& !strcmp(argv[index], area->name))
3218 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3219 					|| (((mode & 2) != 0 && area->Base() <= num
3220 						&& area->Base() + area->Size() > num))))) {
3221 				dump_area_struct(area, mappings);
3222 				found = true;
3223 			}
3224 		}
3225 
3226 		if (!found)
3227 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3228 	}
3229 
3230 	return 0;
3231 }
3232 
3233 
3234 static int
3235 dump_area_list(int argc, char** argv)
3236 {
3237 	VMArea* area;
3238 	const char* name = NULL;
3239 	int32 id = 0;
3240 
3241 	if (argc > 1) {
3242 		id = parse_expression(argv[1]);
3243 		if (id == 0)
3244 			name = argv[1];
3245 	}
3246 
3247 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
3248 
3249 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3250 	while ((area = it.Next()) != NULL) {
3251 		if ((id != 0 && area->address_space->ID() != id)
3252 			|| (name != NULL && strstr(area->name, name) == NULL))
3253 			continue;
3254 
3255 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id,
3256 			(void*)area->Base(), (void*)area->Size(), area->protection,
3257 			area->wiring, area->name);
3258 	}
3259 	return 0;
3260 }
3261 
3262 
3263 static int
3264 dump_available_memory(int argc, char** argv)
3265 {
3266 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3267 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3268 	return 0;
3269 }
3270 
3271 
3272 /*!	Deletes all areas and reserved regions in the given address space.
3273 
3274 	The caller must ensure that none of the areas has any wired ranges.
3275 
3276 	\param addressSpace The address space.
3277 	\param deletingAddressSpace \c true, if the address space is in the process
3278 		of being deleted.
3279 */
3280 void
3281 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3282 {
3283 	TRACE(("vm_delete_areas: called on address space 0x%lx\n",
3284 		addressSpace->ID()));
3285 
3286 	addressSpace->WriteLock();
3287 
3288 	// remove all reserved areas in this address space
3289 	addressSpace->UnreserveAllAddressRanges(0);
3290 
3291 	// delete all the areas in this address space
3292 	while (VMArea* area = addressSpace->FirstArea()) {
3293 		ASSERT(!area->IsWired());
3294 		delete_area(addressSpace, area, deletingAddressSpace);
3295 	}
3296 
3297 	addressSpace->WriteUnlock();
3298 }
3299 
3300 
3301 static area_id
3302 vm_area_for(addr_t address, bool kernel)
3303 {
3304 	team_id team;
3305 	if (IS_USER_ADDRESS(address)) {
3306 		// we try the user team address space, if any
3307 		team = VMAddressSpace::CurrentID();
3308 		if (team < 0)
3309 			return team;
3310 	} else
3311 		team = VMAddressSpace::KernelID();
3312 
3313 	AddressSpaceReadLocker locker(team);
3314 	if (!locker.IsLocked())
3315 		return B_BAD_TEAM_ID;
3316 
3317 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3318 	if (area != NULL) {
3319 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3320 			return B_ERROR;
3321 
3322 		return area->id;
3323 	}
3324 
3325 	return B_ERROR;
3326 }
3327 
3328 
3329 /*!	Frees physical pages that were used during the boot process.
3330 	\a end is inclusive.
3331 */
3332 static void
3333 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3334 {
3335 	// free all physical pages in the specified range
3336 
3337 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3338 		phys_addr_t physicalAddress;
3339 		uint32 flags;
3340 
3341 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3342 			&& (flags & PAGE_PRESENT) != 0) {
3343 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3344 			if (page != NULL && page->State() != PAGE_STATE_FREE
3345 					 && page->State() != PAGE_STATE_CLEAR
3346 					 && page->State() != PAGE_STATE_UNUSED) {
3347 				DEBUG_PAGE_ACCESS_START(page);
3348 				vm_page_set_state(page, PAGE_STATE_FREE);
3349 			}
3350 		}
3351 	}
3352 
3353 	// unmap the memory
3354 	map->Unmap(start, end);
3355 }
3356 
3357 
3358 void
3359 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3360 {
3361 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3362 	addr_t end = start + (size - 1);
3363 	addr_t lastEnd = start;
3364 
3365 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3366 		(void*)start, (void*)end));
3367 
3368 	// The areas are sorted in virtual address space order, so
3369 	// we just have to find the holes between them that fall
3370 	// into the area we should dispose
3371 
3372 	map->Lock();
3373 
3374 	for (VMAddressSpace::AreaIterator it
3375 				= VMAddressSpace::Kernel()->GetAreaIterator();
3376 			VMArea* area = it.Next();) {
3377 		addr_t areaStart = area->Base();
3378 		addr_t areaEnd = areaStart + (area->Size() - 1);
3379 
3380 		if (areaEnd < start)
3381 			continue;
3382 
3383 		if (areaStart > end) {
3384 			// we are done, the area is already beyond what we have to free
3385 			break;
3386 		}
3387 
3388 		if (areaStart > lastEnd) {
3389 			// this is something we can free
3390 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3391 				(void*)areaStart));
3392 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3393 		}
3394 
3395 		if (areaEnd >= end) {
3396 			lastEnd = areaEnd;
3397 				// no +1 to prevent potential overflow
3398 			break;
3399 		}
3400 
3401 		lastEnd = areaEnd + 1;
3402 	}
3403 
3404 	if (lastEnd < end) {
3405 		// we can also get rid of some space at the end of the area
3406 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3407 			(void*)end));
3408 		unmap_and_free_physical_pages(map, lastEnd, end);
3409 	}
3410 
3411 	map->Unlock();
3412 }
3413 
3414 
3415 static void
3416 create_preloaded_image_areas(struct preloaded_image* image)
3417 {
3418 	char name[B_OS_NAME_LENGTH];
3419 	void* address;
3420 	int32 length;
3421 
3422 	// use file name to create a good area name
3423 	char* fileName = strrchr(image->name, '/');
3424 	if (fileName == NULL)
3425 		fileName = image->name;
3426 	else
3427 		fileName++;
3428 
3429 	length = strlen(fileName);
3430 	// make sure there is enough space for the suffix
3431 	if (length > 25)
3432 		length = 25;
3433 
3434 	memcpy(name, fileName, length);
3435 	strcpy(name + length, "_text");
3436 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3437 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3438 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3439 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3440 		// this will later be remapped read-only/executable by the
3441 		// ELF initialization code
3442 
3443 	strcpy(name + length, "_data");
3444 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3445 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3446 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3447 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3448 }
3449 
3450 
3451 /*!	Frees all areas previously created for the kernel args ranges of the
3452 	kernel_args structure. Any boot loader resources contained in those
3453 	arguments must not be accessed anymore past this point.
3454 */
3455 void
3456 vm_free_kernel_args(kernel_args* args)
3457 {
3458 	uint32 i;
3459 
3460 	TRACE(("vm_free_kernel_args()\n"));
3461 
3462 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3463 		area_id area = area_for((void*)args->kernel_args_range[i].start);
3464 		if (area >= B_OK)
3465 			delete_area(area);
3466 	}
3467 }
3468 
3469 
3470 static void
3471 allocate_kernel_args(kernel_args* args)
3472 {
3473 	TRACE(("allocate_kernel_args()\n"));
3474 
3475 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3476 		void* address = (void*)args->kernel_args_range[i].start;
3477 
3478 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3479 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3480 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3481 	}
3482 }
3483 
3484 
3485 static void
3486 unreserve_boot_loader_ranges(kernel_args* args)
3487 {
3488 	TRACE(("unreserve_boot_loader_ranges()\n"));
3489 
3490 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3491 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3492 			(void*)args->virtual_allocated_range[i].start,
3493 			args->virtual_allocated_range[i].size);
3494 	}
3495 }
3496 
3497 
3498 static void
3499 reserve_boot_loader_ranges(kernel_args* args)
3500 {
3501 	TRACE(("reserve_boot_loader_ranges()\n"));
3502 
3503 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3504 		void* address = (void*)args->virtual_allocated_range[i].start;
3505 
3506 		// If the address is not a kernel address, we just skip it. The
3507 		// architecture-specific code has to deal with it.
3508 		if (!IS_KERNEL_ADDRESS(address)) {
3509 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
3510 				address, args->virtual_allocated_range[i].size);
3511 			continue;
3512 		}
3513 
3514 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3515 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3516 		if (status < B_OK)
3517 			panic("could not reserve boot loader ranges\n");
3518 	}
3519 }
3520 
3521 
3522 static addr_t
3523 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3524 {
3525 	size = PAGE_ALIGN(size);
3526 
3527 	// find a slot in the virtual allocation addr range
3528 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3529 		// check to see if the space between this one and the last is big enough
3530 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3531 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3532 			+ args->virtual_allocated_range[i - 1].size;
3533 
3534 		addr_t base = alignment > 0
3535 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3536 
3537 		if (base >= KERNEL_BASE && base < rangeStart
3538 				&& rangeStart - base >= size) {
3539 			args->virtual_allocated_range[i - 1].size
3540 				+= base + size - previousRangeEnd;
3541 			return base;
3542 		}
3543 	}
3544 
3545 	// we didn't find a gap between the existing allocation ranges; that's OK.
3546 	// See if there's a gap after the last one.
3547 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3548 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3549 		+ args->virtual_allocated_range[lastEntryIndex].size;
3550 	addr_t base = alignment > 0
3551 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3552 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3553 		args->virtual_allocated_range[lastEntryIndex].size
3554 			+= base + size - lastRangeEnd;
3555 		return base;
3556 	}
3557 
3558 	// see if there's a gap before the first one
3559 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3560 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3561 		base = rangeStart - size;
3562 		if (alignment > 0)
3563 			base = ROUNDDOWN(base, alignment);
3564 
3565 		if (base >= KERNEL_BASE) {
3566 			args->virtual_allocated_range[0].start = base;
3567 			args->virtual_allocated_range[0].size += rangeStart - base;
3568 			return base;
3569 		}
3570 	}
3571 
3572 	return 0;
3573 }
3574 
3575 
3576 static bool
3577 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3578 {
3579 	// TODO: horrible brute-force method of determining if the page can be
3580 	// allocated
3581 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3582 		if (address >= args->physical_memory_range[i].start
3583 			&& address < args->physical_memory_range[i].start
3584 				+ args->physical_memory_range[i].size)
3585 			return true;
3586 	}
3587 	return false;
3588 }
3589 
3590 
3591 page_num_t
3592 vm_allocate_early_physical_page(kernel_args* args)
3593 {
3594 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3595 		phys_addr_t nextPage;
3596 
3597 		nextPage = args->physical_allocated_range[i].start
3598 			+ args->physical_allocated_range[i].size;
3599 		// see if the page after this allocated paddr run can be allocated
3600 		if (i + 1 < args->num_physical_allocated_ranges
3601 			&& args->physical_allocated_range[i + 1].size != 0) {
3602 			// see if the next page will collide with the next allocated range
3603 			if (nextPage >= args->physical_allocated_range[i+1].start)
3604 				continue;
3605 		}
3606 		// see if the next physical page fits in the memory block
3607 		if (is_page_in_physical_memory_range(args, nextPage)) {
3608 			// we got one!
3609 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3610 			return nextPage / B_PAGE_SIZE;
3611 		}
3612 	}
3613 
3614 	return 0;
3615 		// could not allocate a page
3616 }
3617 
3618 
3619 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3620 	allocate some pages before the VM is completely up.
3621 */
3622 addr_t
3623 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3624 	uint32 attributes, addr_t alignment)
3625 {
3626 	if (physicalSize > virtualSize)
3627 		physicalSize = virtualSize;
3628 
3629 	// find the vaddr to allocate at
3630 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3631 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3632 
3633 	// map the pages
3634 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3635 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3636 		if (physicalAddress == 0)
3637 			panic("error allocating early page!\n");
3638 
3639 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3640 
3641 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3642 			physicalAddress * B_PAGE_SIZE, attributes,
3643 			&vm_allocate_early_physical_page);
3644 	}
3645 
3646 	return virtualBase;
3647 }
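
// The following is a hedged usage sketch (not part of the original file and
// compiled out): it shows how an early-boot caller might use
// vm_allocate_early() to obtain a page-aligned, kernel read/write scratch
// buffer. The helper name and the one-page size are illustrative only.
#if 0
static addr_t
allocate_early_scratch_page(kernel_args* args)
{
	// one page of physical memory backing one page of virtual address space,
	// mapped kernel read/write, no special alignment
	return vm_allocate_early(args, B_PAGE_SIZE, B_PAGE_SIZE,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
}
#endif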
3648 
3649 
3650 /*!	The main entry point to initialize the VM. */
3651 status_t
3652 vm_init(kernel_args* args)
3653 {
3654 	struct preloaded_image* image;
3655 	void* address;
3656 	status_t err = 0;
3657 	uint32 i;
3658 
3659 	TRACE(("vm_init: entry\n"));
3660 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3661 	err = arch_vm_init(args);
3662 
3663 	// initialize some globals
3664 	vm_page_init_num_pages(args);
3665 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3666 
3667 	size_t heapSize = INITIAL_HEAP_SIZE;
3668 	// try to accommodate low memory systems
3669 	while (heapSize > sAvailableMemory / 8)
3670 		heapSize /= 2;
3671 	if (heapSize < 1024 * 1024)
3672 		panic("vm_init: go buy some RAM please.");
3673 
3674 	slab_init(args);
3675 
3676 #if	!USE_SLAB_ALLOCATOR_FOR_MALLOC
3677 	// map in the new heap and initialize it
3678 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3679 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3680 	TRACE(("heap at 0x%lx\n", heapBase));
3681 	heap_init(heapBase, heapSize);
3682 #endif
3683 
3684 	// initialize the free page list and physical page mapper
3685 	vm_page_init(args);
3686 
3687 	// initialize the cache allocators
3688 	vm_cache_init(args);
3689 
3690 	{
3691 		status_t error = VMAreaHash::Init();
3692 		if (error != B_OK)
3693 			panic("vm_init: error initializing area hash table\n");
3694 	}
3695 
3696 	VMAddressSpace::Init();
3697 	reserve_boot_loader_ranges(args);
3698 
3699 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
3700 	heap_init_post_area();
3701 #endif
3702 
3703 	// Do any further initialization that the architecture dependent layers may
3704 	// need now
3705 	arch_vm_translation_map_init_post_area(args);
3706 	arch_vm_init_post_area(args);
3707 	vm_page_init_post_area(args);
3708 	slab_init_post_area();
3709 
3710 	// allocate areas to represent stuff that already exists
3711 
3712 #if	!USE_SLAB_ALLOCATOR_FOR_MALLOC
3713 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
3714 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
3715 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3716 #endif
3717 
3718 	allocate_kernel_args(args);
3719 
3720 	create_preloaded_image_areas(&args->kernel_image);
3721 
3722 	// allocate areas for preloaded images
3723 	for (image = args->preloaded_images; image != NULL; image = image->next)
3724 		create_preloaded_image_areas(image);
3725 
3726 	// allocate kernel stacks
3727 	for (i = 0; i < args->num_cpus; i++) {
3728 		char name[64];
3729 
3730 		sprintf(name, "idle thread %lu kstack", i + 1);
3731 		address = (void*)args->cpu_kstack[i].start;
3732 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
3733 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3734 	}
3735 
3736 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
3737 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
3738 
3739 #if PARANOID_KERNEL_MALLOC
3740 	vm_block_address_range("uninitialized heap memory",
3741 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3742 #endif
3743 #if PARANOID_KERNEL_FREE
3744 	vm_block_address_range("freed heap memory",
3745 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3746 #endif
3747 
3748 	// create the object cache for the page mappings
3749 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
3750 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
3751 		NULL, NULL);
3752 	if (gPageMappingsObjectCache == NULL)
3753 		panic("failed to create page mappings object cache");
3754 
3755 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
3756 
3757 #if DEBUG_CACHE_LIST
3758 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
3759 		virtual_address_restrictions virtualRestrictions = {};
3760 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
3761 		physical_address_restrictions physicalRestrictions = {};
3762 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
3763 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
3764 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
3765 			CREATE_AREA_DONT_WAIT, &virtualRestrictions, &physicalRestrictions,
3766 			(void**)&sCacheInfoTable);
3767 	}
3768 #endif	// DEBUG_CACHE_LIST
3769 
3770 	// add some debugger commands
3771 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
3772 	add_debugger_command("area", &dump_area,
3773 		"Dump info about a particular area");
3774 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
3775 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
3776 #if DEBUG_CACHE_LIST
3777 	if (sCacheInfoTable != NULL) {
3778 		add_debugger_command_etc("caches", &dump_caches,
3779 			"List all VMCache trees",
3780 			"[ \"-c\" ]\n"
3781 			"All cache trees are listed sorted in decreasing order by number "
3782 				"of\n"
3783 			"used pages or, if \"-c\" is specified, by size of committed "
3784 				"memory.\n",
3785 			0);
3786 	}
3787 #endif
3788 	add_debugger_command("avail", &dump_available_memory,
3789 		"Dump available memory");
3790 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
3791 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
3792 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
3793 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
3794 	add_debugger_command("string", &display_mem, "dump strings");
3795 
3796 	TRACE(("vm_init: exit\n"));
3797 
3798 	vm_cache_init_post_heap();
3799 
3800 	return err;
3801 }
3802 
3803 
3804 status_t
3805 vm_init_post_sem(kernel_args* args)
3806 {
3807 	// This frees all unused boot loader resources and makes their space
3808 	// available again
3809 	arch_vm_init_end(args);
3810 	unreserve_boot_loader_ranges(args);
3811 
3812 	// fill in all of the semaphores that were not allocated before
3813 	// since we're still single threaded and only the kernel address space
3814 	// exists, it isn't that hard to find all of the ones we need to create
3815 
3816 	arch_vm_translation_map_init_post_sem(args);
3817 
3818 	slab_init_post_sem();
3819 
3820 #if	!USE_SLAB_ALLOCATOR_FOR_MALLOC
3821 	heap_init_post_sem();
3822 #endif
3823 
3824 	return B_OK;
3825 }
3826 
3827 
3828 status_t
3829 vm_init_post_thread(kernel_args* args)
3830 {
3831 	vm_page_init_post_thread(args);
3832 	slab_init_post_thread();
3833 	return heap_init_post_thread();
3834 }
3835 
3836 
3837 status_t
3838 vm_init_post_modules(kernel_args* args)
3839 {
3840 	return arch_vm_init_post_modules(args);
3841 }
3842 
3843 
3844 void
3845 permit_page_faults(void)
3846 {
3847 	Thread* thread = thread_get_current_thread();
3848 	if (thread != NULL)
3849 		atomic_add(&thread->page_faults_allowed, 1);
3850 }
3851 
3852 
3853 void
3854 forbid_page_faults(void)
3855 {
3856 	Thread* thread = thread_get_current_thread();
3857 	if (thread != NULL)
3858 		atomic_add(&thread->page_faults_allowed, -1);
3859 }
3860 
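
// Hedged sketch (compiled out): the two counters above are meant to be used
// as a balanced pair around code that may legitimately fault. The helper name
// is hypothetical; user_memcpy() is the routine defined further below.
#if 0
static status_t
copy_from_user_with_faults_allowed(void* to, const void* from, size_t size)
{
	permit_page_faults();
	status_t status = user_memcpy(to, from, size);
	forbid_page_faults();
	return status;
}
#endif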
3861 
3862 status_t
3863 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
3864 	addr_t* newIP)
3865 {
3866 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
3867 		faultAddress));
3868 
3869 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
3870 
3871 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
3872 	VMAddressSpace* addressSpace = NULL;
3873 
3874 	status_t status = B_OK;
3875 	*newIP = 0;
3876 	atomic_add((int32*)&sPageFaults, 1);
3877 
3878 	if (IS_KERNEL_ADDRESS(pageAddress)) {
3879 		addressSpace = VMAddressSpace::GetKernel();
3880 	} else if (IS_USER_ADDRESS(pageAddress)) {
3881 		addressSpace = VMAddressSpace::GetCurrent();
3882 		if (addressSpace == NULL) {
3883 			if (!isUser) {
3884 				dprintf("vm_page_fault: kernel thread accessing invalid user "
3885 					"memory!\n");
3886 				status = B_BAD_ADDRESS;
3887 				TPF(PageFaultError(-1,
3888 					VMPageFaultTracing
3889 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
3890 			} else {
3891 				// XXX weird state.
3892 				panic("vm_page_fault: non kernel thread accessing user memory "
3893 					"that doesn't exist!\n");
3894 				status = B_BAD_ADDRESS;
3895 			}
3896 		}
3897 	} else {
3898 		// The hit was probably in the 64k DMZ between kernel and user space;
3899 		// this keeps a user space thread from passing a buffer that crosses
3900 		// into kernel space.
3901 		status = B_BAD_ADDRESS;
3902 		TPF(PageFaultError(-1,
3903 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
3904 	}
3905 
3906 	if (status == B_OK) {
3907 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser,
3908 			NULL);
3909 	}
3910 
3911 	if (status < B_OK) {
3912 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
3913 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
3914 			strerror(status), address, faultAddress, isWrite, isUser,
3915 			thread_get_current_thread_id());
3916 		if (!isUser) {
3917 			Thread* thread = thread_get_current_thread();
3918 			if (thread != NULL && thread->fault_handler != 0) {
3919 				// this will cause the arch dependent page fault handler to
3920 				// modify the IP on the interrupt frame or whatever to return
3921 				// to this address
3922 				*newIP = thread->fault_handler;
3923 			} else {
3924 				// unhandled page fault in the kernel
3925 				panic("vm_page_fault: unhandled page fault in kernel space at "
3926 					"0x%lx, ip 0x%lx\n", address, faultAddress);
3927 			}
3928 		} else {
3929 #if 1
3930 			addressSpace->ReadLock();
3931 
3932 			// TODO: remove me once we have proper userland debugging support
3933 			// (and tools)
3934 			VMArea* area = addressSpace->LookupArea(faultAddress);
3935 
3936 			Thread* thread = thread_get_current_thread();
3937 			dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) "
3938 				"tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n",
3939 				thread->name, thread->id, thread->team->Name(),
3940 				thread->team->id, isWrite ? "write" : "read", address,
3941 				faultAddress, area ? area->name : "???",
3942 				faultAddress - (area ? area->Base() : 0x0));
3943 
3944 			// We can print a stack trace of the userland thread here.
3945 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
3946 // fault and someone is already waiting for a write lock on the same address
3947 // space. This thread will then try to acquire the lock again and will
3948 // be queued after the writer.
3949 #	if 0
3950 			if (area) {
3951 				struct stack_frame {
3952 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
3953 						struct stack_frame*	previous;
3954 						void*				return_address;
3955 					#else
3956 						// ...
3957 					#warning writeme
3958 					#endif
3959 				} frame;
3960 #		ifdef __INTEL__
3961 				struct iframe* iframe = i386_get_user_iframe();
3962 				if (iframe == NULL)
3963 					panic("iframe is NULL!");
3964 
3965 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
3966 					sizeof(struct stack_frame));
3967 #		elif defined(__POWERPC__)
3968 				struct iframe* iframe = ppc_get_user_iframe();
3969 				if (iframe == NULL)
3970 					panic("iframe is NULL!");
3971 
3972 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
3973 					sizeof(struct stack_frame));
3974 #		else
3975 #			warning "vm_page_fault() stack trace won't work"
3976 				status = B_ERROR;
3977 #		endif
3978 
3979 				dprintf("stack trace:\n");
3980 				int32 maxFrames = 50;
3981 				while (status == B_OK && --maxFrames >= 0
3982 						&& frame.return_address != NULL) {
3983 					dprintf("  %p", frame.return_address);
3984 					area = addressSpace->LookupArea(
3985 						(addr_t)frame.return_address);
3986 					if (area) {
3987 						dprintf(" (%s + %#lx)", area->name,
3988 							(addr_t)frame.return_address - area->Base());
3989 					}
3990 					dprintf("\n");
3991 
3992 					status = user_memcpy(&frame, frame.previous,
3993 						sizeof(struct stack_frame));
3994 				}
3995 			}
3996 #	endif	// 0 (stack trace)
3997 
3998 			addressSpace->ReadUnlock();
3999 #endif
4000 
4001 			// TODO: the fault_callback is a temporary solution for vm86
4002 			if (thread->fault_callback == NULL
4003 				|| thread->fault_callback(address, faultAddress, isWrite)) {
4004 				// If the thread has a signal handler for SIGSEGV, we simply
4005 				// send it the signal. Otherwise we notify the user debugger
4006 				// first.
4007 				struct sigaction action;
4008 				if ((sigaction(SIGSEGV, NULL, &action) == 0
4009 						&& action.sa_handler != SIG_DFL
4010 						&& action.sa_handler != SIG_IGN)
4011 					|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4012 						SIGSEGV)) {
4013 					Signal signal(SIGSEGV,
4014 						status == B_PERMISSION_DENIED
4015 							? SEGV_ACCERR : SEGV_MAPERR,
4016 						EFAULT, thread->team->id);
4017 					signal.SetAddress((void*)address);
4018 					send_signal_to_thread(thread, signal, 0);
4019 				}
4020 			}
4021 		}
4022 	}
4023 
4024 	if (addressSpace != NULL)
4025 		addressSpace->Put();
4026 
4027 	return B_HANDLED_INTERRUPT;
4028 }
4029 
4030 
4031 struct PageFaultContext {
4032 	AddressSpaceReadLocker	addressSpaceLocker;
4033 	VMCacheChainLocker		cacheChainLocker;
4034 
4035 	VMTranslationMap*		map;
4036 	VMCache*				topCache;
4037 	off_t					cacheOffset;
4038 	vm_page_reservation		reservation;
4039 	bool					isWrite;
4040 
4041 	// return values
4042 	vm_page*				page;
4043 	bool					restart;
4044 
4045 
4046 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4047 		:
4048 		addressSpaceLocker(addressSpace, true),
4049 		map(addressSpace->TranslationMap()),
4050 		isWrite(isWrite)
4051 	{
4052 	}
4053 
4054 	~PageFaultContext()
4055 	{
4056 		UnlockAll();
4057 		vm_page_unreserve_pages(&reservation);
4058 	}
4059 
4060 	void Prepare(VMCache* topCache, off_t cacheOffset)
4061 	{
4062 		this->topCache = topCache;
4063 		this->cacheOffset = cacheOffset;
4064 		page = NULL;
4065 		restart = false;
4066 
4067 		cacheChainLocker.SetTo(topCache);
4068 	}
4069 
4070 	void UnlockAll(VMCache* exceptCache = NULL)
4071 	{
4072 		topCache = NULL;
4073 		addressSpaceLocker.Unlock();
4074 		cacheChainLocker.Unlock(exceptCache);
4075 	}
4076 };
4077 
4078 
4079 /*!	Gets the page that should be mapped into the area.
4080 	Returns an error code other than \c B_OK, if the page couldn't be found or
4081 	paged in. The locking state of the address space and the caches is undefined
4082 	in that case.
4083 	Returns \c B_OK with \c context.restart set to \c true, if the function
4084 	had to unlock the address space and all caches and is supposed to be called
4085 	again.
4086 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4087 	found. It is returned in \c context.page. The address space will still be
4088 	locked as well as all caches starting from the top cache to at least the
4089 	cache the page lives in.
4090 */
4091 static status_t
4092 fault_get_page(PageFaultContext& context)
4093 {
4094 	VMCache* cache = context.topCache;
4095 	VMCache* lastCache = NULL;
4096 	vm_page* page = NULL;
4097 
4098 	while (cache != NULL) {
4099 		// We already hold the lock of the cache at this point.
4100 
4101 		lastCache = cache;
4102 
4103 		page = cache->LookupPage(context.cacheOffset);
4104 		if (page != NULL && page->busy) {
4105 			// page is busy -- wait for it to become unbusy
4106 			context.UnlockAll(cache);
4107 			cache->ReleaseRefLocked();
4108 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4109 
4110 			// restart the whole process
4111 			context.restart = true;
4112 			return B_OK;
4113 		}
4114 
4115 		if (page != NULL)
4116 			break;
4117 
4118 		// The current cache does not contain the page we're looking for.
4119 
4120 		// see if the backing store has it
4121 		if (cache->HasPage(context.cacheOffset)) {
4122 			// insert a fresh page and mark it busy -- we're going to read it in
4123 			page = vm_page_allocate_page(&context.reservation,
4124 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4125 			cache->InsertPage(page, context.cacheOffset);
4126 
4127 			// We need to unlock all caches and the address space while reading
4128 			// the page in. Keep a reference to the cache around.
4129 			cache->AcquireRefLocked();
4130 			context.UnlockAll();
4131 
4132 			// read the page in
4133 			generic_io_vec vec;
4134 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4135 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4136 
4137 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4138 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4139 
4140 			cache->Lock();
4141 
4142 			if (status < B_OK) {
4143 				// on error remove and free the page
4144 				dprintf("reading page from cache %p returned: %s!\n",
4145 					cache, strerror(status));
4146 
4147 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4148 				cache->RemovePage(page);
4149 				vm_page_set_state(page, PAGE_STATE_FREE);
4150 
4151 				cache->ReleaseRefAndUnlock();
4152 				return status;
4153 			}
4154 
4155 			// mark the page unbusy again
4156 			cache->MarkPageUnbusy(page);
4157 
4158 			DEBUG_PAGE_ACCESS_END(page);
4159 
4160 			// Since we needed to unlock everything temporarily, the area
4161 			// situation might have changed. So we need to restart the whole
4162 			// process.
4163 			cache->ReleaseRefAndUnlock();
4164 			context.restart = true;
4165 			return B_OK;
4166 		}
4167 
4168 		cache = context.cacheChainLocker.LockSourceCache();
4169 	}
4170 
4171 	if (page == NULL) {
4172 		// There was no adequate page, determine the cache for a clean one.
4173 		// Read-only pages go into the deepest cache; only the topmost cache
4174 		// may have direct write access.
4175 		cache = context.isWrite ? context.topCache : lastCache;
4176 
4177 		// allocate a clean page
4178 		page = vm_page_allocate_page(&context.reservation,
4179 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4180 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n",
4181 			page->physical_page_number));
4182 
4183 		// insert the new page into our cache
4184 		cache->InsertPage(page, context.cacheOffset);
4185 	} else if (page->Cache() != context.topCache && context.isWrite) {
4186 		// We have a page that has the data we want, but in the wrong cache
4187 		// object so we need to copy it and stick it into the top cache.
4188 		vm_page* sourcePage = page;
4189 
4190 		// TODO: If memory is low, it might be a good idea to steal the page
4191 		// from our source cache -- if possible, that is.
4192 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4193 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4194 
4195 		// To not needlessly kill concurrency we unlock all caches but the top
4196 		// one while copying the page. Lacking another mechanism to ensure that
4197 		// the source page doesn't disappear, we mark it busy.
4198 		sourcePage->busy = true;
4199 		context.cacheChainLocker.UnlockKeepRefs(true);
4200 
4201 		// copy the page
4202 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4203 			sourcePage->physical_page_number * B_PAGE_SIZE);
4204 
4205 		context.cacheChainLocker.RelockCaches(true);
4206 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4207 
4208 		// insert the new page into our cache
4209 		context.topCache->InsertPage(page, context.cacheOffset);
4210 	} else
4211 		DEBUG_PAGE_ACCESS_START(page);
4212 
4213 	context.page = page;
4214 	return B_OK;
4215 }
4216 
4217 
4218 /*!	Makes sure the address in the given address space is mapped.
4219 
4220 	\param addressSpace The address space.
4221 	\param originalAddress The address. Doesn't need to be page aligned.
4222 	\param isWrite If \c true the address shall be write-accessible.
4223 	\param isUser If \c true the access is requested by a userland team.
4224 	\param wirePage On success, if non \c NULL, the wired count of the page
4225 		mapped at the given address is incremented and the page is returned
4226 		via this parameter.
4227 	\param wiredRange If given, this wiredRange is ignored when checking whether
4228 		an already mapped page at the virtual address can be unmapped.
4229 	\return \c B_OK on success, another error code otherwise.
4230 */
4231 static status_t
4232 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4233 	bool isWrite, bool isUser, vm_page** wirePage, VMAreaWiredRange* wiredRange)
4234 {
4235 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
4236 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
4237 
4238 	PageFaultContext context(addressSpace, isWrite);
4239 
4240 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4241 	status_t status = B_OK;
4242 
4243 	addressSpace->IncrementFaultCount();
4244 
4245 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4246 	// the pages upfront makes sure we don't have any cache locked, so that the
4247 	// page daemon/thief can do their job without problems.
4248 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4249 		originalAddress);
4250 	context.addressSpaceLocker.Unlock();
4251 	vm_page_reserve_pages(&context.reservation, reservePages,
4252 		addressSpace == VMAddressSpace::Kernel()
4253 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4254 
4255 	while (true) {
4256 		context.addressSpaceLocker.Lock();
4257 
4258 		// get the area the fault was in
4259 		VMArea* area = addressSpace->LookupArea(address);
4260 		if (area == NULL) {
4261 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4262 				"space\n", originalAddress);
4263 			TPF(PageFaultError(-1,
4264 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4265 			status = B_BAD_ADDRESS;
4266 			break;
4267 		}
4268 
4269 		// check permissions
4270 		uint32 protection = get_area_page_protection(area, address);
4271 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4272 			dprintf("user access on kernel area 0x%lx at %p\n", area->id,
4273 				(void*)originalAddress);
4274 			TPF(PageFaultError(area->id,
4275 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4276 			status = B_PERMISSION_DENIED;
4277 			break;
4278 		}
4279 		if (isWrite && (protection
4280 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4281 			dprintf("write access attempted on write-protected area 0x%lx at"
4282 				" %p\n", area->id, (void*)originalAddress);
4283 			TPF(PageFaultError(area->id,
4284 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4285 			status = B_PERMISSION_DENIED;
4286 			break;
4287 		} else if (!isWrite && (protection
4288 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4289 			dprintf("read access attempted on read-protected area 0x%lx at"
4290 				" %p\n", area->id, (void*)originalAddress);
4291 			TPF(PageFaultError(area->id,
4292 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4293 			status = B_PERMISSION_DENIED;
4294 			break;
4295 		}
4296 
4297 		// We have the area, it was a valid access, so let's try to resolve the
4298 		// page fault now.
4299 		// At first, the top most cache from the area is investigated.
4300 
4301 		context.Prepare(vm_area_get_locked_cache(area),
4302 			address - area->Base() + area->cache_offset);
4303 
4304 		// See if this cache has a fault handler -- this will do all the work
4305 		// for us.
4306 		{
4307 			// Note, since the page fault is resolved with interrupts enabled,
4308 			// the fault handler could be called more than once for the same
4309 			// reason -- the store must take this into account.
4310 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4311 			if (status != B_BAD_HANDLER)
4312 				break;
4313 		}
4314 
4315 		// The top most cache has no fault handler, so let's see if the cache or
4316 		// its sources already have the page we're searching for (we're going
4317 		// from top to bottom).
4318 		status = fault_get_page(context);
4319 		if (status != B_OK) {
4320 			TPF(PageFaultError(area->id, status));
4321 			break;
4322 		}
4323 
4324 		if (context.restart)
4325 			continue;
4326 
4327 		// All went fine, all there is left to do is to map the page into the
4328 		// address space.
4329 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4330 			context.page));
4331 
4332 		// If the page doesn't reside in the area's cache, we need to make sure
4333 		// it's mapped in read-only, so that we cannot overwrite someone else's
4334 		// data (copy-on-write)
4335 		uint32 newProtection = protection;
4336 		if (context.page->Cache() != context.topCache && !isWrite)
4337 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4338 
4339 		bool unmapPage = false;
4340 		bool mapPage = true;
4341 
4342 		// check whether there's already a page mapped at the address
4343 		context.map->Lock();
4344 
4345 		phys_addr_t physicalAddress;
4346 		uint32 flags;
4347 		vm_page* mappedPage = NULL;
4348 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4349 			&& (flags & PAGE_PRESENT) != 0
4350 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4351 				!= NULL) {
4352 			// Yep there's already a page. If it's ours, we can simply adjust
4353 			// its protection. Otherwise we have to unmap it.
4354 			if (mappedPage == context.page) {
4355 				context.map->ProtectPage(area, address, newProtection);
4356 					// Note: We assume that ProtectPage() is atomic (i.e.
4357 					// the page isn't temporarily unmapped), otherwise we'd have
4358 					// to make sure it isn't wired.
4359 				mapPage = false;
4360 			} else
4361 				unmapPage = true;
4362 		}
4363 
4364 		context.map->Unlock();
4365 
4366 		if (unmapPage) {
4367 			// If the page is wired, we can't unmap it. Wait until it is unwired
4368 			// again and restart.
4369 			VMAreaUnwiredWaiter waiter;
4370 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4371 					wiredRange)) {
4372 				// unlock everything and wait
4373 				context.UnlockAll();
4374 				waiter.waitEntry.Wait();
4375 				continue;
4376 			}
4377 
4378 			// Note: The mapped page is a page of a lower cache. We are
4379 			// guaranteed to have that cache locked, our new page is a copy of
4380 			// that page, and the page is not busy. The logic for that guarantee
4381 			// is as follows: Since the page is mapped, it must live in the top
4382 			// cache (ruled out above) or any of its lower caches, and there is
4383 			// (was before the new page was inserted) no other page in any
4384 			// cache between the top cache and the page's cache (otherwise that
4385 			// would be mapped instead). That in turn means that our algorithm
4386 			// must have found it and therefore it cannot be busy either.
4387 			DEBUG_PAGE_ACCESS_START(mappedPage);
4388 			unmap_page(area, address);
4389 			DEBUG_PAGE_ACCESS_END(mappedPage);
4390 		}
4391 
4392 		if (mapPage) {
4393 			if (map_page(area, context.page, address, newProtection,
4394 					&context.reservation) != B_OK) {
4395 				// Mapping can only fail when the page mapping object couldn't
4396 				// be allocated. Save for the missing mapping everything is
4397 				// fine, though. If this was a regular page fault, we'll simply
4398 				// leave and probably fault again. To make sure we'll have more
4399 				// luck then, we ensure that the minimum object reserve is
4400 				// available.
4401 				DEBUG_PAGE_ACCESS_END(context.page);
4402 
4403 				context.UnlockAll();
4404 
4405 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4406 						!= B_OK) {
4407 					// Apparently the situation is serious. Let's get ourselves
4408 					// killed.
4409 					status = B_NO_MEMORY;
4410 				} else if (wirePage != NULL) {
4411 					// The caller expects us to wire the page. Since
4412 					// object_cache_reserve() succeeded, we should now be able
4413 					// to allocate a mapping structure. Restart.
4414 					continue;
4415 				}
4416 
4417 				break;
4418 			}
4419 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4420 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4421 
4422 		// also wire the page, if requested
4423 		if (wirePage != NULL && status == B_OK) {
4424 			increment_page_wired_count(context.page);
4425 			*wirePage = context.page;
4426 		}
4427 
4428 		DEBUG_PAGE_ACCESS_END(context.page);
4429 
4430 		break;
4431 	}
4432 
4433 	return status;
4434 }
4435 
4436 
4437 status_t
4438 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4439 {
4440 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4441 }
4442 
4443 status_t
4444 vm_put_physical_page(addr_t vaddr, void* handle)
4445 {
4446 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4447 }
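
// Minimal sketch (compiled out, hypothetical helper name) of the get/put
// pairing above: map a physical page temporarily, operate on it through the
// returned kernel virtual address, then put the mapping back. For plain
// clearing, vm_memset_physical() below does this in one call.
#if 0
static status_t
zero_physical_page_example(phys_addr_t physicalAddress)
{
	addr_t virtualAddress;
	void* handle;
	status_t status = vm_get_physical_page(physicalAddress, &virtualAddress,
		&handle);
	if (status != B_OK)
		return status;

	memset((void*)virtualAddress, 0, B_PAGE_SIZE);

	return vm_put_physical_page(virtualAddress, handle);
}
#endif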
4448 
4449 
4450 status_t
4451 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4452 	void** _handle)
4453 {
4454 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4455 }
4456 
4457 status_t
4458 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4459 {
4460 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4461 }
4462 
4463 
4464 status_t
4465 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4466 {
4467 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4468 }
4469 
4470 status_t
4471 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4472 {
4473 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4474 }
4475 
4476 
4477 void
4478 vm_get_info(system_memory_info* info)
4479 {
4480 	swap_get_info(info);
4481 
4482 	info->max_memory = vm_page_num_pages() * B_PAGE_SIZE;
4483 	info->page_faults = sPageFaults;
4484 
4485 	MutexLocker locker(sAvailableMemoryLock);
4486 	info->free_memory = sAvailableMemory;
4487 	info->needed_memory = sNeededMemory;
4488 }
4489 
4490 
4491 uint32
4492 vm_num_page_faults(void)
4493 {
4494 	return sPageFaults;
4495 }
4496 
4497 
4498 off_t
4499 vm_available_memory(void)
4500 {
4501 	MutexLocker locker(sAvailableMemoryLock);
4502 	return sAvailableMemory;
4503 }
4504 
4505 
4506 off_t
4507 vm_available_not_needed_memory(void)
4508 {
4509 	MutexLocker locker(sAvailableMemoryLock);
4510 	return sAvailableMemory - sNeededMemory;
4511 }
4512 
4513 
4514 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4515 	debugger.
4516 */
4517 off_t
4518 vm_available_not_needed_memory_debug(void)
4519 {
4520 	return sAvailableMemory - sNeededMemory;
4521 }
4522 
4523 
4524 size_t
4525 vm_kernel_address_space_left(void)
4526 {
4527 	return VMAddressSpace::Kernel()->FreeSpace();
4528 }
4529 
4530 
4531 void
4532 vm_unreserve_memory(size_t amount)
4533 {
4534 	mutex_lock(&sAvailableMemoryLock);
4535 
4536 	sAvailableMemory += amount;
4537 
4538 	mutex_unlock(&sAvailableMemoryLock);
4539 }
4540 
4541 
4542 status_t
4543 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4544 {
4545 	size_t reserve = kMemoryReserveForPriority[priority];
4546 
4547 	MutexLocker locker(sAvailableMemoryLock);
4548 
4549 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4550 
4551 	if (sAvailableMemory >= amount + reserve) {
4552 		sAvailableMemory -= amount;
4553 		return B_OK;
4554 	}
4555 
4556 	if (timeout <= 0)
4557 		return B_NO_MEMORY;
4558 
4559 	// turn timeout into an absolute timeout
4560 	timeout += system_time();
4561 
4562 	// loop until we've got the memory or the timeout occurs
4563 	do {
4564 		sNeededMemory += amount;
4565 
4566 		// call the low resource manager
4567 		locker.Unlock();
4568 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4569 			B_ABSOLUTE_TIMEOUT, timeout);
4570 		locker.Lock();
4571 
4572 		sNeededMemory -= amount;
4573 
4574 		if (sAvailableMemory >= amount + reserve) {
4575 			sAvailableMemory -= amount;
4576 			return B_OK;
4577 		}
4578 	} while (timeout > system_time());
4579 
4580 	return B_NO_MEMORY;
4581 }
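
// Hedged usage sketch (compiled out): a caller that needs a memory commitment
// reserves it here and must return it via vm_unreserve_memory() when done.
// The helper name, the priority, and the one second timeout are illustrative.
#if 0
static status_t
with_memory_commitment_example(size_t size)
{
	status_t status = vm_try_reserve_memory(size, VM_PRIORITY_USER, 1000000);
	if (status != B_OK)
		return status;

	// ... allocate/use the committed memory ...

	vm_unreserve_memory(size);
	return B_OK;
}
#endif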
4582 
4583 
4584 status_t
4585 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4586 {
4587 	// NOTE: The caller is responsible for synchronizing calls to this function!
4588 
4589 	AddressSpaceReadLocker locker;
4590 	VMArea* area;
4591 	status_t status = locker.SetFromArea(id, area);
4592 	if (status != B_OK)
4593 		return status;
4594 
4595 	// nothing to do, if the type doesn't change
4596 	uint32 oldType = area->MemoryType();
4597 	if (type == oldType)
4598 		return B_OK;
4599 
4600 	// set the memory type of the area and the mapped pages
4601 	VMTranslationMap* map = area->address_space->TranslationMap();
4602 	map->Lock();
4603 	area->SetMemoryType(type);
4604 	map->ProtectArea(area, area->protection);
4605 	map->Unlock();
4606 
4607 	// set the physical memory type
4608 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4609 	if (error != B_OK) {
4610 		// reset the memory type of the area and the mapped pages
4611 		map->Lock();
4612 		area->SetMemoryType(oldType);
4613 		map->ProtectArea(area, area->protection);
4614 		map->Unlock();
4615 		return error;
4616 	}
4617 
4618 	return B_OK;
4619 
4620 }
4621 
4622 
4623 /*!	This function enforces some protection properties:
4624 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4625 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4626 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4627 	   and B_KERNEL_WRITE_AREA.
4628 */
4629 static void
4630 fix_protection(uint32* protection)
4631 {
4632 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4633 		if ((*protection & B_USER_PROTECTION) == 0
4634 			|| (*protection & B_WRITE_AREA) != 0)
4635 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4636 		else
4637 			*protection |= B_KERNEL_READ_AREA;
4638 	}
4639 }
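
// Compiled-out illustration of the rules documented above; the expected
// results are stated as comments and follow directly from the code.
#if 0
static void
fix_protection_example()
{
	uint32 protection = B_READ_AREA;
	fix_protection(&protection);
		// now B_READ_AREA | B_KERNEL_READ_AREA

	protection = B_READ_AREA | B_WRITE_AREA;
	fix_protection(&protection);
		// now additionally B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA

	protection = 0;
	fix_protection(&protection);
		// defaults to B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA
}
#endif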
4640 
4641 
4642 static void
4643 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4644 {
4645 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4646 	info->area = area->id;
4647 	info->address = (void*)area->Base();
4648 	info->size = area->Size();
4649 	info->protection = area->protection;
4650 	info->lock = B_FULL_LOCK;
4651 	info->team = area->address_space->ID();
4652 	info->copy_count = 0;
4653 	info->in_count = 0;
4654 	info->out_count = 0;
4655 		// TODO: retrieve real values here!
4656 
4657 	VMCache* cache = vm_area_get_locked_cache(area);
4658 
4659 	// Note, this is a simplification; the cache could be larger than this area
4660 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4661 
4662 	vm_area_put_locked_cache(cache);
4663 }
4664 
4665 
4666 static status_t
4667 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4668 {
4669 	// is newSize a multiple of B_PAGE_SIZE?
4670 	if (newSize & (B_PAGE_SIZE - 1))
4671 		return B_BAD_VALUE;
4672 
4673 	// lock all affected address spaces and the cache
4674 	VMArea* area;
4675 	VMCache* cache;
4676 
4677 	MultiAddressSpaceLocker locker;
4678 	AreaCacheLocker cacheLocker;
4679 
4680 	status_t status;
4681 	size_t oldSize;
4682 	bool anyKernelArea;
4683 	bool restart;
4684 
4685 	do {
4686 		anyKernelArea = false;
4687 		restart = false;
4688 
4689 		locker.Unset();
4690 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4691 		if (status != B_OK)
4692 			return status;
4693 		cacheLocker.SetTo(cache, true);	// already locked
4694 
4695 		// enforce restrictions
4696 		if (!kernel) {
4697 			if ((area->protection & B_KERNEL_AREA) != 0)
4698 				return B_NOT_ALLOWED;
4699 			// TODO: Enforce all restrictions (team, etc.)!
4700 		}
4701 
4702 		oldSize = area->Size();
4703 		if (newSize == oldSize)
4704 			return B_OK;
4705 
4706 		if (cache->type != CACHE_TYPE_RAM)
4707 			return B_NOT_ALLOWED;
4708 
4709 		if (oldSize < newSize) {
4710 			// We need to check if all areas of this cache can be resized.
4711 			for (VMArea* current = cache->areas; current != NULL;
4712 					current = current->cache_next) {
4713 				if (!current->address_space->CanResizeArea(current, newSize))
4714 					return B_ERROR;
4715 				anyKernelArea
4716 					|= current->address_space == VMAddressSpace::Kernel();
4717 			}
4718 		} else {
4719 			// We're shrinking the areas, so we must make sure the affected
4720 			// ranges are not wired.
4721 			for (VMArea* current = cache->areas; current != NULL;
4722 					current = current->cache_next) {
4723 				anyKernelArea
4724 					|= current->address_space == VMAddressSpace::Kernel();
4725 
4726 				if (wait_if_area_range_is_wired(current,
4727 						current->Base() + newSize, oldSize - newSize, &locker,
4728 						&cacheLocker)) {
4729 					restart = true;
4730 					break;
4731 				}
4732 			}
4733 		}
4734 	} while (restart);
4735 
4736 	// Okay, looks good so far, so let's do it
4737 
4738 	int priority = kernel && anyKernelArea
4739 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
4740 	uint32 allocationFlags = kernel && anyKernelArea
4741 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
4742 
4743 	if (oldSize < newSize) {
4744 		// Growing the cache can fail, so we do it first.
4745 		status = cache->Resize(cache->virtual_base + newSize, priority);
4746 		if (status != B_OK)
4747 			return status;
4748 	}
4749 
4750 	for (VMArea* current = cache->areas; current != NULL;
4751 			current = current->cache_next) {
4752 		status = current->address_space->ResizeArea(current, newSize,
4753 			allocationFlags);
4754 		if (status != B_OK)
4755 			break;
4756 
4757 		// We also need to unmap all pages beyond the new size, if the area has
4758 		// shrunk
4759 		if (newSize < oldSize) {
4760 			VMCacheChainLocker cacheChainLocker(cache);
4761 			cacheChainLocker.LockAllSourceCaches();
4762 
4763 			unmap_pages(current, current->Base() + newSize,
4764 				oldSize - newSize);
4765 
4766 			cacheChainLocker.Unlock(cache);
4767 		}
4768 	}
4769 
4770 	if (status == B_OK) {
4771 		// Shrink or grow individual page protections if in use.
4772 		if (area->page_protections != NULL) {
4773 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
4774 			uint8* newProtections
4775 				= (uint8*)realloc(area->page_protections, bytes);
4776 			if (newProtections == NULL)
4777 				status = B_NO_MEMORY;
4778 			else {
4779 				area->page_protections = newProtections;
4780 
4781 				if (oldSize < newSize) {
4782 					// init the additional page protections to that of the area
4783 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
4784 					uint32 areaProtection = area->protection
4785 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
4786 					memset(area->page_protections + offset,
4787 						areaProtection | (areaProtection << 4), bytes - offset);
4788 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
4789 						uint8& entry = area->page_protections[offset - 1];
4790 						entry = (entry & 0x0f) | (areaProtection << 4);
4791 					}
4792 				}
4793 			}
4794 		}
4795 	}
4796 
4797 	// shrinking the cache can't fail, so we do it now
4798 	if (status == B_OK && newSize < oldSize)
4799 		status = cache->Resize(cache->virtual_base + newSize, priority);
4800 
4801 	if (status != B_OK) {
4802 		// Something failed -- resize the areas back to their original size.
4803 		// This can fail, too, in which case we're seriously screwed.
4804 		for (VMArea* current = cache->areas; current != NULL;
4805 				current = current->cache_next) {
4806 			if (current->address_space->ResizeArea(current, oldSize,
4807 					allocationFlags) != B_OK) {
4808 				panic("vm_resize_area(): Failed and unable to restore the "
4809 					"original state.");
4810 			}
4811 		}
4812 
4813 		cache->Resize(cache->virtual_base + oldSize, priority);
4814 	}
4815 
4816 	// TODO: we must honour the lock restrictions of this area
4817 	return status;
4818 }
4819 
4820 
4821 status_t
4822 vm_memset_physical(phys_addr_t address, int value, size_t length)
4823 {
4824 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
4825 }
4826 
4827 
4828 status_t
4829 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
4830 {
4831 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
4832 }
4833 
4834 
4835 status_t
4836 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
4837 	bool user)
4838 {
4839 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
4840 }
4841 
4842 
4843 void
4844 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
4845 {
4846 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
4847 }
4848 
4849 
4850 /*!	Copies a range of memory directly from/to a page that might not be mapped
4851 	at the moment.
4852 
4853 	For \a unsafeMemory the current mapping (if any) is ignored. The function
4854 	walks through the respective area's cache chain to find the physical page
4855 	and copies from/to it directly.
4856 	The memory range starting at \a unsafeMemory with a length of \a size bytes
4857 	must not cross a page boundary.
4858 
4859 	\param teamID The team ID identifying the address space \a unsafeMemory is
4860 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
4861 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
4862 		is passed, the address space of the thread returned by
4863 		debug_get_debugged_thread() is used.
4864 	\param unsafeMemory The start of the unsafe memory range to be copied
4865 		from/to.
4866 	\param buffer A safely accessible kernel buffer to be copied from/to.
4867 	\param size The number of bytes to be copied.
4868 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
4869 		\a unsafeMemory, the other way around otherwise.
4870 */
4871 status_t
4872 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
4873 	size_t size, bool copyToUnsafe)
4874 {
4875 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
4876 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
4877 		return B_BAD_VALUE;
4878 	}
4879 
4880 	// get the address space for the debugged thread
4881 	VMAddressSpace* addressSpace;
4882 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
4883 		addressSpace = VMAddressSpace::Kernel();
4884 	} else if (teamID == B_CURRENT_TEAM) {
4885 		Thread* thread = debug_get_debugged_thread();
4886 		if (thread == NULL || thread->team == NULL)
4887 			return B_BAD_ADDRESS;
4888 
4889 		addressSpace = thread->team->address_space;
4890 	} else
4891 		addressSpace = VMAddressSpace::DebugGet(teamID);
4892 
4893 	if (addressSpace == NULL)
4894 		return B_BAD_ADDRESS;
4895 
4896 	// get the area
4897 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
4898 	if (area == NULL)
4899 		return B_BAD_ADDRESS;
4900 
4901 	// search the page
4902 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
4903 		+ area->cache_offset;
4904 	VMCache* cache = area->cache;
4905 	vm_page* page = NULL;
4906 	while (cache != NULL) {
4907 		page = cache->DebugLookupPage(cacheOffset);
4908 		if (page != NULL)
4909 			break;
4910 
4911 		// Page not found in this cache -- if it is paged out, we must not try
4912 		// to get it from lower caches.
4913 		if (cache->DebugHasPage(cacheOffset))
4914 			break;
4915 
4916 		cache = cache->source;
4917 	}
4918 
4919 	if (page == NULL)
4920 		return B_UNSUPPORTED;
4921 
4922 	// copy from/to physical memory
4923 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
4924 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
4925 
4926 	if (copyToUnsafe) {
4927 		if (page->Cache() != area->cache)
4928 			return B_UNSUPPORTED;
4929 
4930 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
4931 	}
4932 
4933 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
4934 }
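
// Hedged sketch (compiled out, hypothetical helper): reading a few bytes from
// a possibly unmapped user address of the debugged team into a kernel buffer.
// Remember that the helper above rejects ranges crossing a page boundary.
#if 0
static status_t
debug_peek_user_memory_example(team_id team, void* userAddress, void* buffer,
	size_t size)
{
	return vm_debug_copy_page_memory(team, userAddress, buffer, size, false);
}
#endif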
4935 
4936 
4937 //	#pragma mark - kernel public API
4938 
4939 
4940 status_t
4941 user_memcpy(void* to, const void* from, size_t size)
4942 {
4943 	// don't allow address overflows
4944 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
4945 		return B_BAD_ADDRESS;
4946 
4947 	if (arch_cpu_user_memcpy(to, from, size,
4948 			&thread_get_current_thread()->fault_handler) < B_OK)
4949 		return B_BAD_ADDRESS;
4950 
4951 	return B_OK;
4952 }
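
// Typical caller pattern for the user copy routines (compiled-out sketch,
// hypothetical helper name): a syscall validates the user pointer and copies
// an argument structure into kernel space, propagating B_BAD_ADDRESS.
#if 0
static status_t
copy_args_from_user_example(const void* userArgs, void* kernelArgs,
	size_t size)
{
	if (userArgs == NULL || !IS_USER_ADDRESS(userArgs))
		return B_BAD_ADDRESS;

	return user_memcpy(kernelArgs, userArgs, size);
}
#endif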
4953 
4954 
4955 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
4956 	the string in \a to, NULL-terminating the result.
4957 
4958 	\param to Pointer to the destination C-string.
4959 	\param from Pointer to the source C-string.
4960 	\param size Size in bytes of the string buffer pointed to by \a to.
4961 
4962 	\return strlen(\a from), or a negative error code on failure.
4963 */
4964 ssize_t
4965 user_strlcpy(char* to, const char* from, size_t size)
4966 {
4967 	if (to == NULL && size != 0)
4968 		return B_BAD_VALUE;
4969 	if (from == NULL)
4970 		return B_BAD_ADDRESS;
4971 
4972 	// limit size to avoid address overflows
4973 	size_t maxSize = std::min(size,
4974 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
4975 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
4976 		// the source address might still overflow.
4977 
4978 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize,
4979 		&thread_get_current_thread()->fault_handler);
4980 
4981 	// If we hit the address overflow boundary, fail.
4982 	if (result >= 0 && (size_t)result >= maxSize && maxSize < size)
4983 		return B_BAD_ADDRESS;
4984 
4985 	return result;
4986 }
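
// Hedged sketch (compiled out) of how the return value is usually
// interpreted: negative means the copy faulted, a value >= the buffer size
// means the source string was truncated. B_NAME_TOO_LONG is just an example
// error to report in the truncation case.
#if 0
static status_t
copy_user_name_example(const char* userName, char* buffer, size_t bufferSize)
{
	ssize_t length = user_strlcpy(buffer, userName, bufferSize);
	if (length < 0)
		return B_BAD_ADDRESS;
	if ((size_t)length >= bufferSize)
		return B_NAME_TOO_LONG;

	return B_OK;
}
#endif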
4987 
4988 
4989 status_t
4990 user_memset(void* s, char c, size_t count)
4991 {
4992 	// don't allow address overflows
4993 	if ((addr_t)s + count < (addr_t)s)
4994 		return B_BAD_ADDRESS;
4995 
4996 	if (arch_cpu_user_memset(s, c, count,
4997 			&thread_get_current_thread()->fault_handler) < B_OK)
4998 		return B_BAD_ADDRESS;
4999 
5000 	return B_OK;
5001 }
5002 
5003 
5004 /*!	Wires a single page at the given address.
5005 
5006 	\param team The team whose address space the address belongs to. Supports
5007 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5008 		parameter is ignored.
5009 	\param address The virtual address to wire down. Does not need to
5010 		be page aligned.
5011 	\param writable If \c true the page shall be writable.
5012 	\param info On success the info is filled in, among other things
5013 		containing the physical address the given virtual one translates to.
5014 	\return \c B_OK, when the page could be wired, another error code otherwise.
5015 */
5016 status_t
5017 vm_wire_page(team_id team, addr_t address, bool writable,
5018 	VMPageWiringInfo* info)
5019 {
5020 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5021 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5022 
5023 	// compute the page protection that is required
5024 	bool isUser = IS_USER_ADDRESS(address);
5025 	uint32 requiredProtection = PAGE_PRESENT
5026 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5027 	if (writable)
5028 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5029 
5030 	// get and read lock the address space
5031 	VMAddressSpace* addressSpace = NULL;
5032 	if (isUser) {
5033 		if (team == B_CURRENT_TEAM)
5034 			addressSpace = VMAddressSpace::GetCurrent();
5035 		else
5036 			addressSpace = VMAddressSpace::Get(team);
5037 	} else
5038 		addressSpace = VMAddressSpace::GetKernel();
5039 	if (addressSpace == NULL)
5040 		return B_ERROR;
5041 
5042 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5043 
5044 	VMTranslationMap* map = addressSpace->TranslationMap();
5045 	status_t error = B_OK;
5046 
5047 	// get the area
5048 	VMArea* area = addressSpace->LookupArea(pageAddress);
5049 	if (area == NULL) {
5050 		addressSpace->Put();
5051 		return B_BAD_ADDRESS;
5052 	}
5053 
5054 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5055 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5056 
5057 	// mark the area range wired
5058 	area->Wire(&info->range);
5059 
5060 	// Lock the area's cache chain and the translation map. Needed to look
5061 	// up the page and play with its wired count.
5062 	cacheChainLocker.LockAllSourceCaches();
5063 	map->Lock();
5064 
5065 	phys_addr_t physicalAddress;
5066 	uint32 flags;
5067 	vm_page* page;
5068 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5069 		&& (flags & requiredProtection) == requiredProtection
5070 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5071 			!= NULL) {
5072 		// Already mapped with the correct permissions -- just increment
5073 		// the page's wired count.
5074 		increment_page_wired_count(page);
5075 
5076 		map->Unlock();
5077 		cacheChainLocker.Unlock();
5078 		addressSpaceLocker.Unlock();
5079 	} else {
5080 		// Let vm_soft_fault() map the page for us, if possible. We need
5081 		// to fully unlock to avoid deadlocks. Since we have already
5082 		// wired the area itself, nothing disturbing will happen with it
5083 		// in the meantime.
5084 		map->Unlock();
5085 		cacheChainLocker.Unlock();
5086 		addressSpaceLocker.Unlock();
5087 
5088 		error = vm_soft_fault(addressSpace, pageAddress, writable, isUser,
5089 			&page, &info->range);
5090 
5091 		if (error != B_OK) {
5092 			// The page could not be mapped -- clean up.
5093 			VMCache* cache = vm_area_get_locked_cache(area);
5094 			area->Unwire(&info->range);
5095 			cache->ReleaseRefAndUnlock();
5096 			addressSpace->Put();
5097 			return error;
5098 		}
5099 	}
5100 
5101 	info->physicalAddress
5102 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5103 			+ address % B_PAGE_SIZE;
5104 	info->page = page;
5105 
5106 	return B_OK;
5107 }
5108 
5109 
5110 /*!	Unwires a single page previously wired via vm_wire_page().
5111 
5112 	\param info The same object passed to vm_wire_page() before.
5113 */
5114 void
5115 vm_unwire_page(VMPageWiringInfo* info)
5116 {
5117 	// lock the address space
5118 	VMArea* area = info->range.area;
5119 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5120 		// takes over our reference
5121 
5122 	// lock the top cache
5123 	VMCache* cache = vm_area_get_locked_cache(area);
5124 	VMCacheChainLocker cacheChainLocker(cache);
5125 
5126 	if (info->page->Cache() != cache) {
5127 		// The page is not in the top cache, so we lock the whole cache chain
5128 		// before touching the page's wired count.
5129 		cacheChainLocker.LockAllSourceCaches();
5130 	}
5131 
5132 	decrement_page_wired_count(info->page);
5133 
5134 	// remove the wired range from the area
5135 	area->Unwire(&info->range);
5136 
5137 	cacheChainLocker.Unlock();
5138 }
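
// Hedged sketch (compiled out, hypothetical helper) of the intended pairing
// of vm_wire_page()/vm_unwire_page(): wire a single page, use the physical
// address filled into the info, then unwire it again.
#if 0
static status_t
wire_single_page_example(team_id team, addr_t address, bool writable)
{
	VMPageWiringInfo info;
	status_t status = vm_wire_page(team, address, writable, &info);
	if (status != B_OK)
		return status;

	// info.physicalAddress is valid until the page is unwired again
	// ... use info.physicalAddress ...

	vm_unwire_page(&info);
	return B_OK;
}
#endif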
5139 
5140 
5141 /*!	Wires down the given address range in the specified team's address space.
5142 
5143 	If successful the function
5144 	- acquires a reference to the specified team's address space,
5145 	- adds respective wired ranges to all areas that intersect with the given
5146 	  address range,
5147 	- makes sure all pages in the given address range are mapped with the
5148 	  requested access permissions and increments their wired count.
5149 
5150 	It fails when \a team doesn't specify a valid address space, when any part
5151 	of the specified address range is not covered by areas, when the concerned
5152 	areas don't allow mapping with the requested permissions, or when mapping
5153 	failed for another reason.
5154 
5155 	When successful the call must be balanced by an unlock_memory_etc() call with
5156 	the exact same parameters.
5157 
5158 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5159 		supported.
5160 	\param address The start of the address range to be wired.
5161 	\param numBytes The size of the address range to be wired.
5162 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5163 		requests that the range must be wired writable ("read from device
5164 		into memory").
5165 	\return \c B_OK on success, another error code otherwise.
5166 */
5167 status_t
5168 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5169 {
5170 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5171 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5172 
5173 	// compute the page protection that is required
5174 	bool isUser = IS_USER_ADDRESS(address);
5175 	bool writable = (flags & B_READ_DEVICE) == 0;
5176 	uint32 requiredProtection = PAGE_PRESENT
5177 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5178 	if (writable)
5179 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5180 
5181 	uint32 mallocFlags = isUser
5182 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5183 
5184 	// get and read lock the address space
5185 	VMAddressSpace* addressSpace = NULL;
5186 	if (isUser) {
5187 		if (team == B_CURRENT_TEAM)
5188 			addressSpace = VMAddressSpace::GetCurrent();
5189 		else
5190 			addressSpace = VMAddressSpace::Get(team);
5191 	} else
5192 		addressSpace = VMAddressSpace::GetKernel();
5193 	if (addressSpace == NULL)
5194 		return B_ERROR;
5195 
5196 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5197 
5198 	VMTranslationMap* map = addressSpace->TranslationMap();
5199 	status_t error = B_OK;
5200 
5201 	// iterate through all concerned areas
5202 	addr_t nextAddress = lockBaseAddress;
5203 	while (nextAddress != lockEndAddress) {
5204 		// get the next area
5205 		VMArea* area = addressSpace->LookupArea(nextAddress);
5206 		if (area == NULL) {
5207 			error = B_BAD_ADDRESS;
5208 			break;
5209 		}
5210 
5211 		addr_t areaStart = nextAddress;
5212 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5213 
5214 		// allocate the wired range (do that before locking the cache to avoid
5215 		// deadlocks)
5216 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5217 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5218 		if (range == NULL) {
5219 			error = B_NO_MEMORY;
5220 			break;
5221 		}
5222 
5223 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5224 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5225 
5226 		// mark the area range wired
5227 		area->Wire(range);
5228 
5229 		// Depending on the area cache type and the wiring, we may not need to
5230 		// look at the individual pages.
5231 		if (area->cache_type == CACHE_TYPE_NULL
5232 			|| area->cache_type == CACHE_TYPE_DEVICE
5233 			|| area->wiring == B_FULL_LOCK
5234 			|| area->wiring == B_CONTIGUOUS) {
5235 			nextAddress = areaEnd;
5236 			continue;
5237 		}
5238 
5239 		// Lock the area's cache chain and the translation map. Needed to look
5240 		// up pages and play with their wired count.
5241 		cacheChainLocker.LockAllSourceCaches();
5242 		map->Lock();
5243 
5244 		// iterate through the pages and wire them
5245 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5246 			phys_addr_t physicalAddress;
5247 			uint32 flags;
5248 
5249 			vm_page* page;
5250 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5251 				&& (flags & requiredProtection) == requiredProtection
5252 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5253 					!= NULL) {
5254 				// Already mapped with the correct permissions -- just increment
5255 				// the page's wired count.
5256 				increment_page_wired_count(page);
5257 			} else {
5258 				// Let vm_soft_fault() map the page for us, if possible. We need
5259 				// to fully unlock to avoid deadlocks. Since we have already
5260 				// wired the area itself, nothing disturbing will happen with it
5261 				// in the meantime.
5262 				map->Unlock();
5263 				cacheChainLocker.Unlock();
5264 				addressSpaceLocker.Unlock();
5265 
5266 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5267 					isUser, &page, range);
5268 
5269 				addressSpaceLocker.Lock();
5270 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5271 				cacheChainLocker.LockAllSourceCaches();
5272 				map->Lock();
5273 			}
5274 
5275 			if (error != B_OK)
5276 				break;
5277 		}
5278 
5279 		map->Unlock();
5280 
5281 		if (error == B_OK) {
5282 			cacheChainLocker.Unlock();
5283 		} else {
5284 			// An error occurred, so abort right here. If the current address
5285 			// is the first in this area, unwire the area, since we won't get
5286 			// to it when reverting what we've done so far.
5287 			if (nextAddress == areaStart) {
5288 				area->Unwire(range);
5289 				cacheChainLocker.Unlock();
5290 				range->~VMAreaWiredRange();
5291 				free_etc(range, mallocFlags);
5292 			} else
5293 				cacheChainLocker.Unlock();
5294 
5295 			break;
5296 		}
5297 	}
5298 
5299 	if (error != B_OK) {
5300 		// An error occurred, so unwire all that we've already wired. Note that
5301 		// even if not a single page was wired, unlock_memory_etc() is called
5302 		// to put the address space reference.
5303 		addressSpaceLocker.Unlock();
5304 		unlock_memory_etc(team, (void*)address, nextAddress - lockBaseAddress,
5305 			flags);
5306 	}
5307 
5308 	return error;
5309 }
5310 
5311 
5312 status_t
5313 lock_memory(void* address, size_t numBytes, uint32 flags)
5314 {
5315 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5316 }
5317 
5318 
5319 /*!	Unwires an address range previously wired with lock_memory_etc().
5320 
5321 	Note that a call to this function must balance a previous lock_memory_etc()
5322 	call with exactly the same parameters.
5323 */
5324 status_t
5325 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5326 {
5327 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5328 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5329 
5330 	// compute the page protection that is required
5331 	bool isUser = IS_USER_ADDRESS(address);
5332 	bool writable = (flags & B_READ_DEVICE) == 0;
5333 	uint32 requiredProtection = PAGE_PRESENT
5334 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5335 	if (writable)
5336 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5337 
5338 	uint32 mallocFlags = isUser
5339 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5340 
5341 	// get and read lock the address space
5342 	VMAddressSpace* addressSpace = NULL;
5343 	if (isUser) {
5344 		if (team == B_CURRENT_TEAM)
5345 			addressSpace = VMAddressSpace::GetCurrent();
5346 		else
5347 			addressSpace = VMAddressSpace::Get(team);
5348 	} else
5349 		addressSpace = VMAddressSpace::GetKernel();
5350 	if (addressSpace == NULL)
5351 		return B_ERROR;
5352 
5353 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5354 
5355 	VMTranslationMap* map = addressSpace->TranslationMap();
5356 	status_t error = B_OK;
5357 
5358 	// iterate through all concerned areas
5359 	addr_t nextAddress = lockBaseAddress;
5360 	while (nextAddress != lockEndAddress) {
5361 		// get the next area
5362 		VMArea* area = addressSpace->LookupArea(nextAddress);
5363 		if (area == NULL) {
5364 			error = B_BAD_ADDRESS;
5365 			break;
5366 		}
5367 
5368 		addr_t areaStart = nextAddress;
5369 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5370 
5371 		// Lock the area's top cache. This is a requirement for
5372 		// VMArea::Unwire().
5373 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5374 
5375 		// Depending on the area cache type and the wiring, we may not need to
5376 		// look at the individual pages.
5377 		if (area->cache_type == CACHE_TYPE_NULL
5378 			|| area->cache_type == CACHE_TYPE_DEVICE
5379 			|| area->wiring == B_FULL_LOCK
5380 			|| area->wiring == B_CONTIGUOUS) {
5381 			// unwire the range (to avoid deadlocks we delete the range after
5382 			// unlocking the cache)
5383 			nextAddress = areaEnd;
5384 			VMAreaWiredRange* range = area->Unwire(areaStart,
5385 				areaEnd - areaStart, writable);
5386 			cacheChainLocker.Unlock();
5387 			if (range != NULL) {
5388 				range->~VMAreaWiredRange();
5389 				free_etc(range, mallocFlags);
5390 			}
5391 			continue;
5392 		}
5393 
5394 		// Lock the area's cache chain and the translation map. Needed to look
5395 		// up pages and play with their wired count.
5396 		cacheChainLocker.LockAllSourceCaches();
5397 		map->Lock();
5398 
5399 		// iterate through the pages and unwire them
5400 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5401 			phys_addr_t physicalAddress;
5402 			uint32 flags;
5403 
5404 			vm_page* page;
5405 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5406 				&& (flags & PAGE_PRESENT) != 0
5407 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5408 					!= NULL) {
5409 				// The page is still mapped -- just decrement its wired
5410 				// count.
5411 				decrement_page_wired_count(page);
5412 			} else {
5413 				panic("unlock_memory_etc(): Failed to unwire page: address "
5414 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5415 					nextAddress);
5416 				error = B_BAD_VALUE;
5417 				break;
5418 			}
5419 		}
5420 
5421 		map->Unlock();
5422 
5423 		// All pages are unwired. Remove the area's wired range as well (to
5424 		// avoid deadlocks we delete the range after unlocking the cache).
5425 		VMAreaWiredRange* range = area->Unwire(areaStart,
5426 			areaEnd - areaStart, writable);
5427 
5428 		cacheChainLocker.Unlock();
5429 
5430 		if (range != NULL) {
5431 			range->~VMAreaWiredRange();
5432 			free_etc(range, mallocFlags);
5433 		}
5434 
5435 		if (error != B_OK)
5436 			break;
5437 	}
5438 
5439 	// get rid of the address space reference
5440 	addressSpace->Put();
5441 
5442 	return error;
5443 }
5444 
5445 
5446 status_t
5447 unlock_memory(void* address, size_t numBytes, uint32 flags)
5448 {
5449 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5450 }
5451 
5452 
5453 /*!	Similar to get_memory_map(), but also allows specifying the address space
5454 	for the memory in question and has saner semantics.
5455 	Returns \c B_OK when the complete range could be translated or
5456 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5457 	case the actual number of entries is written to \c *_numEntries. Any other
5458 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5459 	in this case.
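
	A minimal usage sketch (hypothetical; \c buffer and \c length are
	placeholders):
	\code
	physical_entry entries[8];
	uint32 count = 8;
	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
		entries, &count);
	if (status == B_OK || status == B_BUFFER_OVERFLOW) {
		// count now holds the number of valid entries in the table
	}
	\endcode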
5460 */
5461 status_t
5462 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5463 	physical_entry* table, uint32* _numEntries)
5464 {
5465 	uint32 numEntries = *_numEntries;
5466 	*_numEntries = 0;
5467 
5468 	VMAddressSpace* addressSpace;
5469 	addr_t virtualAddress = (addr_t)address;
5470 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5471 	phys_addr_t physicalAddress;
5472 	status_t status = B_OK;
5473 	int32 index = -1;
5474 	addr_t offset = 0;
5475 	bool interrupts = are_interrupts_enabled();
5476 
5477 	TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team,
5478 		address, numBytes, numEntries));
5479 
5480 	if (numEntries == 0 || numBytes == 0)
5481 		return B_BAD_VALUE;
5482 
5483 	// in which address space is the address to be found?
5484 	if (IS_USER_ADDRESS(virtualAddress)) {
5485 		if (team == B_CURRENT_TEAM)
5486 			addressSpace = VMAddressSpace::GetCurrent();
5487 		else
5488 			addressSpace = VMAddressSpace::Get(team);
5489 	} else
5490 		addressSpace = VMAddressSpace::GetKernel();
5491 
5492 	if (addressSpace == NULL)
5493 		return B_ERROR;
5494 
5495 	VMTranslationMap* map = addressSpace->TranslationMap();
5496 
5497 	if (interrupts)
5498 		map->Lock();
5499 
5500 	while (offset < numBytes) {
5501 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5502 		uint32 flags;
5503 
5504 		if (interrupts) {
5505 			status = map->Query((addr_t)address + offset, &physicalAddress,
5506 				&flags);
5507 		} else {
5508 			status = map->QueryInterrupt((addr_t)address + offset,
5509 				&physicalAddress, &flags);
5510 		}
5511 		if (status < B_OK)
5512 			break;
5513 		if ((flags & PAGE_PRESENT) == 0) {
5514 			panic("get_memory_map() called on unmapped memory!");
5515 			return B_BAD_ADDRESS;
5516 		}
5517 
5518 		if (index < 0 && pageOffset > 0) {
5519 			physicalAddress += pageOffset;
5520 			if (bytes > B_PAGE_SIZE - pageOffset)
5521 				bytes = B_PAGE_SIZE - pageOffset;
5522 		}
5523 
5524 		// need to switch to the next physical_entry?
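		// (i.e. the current page does not directly follow, in physical memory,
		// the end of the entry we are currently building)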
5525 		if (index < 0 || table[index].address
5526 				!= physicalAddress - table[index].size) {
5527 			if ((uint32)++index + 1 > numEntries) {
5528 				// table too small
5529 				break;
5530 			}
5531 			table[index].address = physicalAddress;
5532 			table[index].size = bytes;
5533 		} else {
5534 			// the page is contiguous with the current entry -- just extend it
5535 			table[index].size += bytes;
5536 		}
5537 
5538 		offset += bytes;
5539 	}
5540 
5541 	if (interrupts)
5542 		map->Unlock();
5543 
5544 	if (status != B_OK)
5545 		return status;
5546 
5547 	if ((uint32)index + 1 > numEntries) {
5548 		*_numEntries = index;
5549 		return B_BUFFER_OVERFLOW;
5550 	}
5551 
5552 	*_numEntries = index + 1;
5553 	return B_OK;
5554 }
5555 
5556 
5557 /*!	According to the BeBook, this function should always succeed.
5558 	This is no longer the case.
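	Callers should therefore check the return value; a minimal sketch
	(\c buffer and \c length are placeholders):
	\code
	physical_entry entry;
	status_t status = get_memory_map(buffer, length, &entry, 1);
	if (status != B_OK) {
		// handle the error instead of assuming success
	}
	\endcode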
5559 */
5560 extern "C" int32
5561 __get_memory_map_haiku(const void* address, size_t numBytes,
5562 	physical_entry* table, int32 numEntries)
5563 {
5564 	uint32 entriesRead = numEntries;
5565 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5566 		table, &entriesRead);
5567 	if (error != B_OK)
5568 		return error;
5569 
5570 	// close the entry list
5571 
5572 	// if it's only one entry, we will silently accept the missing ending
5573 	if (numEntries == 1)
5574 		return B_OK;
5575 
5576 	if (entriesRead + 1 > (uint32)numEntries)
5577 		return B_BUFFER_OVERFLOW;
5578 
5579 	table[entriesRead].address = 0;
5580 	table[entriesRead].size = 0;
5581 
5582 	return B_OK;
5583 }
5584 
5585 
5586 area_id
5587 area_for(void* address)
5588 {
5589 	return vm_area_for((addr_t)address, true);
5590 }
5591 
5592 
5593 area_id
5594 find_area(const char* name)
5595 {
5596 	return VMAreaHash::Find(name);
5597 }
5598 
5599 
5600 status_t
5601 _get_area_info(area_id id, area_info* info, size_t size)
5602 {
5603 	if (size != sizeof(area_info) || info == NULL)
5604 		return B_BAD_VALUE;
5605 
5606 	AddressSpaceReadLocker locker;
5607 	VMArea* area;
5608 	status_t status = locker.SetFromArea(id, area);
5609 	if (status != B_OK)
5610 		return status;
5611 
5612 	fill_area_info(area, info, size);
5613 	return B_OK;
5614 }
5615 
5616 
5617 status_t
5618 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size)
5619 {
5620 	addr_t nextBase = *(addr_t*)cookie;
5621 
5622 	// we're already through the list
5623 	if (nextBase == (addr_t)-1)
5624 		return B_ENTRY_NOT_FOUND;
5625 
5626 	if (team == B_CURRENT_TEAM)
5627 		team = team_get_current_team_id();
5628 
5629 	AddressSpaceReadLocker locker(team);
5630 	if (!locker.IsLocked())
5631 		return B_BAD_TEAM_ID;
5632 
5633 	VMArea* area;
5634 	for (VMAddressSpace::AreaIterator it
5635 				= locker.AddressSpace()->GetAreaIterator();
5636 			(area = it.Next()) != NULL;) {
5637 		if (area->Base() > nextBase)
5638 			break;
5639 	}
5640 
5641 	if (area == NULL) {
5642 		nextBase = (addr_t)-1;
5643 		return B_ENTRY_NOT_FOUND;
5644 	}
5645 
5646 	fill_area_info(area, info, size);
5647 	*cookie = (int32)(area->Base());
5648 		// TODO: Not 64 bit safe!
5649 
5650 	return B_OK;
5651 }
5652 
5653 
5654 status_t
5655 set_area_protection(area_id area, uint32 newProtection)
5656 {
5657 	fix_protection(&newProtection);
5658 
5659 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5660 		newProtection, true);
5661 }
5662 
5663 
5664 status_t
5665 resize_area(area_id areaID, size_t newSize)
5666 {
5667 	return vm_resize_area(areaID, newSize, true);
5668 }
5669 
5670 
5671 /*!	Transfers the specified area to a new team. The caller must be the owner
5672 	of the area.
5673 */
5674 area_id
5675 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5676 	bool kernel)
5677 {
5678 	area_info info;
5679 	status_t status = get_area_info(id, &info);
5680 	if (status != B_OK)
5681 		return status;
5682 
5683 	if (info.team != thread_get_current_thread()->team->id)
5684 		return B_PERMISSION_DENIED;
5685 
5686 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5687 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5688 	if (clonedArea < 0)
5689 		return clonedArea;
5690 
5691 	status = vm_delete_area(info.team, id, kernel);
5692 	if (status != B_OK) {
5693 		vm_delete_area(target, clonedArea, kernel);
5694 		return status;
5695 	}
5696 
5697 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5698 
5699 	return clonedArea;
5700 }
5701 
5702 
5703 extern "C" area_id
5704 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
5705 	size_t numBytes, uint32 addressSpec, uint32 protection,
5706 	void** _virtualAddress)
5707 {
5708 	if (!arch_vm_supports_protection(protection))
5709 		return B_NOT_SUPPORTED;
5710 
5711 	fix_protection(&protection);
5712 
5713 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
5714 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
5715 		false);
5716 }
5717 
5718 
5719 area_id
5720 clone_area(const char* name, void** _address, uint32 addressSpec,
5721 	uint32 protection, area_id source)
5722 {
5723 	if ((protection & B_KERNEL_PROTECTION) == 0)
5724 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5725 
5726 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
5727 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
5728 }
5729 
5730 
5731 area_id
5732 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock,
5733 	uint32 protection, uint32 flags,
5734 	const virtual_address_restrictions* virtualAddressRestrictions,
5735 	const physical_address_restrictions* physicalAddressRestrictions,
5736 	void** _address)
5737 {
5738 	fix_protection(&protection);
5739 
5740 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
5741 		virtualAddressRestrictions, physicalAddressRestrictions, true,
5742 		_address);
5743 }
5744 
5745 
5746 extern "C" area_id
5747 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
5748 	size_t size, uint32 lock, uint32 protection)
5749 {
5750 	fix_protection(&protection);
5751 
5752 	virtual_address_restrictions virtualRestrictions = {};
5753 	virtualRestrictions.address = *_address;
5754 	virtualRestrictions.address_specification = addressSpec;
5755 	physical_address_restrictions physicalRestrictions = {};
5756 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
5757 		lock, protection, 0, &virtualRestrictions, &physicalRestrictions, true,
5758 		_address);
5759 }
5760 
5761 
5762 status_t
5763 delete_area(area_id area)
5764 {
5765 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
5766 }
5767 
5768 
5769 //	#pragma mark - Userland syscalls
5770 
5771 
5772 status_t
5773 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
5774 	addr_t size)
5775 {
5776 	// filter out some unavailable values (for userland)
5777 	switch (addressSpec) {
5778 		case B_ANY_KERNEL_ADDRESS:
5779 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5780 			return B_BAD_VALUE;
5781 	}
5782 
5783 	addr_t address;
5784 
5785 	if (!IS_USER_ADDRESS(userAddress)
5786 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
5787 		return B_BAD_ADDRESS;
5788 
5789 	status_t status = vm_reserve_address_range(
5790 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
5791 		RESERVED_AVOID_BASE);
5792 	if (status != B_OK)
5793 		return status;
5794 
5795 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
5796 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5797 			(void*)address, size);
5798 		return B_BAD_ADDRESS;
5799 	}
5800 
5801 	return B_OK;
5802 }
5803 
5804 
5805 status_t
5806 _user_unreserve_address_range(addr_t address, addr_t size)
5807 {
5808 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5809 		(void*)address, size);
5810 }
5811 
5812 
5813 area_id
5814 _user_area_for(void* address)
5815 {
5816 	return vm_area_for((addr_t)address, false);
5817 }
5818 
5819 
5820 area_id
5821 _user_find_area(const char* userName)
5822 {
5823 	char name[B_OS_NAME_LENGTH];
5824 
5825 	if (!IS_USER_ADDRESS(userName)
5826 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5827 		return B_BAD_ADDRESS;
5828 
5829 	return find_area(name);
5830 }
5831 
5832 
5833 status_t
5834 _user_get_area_info(area_id area, area_info* userInfo)
5835 {
5836 	if (!IS_USER_ADDRESS(userInfo))
5837 		return B_BAD_ADDRESS;
5838 
5839 	area_info info;
5840 	status_t status = get_area_info(area, &info);
5841 	if (status < B_OK)
5842 		return status;
5843 
5844 	// TODO: do we want to prevent userland from seeing kernel protections?
5845 	//info.protection &= B_USER_PROTECTION;
5846 
5847 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5848 		return B_BAD_ADDRESS;
5849 
5850 	return status;
5851 }
5852 
5853 
5854 status_t
5855 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo)
5856 {
5857 	int32 cookie;
5858 
5859 	if (!IS_USER_ADDRESS(userCookie)
5860 		|| !IS_USER_ADDRESS(userInfo)
5861 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
5862 		return B_BAD_ADDRESS;
5863 
5864 	area_info info;
5865 	status_t status = _get_next_area_info(team, &cookie, &info,
5866 		sizeof(area_info));
5867 	if (status != B_OK)
5868 		return status;
5869 
5870 	//info.protection &= B_USER_PROTECTION;
5871 
5872 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
5873 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5874 		return B_BAD_ADDRESS;
5875 
5876 	return status;
5877 }
5878 
5879 
5880 status_t
5881 _user_set_area_protection(area_id area, uint32 newProtection)
5882 {
5883 	if ((newProtection & ~B_USER_PROTECTION) != 0)
5884 		return B_BAD_VALUE;
5885 
5886 	fix_protection(&newProtection);
5887 
5888 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
5889 		newProtection, false);
5890 }
5891 
5892 
5893 status_t
5894 _user_resize_area(area_id area, size_t newSize)
5895 {
5896 	// TODO: Since we restrict deleting of areas to those owned by the team,
5897 	// we should also do that for resizing (check other functions, too).
5898 	return vm_resize_area(area, newSize, false);
5899 }
5900 
5901 
5902 area_id
5903 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
5904 	team_id target)
5905 {
5906 	// filter out some unavailable values (for userland)
5907 	switch (addressSpec) {
5908 		case B_ANY_KERNEL_ADDRESS:
5909 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5910 			return B_BAD_VALUE;
5911 	}
5912 
5913 	void* address;
5914 	if (!IS_USER_ADDRESS(userAddress)
5915 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5916 		return B_BAD_ADDRESS;
5917 
5918 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
5919 	if (newArea < B_OK)
5920 		return newArea;
5921 
5922 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
5923 		return B_BAD_ADDRESS;
5924 
5925 	return newArea;
5926 }
5927 
5928 
5929 area_id
5930 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
5931 	uint32 protection, area_id sourceArea)
5932 {
5933 	char name[B_OS_NAME_LENGTH];
5934 	void* address;
5935 
5936 	// filter out some unavailable values (for userland)
5937 	switch (addressSpec) {
5938 		case B_ANY_KERNEL_ADDRESS:
5939 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5940 			return B_BAD_VALUE;
5941 	}
5942 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
5943 		return B_BAD_VALUE;
5944 
5945 	if (!IS_USER_ADDRESS(userName)
5946 		|| !IS_USER_ADDRESS(userAddress)
5947 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
5948 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5949 		return B_BAD_ADDRESS;
5950 
5951 	fix_protection(&protection);
5952 
5953 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
5954 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
5955 		false);
5956 	if (clonedArea < B_OK)
5957 		return clonedArea;
5958 
5959 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5960 		delete_area(clonedArea);
5961 		return B_BAD_ADDRESS;
5962 	}
5963 
5964 	return clonedArea;
5965 }
5966 
5967 
5968 area_id
5969 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
5970 	size_t size, uint32 lock, uint32 protection)
5971 {
5972 	char name[B_OS_NAME_LENGTH];
5973 	void* address;
5974 
5975 	// filter out some unavailable values (for userland)
5976 	switch (addressSpec) {
5977 		case B_ANY_KERNEL_ADDRESS:
5978 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5979 			return B_BAD_VALUE;
5980 	}
5981 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
5982 		return B_BAD_VALUE;
5983 
5984 	if (!IS_USER_ADDRESS(userName)
5985 		|| !IS_USER_ADDRESS(userAddress)
5986 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
5987 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5988 		return B_BAD_ADDRESS;
5989 
5990 	if (addressSpec == B_EXACT_ADDRESS
5991 		&& IS_KERNEL_ADDRESS(address))
5992 		return B_BAD_VALUE;
5993 
5994 	fix_protection(&protection);
5995 
5996 	virtual_address_restrictions virtualRestrictions = {};
5997 	virtualRestrictions.address = address;
5998 	virtualRestrictions.address_specification = addressSpec;
5999 	physical_address_restrictions physicalRestrictions = {};
6000 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6001 		size, lock, protection, 0, &virtualRestrictions, &physicalRestrictions,
6002 		false, &address);
6003 
6004 	if (area >= B_OK
6005 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6006 		delete_area(area);
6007 		return B_BAD_ADDRESS;
6008 	}
6009 
6010 	return area;
6011 }
6012 
6013 
6014 status_t
6015 _user_delete_area(area_id area)
6016 {
6017 	// Unlike the BeOS implementation, you can now only delete areas
6018 	// that you have created yourself from userland.
6019 	// The documentation for delete_area() explicitly states that this
6020 	// will be restricted in the future, and so it is now.
6021 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6022 }
6023 
6024 
6025 // TODO: create a BeOS style call for this!
6026 
6027 area_id
6028 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6029 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6030 	int fd, off_t offset)
6031 {
6032 	char name[B_OS_NAME_LENGTH];
6033 	void* address;
6034 	area_id area;
6035 
6036 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6037 		return B_BAD_VALUE;
6038 
6039 	fix_protection(&protection);
6040 
6041 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6042 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6043 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6044 		return B_BAD_ADDRESS;
6045 
6046 	if (addressSpec == B_EXACT_ADDRESS) {
6047 		if ((addr_t)address + size < (addr_t)address
6048 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6049 			return B_BAD_VALUE;
6050 		}
6051 		if (!IS_USER_ADDRESS(address)
6052 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6053 			return B_BAD_ADDRESS;
6054 		}
6055 	}
6056 
6057 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6058 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6059 		false);
6060 	if (area < B_OK)
6061 		return area;
6062 
6063 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6064 		return B_BAD_ADDRESS;
6065 
6066 	return area;
6067 }
6068 
6069 
6070 status_t
6071 _user_unmap_memory(void* _address, size_t size)
6072 {
6073 	addr_t address = (addr_t)_address;
6074 
6075 	// check params
6076 	if (size == 0 || (addr_t)address + size < (addr_t)address
6077 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6078 		return B_BAD_VALUE;
6079 	}
6080 
6081 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6082 		return B_BAD_ADDRESS;
6083 
6084 	// Write lock the address space and ensure the address range is not wired.
6085 	AddressSpaceWriteLocker locker;
6086 	do {
6087 		status_t status = locker.SetTo(team_get_current_team_id());
6088 		if (status != B_OK)
6089 			return status;
6090 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6091 			size, &locker));
6092 
6093 	// unmap
6094 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6095 }
6096 
6097 
6098 status_t
6099 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6100 {
6101 	// check address range
6102 	addr_t address = (addr_t)_address;
6103 	size = PAGE_ALIGN(size);
6104 
6105 	if ((address % B_PAGE_SIZE) != 0)
6106 		return B_BAD_VALUE;
6107 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6108 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6109 		// weird error code required by POSIX
6110 		return ENOMEM;
6111 	}
6112 
6113 	// extend and check protection
6114 	if ((protection & ~B_USER_PROTECTION) != 0)
6115 		return B_BAD_VALUE;
6116 
6117 	fix_protection(&protection);
6118 
6119 	// We need to write lock the address space, since we're going to play with
6120 	// the areas. Also make sure that none of the areas is wired and that we're
6121 	// actually allowed to change the protection.
6122 	AddressSpaceWriteLocker locker;
6123 
6124 	bool restart;
6125 	do {
6126 		restart = false;
6127 
6128 		status_t status = locker.SetTo(team_get_current_team_id());
6129 		if (status != B_OK)
6130 			return status;
6131 
6132 		// First round: Check whether the whole range is covered by areas and
6133 		// whether we are allowed to modify them.
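		// (Checking the whole range up front means we don't start changing
		// protections when part of the range is not covered by areas or may
		// not be modified.)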
6134 		addr_t currentAddress = address;
6135 		size_t sizeLeft = size;
6136 		while (sizeLeft > 0) {
6137 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6138 			if (area == NULL)
6139 				return B_NO_MEMORY;
6140 
6141 			if ((area->protection & B_KERNEL_AREA) != 0)
6142 				return B_NOT_ALLOWED;
6143 
6144 			AreaCacheLocker cacheLocker(area);
6145 
6146 			if (wait_if_area_is_wired(area, &locker, &cacheLocker)) {
6147 				restart = true;
6148 				break;
6149 			}
6150 
6151 			cacheLocker.Unlock();
6152 
6153 			// TODO: For (shared) mapped files we should check whether the new
6154 			// protections are compatible with the file permissions. We don't
6155 			// have a way to do that yet, though.
6156 
6157 			addr_t offset = currentAddress - area->Base();
6158 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6159 
6160 			currentAddress += rangeSize;
6161 			sizeLeft -= rangeSize;
6162 		}
6163 	} while (restart);
6164 
6165 	// Second round: If the protections differ from those of the area, create a
6166 	// page protection array and re-map mapped pages.
6167 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6168 	addr_t currentAddress = address;
6169 	size_t sizeLeft = size;
6170 	while (sizeLeft > 0) {
6171 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6172 		if (area == NULL)
6173 			return B_NO_MEMORY;
6174 
6175 		addr_t offset = currentAddress - area->Base();
6176 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6177 
6178 		currentAddress += rangeSize;
6179 		sizeLeft -= rangeSize;
6180 
6181 		if (area->page_protections == NULL) {
6182 			if (area->protection == protection)
6183 				continue;
6184 
6185 			// In the page protections we store only the three user protections,
6186 			// so we use 4 bits per page.
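			// Two pages share one byte: e.g. a 16 page area needs
			// (16 + 1) / 2 = 8 bytes; the "+ 1" rounds up for areas with an
			// odd page count.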
6187 			uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
6188 			area->page_protections = (uint8*)malloc(bytes);
6189 			if (area->page_protections == NULL)
6190 				return B_NO_MEMORY;
6191 
6192 			// init the page protections for all pages to that of the area
6193 			uint32 areaProtection = area->protection
6194 				& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
6195 			memset(area->page_protections,
6196 				areaProtection | (areaProtection << 4), bytes);
6197 		}
6198 
6199 		// We need to lock the complete cache chain, since we potentially unmap
6200 		// pages of lower caches.
6201 		VMCache* topCache = vm_area_get_locked_cache(area);
6202 		VMCacheChainLocker cacheChainLocker(topCache);
6203 		cacheChainLocker.LockAllSourceCaches();
6204 
6205 		for (addr_t pageAddress = area->Base() + offset;
6206 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6207 			map->Lock();
6208 
6209 			set_area_page_protection(area, pageAddress, protection);
6210 
6211 			phys_addr_t physicalAddress;
6212 			uint32 flags;
6213 
6214 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6215 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6216 				map->Unlock();
6217 				continue;
6218 			}
6219 
6220 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6221 			if (page == NULL) {
6222 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6223 					"\n", area, physicalAddress);
6224 				map->Unlock();
6225 				return B_ERROR;
6226 			}
6227 
6228 			// If the page is not in the topmost cache and write access is
6229 			// requested, we have to unmap it. Otherwise we can re-map it with
6230 			// the new protection.
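			// (Unmapping it makes the next write access fault, so that
			// vm_soft_fault() can first copy the page into the topmost cache,
			// preserving copy-on-write semantics.)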
6231 			bool unmapPage = page->Cache() != topCache
6232 				&& (protection & B_WRITE_AREA) != 0;
6233 
6234 			if (!unmapPage)
6235 				map->ProtectPage(area, pageAddress, protection);
6236 
6237 			map->Unlock();
6238 
6239 			if (unmapPage) {
6240 				DEBUG_PAGE_ACCESS_START(page);
6241 				unmap_page(area, pageAddress);
6242 				DEBUG_PAGE_ACCESS_END(page);
6243 			}
6244 		}
6245 	}
6246 
6247 	return B_OK;
6248 }
6249 
6250 
6251 status_t
6252 _user_sync_memory(void* _address, size_t size, uint32 flags)
6253 {
6254 	addr_t address = (addr_t)_address;
6255 	size = PAGE_ALIGN(size);
6256 
6257 	// check params
6258 	if ((address % B_PAGE_SIZE) != 0)
6259 		return B_BAD_VALUE;
6260 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6261 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6262 		// weird error code required by POSIX
6263 		return ENOMEM;
6264 	}
6265 
6266 	bool writeSync = (flags & MS_SYNC) != 0;
6267 	bool writeAsync = (flags & MS_ASYNC) != 0;
6268 	if (writeSync && writeAsync)
6269 		return B_BAD_VALUE;
6270 
6271 	if (size == 0 || (!writeSync && !writeAsync))
6272 		return B_OK;
6273 
6274 	// iterate through the range and sync all concerned areas
6275 	while (size > 0) {
6276 		// read lock the address space
6277 		AddressSpaceReadLocker locker;
6278 		status_t error = locker.SetTo(team_get_current_team_id());
6279 		if (error != B_OK)
6280 			return error;
6281 
6282 		// get the first area
6283 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6284 		if (area == NULL)
6285 			return B_NO_MEMORY;
6286 
6287 		uint32 offset = address - area->Base();
6288 		size_t rangeSize = min_c(area->Size() - offset, size);
6289 		offset += area->cache_offset;
6290 
6291 		// lock the cache
6292 		AreaCacheLocker cacheLocker(area);
6293 		if (!cacheLocker)
6294 			return B_BAD_VALUE;
6295 		VMCache* cache = area->cache;
6296 
6297 		locker.Unlock();
6298 
6299 		uint32 firstPage = offset >> PAGE_SHIFT;
6300 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6301 
6302 		// write the pages
6303 		if (cache->type == CACHE_TYPE_VNODE) {
6304 			if (writeSync) {
6305 				// synchronous
6306 				error = vm_page_write_modified_page_range(cache, firstPage,
6307 					endPage);
6308 				if (error != B_OK)
6309 					return error;
6310 			} else {
6311 				// asynchronous
6312 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6313 				// TODO: This is probably not quite what is supposed to happen.
6314 				// Especially when a lot has to be written, it might take ages
6315 				// until it really hits the disk.
6316 			}
6317 		}
6318 
6319 		address += rangeSize;
6320 		size -= rangeSize;
6321 	}
6322 
6323 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6324 	// synchronize multiple mappings of the same file. In our VM they never get
6325 	// out of sync, though, so we don't have to do anything.
6326 
6327 	return B_OK;
6328 }
6329 
6330 
6331 status_t
6332 _user_memory_advice(void* address, size_t size, uint32 advice)
6333 {
6334 	// TODO: Implement!
6335 	return B_OK;
6336 }
6337 
6338 
6339 status_t
6340 _user_get_memory_properties(team_id teamID, const void* address,
6341 	uint32* _protected, uint32* _lock)
6342 {
6343 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6344 		return B_BAD_ADDRESS;
6345 
6346 	AddressSpaceReadLocker locker;
6347 	status_t error = locker.SetTo(teamID);
6348 	if (error != B_OK)
6349 		return error;
6350 
6351 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6352 	if (area == NULL)
6353 		return B_NO_MEMORY;
6354 
6355 
6356 	uint32 protection = area->protection;
6357 	if (area->page_protections != NULL)
6358 		protection = get_area_page_protection(area, (addr_t)address);
6359 
6360 	uint32 wiring = area->wiring;
6361 
6362 	locker.Unlock();
6363 
6364 	error = user_memcpy(_protected, &protection, sizeof(protection));
6365 	if (error != B_OK)
6366 		return error;
6367 
6368 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6369 
6370 	return error;
6371 }
6372 
6373 
6374 // #pragma mark -- compatibility
6375 
6376 
6377 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6378 
6379 
6380 struct physical_entry_beos {
6381 	uint32	address;
6382 	uint32	size;
6383 };
6384 
6385 
6386 /*!	The physical_entry structure has changed. We need to translate it to the
6387 	old one.
6388 */
6389 extern "C" int32
6390 __get_memory_map_beos(const void* _address, size_t numBytes,
6391 	physical_entry_beos* table, int32 numEntries)
6392 {
6393 	if (numEntries <= 0)
6394 		return B_BAD_VALUE;
6395 
6396 	const uint8* address = (const uint8*)_address;
6397 
6398 	int32 count = 0;
6399 	while (numBytes > 0 && count < numEntries) {
6400 		physical_entry entry;
6401 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6402 		if (result < 0) {
6403 			if (result != B_BUFFER_OVERFLOW)
6404 				return result;
6405 		}
6406 
6407 		if (entry.address >= (phys_addr_t)1 << 32) {
6408 			panic("get_memory_map(): Address is greater than 4 GB!");
6409 			return B_ERROR;
6410 		}
6411 
6412 		table[count].address = entry.address;
6413 		table[count++].size = entry.size;
6414 
6415 		address += entry.size;
6416 		numBytes -= entry.size;
6417 	}
6418 
6419 	// null-terminate the table, if possible
6420 	if (count < numEntries) {
6421 		table[count].address = 0;
6422 		table[count].size = 0;
6423 	}
6424 
6425 	return B_OK;
6426 }
6427 
6428 
6429 /*!	The type of the \a physicalAddress parameter has changed from void* to
6430 	phys_addr_t.
6431 */
6432 extern "C" area_id
6433 __map_physical_memory_beos(const char* name, void* physicalAddress,
6434 	size_t numBytes, uint32 addressSpec, uint32 protection,
6435 	void** _virtualAddress)
6436 {
6437 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6438 		addressSpec, protection, _virtualAddress);
6439 }
6440 
6441 
6442 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6443 	we meddle with the \a lock parameter to force 32 bit.
6444 */
6445 extern "C" area_id
6446 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6447 	size_t size, uint32 lock, uint32 protection)
6448 {
6449 	switch (lock) {
6450 		case B_NO_LOCK:
6451 			break;
6452 		case B_FULL_LOCK:
6453 		case B_LAZY_LOCK:
6454 			lock = B_32_BIT_FULL_LOCK;
6455 			break;
6456 		case B_CONTIGUOUS:
6457 			lock = B_32_BIT_CONTIGUOUS;
6458 			break;
6459 	}
6460 
6461 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6462 		protection);
6463 }
6464 
6465 
6466 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6467 	"BASE");
6468 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6469 	"map_physical_memory@", "BASE");
6470 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6471 	"BASE");
6472 
6473 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6474 	"get_memory_map@@", "1_ALPHA3");
6475 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6476 	"map_physical_memory@@", "1_ALPHA3");
6477 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6478 	"1_ALPHA3");
6479 
6480 
6481 #else
6482 
6483 
6484 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6485 	"get_memory_map@@", "BASE");
6486 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6487 	"map_physical_memory@@", "BASE");
6488 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6489 	"BASE");
6490 
6491 
6492 #endif	// defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6493