xref: /haiku/src/system/kernel/vm/vm.cpp (revision a5bf12376daeded4049521eb17a6cc41192250d9)
1 /*
2  * Copyright 2009-2010, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <arch/cpu.h>
27 #include <arch/vm.h>
28 #include <boot/elf.h>
29 #include <boot/stage2.h>
30 #include <condition_variable.h>
31 #include <console.h>
32 #include <debug.h>
33 #include <file_cache.h>
34 #include <fs/fd.h>
35 #include <heap.h>
36 #include <kernel.h>
37 #include <int.h>
38 #include <lock.h>
39 #include <low_resource_manager.h>
40 #include <slab/Slab.h>
41 #include <smp.h>
42 #include <system_info.h>
43 #include <thread.h>
44 #include <team.h>
45 #include <tracing.h>
46 #include <util/AutoLock.h>
47 #include <util/khash.h>
48 #include <vm/vm_page.h>
49 #include <vm/vm_priv.h>
50 #include <vm/VMAddressSpace.h>
51 #include <vm/VMArea.h>
52 #include <vm/VMCache.h>
53 
54 #include "VMAddressSpaceLocking.h"
55 #include "VMAnonymousCache.h"
56 #include "IORequest.h"
57 
58 
59 //#define TRACE_VM
60 //#define TRACE_FAULTS
61 #ifdef TRACE_VM
62 #	define TRACE(x) dprintf x
63 #else
64 #	define TRACE(x) ;
65 #endif
66 #ifdef TRACE_FAULTS
67 #	define FTRACE(x) dprintf x
68 #else
69 #	define FTRACE(x) ;
70 #endif
71 
72 
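/*!	Locking policy used by AreaCacheLocker: Lock() is a no-op (the cache is
	expected to have been locked via vm_area_get_locked_cache() already), and
	Unlock() releases both the lock and the reference through
	vm_area_put_locked_cache().
*/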
73 class AreaCacheLocking {
74 public:
75 	inline bool Lock(VMCache* lockable)
76 	{
77 		return false;
78 	}
79 
80 	inline void Unlock(VMCache* lockable)
81 	{
82 		vm_area_put_locked_cache(lockable);
83 	}
84 };
85 
86 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
87 public:
88 	inline AreaCacheLocker(VMCache* cache = NULL)
89 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
90 	{
91 	}
92 
93 	inline AreaCacheLocker(VMArea* area)
94 		: AutoLocker<VMCache, AreaCacheLocking>()
95 	{
96 		SetTo(area);
97 	}
98 
99 	inline void SetTo(VMCache* cache, bool alreadyLocked)
100 	{
101 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
102 	}
103 
104 	inline void SetTo(VMArea* area)
105 	{
106 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
107 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
108 	}
109 };
110 
111 
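/*!	Locks a chain of caches from a given top (consumer) cache down to the
	bottommost source cache. Each cache's user data is used to remember its
	consumer, so that Unlock() can release the chain in source -> consumer
	order.
*/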
112 class VMCacheChainLocker {
113 public:
114 	VMCacheChainLocker()
115 		:
116 		fTopCache(NULL),
117 		fBottomCache(NULL)
118 	{
119 	}
120 
121 	VMCacheChainLocker(VMCache* topCache)
122 		:
123 		fTopCache(topCache),
124 		fBottomCache(topCache)
125 	{
126 	}
127 
128 	~VMCacheChainLocker()
129 	{
130 		Unlock();
131 	}
132 
133 	void SetTo(VMCache* topCache)
134 	{
135 		fTopCache = topCache;
136 		fBottomCache = topCache;
137 
138 		if (topCache != NULL)
139 			topCache->SetUserData(NULL);
140 	}
141 
142 	VMCache* LockSourceCache()
143 	{
144 		if (fBottomCache == NULL || fBottomCache->source == NULL)
145 			return NULL;
146 
147 		VMCache* previousCache = fBottomCache;
148 
149 		fBottomCache = fBottomCache->source;
150 		fBottomCache->Lock();
151 		fBottomCache->AcquireRefLocked();
152 		fBottomCache->SetUserData(previousCache);
153 
154 		return fBottomCache;
155 	}
156 
157 	void LockAllSourceCaches()
158 	{
159 		while (LockSourceCache() != NULL) {
160 		}
161 	}
162 
163 	void Unlock(VMCache* exceptCache = NULL)
164 	{
165 		if (fTopCache == NULL)
166 			return;
167 
168 		// Unlock caches in source -> consumer direction. This is important to
169 		// avoid double-locking and a reversal of locking order in case a cache
170 		// is eligible for merging.
171 		VMCache* cache = fBottomCache;
172 		while (cache != NULL) {
173 			VMCache* nextCache = (VMCache*)cache->UserData();
174 			if (cache != exceptCache)
175 				cache->ReleaseRefAndUnlock(cache != fTopCache);
176 
177 			if (cache == fTopCache)
178 				break;
179 
180 			cache = nextCache;
181 		}
182 
183 		fTopCache = NULL;
184 		fBottomCache = NULL;
185 	}
186 
187 	void UnlockKeepRefs(bool keepTopCacheLocked)
188 	{
189 		if (fTopCache == NULL)
190 			return;
191 
192 		VMCache* nextCache = fBottomCache;
193 		VMCache* cache = NULL;
194 
195 		while (keepTopCacheLocked
196 				? nextCache != fTopCache : cache != fTopCache) {
197 			cache = nextCache;
198 			nextCache = (VMCache*)cache->UserData();
199 			cache->Unlock(cache != fTopCache);
200 		}
201 	}
202 
203 	void RelockCaches(bool topCacheLocked)
204 	{
205 		if (fTopCache == NULL)
206 			return;
207 
208 		VMCache* nextCache = fTopCache;
209 		VMCache* cache = NULL;
210 		if (topCacheLocked) {
211 			cache = nextCache;
212 			nextCache = cache->source;
213 		}
214 
215 		while (cache != fBottomCache && nextCache != NULL) {
216 			VMCache* consumer = cache;
217 			cache = nextCache;
218 			nextCache = cache->source;
219 			cache->Lock();
220 			cache->SetUserData(consumer);
221 		}
222 	}
223 
224 private:
225 	VMCache*	fTopCache;
226 	VMCache*	fBottomCache;
227 };
228 
229 
230 // The memory reserve that an allocation of a given priority must not touch.
231 static const size_t kMemoryReserveForPriority[] = {
232 	VM_MEMORY_RESERVE_USER,		// user
233 	VM_MEMORY_RESERVE_SYSTEM,	// system
234 	0							// VIP
235 };
236 
237 
238 ObjectCache* gPageMappingsObjectCache;
239 
240 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
241 
242 static off_t sAvailableMemory;
243 static off_t sNeededMemory;
244 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
245 static uint32 sPageFaults;
246 
247 static VMPhysicalPageMapper* sPhysicalPageMapper;
248 
249 #if DEBUG_CACHE_LIST
250 
251 struct cache_info {
252 	VMCache*	cache;
253 	addr_t		page_count;
254 	addr_t		committed;
255 };
256 
257 static const int kCacheInfoTableCount = 100 * 1024;
258 static cache_info* sCacheInfoTable;
259 
260 #endif	// DEBUG_CACHE_LIST
261 
262 
263 // function declarations
264 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
265 	bool addressSpaceCleanup);
266 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
267 	bool isWrite, bool isUser, vm_page** wirePage,
268 	VMAreaWiredRange* wiredRange = NULL);
269 static status_t map_backing_store(VMAddressSpace* addressSpace,
270 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
271 	int protection, int mapping, uint32 flags,
272 	const virtual_address_restrictions* addressRestrictions, bool kernel,
273 	VMArea** _area, void** _virtualAddress);
274 
275 
276 //	#pragma mark -
277 
278 
279 #if VM_PAGE_FAULT_TRACING
280 
281 namespace VMPageFaultTracing {
282 
283 class PageFaultStart : public AbstractTraceEntry {
284 public:
285 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
286 		:
287 		fAddress(address),
288 		fPC(pc),
289 		fWrite(write),
290 		fUser(user)
291 	{
292 		Initialized();
293 	}
294 
295 	virtual void AddDump(TraceOutput& out)
296 	{
297 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
298 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
299 	}
300 
301 private:
302 	addr_t	fAddress;
303 	addr_t	fPC;
304 	bool	fWrite;
305 	bool	fUser;
306 };
307 
308 
309 // page fault errors
310 enum {
311 	PAGE_FAULT_ERROR_NO_AREA		= 0,
312 	PAGE_FAULT_ERROR_KERNEL_ONLY,
313 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
314 	PAGE_FAULT_ERROR_READ_PROTECTED,
315 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
316 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
317 };
318 
319 
320 class PageFaultError : public AbstractTraceEntry {
321 public:
322 	PageFaultError(area_id area, status_t error)
323 		:
324 		fArea(area),
325 		fError(error)
326 	{
327 		Initialized();
328 	}
329 
330 	virtual void AddDump(TraceOutput& out)
331 	{
332 		switch (fError) {
333 			case PAGE_FAULT_ERROR_NO_AREA:
334 				out.Print("page fault error: no area");
335 				break;
336 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
337 				out.Print("page fault error: area: %ld, kernel only", fArea);
338 				break;
339 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
340 				out.Print("page fault error: area: %ld, write protected",
341 					fArea);
342 				break;
343 			case PAGE_FAULT_ERROR_READ_PROTECTED:
344 				out.Print("page fault error: area: %ld, read protected", fArea);
345 				break;
346 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
347 				out.Print("page fault error: kernel touching bad user memory");
348 				break;
349 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
350 				out.Print("page fault error: no address space");
351 				break;
352 			default:
353 				out.Print("page fault error: area: %ld, error: %s", fArea,
354 					strerror(fError));
355 				break;
356 		}
357 	}
358 
359 private:
360 	area_id		fArea;
361 	status_t	fError;
362 };
363 
364 
365 class PageFaultDone : public AbstractTraceEntry {
366 public:
367 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
368 			vm_page* page)
369 		:
370 		fArea(area),
371 		fTopCache(topCache),
372 		fCache(cache),
373 		fPage(page)
374 	{
375 		Initialized();
376 	}
377 
378 	virtual void AddDump(TraceOutput& out)
379 	{
380 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
381 			"page: %p", fArea, fTopCache, fCache, fPage);
382 	}
383 
384 private:
385 	area_id		fArea;
386 	VMCache*	fTopCache;
387 	VMCache*	fCache;
388 	vm_page*	fPage;
389 };
390 
391 }	// namespace VMPageFaultTracing
392 
393 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
394 #else
395 #	define TPF(x) ;
396 #endif	// VM_PAGE_FAULT_TRACING
397 
398 
399 //	#pragma mark -
400 
401 
402 /*!	The page's cache must be locked.
403 */
404 static inline void
405 increment_page_wired_count(vm_page* page)
406 {
407 	if (page->wired_count++ == 0 && page->mappings.IsEmpty())
408 		atomic_add(&gMappedPagesCount, 1);
409 }
410 
411 
412 /*!	The page's cache must be locked.
413 */
414 static inline void
415 decrement_page_wired_count(vm_page* page)
416 {
417 	if (--page->wired_count == 0 && page->mappings.IsEmpty())
418 		atomic_add(&gMappedPagesCount, -1);
419 }
420 
421 
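/*!	Returns the virtual address within \a area at which \a page is mapped,
	computed from the page's cache offset relative to the area's cache offset.
*/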
422 static inline addr_t
423 virtual_page_address(VMArea* area, vm_page* page)
424 {
425 	return area->Base()
426 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
427 }
428 
429 
430 //! You need to have the address space locked when calling this function
431 static VMArea*
432 lookup_area(VMAddressSpace* addressSpace, area_id id)
433 {
434 	VMAreaHash::ReadLock();
435 
436 	VMArea* area = VMAreaHash::LookupLocked(id);
437 	if (area != NULL && area->address_space != addressSpace)
438 		area = NULL;
439 
440 	VMAreaHash::ReadUnlock();
441 
442 	return area;
443 }
444 
445 
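/*!	Sets the per-page protection for the page at \a pageAddress in the area's
	page_protections array. Each byte stores the protections of two pages:
	the low nibble for the even page index, the high nibble for the odd one
	(e.g. page index 5 lives in the high nibble of byte 2).
*/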
446 static inline void
447 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
448 {
449 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
450 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
451 	uint8& entry = area->page_protections[pageIndex / 2];
452 	if (pageIndex % 2 == 0)
453 		entry = (entry & 0xf0) | protection;
454 	else
455 		entry = (entry & 0x0f) | (protection << 4);
456 }
457 
458 
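/*!	Returns the protection of the page at \a pageAddress, falling back to the
	area's protection if no per-page protections are set. Kernel read access
	is always implied, kernel write access if the page is user-writable.
*/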
459 static inline uint32
460 get_area_page_protection(VMArea* area, addr_t pageAddress)
461 {
462 	if (area->page_protections == NULL)
463 		return area->protection;
464 
465 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
466 	uint32 protection = area->page_protections[pageIndex / 2];
467 	if (pageIndex % 2 == 0)
468 		protection &= 0x0f;
469 	else
470 		protection >>= 4;
471 
472 	return protection | B_KERNEL_READ_AREA
473 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
474 }
475 
476 
477 /*!	The caller must have reserved enough pages that the translation map
478 	implementation might need to map this page.
479 	The page's cache must be locked.
480 */
481 static status_t
482 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
483 	vm_page_reservation* reservation)
484 {
485 	VMTranslationMap* map = area->address_space->TranslationMap();
486 
487 	bool wasMapped = page->wired_count > 0 || !page->mappings.IsEmpty();
488 
489 	if (area->wiring == B_NO_LOCK) {
490 		DEBUG_PAGE_ACCESS_CHECK(page);
491 
492 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
493 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
494 			gPageMappingsObjectCache,
495 			CACHE_DONT_WAIT_FOR_MEMORY
496 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
497 		if (mapping == NULL)
498 			return B_NO_MEMORY;
499 
500 		mapping->page = page;
501 		mapping->area = area;
502 
503 		map->Lock();
504 
505 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
506 			area->MemoryType(), reservation);
507 
508 		// insert mapping into lists
509 		if (page->mappings.IsEmpty() && page->wired_count == 0)
510 			atomic_add(&gMappedPagesCount, 1);
511 
512 		page->mappings.Add(mapping);
513 		area->mappings.Add(mapping);
514 
515 		map->Unlock();
516 	} else {
517 		DEBUG_PAGE_ACCESS_CHECK(page);
518 
519 		map->Lock();
520 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
521 			area->MemoryType(), reservation);
522 		map->Unlock();
523 
524 		increment_page_wired_count(page);
525 	}
526 
527 	if (!wasMapped) {
528 		// The page is mapped now, so it must not remain in the cached queue.
529 		// It also makes sense to move it from the inactive to the active queue,
530 		// since otherwise the page daemon wouldn't come across it to keep track
531 		// of it (in idle mode) -- if the page isn't touched, it will be
532 		// deactivated after a full iteration through the queue at the latest.
533 		if (page->State() == PAGE_STATE_CACHED
534 				|| page->State() == PAGE_STATE_INACTIVE) {
535 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
536 		}
537 	}
538 
539 	return B_OK;
540 }
541 
542 
543 /*!	The caller must hold the lock of the page's cache, since the page's
544 	modified flag is preserved when unmapping.
545 */
546 static inline bool
547 unmap_page(VMArea* area, addr_t virtualAddress)
548 {
549 	return area->address_space->TranslationMap()->UnmapPage(area,
550 		virtualAddress, true);
551 }
552 
553 
554 /*!	The caller must hold the locks of all mapped pages' caches, since the
555 	pages' modified flags are preserved when unmapping.
556 */
557 static inline void
558 unmap_pages(VMArea* area, addr_t base, size_t size)
559 {
560 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
561 }
562 
563 
564 /*!	Cuts a piece out of an area. If the given cut range covers the complete
565 	area, it is deleted. If it covers the beginning or the end, the area is
566 	resized accordingly. If the range covers some part in the middle of the
567 	area, it is split in two; in this case the second area is returned via
568 	\a _secondArea (the variable is left untouched in the other cases).
569 	The address space must be write locked.
570 	The caller must ensure that no part of the given range is wired.
571 */
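// Example with hypothetical addresses: cutting [0x2000, 0x2fff] out of an
// area covering [0x1000, 0x4fff] shrinks the area to [0x1000, 0x1fff] and
// returns a new area covering [0x3000, 0x4fff] via \a _secondArea.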
572 static status_t
573 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
574 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
575 {
576 	// Does the cut range intersect with the area at all?
577 	addr_t areaLast = area->Base() + (area->Size() - 1);
578 	if (area->Base() > lastAddress || areaLast < address)
579 		return B_OK;
580 
581 	// Is the area fully covered?
582 	if (area->Base() >= address && areaLast <= lastAddress) {
583 		delete_area(addressSpace, area, false);
584 		return B_OK;
585 	}
586 
587 	int priority;
588 	uint32 allocationFlags;
589 	if (addressSpace == VMAddressSpace::Kernel()) {
590 		priority = VM_PRIORITY_SYSTEM;
591 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
592 			| HEAP_DONT_LOCK_KERNEL_SPACE;
593 	} else {
594 		priority = VM_PRIORITY_USER;
595 		allocationFlags = 0;
596 	}
597 
598 	VMCache* cache = vm_area_get_locked_cache(area);
599 	VMCacheChainLocker cacheChainLocker(cache);
600 	cacheChainLocker.LockAllSourceCaches();
601 
602 	// Cut the end only?
603 	if (areaLast <= lastAddress) {
604 		size_t oldSize = area->Size();
605 		size_t newSize = address - area->Base();
606 
607 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
608 			allocationFlags);
609 		if (error != B_OK)
610 			return error;
611 
612 		// unmap pages
613 		unmap_pages(area, address, oldSize - newSize);
614 
615 		// If no one else uses the area's cache, we can resize it, too.
616 		if (cache->areas == area && area->cache_next == NULL
617 			&& list_is_empty(&cache->consumers)
618 			&& cache->type == CACHE_TYPE_RAM) {
619 			// Since VMCache::Resize() can temporarily drop the lock, we must
620 			// unlock all lower caches to prevent locking order inversion.
621 			cacheChainLocker.Unlock(cache);
622 			cache->Resize(cache->virtual_base + newSize, priority);
623 			cache->ReleaseRefAndUnlock();
624 		}
625 
626 		return B_OK;
627 	}
628 
629 	// Cut the beginning only?
630 	if (area->Base() >= address) {
631 		addr_t oldBase = area->Base();
632 		addr_t newBase = lastAddress + 1;
633 		size_t newSize = areaLast - lastAddress;
634 
635 		// unmap pages
636 		unmap_pages(area, oldBase, newBase - oldBase);
637 
638 		// resize the area
639 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
640 			allocationFlags);
641 		if (error != B_OK)
642 			return error;
643 
644 		// TODO: If no one else uses the area's cache, we should resize it, too!
645 
646 		area->cache_offset += newBase - oldBase;
647 
648 		return B_OK;
649 	}
650 
651 	// The tough part -- cut a piece out of the middle of the area.
652 	// We do that by shrinking the area to the beginning section and creating
653 	// a new area for the end section.
654 
655 	addr_t firstNewSize = address - area->Base();
656 	addr_t secondBase = lastAddress + 1;
657 	addr_t secondSize = areaLast - lastAddress;
658 
659 	// unmap pages
660 	unmap_pages(area, address, area->Size() - firstNewSize);
661 
662 	// resize the area
663 	addr_t oldSize = area->Size();
664 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
665 		allocationFlags);
666 	if (error != B_OK)
667 		return error;
668 
669 	// TODO: If no one else uses the area's cache, we might want to create a
670 	// new cache for the second area, transfer the concerned pages from the
671 	// first cache to it and resize the first cache.
672 
673 	// map the second area
674 	virtual_address_restrictions addressRestrictions = {};
675 	addressRestrictions.address = (void*)secondBase;
676 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
677 	VMArea* secondArea;
678 	error = map_backing_store(addressSpace, cache,
679 		area->cache_offset + (secondBase - area->Base()), area->name,
680 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
681 		&addressRestrictions, kernel, &secondArea, NULL);
682 	if (error != B_OK) {
683 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
684 		return error;
685 	}
686 
687 	// We need a cache reference for the new area.
688 	cache->AcquireRefLocked();
689 
690 	if (_secondArea != NULL)
691 		*_secondArea = secondArea;
692 
693 	return B_OK;
694 }
695 
696 
697 /*!	Deletes all areas in the given address range.
698 	The address space must be write-locked.
699 	The caller must ensure that no part of the given range is wired.
700 */
701 static status_t
702 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
703 	bool kernel)
704 {
705 	size = PAGE_ALIGN(size);
706 	addr_t lastAddress = address + (size - 1);
707 
708 	// Check whether the caller is allowed to modify the areas concerned.
709 	if (!kernel) {
710 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
711 				VMArea* area = it.Next();) {
712 			addr_t areaLast = area->Base() + (area->Size() - 1);
713 			if (area->Base() < lastAddress && address < areaLast) {
714 				if ((area->protection & B_KERNEL_AREA) != 0)
715 					return B_NOT_ALLOWED;
716 			}
717 		}
718 	}
719 
720 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
721 			VMArea* area = it.Next();) {
722 		addr_t areaLast = area->Base() + (area->Size() - 1);
723 		if (area->Base() < lastAddress && address < areaLast) {
724 			status_t error = cut_area(addressSpace, area, address,
725 				lastAddress, NULL, kernel);
726 			if (error != B_OK)
727 				return error;
728 				// Failing after already messing with areas is ugly, but we
729 				// can't do anything about it.
730 		}
731 	}
732 
733 	return B_OK;
734 }
735 
736 
737 /*! You need to hold the lock of the cache and the write lock of the address
738 	space when calling this function.
739 	Note that, in case of error, the cache will be temporarily unlocked.
740 	If \a addressSpec is \c B_EXACT_ADDRESS and the
741 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
742 	that no part of the specified address range (base \c *_virtualAddress, size
743 	\a size) is wired.
744 */
745 static status_t
746 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
747 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
748 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
749 	bool kernel, VMArea** _area, void** _virtualAddress)
750 {
751 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%Lx, "
752 		"size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName "
753 		"'%s'\n", addressSpace, cache, addressRestrictions->address, offset,
754 		size, addressRestrictions->address_specification, wiring, protection,
755 		_area, areaName));
756 	cache->AssertLocked();
757 
758 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
759 		| HEAP_DONT_LOCK_KERNEL_SPACE;
760 	int priority;
761 	if (addressSpace != VMAddressSpace::Kernel()) {
762 		priority = VM_PRIORITY_USER;
763 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
764 		priority = VM_PRIORITY_VIP;
765 		allocationFlags |= HEAP_PRIORITY_VIP;
766 	} else
767 		priority = VM_PRIORITY_SYSTEM;
768 
769 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
770 		allocationFlags);
771 	if (area == NULL)
772 		return B_NO_MEMORY;
773 
774 	status_t status;
775 
776 	// if this is a private map, we need to create a new cache
777 	// to handle the private copies of pages as they are written to
778 	VMCache* sourceCache = cache;
779 	if (mapping == REGION_PRIVATE_MAP) {
780 		VMCache* newCache;
781 
782 		// create an anonymous cache
783 		bool isStack = (protection & B_STACK_AREA) != 0;
784 		status = VMCacheFactory::CreateAnonymousCache(newCache,
785 			isStack || (protection & B_OVERCOMMITTING_AREA) != 0, 0,
786 			isStack ? USER_STACK_GUARD_PAGES : 0, true, VM_PRIORITY_USER);
787 		if (status != B_OK)
788 			goto err1;
789 
790 		newCache->Lock();
791 		newCache->temporary = 1;
792 		newCache->scan_skip = cache->scan_skip;
793 		newCache->virtual_base = offset;
794 		newCache->virtual_end = offset + size;
795 
796 		cache->AddConsumer(newCache);
797 
798 		cache = newCache;
799 	}
800 
801 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
802 		status = cache->SetMinimalCommitment(size, priority);
803 		if (status != B_OK)
804 			goto err2;
805 	}
806 
807 	// check to see if this address space has entered DELETE state
808 	if (addressSpace->IsBeingDeleted()) {
809 		// someone is trying to delete this address space now, so we can't
810 		// insert the area and have to back out
811 		status = B_BAD_TEAM_ID;
812 		goto err2;
813 	}
814 
815 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
816 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
817 		status = unmap_address_range(addressSpace,
818 			(addr_t)addressRestrictions->address, size, kernel);
819 		if (status != B_OK)
820 			goto err2;
821 	}
822 
823 	status = addressSpace->InsertArea(area, size, addressRestrictions,
824 		allocationFlags, _virtualAddress);
825 	if (status != B_OK) {
826 		// TODO: wait and try again once this is working in the backend
827 #if 0
828 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
829 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
830 				0, 0);
831 		}
832 #endif
833 		goto err2;
834 	}
835 
836 	// attach the cache to the area
837 	area->cache = cache;
838 	area->cache_offset = offset;
839 
840 	// point the cache back to the area
841 	cache->InsertAreaLocked(area);
842 	if (mapping == REGION_PRIVATE_MAP)
843 		cache->Unlock();
844 
845 	// insert the area in the global area hash table
846 	VMAreaHash::Insert(area);
847 
848 	// grab a ref to the address space (the area holds this)
849 	addressSpace->Get();
850 
851 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
852 //		cache, sourceCache, areaName, area);
853 
854 	*_area = area;
855 	return B_OK;
856 
857 err2:
858 	if (mapping == REGION_PRIVATE_MAP) {
859 		// We created this cache, so we must delete it again. Note that we
860 		// need to temporarily unlock the source cache or we'll otherwise
861 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
862 		sourceCache->Unlock();
863 		cache->ReleaseRefAndUnlock();
864 		sourceCache->Lock();
865 	}
866 err1:
867 	addressSpace->DeleteArea(area, allocationFlags);
868 	return status;
869 }
870 
871 
872 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
873 	  locker1, locker2).
874 */
875 template<typename LockerType1, typename LockerType2>
876 static inline bool
877 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
878 {
879 	area->cache->AssertLocked();
880 
881 	VMAreaUnwiredWaiter waiter;
882 	if (!area->AddWaiterIfWired(&waiter))
883 		return false;
884 
885 	// unlock everything and wait
886 	if (locker1 != NULL)
887 		locker1->Unlock();
888 	if (locker2 != NULL)
889 		locker2->Unlock();
890 
891 	waiter.waitEntry.Wait();
892 
893 	return true;
894 }
895 
896 
897 /*!	Checks whether the given area has any wired ranges intersecting with the
898 	specified range and waits, if so.
899 
900 	When it has to wait, the function calls \c Unlock() on both \a locker1
901 	and \a locker2, if given.
902 	The area's top cache must be locked and must be unlocked as a side effect
903 	of calling \c Unlock() on either \a locker1 or \a locker2.
904 
905 	If the function does not have to wait it does not modify or unlock any
906 	object.
907 
908 	\param area The area to be checked.
909 	\param base The base address of the range to check.
910 	\param size The size of the address range to check.
911 	\param locker1 An object to be unlocked before starting to wait (may
912 		be \c NULL).
913 	\param locker2 An object to be unlocked before starting to wait (may
914 		be \c NULL).
915 	\return \c true, if the function had to wait, \c false otherwise.
916 */
917 template<typename LockerType1, typename LockerType2>
918 static inline bool
919 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
920 	LockerType1* locker1, LockerType2* locker2)
921 {
922 	area->cache->AssertLocked();
923 
924 	VMAreaUnwiredWaiter waiter;
925 	if (!area->AddWaiterIfWired(&waiter, base, size))
926 		return false;
927 
928 	// unlock everything and wait
929 	if (locker1 != NULL)
930 		locker1->Unlock();
931 	if (locker2 != NULL)
932 		locker2->Unlock();
933 
934 	waiter.waitEntry.Wait();
935 
936 	return true;
937 }
938 
939 
940 /*!	Checks whether the given address space has any wired ranges intersecting
941 	with the specified range and waits, if so.
942 
943 	Similar to wait_if_area_range_is_wired(), with the following differences:
944 	- All areas intersecting with the range are checked (respectively all until
945 	  one is found that contains a wired range intersecting with the given
946 	  range).
947 	- The given address space must at least be read-locked and must be unlocked
948 	  when \c Unlock() is called on \a locker.
949 	- None of the areas' caches are allowed to be locked.
950 */
951 template<typename LockerType>
952 static inline bool
953 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
954 	size_t size, LockerType* locker)
955 {
956 	addr_t end = base + size - 1;
957 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
958 			VMArea* area = it.Next();) {
959 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
960 		if (area->Base() > end)
961 			return false;
962 
963 		if (base >= area->Base() + area->Size() - 1)
964 			continue;
965 
966 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
967 
968 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
969 			return true;
970 	}
971 
972 	return false;
973 }
974 
975 
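/*!	Blocks the given kernel address range by covering it with an
	already-wired area backed by an empty anonymous cache, so that nothing
	else can be mapped there. Returns the area ID on success.
*/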
976 status_t
977 vm_block_address_range(const char* name, void* address, addr_t size)
978 {
979 	if (!arch_vm_supports_protection(0))
980 		return B_NOT_SUPPORTED;
981 
982 	AddressSpaceWriteLocker locker;
983 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
984 	if (status != B_OK)
985 		return status;
986 
987 	VMAddressSpace* addressSpace = locker.AddressSpace();
988 
989 	// create an anonymous cache
990 	VMCache* cache;
991 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
992 		VM_PRIORITY_SYSTEM);
993 	if (status != B_OK)
994 		return status;
995 
996 	cache->temporary = 1;
997 	cache->virtual_end = size;
998 	cache->scan_skip = 1;
999 	cache->Lock();
1000 
1001 	VMArea* area;
1002 	virtual_address_restrictions addressRestrictions = {};
1003 	addressRestrictions.address = address;
1004 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1005 	status = map_backing_store(addressSpace, cache, 0, name, size,
1006 		B_ALREADY_WIRED, B_ALREADY_WIRED, REGION_NO_PRIVATE_MAP, 0,
1007 		&addressRestrictions, true, &area, NULL);
1008 	if (status != B_OK) {
1009 		cache->ReleaseRefAndUnlock();
1010 		return status;
1011 	}
1012 
1013 	cache->Unlock();
1014 	area->cache_type = CACHE_TYPE_RAM;
1015 	return area->id;
1016 }
1017 
1018 
1019 status_t
1020 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1021 {
1022 	AddressSpaceWriteLocker locker(team);
1023 	if (!locker.IsLocked())
1024 		return B_BAD_TEAM_ID;
1025 
1026 	VMAddressSpace* addressSpace = locker.AddressSpace();
1027 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1028 		addressSpace == VMAddressSpace::Kernel()
1029 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1030 }
1031 
1032 
1033 status_t
1034 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1035 	addr_t size, uint32 flags)
1036 {
1037 	if (size == 0)
1038 		return B_BAD_VALUE;
1039 
1040 	AddressSpaceWriteLocker locker(team);
1041 	if (!locker.IsLocked())
1042 		return B_BAD_TEAM_ID;
1043 
1044 	virtual_address_restrictions addressRestrictions = {};
1045 	addressRestrictions.address = *_address;
1046 	addressRestrictions.address_specification = addressSpec;
1047 	VMAddressSpace* addressSpace = locker.AddressSpace();
1048 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1049 		addressSpace == VMAddressSpace::Kernel()
1050 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1051 		_address);
1052 }
1053 
1054 
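/*!	Creates an area backed by an anonymous (RAM) cache. Depending on
	\a wiring the pages are allocated and mapped up front (B_FULL_LOCK,
	B_CONTIGUOUS), expected to be mapped already (B_ALREADY_WIRED), or mapped
	lazily on fault (B_NO_LOCK, B_LAZY_LOCK). Returns the area ID on success,
	an error code otherwise.
*/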
1055 area_id
1056 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1057 	uint32 wiring, uint32 protection, uint32 flags,
1058 	const virtual_address_restrictions* virtualAddressRestrictions,
1059 	const physical_address_restrictions* physicalAddressRestrictions,
1060 	bool kernel, void** _address)
1061 {
1062 	VMArea* area;
1063 	VMCache* cache;
1064 	vm_page* page = NULL;
1065 	bool isStack = (protection & B_STACK_AREA) != 0;
1066 	page_num_t guardPages;
1067 	bool canOvercommit = false;
1068 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1069 		? VM_PAGE_ALLOC_CLEAR : 0;
1070 
1071 	TRACE(("create_anonymous_area [%ld] %s: size 0x%lx\n", team, name, size));
1072 
1073 	size = PAGE_ALIGN(size);
1074 
1075 	if (size == 0)
1076 		return B_BAD_VALUE;
1077 	if (!arch_vm_supports_protection(protection))
1078 		return B_NOT_SUPPORTED;
1079 
1080 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1081 		canOvercommit = true;
1082 
1083 #ifdef DEBUG_KERNEL_STACKS
1084 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1085 		isStack = true;
1086 #endif
1087 
1088 	// check parameters
1089 	switch (virtualAddressRestrictions->address_specification) {
1090 		case B_ANY_ADDRESS:
1091 		case B_EXACT_ADDRESS:
1092 		case B_BASE_ADDRESS:
1093 		case B_ANY_KERNEL_ADDRESS:
1094 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1095 			break;
1096 
1097 		default:
1098 			return B_BAD_VALUE;
1099 	}
1100 
1101 	if (physicalAddressRestrictions->low_address != 0
1102 		&& physicalAddressRestrictions->high_address != 0) {
1103 		wiring = B_CONTIGUOUS;
1104 	}
1105 
1106 	bool doReserveMemory = false;
1107 	switch (wiring) {
1108 		case B_NO_LOCK:
1109 			break;
1110 		case B_FULL_LOCK:
1111 		case B_LAZY_LOCK:
1112 		case B_CONTIGUOUS:
1113 			doReserveMemory = true;
1114 			break;
1115 		case B_ALREADY_WIRED:
1116 			break;
1117 		case B_LOMEM:
1118 		//case B_SLOWMEM:
1119 			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
1120 			wiring = B_FULL_LOCK;
1121 			doReserveMemory = true;
1122 			break;
1123 		default:
1124 			return B_BAD_VALUE;
1125 	}
1126 
1127 	// For full lock or contiguous areas we're also going to map the pages and
1128 	// thus need to reserve pages for the mapping backend upfront.
1129 	addr_t reservedMapPages = 0;
1130 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1131 		AddressSpaceWriteLocker locker;
1132 		status_t status = locker.SetTo(team);
1133 		if (status != B_OK)
1134 			return status;
1135 
1136 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1137 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1138 	}
1139 
1140 	int priority;
1141 	if (team != VMAddressSpace::KernelID())
1142 		priority = VM_PRIORITY_USER;
1143 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1144 		priority = VM_PRIORITY_VIP;
1145 	else
1146 		priority = VM_PRIORITY_SYSTEM;
1147 
1148 	// Reserve memory before acquiring the address space lock. This reduces the
1149 	// chances of failure, since while holding the write lock to the address
1150 	// space (if it is the kernel address space that is), the low memory handler
1151 	// won't be able to free anything for us.
1152 	addr_t reservedMemory = 0;
1153 	if (doReserveMemory) {
1154 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1155 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1156 			return B_NO_MEMORY;
1157 		reservedMemory = size;
1158 		// TODO: We don't reserve the memory for the pages for the page
1159 		// directories/tables. We actually need to, since we currently don't
1160 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1161 		// there are actually fewer physical pages than there should be, which
1162 		// can get the VM into trouble in low memory situations.
1163 	}
1164 
1165 	AddressSpaceWriteLocker locker;
1166 	VMAddressSpace* addressSpace;
1167 	status_t status;
1168 
1169 	// For full lock areas reserve the pages before locking the address
1170 	// space. E.g. block caches can't release their memory while we hold the
1171 	// address space lock.
1172 	page_num_t reservedPages = reservedMapPages;
1173 	if (wiring == B_FULL_LOCK)
1174 		reservedPages += size / B_PAGE_SIZE;
1175 
1176 	vm_page_reservation reservation;
1177 	if (reservedPages > 0) {
1178 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1179 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1180 					priority)) {
1181 				reservedPages = 0;
1182 				status = B_WOULD_BLOCK;
1183 				goto err0;
1184 			}
1185 		} else
1186 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1187 	}
1188 
1189 	if (wiring == B_CONTIGUOUS) {
1190 		// we try to allocate the page run here upfront as this may easily
1191 		// fail for obvious reasons
1192 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1193 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1194 		if (page == NULL) {
1195 			status = B_NO_MEMORY;
1196 			goto err0;
1197 		}
1198 	}
1199 
1200 	// Lock the address space and, if B_EXACT_ADDRESS and
1201 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1202 	// is not wired.
1203 	do {
1204 		status = locker.SetTo(team);
1205 		if (status != B_OK)
1206 			goto err1;
1207 
1208 		addressSpace = locker.AddressSpace();
1209 	} while (virtualAddressRestrictions->address_specification
1210 			== B_EXACT_ADDRESS
1211 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1212 		&& wait_if_address_range_is_wired(addressSpace,
1213 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1214 
1215 	// create an anonymous cache
1216 	// if it's a stack, make sure that at least two pages are available
1217 	guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0
1218 		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
1219 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1220 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1221 		wiring == B_NO_LOCK, priority);
1222 	if (status != B_OK)
1223 		goto err1;
1224 
1225 	cache->temporary = 1;
1226 	cache->virtual_end = size;
1227 	cache->committed_size = reservedMemory;
1228 		// TODO: This should be done via a method.
1229 	reservedMemory = 0;
1230 
1231 	switch (wiring) {
1232 		case B_LAZY_LOCK:
1233 		case B_FULL_LOCK:
1234 		case B_CONTIGUOUS:
1235 		case B_ALREADY_WIRED:
1236 			cache->scan_skip = 1;
1237 			break;
1238 		case B_NO_LOCK:
1239 			cache->scan_skip = 0;
1240 			break;
1241 	}
1242 
1243 	cache->Lock();
1244 
1245 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1246 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1247 		kernel, &area, _address);
1248 
1249 	if (status != B_OK) {
1250 		cache->ReleaseRefAndUnlock();
1251 		goto err1;
1252 	}
1253 
1254 	locker.DegradeToReadLock();
1255 
1256 	switch (wiring) {
1257 		case B_NO_LOCK:
1258 		case B_LAZY_LOCK:
1259 			// do nothing - the pages are mapped in as needed
1260 			break;
1261 
1262 		case B_FULL_LOCK:
1263 		{
1264 			// Allocate and map all pages for this area
1265 
1266 			off_t offset = 0;
1267 			for (addr_t address = area->Base();
1268 					address < area->Base() + (area->Size() - 1);
1269 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1270 #ifdef DEBUG_KERNEL_STACKS
1271 #	ifdef STACK_GROWS_DOWNWARDS
1272 				if (isStack && address < area->Base()
1273 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1274 #	else
1275 				if (isStack && address >= area->Base() + area->Size()
1276 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1277 #	endif
1278 					continue;
1279 #endif
1280 				vm_page* page = vm_page_allocate_page(&reservation,
1281 					PAGE_STATE_WIRED | pageAllocFlags);
1282 				cache->InsertPage(page, offset);
1283 				map_page(area, page, address, protection, &reservation);
1284 
1285 				DEBUG_PAGE_ACCESS_END(page);
1286 			}
1287 
1288 			break;
1289 		}
1290 
1291 		case B_ALREADY_WIRED:
1292 		{
1293 			// The pages should already be mapped. This is only really useful
1294 			// during boot time. Find the appropriate vm_page objects and stick
1295 			// them in the cache object.
1296 			VMTranslationMap* map = addressSpace->TranslationMap();
1297 			off_t offset = 0;
1298 
1299 			if (!gKernelStartup)
1300 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1301 
1302 			map->Lock();
1303 
1304 			for (addr_t virtualAddress = area->Base();
1305 					virtualAddress < area->Base() + (area->Size() - 1);
1306 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1307 				phys_addr_t physicalAddress;
1308 				uint32 flags;
1309 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1310 				if (status < B_OK) {
1311 					panic("looking up mapping failed for va 0x%lx\n",
1312 						virtualAddress);
1313 				}
1314 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1315 				if (page == NULL) {
1316 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1317 						"\n", physicalAddress);
1318 				}
1319 
1320 				DEBUG_PAGE_ACCESS_START(page);
1321 
1322 				increment_page_wired_count(page);
1323 				cache->InsertPage(page, offset);
1324 				vm_page_set_state(page, PAGE_STATE_WIRED);
1325 				page->busy = false;
1326 
1327 				DEBUG_PAGE_ACCESS_END(page);
1328 			}
1329 
1330 			map->Unlock();
1331 			break;
1332 		}
1333 
1334 		case B_CONTIGUOUS:
1335 		{
1336 			// We have already allocated our contiguous page run, so we can now
1337 			// just map them in the address space
1338 			VMTranslationMap* map = addressSpace->TranslationMap();
1339 			phys_addr_t physicalAddress
1340 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1341 			addr_t virtualAddress = area->Base();
1342 			off_t offset = 0;
1343 
1344 			map->Lock();
1345 
1346 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1347 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1348 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1349 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1350 				if (page == NULL)
1351 					panic("couldn't lookup physical page just allocated\n");
1352 
1353 				status = map->Map(virtualAddress, physicalAddress, protection,
1354 					area->MemoryType(), &reservation);
1355 				if (status < B_OK)
1356 					panic("couldn't map physical page in page run\n");
1357 
1358 				increment_page_wired_count(page);
1359 				cache->InsertPage(page, offset);
1360 
1361 				DEBUG_PAGE_ACCESS_END(page);
1362 			}
1363 
1364 			map->Unlock();
1365 			break;
1366 		}
1367 
1368 		default:
1369 			break;
1370 	}
1371 
1372 	cache->Unlock();
1373 
1374 	if (reservedPages > 0)
1375 		vm_page_unreserve_pages(&reservation);
1376 
1377 	TRACE(("vm_create_anonymous_area: done\n"));
1378 
1379 	area->cache_type = CACHE_TYPE_RAM;
1380 	return area->id;
1381 
1382 err1:
1383 	if (wiring == B_CONTIGUOUS) {
1384 		// we had allocated the contiguous page run upfront -- free it again
1385 		phys_addr_t pageNumber = page->physical_page_number;
1386 		int32 i;
1387 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1388 			page = vm_lookup_page(pageNumber);
1389 			if (page == NULL)
1390 				panic("couldn't lookup physical page just allocated\n");
1391 
1392 			vm_page_set_state(page, PAGE_STATE_FREE);
1393 		}
1394 	}
1395 
1396 err0:
1397 	if (reservedPages > 0)
1398 		vm_page_unreserve_pages(&reservation);
1399 	if (reservedMemory > 0)
1400 		vm_unreserve_memory(reservedMemory);
1401 
1402 	return status;
1403 }
1404 
1405 
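/*!	Maps the given physical address range into the team's address space,
	backed by a device cache. The physical address does not need to be page
	aligned; *_address is offset into the area by the same amount.
*/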
1406 area_id
1407 vm_map_physical_memory(team_id team, const char* name, void** _address,
1408 	uint32 addressSpec, addr_t size, uint32 protection,
1409 	phys_addr_t physicalAddress, bool alreadyWired)
1410 {
1411 	VMArea* area;
1412 	VMCache* cache;
1413 	addr_t mapOffset;
1414 
1415 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
1416 		"spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team,
1417 		name, *_address, addressSpec, size, protection, physicalAddress));
1418 
1419 	if (!arch_vm_supports_protection(protection))
1420 		return B_NOT_SUPPORTED;
1421 
1422 	AddressSpaceWriteLocker locker(team);
1423 	if (!locker.IsLocked())
1424 		return B_BAD_TEAM_ID;
1425 
1426 	// if the physical address is not page aligned,
1427 	// move the actual area down to align it on a page boundary
1428 	mapOffset = physicalAddress % B_PAGE_SIZE;
1429 	size += mapOffset;
1430 	physicalAddress -= mapOffset;
1431 
1432 	size = PAGE_ALIGN(size);
1433 
1434 	// create a device cache
1435 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1436 	if (status != B_OK)
1437 		return status;
1438 
1439 	// tell the page scanner to skip over this area, its pages are special
1440 	cache->scan_skip = 1;
1441 	cache->virtual_end = size;
1442 
1443 	cache->Lock();
1444 
1445 	virtual_address_restrictions addressRestrictions = {};
1446 	addressRestrictions.address = *_address;
1447 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1448 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1449 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1450 		true, &area, _address);
1451 
1452 	if (status < B_OK)
1453 		cache->ReleaseRefLocked();
1454 
1455 	cache->Unlock();
1456 
1457 	if (status == B_OK) {
1458 		// set requested memory type -- use uncached, if not given
1459 		uint32 memoryType = addressSpec & B_MTR_MASK;
1460 		if (memoryType == 0)
1461 			memoryType = B_MTR_UC;
1462 
1463 		area->SetMemoryType(memoryType);
1464 
1465 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1466 		if (status != B_OK)
1467 			delete_area(locker.AddressSpace(), area, false);
1468 	}
1469 
1470 	if (status != B_OK)
1471 		return status;
1472 
1473 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1474 
1475 	if (alreadyWired) {
1476 		// The area is already mapped, but possibly not with the right
1477 		// memory type.
1478 		map->Lock();
1479 		map->ProtectArea(area, area->protection);
1480 		map->Unlock();
1481 	} else {
1482 		// Map the area completely.
1483 
1484 		// reserve pages needed for the mapping
1485 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1486 			area->Base() + (size - 1));
1487 		vm_page_reservation reservation;
1488 		vm_page_reserve_pages(&reservation, reservePages,
1489 			team == VMAddressSpace::KernelID()
1490 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1491 
1492 		map->Lock();
1493 
1494 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1495 			map->Map(area->Base() + offset, physicalAddress + offset,
1496 				protection, area->MemoryType(), &reservation);
1497 		}
1498 
1499 		map->Unlock();
1500 
1501 		vm_page_unreserve_pages(&reservation);
1502 	}
1503 
1504 	// modify the pointer returned to be offset back into the new area
1505 	// the same way the physical address passed in was offset
1506 	*_address = (void*)((addr_t)*_address + mapOffset);
1507 
1508 	area->cache_type = CACHE_TYPE_DEVICE;
1509 	return area->id;
1510 }
1511 
1512 
1513 /*!	Don't use!
1514 	TODO: This function was introduced to map physical page vecs to
1515 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1516 	use a device cache and does not track vm_page::wired_count!
1517 */
1518 area_id
1519 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1520 	uint32 addressSpec, addr_t* _size, uint32 protection,
1521 	struct generic_io_vec* vecs, uint32 vecCount)
1522 {
1523 	TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, "
1524 		"spec = %ld, _size = %p, protection = %ld, vecs = %p, "
1525 		"vecCount = %ld)\n", team, name, *_address, addressSpec, _size,
1526 		protection, vecs, vecCount));
1527 
1528 	if (!arch_vm_supports_protection(protection)
1529 		|| (addressSpec & B_MTR_MASK) != 0) {
1530 		return B_NOT_SUPPORTED;
1531 	}
1532 
1533 	AddressSpaceWriteLocker locker(team);
1534 	if (!locker.IsLocked())
1535 		return B_BAD_TEAM_ID;
1536 
1537 	if (vecCount == 0)
1538 		return B_BAD_VALUE;
1539 
1540 	addr_t size = 0;
1541 	for (uint32 i = 0; i < vecCount; i++) {
1542 		if (vecs[i].base % B_PAGE_SIZE != 0
1543 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1544 			return B_BAD_VALUE;
1545 		}
1546 
1547 		size += vecs[i].length;
1548 	}
1549 
1550 	// create a device cache
1551 	VMCache* cache;
1552 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1553 	if (result != B_OK)
1554 		return result;
1555 
1556 	// tell the page scanner to skip over this area, its pages are special
1557 	cache->scan_skip = 1;
1558 	cache->virtual_end = size;
1559 
1560 	cache->Lock();
1561 
1562 	VMArea* area;
1563 	virtual_address_restrictions addressRestrictions = {};
1564 	addressRestrictions.address = *_address;
1565 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1566 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1567 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1568 		&addressRestrictions, true, &area, _address);
1569 
1570 	if (result != B_OK)
1571 		cache->ReleaseRefLocked();
1572 
1573 	cache->Unlock();
1574 
1575 	if (result != B_OK)
1576 		return result;
1577 
1578 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1579 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1580 		area->Base() + (size - 1));
1581 
1582 	vm_page_reservation reservation;
1583 	vm_page_reserve_pages(&reservation, reservePages,
1584 			team == VMAddressSpace::KernelID()
1585 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1586 	map->Lock();
1587 
1588 	uint32 vecIndex = 0;
1589 	size_t vecOffset = 0;
1590 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1591 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1592 			vecOffset = 0;
1593 			vecIndex++;
1594 		}
1595 
1596 		if (vecIndex >= vecCount)
1597 			break;
1598 
1599 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1600 			protection, area->MemoryType(), &reservation);
1601 
1602 		vecOffset += B_PAGE_SIZE;
1603 	}
1604 
1605 	map->Unlock();
1606 	vm_page_unreserve_pages(&reservation);
1607 
1608 	if (_size != NULL)
1609 		*_size = size;
1610 
1611 	area->cache_type = CACHE_TYPE_DEVICE;
1612 	return area->id;
1613 }
1614 
1615 
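/*!	Creates an area backed by a null cache, i.e. an address range that is
	reserved in the address space but has no pages mapped into it.
*/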
1616 area_id
1617 vm_create_null_area(team_id team, const char* name, void** address,
1618 	uint32 addressSpec, addr_t size, uint32 flags)
1619 {
1620 	size = PAGE_ALIGN(size);
1621 
1622 	// Lock the address space and, if B_EXACT_ADDRESS and
1623 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1624 	// is not wired.
1625 	AddressSpaceWriteLocker locker;
1626 	do {
1627 		if (locker.SetTo(team) != B_OK)
1628 			return B_BAD_TEAM_ID;
1629 	} while (addressSpec == B_EXACT_ADDRESS
1630 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1631 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1632 			(addr_t)*address, size, &locker));
1633 
1634 	// create a null cache
1635 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1636 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1637 	VMCache* cache;
1638 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1639 	if (status != B_OK)
1640 		return status;
1641 
1642 	// tell the page scanner to skip over this area, no pages will be mapped
1643 	// here
1644 	cache->scan_skip = 1;
1645 	cache->virtual_end = size;
1646 
1647 	cache->Lock();
1648 
1649 	VMArea* area;
1650 	virtual_address_restrictions addressRestrictions = {};
1651 	addressRestrictions.address = *address;
1652 	addressRestrictions.address_specification = addressSpec;
1653 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1654 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1655 		&addressRestrictions, true, &area, address);
1656 
1657 	if (status < B_OK) {
1658 		cache->ReleaseRefAndUnlock();
1659 		return status;
1660 	}
1661 
1662 	cache->Unlock();
1663 
1664 	area->cache_type = CACHE_TYPE_NULL;
1665 	return area->id;
1666 }
1667 
1668 
1669 /*!	Creates the vnode cache for the specified \a vnode.
1670 	The vnode has to be marked busy when calling this function.
1671 */
1672 status_t
1673 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1674 {
1675 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1676 }
1677 
1678 
1679 /*!	\a cache must be locked. The area's address space must be read-locked.
1680 */
1681 static void
1682 pre_map_area_pages(VMArea* area, VMCache* cache,
1683 	vm_page_reservation* reservation)
1684 {
1685 	addr_t baseAddress = area->Base();
1686 	addr_t cacheOffset = area->cache_offset;
1687 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1688 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1689 
1690 	for (VMCachePagesTree::Iterator it
1691 				= cache->pages.GetIterator(firstPage, true, true);
1692 			vm_page* page = it.Next();) {
1693 		if (page->cache_offset >= endPage)
1694 			break;
1695 
1696 		// skip busy and inactive pages
1697 		if (page->busy || page->usage_count == 0)
1698 			continue;
1699 
1700 		DEBUG_PAGE_ACCESS_START(page);
1701 		map_page(area, page,
1702 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1703 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1704 		DEBUG_PAGE_ACCESS_END(page);
1705 	}
1706 }
1707 
1708 
1709 /*!	Will map the file specified by \a fd to an area in memory.
1710 	The file will be mirrored beginning at the specified \a offset. The
1711 	\a offset and \a size arguments have to be page aligned.
1712 */
1713 static area_id
1714 _vm_map_file(team_id team, const char* name, void** _address,
1715 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1716 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1717 {
1718 	// TODO: for binary files, we want to make sure that they get the
1719 	//	copy of a file at a given time, ie. later changes should not
1720 	//	make it into the mapped copy -- this will need quite some changes
1721 	//	to be done in a nice way
1722 	TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n",
1723 		fd, offset, size, mapping));
1724 
1725 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1726 	size = PAGE_ALIGN(size);
1727 
1728 	if (mapping == REGION_NO_PRIVATE_MAP)
1729 		protection |= B_SHARED_AREA;
1730 	if (addressSpec != B_EXACT_ADDRESS)
1731 		unmapAddressRange = false;
1732 
1733 	if (fd < 0) {
1734 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1735 		virtual_address_restrictions virtualRestrictions = {};
1736 		virtualRestrictions.address = *_address;
1737 		virtualRestrictions.address_specification = addressSpec;
1738 		physical_address_restrictions physicalRestrictions = {};
1739 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1740 			flags, &virtualRestrictions, &physicalRestrictions, kernel,
1741 			_address);
1742 	}
1743 
1744 	// get the open flags of the FD
1745 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1746 	if (descriptor == NULL)
1747 		return EBADF;
1748 	int32 openMode = descriptor->open_mode;
1749 	put_fd(descriptor);
1750 
1751 	// The FD must be open for reading in any case. For a shared mapping with
1752 	// write access, the FD must additionally be open for writing.
1753 	if ((openMode & O_ACCMODE) == O_WRONLY
1754 		|| (mapping == REGION_NO_PRIVATE_MAP
1755 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1756 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1757 		return EACCES;
1758 	}
1759 
1760 	// get the vnode for the object, this also grabs a ref to it
1761 	struct vnode* vnode = NULL;
1762 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1763 	if (status < B_OK)
1764 		return status;
1765 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1766 
1767 	// If we're going to pre-map pages, we need to reserve the pages needed by
1768 	// the mapping backend upfront.
1769 	page_num_t reservedPreMapPages = 0;
1770 	vm_page_reservation reservation;
1771 	if ((protection & B_READ_AREA) != 0) {
1772 		AddressSpaceWriteLocker locker;
1773 		status = locker.SetTo(team);
1774 		if (status != B_OK)
1775 			return status;
1776 
1777 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1778 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1779 
1780 		locker.Unlock();
1781 
1782 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1783 			team == VMAddressSpace::KernelID()
1784 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1785 	}
1786 
1787 	struct PageUnreserver {
1788 		PageUnreserver(vm_page_reservation* reservation)
1789 			:
1790 			fReservation(reservation)
1791 		{
1792 		}
1793 
1794 		~PageUnreserver()
1795 		{
1796 			if (fReservation != NULL)
1797 				vm_page_unreserve_pages(fReservation);
1798 		}
1799 
1800 		vm_page_reservation* fReservation;
1801 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1802 
1803 	// Lock the address space and, if the specified address range shall be
1804 	// unmapped, ensure it is not wired.
1805 	AddressSpaceWriteLocker locker;
1806 	do {
1807 		if (locker.SetTo(team) != B_OK)
1808 			return B_BAD_TEAM_ID;
1809 	} while (unmapAddressRange
1810 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1811 			(addr_t)*_address, size, &locker));
1812 
1813 	// TODO: this only works for file systems that use the file cache
1814 	VMCache* cache;
1815 	status = vfs_get_vnode_cache(vnode, &cache, false);
1816 	if (status < B_OK)
1817 		return status;
1818 
1819 	cache->Lock();
1820 
1821 	VMArea* area;
1822 	virtual_address_restrictions addressRestrictions = {};
1823 	addressRestrictions.address = *_address;
1824 	addressRestrictions.address_specification = addressSpec;
1825 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1826 		0, protection, mapping,
1827 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1828 		&addressRestrictions, kernel, &area, _address);
1829 
1830 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1831 		// map_backing_store() cannot know we no longer need the ref
1832 		cache->ReleaseRefLocked();
1833 	}
1834 
1835 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1836 		pre_map_area_pages(area, cache, &reservation);
1837 
1838 	cache->Unlock();
1839 
1840 	if (status == B_OK) {
1841 		// TODO: this probably deserves a smarter solution, i.e. don't always
1842 		// prefetch, and probably don't trigger it at this place either.
1843 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1844 			// prefetches at most 10 MB starting from "offset"
1845 	}
1846 
1847 	if (status != B_OK)
1848 		return status;
1849 
1850 	area->cache_type = CACHE_TYPE_VNODE;
1851 	return area->id;
1852 }
1853 
1854 
1855 area_id
1856 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
1857 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
1858 	int fd, off_t offset)
1859 {
1860 	if (!arch_vm_supports_protection(protection))
1861 		return B_NOT_SUPPORTED;
1862 
1863 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
1864 		mapping, unmapAddressRange, fd, offset, true);
1865 }
1866 
1867 
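/*!	Returns the area's cache locked and with a reference acquired.
	Since locking the cache requires temporarily giving up the read lock on
	sAreaCacheLock, the function retries until the cache it managed to lock
	still is the area's cache.
*/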
1868 VMCache*
1869 vm_area_get_locked_cache(VMArea* area)
1870 {
1871 	rw_lock_read_lock(&sAreaCacheLock);
1872 
1873 	while (true) {
1874 		VMCache* cache = area->cache;
1875 
1876 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
1877 			// cache has been deleted
1878 			rw_lock_read_lock(&sAreaCacheLock);
1879 			continue;
1880 		}
1881 
1882 		rw_lock_read_lock(&sAreaCacheLock);
1883 
1884 		if (cache == area->cache) {
1885 			cache->AcquireRefLocked();
1886 			rw_lock_read_unlock(&sAreaCacheLock);
1887 			return cache;
1888 		}
1889 
1890 		// the cache changed in the meantime
1891 		cache->Unlock();
1892 	}
1893 }
1894 
1895 
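/*!	Releases the lock and the reference acquired by
	vm_area_get_locked_cache().
*/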
1896 void
1897 vm_area_put_locked_cache(VMCache* cache)
1898 {
1899 	cache->ReleaseRefAndUnlock();
1900 }
1901 
1902 
1903 area_id
1904 vm_clone_area(team_id team, const char* name, void** address,
1905 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
1906 	bool kernel)
1907 {
1908 	VMArea* newArea = NULL;
1909 	VMArea* sourceArea;
1910 
1911 	// Check whether the source area exists and is cloneable. If so, mark it
1912 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
1913 	{
1914 		AddressSpaceWriteLocker locker;
1915 		status_t status = locker.SetFromArea(sourceID, sourceArea);
1916 		if (status != B_OK)
1917 			return status;
1918 
1919 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
1920 			return B_NOT_ALLOWED;
1921 
1922 		sourceArea->protection |= B_SHARED_AREA;
1923 		protection |= B_SHARED_AREA;
1924 	}
1925 
1926 	// Now lock both address spaces and actually do the cloning.
1927 
1928 	MultiAddressSpaceLocker locker;
1929 	VMAddressSpace* sourceAddressSpace;
1930 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
1931 	if (status != B_OK)
1932 		return status;
1933 
1934 	VMAddressSpace* targetAddressSpace;
1935 	status = locker.AddTeam(team, true, &targetAddressSpace);
1936 	if (status != B_OK)
1937 		return status;
1938 
1939 	status = locker.Lock();
1940 	if (status != B_OK)
1941 		return status;
1942 
1943 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
1944 	if (sourceArea == NULL)
1945 		return B_BAD_VALUE;
1946 
1947 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
1948 		return B_NOT_ALLOWED;
1949 
1950 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
1951 
1952 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
1953 	//	have been adapted. Maybe it should be part of the kernel settings,
1954 	//	anyway (so that old drivers can always work).
1955 #if 0
1956 	if (sourceArea->aspace == VMAddressSpace::Kernel()
1957 		&& addressSpace != VMAddressSpace::Kernel()
1958 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
1959 		// kernel areas must not be cloned in userland, unless explicitly
1960 		// declared user-cloneable upon construction
1961 		status = B_NOT_ALLOWED;
1962 	} else
1963 #endif
1964 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
1965 		status = B_NOT_ALLOWED;
1966 	else {
1967 		virtual_address_restrictions addressRestrictions = {};
1968 		addressRestrictions.address = *address;
1969 		addressRestrictions.address_specification = addressSpec;
1970 		status = map_backing_store(targetAddressSpace, cache,
1971 			sourceArea->cache_offset, name, sourceArea->Size(),
1972 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
1973 			kernel, &newArea, address);
1974 	}
1975 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
1976 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
1977 		// to create a new cache, and has therefore already acquired a reference
1978 		// to the source cache - but otherwise it has no idea that we need
1979 		// one.
1980 		cache->AcquireRefLocked();
1981 	}
1982 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
1983 		// we need to map in everything at this point
1984 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
1985 			// we don't have actual pages to map but a physical area
1986 			VMTranslationMap* map
1987 				= sourceArea->address_space->TranslationMap();
1988 			map->Lock();
1989 
1990 			phys_addr_t physicalAddress;
1991 			uint32 oldProtection;
1992 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
1993 
1994 			map->Unlock();
1995 
1996 			map = targetAddressSpace->TranslationMap();
1997 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
1998 				newArea->Base() + (newArea->Size() - 1));
1999 
2000 			vm_page_reservation reservation;
2001 			vm_page_reserve_pages(&reservation, reservePages,
2002 				targetAddressSpace == VMAddressSpace::Kernel()
2003 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2004 			map->Lock();
2005 
2006 			for (addr_t offset = 0; offset < newArea->Size();
2007 					offset += B_PAGE_SIZE) {
2008 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2009 					protection, newArea->MemoryType(), &reservation);
2010 			}
2011 
2012 			map->Unlock();
2013 			vm_page_unreserve_pages(&reservation);
2014 		} else {
2015 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2016 			size_t reservePages = map->MaxPagesNeededToMap(
2017 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2018 			vm_page_reservation reservation;
2019 			vm_page_reserve_pages(&reservation, reservePages,
2020 				targetAddressSpace == VMAddressSpace::Kernel()
2021 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2022 
2023 			// map in all pages from source
2024 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2025 					vm_page* page = it.Next();) {
2026 				if (!page->busy) {
2027 					DEBUG_PAGE_ACCESS_START(page);
2028 					map_page(newArea, page,
2029 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2030 							- newArea->cache_offset),
2031 						protection, &reservation);
2032 					DEBUG_PAGE_ACCESS_END(page);
2033 				}
2034 			}
2035 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2036 			// ensuring that!
2037 
2038 			vm_page_unreserve_pages(&reservation);
2039 		}
2040 	}
2041 	if (status == B_OK)
2042 		newArea->cache_type = sourceArea->cache_type;
2043 
2044 	vm_area_put_locked_cache(cache);
2045 
2046 	if (status < B_OK)
2047 		return status;
2048 
2049 	return newArea->id;
2050 }
2051 
2052 
2053 /*!	Deletes the specified area of the given address space.
2054 
2055 	The address space must be write-locked.
2056 	The caller must ensure that the area does not have any wired ranges.
2057 
2058 	\param addressSpace The address space containing the area.
2059 	\param area The area to be deleted.
2060 	\param deletingAddressSpace \c true, if the address space is in the process
2061 		of being deleted.
2062 */
2063 static void
2064 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2065 	bool deletingAddressSpace)
2066 {
2067 	ASSERT(!area->IsWired());
2068 
2069 	VMAreaHash::Remove(area);
2070 
2071 	// At this point the area is removed from the global hash table, but
2072 	// still exists in the area list.
2073 
2074 	// Unmap the virtual address space the area occupied.
2075 	{
2076 		// We need to lock the complete cache chain.
2077 		VMCache* topCache = vm_area_get_locked_cache(area);
2078 		VMCacheChainLocker cacheChainLocker(topCache);
2079 		cacheChainLocker.LockAllSourceCaches();
2080 
2081 		// If the area's top cache is a temporary cache and the area is the only
2082 		// one referencing it (besides us currently holding a second reference),
2083 		// the unmapping code doesn't need to care about preserving the accessed
2084 		// and dirty flags of the top cache page mappings.
2085 		bool ignoreTopCachePageFlags
2086 			= topCache->temporary && topCache->RefCount() == 2;
2087 
2088 		area->address_space->TranslationMap()->UnmapArea(area,
2089 			deletingAddressSpace, ignoreTopCachePageFlags);
2090 	}
2091 
2092 	if (!area->cache->temporary)
2093 		area->cache->WriteModified();
2094 
2095 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2096 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2097 
2098 	arch_vm_unset_memory_type(area);
2099 	addressSpace->RemoveArea(area, allocationFlags);
2100 	addressSpace->Put();
2101 
2102 	area->cache->RemoveArea(area);
2103 	area->cache->ReleaseRef();
2104 
2105 	addressSpace->DeleteArea(area, allocationFlags);
2106 }
2107 
2108 
2109 status_t
2110 vm_delete_area(team_id team, area_id id, bool kernel)
2111 {
2112 	TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id));
2113 
2114 	// lock the address space and make sure the area isn't wired
2115 	AddressSpaceWriteLocker locker;
2116 	VMArea* area;
2117 	AreaCacheLocker cacheLocker;
2118 
2119 	do {
2120 		status_t status = locker.SetFromArea(team, id, area);
2121 		if (status != B_OK)
2122 			return status;
2123 
2124 		cacheLocker.SetTo(area);
2125 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2126 
2127 	cacheLocker.Unlock();
2128 
2129 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2130 		return B_NOT_ALLOWED;
2131 
2132 	delete_area(locker.AddressSpace(), area, false);
2133 	return B_OK;
2134 }
2135 
2136 
2137 /*!	Creates a new cache on top of the given cache, moves all areas from
2138 	the old cache to the new one, and changes the protection of all affected
2139 	areas' pages to read-only.
2140 	Preconditions:
2141 	- The given cache must be locked.
2142 	- All of the cache's areas' address spaces must be read locked.
2143 	- None of the cache's areas must have any wired ranges.
2144 */
2145 static status_t
2146 vm_copy_on_write_area(VMCache* lowerCache)
2147 {
2148 	VMCache* upperCache;
2149 
2150 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2151 
2152 	// We need to separate the cache from its areas. The cache goes one level
2153 	// deeper and we create a new cache in between.
2154 
2155 	// create an anonymous cache
2156 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2157 		0, true, VM_PRIORITY_USER);
2158 	if (status != B_OK)
2159 		return status;
2160 
2161 	upperCache->Lock();
2162 
2163 	upperCache->temporary = 1;
2164 	upperCache->scan_skip = lowerCache->scan_skip;
2165 	upperCache->virtual_base = lowerCache->virtual_base;
2166 	upperCache->virtual_end = lowerCache->virtual_end;
2167 
2168 	// transfer the lower cache areas to the upper cache
2169 	rw_lock_write_lock(&sAreaCacheLock);
2170 	upperCache->TransferAreas(lowerCache);
2171 	rw_lock_write_unlock(&sAreaCacheLock);
2172 
2173 	lowerCache->AddConsumer(upperCache);
2174 
2175 	// We now need to remap all pages from all of the cache's areas read-only,
2176 	// so that a copy will be created on the next write access.
2177 
2178 	for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2179 			tempArea = tempArea->cache_next) {
2180 		// The area must be readable in the same way it was previously writable
2181 		uint32 protection = B_KERNEL_READ_AREA;
2182 		if ((tempArea->protection & B_READ_AREA) != 0)
2183 			protection |= B_READ_AREA;
2184 
2185 		VMTranslationMap* map = tempArea->address_space->TranslationMap();
2186 		map->Lock();
2187 		map->ProtectArea(tempArea, protection);
2188 		map->Unlock();
2189 	}
2190 
2191 	vm_area_put_locked_cache(upperCache);
2192 
2193 	return B_OK;
2194 }
2195 
2196 
2197 area_id
2198 vm_copy_area(team_id team, const char* name, void** _address,
2199 	uint32 addressSpec, uint32 protection, area_id sourceID)
2200 {
2201 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2202 
2203 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2204 		// set the same protection for the kernel as for userland
2205 		protection |= B_KERNEL_READ_AREA;
2206 		if (writableCopy)
2207 			protection |= B_KERNEL_WRITE_AREA;
2208 	}
2209 
2210 	// Do the locking: target address space, all address spaces associated with
2211 	// the source cache, and the cache itself.
2212 	MultiAddressSpaceLocker locker;
2213 	VMAddressSpace* targetAddressSpace;
2214 	VMCache* cache;
2215 	VMArea* source;
2216 	AreaCacheLocker cacheLocker;
2217 	status_t status;
2218 	bool sharedArea;
2219 
2220 	bool restart;
2221 	do {
2222 		restart = false;
2223 
2224 		locker.Unset();
2225 		status = locker.AddTeam(team, true, &targetAddressSpace);
2226 		if (status == B_OK) {
2227 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2228 				&cache);
2229 		}
2230 		if (status != B_OK)
2231 			return status;
2232 
2233 		cacheLocker.SetTo(cache, true);	// already locked
2234 
2235 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2236 
2237 		// Make sure the source area (or, if it is not shared, all areas of
2238 		// the cache) doesn't have any wired ranges.
2239 		if (sharedArea) {
2240 			if (wait_if_area_is_wired(source, &locker, &cacheLocker))
2241 				restart = true;
2242 		} else {
2243 			for (VMArea* area = cache->areas; area != NULL;
2244 					area = area->cache_next) {
2245 				if (wait_if_area_is_wired(area, &locker, &cacheLocker)) {
2246 					restart = true;
2247 					break;
2248 				}
2249 			}
2250 		}
2251 	} while (restart);
2252 
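	// B_CLONE_ADDRESS means the copy shall get the same base address as the
	// source area.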
2253 	if (addressSpec == B_CLONE_ADDRESS) {
2254 		addressSpec = B_EXACT_ADDRESS;
2255 		*_address = (void*)source->Base();
2256 	}
2257 
2258 	// First, create a cache on top of the source area, or use the existing
2259 	// one, if this is a shared area.
2260 
2261 	VMArea* target;
2262 	virtual_address_restrictions addressRestrictions = {};
2263 	addressRestrictions.address = *_address;
2264 	addressRestrictions.address_specification = addressSpec;
2265 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2266 		name, source->Size(), source->wiring, protection,
2267 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2268 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2269 		&addressRestrictions, true, &target, _address);
2270 	if (status < B_OK)
2271 		return status;
2272 
2273 	if (sharedArea) {
2274 		// The new area uses the old area's cache, but map_backing_store()
2275 		// hasn't acquired a ref. So we have to do that now.
2276 		cache->AcquireRefLocked();
2277 	}
2278 
2279 	// If the source area is writable, we need to move it one layer up as well
2280 
2281 	if (!sharedArea) {
2282 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2283 			// TODO: do something more useful if this fails!
2284 			if (vm_copy_on_write_area(cache) < B_OK)
2285 				panic("vm_copy_on_write_area() failed!\n");
2286 		}
2287 	}
2288 
2289 	// we return the ID of the newly created area
2290 	return target->id;
2291 }
2292 
2293 
2294 static status_t
2295 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2296 	bool kernel)
2297 {
2298 	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = "
2299 		"%#lx)\n", team, areaID, newProtection));
2300 
2301 	if (!arch_vm_supports_protection(newProtection))
2302 		return B_NOT_SUPPORTED;
2303 
2304 	bool becomesWritable
2305 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2306 
2307 	// lock address spaces and cache
2308 	MultiAddressSpaceLocker locker;
2309 	VMCache* cache;
2310 	VMArea* area;
2311 	status_t status;
2312 	AreaCacheLocker cacheLocker;
2313 	bool isWritable;
2314 
2315 	bool restart;
2316 	do {
2317 		restart = false;
2318 
2319 		locker.Unset();
2320 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2321 		if (status != B_OK)
2322 			return status;
2323 
2324 		cacheLocker.SetTo(cache, true);	// already locked
2325 
2326 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2327 			return B_NOT_ALLOWED;
2328 
2329 		if (area->protection == newProtection)
2330 			return B_OK;
2331 
2332 		if (team != VMAddressSpace::KernelID()
2333 			&& area->address_space->ID() != team) {
2334 			// unless you're the kernel, you are only allowed to set
2335 			// the protection of your own areas
2336 			return B_NOT_ALLOWED;
2337 		}
2338 
2339 		isWritable
2340 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2341 
2342 		// Make sure the area (or, if we're going to call
2343 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2344 		// wired ranges.
2345 		if (!isWritable && becomesWritable && !list_is_empty(&cache->consumers)) {
2346 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2347 					otherArea = otherArea->cache_next) {
2348 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2349 					restart = true;
2350 					break;
2351 				}
2352 			}
2353 		} else {
2354 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2355 				restart = true;
2356 		}
2357 	} while (restart);
2358 
2359 	bool changePageProtection = true;
2360 	bool changeTopCachePagesOnly = false;
2361 
2362 	if (isWritable && !becomesWritable) {
2363 		// writable -> !writable
2364 
2365 		if (cache->source != NULL && cache->temporary) {
2366 			if (cache->CountWritableAreas(area) == 0) {
2367 				// Since this cache is now backed by the pages of its source cache,
2368 				// we can reduce its commitment to account only for the pages that
2369 				// really are in this cache.
2370 
2371 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2372 					team == VMAddressSpace::KernelID()
2373 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2374 
2375 				// TODO: we may be able to join with our source cache, if
2376 				// count == 0
2377 			}
2378 		}
2379 
2380 		// If only the writability changes, we can just remap the pages of the
2381 		// top cache, since the pages of lower caches are mapped read-only
2382 		// anyway. That's only advantageous, though, if the number of pages in
2383 		// the cache is significantly smaller than the number of pages in the
2384 		// area.
2385 		if (newProtection
2386 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2387 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2388 			changeTopCachePagesOnly = true;
2389 		}
2390 	} else if (!isWritable && becomesWritable) {
2391 		// !writable -> writable
2392 
2393 		if (!list_is_empty(&cache->consumers)) {
2394 			// There are consumers -- we have to insert a new cache. Fortunately
2395 			// vm_copy_on_write_area() does everything that's needed.
2396 			changePageProtection = false;
2397 			status = vm_copy_on_write_area(cache);
2398 		} else {
2399 			// No consumers, so we don't need to insert a new cache.
2400 			if (cache->source != NULL && cache->temporary) {
2401 				// the cache's commitment must contain all possible pages
2402 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2403 					team == VMAddressSpace::KernelID()
2404 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2405 			}
2406 
2407 			if (status == B_OK && cache->source != NULL) {
2408 				// There's a source cache, hence we can't just change all pages'
2409 				// protection or we might allow writing into pages belonging to
2410 				// a lower cache.
2411 				changeTopCachePagesOnly = true;
2412 			}
2413 		}
2414 	} else {
2415 		// we don't have anything special to do in all other cases
2416 	}
2417 
2418 	if (status == B_OK) {
2419 		// remap existing pages in this cache
2420 		if (changePageProtection) {
2421 			VMTranslationMap* map = area->address_space->TranslationMap();
2422 			map->Lock();
2423 
2424 			if (changeTopCachePagesOnly) {
2425 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2426 				page_num_t lastPageOffset
2427 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2428 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2429 						vm_page* page = it.Next();) {
2430 					if (page->cache_offset >= firstPageOffset
2431 						&& page->cache_offset <= lastPageOffset) {
2432 						addr_t address = virtual_page_address(area, page);
2433 						map->ProtectPage(area, address, newProtection);
2434 					}
2435 				}
2436 			} else
2437 				map->ProtectArea(area, newProtection);
2438 
2439 			map->Unlock();
2440 		}
2441 
2442 		area->protection = newProtection;
2443 	}
2444 
2445 	return status;
2446 }
2447 
2448 
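/*!	Looks up the physical address the given virtual address is mapped to in
	the team's address space and returns it via \a paddr.
*/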
2449 status_t
2450 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2451 {
2452 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2453 	if (addressSpace == NULL)
2454 		return B_BAD_TEAM_ID;
2455 
2456 	VMTranslationMap* map = addressSpace->TranslationMap();
2457 
2458 	map->Lock();
2459 	uint32 dummyFlags;
2460 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2461 	map->Unlock();
2462 
2463 	addressSpace->Put();
2464 	return status;
2465 }
2466 
2467 
2468 /*!	The page's cache must be locked.
2469 */
2470 bool
2471 vm_test_map_modification(vm_page* page)
2472 {
2473 	if (page->modified)
2474 		return true;
2475 
2476 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2477 	vm_page_mapping* mapping;
2478 	while ((mapping = iterator.Next()) != NULL) {
2479 		VMArea* area = mapping->area;
2480 		VMTranslationMap* map = area->address_space->TranslationMap();
2481 
2482 		phys_addr_t physicalAddress;
2483 		uint32 flags;
2484 		map->Lock();
2485 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2486 		map->Unlock();
2487 
2488 		if ((flags & PAGE_MODIFIED) != 0)
2489 			return true;
2490 	}
2491 
2492 	return false;
2493 }
2494 
2495 
2496 /*!	The page's cache must be locked.
2497 */
2498 void
2499 vm_clear_map_flags(vm_page* page, uint32 flags)
2500 {
2501 	if ((flags & PAGE_ACCESSED) != 0)
2502 		page->accessed = false;
2503 	if ((flags & PAGE_MODIFIED) != 0)
2504 		page->modified = false;
2505 
2506 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2507 	vm_page_mapping* mapping;
2508 	while ((mapping = iterator.Next()) != NULL) {
2509 		VMArea* area = mapping->area;
2510 		VMTranslationMap* map = area->address_space->TranslationMap();
2511 
2512 		map->Lock();
2513 		map->ClearFlags(virtual_page_address(area, page), flags);
2514 		map->Unlock();
2515 	}
2516 }
2517 
2518 
2519 /*!	Removes all mappings from a page.
2520 	After you've called this function, the page is unmapped from memory and
2521 	the page's \c accessed and \c modified flags have been updated according
2522 	to the state of the mappings.
2523 	The page's cache must be locked.
2524 */
2525 void
2526 vm_remove_all_page_mappings(vm_page* page)
2527 {
2528 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2529 		VMArea* area = mapping->area;
2530 		VMTranslationMap* map = area->address_space->TranslationMap();
2531 		addr_t address = virtual_page_address(area, page);
2532 		map->UnmapPage(area, address, false);
2533 	}
2534 }
2535 
2536 
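/*!	Clears the accessed flag of all of the page's mappings and of the page
	itself, updates the page's \c modified flag from the mappings, and returns
	the number of accessed flags that were set.
	The page's cache must be locked.
*/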
2537 int32
2538 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2539 {
2540 	int32 count = 0;
2541 
2542 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2543 	vm_page_mapping* mapping;
2544 	while ((mapping = iterator.Next()) != NULL) {
2545 		VMArea* area = mapping->area;
2546 		VMTranslationMap* map = area->address_space->TranslationMap();
2547 
2548 		bool modified;
2549 		if (map->ClearAccessedAndModified(area,
2550 				virtual_page_address(area, page), false, modified)) {
2551 			count++;
2552 		}
2553 
2554 		page->modified |= modified;
2555 	}
2556 
2557 
2558 	if (page->accessed) {
2559 		count++;
2560 		page->accessed = false;
2561 	}
2562 
2563 	return count;
2564 }
2565 
2566 
2567 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2568 	mappings.
2569 	The function iterates through the page mappings and removes them until
2570 	encountering one that has been accessed. From then on it will continue to
2571 	iterate, but only clear the accessed flag of the mapping. The page's
2572 	\c modified bit will be updated accordingly, the \c accessed bit will be
2573 	cleared.
2574 	\return The number of mapping accessed bits encountered, including the
2575 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2576 		of the page have been removed.
2577 */
2578 int32
2579 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2580 {
2581 	ASSERT(page->wired_count == 0);
2582 
2583 	if (page->accessed)
2584 		return vm_clear_page_mapping_accessed_flags(page);
2585 
2586 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2587 		VMArea* area = mapping->area;
2588 		VMTranslationMap* map = area->address_space->TranslationMap();
2589 		addr_t address = virtual_page_address(area, page);
2590 		bool modified = false;
2591 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2592 			page->accessed = true;
2593 			page->modified |= modified;
2594 			return vm_clear_page_mapping_accessed_flags(page);
2595 		}
2596 		page->modified |= modified;
2597 	}
2598 
2599 	return 0;
2600 }
2601 
2602 
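/*!	KDL command backing "dl", "dw", "ds", "db", and "string": dumps memory at
	the given virtual or (with -p/--physical) physical address.
*/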
2603 static int
2604 display_mem(int argc, char** argv)
2605 {
2606 	bool physical = false;
2607 	addr_t copyAddress;
2608 	int32 displayWidth;
2609 	int32 itemSize;
2610 	int32 num = -1;
2611 	addr_t address;
2612 	int i = 1, j;
2613 
2614 	if (argc > 1 && argv[1][0] == '-') {
2615 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2616 			physical = true;
2617 			i++;
2618 		} else
2619 			i = 99;
2620 	}
2621 
2622 	if (argc < i + 1 || argc > i + 2) {
2623 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2624 			"\tdl - 8 bytes\n"
2625 			"\tdw - 4 bytes\n"
2626 			"\tds - 2 bytes\n"
2627 			"\tdb - 1 byte\n"
2628 			"\tstring - a whole string\n"
2629 			"  -p or --physical only allows memory from a single page to be "
2630 			"displayed.\n");
2631 		return 0;
2632 	}
2633 
2634 	address = parse_expression(argv[i]);
2635 
2636 	if (argc > i + 1)
2637 		num = parse_expression(argv[i + 1]);
2638 
2639 	// build the format string
2640 	if (strcmp(argv[0], "db") == 0) {
2641 		itemSize = 1;
2642 		displayWidth = 16;
2643 	} else if (strcmp(argv[0], "ds") == 0) {
2644 		itemSize = 2;
2645 		displayWidth = 8;
2646 	} else if (strcmp(argv[0], "dw") == 0) {
2647 		itemSize = 4;
2648 		displayWidth = 4;
2649 	} else if (strcmp(argv[0], "dl") == 0) {
2650 		itemSize = 8;
2651 		displayWidth = 2;
2652 	} else if (strcmp(argv[0], "string") == 0) {
2653 		itemSize = 1;
2654 		displayWidth = -1;
2655 	} else {
2656 		kprintf("display_mem called in an invalid way!\n");
2657 		return 0;
2658 	}
2659 
2660 	if (num <= 0)
2661 		num = displayWidth;
2662 
2663 	void* physicalPageHandle = NULL;
2664 
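	// A physical address must be mapped into the kernel address space before
	// we can read from it; only a single physical page can be dumped.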
2665 	if (physical) {
2666 		int32 offset = address & (B_PAGE_SIZE - 1);
2667 		if (num * itemSize + offset > B_PAGE_SIZE) {
2668 			num = (B_PAGE_SIZE - offset) / itemSize;
2669 			kprintf("NOTE: number of bytes has been cut to page size\n");
2670 		}
2671 
2672 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2673 
2674 		if (vm_get_physical_page_debug(address, &copyAddress,
2675 				&physicalPageHandle) != B_OK) {
2676 			kprintf("getting the hardware page failed.");
2677 			return 0;
2678 		}
2679 
2680 		address += offset;
2681 		copyAddress += offset;
2682 	} else
2683 		copyAddress = address;
2684 
2685 	if (!strcmp(argv[0], "string")) {
2686 		kprintf("%p \"", (char*)copyAddress);
2687 
2688 		// string mode
2689 		for (i = 0; true; i++) {
2690 			char c;
2691 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2692 					!= B_OK
2693 				|| c == '\0') {
2694 				break;
2695 			}
2696 
2697 			if (c == '\n')
2698 				kprintf("\\n");
2699 			else if (c == '\t')
2700 				kprintf("\\t");
2701 			else {
2702 				if (!isprint(c))
2703 					c = '.';
2704 
2705 				kprintf("%c", c);
2706 			}
2707 		}
2708 
2709 		kprintf("\"\n");
2710 	} else {
2711 		// number mode
2712 		for (i = 0; i < num; i++) {
2713 			uint32 value;
2714 
2715 			if ((i % displayWidth) == 0) {
2716 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2717 				if (i != 0)
2718 					kprintf("\n");
2719 
2720 				kprintf("[0x%lx]  ", address + i * itemSize);
2721 
2722 				for (j = 0; j < displayed; j++) {
2723 					char c;
2724 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2725 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2726 						displayed = j;
2727 						break;
2728 					}
2729 					if (!isprint(c))
2730 						c = '.';
2731 
2732 					kprintf("%c", c);
2733 				}
2734 				if (num > displayWidth) {
2735 					// make sure the spacing in the last line is correct
2736 					for (j = displayed; j < displayWidth * itemSize; j++)
2737 						kprintf(" ");
2738 				}
2739 				kprintf("  ");
2740 			}
2741 
2742 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2743 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2744 				kprintf("read fault");
2745 				break;
2746 			}
2747 
2748 			switch (itemSize) {
2749 				case 1:
2750 					kprintf(" %02x", *(uint8*)&value);
2751 					break;
2752 				case 2:
2753 					kprintf(" %04x", *(uint16*)&value);
2754 					break;
2755 				case 4:
2756 					kprintf(" %08lx", *(uint32*)&value);
2757 					break;
2758 				case 8:
2759 					kprintf(" %016Lx", *(uint64*)&value);
2760 					break;
2761 			}
2762 		}
2763 
2764 		kprintf("\n");
2765 	}
2766 
2767 	if (physical) {
2768 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
2769 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
2770 	}
2771 	return 0;
2772 }
2773 
2774 
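/*!	Recursively prints the consumer tree of \a cache, indented by tree level,
	marking \a highlightCache with an arrow.
*/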
2775 static void
2776 dump_cache_tree_recursively(VMCache* cache, int level,
2777 	VMCache* highlightCache)
2778 {
2779 	// print this cache
2780 	for (int i = 0; i < level; i++)
2781 		kprintf("  ");
2782 	if (cache == highlightCache)
2783 		kprintf("%p <--\n", cache);
2784 	else
2785 		kprintf("%p\n", cache);
2786 
2787 	// recursively print its consumers
2788 	VMCache* consumer = NULL;
2789 	while ((consumer = (VMCache*)list_get_next_item(&cache->consumers,
2790 			consumer)) != NULL) {
2791 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
2792 	}
2793 }
2794 
2795 
2796 static int
2797 dump_cache_tree(int argc, char** argv)
2798 {
2799 	if (argc != 2 || !strcmp(argv[1], "--help")) {
2800 		kprintf("usage: %s <address>\n", argv[0]);
2801 		return 0;
2802 	}
2803 
2804 	addr_t address = parse_expression(argv[1]);
2805 	if (address == 0)
2806 		return 0;
2807 
2808 	VMCache* cache = (VMCache*)address;
2809 	VMCache* root = cache;
2810 
2811 	// find the root cache (the transitive source)
2812 	while (root->source != NULL)
2813 		root = root->source;
2814 
2815 	dump_cache_tree_recursively(root, 0, cache);
2816 
2817 	return 0;
2818 }
2819 
2820 
2821 static const char*
2822 cache_type_to_string(int32 type)
2823 {
2824 	switch (type) {
2825 		case CACHE_TYPE_RAM:
2826 			return "RAM";
2827 		case CACHE_TYPE_DEVICE:
2828 			return "device";
2829 		case CACHE_TYPE_VNODE:
2830 			return "vnode";
2831 		case CACHE_TYPE_NULL:
2832 			return "null";
2833 
2834 		default:
2835 			return "unknown";
2836 	}
2837 }
2838 
2839 
2840 #if DEBUG_CACHE_LIST
2841 
2842 static void
2843 update_cache_info_recursively(VMCache* cache, cache_info& info)
2844 {
2845 	info.page_count += cache->page_count;
2846 	if (cache->type == CACHE_TYPE_RAM)
2847 		info.committed += cache->committed_size;
2848 
2849 	// recurse
2850 	VMCache* consumer = NULL;
2851 	while ((consumer = (VMCache*)list_get_next_item(&cache->consumers,
2852 			consumer)) != NULL) {
2853 		update_cache_info_recursively(consumer, info);
2854 	}
2855 }
2856 
2857 
2858 static int
2859 cache_info_compare_page_count(const void* _a, const void* _b)
2860 {
2861 	const cache_info* a = (const cache_info*)_a;
2862 	const cache_info* b = (const cache_info*)_b;
2863 	if (a->page_count == b->page_count)
2864 		return 0;
2865 	return a->page_count < b->page_count ? 1 : -1;
2866 }
2867 
2868 
2869 static int
2870 cache_info_compare_committed(const void* _a, const void* _b)
2871 {
2872 	const cache_info* a = (const cache_info*)_a;
2873 	const cache_info* b = (const cache_info*)_b;
2874 	if (a->committed == b->committed)
2875 		return 0;
2876 	return a->committed < b->committed ? 1 : -1;
2877 }
2878 
2879 
2880 static void
2881 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
2882 {
2883 	for (int i = 0; i < level; i++)
2884 		kprintf("  ");
2885 
2886 	kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache,
2887 		cache_type_to_string(cache->type), cache->virtual_base,
2888 		cache->virtual_end, cache->page_count);
2889 
2890 	if (level == 0)
2891 		kprintf("/%lu", info.page_count);
2892 
2893 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
2894 		kprintf(", committed: %lld", cache->committed_size);
2895 
2896 		if (level == 0)
2897 			kprintf("/%lu", info.committed);
2898 	}
2899 
2900 	// areas
2901 	if (cache->areas != NULL) {
2902 		VMArea* area = cache->areas;
2903 		kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name,
2904 			area->address_space->ID());
2905 
2906 		while (area->cache_next != NULL) {
2907 			area = area->cache_next;
2908 			kprintf(", %ld", area->id);
2909 		}
2910 	}
2911 
2912 	kputs("\n");
2913 
2914 	// recurse
2915 	VMCache* consumer = NULL;
2916 	while ((consumer = (VMCache*)list_get_next_item(&cache->consumers,
2917 			consumer)) != NULL) {
2918 		dump_caches_recursively(consumer, info, level + 1);
2919 	}
2920 }
2921 
2922 
2923 static int
2924 dump_caches(int argc, char** argv)
2925 {
2926 	if (sCacheInfoTable == NULL) {
2927 		kprintf("No cache info table!\n");
2928 		return 0;
2929 	}
2930 
2931 	bool sortByPageCount = true;
2932 
2933 	for (int32 i = 1; i < argc; i++) {
2934 		if (strcmp(argv[i], "-c") == 0) {
2935 			sortByPageCount = false;
2936 		} else {
2937 			print_debugger_command_usage(argv[0]);
2938 			return 0;
2939 		}
2940 	}
2941 
2942 	uint32 totalCount = 0;
2943 	uint32 rootCount = 0;
2944 	off_t totalCommitted = 0;
2945 	page_num_t totalPages = 0;
2946 
2947 	VMCache* cache = gDebugCacheList;
2948 	while (cache) {
2949 		totalCount++;
2950 		if (cache->source == NULL) {
2951 			cache_info stackInfo;
2952 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
2953 				? sCacheInfoTable[rootCount] : stackInfo;
2954 			rootCount++;
2955 			info.cache = cache;
2956 			info.page_count = 0;
2957 			info.committed = 0;
2958 			update_cache_info_recursively(cache, info);
2959 			totalCommitted += info.committed;
2960 			totalPages += info.page_count;
2961 		}
2962 
2963 		cache = cache->debug_next;
2964 	}
2965 
2966 	if (rootCount <= (uint32)kCacheInfoTableCount) {
2967 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
2968 			sortByPageCount
2969 				? &cache_info_compare_page_count
2970 				: &cache_info_compare_committed);
2971 	}
2972 
2973 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
2974 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
2975 	kprintf("%lu caches (%lu root caches), sorted by %s per cache "
2976 		"tree...\n\n", totalCount, rootCount,
2977 		sortByPageCount ? "page count" : "committed size");
2978 
2979 	if (rootCount <= (uint32)kCacheInfoTableCount) {
2980 		for (uint32 i = 0; i < rootCount; i++) {
2981 			cache_info& info = sCacheInfoTable[i];
2982 			dump_caches_recursively(info.cache, info, 0);
2983 		}
2984 	} else
2985 		kprintf("Cache info table too small! Can't sort and print caches!\n");
2986 
2987 	return 0;
2988 }
2989 
2990 #endif	// DEBUG_CACHE_LIST
2991 
2992 
2993 static int
2994 dump_cache(int argc, char** argv)
2995 {
2996 	VMCache* cache;
2997 	bool showPages = false;
2998 	int i = 1;
2999 
3000 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3001 		kprintf("usage: %s [-ps] <address>\n"
3002 			"  if -p is specified, all pages are shown; if -s is used,\n"
3003 			"  only the cache info is shown.\n", argv[0]);
3004 		return 0;
3005 	}
3006 	while (argv[i][0] == '-') {
3007 		char* arg = argv[i] + 1;
3008 		while (arg[0]) {
3009 			if (arg[0] == 'p')
3010 				showPages = true;
3011 			arg++;
3012 		}
3013 		i++;
3014 	}
3015 	if (argv[i] == NULL) {
3016 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3017 		return 0;
3018 	}
3019 
3020 	addr_t address = parse_expression(argv[i]);
3021 	if (address == 0)
3022 		return 0;
3023 
3024 	cache = (VMCache*)address;
3025 
3026 	kprintf("CACHE %p:\n", cache);
3027 	kprintf("  ref_count:    %ld\n", cache->RefCount());
3028 	kprintf("  source:       %p\n", cache->source);
3029 	kprintf("  type:         %s\n", cache_type_to_string(cache->type));
3030 	kprintf("  virtual_base: 0x%Lx\n", cache->virtual_base);
3031 	kprintf("  virtual_end:  0x%Lx\n", cache->virtual_end);
3032 	kprintf("  temporary:    %ld\n", cache->temporary);
3033 	kprintf("  scan_skip:    %ld\n", cache->scan_skip);
3034 	kprintf("  lock:         %p\n", cache->GetLock());
3035 #if KDEBUG
3036 	kprintf("  lock.holder:  %ld\n", cache->GetLock()->holder);
3037 #endif
3038 	kprintf("  areas:\n");
3039 
3040 	for (VMArea* area = cache->areas; area != NULL; area = area->cache_next) {
3041 		kprintf("    area 0x%lx, %s\n", area->id, area->name);
3042 		kprintf("\tbase_addr:  0x%lx, size: 0x%lx\n", area->Base(),
3043 			area->Size());
3044 		kprintf("\tprotection: 0x%lx\n", area->protection);
3045 		kprintf("\towner:      0x%lx\n", area->address_space->ID());
3046 	}
3047 
3048 	kprintf("  consumers:\n");
3049 	VMCache* consumer = NULL;
3050 	while ((consumer = (VMCache*)list_get_next_item(&cache->consumers,
3051 				consumer)) != NULL) {
3052 		kprintf("\t%p\n", consumer);
3053 	}
3054 
3055 	kprintf("  pages:\n");
3056 	if (showPages) {
3057 		for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3058 				vm_page* page = it.Next();) {
3059 			if (!vm_page_is_dummy(page)) {
3060 				kprintf("\t%p ppn %#" B_PRIxPHYSADDR " offset %#" B_PRIxPHYSADDR
3061 					" state %u (%s) wired_count %u\n", page,
3062 					page->physical_page_number, page->cache_offset,
3063 					page->State(), page_state_to_string(page->State()),
3064 					page->wired_count);
3065 			} else {
3066 				kprintf("\t%p DUMMY PAGE state %u (%s)\n",
3067 					page, page->State(), page_state_to_string(page->State()));
3068 			}
3069 		}
3070 	} else
3071 		kprintf("\t%ld in cache\n", cache->page_count);
3072 
3073 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3074 
3075 	return 0;
3076 }
3077 
3078 
3079 static void
3080 dump_area_struct(VMArea* area, bool mappings)
3081 {
3082 	kprintf("AREA: %p\n", area);
3083 	kprintf("name:\t\t'%s'\n", area->name);
3084 	kprintf("owner:\t\t0x%lx\n", area->address_space->ID());
3085 	kprintf("id:\t\t0x%lx\n", area->id);
3086 	kprintf("base:\t\t0x%lx\n", area->Base());
3087 	kprintf("size:\t\t0x%lx\n", area->Size());
3088 	kprintf("protection:\t0x%lx\n", area->protection);
3089 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3090 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3091 	kprintf("cache:\t\t%p\n", area->cache);
3092 	kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type));
3093 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
3094 	kprintf("cache_next:\t%p\n", area->cache_next);
3095 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3096 
3097 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3098 	if (mappings) {
3099 		kprintf("page mappings:\n");
3100 		while (iterator.HasNext()) {
3101 			vm_page_mapping* mapping = iterator.Next();
3102 			kprintf("  %p", mapping->page);
3103 		}
3104 		kprintf("\n");
3105 	} else {
3106 		uint32 count = 0;
3107 		while (iterator.Next() != NULL) {
3108 			count++;
3109 		}
3110 		kprintf("page mappings:\t%lu\n", count);
3111 	}
3112 }
3113 
3114 
3115 static int
3116 dump_area(int argc, char** argv)
3117 {
3118 	bool mappings = false;
3119 	bool found = false;
3120 	int32 index = 1;
3121 	VMArea* area;
3122 	addr_t num;
3123 
3124 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3125 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3126 			"All areas matching the given id/address/name are listed. You can\n"
3127 			"restrict the check to a specific attribute by prefixing the\n"
3128 			"specifier with the id/contains/address/name keywords.\n"
3129 			"-m shows the area's mappings as well.\n");
3130 		return 0;
3131 	}
3132 
3133 	if (!strcmp(argv[1], "-m")) {
3134 		mappings = true;
3135 		index++;
3136 	}
3137 
3138 	int32 mode = 0xf;
3139 	if (!strcmp(argv[index], "id"))
3140 		mode = 1;
3141 	else if (!strcmp(argv[index], "contains"))
3142 		mode = 2;
3143 	else if (!strcmp(argv[index], "name"))
3144 		mode = 4;
3145 	else if (!strcmp(argv[index], "address"))
3146 		mode = 0;
3147 	if (mode != 0xf)
3148 		index++;
3149 
3150 	if (index >= argc) {
3151 		kprintf("No area specifier given.\n");
3152 		return 0;
3153 	}
3154 
3155 	num = parse_expression(argv[index]);
3156 
3157 	if (mode == 0) {
3158 		dump_area_struct((struct VMArea*)num, mappings);
3159 	} else {
3160 		// walk through the area list, looking for the arguments as a name
3161 
3162 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3163 		while ((area = it.Next()) != NULL) {
3164 			if (((mode & 4) != 0 && area->name != NULL
3165 					&& !strcmp(argv[index], area->name))
3166 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3167 					|| (((mode & 2) != 0 && area->Base() <= num
3168 						&& area->Base() + area->Size() > num))))) {
3169 				dump_area_struct(area, mappings);
3170 				found = true;
3171 			}
3172 		}
3173 
3174 		if (!found)
3175 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3176 	}
3177 
3178 	return 0;
3179 }
3180 
3181 
3182 static int
3183 dump_area_list(int argc, char** argv)
3184 {
3185 	VMArea* area;
3186 	const char* name = NULL;
3187 	int32 id = 0;
3188 
3189 	if (argc > 1) {
3190 		id = parse_expression(argv[1]);
3191 		if (id == 0)
3192 			name = argv[1];
3193 	}
3194 
3195 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
3196 
3197 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3198 	while ((area = it.Next()) != NULL) {
3199 		if ((id != 0 && area->address_space->ID() != id)
3200 			|| (name != NULL && strstr(area->name, name) == NULL))
3201 			continue;
3202 
3203 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id,
3204 			(void*)area->Base(), (void*)area->Size(), area->protection,
3205 			area->wiring, area->name);
3206 	}
3207 	return 0;
3208 }
3209 
3210 
3211 static int
3212 dump_available_memory(int argc, char** argv)
3213 {
3214 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3215 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3216 	return 0;
3217 }
3218 
3219 
3220 /*!	Deletes all areas and reserved regions in the given address space.
3221 
3222 	The caller must ensure that none of the areas has any wired ranges.
3223 
3224 	\param addressSpace The address space.
3225 	\param deletingAddressSpace \c true, if the address space is in the process
3226 		of being deleted.
3227 */
3228 void
3229 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3230 {
3231 	TRACE(("vm_delete_areas: called on address space 0x%lx\n",
3232 		addressSpace->ID()));
3233 
3234 	addressSpace->WriteLock();
3235 
3236 	// remove all reserved areas in this address space
3237 	addressSpace->UnreserveAllAddressRanges(0);
3238 
3239 	// delete all the areas in this address space
3240 	while (VMArea* area = addressSpace->FirstArea()) {
3241 		ASSERT(!area->IsWired());
3242 		delete_area(addressSpace, area, deletingAddressSpace);
3243 	}
3244 
3245 	addressSpace->WriteUnlock();
3246 }
3247 
3248 
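/*!	Returns the ID of the area the given address lies in. The lookup uses the
	calling team's address space for userland addresses and the kernel's
	otherwise. If \a kernel is \c false, areas without any userland protection
	are not reported.
*/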
3249 static area_id
3250 vm_area_for(addr_t address, bool kernel)
3251 {
3252 	team_id team;
3253 	if (IS_USER_ADDRESS(address)) {
3254 		// we try the user team address space, if any
3255 		team = VMAddressSpace::CurrentID();
3256 		if (team < 0)
3257 			return team;
3258 	} else
3259 		team = VMAddressSpace::KernelID();
3260 
3261 	AddressSpaceReadLocker locker(team);
3262 	if (!locker.IsLocked())
3263 		return B_BAD_TEAM_ID;
3264 
3265 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3266 	if (area != NULL) {
3267 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3268 			return B_ERROR;
3269 
3270 		return area->id;
3271 	}
3272 
3273 	return B_ERROR;
3274 }
3275 
3276 
3277 /*!	Frees physical pages that were used during the boot process.
3278 	\a end is inclusive.
3279 */
3280 static void
3281 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3282 {
3283 	// free all physical pages in the specified range
3284 
3285 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3286 		phys_addr_t physicalAddress;
3287 		uint32 flags;
3288 
3289 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3290 			&& (flags & PAGE_PRESENT) != 0) {
3291 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3292 			if (page != NULL && page->State() != PAGE_STATE_FREE
3293 					 && page->State() != PAGE_STATE_CLEAR
3294 					 && page->State() != PAGE_STATE_UNUSED) {
3295 				DEBUG_PAGE_ACCESS_START(page);
3296 				vm_page_set_state(page, PAGE_STATE_FREE);
3297 			}
3298 		}
3299 	}
3300 
3301 	// unmap the memory
3302 	map->Unmap(start, end);
3303 }
3304 
3305 
3306 void
3307 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3308 {
3309 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3310 	addr_t end = start + (size - 1);
3311 	addr_t lastEnd = start;
3312 
3313 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3314 		(void*)start, (void*)end));
3315 
3316 	// The areas are sorted in virtual address space order, so
3317 	// we just have to find the holes between them that fall
3318 	// into the range we should dispose of
3319 
3320 	map->Lock();
3321 
3322 	for (VMAddressSpace::AreaIterator it
3323 				= VMAddressSpace::Kernel()->GetAreaIterator();
3324 			VMArea* area = it.Next();) {
3325 		addr_t areaStart = area->Base();
3326 		addr_t areaEnd = areaStart + (area->Size() - 1);
3327 
3328 		if (areaEnd < start)
3329 			continue;
3330 
3331 		if (areaStart > end) {
3332 			// we are done, the area is already beyond what we have to free
3333 			break;
3334 		}
3335 
3336 		if (areaStart > lastEnd) {
3337 			// this is something we can free
3338 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3339 				(void*)areaStart));
3340 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3341 		}
3342 
3343 		if (areaEnd >= end) {
3344 			lastEnd = areaEnd;
3345 				// no +1 to prevent potential overflow
3346 			break;
3347 		}
3348 
3349 		lastEnd = areaEnd + 1;
3350 	}
3351 
3352 	if (lastEnd < end) {
3353 		// we can also get rid of some space at the end of the area
3354 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3355 			(void*)end));
3356 		unmap_and_free_physical_pages(map, lastEnd, end);
3357 	}
3358 
3359 	map->Unlock();
3360 }
3361 
3362 
3363 static void
3364 create_preloaded_image_areas(struct preloaded_image* image)
3365 {
3366 	char name[B_OS_NAME_LENGTH];
3367 	void* address;
3368 	int32 length;
3369 
3370 	// use file name to create a good area name
3371 	char* fileName = strrchr(image->name, '/');
3372 	if (fileName == NULL)
3373 		fileName = image->name;
3374 	else
3375 		fileName++;
3376 
3377 	length = strlen(fileName);
3378 	// make sure there is enough space for the suffix
3379 	if (length > 25)
3380 		length = 25;
3381 
3382 	memcpy(name, fileName, length);
3383 	strcpy(name + length, "_text");
3384 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3385 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3386 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3387 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3388 		// this will later be remapped read-only/executable by the
3389 		// ELF initialization code
3390 
3391 	strcpy(name + length, "_data");
3392 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3393 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3394 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3395 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3396 }
3397 
3398 
3399 /*!	Frees all areas that were previously created for the kernel args ranges of
3400 	the kernel_args structure. Any boot loader resources contained in those
3401 	arguments must not be accessed anymore past this point.
3402 */
3403 void
3404 vm_free_kernel_args(kernel_args* args)
3405 {
3406 	uint32 i;
3407 
3408 	TRACE(("vm_free_kernel_args()\n"));
3409 
3410 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3411 		area_id area = area_for((void*)args->kernel_args_range[i].start);
3412 		if (area >= B_OK)
3413 			delete_area(area);
3414 	}
3415 }
3416 
3417 
3418 static void
3419 allocate_kernel_args(kernel_args* args)
3420 {
3421 	TRACE(("allocate_kernel_args()\n"));
3422 
3423 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3424 		void* address = (void*)args->kernel_args_range[i].start;
3425 
3426 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3427 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3428 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3429 	}
3430 }
3431 
3432 
3433 static void
3434 unreserve_boot_loader_ranges(kernel_args* args)
3435 {
3436 	TRACE(("unreserve_boot_loader_ranges()\n"));
3437 
3438 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3439 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3440 			(void*)args->virtual_allocated_range[i].start,
3441 			args->virtual_allocated_range[i].size);
3442 	}
3443 }
3444 
3445 
3446 static void
3447 reserve_boot_loader_ranges(kernel_args* args)
3448 {
3449 	TRACE(("reserve_boot_loader_ranges()\n"));
3450 
3451 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3452 		void* address = (void*)args->virtual_allocated_range[i].start;
3453 
3454 		// If the address is not a kernel address, we just skip it. The
3455 		// architecture specific code has to deal with it.
3456 		if (!IS_KERNEL_ADDRESS(address)) {
3457 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
3458 				address, args->virtual_allocated_range[i].size);
3459 			continue;
3460 		}
3461 
3462 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3463 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3464 		if (status < B_OK)
3465 			panic("could not reserve boot loader ranges\n");
3466 	}
3467 }
3468 
3469 
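/*!	Allocates \a size bytes of virtual address space from the gaps between (or
	around) the kernel_args' virtual allocated ranges, honoring \a alignment
	and growing an adjacent range accordingly.
	Returns the base address, or 0 if no suitable gap was found.
*/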
3470 static addr_t
3471 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3472 {
3473 	size = PAGE_ALIGN(size);
3474 
3475 	// find a slot in the virtual allocation addr range
3476 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3477 		// check to see if the space between this one and the last is big enough
3478 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3479 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3480 			+ args->virtual_allocated_range[i - 1].size;
3481 
3482 		addr_t base = alignment > 0
3483 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3484 
3485 		if (base >= KERNEL_BASE && base < rangeStart
3486 				&& rangeStart - base >= size) {
3487 			args->virtual_allocated_range[i - 1].size
3488 				+= base + size - previousRangeEnd;
3489 			return base;
3490 		}
3491 	}
3492 
3493 	// we didn't find a gap between the allocated ranges; that is OK.
3494 	// see if there's a gap after the last one
3495 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3496 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3497 		+ args->virtual_allocated_range[lastEntryIndex].size;
3498 	addr_t base = alignment > 0
3499 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3500 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3501 		args->virtual_allocated_range[lastEntryIndex].size
3502 			+= base + size - lastRangeEnd;
3503 		return base;
3504 	}
3505 
3506 	// see if there's a gap before the first one
3507 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3508 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3509 		base = rangeStart - size;
3510 		if (alignment > 0)
3511 			base = ROUNDDOWN(base, alignment);
3512 
3513 		if (base >= KERNEL_BASE) {
3514 			args->virtual_allocated_range[0].start = base;
3515 			args->virtual_allocated_range[0].size += rangeStart - base;
3516 			return base;
3517 		}
3518 	}
3519 
3520 	return 0;
3521 }
3522 
3523 
3524 static bool
3525 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3526 {
3527 	// TODO: horrible brute-force method of determining if the page can be
3528 	// allocated
3529 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3530 		if (address >= args->physical_memory_range[i].start
3531 			&& address < args->physical_memory_range[i].start
3532 				+ args->physical_memory_range[i].size)
3533 			return true;
3534 	}
3535 	return false;
3536 }
3537 
3538 
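/*!	Allocates the physical page directly following one of the kernel_args'
	physical allocated ranges, provided it lies in physical memory and doesn't
	collide with the next allocated range, and extends that range by one page.
	Returns the page number, or 0 if no page could be allocated.
*/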
3539 page_num_t
3540 vm_allocate_early_physical_page(kernel_args* args)
3541 {
3542 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3543 		phys_addr_t nextPage;
3544 
3545 		nextPage = args->physical_allocated_range[i].start
3546 			+ args->physical_allocated_range[i].size;
3547 		// see if the page after this allocated paddr run can be allocated
3548 		if (i + 1 < args->num_physical_allocated_ranges
3549 			&& args->physical_allocated_range[i + 1].size != 0) {
3550 			// see if the next page will collide with the next allocated range
3551 			if (nextPage >= args->physical_allocated_range[i+1].start)
3552 				continue;
3553 		}
3554 		// see if the next physical page fits in the memory block
3555 		if (is_page_in_physical_memory_range(args, nextPage)) {
3556 			// we got one!
3557 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3558 			return nextPage / B_PAGE_SIZE;
3559 		}
3560 	}
3561 
3562 	return 0;
3563 		// could not allocate a block
3564 }
3565 
3566 
3567 /*!	Uses the kernel_args' physical and virtual memory ranges to allocate
3568 	some memory before the VM is completely up.
3569 */
3570 addr_t
3571 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3572 	uint32 attributes, addr_t alignment)
3573 {
3574 	if (physicalSize > virtualSize)
3575 		physicalSize = virtualSize;
3576 
3577 	// find the vaddr to allocate at
3578 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3579 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress);
3580 
3581 	// map the pages
3582 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3583 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3584 		if (physicalAddress == 0)
3585 			panic("error allocating early page!\n");
3586 
3587 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3588 
3589 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3590 			physicalAddress * B_PAGE_SIZE, attributes,
3591 			&vm_allocate_early_physical_page);
3592 	}
3593 
3594 	return virtualBase;
3595 }
3596 
3597 
3598 /*!	The main entry point for initializing the VM. */
3599 status_t
3600 vm_init(kernel_args* args)
3601 {
3602 	struct preloaded_image* image;
3603 	void* address;
3604 	status_t err = 0;
3605 	uint32 i;
3606 
3607 	TRACE(("vm_init: entry\n"));
3608 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3609 	err = arch_vm_init(args);
3610 
3611 	// initialize some globals
3612 	vm_page_init_num_pages(args);
3613 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3614 
3615 	size_t heapSize = INITIAL_HEAP_SIZE;
3616 	// try to accommodate low-memory systems
3617 	while (heapSize > sAvailableMemory / 8)
3618 		heapSize /= 2;
3619 	if (heapSize < 1024 * 1024)
3620 		panic("vm_init: go buy some RAM please.");
3621 
3622 	slab_init(args);
3623 
3624 #if	!USE_SLAB_ALLOCATOR_FOR_MALLOC
3625 	// map in the new heap and initialize it
3626 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3627 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3628 	TRACE(("heap at 0x%lx\n", heapBase));
3629 	heap_init(heapBase, heapSize);
3630 #endif
3631 
3632 	// initialize the free page list and physical page mapper
3633 	vm_page_init(args);
3634 
3635 	// initialize the hash table that stores the pages mapped to caches
3636 	vm_cache_init(args);
3637 
3638 	{
3639 		status_t error = VMAreaHash::Init();
3640 		if (error != B_OK)
3641 			panic("vm_init: error initializing area hash table\n");
3642 	}
3643 
3644 	VMAddressSpace::Init();
3645 	reserve_boot_loader_ranges(args);
3646 
3647 #if !USE_SLAB_ALLOCATOR_FOR_MALLOC
3648 	heap_init_post_area();
3649 #endif
3650 
3651 	// Do any further initialization that the architecture-dependent layers may
3652 	// need now
3653 	arch_vm_translation_map_init_post_area(args);
3654 	arch_vm_init_post_area(args);
3655 	vm_page_init_post_area(args);
3656 	slab_init_post_area();
3657 
3658 	// allocate areas to represent stuff that already exists
3659 
3660 #if	!USE_SLAB_ALLOCATOR_FOR_MALLOC
3661 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
3662 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
3663 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3664 #endif
3665 
3666 	allocate_kernel_args(args);
3667 
3668 	create_preloaded_image_areas(&args->kernel_image);
3669 
3670 	// allocate areas for preloaded images
3671 	for (image = args->preloaded_images; image != NULL; image = image->next)
3672 		create_preloaded_image_areas(image);
3673 
3674 	// allocate kernel stacks
3675 	for (i = 0; i < args->num_cpus; i++) {
3676 		char name[64];
3677 
3678 		sprintf(name, "idle thread %lu kstack", i + 1);
3679 		address = (void*)args->cpu_kstack[i].start;
3680 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
3681 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3682 	}
3683 
3684 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
3685 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
3686 
3687 	// create the object cache for the page mappings
3688 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
3689 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
3690 		NULL, NULL);
3691 	if (gPageMappingsObjectCache == NULL)
3692 		panic("failed to create page mappings object cache");
3693 
3694 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
3695 
3696 #if DEBUG_CACHE_LIST
3697 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
3698 		virtual_address_restrictions virtualRestrictions = {};
3699 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
3700 		physical_address_restrictions physicalRestrictions = {};
3701 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
3702 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
3703 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
3704 			CREATE_AREA_DONT_WAIT, &virtualRestrictions, &physicalRestrictions,
3705 			(void**)&sCacheInfoTable);
3706 	}
3707 #endif	// DEBUG_CACHE_LIST
3708 
3709 	// add some debugger commands
3710 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
3711 	add_debugger_command("area", &dump_area,
3712 		"Dump info about a particular area");
3713 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
3714 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
3715 #if DEBUG_CACHE_LIST
3716 	if (sCacheInfoTable != NULL) {
3717 		add_debugger_command_etc("caches", &dump_caches,
3718 			"List all VMCache trees",
3719 			"[ \"-c\" ]\n"
3720 			"All cache trees are listed sorted in decreasing order by number "
3721 				"of\n"
3722 			"used pages or, if \"-c\" is specified, by size of committed "
3723 				"memory.\n",
3724 			0);
3725 	}
3726 #endif
3727 	add_debugger_command("avail", &dump_available_memory,
3728 		"Dump available memory");
3729 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
3730 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
3731 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
3732 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
3733 	add_debugger_command("string", &display_mem, "dump strings");
3734 
3735 	TRACE(("vm_init: exit\n"));
3736 
3737 	vm_cache_init_post_heap();
3738 
3739 	return err;
3740 }
3741 
3742 
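/*!	Second VM initialization stage, called once semaphores are available.
	Releases the boot loader ranges and lets the translation map, the slab
	allocator, and (if used) the heap create the locks/semaphores they could
	not create earlier.
*/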
3743 status_t
3744 vm_init_post_sem(kernel_args* args)
3745 {
3746 	// This frees all unused boot loader resources and makes their space available
3747 	// again
3748 	arch_vm_init_end(args);
3749 	unreserve_boot_loader_ranges(args);
3750 
3751 	// Fill in all of the semaphores that were not allocated before. Since we're
3752 	// still single-threaded and only the kernel address space exists, it isn't
3753 	// that hard to find all of the ones we need to create.
3754 
3755 	arch_vm_translation_map_init_post_sem(args);
3756 
3757 	slab_init_post_sem();
3758 
3759 #if	!USE_SLAB_ALLOCATOR_FOR_MALLOC
3760 	heap_init_post_sem();
3761 #endif
3762 
3763 	return B_OK;
3764 }
3765 
3766 
3767 status_t
3768 vm_init_post_thread(kernel_args* args)
3769 {
3770 	vm_page_init_post_thread(args);
3771 	slab_init_post_thread();
3772 	return heap_init_post_thread();
3773 }
3774 
3775 
3776 status_t
3777 vm_init_post_modules(kernel_args* args)
3778 {
3779 	return arch_vm_init_post_modules(args);
3780 }
3781 
3782 
3783 void
3784 permit_page_faults(void)
3785 {
3786 	struct thread* thread = thread_get_current_thread();
3787 	if (thread != NULL)
3788 		atomic_add(&thread->page_faults_allowed, 1);
3789 }
3790 
3791 
3792 void
3793 forbid_page_faults(void)
3794 {
3795 	struct thread* thread = thread_get_current_thread();
3796 	if (thread != NULL)
3797 		atomic_add(&thread->page_faults_allowed, -1);
3798 }
3799 
3800 
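/*!	Entry point for page faults, called by the architecture specific fault
	handler. Determines the address space the faulting address belongs to,
	delegates the actual work to vm_soft_fault(), and, if that fails, either
	redirects a kernel thread to its fault handler via \a newIP or notifies
	the debugger/delivers SIGSEGV to the faulting userland thread.
*/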
3801 status_t
3802 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
3803 	addr_t* newIP)
3804 {
3805 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
3806 		faultAddress));
3807 
3808 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
3809 
3810 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
3811 	VMAddressSpace* addressSpace = NULL;
3812 
3813 	status_t status = B_OK;
3814 	*newIP = 0;
3815 	atomic_add((int32*)&sPageFaults, 1);
3816 
3817 	if (IS_KERNEL_ADDRESS(pageAddress)) {
3818 		addressSpace = VMAddressSpace::GetKernel();
3819 	} else if (IS_USER_ADDRESS(pageAddress)) {
3820 		addressSpace = VMAddressSpace::GetCurrent();
3821 		if (addressSpace == NULL) {
3822 			if (!isUser) {
3823 				dprintf("vm_page_fault: kernel thread accessing invalid user "
3824 					"memory!\n");
3825 				status = B_BAD_ADDRESS;
3826 				TPF(PageFaultError(-1,
3827 					VMPageFaultTracing
3828 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
3829 			} else {
3830 				// XXX weird state.
3831 				panic("vm_page_fault: non kernel thread accessing user memory "
3832 					"that doesn't exist!\n");
3833 				status = B_BAD_ADDRESS;
3834 			}
3835 		}
3836 	} else {
3837 		// the hit was probably in the 64k DMZ between kernel and user space
3838 		// this keeps a user space thread from passing a buffer that crosses
3839 		// into kernel space
3840 		status = B_BAD_ADDRESS;
3841 		TPF(PageFaultError(-1,
3842 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
3843 	}
3844 
3845 	if (status == B_OK) {
3846 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser,
3847 			NULL);
3848 	}
3849 
3850 	if (status < B_OK) {
3851 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
3852 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
3853 			strerror(status), address, faultAddress, isWrite, isUser,
3854 			thread_get_current_thread_id());
3855 		if (!isUser) {
3856 			struct thread* thread = thread_get_current_thread();
3857 			if (thread != NULL && thread->fault_handler != 0) {
3858 				// this will cause the arch-dependent page fault handler to
3859 				// modify the IP on the interrupt frame or whatever to return
3860 				// to this address
3861 				*newIP = thread->fault_handler;
3862 			} else {
3863 				// unhandled page fault in the kernel
3864 				panic("vm_page_fault: unhandled page fault in kernel space at "
3865 					"0x%lx, ip 0x%lx\n", address, faultAddress);
3866 			}
3867 		} else {
3868 #if 1
3869 			addressSpace->ReadLock();
3870 
3871 			// TODO: remove me once we have proper userland debugging support
3872 			// (and tools)
3873 			VMArea* area = addressSpace->LookupArea(faultAddress);
3874 
3875 			struct thread* thread = thread_get_current_thread();
3876 			dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) "
3877 				"tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n",
3878 				thread->name, thread->id, thread->team->name, thread->team->id,
3879 				isWrite ? "write" : "read", address, faultAddress,
3880 				area ? area->name : "???",
3881 				faultAddress - (area ? area->Base() : 0x0));
3882 
3883 			// We can print a stack trace of the userland thread here.
3884 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
3885 // fault and someone is already waiting for a write lock on the same address
3886 // space. This thread will then try to acquire the lock again and will
3887 // be queued after the writer.
3888 #	if 0
3889 			if (area) {
3890 				struct stack_frame {
3891 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
3892 						struct stack_frame*	previous;
3893 						void*				return_address;
3894 					#else
3895 						// ...
3896 					#warning writeme
3897 					#endif
3898 				} frame;
3899 #		ifdef __INTEL__
3900 				struct iframe* iframe = i386_get_user_iframe();
3901 				if (iframe == NULL)
3902 					panic("iframe is NULL!");
3903 
3904 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
3905 					sizeof(struct stack_frame));
3906 #		elif defined(__POWERPC__)
3907 				struct iframe* iframe = ppc_get_user_iframe();
3908 				if (iframe == NULL)
3909 					panic("iframe is NULL!");
3910 
3911 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
3912 					sizeof(struct stack_frame));
3913 #		else
3914 #			warning "vm_page_fault() stack trace won't work"
3915 				status = B_ERROR;
3916 #		endif
3917 
3918 				dprintf("stack trace:\n");
3919 				int32 maxFrames = 50;
3920 				while (status == B_OK && --maxFrames >= 0
3921 						&& frame.return_address != NULL) {
3922 					dprintf("  %p", frame.return_address);
3923 					area = addressSpace->LookupArea(
3924 						(addr_t)frame.return_address);
3925 					if (area) {
3926 						dprintf(" (%s + %#lx)", area->name,
3927 							(addr_t)frame.return_address - area->Base());
3928 					}
3929 					dprintf("\n");
3930 
3931 					status = user_memcpy(&frame, frame.previous,
3932 						sizeof(struct stack_frame));
3933 				}
3934 			}
3935 #	endif	// 0 (stack trace)
3936 
3937 			addressSpace->ReadUnlock();
3938 #endif
3939 
3940 			// TODO: the fault_callback is a temporary solution for vm86
3941 			if (thread->fault_callback == NULL
3942 				|| thread->fault_callback(address, faultAddress, isWrite)) {
3943 				// If the thread has a signal handler for SIGSEGV, we simply
3944 				// send it the signal. Otherwise we notify the user debugger
3945 				// first.
3946 				struct sigaction action;
3947 				if (sigaction(SIGSEGV, NULL, &action) == 0
3948 					&& action.sa_handler != SIG_DFL
3949 					&& action.sa_handler != SIG_IGN) {
3950 					send_signal(thread->id, SIGSEGV);
3951 				} else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION,
3952 						SIGSEGV)) {
3953 					send_signal(thread->id, SIGSEGV);
3954 				}
3955 			}
3956 		}
3957 	}
3958 
3959 	if (addressSpace != NULL)
3960 		addressSpace->Put();
3961 
3962 	return B_HANDLED_INTERRUPT;
3963 }
3964 
3965 
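// Bundles the state needed while resolving a single page fault: the (locked)
// address space and cache chain, the translation map, the faulted cache
// offset, and the page reservation. The destructor unlocks everything and
// returns the reserved pages.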
3966 struct PageFaultContext {
3967 	AddressSpaceReadLocker	addressSpaceLocker;
3968 	VMCacheChainLocker		cacheChainLocker;
3969 
3970 	VMTranslationMap*		map;
3971 	VMCache*				topCache;
3972 	off_t					cacheOffset;
3973 	vm_page_reservation		reservation;
3974 	bool					isWrite;
3975 
3976 	// return values
3977 	vm_page*				page;
3978 	bool					restart;
3979 
3980 
3981 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
3982 		:
3983 		addressSpaceLocker(addressSpace, true),
3984 		map(addressSpace->TranslationMap()),
3985 		isWrite(isWrite)
3986 	{
3987 	}
3988 
3989 	~PageFaultContext()
3990 	{
3991 		UnlockAll();
3992 		vm_page_unreserve_pages(&reservation);
3993 	}
3994 
3995 	void Prepare(VMCache* topCache, off_t cacheOffset)
3996 	{
3997 		this->topCache = topCache;
3998 		this->cacheOffset = cacheOffset;
3999 		page = NULL;
4000 		restart = false;
4001 
4002 		cacheChainLocker.SetTo(topCache);
4003 	}
4004 
4005 	void UnlockAll(VMCache* exceptCache = NULL)
4006 	{
4007 		topCache = NULL;
4008 		addressSpaceLocker.Unlock();
4009 		cacheChainLocker.Unlock(exceptCache);
4010 	}
4011 };
4012 
4013 
4014 /*!	Gets the page that should be mapped into the area.
4015 	Returns an error code other than \c B_OK, if the page couldn't be found or
4016 	paged in. The locking state of the address space and the caches is undefined
4017 	in that case.
4018 	Returns \c B_OK with \c context.restart set to \c true, if the function
4019 	had to unlock the address space and all caches and is supposed to be called
4020 	again.
4021 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4022 	found. It is returned in \c context.page. The address space will still be
4023 	locked as well as all caches starting from the top cache to at least the
4024 	cache the page lives in.
4025 */
4026 static status_t
4027 fault_get_page(PageFaultContext& context)
4028 {
4029 	VMCache* cache = context.topCache;
4030 	VMCache* lastCache = NULL;
4031 	vm_page* page = NULL;
4032 
4033 	while (cache != NULL) {
4034 		// We already hold the lock of the cache at this point.
4035 
4036 		lastCache = cache;
4037 
4038 		for (;;) {
4039 			page = cache->LookupPage(context.cacheOffset);
4040 			if (page == NULL || !page->busy) {
4041 				// Either there is no page or there is one and it is not busy.
4042 				break;
4043 			}
4044 
4045 			// page must be busy -- wait for it to become unbusy
4046 			context.UnlockAll(cache);
4047 			cache->ReleaseRefLocked();
4048 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4049 
4050 			// restart the whole process
4051 			context.restart = true;
4052 			return B_OK;
4053 		}
4054 
4055 		if (page != NULL)
4056 			break;
4057 
4058 		// The current cache does not contain the page we're looking for.
4059 
4060 		// see if the backing store has it
4061 		if (cache->HasPage(context.cacheOffset)) {
4062 			// insert a fresh page and mark it busy -- we're going to read it in
4063 			page = vm_page_allocate_page(&context.reservation,
4064 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4065 			cache->InsertPage(page, context.cacheOffset);
4066 
4067 			// We need to unlock all caches and the address space while reading
4068 			// the page in. Keep a reference to the cache around.
4069 			cache->AcquireRefLocked();
4070 			context.UnlockAll();
4071 
4072 			// read the page in
4073 			generic_io_vec vec;
4074 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4075 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4076 
4077 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4078 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4079 
4080 			cache->Lock();
4081 
4082 			if (status < B_OK) {
4083 				// on error remove and free the page
4084 				dprintf("reading page from cache %p returned: %s!\n",
4085 					cache, strerror(status));
4086 
4087 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4088 				cache->RemovePage(page);
4089 				vm_page_set_state(page, PAGE_STATE_FREE);
4090 
4091 				cache->ReleaseRefAndUnlock();
4092 				return status;
4093 			}
4094 
4095 			// mark the page unbusy again
4096 			cache->MarkPageUnbusy(page);
4097 
4098 			DEBUG_PAGE_ACCESS_END(page);
4099 
4100 			// Since we needed to unlock everything temporarily, the area
4101 			// situation might have changed. So we need to restart the whole
4102 			// process.
4103 			cache->ReleaseRefAndUnlock();
4104 			context.restart = true;
4105 			return B_OK;
4106 		}
4107 
4108 		cache = context.cacheChainLocker.LockSourceCache();
4109 	}
4110 
4111 	if (page == NULL) {
4112 		// There was no adequate page, determine the cache for a clean one.
4113 		// Read-only pages go into the deepest cache; only the topmost cache
4114 		// may have direct write access.
4115 		cache = context.isWrite ? context.topCache : lastCache;
4116 
4117 		// allocate a clean page
4118 		page = vm_page_allocate_page(&context.reservation,
4119 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4120 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n",
4121 			page->physical_page_number));
4122 
4123 		// insert the new page into our cache
4124 		cache->InsertPage(page, context.cacheOffset);
4125 	} else if (page->Cache() != context.topCache && context.isWrite) {
4126 		// We have a page that has the data we want, but in the wrong cache
4127 		// object so we need to copy it and stick it into the top cache.
4128 		vm_page* sourcePage = page;
4129 
4130 		// TODO: If memory is low, it might be a good idea to steal the page
4131 		// from our source cache -- if possible, that is.
4132 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4133 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4134 
4135 		// To not needlessly kill concurrency we unlock all caches but the top
4136 		// one while copying the page. Lacking another mechanism to ensure that
4137 		// the source page doesn't disappear, we mark it busy.
4138 		sourcePage->busy = true;
4139 		context.cacheChainLocker.UnlockKeepRefs(true);
4140 
4141 		// copy the page
4142 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4143 			sourcePage->physical_page_number * B_PAGE_SIZE);
4144 
4145 		context.cacheChainLocker.RelockCaches(true);
4146 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4147 
4148 		// insert the new page into our cache
4149 		context.topCache->InsertPage(page, context.cacheOffset);
4150 	} else
4151 		DEBUG_PAGE_ACCESS_START(page);
4152 
4153 	context.page = page;
4154 	return B_OK;
4155 }
4156 
4157 
4158 /*!	Makes sure the address in the given address space is mapped.
4159 
4160 	\param addressSpace The address space.
4161 	\param originalAddress The address. Doesn't need to be page aligned.
4162 	\param isWrite If \c true the address shall be write-accessible.
4163 	\param isUser If \c true the access is requested by a userland team.
4164 	\param wirePage On success, if non \c NULL, the wired count of the page
4165 		mapped at the given address is incremented and the page is returned
4166 		via this parameter.
4167 	\param wiredRange If given, this wiredRange is ignored when checking whether
4168 		an already mapped page at the virtual address can be unmapped.
4169 	\return \c B_OK on success, another error code otherwise.
4170 */
4171 static status_t
4172 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4173 	bool isWrite, bool isUser, vm_page** wirePage, VMAreaWiredRange* wiredRange)
4174 {
4175 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
4176 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
4177 
4178 	PageFaultContext context(addressSpace, isWrite);
4179 
4180 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4181 	status_t status = B_OK;
4182 
4183 	addressSpace->IncrementFaultCount();
4184 
4185 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4186 	// the pages upfront makes sure we don't have any cache locked, so that the
4187 	// page daemon/thief can do their job without problems.
4188 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4189 		originalAddress);
4190 	context.addressSpaceLocker.Unlock();
4191 	vm_page_reserve_pages(&context.reservation, reservePages,
4192 		addressSpace == VMAddressSpace::Kernel()
4193 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4194 
4195 	while (true) {
4196 		context.addressSpaceLocker.Lock();
4197 
4198 		// get the area the fault was in
4199 		VMArea* area = addressSpace->LookupArea(address);
4200 		if (area == NULL) {
4201 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4202 				"space\n", originalAddress);
4203 			TPF(PageFaultError(-1,
4204 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4205 			status = B_BAD_ADDRESS;
4206 			break;
4207 		}
4208 
4209 		// check permissions
4210 		uint32 protection = get_area_page_protection(area, address);
4211 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4212 			dprintf("user access on kernel area 0x%lx at %p\n", area->id,
4213 				(void*)originalAddress);
4214 			TPF(PageFaultError(area->id,
4215 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4216 			status = B_PERMISSION_DENIED;
4217 			break;
4218 		}
4219 		if (isWrite && (protection
4220 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4221 			dprintf("write access attempted on write-protected area 0x%lx at"
4222 				" %p\n", area->id, (void*)originalAddress);
4223 			TPF(PageFaultError(area->id,
4224 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4225 			status = B_PERMISSION_DENIED;
4226 			break;
4227 		} else if (!isWrite && (protection
4228 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4229 			dprintf("read access attempted on read-protected area 0x%lx at"
4230 				" %p\n", area->id, (void*)originalAddress);
4231 			TPF(PageFaultError(area->id,
4232 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4233 			status = B_PERMISSION_DENIED;
4234 			break;
4235 		}
4236 
4237 		// We have the area, it was a valid access, so let's try to resolve the
4238 		// page fault now.
4239 		// At first, the topmost cache of the area is investigated.
4240 
4241 		context.Prepare(vm_area_get_locked_cache(area),
4242 			address - area->Base() + area->cache_offset);
4243 
4244 		// See if this cache has a fault handler -- this will do all the work
4245 		// for us.
4246 		{
4247 			// Note, since the page fault is resolved with interrupts enabled,
4248 			// the fault handler could be called more than once for the same
4249 			// reason -- the store must take this into account.
4250 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4251 			if (status != B_BAD_HANDLER)
4252 				break;
4253 		}
4254 
4255 		// The topmost cache has no fault handler, so let's see if the cache or
4256 		// its sources already have the page we're searching for (we're going
4257 		// from top to bottom).
4258 		status = fault_get_page(context);
4259 		if (status != B_OK) {
4260 			TPF(PageFaultError(area->id, status));
4261 			break;
4262 		}
4263 
4264 		if (context.restart)
4265 			continue;
4266 
4267 		// All went fine, all there is left to do is to map the page into the
4268 		// address space.
4269 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4270 			context.page));
4271 
4272 		// If the page doesn't reside in the area's cache, we need to make sure
4273 		// it's mapped in read-only, so that we cannot overwrite someone else's
4274 		// data (copy-on-write)
4275 		uint32 newProtection = protection;
4276 		if (context.page->Cache() != context.topCache && !isWrite)
4277 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4278 
4279 		bool unmapPage = false;
4280 		bool mapPage = true;
4281 
4282 		// check whether there's already a page mapped at the address
4283 		context.map->Lock();
4284 
4285 		phys_addr_t physicalAddress;
4286 		uint32 flags;
4287 		vm_page* mappedPage = NULL;
4288 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4289 			&& (flags & PAGE_PRESENT) != 0
4290 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4291 				!= NULL) {
4292 			// Yep there's already a page. If it's ours, we can simply adjust
4293 			// its protection. Otherwise we have to unmap it.
4294 			if (mappedPage == context.page) {
4295 				context.map->ProtectPage(area, address, newProtection);
4296 					// Note: We assume that ProtectPage() is atomic (i.e.
4297 					// the page isn't temporarily unmapped), otherwise we'd have
4298 					// to make sure it isn't wired.
4299 				mapPage = false;
4300 			} else
4301 				unmapPage = true;
4302 		}
4303 
4304 		context.map->Unlock();
4305 
4306 		if (unmapPage) {
4307 			// If the page is wired, we can't unmap it. Wait until it is unwired
4308 			// again and restart.
4309 			VMAreaUnwiredWaiter waiter;
4310 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4311 					wiredRange)) {
4312 				// unlock everything and wait
4313 				context.UnlockAll();
4314 				waiter.waitEntry.Wait();
4315 				continue;
4316 			}
4317 
4318 			// Note: The mapped page is a page of a lower cache. We are
4319 			// guaranteed to have that cache locked, our new page is a copy of
4320 			// that page, and the page is not busy. The logic for that guarantee
4321 			// is as follows: Since the page is mapped, it must live in the top
4322 			// cache (ruled out above) or any of its lower caches, and there is
4323 			// (was before the new page was inserted) no other page in any
4324 			// cache between the top cache and the page's cache (otherwise that
4325 			// would be mapped instead). That in turn means that our algorithm
4326 			// must have found it and therefore it cannot be busy either.
4327 			DEBUG_PAGE_ACCESS_START(mappedPage);
4328 			unmap_page(area, address);
4329 			DEBUG_PAGE_ACCESS_END(mappedPage);
4330 		}
4331 
4332 		if (mapPage) {
4333 			if (map_page(area, context.page, address, newProtection,
4334 					&context.reservation) != B_OK) {
4335 				// Mapping can only fail when the page mapping object couldn't
4336 				// be allocated. Save for the missing mapping everything is
4337 				// fine, though. If this was a regular page fault, we'll simply
4338 				// leave and probably fault again. To make sure we'll have more
4339 				// luck then, we ensure that the minimum object reserve is
4340 				// available.
4341 				DEBUG_PAGE_ACCESS_END(context.page);
4342 
4343 				context.UnlockAll();
4344 
4345 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4346 						!= B_OK) {
4347 					// Apparently the situation is serious. Let's get ourselves
4348 					// killed.
4349 					status = B_NO_MEMORY;
4350 				} else if (wirePage != NULL) {
4351 					// The caller expects us to wire the page. Since
4352 					// object_cache_reserve() succeeded, we should now be able
4353 					// to allocate a mapping structure. Restart.
4354 					continue;
4355 				}
4356 
4357 				break;
4358 			}
4359 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4360 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4361 
4362 		// also wire the page, if requested
4363 		if (wirePage != NULL && status == B_OK) {
4364 			increment_page_wired_count(context.page);
4365 			*wirePage = context.page;
4366 		}
4367 
4368 		DEBUG_PAGE_ACCESS_END(context.page);
4369 
4370 		break;
4371 	}
4372 
4373 	return status;
4374 }
4375 
4376 
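// The following functions are thin wrappers around the physical page mapper
// installed by arch_vm_translation_map_init() (see vm_init()).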
4377 status_t
4378 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4379 {
4380 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4381 }
4382 
4383 status_t
4384 vm_put_physical_page(addr_t vaddr, void* handle)
4385 {
4386 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4387 }
4388 
4389 
4390 status_t
4391 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4392 	void** _handle)
4393 {
4394 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4395 }
4396 
4397 status_t
4398 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4399 {
4400 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4401 }
4402 
4403 
4404 status_t
4405 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4406 {
4407 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4408 }
4409 
4410 status_t
4411 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4412 {
4413 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4414 }
4415 
4416 
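/*!	Fills in the given system_memory_info with the swap statistics and the
	current memory figures (total memory, page fault count, and -- under the
	available memory lock -- the free and needed memory).
*/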
4417 void
4418 vm_get_info(system_memory_info* info)
4419 {
4420 	swap_get_info(info);
4421 
4422 	info->max_memory = vm_page_num_pages() * B_PAGE_SIZE;
4423 	info->page_faults = sPageFaults;
4424 
4425 	MutexLocker locker(sAvailableMemoryLock);
4426 	info->free_memory = sAvailableMemory;
4427 	info->needed_memory = sNeededMemory;
4428 }
4429 
4430 
4431 uint32
4432 vm_num_page_faults(void)
4433 {
4434 	return sPageFaults;
4435 }
4436 
4437 
4438 off_t
4439 vm_available_memory(void)
4440 {
4441 	MutexLocker locker(sAvailableMemoryLock);
4442 	return sAvailableMemory;
4443 }
4444 
4445 
4446 off_t
4447 vm_available_not_needed_memory(void)
4448 {
4449 	MutexLocker locker(sAvailableMemoryLock);
4450 	return sAvailableMemory - sNeededMemory;
4451 }
4452 
4453 
4454 size_t
4455 vm_kernel_address_space_left(void)
4456 {
4457 	return VMAddressSpace::Kernel()->FreeSpace();
4458 }
4459 
4460 
4461 void
4462 vm_unreserve_memory(size_t amount)
4463 {
4464 	mutex_lock(&sAvailableMemoryLock);
4465 
4466 	sAvailableMemory += amount;
4467 
4468 	mutex_unlock(&sAvailableMemoryLock);
4469 }
4470 
4471 
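/*!	Tries to reserve \a amount bytes of the available memory, leaving the
	reserve associated with \a priority untouched. If not enough memory is
	available and \a timeout is positive, the low resource manager is notified
	and the function waits -- up to the given timeout -- for memory to become
	available again.
*/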
4472 status_t
4473 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4474 {
4475 	size_t reserve = kMemoryReserveForPriority[priority];
4476 
4477 	MutexLocker locker(sAvailableMemoryLock);
4478 
4479 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4480 
4481 	if (sAvailableMemory >= amount + reserve) {
4482 		sAvailableMemory -= amount;
4483 		return B_OK;
4484 	}
4485 
4486 	if (timeout <= 0)
4487 		return B_NO_MEMORY;
4488 
4489 	// turn timeout into an absolute timeout
4490 	timeout += system_time();
4491 
4492 	// loop until we've got the memory or the timeout occurs
4493 	do {
4494 		sNeededMemory += amount;
4495 
4496 		// call the low resource manager
4497 		locker.Unlock();
4498 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4499 			B_ABSOLUTE_TIMEOUT, timeout);
4500 		locker.Lock();
4501 
4502 		sNeededMemory -= amount;
4503 
4504 		if (sAvailableMemory >= amount + reserve) {
4505 			sAvailableMemory -= amount;
4506 			return B_OK;
4507 		}
4508 	} while (timeout > system_time());
4509 
4510 	return B_NO_MEMORY;
4511 }
4512 
4513 
4514 status_t
4515 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4516 {
4517 	// NOTE: The caller is responsible for synchronizing calls to this function!
4518 
4519 	AddressSpaceReadLocker locker;
4520 	VMArea* area;
4521 	status_t status = locker.SetFromArea(id, area);
4522 	if (status != B_OK)
4523 		return status;
4524 
4525 	// nothing to do, if the type doesn't change
4526 	uint32 oldType = area->MemoryType();
4527 	if (type == oldType)
4528 		return B_OK;
4529 
4530 	// set the memory type of the area and the mapped pages
4531 	VMTranslationMap* map = area->address_space->TranslationMap();
4532 	map->Lock();
4533 	area->SetMemoryType(type);
4534 	map->ProtectArea(area, area->protection);
4535 	map->Unlock();
4536 
4537 	// set the physical memory type
4538 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4539 	if (error != B_OK) {
4540 		// reset the memory type of the area and the mapped pages
4541 		map->Lock();
4542 		area->SetMemoryType(oldType);
4543 		map->ProtectArea(area, area->protection);
4544 		map->Unlock();
4545 		return error;
4546 	}
4547 
4548 	return B_OK;
4550 }
4551 
4552 
4553 /*!	This function enforces some protection properties:
4554 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4555 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4556 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4557 	   and B_KERNEL_WRITE_AREA.
4558 */
4559 static void
4560 fix_protection(uint32* protection)
4561 {
4562 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4563 		if ((*protection & B_USER_PROTECTION) == 0
4564 			|| (*protection & B_WRITE_AREA) != 0)
4565 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4566 		else
4567 			*protection |= B_KERNEL_READ_AREA;
4568 	}
4569 }
4570 
4571 
4572 static void
4573 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4574 {
4575 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4576 	info->area = area->id;
4577 	info->address = (void*)area->Base();
4578 	info->size = area->Size();
4579 	info->protection = area->protection;
4580 	info->lock = B_FULL_LOCK;
4581 	info->team = area->address_space->ID();
4582 	info->copy_count = 0;
4583 	info->in_count = 0;
4584 	info->out_count = 0;
4585 		// TODO: retrieve real values here!
4586 
4587 	VMCache* cache = vm_area_get_locked_cache(area);
4588 
4589 	// Note, this is a simplification; the cache could be larger than this area
4590 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4591 
4592 	vm_area_put_locked_cache(cache);
4593 }
4594 
4595 
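/*!	Resizes the area with the given ID to \a newSize, together with all other
	areas referring to the same cache. When growing, the cache is resized
	first (which may fail); when shrinking, the pages beyond the new size are
	unmapped. On failure the areas are resized back to their original size.
*/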
4596 static status_t
4597 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4598 {
4599 	// is newSize a multiple of B_PAGE_SIZE?
4600 	if (newSize & (B_PAGE_SIZE - 1))
4601 		return B_BAD_VALUE;
4602 
4603 	// lock all affected address spaces and the cache
4604 	VMArea* area;
4605 	VMCache* cache;
4606 
4607 	MultiAddressSpaceLocker locker;
4608 	AreaCacheLocker cacheLocker;
4609 
4610 	status_t status;
4611 	size_t oldSize;
4612 	bool anyKernelArea;
4613 	bool restart;
4614 
4615 	do {
4616 		anyKernelArea = false;
4617 		restart = false;
4618 
4619 		locker.Unset();
4620 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4621 		if (status != B_OK)
4622 			return status;
4623 		cacheLocker.SetTo(cache, true);	// already locked
4624 
4625 		// enforce restrictions
4626 		if (!kernel) {
4627 			if ((area->protection & B_KERNEL_AREA) != 0)
4628 				return B_NOT_ALLOWED;
4629 			// TODO: Enforce all restrictions (team, etc.)!
4630 		}
4631 
4632 		oldSize = area->Size();
4633 		if (newSize == oldSize)
4634 			return B_OK;
4635 
4636 		if (cache->type != CACHE_TYPE_RAM)
4637 			return B_NOT_ALLOWED;
4638 
4639 		if (oldSize < newSize) {
4640 			// We need to check if all areas of this cache can be resized.
4641 			for (VMArea* current = cache->areas; current != NULL;
4642 					current = current->cache_next) {
4643 				if (!current->address_space->CanResizeArea(current, newSize))
4644 					return B_ERROR;
4645 				anyKernelArea
4646 					|= current->address_space == VMAddressSpace::Kernel();
4647 			}
4648 		} else {
4649 			// We're shrinking the areas, so we must make sure the affected
4650 			// ranges are not wired.
4651 			for (VMArea* current = cache->areas; current != NULL;
4652 					current = current->cache_next) {
4653 				anyKernelArea
4654 					|= current->address_space == VMAddressSpace::Kernel();
4655 
4656 				if (wait_if_area_range_is_wired(current,
4657 						current->Base() + newSize, oldSize - newSize, &locker,
4658 						&cacheLocker)) {
4659 					restart = true;
4660 					break;
4661 				}
4662 			}
4663 		}
4664 	} while (restart);
4665 
4666 	// Okay, looks good so far, so let's do it
4667 
4668 	int priority = kernel && anyKernelArea
4669 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
4670 	uint32 allocationFlags = kernel && anyKernelArea
4671 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
4672 
4673 	if (oldSize < newSize) {
4674 		// Growing the cache can fail, so we do it first.
4675 		status = cache->Resize(cache->virtual_base + newSize, priority);
4676 		if (status != B_OK)
4677 			return status;
4678 	}
4679 
4680 	for (VMArea* current = cache->areas; current != NULL;
4681 			current = current->cache_next) {
4682 		status = current->address_space->ResizeArea(current, newSize,
4683 			allocationFlags);
4684 		if (status != B_OK)
4685 			break;
4686 
4687 		// We also need to unmap all pages beyond the new size, if the area has
4688 		// shrunk
4689 		if (newSize < oldSize) {
4690 			VMCacheChainLocker cacheChainLocker(cache);
4691 			cacheChainLocker.LockAllSourceCaches();
4692 
4693 			unmap_pages(current, current->Base() + newSize,
4694 				oldSize - newSize);
4695 
4696 			cacheChainLocker.Unlock(cache);
4697 		}
4698 	}
4699 
4700 	// shrinking the cache can't fail, so we do it now
4701 	if (status == B_OK && newSize < oldSize)
4702 		status = cache->Resize(cache->virtual_base + newSize, priority);
4703 
4704 	if (status != B_OK) {
4705 		// Something failed -- resize the areas back to their original size.
4706 		// This can fail, too, in which case we're seriously screwed.
4707 		for (VMArea* current = cache->areas; current != NULL;
4708 				current = current->cache_next) {
4709 			if (current->address_space->ResizeArea(current, oldSize,
4710 					allocationFlags) != B_OK) {
4711 				panic("vm_resize_area(): Failed, and unable to restore the "
4712 					"original state.");
4713 			}
4714 		}
4715 
4716 		cache->Resize(cache->virtual_base + oldSize, priority);
4717 	}
4718 
4719 	// TODO: we must honour the lock restrictions of this area
4720 	return status;
4721 }
4722 
4723 
4724 status_t
4725 vm_memset_physical(phys_addr_t address, int value, size_t length)
4726 {
4727 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
4728 }
4729 
4730 
4731 status_t
4732 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
4733 {
4734 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
4735 }
4736 
4737 
4738 status_t
4739 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
4740 	bool user)
4741 {
4742 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
4743 }
4744 
4745 
4746 void
4747 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
4748 {
4749 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
4750 }
4751 
4752 
4753 /*!	Copies a range of memory directly from/to a page that might not be mapped
4754 	at the moment.
4755 
4756 	For \a unsafeMemory the current mapping (if any) is ignored. The function
4757 	walks through the respective area's cache chain to find the physical page
4758 	and copies from/to it directly.
4759 	The memory range starting at \a unsafeMemory with a length of \a size bytes
4760 	must not cross a page boundary.
4761 
4762 	\param teamID The team ID identifying the address space \a unsafeMemory is
4763 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
4764 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
4765 		is passed, the address space of the thread returned by
4766 		debug_get_debugged_thread() is used.
4767 	\param unsafeMemory The start of the unsafe memory range to be copied
4768 		from/to.
4769 	\param buffer A safely accessible kernel buffer to be copied from/to.
4770 	\param size The number of bytes to be copied.
4771 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
4772 		\a unsafeMemory, the other way around otherwise.
4773 */
4774 status_t
4775 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
4776 	size_t size, bool copyToUnsafe)
4777 {
4778 	if (size > B_PAGE_SIZE
4779 			|| ((addr_t)unsafeMemory + size) % B_PAGE_SIZE < size) {
4780 		return B_BAD_VALUE;
4781 	}
4782 
4783 	// get the address space for the debugged thread
4784 	VMAddressSpace* addressSpace;
4785 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
4786 		addressSpace = VMAddressSpace::Kernel();
4787 	} else if (teamID == B_CURRENT_TEAM) {
4788 		struct thread* thread = debug_get_debugged_thread();
4789 		if (thread == NULL || thread->team == NULL)
4790 			return B_BAD_ADDRESS;
4791 
4792 		addressSpace = thread->team->address_space;
4793 	} else
4794 		addressSpace = VMAddressSpace::DebugGet(teamID);
4795 
4796 	if (addressSpace == NULL)
4797 		return B_BAD_ADDRESS;
4798 
4799 	// get the area
4800 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
4801 	if (area == NULL)
4802 		return B_BAD_ADDRESS;
4803 
4804 	// search the page
4805 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
4806 		+ area->cache_offset;
4807 	VMCache* cache = area->cache;
4808 	vm_page* page = NULL;
4809 	while (cache != NULL) {
4810 		page = cache->DebugLookupPage(cacheOffset);
4811 		if (page != NULL)
4812 			break;
4813 
4814 		// Page not found in this cache -- if it is paged out, we must not try
4815 		// to get it from lower caches.
4816 		if (cache->DebugHasPage(cacheOffset))
4817 			break;
4818 
4819 		cache = cache->source;
4820 	}
4821 
4822 	if (page == NULL)
4823 		return B_UNSUPPORTED;
4824 
4825 	// copy from/to physical memory
4826 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
4827 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
4828 
4829 	if (copyToUnsafe) {
4830 		if (page->Cache() != area->cache)
4831 			return B_UNSUPPORTED;
4832 
4833 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
4834 	}
4835 
4836 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
4837 }
4838 
4839 
4840 //	#pragma mark - kernel public API
4841 
4842 
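/*!	Copies \a size bytes from \a from to \a to, where either buffer may
	reside in userland memory. Returns \c B_BAD_ADDRESS if the range overflows
	the address space or a fault occurs while copying, \c B_OK otherwise.
*/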
4843 status_t
4844 user_memcpy(void* to, const void* from, size_t size)
4845 {
4846 	// don't allow address overflows
4847 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
4848 		return B_BAD_ADDRESS;
4849 
4850 	if (arch_cpu_user_memcpy(to, from, size,
4851 			&thread_get_current_thread()->fault_handler) < B_OK)
4852 		return B_BAD_ADDRESS;
4853 
4854 	return B_OK;
4855 }
4856 
4857 
4858 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
4859 	the string in \a to, NULL-terminating the result.
4860 
4861 	\param to Pointer to the destination C-string.
4862 	\param from Pointer to the source C-string.
4863 	\param size Size in bytes of the string buffer pointed to by \a to.
4864 
4865 	\return strlen(\a from), or an error code on failure.
4866 */
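/* Illustrative usage sketch (not part of this file; "userPath" is a
   hypothetical userland string pointer): copy a path from userland and detect
   truncation via the returned source length.

	char path[B_PATH_NAME_LENGTH];
	ssize_t length = user_strlcpy(path, userPath, sizeof(path));
	if (length < 0)
		return B_BAD_ADDRESS;
	if ((size_t)length >= sizeof(path))
		return B_NAME_TOO_LONG;
*/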
4867 ssize_t
4868 user_strlcpy(char* to, const char* from, size_t size)
4869 {
4870 	if (to == NULL && size != 0)
4871 		return B_BAD_VALUE;
4872 	if (from == NULL)
4873 		return B_BAD_ADDRESS;
4874 
4875 	// limit size to avoid address overflows
4876 	size_t maxSize = std::min(size,
4877 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
4878 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
4879 		// the source address might still overflow.
4880 
4881 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize,
4882 		&thread_get_current_thread()->fault_handler);
4883 
4884 	// If we hit the address overflow boundary, fail.
4885 	if (result >= 0 && (size_t)result >= maxSize && maxSize < size)
4886 		return B_BAD_ADDRESS;
4887 
4888 	return result;
4889 }
4890 
4891 
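/*!	Fills \a count bytes at \a s (which may reside in userland memory) with
	the byte value \a c. Returns \c B_BAD_ADDRESS on address overflow or if a
	fault occurs, \c B_OK otherwise.
*/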
4892 status_t
4893 user_memset(void* s, char c, size_t count)
4894 {
4895 	// don't allow address overflows
4896 	if ((addr_t)s + count < (addr_t)s)
4897 		return B_BAD_ADDRESS;
4898 
4899 	if (arch_cpu_user_memset(s, c, count,
4900 			&thread_get_current_thread()->fault_handler) < B_OK)
4901 		return B_BAD_ADDRESS;
4902 
4903 	return B_OK;
4904 }
4905 
4906 
4907 /*!	Wires a single page at the given address.
4908 
4909 	\param team The team whose address space the address belongs to. Supports
4910 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
4911 		parameter is ignored.
4912 	\param address The virtual address to wire down. Does not need to
4913 		be page aligned.
4914 	\param writable If \c true the page shall be writable.
4915 	\param info On success the info is filled in, among other things
4916 		containing the physical address the given virtual one translates to.
4917 	\return \c B_OK, when the page could be wired, another error code otherwise.
4918 */
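/* Illustrative usage sketch (not part of this file; "userAddress" is a
   hypothetical addr_t): wire a single userland page for a short direct access
   and unwire it afterwards.

	VMPageWiringInfo info;
	if (vm_wire_page(B_CURRENT_TEAM, userAddress, true, &info) == B_OK) {
		// ... access the page via info.physicalAddress / info.page ...
		vm_unwire_page(&info);
	}
*/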
4919 status_t
4920 vm_wire_page(team_id team, addr_t address, bool writable,
4921 	VMPageWiringInfo* info)
4922 {
4923 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
4924 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
4925 
4926 	// compute the page protection that is required
4927 	bool isUser = IS_USER_ADDRESS(address);
4928 	uint32 requiredProtection = PAGE_PRESENT
4929 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
4930 	if (writable)
4931 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
4932 
4933 	// get and read lock the address space
4934 	VMAddressSpace* addressSpace = NULL;
4935 	if (isUser) {
4936 		if (team == B_CURRENT_TEAM)
4937 			addressSpace = VMAddressSpace::GetCurrent();
4938 		else
4939 			addressSpace = VMAddressSpace::Get(team);
4940 	} else
4941 		addressSpace = VMAddressSpace::GetKernel();
4942 	if (addressSpace == NULL)
4943 		return B_ERROR;
4944 
4945 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
4946 
4947 	VMTranslationMap* map = addressSpace->TranslationMap();
4948 	status_t error = B_OK;
4949 
4950 	// get the area
4951 	VMArea* area = addressSpace->LookupArea(pageAddress);
4952 	if (area == NULL) {
4953 		addressSpace->Put();
4954 		return B_BAD_ADDRESS;
4955 	}
4956 
4957 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
4958 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
4959 
4960 	// mark the area range wired
4961 	area->Wire(&info->range);
4962 
4963 	// Lock the area's cache chain and the translation map. Needed to look
4964 	// up the page and play with its wired count.
4965 	cacheChainLocker.LockAllSourceCaches();
4966 	map->Lock();
4967 
4968 	phys_addr_t physicalAddress;
4969 	uint32 flags;
4970 	vm_page* page;
4971 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
4972 		&& (flags & requiredProtection) == requiredProtection
4973 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4974 			!= NULL) {
4975 		// Already mapped with the correct permissions -- just increment
4976 		// the page's wired count.
4977 		increment_page_wired_count(page);
4978 
4979 		map->Unlock();
4980 		cacheChainLocker.Unlock();
4981 		addressSpaceLocker.Unlock();
4982 	} else {
4983 		// Let vm_soft_fault() map the page for us, if possible. We need
4984 		// to fully unlock to avoid deadlocks. Since we have already
4985 		// wired the area itself, nothing disturbing will happen with it
4986 		// in the meantime.
4987 		map->Unlock();
4988 		cacheChainLocker.Unlock();
4989 		addressSpaceLocker.Unlock();
4990 
4991 		error = vm_soft_fault(addressSpace, pageAddress, writable, isUser,
4992 			&page, &info->range);
4993 
4994 		if (error != B_OK) {
4995 			// The page could not be mapped -- clean up.
4996 			VMCache* cache = vm_area_get_locked_cache(area);
4997 			area->Unwire(&info->range);
4998 			cache->ReleaseRefAndUnlock();
4999 			addressSpace->Put();
5000 			return error;
5001 		}
5002 	}
5003 
5004 	info->physicalAddress
5005 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5006 			+ address % B_PAGE_SIZE;
5007 	info->page = page;
5008 
5009 	return B_OK;
5010 }
5011 
5012 
5013 /*!	Unwires a single page previously wired via vm_wire_page().
5014 
5015 	\param info The same object passed to vm_wire_page() before.
5016 */
5017 void
5018 vm_unwire_page(VMPageWiringInfo* info)
5019 {
5020 	// lock the address space
5021 	VMArea* area = info->range.area;
5022 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5023 		// takes over our reference
5024 
5025 	// lock the top cache
5026 	VMCache* cache = vm_area_get_locked_cache(area);
5027 	VMCacheChainLocker cacheChainLocker(cache);
5028 
5029 	if (info->page->Cache() != cache) {
5030 		// The page is not in the top cache, so we lock the whole cache chain
5031 		// before touching the page's wired count.
5032 		cacheChainLocker.LockAllSourceCaches();
5033 	}
5034 
5035 	decrement_page_wired_count(info->page);
5036 
5037 	// remove the wired range from the area
5038 	area->Unwire(&info->range);
5039 
5040 	cacheChainLocker.Unlock();
5041 }
5042 
5043 
5044 /*!	Wires down the given address range in the specified team's address space.
5045 
5046 	If successful the function
5047 	- acquires a reference to the specified team's address space,
5048 	- adds respective wired ranges to all areas that intersect with the given
5049 	  address range,
5050 	- makes sure all pages in the given address range are mapped with the
5051 	  requested access permissions and increments their wired count.
5052 
5053 	It fails, when \a team doesn't specify a valid address space, when any part
5054 	of the specified address range is not covered by areas, when the concerned
5055 	areas don't allow mapping with the requested permissions, or when mapping
5056 	failed for another reason.
5057 
5058 	When successful the call must be balanced by an unlock_memory_etc() call with
5059 	the exact same parameters.
5060 
5061 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5062 		supported.
5063 	\param address The start of the address range to be wired.
5064 	\param numBytes The size of the address range to be wired.
5065 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5066 		indicates that the device will only read from the wired memory, so the
5067 		pages don't need to be wired writable.
5068 	\return \c B_OK on success, another error code otherwise.
5069 */
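/* Illustrative usage sketch (not part of this file; "buffer" and "length" are
   hypothetical): wire a user buffer for I/O and balance the call with the
   exact same parameters.

	if (lock_memory_etc(team, buffer, length, 0) == B_OK) {
		// ... perform the I/O on the wired buffer ...
		unlock_memory_etc(team, buffer, length, 0);
	}
*/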
5070 status_t
5071 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5072 {
5073 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5074 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5075 
5076 	// compute the page protection that is required
5077 	bool isUser = IS_USER_ADDRESS(address);
5078 	bool writable = (flags & B_READ_DEVICE) == 0;
5079 	uint32 requiredProtection = PAGE_PRESENT
5080 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5081 	if (writable)
5082 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5083 
5084 	uint32 mallocFlags = isUser
5085 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5086 
5087 	// get and read lock the address space
5088 	VMAddressSpace* addressSpace = NULL;
5089 	if (isUser) {
5090 		if (team == B_CURRENT_TEAM)
5091 			addressSpace = VMAddressSpace::GetCurrent();
5092 		else
5093 			addressSpace = VMAddressSpace::Get(team);
5094 	} else
5095 		addressSpace = VMAddressSpace::GetKernel();
5096 	if (addressSpace == NULL)
5097 		return B_ERROR;
5098 
5099 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5100 
5101 	VMTranslationMap* map = addressSpace->TranslationMap();
5102 	status_t error = B_OK;
5103 
5104 	// iterate through all concerned areas
5105 	addr_t nextAddress = lockBaseAddress;
5106 	while (nextAddress != lockEndAddress) {
5107 		// get the next area
5108 		VMArea* area = addressSpace->LookupArea(nextAddress);
5109 		if (area == NULL) {
5110 			error = B_BAD_ADDRESS;
5111 			break;
5112 		}
5113 
5114 		addr_t areaStart = nextAddress;
5115 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5116 
5117 		// allocate the wired range (do that before locking the cache to avoid
5118 		// deadlocks)
5119 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5120 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5121 		if (range == NULL) {
5122 			error = B_NO_MEMORY;
5123 			break;
5124 		}
5125 
5126 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5127 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5128 
5129 		// mark the area range wired
5130 		area->Wire(range);
5131 
5132 		// Depending on the area cache type and the wiring, we may not need to
5133 		// look at the individual pages.
5134 		if (area->cache_type == CACHE_TYPE_NULL
5135 			|| area->cache_type == CACHE_TYPE_DEVICE
5136 			|| area->wiring == B_FULL_LOCK
5137 			|| area->wiring == B_CONTIGUOUS) {
5138 			nextAddress = areaEnd;
5139 			continue;
5140 		}
5141 
5142 		// Lock the area's cache chain and the translation map. Needed to look
5143 		// up pages and play with their wired count.
5144 		cacheChainLocker.LockAllSourceCaches();
5145 		map->Lock();
5146 
5147 		// iterate through the pages and wire them
5148 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5149 			phys_addr_t physicalAddress;
5150 			uint32 flags;
5151 
5152 			vm_page* page;
5153 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5154 				&& (flags & requiredProtection) == requiredProtection
5155 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5156 					!= NULL) {
5157 				// Already mapped with the correct permissions -- just increment
5158 				// the page's wired count.
5159 				increment_page_wired_count(page);
5160 			} else {
5161 				// Let vm_soft_fault() map the page for us, if possible. We need
5162 				// to fully unlock to avoid deadlocks. Since we have already
5163 				// wired the area itself, nothing disturbing will happen with it
5164 				// in the meantime.
5165 				map->Unlock();
5166 				cacheChainLocker.Unlock();
5167 				addressSpaceLocker.Unlock();
5168 
5169 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5170 					isUser, &page, range);
5171 
5172 				addressSpaceLocker.Lock();
5173 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5174 				cacheChainLocker.LockAllSourceCaches();
5175 				map->Lock();
5176 			}
5177 
5178 			if (error != B_OK)
5179 				break;
5180 		}
5181 
5182 		map->Unlock();
5183 
5184 		if (error == B_OK) {
5185 			cacheChainLocker.Unlock();
5186 		} else {
5187 			// An error occurred, so abort right here. If the current address
5188 			// is the first in this area, unwire the area, since we won't get
5189 			// to it when reverting what we've done so far.
5190 			if (nextAddress == areaStart) {
5191 				area->Unwire(range);
5192 				cacheChainLocker.Unlock();
5193 				range->~VMAreaWiredRange();
5194 				free_etc(range, mallocFlags);
5195 			} else
5196 				cacheChainLocker.Unlock();
5197 
5198 			break;
5199 		}
5200 	}
5201 
5202 	if (error != B_OK) {
5203 		// An error occurred, so unwire all that we've already wired. Note that
5204 		// even if not a single page was wired, unlock_memory_etc() is called
5205 		// to put the address space reference.
5206 		addressSpaceLocker.Unlock();
5207 		unlock_memory_etc(team, (void*)address, nextAddress - lockBaseAddress,
5208 			flags);
5209 	}
5210 
5211 	return error;
5212 }
5213 
5214 
5215 status_t
5216 lock_memory(void* address, size_t numBytes, uint32 flags)
5217 {
5218 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5219 }
5220 
5221 
5222 /*!	Unwires an address range previously wired with lock_memory_etc().
5223 
5224 	Note that a call to this function must balance a previous lock_memory_etc()
5225 	call with exactly the same parameters.
5226 */
5227 status_t
5228 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5229 {
5230 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5231 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5232 
5233 	// compute the page protection that is required
5234 	bool isUser = IS_USER_ADDRESS(address);
5235 	bool writable = (flags & B_READ_DEVICE) == 0;
5236 	uint32 requiredProtection = PAGE_PRESENT
5237 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5238 	if (writable)
5239 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5240 
5241 	uint32 mallocFlags = isUser
5242 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5243 
5244 	// get and read lock the address space
5245 	VMAddressSpace* addressSpace = NULL;
5246 	if (isUser) {
5247 		if (team == B_CURRENT_TEAM)
5248 			addressSpace = VMAddressSpace::GetCurrent();
5249 		else
5250 			addressSpace = VMAddressSpace::Get(team);
5251 	} else
5252 		addressSpace = VMAddressSpace::GetKernel();
5253 	if (addressSpace == NULL)
5254 		return B_ERROR;
5255 
5256 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5257 
5258 	VMTranslationMap* map = addressSpace->TranslationMap();
5259 	status_t error = B_OK;
5260 
5261 	// iterate through all concerned areas
5262 	addr_t nextAddress = lockBaseAddress;
5263 	while (nextAddress != lockEndAddress) {
5264 		// get the next area
5265 		VMArea* area = addressSpace->LookupArea(nextAddress);
5266 		if (area == NULL) {
5267 			error = B_BAD_ADDRESS;
5268 			break;
5269 		}
5270 
5271 		addr_t areaStart = nextAddress;
5272 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5273 
5274 		// Lock the area's top cache. This is a requirement for
5275 		// VMArea::Unwire().
5276 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5277 
5278 		// Depending on the area cache type and the wiring, we may not need to
5279 		// look at the individual pages.
5280 		if (area->cache_type == CACHE_TYPE_NULL
5281 			|| area->cache_type == CACHE_TYPE_DEVICE
5282 			|| area->wiring == B_FULL_LOCK
5283 			|| area->wiring == B_CONTIGUOUS) {
5284 			// unwire the range (to avoid deadlocks we delete the range after
5285 			// unlocking the cache)
5286 			nextAddress = areaEnd;
5287 			VMAreaWiredRange* range = area->Unwire(areaStart,
5288 				areaEnd - areaStart, writable);
5289 			cacheChainLocker.Unlock();
5290 			if (range != NULL) {
5291 				range->~VMAreaWiredRange();
5292 				free_etc(range, mallocFlags);
5293 			}
5294 			continue;
5295 		}
5296 
5297 		// Lock the area's cache chain and the translation map. Needed to look
5298 		// up pages and play with their wired count.
5299 		cacheChainLocker.LockAllSourceCaches();
5300 		map->Lock();
5301 
5302 		// iterate through the pages and unwire them
5303 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5304 			phys_addr_t physicalAddress;
5305 			uint32 flags;
5306 
5307 			vm_page* page;
5308 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5309 				&& (flags & PAGE_PRESENT) != 0
5310 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5311 					!= NULL) {
5312 				// The page is still mapped -- just decrement the
5313 				// page's wired count.
5314 				decrement_page_wired_count(page);
5315 			} else {
5316 				panic("unlock_memory_etc(): Failed to unwire page: address "
5317 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5318 					nextAddress);
5319 				error = B_BAD_VALUE;
5320 				break;
5321 			}
5322 		}
5323 
5324 		map->Unlock();
5325 
5326 		// All pages are unwired. Remove the area's wired range as well (to
5327 		// avoid deadlocks we delete the range after unlocking the cache).
5328 		VMAreaWiredRange* range = area->Unwire(areaStart,
5329 			areaEnd - areaStart, writable);
5330 
5331 		cacheChainLocker.Unlock();
5332 
5333 		if (range != NULL) {
5334 			range->~VMAreaWiredRange();
5335 			free_etc(range, mallocFlags);
5336 		}
5337 
5338 		if (error != B_OK)
5339 			break;
5340 	}
5341 
5342 	// get rid of the address space reference
5343 	addressSpace->Put();
5344 
5345 	return error;
5346 }
5347 
5348 
5349 status_t
5350 unlock_memory(void* address, size_t numBytes, uint32 flags)
5351 {
5352 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5353 }
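
/*	Illustrative usage sketch (not part of this file): a wiring caller must
	balance lock_memory_etc() with an unlock_memory_etc() call using the same
	team, address, size, and flags. `buffer` and `length` below are
	hypothetical.

		status_t error = lock_memory_etc(team, buffer, length,
			B_DMA_IO | B_READ_DEVICE);
		if (error == B_OK) {
			// ... set up and run the DMA transfer ...
			unlock_memory_etc(team, buffer, length,
				B_DMA_IO | B_READ_DEVICE);
		}
*/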
5354 
5355 
5356 /*!	Similar to get_memory_map(), but also allows specifying the address space
5357 	for the memory in question and has saner semantics.
5358 	Returns \c B_OK when the complete range could be translated or
5359 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5360 	case the actual number of entries is written to \c *_numEntries. Any other
5361 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5362 	in this case.
5363 */
5364 status_t
5365 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5366 	physical_entry* table, uint32* _numEntries)
5367 {
5368 	uint32 numEntries = *_numEntries;
5369 	*_numEntries = 0;
5370 
5371 	VMAddressSpace* addressSpace;
5372 	addr_t virtualAddress = (addr_t)address;
5373 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5374 	phys_addr_t physicalAddress;
5375 	status_t status = B_OK;
5376 	int32 index = -1;
5377 	addr_t offset = 0;
5378 	bool interrupts = are_interrupts_enabled();
5379 
5380 	TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team,
5381 		address, numBytes, numEntries));
5382 
5383 	if (numEntries == 0 || numBytes == 0)
5384 		return B_BAD_VALUE;
5385 
5386 	// in which address space is the address to be found?
5387 	if (IS_USER_ADDRESS(virtualAddress)) {
5388 		if (team == B_CURRENT_TEAM)
5389 			addressSpace = VMAddressSpace::GetCurrent();
5390 		else
5391 			addressSpace = VMAddressSpace::Get(team);
5392 	} else
5393 		addressSpace = VMAddressSpace::GetKernel();
5394 
5395 	if (addressSpace == NULL)
5396 		return B_ERROR;
5397 
5398 	VMTranslationMap* map = addressSpace->TranslationMap();
5399 
5400 	if (interrupts)
5401 		map->Lock();
5402 
5403 	while (offset < numBytes) {
5404 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5405 		uint32 flags;
5406 
5407 		if (interrupts) {
5408 			status = map->Query((addr_t)address + offset, &physicalAddress,
5409 				&flags);
5410 		} else {
5411 			status = map->QueryInterrupt((addr_t)address + offset,
5412 				&physicalAddress, &flags);
5413 		}
5414 		if (status < B_OK)
5415 			break;
5416 		if ((flags & PAGE_PRESENT) == 0) {
5417 			panic("get_memory_map() called on unmapped memory!");
5418 			return B_BAD_ADDRESS;
5419 		}
5420 
5421 		if (index < 0 && pageOffset > 0) {
5422 			physicalAddress += pageOffset;
5423 			if (bytes > B_PAGE_SIZE - pageOffset)
5424 				bytes = B_PAGE_SIZE - pageOffset;
5425 		}
5426 
5427 		// need to switch to the next physical_entry?
5428 		if (index < 0 || table[index].address
5429 				!= physicalAddress - table[index].size) {
5430 			if ((uint32)++index + 1 > numEntries) {
5431 				// table too small
5432 				break;
5433 			}
5434 			table[index].address = physicalAddress;
5435 			table[index].size = bytes;
5436 		} else {
5437 			// the page still fits into the current entry
5438 			table[index].size += bytes;
5439 		}
5440 
5441 		offset += bytes;
5442 	}
5443 
5444 	if (interrupts)
5445 		map->Unlock();
5446 
5447 	if (status != B_OK)
5448 		return status;
5449 
5450 	if ((uint32)index + 1 > numEntries) {
5451 		*_numEntries = index;
5452 		return B_BUFFER_OVERFLOW;
5453 	}
5454 
5455 	*_numEntries = index + 1;
5456 	return B_OK;
5457 }
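
/*	Illustrative usage sketch (hypothetical `buffer`/`length`): translating a
	range into physical runs and honoring the documented B_BUFFER_OVERFLOW
	semantics.

		physical_entry table[8];
		uint32 numEntries = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
			table, &numEntries);
		if (status == B_BUFFER_OVERFLOW) {
			// table[0 .. numEntries - 1] is valid, but the table was too small
		} else if (status != B_OK) {
			// complete failure, numEntries is 0
		}
*/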
5458 
5459 
5460 /*!	According to the BeBook, this function should always succeed.
5461 	This is no longer the case.
5462 */
5463 long
5464 get_memory_map(const void* address, ulong numBytes, physical_entry* table,
5465 	long numEntries)
5466 {
5467 	uint32 entriesRead = numEntries;
5468 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5469 		table, &entriesRead);
5470 	if (error != B_OK)
5471 		return error;
5472 
5473 	// close the entry list
5474 
5475 	// if it's only one entry, we will silently accept the missing ending
5476 	if (numEntries == 1)
5477 		return B_OK;
5478 
5479 	if (entriesRead + 1 > (uint32)numEntries)
5480 		return B_BUFFER_OVERFLOW;
5481 
5482 	table[entriesRead].address = 0;
5483 	table[entriesRead].size = 0;
5484 
5485 	return B_OK;
5486 }
5487 
5488 
5489 area_id
5490 area_for(void* address)
5491 {
5492 	return vm_area_for((addr_t)address, true);
5493 }
5494 
5495 
5496 area_id
5497 find_area(const char* name)
5498 {
5499 	return VMAreaHash::Find(name);
5500 }
5501 
5502 
5503 status_t
5504 _get_area_info(area_id id, area_info* info, size_t size)
5505 {
5506 	if (size != sizeof(area_info) || info == NULL)
5507 		return B_BAD_VALUE;
5508 
5509 	AddressSpaceReadLocker locker;
5510 	VMArea* area;
5511 	status_t status = locker.SetFromArea(id, area);
5512 	if (status != B_OK)
5513 		return status;
5514 
5515 	fill_area_info(area, info, size);
5516 	return B_OK;
5517 }
5518 
5519 
5520 status_t
5521 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size)
5522 {
5523 	addr_t nextBase = *(addr_t*)cookie;
5524 
5525 	// we're already through the list
5526 	if (nextBase == (addr_t)-1)
5527 		return B_ENTRY_NOT_FOUND;
5528 
5529 	if (team == B_CURRENT_TEAM)
5530 		team = team_get_current_team_id();
5531 
5532 	AddressSpaceReadLocker locker(team);
5533 	if (!locker.IsLocked())
5534 		return B_BAD_TEAM_ID;
5535 
5536 	VMArea* area;
5537 	for (VMAddressSpace::AreaIterator it
5538 				= locker.AddressSpace()->GetAreaIterator();
5539 			(area = it.Next()) != NULL;) {
5540 		if (area->Base() > nextBase)
5541 			break;
5542 	}
5543 
5544 	if (area == NULL) {
5545 		nextBase = (addr_t)-1;
5546 		return B_ENTRY_NOT_FOUND;
5547 	}
5548 
5549 	fill_area_info(area, info, size);
5550 	*cookie = (int32)(area->Base());
5551 		// TODO: Not 64 bit safe!
5552 
5553 	return B_OK;
5554 }
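
/*	Illustrative sketch of the iteration this function serves (by way of the
	public get_next_area_info()); the dprintf() output is arbitrary.

		int32 cookie = 0;
		area_info info;
		while (get_next_area_info(team, &cookie, &info) == B_OK) {
			dprintf("area %" B_PRId32 " \"%s\" at %p\n", info.area, info.name,
				info.address);
		}
*/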
5555 
5556 
5557 status_t
5558 set_area_protection(area_id area, uint32 newProtection)
5559 {
5560 	fix_protection(&newProtection);
5561 
5562 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5563 		newProtection, true);
5564 }
5565 
5566 
5567 status_t
5568 resize_area(area_id areaID, size_t newSize)
5569 {
5570 	return vm_resize_area(areaID, newSize, true);
5571 }
5572 
5573 
5574 /*!	Transfers the specified area to a new team. The caller must be the owner
5575 	of the area.
5576 */
5577 area_id
5578 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5579 	bool kernel)
5580 {
5581 	area_info info;
5582 	status_t status = get_area_info(id, &info);
5583 	if (status != B_OK)
5584 		return status;
5585 
5586 	if (info.team != thread_get_current_thread()->team->id)
5587 		return B_PERMISSION_DENIED;
5588 
5589 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5590 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5591 	if (clonedArea < 0)
5592 		return clonedArea;
5593 
5594 	status = vm_delete_area(info.team, id, kernel);
5595 	if (status != B_OK) {
5596 		vm_delete_area(target, clonedArea, kernel);
5597 		return status;
5598 	}
5599 
5600 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5601 
5602 	return clonedArea;
5603 }
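
/*	Illustrative sketch (hypothetical `sourceArea`/`targetTeam`): on success
	the returned ID names the clone now owned by the target team and the
	original area no longer exists; on failure the original area is kept.

		void* address = NULL;
		area_id transferred = transfer_area(sourceArea, &address,
			B_ANY_ADDRESS, targetTeam, true);
		if (transferred < 0)
			return transferred;	// still owned by the calling team
*/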
5604 
5605 
5606 area_id
5607 map_physical_memory(const char* name, phys_addr_t physicalAddress,
5608 	size_t numBytes, uint32 addressSpec, uint32 protection,
5609 	void** _virtualAddress)
5610 {
5611 	if (!arch_vm_supports_protection(protection))
5612 		return B_NOT_SUPPORTED;
5613 
5614 	fix_protection(&protection);
5615 
5616 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
5617 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
5618 		false);
5619 }
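
/*	Illustrative sketch (hypothetical `physicalBase`): mapping a device's
	register page into the kernel address space; delete_area() releases the
	mapping again.

		void* registers;
		area_id area = map_physical_memory("device registers", physicalBase,
			B_PAGE_SIZE, B_ANY_KERNEL_ADDRESS,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, &registers);
		if (area < B_OK)
			return area;
*/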
5620 
5621 
5622 area_id
5623 clone_area(const char* name, void** _address, uint32 addressSpec,
5624 	uint32 protection, area_id source)
5625 {
5626 	if ((protection & B_KERNEL_PROTECTION) == 0)
5627 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5628 
5629 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
5630 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
5631 }
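
/*	Illustrative sketch (hypothetical area name): the usual publish-by-name
	pattern -- look up the source area and clone it into this address space.

		area_id source = find_area("shared frame buffer");
		if (source >= B_OK) {
			void* address;
			area_id clone = clone_area("frame buffer clone", &address,
				B_ANY_KERNEL_ADDRESS,
				B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, source);
		}
*/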
5632 
5633 
5634 area_id
5635 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock,
5636 	uint32 protection, uint32 flags,
5637 	const virtual_address_restrictions* virtualAddressRestrictions,
5638 	const physical_address_restrictions* physicalAddressRestrictions,
5639 	void** _address)
5640 {
5641 	fix_protection(&protection);
5642 
5643 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
5644 		virtualAddressRestrictions, physicalAddressRestrictions, true,
5645 		_address);
5646 }
5647 
5648 
5649 area_id
5650 create_area(const char* name, void** _address, uint32 addressSpec, size_t size,
5651 	uint32 lock, uint32 protection)
5652 {
5653 	fix_protection(&protection);
5654 
5655 	virtual_address_restrictions virtualRestrictions = {};
5656 	virtualRestrictions.address = *_address;
5657 	virtualRestrictions.address_specification = addressSpec;
5658 	physical_address_restrictions physicalRestrictions = {};
5659 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
5660 		lock, protection, 0, &virtualRestrictions, &physicalRestrictions, true,
5661 		_address);
5662 }
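
/*	Illustrative sketch (hypothetical name/size): creating a fully locked
	kernel buffer; on success create_area() stores the chosen address in
	`buffer`.

		void* buffer;
		area_id area = create_area("scratch buffer", &buffer,
			B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE, B_FULL_LOCK,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		if (area < B_OK)
			return area;
*/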
5663 
5664 
5665 status_t
5666 delete_area(area_id area)
5667 {
5668 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
5669 }
5670 
5671 
5672 //	#pragma mark - Userland syscalls
5673 
5674 
5675 status_t
5676 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
5677 	addr_t size)
5678 {
5679 	// filter out some unavailable values (for userland)
5680 	switch (addressSpec) {
5681 		case B_ANY_KERNEL_ADDRESS:
5682 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5683 			return B_BAD_VALUE;
5684 	}
5685 
5686 	addr_t address;
5687 
5688 	if (!IS_USER_ADDRESS(userAddress)
5689 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
5690 		return B_BAD_ADDRESS;
5691 
5692 	status_t status = vm_reserve_address_range(
5693 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
5694 		RESERVED_AVOID_BASE);
5695 	if (status != B_OK)
5696 		return status;
5697 
5698 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
5699 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5700 			(void*)address, size);
5701 		return B_BAD_ADDRESS;
5702 	}
5703 
5704 	return B_OK;
5705 }
5706 
5707 
5708 status_t
5709 _user_unreserve_address_range(addr_t address, addr_t size)
5710 {
5711 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5712 		(void*)address, size);
5713 }
5714 
5715 
5716 area_id
5717 _user_area_for(void* address)
5718 {
5719 	return vm_area_for((addr_t)address, false);
5720 }
5721 
5722 
5723 area_id
5724 _user_find_area(const char* userName)
5725 {
5726 	char name[B_OS_NAME_LENGTH];
5727 
5728 	if (!IS_USER_ADDRESS(userName)
5729 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5730 		return B_BAD_ADDRESS;
5731 
5732 	return find_area(name);
5733 }
5734 
5735 
5736 status_t
5737 _user_get_area_info(area_id area, area_info* userInfo)
5738 {
5739 	if (!IS_USER_ADDRESS(userInfo))
5740 		return B_BAD_ADDRESS;
5741 
5742 	area_info info;
5743 	status_t status = get_area_info(area, &info);
5744 	if (status < B_OK)
5745 		return status;
5746 
5747 	// TODO: do we want to prevent userland from seeing kernel protections?
5748 	//info.protection &= B_USER_PROTECTION;
5749 
5750 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5751 		return B_BAD_ADDRESS;
5752 
5753 	return status;
5754 }
5755 
5756 
5757 status_t
5758 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo)
5759 {
5760 	int32 cookie;
5761 
5762 	if (!IS_USER_ADDRESS(userCookie)
5763 		|| !IS_USER_ADDRESS(userInfo)
5764 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
5765 		return B_BAD_ADDRESS;
5766 
5767 	area_info info;
5768 	status_t status = _get_next_area_info(team, &cookie, &info,
5769 		sizeof(area_info));
5770 	if (status != B_OK)
5771 		return status;
5772 
5773 	//info.protection &= B_USER_PROTECTION;
5774 
5775 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
5776 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5777 		return B_BAD_ADDRESS;
5778 
5779 	return status;
5780 }
5781 
5782 
5783 status_t
5784 _user_set_area_protection(area_id area, uint32 newProtection)
5785 {
5786 	if ((newProtection & ~B_USER_PROTECTION) != 0)
5787 		return B_BAD_VALUE;
5788 
5789 	fix_protection(&newProtection);
5790 
5791 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
5792 		newProtection, false);
5793 }
5794 
5795 
5796 status_t
5797 _user_resize_area(area_id area, size_t newSize)
5798 {
5799 	// TODO: Since we restrict deleting of areas to those owned by the team,
5800 	// we should also do that for resizing (check other functions, too).
5801 	return vm_resize_area(area, newSize, false);
5802 }
5803 
5804 
5805 area_id
5806 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
5807 	team_id target)
5808 {
5809 	// filter out some unavailable values (for userland)
5810 	switch (addressSpec) {
5811 		case B_ANY_KERNEL_ADDRESS:
5812 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5813 			return B_BAD_VALUE;
5814 	}
5815 
5816 	void* address;
5817 	if (!IS_USER_ADDRESS(userAddress)
5818 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5819 		return B_BAD_ADDRESS;
5820 
5821 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
5822 	if (newArea < B_OK)
5823 		return newArea;
5824 
5825 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
5826 		return B_BAD_ADDRESS;
5827 
5828 	return newArea;
5829 }
5830 
5831 
5832 area_id
5833 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
5834 	uint32 protection, area_id sourceArea)
5835 {
5836 	char name[B_OS_NAME_LENGTH];
5837 	void* address;
5838 
5839 	// filter out some unavailable values (for userland)
5840 	switch (addressSpec) {
5841 		case B_ANY_KERNEL_ADDRESS:
5842 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5843 			return B_BAD_VALUE;
5844 	}
5845 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
5846 		return B_BAD_VALUE;
5847 
5848 	if (!IS_USER_ADDRESS(userName)
5849 		|| !IS_USER_ADDRESS(userAddress)
5850 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
5851 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5852 		return B_BAD_ADDRESS;
5853 
5854 	fix_protection(&protection);
5855 
5856 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
5857 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
5858 		false);
5859 	if (clonedArea < B_OK)
5860 		return clonedArea;
5861 
5862 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5863 		delete_area(clonedArea);
5864 		return B_BAD_ADDRESS;
5865 	}
5866 
5867 	return clonedArea;
5868 }
5869 
5870 
5871 area_id
5872 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
5873 	size_t size, uint32 lock, uint32 protection)
5874 {
5875 	char name[B_OS_NAME_LENGTH];
5876 	void* address;
5877 
5878 	// filter out some unavailable values (for userland)
5879 	switch (addressSpec) {
5880 		case B_ANY_KERNEL_ADDRESS:
5881 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5882 			return B_BAD_VALUE;
5883 	}
5884 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
5885 		return B_BAD_VALUE;
5886 
5887 	if (!IS_USER_ADDRESS(userName)
5888 		|| !IS_USER_ADDRESS(userAddress)
5889 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
5890 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5891 		return B_BAD_ADDRESS;
5892 
5893 	if (addressSpec == B_EXACT_ADDRESS
5894 		&& IS_KERNEL_ADDRESS(address))
5895 		return B_BAD_VALUE;
5896 
5897 	fix_protection(&protection);
5898 
5899 	virtual_address_restrictions virtualRestrictions = {};
5900 	virtualRestrictions.address = address;
5901 	virtualRestrictions.address_specification = addressSpec;
5902 	physical_address_restrictions physicalRestrictions = {};
5903 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
5904 		size, lock, protection, 0, &virtualRestrictions, &physicalRestrictions,
5905 		false, &address);
5906 
5907 	if (area >= B_OK
5908 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5909 		delete_area(area);
5910 		return B_BAD_ADDRESS;
5911 	}
5912 
5913 	return area;
5914 }
5915 
5916 
5917 status_t
5918 _user_delete_area(area_id area)
5919 {
5920 	// Unlike the BeOS implementation, you can now only delete areas
5921 	// that you have created yourself from userland.
5922 	// The documentation for delete_area() explicitly states that this
5923 	// would be restricted in the future, and so it now is.
5924 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
5925 }
5926 
5927 
5928 // TODO: create a BeOS style call for this!
5929 
5930 area_id
5931 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
5932 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
5933 	int fd, off_t offset)
5934 {
5935 	char name[B_OS_NAME_LENGTH];
5936 	void* address;
5937 	area_id area;
5938 
5939 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
5940 		return B_BAD_VALUE;
5941 
5942 	fix_protection(&protection);
5943 
5944 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
5945 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
5946 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5947 		return B_BAD_ADDRESS;
5948 
5949 	if (addressSpec == B_EXACT_ADDRESS) {
5950 		if ((addr_t)address + size < (addr_t)address
5951 				|| (addr_t)address % B_PAGE_SIZE != 0) {
5952 			return B_BAD_VALUE;
5953 		}
5954 		if (!IS_USER_ADDRESS(address)
5955 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
5956 			return B_BAD_ADDRESS;
5957 		}
5958 	}
5959 
5960 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
5961 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
5962 		false);
5963 	if (area < B_OK)
5964 		return area;
5965 
5966 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
5967 		return B_BAD_ADDRESS;
5968 
5969 	return area;
5970 }
5971 
5972 
5973 status_t
5974 _user_unmap_memory(void* _address, size_t size)
5975 {
5976 	addr_t address = (addr_t)_address;
5977 
5978 	// check params
5979 	if (size == 0 || (addr_t)address + size < (addr_t)address
5980 		|| (addr_t)address % B_PAGE_SIZE != 0) {
5981 		return B_BAD_VALUE;
5982 	}
5983 
5984 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
5985 		return B_BAD_ADDRESS;
5986 
5987 	// Write lock the address space and ensure the address range is not wired.
5988 	AddressSpaceWriteLocker locker;
5989 	do {
5990 		status_t status = locker.SetTo(team_get_current_team_id());
5991 		if (status != B_OK)
5992 			return status;
5993 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
5994 			size, &locker));
5995 
5996 	// unmap
5997 	return unmap_address_range(locker.AddressSpace(), address, size, false);
5998 }
5999 
6000 
6001 status_t
6002 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6003 {
6004 	// check address range
6005 	addr_t address = (addr_t)_address;
6006 	size = PAGE_ALIGN(size);
6007 
6008 	if ((address % B_PAGE_SIZE) != 0)
6009 		return B_BAD_VALUE;
6010 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6011 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6012 		// weird error code required by POSIX
6013 		return ENOMEM;
6014 	}
6015 
6016 	// extend and check protection
6017 	if ((protection & ~B_USER_PROTECTION) != 0)
6018 		return B_BAD_VALUE;
6019 
6020 	fix_protection(&protection);
6021 
6022 	// We need to write lock the address space, since we're going to play with
6023 	// the areas. Also make sure that none of the areas is wired and that we're
6024 	// actually allowed to change the protection.
6025 	AddressSpaceWriteLocker locker;
6026 
6027 	bool restart;
6028 	do {
6029 		restart = false;
6030 
6031 		status_t status = locker.SetTo(team_get_current_team_id());
6032 		if (status != B_OK)
6033 			return status;
6034 
6035 		// First round: Check whether the whole range is covered by areas and we
6036 		// are allowed to modify them.
6037 		addr_t currentAddress = address;
6038 		size_t sizeLeft = size;
6039 		while (sizeLeft > 0) {
6040 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6041 			if (area == NULL)
6042 				return B_NO_MEMORY;
6043 
6044 			if ((area->protection & B_KERNEL_AREA) != 0)
6045 				return B_NOT_ALLOWED;
6046 
6047 			AreaCacheLocker cacheLocker(area);
6048 
6049 			if (wait_if_area_is_wired(area, &locker, &cacheLocker)) {
6050 				restart = true;
6051 				break;
6052 			}
6053 
6054 			cacheLocker.Unlock();
6055 
6056 			// TODO: For (shared) mapped files we should check whether the new
6057 			// protections are compatible with the file permissions. We don't
6058 			// have a way to do that yet, though.
6059 
6060 			addr_t offset = currentAddress - area->Base();
6061 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6062 
6063 			currentAddress += rangeSize;
6064 			sizeLeft -= rangeSize;
6065 		}
6066 	} while (restart);
6067 
6068 	// Second round: If the protections differ from that of the area, create a
6069 	// page protection array and re-map mapped pages.
6070 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6071 	addr_t currentAddress = address;
6072 	size_t sizeLeft = size;
6073 	while (sizeLeft > 0) {
6074 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6075 		if (area == NULL)
6076 			return B_NO_MEMORY;
6077 
6078 		addr_t offset = currentAddress - area->Base();
6079 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6080 
6081 		currentAddress += rangeSize;
6082 		sizeLeft -= rangeSize;
6083 
6084 		if (area->page_protections == NULL) {
6085 			if (area->protection == protection)
6086 				continue;
6087 
6088 			// In the page protections we store only the three user protections,
6089 			// so we use 4 bits per page.
6090 			uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
6091 			area->page_protections = (uint8*)malloc(bytes);
6092 			if (area->page_protections == NULL)
6093 				return B_NO_MEMORY;
6094 
6095 			// init the page protections for all pages to that of the area
6096 			uint32 areaProtection = area->protection
6097 				& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
6098 			memset(area->page_protections,
6099 				areaProtection | (areaProtection << 4), bytes);
6100 		}
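		// Packing sketch (inferred from the memset() above; the even/odd
		// nibble assignment is an assumption about the helpers): page i's
		// protection lives in page_protections[i / 2] -- even pages in the
		// low nibble, odd pages in the high one -- and is updated by
		// set_area_page_protection() below.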
6101 
6102 		// We need to lock the complete cache chain, since we potentially unmap
6103 		// pages of lower caches.
6104 		VMCache* topCache = vm_area_get_locked_cache(area);
6105 		VMCacheChainLocker cacheChainLocker(topCache);
6106 		cacheChainLocker.LockAllSourceCaches();
6107 
6108 		for (addr_t pageAddress = area->Base() + offset;
6109 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6110 			map->Lock();
6111 
6112 			set_area_page_protection(area, pageAddress, protection);
6113 
6114 			phys_addr_t physicalAddress;
6115 			uint32 flags;
6116 
6117 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6118 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6119 				map->Unlock();
6120 				continue;
6121 			}
6122 
6123 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6124 			if (page == NULL) {
6125 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6126 					"\n", area, physicalAddress);
6127 				map->Unlock();
6128 				return B_ERROR;
6129 			}
6130 
6131 			// If the page is not in the topmost cache and write access is
6132 			// requested, we have to unmap it. Otherwise we can re-map it with
6133 			// the new protection.
6134 			bool unmapPage = page->Cache() != topCache
6135 				&& (protection & B_WRITE_AREA) != 0;
6136 
6137 			if (!unmapPage)
6138 				map->ProtectPage(area, pageAddress, protection);
6139 
6140 			map->Unlock();
6141 
6142 			if (unmapPage) {
6143 				DEBUG_PAGE_ACCESS_START(page);
6144 				unmap_page(area, pageAddress);
6145 				DEBUG_PAGE_ACCESS_END(page);
6146 			}
6147 		}
6148 	}
6149 
6150 	return B_OK;
6151 }
6152 
6153 
6154 status_t
6155 _user_sync_memory(void* _address, size_t size, uint32 flags)
6156 {
6157 	addr_t address = (addr_t)_address;
6158 	size = PAGE_ALIGN(size);
6159 
6160 	// check params
6161 	if ((address % B_PAGE_SIZE) != 0)
6162 		return B_BAD_VALUE;
6163 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6164 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6165 		// weird error code required by POSIX
6166 		return ENOMEM;
6167 	}
6168 
6169 	bool writeSync = (flags & MS_SYNC) != 0;
6170 	bool writeAsync = (flags & MS_ASYNC) != 0;
6171 	if (writeSync && writeAsync)
6172 		return B_BAD_VALUE;
6173 
6174 	if (size == 0 || (!writeSync && !writeAsync))
6175 		return B_OK;
6176 
6177 	// iterate through the range and sync all concerned areas
6178 	while (size > 0) {
6179 		// read lock the address space
6180 		AddressSpaceReadLocker locker;
6181 		status_t error = locker.SetTo(team_get_current_team_id());
6182 		if (error != B_OK)
6183 			return error;
6184 
6185 		// get the first area
6186 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6187 		if (area == NULL)
6188 			return B_NO_MEMORY;
6189 
6190 		uint32 offset = address - area->Base();
6191 		size_t rangeSize = min_c(area->Size() - offset, size);
6192 		offset += area->cache_offset;
6193 
6194 		// lock the cache
6195 		AreaCacheLocker cacheLocker(area);
6196 		if (!cacheLocker)
6197 			return B_BAD_VALUE;
6198 		VMCache* cache = area->cache;
6199 
6200 		locker.Unlock();
6201 
6202 		uint32 firstPage = offset >> PAGE_SHIFT;
6203 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6204 
6205 		// write the pages
6206 		if (cache->type == CACHE_TYPE_VNODE) {
6207 			if (writeSync) {
6208 				// synchronous
6209 				error = vm_page_write_modified_page_range(cache, firstPage,
6210 					endPage);
6211 				if (error != B_OK)
6212 					return error;
6213 			} else {
6214 				// asynchronous
6215 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6216 				// TODO: This is probably not quite what is supposed to happen.
6217 				// Especially when a lot has to be written, it might take ages
6218 				// until it really hits the disk.
6219 			}
6220 		}
6221 
6222 		address += rangeSize;
6223 		size -= rangeSize;
6224 	}
6225 
6226 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
6227 	// synchronize multiple mappings of the same file. In our VM they never get
6228 	// out of sync, though, so we don't have to do anything.
6229 
6230 	return B_OK;
6231 }
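
/*	Illustrative sketch (assuming the usual libroot wiring): this syscall
	backs the POSIX msync() call; a userland caller flushing a hypothetical
	file mapping would do:

		if (msync(mappedFile, mappingSize, MS_SYNC) != 0)
			return errno;
*/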
6232 
6233 
6234 status_t
6235 _user_memory_advice(void* address, size_t size, uint32 advice)
6236 {
6237 	// TODO: Implement!
6238 	return B_OK;
6239 }
6240