xref: /haiku/src/system/kernel/vm/vm.cpp (revision 7bdeef54a24d3417300f251af891df962b638b9b)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 namespace {
77 
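// Locking policy used by AreaCacheLocker below: caches are always handed
// over already locked (as returned by vm_area_get_locked_cache()), so Lock()
// deliberately fails; Unlock() returns the cache via
// vm_area_put_locked_cache().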
78 class AreaCacheLocking {
79 public:
80 	inline bool Lock(VMCache* lockable)
81 	{
82 		return false;
83 	}
84 
85 	inline void Unlock(VMCache* lockable)
86 	{
87 		vm_area_put_locked_cache(lockable);
88 	}
89 };
90 
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 	inline AreaCacheLocker(VMCache* cache = NULL)
94 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95 	{
96 	}
97 
98 	inline AreaCacheLocker(VMArea* area)
99 		: AutoLocker<VMCache, AreaCacheLocking>()
100 	{
101 		SetTo(area);
102 	}
103 
104 	inline void SetTo(VMCache* cache, bool alreadyLocked)
105 	{
106 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107 	}
108 
109 	inline void SetTo(VMArea* area)
110 	{
111 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113 	}
114 };
115 
116 
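// Locks a chain of caches from a top (consumer) cache down through its
// source caches. While locked, each cache's UserData() field stores the
// consumer it was reached from, so that Unlock() can release the chain in
// source -> consumer order.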
117 class VMCacheChainLocker {
118 public:
119 	VMCacheChainLocker()
120 		:
121 		fTopCache(NULL),
122 		fBottomCache(NULL)
123 	{
124 	}
125 
126 	VMCacheChainLocker(VMCache* topCache)
127 		:
128 		fTopCache(topCache),
129 		fBottomCache(topCache)
130 	{
131 	}
132 
133 	~VMCacheChainLocker()
134 	{
135 		Unlock();
136 	}
137 
138 	void SetTo(VMCache* topCache)
139 	{
140 		fTopCache = topCache;
141 		fBottomCache = topCache;
142 
143 		if (topCache != NULL)
144 			topCache->SetUserData(NULL);
145 	}
146 
147 	VMCache* LockSourceCache()
148 	{
149 		if (fBottomCache == NULL || fBottomCache->source == NULL)
150 			return NULL;
151 
152 		VMCache* previousCache = fBottomCache;
153 
154 		fBottomCache = fBottomCache->source;
155 		fBottomCache->Lock();
156 		fBottomCache->AcquireRefLocked();
157 		fBottomCache->SetUserData(previousCache);
158 
159 		return fBottomCache;
160 	}
161 
162 	void LockAllSourceCaches()
163 	{
164 		while (LockSourceCache() != NULL) {
165 		}
166 	}
167 
168 	void Unlock(VMCache* exceptCache = NULL)
169 	{
170 		if (fTopCache == NULL)
171 			return;
172 
173 		// Unlock caches in source -> consumer direction. This is important to
174 		// avoid double-locking and a reversal of locking order in case a cache
175 		// is eligible for merging.
176 		VMCache* cache = fBottomCache;
177 		while (cache != NULL) {
178 			VMCache* nextCache = (VMCache*)cache->UserData();
179 			if (cache != exceptCache)
180 				cache->ReleaseRefAndUnlock(cache != fTopCache);
181 
182 			if (cache == fTopCache)
183 				break;
184 
185 			cache = nextCache;
186 		}
187 
188 		fTopCache = NULL;
189 		fBottomCache = NULL;
190 	}
191 
192 	void UnlockKeepRefs(bool keepTopCacheLocked)
193 	{
194 		if (fTopCache == NULL)
195 			return;
196 
197 		VMCache* nextCache = fBottomCache;
198 		VMCache* cache = NULL;
199 
200 		while (keepTopCacheLocked
201 				? nextCache != fTopCache : cache != fTopCache) {
202 			cache = nextCache;
203 			nextCache = (VMCache*)cache->UserData();
204 			cache->Unlock(cache != fTopCache);
205 		}
206 	}
207 
208 	void RelockCaches(bool topCacheLocked)
209 	{
210 		if (fTopCache == NULL)
211 			return;
212 
213 		VMCache* nextCache = fTopCache;
214 		VMCache* cache = NULL;
215 		if (topCacheLocked) {
216 			cache = nextCache;
217 			nextCache = cache->source;
218 		}
219 
220 		while (cache != fBottomCache && nextCache != NULL) {
221 			VMCache* consumer = cache;
222 			cache = nextCache;
223 			nextCache = cache->source;
224 			cache->Lock();
225 			cache->SetUserData(consumer);
226 		}
227 	}
228 
229 private:
230 	VMCache*	fTopCache;
231 	VMCache*	fBottomCache;
232 };
233 
234 } // namespace
235 
236 
237 // The memory reserve that an allocation of a certain priority must not touch.
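// (Indexed by the allocation priority: VM_PRIORITY_USER, VM_PRIORITY_SYSTEM,
// VM_PRIORITY_VIP.)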
238 static const size_t kMemoryReserveForPriority[] = {
239 	VM_MEMORY_RESERVE_USER,		// user
240 	VM_MEMORY_RESERVE_SYSTEM,	// system
241 	0							// VIP
242 };
243 
244 
245 ObjectCache* gPageMappingsObjectCache;
246 
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248 
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
253 
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
255 
256 #if DEBUG_CACHE_LIST
257 
258 struct cache_info {
259 	VMCache*	cache;
260 	addr_t		page_count;
261 	addr_t		committed;
262 };
263 
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
266 
267 #endif	// DEBUG_CACHE_LIST
268 
269 
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 	bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 	int protection, int mapping, uint32 flags,
278 	const virtual_address_restrictions* addressRestrictions, bool kernel,
279 	VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
281 
282 
283 //	#pragma mark -
284 
285 
286 #if VM_PAGE_FAULT_TRACING
287 
288 namespace VMPageFaultTracing {
289 
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293 		:
294 		fAddress(address),
295 		fPC(pc),
296 		fWrite(write),
297 		fUser(user)
298 	{
299 		Initialized();
300 	}
301 
302 	virtual void AddDump(TraceOutput& out)
303 	{
304 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306 	}
307 
308 private:
309 	addr_t	fAddress;
310 	addr_t	fPC;
311 	bool	fWrite;
312 	bool	fUser;
313 };
314 
315 
316 // page fault errors
317 enum {
318 	PAGE_FAULT_ERROR_NO_AREA		= 0,
319 	PAGE_FAULT_ERROR_KERNEL_ONLY,
320 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 	PAGE_FAULT_ERROR_READ_PROTECTED,
322 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325 };
326 
327 
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 	PageFaultError(area_id area, status_t error)
331 		:
332 		fArea(area),
333 		fError(error)
334 	{
335 		Initialized();
336 	}
337 
338 	virtual void AddDump(TraceOutput& out)
339 	{
340 		switch (fError) {
341 			case PAGE_FAULT_ERROR_NO_AREA:
342 				out.Print("page fault error: no area");
343 				break;
344 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 				out.Print("page fault error: area: %ld, kernel only", fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 				out.Print("page fault error: area: %ld, write protected",
349 					fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_READ_PROTECTED:
352 				out.Print("page fault error: area: %ld, read protected", fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 				out.Print("page fault error: area: %ld, execute protected",
356 					fArea);
357 				break;
358 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 				out.Print("page fault error: kernel touching bad user memory");
360 				break;
361 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 				out.Print("page fault error: no address space");
363 				break;
364 			default:
365 				out.Print("page fault error: area: %ld, error: %s", fArea,
366 					strerror(fError));
367 				break;
368 		}
369 	}
370 
371 private:
372 	area_id		fArea;
373 	status_t	fError;
374 };
375 
376 
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 			vm_page* page)
381 		:
382 		fArea(area),
383 		fTopCache(topCache),
384 		fCache(cache),
385 		fPage(page)
386 	{
387 		Initialized();
388 	}
389 
390 	virtual void AddDump(TraceOutput& out)
391 	{
392 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 			"page: %p", fArea, fTopCache, fCache, fPage);
394 	}
395 
396 private:
397 	area_id		fArea;
398 	VMCache*	fTopCache;
399 	VMCache*	fCache;
400 	vm_page*	fPage;
401 };
402 
403 }	// namespace VMPageFaultTracing
404 
405 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 #	define TPF(x) ;
408 #endif	// VM_PAGE_FAULT_TRACING
409 
410 
411 //	#pragma mark -
412 
413 
414 /*!	The page's cache must be locked.
415 */
416 static inline void
417 increment_page_wired_count(vm_page* page)
418 {
419 	if (!page->IsMapped())
420 		atomic_add(&gMappedPagesCount, 1);
421 	page->IncrementWiredCount();
422 }
423 
424 
425 /*!	The page's cache must be locked.
426 */
427 static inline void
428 decrement_page_wired_count(vm_page* page)
429 {
430 	page->DecrementWiredCount();
431 	if (!page->IsMapped())
432 		atomic_add(&gMappedPagesCount, -1);
433 }
434 
435 
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
438 {
439 	return area->Base()
440 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441 }
442 
443 
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
447 {
448 	VMAreaHash::ReadLock();
449 
450 	VMArea* area = VMAreaHash::LookupLocked(id);
451 	if (area != NULL && area->address_space != addressSpace)
452 		area = NULL;
453 
454 	VMAreaHash::ReadUnlock();
455 
456 	return area;
457 }
458 
459 
460 static status_t
461 allocate_area_page_protections(VMArea* area)
462 {
463 	// In the page protections we store only the three user protections,
464 	// so we use 4 bits per page.
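	// Each byte therefore holds the nibbles of two consecutive pages: the
	// even page's protection in the low nibble, the odd page's in the high
	// nibble (see set_area_page_protection()/get_area_page_protection()).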
465 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 	area->page_protections = (uint8*)malloc_etc(bytes,
467 		HEAP_DONT_LOCK_KERNEL_SPACE);
468 	if (area->page_protections == NULL)
469 		return B_NO_MEMORY;
470 
471 	// init the page protections for all pages to that of the area
472 	uint32 areaProtection = area->protection
473 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 	memset(area->page_protections, areaProtection | (areaProtection << 4),
475 		bytes);
476 	return B_OK;
477 }
478 
479 
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482 {
483 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 	uint8& entry = area->page_protections[pageIndex / 2];
486 	if (pageIndex % 2 == 0)
487 		entry = (entry & 0xf0) | protection;
488 	else
489 		entry = (entry & 0x0f) | (protection << 4);
490 }
491 
492 
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
495 {
496 	if (area->page_protections == NULL)
497 		return area->protection;
498 
499 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 	uint32 protection = area->page_protections[pageIndex / 2];
501 	if (pageIndex % 2 == 0)
502 		protection &= 0x0f;
503 	else
504 		protection >>= 4;
505 
506 	// If this is a kernel area we translate the user flags to kernel flags.
507 	if (area->address_space == VMAddressSpace::Kernel()) {
508 		uint32 kernelProtection = 0;
509 		if ((protection & B_READ_AREA) != 0)
510 			kernelProtection |= B_KERNEL_READ_AREA;
511 		if ((protection & B_WRITE_AREA) != 0)
512 			kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 		return kernelProtection;
515 	}
516 
517 	return protection | B_KERNEL_READ_AREA
518 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519 }
520 
521 
522 /*!	The caller must have reserved enough pages that the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
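	For B_NO_LOCK areas a vm_page_mapping is created and added to both the
	page's and the area's mapping list; for all other (wired) areas only the
	page's wired count is incremented.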
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue,
575 		// since otherwise the page daemon wouldn't keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
586 
587 
588 /*!	The caller must hold the lock of the
589 	page's cache.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	The caller must hold the lock of all
600 	mapped pages' caches.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 /*!	Cuts a piece out of an area. If the given cut range covers the complete
610 	area, it is deleted. If it covers the beginning or the end, the area is
611 	resized accordingly. If the range covers some part in the middle of the
612 	area, it is split in two; in this case the second area is returned via
613 	\a _secondArea (the variable is left untouched in the other cases).
614 	The address space must be write locked.
615 	The caller must ensure that no part of the given range is wired.
616 */
617 static status_t
618 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
619 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
620 {
621 	// Does the cut range intersect with the area at all?
622 	addr_t areaLast = area->Base() + (area->Size() - 1);
623 	if (area->Base() > lastAddress || areaLast < address)
624 		return B_OK;
625 
626 	// Is the area fully covered?
627 	if (area->Base() >= address && areaLast <= lastAddress) {
628 		delete_area(addressSpace, area, false);
629 		return B_OK;
630 	}
631 
632 	int priority;
633 	uint32 allocationFlags;
634 	if (addressSpace == VMAddressSpace::Kernel()) {
635 		priority = VM_PRIORITY_SYSTEM;
636 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
637 			| HEAP_DONT_LOCK_KERNEL_SPACE;
638 	} else {
639 		priority = VM_PRIORITY_USER;
640 		allocationFlags = 0;
641 	}
642 
643 	VMCache* cache = vm_area_get_locked_cache(area);
644 	VMCacheChainLocker cacheChainLocker(cache);
645 	cacheChainLocker.LockAllSourceCaches();
646 
647 	// Cut the end only?
648 	if (areaLast <= lastAddress) {
649 		size_t oldSize = area->Size();
650 		size_t newSize = address - area->Base();
651 
652 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
653 			allocationFlags);
654 		if (error != B_OK)
655 			return error;
656 
657 		// unmap pages
658 		unmap_pages(area, address, oldSize - newSize);
659 
660 		// If no one else uses the area's cache, we can resize it, too.
661 		if (cache->areas == area && area->cache_next == NULL
662 			&& cache->consumers.IsEmpty()
663 			&& cache->type == CACHE_TYPE_RAM) {
664 			// Since VMCache::Resize() can temporarily drop the lock, we must
665 			// unlock all lower caches to prevent locking order inversion.
666 			cacheChainLocker.Unlock(cache);
667 			cache->Resize(cache->virtual_base + newSize, priority);
668 			cache->ReleaseRefAndUnlock();
669 		}
670 
671 		return B_OK;
672 	}
673 
674 	// Cut the beginning only?
675 	if (area->Base() >= address) {
676 		addr_t oldBase = area->Base();
677 		addr_t newBase = lastAddress + 1;
678 		size_t newSize = areaLast - lastAddress;
679 
680 		// unmap pages
681 		unmap_pages(area, oldBase, newBase - oldBase);
682 
683 		// resize the area
684 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
685 			allocationFlags);
686 		if (error != B_OK)
687 			return error;
688 
689 		// TODO: If no one else uses the area's cache, we should resize it, too!
690 
691 		area->cache_offset += newBase - oldBase;
692 
693 		return B_OK;
694 	}
695 
696 	// The tough part -- cut a piece out of the middle of the area.
697 	// We do that by shrinking the area to the beginning section and creating a
698 	// new area for the end section.
699 
700 	addr_t firstNewSize = address - area->Base();
701 	addr_t secondBase = lastAddress + 1;
702 	addr_t secondSize = areaLast - lastAddress;
703 
704 	// unmap pages
705 	unmap_pages(area, address, area->Size() - firstNewSize);
706 
707 	// resize the area
708 	addr_t oldSize = area->Size();
709 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
710 		allocationFlags);
711 	if (error != B_OK)
712 		return error;
713 
714 	// TODO: If no one else uses the area's cache, we might want to create a
715 	// new cache for the second area, transfer the concerned pages from the
716 	// first cache to it and resize the first cache.
717 
718 	// map the second area
719 	virtual_address_restrictions addressRestrictions = {};
720 	addressRestrictions.address = (void*)secondBase;
721 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
722 	VMArea* secondArea;
723 	error = map_backing_store(addressSpace, cache,
724 		area->cache_offset + (secondBase - area->Base()), area->name,
725 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
726 		&addressRestrictions, kernel, &secondArea, NULL);
727 	if (error != B_OK) {
728 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
729 		return error;
730 	}
731 
732 	// We need a cache reference for the new area.
733 	cache->AcquireRefLocked();
734 
735 	if (_secondArea != NULL)
736 		*_secondArea = secondArea;
737 
738 	return B_OK;
739 }
740 
741 
742 /*!	Deletes all areas in the given address range.
743 	The address space must be write-locked.
744 	The caller must ensure that no part of the given range is wired.
745 */
746 static status_t
747 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
748 	bool kernel)
749 {
750 	size = PAGE_ALIGN(size);
751 	addr_t lastAddress = address + (size - 1);
752 
753 	// Check whether the caller is allowed to modify the concerned areas.
754 	if (!kernel) {
755 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
756 				VMArea* area = it.Next();) {
757 			addr_t areaLast = area->Base() + (area->Size() - 1);
758 			if (area->Base() < lastAddress && address < areaLast) {
759 				if ((area->protection & B_KERNEL_AREA) != 0)
760 					return B_NOT_ALLOWED;
761 			}
762 		}
763 	}
764 
765 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
766 			VMArea* area = it.Next();) {
767 		addr_t areaLast = area->Base() + (area->Size() - 1);
768 		if (area->Base() < lastAddress && address < areaLast) {
769 			status_t error = cut_area(addressSpace, area, address,
770 				lastAddress, NULL, kernel);
771 			if (error != B_OK)
772 				return error;
773 				// Failing after already messing with areas is ugly, but we
774 				// can't do anything about it.
775 		}
776 	}
777 
778 	return B_OK;
779 }
780 
781 
782 /*! You need to hold the lock of the cache and the write lock of the address
783 	space when calling this function.
784 	Note that in case of error your cache will be temporarily unlocked.
785 	If \a addressSpec is \c B_EXACT_ADDRESS and the
786 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
787 	that no part of the specified address range (base \c *_virtualAddress, size
788 	\a size) is wired.
789 */
790 static status_t
791 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
792 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
793 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
794 	bool kernel, VMArea** _area, void** _virtualAddress)
795 {
796 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
797 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
798 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
799 		addressRestrictions->address, offset, size,
800 		addressRestrictions->address_specification, wiring, protection,
801 		_area, areaName));
802 	cache->AssertLocked();
803 
804 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
805 		| HEAP_DONT_LOCK_KERNEL_SPACE;
806 	int priority;
807 	if (addressSpace != VMAddressSpace::Kernel()) {
808 		priority = VM_PRIORITY_USER;
809 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
810 		priority = VM_PRIORITY_VIP;
811 		allocationFlags |= HEAP_PRIORITY_VIP;
812 	} else
813 		priority = VM_PRIORITY_SYSTEM;
814 
815 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
816 		allocationFlags);
817 	if (area == NULL)
818 		return B_NO_MEMORY;
819 
820 	status_t status;
821 
822 	// if this is a private map, we need to create a new cache
823 	// to handle the private copies of pages as they are written to
824 	VMCache* sourceCache = cache;
825 	if (mapping == REGION_PRIVATE_MAP) {
826 		VMCache* newCache;
827 
828 		// create an anonymous cache
829 		status = VMCacheFactory::CreateAnonymousCache(newCache,
830 			(protection & B_STACK_AREA) != 0
831 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
832 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
833 		if (status != B_OK)
834 			goto err1;
835 
836 		newCache->Lock();
837 		newCache->temporary = 1;
838 		newCache->virtual_base = offset;
839 		newCache->virtual_end = offset + size;
840 
841 		cache->AddConsumer(newCache);
842 
843 		cache = newCache;
844 	}
845 
846 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
847 		status = cache->SetMinimalCommitment(size, priority);
848 		if (status != B_OK)
849 			goto err2;
850 	}
851 
852 	// check to see if this address space has entered DELETE state
853 	if (addressSpace->IsBeingDeleted()) {
854 		// okay, someone is trying to delete this address space now, so we can't
855 		// insert the area, so back out
856 		status = B_BAD_TEAM_ID;
857 		goto err2;
858 	}
859 
860 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
861 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
862 		status = unmap_address_range(addressSpace,
863 			(addr_t)addressRestrictions->address, size, kernel);
864 		if (status != B_OK)
865 			goto err2;
866 	}
867 
868 	status = addressSpace->InsertArea(area, size, addressRestrictions,
869 		allocationFlags, _virtualAddress);
870 	if (status != B_OK) {
871 		// TODO: wait and try again once this is working in the backend
872 #if 0
873 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
874 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
875 				0, 0);
876 		}
877 #endif
878 		goto err2;
879 	}
880 
881 	// attach the cache to the area
882 	area->cache = cache;
883 	area->cache_offset = offset;
884 
885 	// point the cache back to the area
886 	cache->InsertAreaLocked(area);
887 	if (mapping == REGION_PRIVATE_MAP)
888 		cache->Unlock();
889 
890 	// insert the area in the global area hash table
891 	VMAreaHash::Insert(area);
892 
893 	// grab a ref to the address space (the area holds this)
894 	addressSpace->Get();
895 
896 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
897 //		cache, sourceCache, areaName, area);
898 
899 	*_area = area;
900 	return B_OK;
901 
902 err2:
903 	if (mapping == REGION_PRIVATE_MAP) {
904 		// We created this cache, so we must delete it again. Note that we
905 		// need to temporarily unlock the source cache or we'll otherwise
906 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
907 		sourceCache->Unlock();
908 		cache->ReleaseRefAndUnlock();
909 		sourceCache->Lock();
910 	}
911 err1:
912 	addressSpace->DeleteArea(area, allocationFlags);
913 	return status;
914 }
915 
916 
917 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
918 	  locker1, locker2).
919 */
920 template<typename LockerType1, typename LockerType2>
921 static inline bool
922 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
923 {
924 	area->cache->AssertLocked();
925 
926 	VMAreaUnwiredWaiter waiter;
927 	if (!area->AddWaiterIfWired(&waiter))
928 		return false;
929 
930 	// unlock everything and wait
931 	if (locker1 != NULL)
932 		locker1->Unlock();
933 	if (locker2 != NULL)
934 		locker2->Unlock();
935 
936 	waiter.waitEntry.Wait();
937 
938 	return true;
939 }
940 
941 
942 /*!	Checks whether the given area has any wired ranges intersecting with the
943 	specified range and waits, if so.
944 
945 	When it has to wait, the function calls \c Unlock() on both \a locker1
946 	and \a locker2, if given.
947 	The area's top cache must be locked and must be unlocked as a side effect
948 	of calling \c Unlock() on either \a locker1 or \a locker2.
949 
950 	If the function does not have to wait it does not modify or unlock any
951 	object.
952 
953 	\param area The area to be checked.
954 	\param base The base address of the range to check.
955 	\param size The size of the address range to check.
956 	\param locker1 An object to be unlocked before starting to wait (may
957 		be \c NULL).
958 	\param locker2 An object to be unlocked before starting to wait (may
959 		be \c NULL).
960 	\return \c true, if the function had to wait, \c false otherwise.
961 */
962 template<typename LockerType1, typename LockerType2>
963 static inline bool
964 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
965 	LockerType1* locker1, LockerType2* locker2)
966 {
967 	area->cache->AssertLocked();
968 
969 	VMAreaUnwiredWaiter waiter;
970 	if (!area->AddWaiterIfWired(&waiter, base, size))
971 		return false;
972 
973 	// unlock everything and wait
974 	if (locker1 != NULL)
975 		locker1->Unlock();
976 	if (locker2 != NULL)
977 		locker2->Unlock();
978 
979 	waiter.waitEntry.Wait();
980 
981 	return true;
982 }
983 
984 
985 /*!	Checks whether the given address space has any wired ranges intersecting
986 	with the specified range and waits, if so.
987 
988 	Similar to wait_if_area_range_is_wired(), with the following differences:
989 	- All areas intersecting with the range are checked (respectively all until
990 	  one is found that contains a wired range intersecting with the given
991 	  range).
992 	- The given address space must at least be read-locked and must be unlocked
993 	  when \c Unlock() is called on \a locker.
994 	- None of the areas' caches are allowed to be locked.
995 */
996 template<typename LockerType>
997 static inline bool
998 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
999 	size_t size, LockerType* locker)
1000 {
1001 	addr_t end = base + size - 1;
1002 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
1003 			VMArea* area = it.Next();) {
1004 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
1005 		if (area->Base() > end)
1006 			return false;
1007 
1008 		if (base >= area->Base() + area->Size() - 1)
1009 			continue;
1010 
1011 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1012 
1013 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1014 			return true;
1015 	}
1016 
1017 	return false;
1018 }
1019 
1020 
1021 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1022 	It must be called in a situation where the kernel address space may be
1023 	locked.
1024 */
1025 status_t
1026 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1027 {
1028 	AddressSpaceReadLocker locker;
1029 	VMArea* area;
1030 	status_t status = locker.SetFromArea(id, area);
1031 	if (status != B_OK)
1032 		return status;
1033 
1034 	if (area->page_protections == NULL) {
1035 		status = allocate_area_page_protections(area);
1036 		if (status != B_OK)
1037 			return status;
1038 	}
1039 
1040 	*cookie = (void*)area;
1041 	return B_OK;
1042 }
1043 
1044 
1045 /*!	This is a debug helper function that is only meant for very specific
1046 	use cases.
1047 	Sets protection for the given address range to the protection specified.
1048 	If \a protection is 0 then the involved pages will be marked non-present
1049 	in the translation map to cause a fault on access. The pages aren't
1050 	actually unmapped however so that they can be marked present again with
1051 	additional calls to this function. For this to work the area must be
1052 	fully locked in memory so that the pages aren't otherwise touched.
1053 	This function does not lock the kernel address space and needs to be
1054 	supplied with a \a cookie retrieved from a successful call to
1055 	vm_prepare_kernel_area_debug_protection().
1056 */
1057 status_t
1058 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1059 	uint32 protection)
1060 {
1061 	// check address range
1062 	addr_t address = (addr_t)_address;
1063 	size = PAGE_ALIGN(size);
1064 
1065 	if ((address % B_PAGE_SIZE) != 0
1066 		|| (addr_t)address + size < (addr_t)address
1067 		|| !IS_KERNEL_ADDRESS(address)
1068 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1069 		return B_BAD_VALUE;
1070 	}
1071 
1072 	// Translate the kernel protection to user protection as we only store that.
1073 	if ((protection & B_KERNEL_READ_AREA) != 0)
1074 		protection |= B_READ_AREA;
1075 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1076 		protection |= B_WRITE_AREA;
1077 
1078 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1079 	VMTranslationMap* map = addressSpace->TranslationMap();
1080 	VMArea* area = (VMArea*)cookie;
1081 
1082 	addr_t offset = address - area->Base();
1083 	if (area->Size() - offset < size) {
1084 		panic("protect range not fully within supplied area");
1085 		return B_BAD_VALUE;
1086 	}
1087 
1088 	if (area->page_protections == NULL) {
1089 		panic("area has no page protections");
1090 		return B_BAD_VALUE;
1091 	}
1092 
1093 	// Invalidate the mapping entries so any access to them will fault or
1094 	// restore the mapping entries unchanged so that lookup will succeed again.
1095 	map->Lock();
1096 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1097 	map->Unlock();
1098 
1099 	// And set the proper page protections so that the fault case will actually
1100 	// fail and not simply try to map a new page.
1101 	for (addr_t pageAddress = address; pageAddress < address + size;
1102 			pageAddress += B_PAGE_SIZE) {
1103 		set_area_page_protection(area, pageAddress, protection);
1104 	}
1105 
1106 	return B_OK;
1107 }
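/*	Purely illustrative usage sketch (areaID and pageAddress are made-up
	names): after preparing a fully locked kernel area once, individual pages
	can be toggled between faulting and accessible:

		void* cookie;
		if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
			// make the page fault on any access
			vm_set_kernel_area_debug_protection(cookie, pageAddress,
				B_PAGE_SIZE, 0);
			// ... later make it readable/writable again
			vm_set_kernel_area_debug_protection(cookie, pageAddress,
				B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		}
*/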
1108 
1109 
1110 status_t
1111 vm_block_address_range(const char* name, void* address, addr_t size)
1112 {
1113 	if (!arch_vm_supports_protection(0))
1114 		return B_NOT_SUPPORTED;
1115 
1116 	AddressSpaceWriteLocker locker;
1117 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1118 	if (status != B_OK)
1119 		return status;
1120 
1121 	VMAddressSpace* addressSpace = locker.AddressSpace();
1122 
1123 	// create an anonymous cache
1124 	VMCache* cache;
1125 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1126 		VM_PRIORITY_SYSTEM);
1127 	if (status != B_OK)
1128 		return status;
1129 
1130 	cache->temporary = 1;
1131 	cache->virtual_end = size;
1132 	cache->Lock();
1133 
1134 	VMArea* area;
1135 	virtual_address_restrictions addressRestrictions = {};
1136 	addressRestrictions.address = address;
1137 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1138 	status = map_backing_store(addressSpace, cache, 0, name, size,
1139 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1140 		true, &area, NULL);
1141 	if (status != B_OK) {
1142 		cache->ReleaseRefAndUnlock();
1143 		return status;
1144 	}
1145 
1146 	cache->Unlock();
1147 	area->cache_type = CACHE_TYPE_RAM;
1148 	return area->id;
1149 }
1150 
1151 
1152 status_t
1153 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1154 {
1155 	AddressSpaceWriteLocker locker(team);
1156 	if (!locker.IsLocked())
1157 		return B_BAD_TEAM_ID;
1158 
1159 	VMAddressSpace* addressSpace = locker.AddressSpace();
1160 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1161 		addressSpace == VMAddressSpace::Kernel()
1162 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1163 }
1164 
1165 
1166 status_t
1167 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1168 	addr_t size, uint32 flags)
1169 {
1170 	if (size == 0)
1171 		return B_BAD_VALUE;
1172 
1173 	AddressSpaceWriteLocker locker(team);
1174 	if (!locker.IsLocked())
1175 		return B_BAD_TEAM_ID;
1176 
1177 	virtual_address_restrictions addressRestrictions = {};
1178 	addressRestrictions.address = *_address;
1179 	addressRestrictions.address_specification = addressSpec;
1180 	VMAddressSpace* addressSpace = locker.AddressSpace();
1181 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1182 		addressSpace == VMAddressSpace::Kernel()
1183 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1184 		_address);
1185 }
1186 
1187 
1188 area_id
1189 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1190 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1191 	const virtual_address_restrictions* virtualAddressRestrictions,
1192 	const physical_address_restrictions* physicalAddressRestrictions,
1193 	bool kernel, void** _address)
1194 {
1195 	VMArea* area;
1196 	VMCache* cache;
1197 	vm_page* page = NULL;
1198 	bool isStack = (protection & B_STACK_AREA) != 0;
1199 	page_num_t guardPages;
1200 	bool canOvercommit = false;
1201 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1202 		? VM_PAGE_ALLOC_CLEAR : 0;
1203 
1204 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1205 		team, name, size));
1206 
1207 	size = PAGE_ALIGN(size);
1208 	guardSize = PAGE_ALIGN(guardSize);
1209 	guardPages = guardSize / B_PAGE_SIZE;
1210 
1211 	if (size == 0 || size < guardSize)
1212 		return B_BAD_VALUE;
1213 	if (!arch_vm_supports_protection(protection))
1214 		return B_NOT_SUPPORTED;
1215 
1216 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1217 		canOvercommit = true;
1218 
1219 #ifdef DEBUG_KERNEL_STACKS
1220 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1221 		isStack = true;
1222 #endif
1223 
1224 	// check parameters
1225 	switch (virtualAddressRestrictions->address_specification) {
1226 		case B_ANY_ADDRESS:
1227 		case B_EXACT_ADDRESS:
1228 		case B_BASE_ADDRESS:
1229 		case B_ANY_KERNEL_ADDRESS:
1230 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1231 		case B_RANDOMIZED_ANY_ADDRESS:
1232 		case B_RANDOMIZED_BASE_ADDRESS:
1233 			break;
1234 
1235 		default:
1236 			return B_BAD_VALUE;
1237 	}
1238 
1239 	// If low or high physical address restrictions are given, we force
1240 	// B_CONTIGUOUS wiring, since only then we'll use
1241 	// vm_page_allocate_page_run() which deals with those restrictions.
1242 	if (physicalAddressRestrictions->low_address != 0
1243 		|| physicalAddressRestrictions->high_address != 0) {
1244 		wiring = B_CONTIGUOUS;
1245 	}
1246 
1247 	physical_address_restrictions stackPhysicalRestrictions;
1248 	bool doReserveMemory = false;
1249 	switch (wiring) {
1250 		case B_NO_LOCK:
1251 			break;
1252 		case B_FULL_LOCK:
1253 		case B_LAZY_LOCK:
1254 		case B_CONTIGUOUS:
1255 			doReserveMemory = true;
1256 			break;
1257 		case B_ALREADY_WIRED:
1258 			break;
1259 		case B_LOMEM:
1260 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1261 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1262 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1263 			wiring = B_CONTIGUOUS;
1264 			doReserveMemory = true;
1265 			break;
1266 		case B_32_BIT_FULL_LOCK:
1267 			if (B_HAIKU_PHYSICAL_BITS <= 32
1268 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1269 				wiring = B_FULL_LOCK;
1270 				doReserveMemory = true;
1271 				break;
1272 			}
1273 			// TODO: We don't really support this mode efficiently. Just fall
1274 			// through for now ...
1275 		case B_32_BIT_CONTIGUOUS:
1276 			#if B_HAIKU_PHYSICAL_BITS > 32
1277 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1278 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1279 					stackPhysicalRestrictions.high_address
1280 						= (phys_addr_t)1 << 32;
1281 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1282 				}
1283 			#endif
1284 			wiring = B_CONTIGUOUS;
1285 			doReserveMemory = true;
1286 			break;
1287 		default:
1288 			return B_BAD_VALUE;
1289 	}
1290 
1291 	// Optimization: For a single-page contiguous allocation without low/high
1292 	// memory restriction B_FULL_LOCK wiring suffices.
1293 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1294 		&& physicalAddressRestrictions->low_address == 0
1295 		&& physicalAddressRestrictions->high_address == 0) {
1296 		wiring = B_FULL_LOCK;
1297 	}
1298 
1299 	// For full lock or contiguous areas we're also going to map the pages and
1300 	// thus need to reserve pages for the mapping backend upfront.
1301 	addr_t reservedMapPages = 0;
1302 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1303 		AddressSpaceWriteLocker locker;
1304 		status_t status = locker.SetTo(team);
1305 		if (status != B_OK)
1306 			return status;
1307 
1308 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1309 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1310 	}
1311 
1312 	int priority;
1313 	if (team != VMAddressSpace::KernelID())
1314 		priority = VM_PRIORITY_USER;
1315 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1316 		priority = VM_PRIORITY_VIP;
1317 	else
1318 		priority = VM_PRIORITY_SYSTEM;
1319 
1320 	// Reserve memory before acquiring the address space lock. This reduces the
1321 	// chances of failure, since while holding the write lock to the address
1322 	// space (if it is the kernel address space that is), the low memory handler
1323 	// won't be able to free anything for us.
1324 	addr_t reservedMemory = 0;
1325 	if (doReserveMemory) {
1326 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1327 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1328 			return B_NO_MEMORY;
1329 		reservedMemory = size;
1330 		// TODO: We don't reserve the memory for the pages for the page
1331 		// directories/tables. We actually need to do so, since we currently don't
1332 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1333 		// there are actually fewer physical pages than there should be, which
1334 		// can get the VM into trouble in low memory situations.
1335 	}
1336 
1337 	AddressSpaceWriteLocker locker;
1338 	VMAddressSpace* addressSpace;
1339 	status_t status;
1340 
1341 	// For full lock areas reserve the pages before locking the address
1342 	// space. E.g. block caches can't release their memory while we hold the
1343 	// address space lock.
1344 	page_num_t reservedPages = reservedMapPages;
1345 	if (wiring == B_FULL_LOCK)
1346 		reservedPages += size / B_PAGE_SIZE;
1347 
1348 	vm_page_reservation reservation;
1349 	if (reservedPages > 0) {
1350 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1351 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1352 					priority)) {
1353 				reservedPages = 0;
1354 				status = B_WOULD_BLOCK;
1355 				goto err0;
1356 			}
1357 		} else
1358 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1359 	}
1360 
1361 	if (wiring == B_CONTIGUOUS) {
1362 		// we try to allocate the page run here upfront as this may easily
1363 		// fail for obvious reasons
1364 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1365 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1366 		if (page == NULL) {
1367 			status = B_NO_MEMORY;
1368 			goto err0;
1369 		}
1370 	}
1371 
1372 	// Lock the address space and, if B_EXACT_ADDRESS and
1373 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1374 	// is not wired.
1375 	do {
1376 		status = locker.SetTo(team);
1377 		if (status != B_OK)
1378 			goto err1;
1379 
1380 		addressSpace = locker.AddressSpace();
1381 	} while (virtualAddressRestrictions->address_specification
1382 			== B_EXACT_ADDRESS
1383 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1384 		&& wait_if_address_range_is_wired(addressSpace,
1385 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1386 
1387 	// create an anonymous cache
1388 	// if it's a stack, make sure that at least two pages are available
1389 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1390 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1391 		wiring == B_NO_LOCK, priority);
1392 	if (status != B_OK)
1393 		goto err1;
1394 
1395 	cache->temporary = 1;
1396 	cache->virtual_end = size;
1397 	cache->committed_size = reservedMemory;
1398 		// TODO: This should be done via a method.
1399 	reservedMemory = 0;
1400 
1401 	cache->Lock();
1402 
1403 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1404 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1405 		kernel, &area, _address);
1406 
1407 	if (status != B_OK) {
1408 		cache->ReleaseRefAndUnlock();
1409 		goto err1;
1410 	}
1411 
1412 	locker.DegradeToReadLock();
1413 
1414 	switch (wiring) {
1415 		case B_NO_LOCK:
1416 		case B_LAZY_LOCK:
1417 			// do nothing - the pages are mapped in as needed
1418 			break;
1419 
1420 		case B_FULL_LOCK:
1421 		{
1422 			// Allocate and map all pages for this area
1423 
1424 			off_t offset = 0;
1425 			for (addr_t address = area->Base();
1426 					address < area->Base() + (area->Size() - 1);
1427 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1428 #ifdef DEBUG_KERNEL_STACKS
1429 #	ifdef STACK_GROWS_DOWNWARDS
1430 				if (isStack && address < area->Base()
1431 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1432 #	else
1433 				if (isStack && address >= area->Base() + area->Size()
1434 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1435 #	endif
1436 					continue;
1437 #endif
1438 				vm_page* page = vm_page_allocate_page(&reservation,
1439 					PAGE_STATE_WIRED | pageAllocFlags);
1440 				cache->InsertPage(page, offset);
1441 				map_page(area, page, address, protection, &reservation);
1442 
1443 				DEBUG_PAGE_ACCESS_END(page);
1444 			}
1445 
1446 			break;
1447 		}
1448 
1449 		case B_ALREADY_WIRED:
1450 		{
1451 			// The pages should already be mapped. This is only really useful
1452 			// during boot time. Find the appropriate vm_page objects and stick
1453 			// them in the cache object.
1454 			VMTranslationMap* map = addressSpace->TranslationMap();
1455 			off_t offset = 0;
1456 
1457 			if (!gKernelStartup)
1458 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1459 
1460 			map->Lock();
1461 
1462 			for (addr_t virtualAddress = area->Base();
1463 					virtualAddress < area->Base() + (area->Size() - 1);
1464 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1465 				phys_addr_t physicalAddress;
1466 				uint32 flags;
1467 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1468 				if (status < B_OK) {
1469 					panic("looking up mapping failed for va 0x%lx\n",
1470 						virtualAddress);
1471 				}
1472 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1473 				if (page == NULL) {
1474 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1475 						"\n", physicalAddress);
1476 				}
1477 
1478 				DEBUG_PAGE_ACCESS_START(page);
1479 
1480 				cache->InsertPage(page, offset);
1481 				increment_page_wired_count(page);
1482 				vm_page_set_state(page, PAGE_STATE_WIRED);
1483 				page->busy = false;
1484 
1485 				DEBUG_PAGE_ACCESS_END(page);
1486 			}
1487 
1488 			map->Unlock();
1489 			break;
1490 		}
1491 
1492 		case B_CONTIGUOUS:
1493 		{
1494 			// We have already allocated our contiguous page run, so we can now
1495 			// just map them in the address space
1496 			VMTranslationMap* map = addressSpace->TranslationMap();
1497 			phys_addr_t physicalAddress
1498 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1499 			addr_t virtualAddress = area->Base();
1500 			off_t offset = 0;
1501 
1502 			map->Lock();
1503 
1504 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1505 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1506 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1507 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1508 				if (page == NULL)
1509 					panic("couldn't lookup physical page just allocated\n");
1510 
1511 				status = map->Map(virtualAddress, physicalAddress, protection,
1512 					area->MemoryType(), &reservation);
1513 				if (status < B_OK)
1514 					panic("couldn't map physical page in page run\n");
1515 
1516 				cache->InsertPage(page, offset);
1517 				increment_page_wired_count(page);
1518 
1519 				DEBUG_PAGE_ACCESS_END(page);
1520 			}
1521 
1522 			map->Unlock();
1523 			break;
1524 		}
1525 
1526 		default:
1527 			break;
1528 	}
1529 
1530 	cache->Unlock();
1531 
1532 	if (reservedPages > 0)
1533 		vm_page_unreserve_pages(&reservation);
1534 
1535 	TRACE(("vm_create_anonymous_area: done\n"));
1536 
1537 	area->cache_type = CACHE_TYPE_RAM;
1538 	return area->id;
1539 
1540 err1:
1541 	if (wiring == B_CONTIGUOUS) {
1542 		// we had reserved the area space upfront...
1543 		phys_addr_t pageNumber = page->physical_page_number;
1544 		int32 i;
1545 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1546 			page = vm_lookup_page(pageNumber);
1547 			if (page == NULL)
1548 				panic("couldn't lookup physical page just allocated\n");
1549 
1550 			vm_page_set_state(page, PAGE_STATE_FREE);
1551 		}
1552 	}
1553 
1554 err0:
1555 	if (reservedPages > 0)
1556 		vm_page_unreserve_pages(&reservation);
1557 	if (reservedMemory > 0)
1558 		vm_unreserve_memory(reservedMemory);
1559 
1560 	return status;
1561 }
1562 
1563 
1564 area_id
1565 vm_map_physical_memory(team_id team, const char* name, void** _address,
1566 	uint32 addressSpec, addr_t size, uint32 protection,
1567 	phys_addr_t physicalAddress, bool alreadyWired)
1568 {
1569 	VMArea* area;
1570 	VMCache* cache;
1571 	addr_t mapOffset;
1572 
1573 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1574 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1575 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1576 		addressSpec, size, protection, physicalAddress));
1577 
1578 	if (!arch_vm_supports_protection(protection))
1579 		return B_NOT_SUPPORTED;
1580 
1581 	AddressSpaceWriteLocker locker(team);
1582 	if (!locker.IsLocked())
1583 		return B_BAD_TEAM_ID;
1584 
1585 	// if the physical address is not page-aligned,
1586 	// move the actual area down to start on a page boundary
1587 	mapOffset = physicalAddress % B_PAGE_SIZE;
1588 	size += mapOffset;
1589 	physicalAddress -= mapOffset;
1590 
1591 	size = PAGE_ALIGN(size);
1592 
1593 	// create a device cache
1594 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1595 	if (status != B_OK)
1596 		return status;
1597 
1598 	cache->virtual_end = size;
1599 
1600 	cache->Lock();
1601 
1602 	virtual_address_restrictions addressRestrictions = {};
1603 	addressRestrictions.address = *_address;
1604 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1605 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1606 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1607 		true, &area, _address);
1608 
1609 	if (status < B_OK)
1610 		cache->ReleaseRefLocked();
1611 
1612 	cache->Unlock();
1613 
1614 	if (status == B_OK) {
1615 		// set requested memory type -- use uncached, if not given
1616 		uint32 memoryType = addressSpec & B_MTR_MASK;
1617 		if (memoryType == 0)
1618 			memoryType = B_MTR_UC;
1619 
1620 		area->SetMemoryType(memoryType);
1621 
1622 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1623 		if (status != B_OK)
1624 			delete_area(locker.AddressSpace(), area, false);
1625 	}
1626 
1627 	if (status != B_OK)
1628 		return status;
1629 
1630 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1631 
1632 	if (alreadyWired) {
1633 		// The area is already mapped, but possibly not with the right
1634 		// memory type.
1635 		map->Lock();
1636 		map->ProtectArea(area, area->protection);
1637 		map->Unlock();
1638 	} else {
1639 		// Map the area completely.
1640 
1641 		// reserve pages needed for the mapping
1642 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1643 			area->Base() + (size - 1));
1644 		vm_page_reservation reservation;
1645 		vm_page_reserve_pages(&reservation, reservePages,
1646 			team == VMAddressSpace::KernelID()
1647 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1648 
1649 		map->Lock();
1650 
1651 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1652 			map->Map(area->Base() + offset, physicalAddress + offset,
1653 				protection, area->MemoryType(), &reservation);
1654 		}
1655 
1656 		map->Unlock();
1657 
1658 		vm_page_unreserve_pages(&reservation);
1659 	}
1660 
1661 	// modify the pointer returned to be offset back into the new area
1662 	// the same way the physical address passed in was offset
1663 	*_address = (void*)((addr_t)*_address + mapOffset);
1664 
1665 	area->cache_type = CACHE_TYPE_DEVICE;
1666 	return area->id;
1667 }
1668 
1669 
1670 /*!	Don't use!
1671 	TODO: This function was introduced to map physical page vecs to
1672 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1673 	use a device cache and does not track vm_page::wired_count!
1674 */
1675 area_id
1676 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1677 	uint32 addressSpec, addr_t* _size, uint32 protection,
1678 	struct generic_io_vec* vecs, uint32 vecCount)
1679 {
1680 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1681 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1682 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1683 		addressSpec, _size, protection, vecs, vecCount));
1684 
1685 	if (!arch_vm_supports_protection(protection)
1686 		|| (addressSpec & B_MTR_MASK) != 0) {
1687 		return B_NOT_SUPPORTED;
1688 	}
1689 
1690 	AddressSpaceWriteLocker locker(team);
1691 	if (!locker.IsLocked())
1692 		return B_BAD_TEAM_ID;
1693 
1694 	if (vecCount == 0)
1695 		return B_BAD_VALUE;
1696 
1697 	addr_t size = 0;
1698 	for (uint32 i = 0; i < vecCount; i++) {
1699 		if (vecs[i].base % B_PAGE_SIZE != 0
1700 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1701 			return B_BAD_VALUE;
1702 		}
1703 
1704 		size += vecs[i].length;
1705 	}
1706 
1707 	// create a device cache
1708 	VMCache* cache;
1709 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1710 	if (result != B_OK)
1711 		return result;
1712 
1713 	cache->virtual_end = size;
1714 
1715 	cache->Lock();
1716 
1717 	VMArea* area;
1718 	virtual_address_restrictions addressRestrictions = {};
1719 	addressRestrictions.address = *_address;
1720 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1721 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1722 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1723 		&addressRestrictions, true, &area, _address);
1724 
1725 	if (result != B_OK)
1726 		cache->ReleaseRefLocked();
1727 
1728 	cache->Unlock();
1729 
1730 	if (result != B_OK)
1731 		return result;
1732 
1733 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1734 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1735 		area->Base() + (size - 1));
1736 
1737 	vm_page_reservation reservation;
1738 	vm_page_reserve_pages(&reservation, reservePages,
1739 			team == VMAddressSpace::KernelID()
1740 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1741 	map->Lock();
1742 
1743 	uint32 vecIndex = 0;
1744 	size_t vecOffset = 0;
1745 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1746 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1747 			vecOffset = 0;
1748 			vecIndex++;
1749 		}
1750 
1751 		if (vecIndex >= vecCount)
1752 			break;
1753 
1754 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1755 			protection, area->MemoryType(), &reservation);
1756 
1757 		vecOffset += B_PAGE_SIZE;
1758 	}
1759 
1760 	map->Unlock();
1761 	vm_page_unreserve_pages(&reservation);
1762 
1763 	if (_size != NULL)
1764 		*_size = size;
1765 
1766 	area->cache_type = CACHE_TYPE_DEVICE;
1767 	return area->id;
1768 }
1769 
1770 
1771 area_id
1772 vm_create_null_area(team_id team, const char* name, void** address,
1773 	uint32 addressSpec, addr_t size, uint32 flags)
1774 {
1775 	size = PAGE_ALIGN(size);
1776 
1777 	// Lock the address space and, if B_EXACT_ADDRESS and
1778 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1779 	// is not wired.
1780 	AddressSpaceWriteLocker locker;
1781 	do {
1782 		if (locker.SetTo(team) != B_OK)
1783 			return B_BAD_TEAM_ID;
1784 	} while (addressSpec == B_EXACT_ADDRESS
1785 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1786 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1787 			(addr_t)*address, size, &locker));
1788 
1789 	// create a null cache
1790 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1791 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1792 	VMCache* cache;
1793 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1794 	if (status != B_OK)
1795 		return status;
1796 
1797 	cache->temporary = 1;
1798 	cache->virtual_end = size;
1799 
1800 	cache->Lock();
1801 
1802 	VMArea* area;
1803 	virtual_address_restrictions addressRestrictions = {};
1804 	addressRestrictions.address = *address;
1805 	addressRestrictions.address_specification = addressSpec;
1806 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1807 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1808 		&addressRestrictions, true, &area, address);
1809 
1810 	if (status < B_OK) {
1811 		cache->ReleaseRefAndUnlock();
1812 		return status;
1813 	}
1814 
1815 	cache->Unlock();
1816 
1817 	area->cache_type = CACHE_TYPE_NULL;
1818 	return area->id;
1819 }
1820 
1821 
1822 /*!	Creates the vnode cache for the specified \a vnode.
1823 	The vnode has to be marked busy when calling this function.
1824 */
1825 status_t
1826 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1827 {
1828 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1829 }
1830 
1831 
1832 /*!	\a cache must be locked. The area's address space must be read-locked.
1833 */
1834 static void
1835 pre_map_area_pages(VMArea* area, VMCache* cache,
1836 	vm_page_reservation* reservation)
1837 {
1838 	addr_t baseAddress = area->Base();
1839 	addr_t cacheOffset = area->cache_offset;
1840 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1841 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1842 
1843 	for (VMCachePagesTree::Iterator it
1844 				= cache->pages.GetIterator(firstPage, true, true);
1845 			vm_page* page = it.Next();) {
1846 		if (page->cache_offset >= endPage)
1847 			break;
1848 
1849 		// skip busy and inactive pages
1850 		if (page->busy || page->usage_count == 0)
1851 			continue;
1852 
1853 		DEBUG_PAGE_ACCESS_START(page);
1854 		map_page(area, page,
1855 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1856 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1857 		DEBUG_PAGE_ACCESS_END(page);
1858 	}
1859 }
1860 
1861 
1862 /*!	Will map the file specified by \a fd to an area in memory.
1863 	The file will be mirrored beginning at the specified \a offset. The
1864 	\a offset and \a size arguments are rounded to page boundaries as needed.
1865 */
1866 static area_id
1867 _vm_map_file(team_id team, const char* name, void** _address,
1868 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1869 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1870 {
1871 	// TODO: for binary files, we want to make sure that they get a
1872 	//	consistent copy of the file as of mapping time, i.e. later changes
1873 	//	should not make it into the mapped copy -- this will need quite some
1874 	//	changes to be done in a nice way
1875 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1876 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1877 
1878 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1879 	size = PAGE_ALIGN(size);
1880 
1881 	if (mapping == REGION_NO_PRIVATE_MAP)
1882 		protection |= B_SHARED_AREA;
1883 	if (addressSpec != B_EXACT_ADDRESS)
1884 		unmapAddressRange = false;
1885 
1886 	if (fd < 0) {
1887 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1888 		virtual_address_restrictions virtualRestrictions = {};
1889 		virtualRestrictions.address = *_address;
1890 		virtualRestrictions.address_specification = addressSpec;
1891 		physical_address_restrictions physicalRestrictions = {};
1892 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1893 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1894 			_address);
1895 	}
1896 
1897 	// get the open flags of the FD
1898 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1899 	if (descriptor == NULL)
1900 		return EBADF;
1901 	int32 openMode = descriptor->open_mode;
1902 	put_fd(descriptor);
1903 
1904 	// The FD must be open for reading at any rate. For a shared mapping with
1905 	// write access, the FD must additionally be open for writing.
1906 	if ((openMode & O_ACCMODE) == O_WRONLY
1907 		|| (mapping == REGION_NO_PRIVATE_MAP
1908 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1909 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1910 		return EACCES;
1911 	}
1912 
1913 	// get the vnode for the object, this also grabs a ref to it
1914 	struct vnode* vnode = NULL;
1915 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1916 	if (status < B_OK)
1917 		return status;
1918 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1919 
1920 	// If we're going to pre-map pages, we need to reserve the pages needed by
1921 	// the mapping backend upfront.
1922 	page_num_t reservedPreMapPages = 0;
1923 	vm_page_reservation reservation;
1924 	if ((protection & B_READ_AREA) != 0) {
1925 		AddressSpaceWriteLocker locker;
1926 		status = locker.SetTo(team);
1927 		if (status != B_OK)
1928 			return status;
1929 
1930 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1931 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1932 
1933 		locker.Unlock();
1934 
1935 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1936 			team == VMAddressSpace::KernelID()
1937 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1938 	}
1939 
1940 	struct PageUnreserver {
1941 		PageUnreserver(vm_page_reservation* reservation)
1942 			:
1943 			fReservation(reservation)
1944 		{
1945 		}
1946 
1947 		~PageUnreserver()
1948 		{
1949 			if (fReservation != NULL)
1950 				vm_page_unreserve_pages(fReservation);
1951 		}
1952 
1953 		vm_page_reservation* fReservation;
1954 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1955 
1956 	// Lock the address space and, if the specified address range shall be
1957 	// unmapped, ensure it is not wired.
1958 	AddressSpaceWriteLocker locker;
1959 	do {
1960 		if (locker.SetTo(team) != B_OK)
1961 			return B_BAD_TEAM_ID;
1962 	} while (unmapAddressRange
1963 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1964 			(addr_t)*_address, size, &locker));
1965 
1966 	// TODO: this only works for file systems that use the file cache
1967 	VMCache* cache;
1968 	status = vfs_get_vnode_cache(vnode, &cache, false);
1969 	if (status < B_OK)
1970 		return status;
1971 
1972 	cache->Lock();
1973 
1974 	VMArea* area;
1975 	virtual_address_restrictions addressRestrictions = {};
1976 	addressRestrictions.address = *_address;
1977 	addressRestrictions.address_specification = addressSpec;
1978 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1979 		0, protection, mapping,
1980 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1981 		&addressRestrictions, kernel, &area, _address);
1982 
1983 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1984 		// map_backing_store() cannot know we no longer need the ref
1985 		cache->ReleaseRefLocked();
1986 	}
1987 
1988 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1989 		pre_map_area_pages(area, cache, &reservation);
1990 
1991 	cache->Unlock();
1992 
1993 	if (status == B_OK) {
1994 		// TODO: this probably deserves a smarter solution, ie. don't always
1995 		// prefetch stuff, and also, probably don't trigger it at this place.
1996 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1997 			// prefetches at max 10 MB starting from "offset"
1998 	}
1999 
2000 	if (status != B_OK)
2001 		return status;
2002 
2003 	area->cache_type = CACHE_TYPE_VNODE;
2004 	return area->id;
2005 }
2006 
2007 
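/*!	Kernel entry point for mapping the file referred to by \a fd into memory.
	Checks that the requested protection is supported by the architecture and
	forwards to _vm_map_file() with \c kernel set to \c true.
*/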
2008 area_id
2009 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2010 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2011 	int fd, off_t offset)
2012 {
2013 	if (!arch_vm_supports_protection(protection))
2014 		return B_NOT_SUPPORTED;
2015 
2016 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2017 		mapping, unmapAddressRange, fd, offset, true);
2018 }
2019 
2020 
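/*!	Returns the given area's cache, locked and with a reference acquired.
	Since the area's cache may change while the cache lock is being acquired,
	the lookup is retried until a consistent result is obtained. The
	counterpart is vm_area_put_locked_cache().
*/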
2021 VMCache*
2022 vm_area_get_locked_cache(VMArea* area)
2023 {
2024 	rw_lock_read_lock(&sAreaCacheLock);
2025 
2026 	while (true) {
2027 		VMCache* cache = area->cache;
2028 
2029 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2030 			// cache has been deleted
2031 			rw_lock_read_lock(&sAreaCacheLock);
2032 			continue;
2033 		}
2034 
2035 		rw_lock_read_lock(&sAreaCacheLock);
2036 
2037 		if (cache == area->cache) {
2038 			cache->AcquireRefLocked();
2039 			rw_lock_read_unlock(&sAreaCacheLock);
2040 			return cache;
2041 		}
2042 
2043 		// the cache changed in the meantime
2044 		cache->Unlock();
2045 	}
2046 }
2047 
2048 
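/*!	Releases the reference to and unlocks a cache obtained via
	vm_area_get_locked_cache().
*/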
2049 void
2050 vm_area_put_locked_cache(VMCache* cache)
2051 {
2052 	cache->ReleaseRefAndUnlock();
2053 }
2054 
2055 
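/*!	Clones the area with ID \a sourceID into the address space of \a team.
	Both the source area and the clone are marked B_SHARED_AREA, so that
	copy-on-write never comes into play for them. For B_FULL_LOCK clones the
	pages (or, for device areas, the physical range) are mapped in right away.
*/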
2056 area_id
2057 vm_clone_area(team_id team, const char* name, void** address,
2058 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2059 	bool kernel)
2060 {
2061 	VMArea* newArea = NULL;
2062 	VMArea* sourceArea;
2063 
2064 	// Check whether the source area exists and is cloneable. If so, mark it
2065 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2066 	{
2067 		AddressSpaceWriteLocker locker;
2068 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2069 		if (status != B_OK)
2070 			return status;
2071 
2072 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2073 			return B_NOT_ALLOWED;
2074 
2075 		sourceArea->protection |= B_SHARED_AREA;
2076 		protection |= B_SHARED_AREA;
2077 	}
2078 
2079 	// Now lock both address spaces and actually do the cloning.
2080 
2081 	MultiAddressSpaceLocker locker;
2082 	VMAddressSpace* sourceAddressSpace;
2083 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2084 	if (status != B_OK)
2085 		return status;
2086 
2087 	VMAddressSpace* targetAddressSpace;
2088 	status = locker.AddTeam(team, true, &targetAddressSpace);
2089 	if (status != B_OK)
2090 		return status;
2091 
2092 	status = locker.Lock();
2093 	if (status != B_OK)
2094 		return status;
2095 
2096 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2097 	if (sourceArea == NULL)
2098 		return B_BAD_VALUE;
2099 
2100 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2101 		return B_NOT_ALLOWED;
2102 
2103 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2104 
2105 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2106 	//	have been adapted. Maybe it should be part of the kernel settings,
2107 	//	anyway (so that old drivers can always work).
2108 #if 0
2109 	if (sourceArea->aspace == VMAddressSpace::Kernel()
2110 		&& addressSpace != VMAddressSpace::Kernel()
2111 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2112 		// kernel areas must not be cloned in userland, unless explicitly
2113 		// declared user-cloneable upon construction
2114 		status = B_NOT_ALLOWED;
2115 	} else
2116 #endif
2117 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2118 		status = B_NOT_ALLOWED;
2119 	else {
2120 		virtual_address_restrictions addressRestrictions = {};
2121 		addressRestrictions.address = *address;
2122 		addressRestrictions.address_specification = addressSpec;
2123 		status = map_backing_store(targetAddressSpace, cache,
2124 			sourceArea->cache_offset, name, sourceArea->Size(),
2125 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2126 			kernel, &newArea, address);
2127 	}
2128 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2129 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2130 		// to create a new cache, and has therefore already acquired a reference
2131 		// to the source cache - but otherwise it has no idea that we need
2132 		// one.
2133 		cache->AcquireRefLocked();
2134 	}
2135 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2136 		// we need to map in everything at this point
2137 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2138 			// we don't have actual pages to map but a physical area
2139 			VMTranslationMap* map
2140 				= sourceArea->address_space->TranslationMap();
2141 			map->Lock();
2142 
2143 			phys_addr_t physicalAddress;
2144 			uint32 oldProtection;
2145 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2146 
2147 			map->Unlock();
2148 
2149 			map = targetAddressSpace->TranslationMap();
2150 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2151 				newArea->Base() + (newArea->Size() - 1));
2152 
2153 			vm_page_reservation reservation;
2154 			vm_page_reserve_pages(&reservation, reservePages,
2155 				targetAddressSpace == VMAddressSpace::Kernel()
2156 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2157 			map->Lock();
2158 
2159 			for (addr_t offset = 0; offset < newArea->Size();
2160 					offset += B_PAGE_SIZE) {
2161 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2162 					protection, newArea->MemoryType(), &reservation);
2163 			}
2164 
2165 			map->Unlock();
2166 			vm_page_unreserve_pages(&reservation);
2167 		} else {
2168 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2169 			size_t reservePages = map->MaxPagesNeededToMap(
2170 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2171 			vm_page_reservation reservation;
2172 			vm_page_reserve_pages(&reservation, reservePages,
2173 				targetAddressSpace == VMAddressSpace::Kernel()
2174 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2175 
2176 			// map in all pages from source
2177 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2178 					vm_page* page = it.Next();) {
2179 				if (!page->busy) {
2180 					DEBUG_PAGE_ACCESS_START(page);
2181 					map_page(newArea, page,
2182 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2183 							- newArea->cache_offset),
2184 						protection, &reservation);
2185 					DEBUG_PAGE_ACCESS_END(page);
2186 				}
2187 			}
2188 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2189 			// ensuring that!
2190 
2191 			vm_page_unreserve_pages(&reservation);
2192 		}
2193 	}
2194 	if (status == B_OK)
2195 		newArea->cache_type = sourceArea->cache_type;
2196 
2197 	vm_area_put_locked_cache(cache);
2198 
2199 	if (status < B_OK)
2200 		return status;
2201 
2202 	return newArea->id;
2203 }
2204 
2205 
2206 /*!	Deletes the specified area of the given address space.
2207 
2208 	The address space must be write-locked.
2209 	The caller must ensure that the area does not have any wired ranges.
2210 
2211 	\param addressSpace The address space containing the area.
2212 	\param area The area to be deleted.
2213 	\param deletingAddressSpace \c true, if the address space is in the process
2214 		of being deleted.
2215 */
2216 static void
2217 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2218 	bool deletingAddressSpace)
2219 {
2220 	ASSERT(!area->IsWired());
2221 
2222 	VMAreaHash::Remove(area);
2223 
2224 	// At this point the area is removed from the global hash table, but
2225 	// still exists in the area list.
2226 
2227 	// Unmap the virtual address space the area occupied.
2228 	{
2229 		// We need to lock the complete cache chain.
2230 		VMCache* topCache = vm_area_get_locked_cache(area);
2231 		VMCacheChainLocker cacheChainLocker(topCache);
2232 		cacheChainLocker.LockAllSourceCaches();
2233 
2234 		// If the area's top cache is a temporary cache and the area is the only
2235 		// one referencing it (besides us currently holding a second reference),
2236 		// the unmapping code doesn't need to care about preserving the accessed
2237 		// and dirty flags of the top cache page mappings.
2238 		bool ignoreTopCachePageFlags
2239 			= topCache->temporary && topCache->RefCount() == 2;
2240 
2241 		area->address_space->TranslationMap()->UnmapArea(area,
2242 			deletingAddressSpace, ignoreTopCachePageFlags);
2243 	}
2244 
2245 	if (!area->cache->temporary)
2246 		area->cache->WriteModified();
2247 
2248 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2249 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2250 
2251 	arch_vm_unset_memory_type(area);
2252 	addressSpace->RemoveArea(area, allocationFlags);
2253 	addressSpace->Put();
2254 
2255 	area->cache->RemoveArea(area);
2256 	area->cache->ReleaseRef();
2257 
2258 	addressSpace->DeleteArea(area, allocationFlags);
2259 }
2260 
2261 
2262 status_t
2263 vm_delete_area(team_id team, area_id id, bool kernel)
2264 {
2265 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2266 		team, id));
2267 
2268 	// lock the address space and make sure the area isn't wired
2269 	AddressSpaceWriteLocker locker;
2270 	VMArea* area;
2271 	AreaCacheLocker cacheLocker;
2272 
2273 	do {
2274 		status_t status = locker.SetFromArea(team, id, area);
2275 		if (status != B_OK)
2276 			return status;
2277 
2278 		cacheLocker.SetTo(area);
2279 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2280 
2281 	cacheLocker.Unlock();
2282 
2283 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2284 		return B_NOT_ALLOWED;
2285 
2286 	delete_area(locker.AddressSpace(), area, false);
2287 	return B_OK;
2288 }
2289 
2290 
2291 /*!	Creates a new cache on top of the given cache, moves all areas from
2292 	the old cache to the new one, and changes the protection of all affected
2293 	areas' pages to read-only. If requested, wired pages are moved up to the
2294 	new cache and copies are added to the old cache in their place.
2295 	Preconditions:
2296 	- The given cache must be locked.
2297 	- All of the cache's areas' address spaces must be read locked.
2298 	- Either the cache must not have any wired ranges or a page reservation for
2299 	  all wired pages must be provided, so they can be copied.
2300 
2301 	\param lowerCache The cache on top of which a new cache shall be created.
2302 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2303 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2304 		has wired pages. The wired pages are copied in this case.
2305 */
2306 static status_t
2307 vm_copy_on_write_area(VMCache* lowerCache,
2308 	vm_page_reservation* wiredPagesReservation)
2309 {
2310 	VMCache* upperCache;
2311 
2312 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2313 
2314 	// We need to separate the cache from its areas. The cache goes one level
2315 	// deeper and we create a new cache in between.
2316 
2317 	// create an anonymous cache
2318 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2319 		lowerCache->GuardSize() / B_PAGE_SIZE,
2320 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2321 		VM_PRIORITY_USER);
2322 	if (status != B_OK)
2323 		return status;
2324 
2325 	upperCache->Lock();
2326 
2327 	upperCache->temporary = 1;
2328 	upperCache->virtual_base = lowerCache->virtual_base;
2329 	upperCache->virtual_end = lowerCache->virtual_end;
2330 
2331 	// transfer the lower cache areas to the upper cache
2332 	rw_lock_write_lock(&sAreaCacheLock);
2333 	upperCache->TransferAreas(lowerCache);
2334 	rw_lock_write_unlock(&sAreaCacheLock);
2335 
2336 	lowerCache->AddConsumer(upperCache);
2337 
2338 	// We now need to remap all pages from all of the cache's areas read-only,
2339 	// so that a copy will be created on next write access. If there are wired
2340 	// pages, we keep their protection, move them to the upper cache and create
2341 	// copies for the lower cache.
2342 	if (wiredPagesReservation != NULL) {
2343 		// We need to handle wired pages -- iterate through the cache's pages.
2344 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2345 				vm_page* page = it.Next();) {
2346 			if (page->WiredCount() > 0) {
2347 				// allocate a new page and copy the wired one
2348 				vm_page* copiedPage = vm_page_allocate_page(
2349 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2350 
2351 				vm_memcpy_physical_page(
2352 					copiedPage->physical_page_number * B_PAGE_SIZE,
2353 					page->physical_page_number * B_PAGE_SIZE);
2354 
2355 				// move the wired page to the upper cache (note: removing is OK
2356 				// with the SplayTree iterator) and insert the copy
2357 				upperCache->MovePage(page);
2358 				lowerCache->InsertPage(copiedPage,
2359 					page->cache_offset * B_PAGE_SIZE);
2360 
2361 				DEBUG_PAGE_ACCESS_END(copiedPage);
2362 			} else {
2363 				// Change the protection of this page in all areas.
2364 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2365 						tempArea = tempArea->cache_next) {
2366 					// The area must be readable in the same way it was
2367 					// previously writable.
2368 					uint32 protection = B_KERNEL_READ_AREA;
2369 					if ((tempArea->protection & B_READ_AREA) != 0)
2370 						protection |= B_READ_AREA;
2371 
2372 					VMTranslationMap* map
2373 						= tempArea->address_space->TranslationMap();
2374 					map->Lock();
2375 					map->ProtectPage(tempArea,
2376 						virtual_page_address(tempArea, page), protection);
2377 					map->Unlock();
2378 				}
2379 			}
2380 		}
2381 	} else {
2382 		ASSERT(lowerCache->WiredPagesCount() == 0);
2383 
2384 		// just change the protection of all areas
2385 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2386 				tempArea = tempArea->cache_next) {
2387 			// The area must be readable in the same way it was previously
2388 			// writable.
2389 			uint32 protection = B_KERNEL_READ_AREA;
2390 			if ((tempArea->protection & B_READ_AREA) != 0)
2391 				protection |= B_READ_AREA;
2392 
2393 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2394 			map->Lock();
2395 			map->ProtectArea(tempArea, protection);
2396 			map->Unlock();
2397 		}
2398 	}
2399 
2400 	vm_area_put_locked_cache(upperCache);
2401 
2402 	return B_OK;
2403 }
2404 
2405 
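/*!	Copies the area with ID \a sourceID into the address space of \a team.
	If the source area is shared, the new area simply reuses its cache.
	Otherwise, if the source is writable, the source cache is turned into a
	copy-on-write chain via vm_copy_on_write_area(), using a previously made
	reservation to duplicate any wired pages.
*/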
2406 area_id
2407 vm_copy_area(team_id team, const char* name, void** _address,
2408 	uint32 addressSpec, uint32 protection, area_id sourceID)
2409 {
2410 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2411 
2412 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2413 		// set the same protection for the kernel as for userland
2414 		protection |= B_KERNEL_READ_AREA;
2415 		if (writableCopy)
2416 			protection |= B_KERNEL_WRITE_AREA;
2417 	}
2418 
2419 	// Do the locking: target address space, all address spaces associated with
2420 	// the source cache, and the cache itself.
2421 	MultiAddressSpaceLocker locker;
2422 	VMAddressSpace* targetAddressSpace;
2423 	VMCache* cache;
2424 	VMArea* source;
2425 	AreaCacheLocker cacheLocker;
2426 	status_t status;
2427 	bool sharedArea;
2428 
2429 	page_num_t wiredPages = 0;
2430 	vm_page_reservation wiredPagesReservation;
2431 
2432 	bool restart;
2433 	do {
2434 		restart = false;
2435 
2436 		locker.Unset();
2437 		status = locker.AddTeam(team, true, &targetAddressSpace);
2438 		if (status == B_OK) {
2439 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2440 				&cache);
2441 		}
2442 		if (status != B_OK)
2443 			return status;
2444 
2445 		cacheLocker.SetTo(cache, true);	// already locked
2446 
2447 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2448 
2449 		page_num_t oldWiredPages = wiredPages;
2450 		wiredPages = 0;
2451 
2452 		// If the source area isn't shared, count the number of wired pages in
2453 		// the cache and reserve as many pages.
2454 		if (!sharedArea) {
2455 			wiredPages = cache->WiredPagesCount();
2456 
2457 			if (wiredPages > oldWiredPages) {
2458 				cacheLocker.Unlock();
2459 				locker.Unlock();
2460 
2461 				if (oldWiredPages > 0)
2462 					vm_page_unreserve_pages(&wiredPagesReservation);
2463 
2464 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2465 					VM_PRIORITY_USER);
2466 
2467 				restart = true;
2468 			}
2469 		} else if (oldWiredPages > 0)
2470 			vm_page_unreserve_pages(&wiredPagesReservation);
2471 	} while (restart);
2472 
2473 	// unreserve pages later
2474 	struct PagesUnreserver {
2475 		PagesUnreserver(vm_page_reservation* reservation)
2476 			:
2477 			fReservation(reservation)
2478 		{
2479 		}
2480 
2481 		~PagesUnreserver()
2482 		{
2483 			if (fReservation != NULL)
2484 				vm_page_unreserve_pages(fReservation);
2485 		}
2486 
2487 	private:
2488 		vm_page_reservation*	fReservation;
2489 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2490 
2491 	if (addressSpec == B_CLONE_ADDRESS) {
2492 		addressSpec = B_EXACT_ADDRESS;
2493 		*_address = (void*)source->Base();
2494 	}
2495 
2496 	// First, create a cache on top of the source area, or use the existing
2497 	// one, if this is a shared area.
2498 
2499 	VMArea* target;
2500 	virtual_address_restrictions addressRestrictions = {};
2501 	addressRestrictions.address = *_address;
2502 	addressRestrictions.address_specification = addressSpec;
2503 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2504 		name, source->Size(), source->wiring, protection,
2505 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2506 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2507 		&addressRestrictions, true, &target, _address);
2508 	if (status < B_OK)
2509 		return status;
2510 
2511 	if (sharedArea) {
2512 		// The new area uses the old area's cache, but map_backing_store()
2513 		// hasn't acquired a ref. So we have to do that now.
2514 		cache->AcquireRefLocked();
2515 	}
2516 
2517 	// If the source area is writable, we need to move it one layer up as well
2518 
2519 	if (!sharedArea) {
2520 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2521 			// TODO: do something more useful if this fails!
2522 			if (vm_copy_on_write_area(cache,
2523 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2524 				panic("vm_copy_on_write_area() failed!\n");
2525 			}
2526 		}
2527 	}
2528 
2529 	// we return the ID of the newly created area
2530 	return target->id;
2531 }
2532 
2533 
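/*!	Changes the protection of the area with ID \a areaID to \a newProtection.
	When write access is dropped, the cache commitment may be reduced to the
	pages actually present; when write access is gained and the cache has
	consumers, a new cache is inserted via vm_copy_on_write_area() first. The
	page protections in the translation map are updated accordingly.
*/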
2534 status_t
2535 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2536 	bool kernel)
2537 {
2538 	fix_protection(&newProtection);
2539 
2540 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2541 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2542 
2543 	if (!arch_vm_supports_protection(newProtection))
2544 		return B_NOT_SUPPORTED;
2545 
2546 	bool becomesWritable
2547 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2548 
2549 	// lock address spaces and cache
2550 	MultiAddressSpaceLocker locker;
2551 	VMCache* cache;
2552 	VMArea* area;
2553 	status_t status;
2554 	AreaCacheLocker cacheLocker;
2555 	bool isWritable;
2556 
2557 	bool restart;
2558 	do {
2559 		restart = false;
2560 
2561 		locker.Unset();
2562 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2563 		if (status != B_OK)
2564 			return status;
2565 
2566 		cacheLocker.SetTo(cache, true);	// already locked
2567 
2568 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2569 			return B_NOT_ALLOWED;
2570 
2571 		if (area->protection == newProtection)
2572 			return B_OK;
2573 
2574 		if (team != VMAddressSpace::KernelID()
2575 			&& area->address_space->ID() != team) {
2576 			// unless you're the kernel, you are only allowed to set
2577 			// the protection of your own areas
2578 			return B_NOT_ALLOWED;
2579 		}
2580 
2581 		isWritable
2582 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2583 
2584 		// Make sure the area (or, if we're going to call
2585 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2586 		// wired ranges.
2587 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2588 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2589 					otherArea = otherArea->cache_next) {
2590 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2591 					restart = true;
2592 					break;
2593 				}
2594 			}
2595 		} else {
2596 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2597 				restart = true;
2598 		}
2599 	} while (restart);
2600 
2601 	bool changePageProtection = true;
2602 	bool changeTopCachePagesOnly = false;
2603 
2604 	if (isWritable && !becomesWritable) {
2605 		// writable -> !writable
2606 
2607 		if (cache->source != NULL && cache->temporary) {
2608 			if (cache->CountWritableAreas(area) == 0) {
2609 				// Since this cache now lives off the pages in its source
2610 				// cache, we can reduce the cache's commitment to cover only
2611 				// the pages that really are in this cache.
2612 
2613 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2614 					team == VMAddressSpace::KernelID()
2615 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2616 
2617 				// TODO: we may be able to join with our source cache, if
2618 				// count == 0
2619 			}
2620 		}
2621 
2622 		// If only the writability changes, we can just remap the pages of the
2623 		// top cache, since the pages of lower caches are mapped read-only
2624 		// anyway. That's only advantageous, though, if the number of pages in
2625 		// the cache is significantly smaller than the number of pages in the
2626 		// area.
2627 		if (newProtection
2628 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2629 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2630 			changeTopCachePagesOnly = true;
2631 		}
2632 	} else if (!isWritable && becomesWritable) {
2633 		// !writable -> writable
2634 
2635 		if (!cache->consumers.IsEmpty()) {
2636 			// There are consumers -- we have to insert a new cache. Fortunately
2637 			// vm_copy_on_write_area() does everything that's needed.
2638 			changePageProtection = false;
2639 			status = vm_copy_on_write_area(cache, NULL);
2640 		} else {
2641 			// No consumers, so we don't need to insert a new one.
2642 			if (cache->source != NULL && cache->temporary) {
2643 				// the cache's commitment must contain all possible pages
2644 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2645 					team == VMAddressSpace::KernelID()
2646 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2647 			}
2648 
2649 			if (status == B_OK && cache->source != NULL) {
2650 				// There's a source cache, hence we can't just change all pages'
2651 				// protection or we might allow writing into pages belonging to
2652 				// a lower cache.
2653 				changeTopCachePagesOnly = true;
2654 			}
2655 		}
2656 	} else {
2657 		// we don't have anything special to do in all other cases
2658 	}
2659 
2660 	if (status == B_OK) {
2661 		// remap existing pages in this cache
2662 		if (changePageProtection) {
2663 			VMTranslationMap* map = area->address_space->TranslationMap();
2664 			map->Lock();
2665 
2666 			if (changeTopCachePagesOnly) {
2667 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2668 				page_num_t lastPageOffset
2669 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2670 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2671 						vm_page* page = it.Next();) {
2672 					if (page->cache_offset >= firstPageOffset
2673 						&& page->cache_offset <= lastPageOffset) {
2674 						addr_t address = virtual_page_address(area, page);
2675 						map->ProtectPage(area, address, newProtection);
2676 					}
2677 				}
2678 			} else
2679 				map->ProtectArea(area, newProtection);
2680 
2681 			map->Unlock();
2682 		}
2683 
2684 		area->protection = newProtection;
2685 	}
2686 
2687 	return status;
2688 }
2689 
2690 
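/*!	Looks up the physical address that \a vaddr is currently mapped to in the
	given team's address space.
*/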
2691 status_t
2692 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2693 {
2694 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2695 	if (addressSpace == NULL)
2696 		return B_BAD_TEAM_ID;
2697 
2698 	VMTranslationMap* map = addressSpace->TranslationMap();
2699 
2700 	map->Lock();
2701 	uint32 dummyFlags;
2702 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2703 	map->Unlock();
2704 
2705 	addressSpace->Put();
2706 	return status;
2707 }
2708 
2709 
2710 /*!	The page's cache must be locked.
2711 */
2712 bool
2713 vm_test_map_modification(vm_page* page)
2714 {
2715 	if (page->modified)
2716 		return true;
2717 
2718 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2719 	vm_page_mapping* mapping;
2720 	while ((mapping = iterator.Next()) != NULL) {
2721 		VMArea* area = mapping->area;
2722 		VMTranslationMap* map = area->address_space->TranslationMap();
2723 
2724 		phys_addr_t physicalAddress;
2725 		uint32 flags;
2726 		map->Lock();
2727 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2728 		map->Unlock();
2729 
2730 		if ((flags & PAGE_MODIFIED) != 0)
2731 			return true;
2732 	}
2733 
2734 	return false;
2735 }
2736 
2737 
2738 /*!	The page's cache must be locked.
2739 */
2740 void
2741 vm_clear_map_flags(vm_page* page, uint32 flags)
2742 {
2743 	if ((flags & PAGE_ACCESSED) != 0)
2744 		page->accessed = false;
2745 	if ((flags & PAGE_MODIFIED) != 0)
2746 		page->modified = false;
2747 
2748 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2749 	vm_page_mapping* mapping;
2750 	while ((mapping = iterator.Next()) != NULL) {
2751 		VMArea* area = mapping->area;
2752 		VMTranslationMap* map = area->address_space->TranslationMap();
2753 
2754 		map->Lock();
2755 		map->ClearFlags(virtual_page_address(area, page), flags);
2756 		map->Unlock();
2757 	}
2758 }
2759 
2760 
2761 /*!	Removes all mappings from a page.
2762 	After you've called this function, the page is unmapped from memory and
2763 	the page's \c accessed and \c modified flags have been updated according
2764 	to the state of the mappings.
2765 	The page's cache must be locked.
2766 */
2767 void
2768 vm_remove_all_page_mappings(vm_page* page)
2769 {
2770 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2771 		VMArea* area = mapping->area;
2772 		VMTranslationMap* map = area->address_space->TranslationMap();
2773 		addr_t address = virtual_page_address(area, page);
2774 		map->UnmapPage(area, address, false);
2775 	}
2776 }
2777 
2778 
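/*!	Clears the \c accessed flag of the page and of all of its mappings. The
	modified flags found in the mappings are transferred to the page's
	\c modified flag.
	The page's cache must be locked.
	\return The number of \c accessed flags that were set, including the
		page's own one.
*/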
2779 int32
2780 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2781 {
2782 	int32 count = 0;
2783 
2784 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2785 	vm_page_mapping* mapping;
2786 	while ((mapping = iterator.Next()) != NULL) {
2787 		VMArea* area = mapping->area;
2788 		VMTranslationMap* map = area->address_space->TranslationMap();
2789 
2790 		bool modified;
2791 		if (map->ClearAccessedAndModified(area,
2792 				virtual_page_address(area, page), false, modified)) {
2793 			count++;
2794 		}
2795 
2796 		page->modified |= modified;
2797 	}
2798 
2799 
2800 	if (page->accessed) {
2801 		count++;
2802 		page->accessed = false;
2803 	}
2804 
2805 	return count;
2806 }
2807 
2808 
2809 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2810 	mappings.
2811 	The function iterates through the page mappings and removes them until
2812 	encountering one that has been accessed. From then on it will continue to
2813 	iterate, but only clear the accessed flag of the mapping. The page's
2814 	\c modified bit will be updated accordingly, the \c accessed bit will be
2815 	cleared.
2816 	\return The number of mapping accessed bits encountered, including the
2817 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2818 		of the page have been removed.
2819 */
2820 int32
2821 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2822 {
2823 	ASSERT(page->WiredCount() == 0);
2824 
2825 	if (page->accessed)
2826 		return vm_clear_page_mapping_accessed_flags(page);
2827 
2828 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2829 		VMArea* area = mapping->area;
2830 		VMTranslationMap* map = area->address_space->TranslationMap();
2831 		addr_t address = virtual_page_address(area, page);
2832 		bool modified = false;
2833 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2834 			page->accessed = true;
2835 			page->modified |= modified;
2836 			return vm_clear_page_mapping_accessed_flags(page);
2837 		}
2838 		page->modified |= modified;
2839 	}
2840 
2841 	return 0;
2842 }
2843 
2844 
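/*!	Debugger command backing "db"/"ds"/"dw"/"dl"/"string": dumps memory at
	the given virtual address, or, with -p/--physical, at the given physical
	address.
*/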
2845 static int
2846 display_mem(int argc, char** argv)
2847 {
2848 	bool physical = false;
2849 	addr_t copyAddress;
2850 	int32 displayWidth;
2851 	int32 itemSize;
2852 	int32 num = -1;
2853 	addr_t address;
2854 	int i = 1, j;
2855 
2856 	if (argc > 1 && argv[1][0] == '-') {
2857 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2858 			physical = true;
2859 			i++;
2860 		} else
2861 			i = 99;
2862 	}
2863 
2864 	if (argc < i + 1 || argc > i + 2) {
2865 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2866 			"\tdl - 8 bytes\n"
2867 			"\tdw - 4 bytes\n"
2868 			"\tds - 2 bytes\n"
2869 			"\tdb - 1 byte\n"
2870 			"\tstring - a whole string\n"
2871 			"  -p or --physical only allows memory from a single page to be "
2872 			"displayed.\n");
2873 		return 0;
2874 	}
2875 
2876 	address = parse_expression(argv[i]);
2877 
2878 	if (argc > i + 1)
2879 		num = parse_expression(argv[i + 1]);
2880 
2881 	// build the format string
2882 	if (strcmp(argv[0], "db") == 0) {
2883 		itemSize = 1;
2884 		displayWidth = 16;
2885 	} else if (strcmp(argv[0], "ds") == 0) {
2886 		itemSize = 2;
2887 		displayWidth = 8;
2888 	} else if (strcmp(argv[0], "dw") == 0) {
2889 		itemSize = 4;
2890 		displayWidth = 4;
2891 	} else if (strcmp(argv[0], "dl") == 0) {
2892 		itemSize = 8;
2893 		displayWidth = 2;
2894 	} else if (strcmp(argv[0], "string") == 0) {
2895 		itemSize = 1;
2896 		displayWidth = -1;
2897 	} else {
2898 		kprintf("display_mem called in an invalid way!\n");
2899 		return 0;
2900 	}
2901 
2902 	if (num <= 0)
2903 		num = displayWidth;
2904 
2905 	void* physicalPageHandle = NULL;
2906 
2907 	if (physical) {
2908 		int32 offset = address & (B_PAGE_SIZE - 1);
2909 		if (num * itemSize + offset > B_PAGE_SIZE) {
2910 			num = (B_PAGE_SIZE - offset) / itemSize;
2911 			kprintf("NOTE: number of bytes has been cut to page size\n");
2912 		}
2913 
2914 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2915 
2916 		if (vm_get_physical_page_debug(address, &copyAddress,
2917 				&physicalPageHandle) != B_OK) {
2918 			kprintf("getting the hardware page failed.\n");
2919 			return 0;
2920 		}
2921 
2922 		address += offset;
2923 		copyAddress += offset;
2924 	} else
2925 		copyAddress = address;
2926 
2927 	if (!strcmp(argv[0], "string")) {
2928 		kprintf("%p \"", (char*)copyAddress);
2929 
2930 		// string mode
2931 		for (i = 0; true; i++) {
2932 			char c;
2933 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2934 					!= B_OK
2935 				|| c == '\0') {
2936 				break;
2937 			}
2938 
2939 			if (c == '\n')
2940 				kprintf("\\n");
2941 			else if (c == '\t')
2942 				kprintf("\\t");
2943 			else {
2944 				if (!isprint(c))
2945 					c = '.';
2946 
2947 				kprintf("%c", c);
2948 			}
2949 		}
2950 
2951 		kprintf("\"\n");
2952 	} else {
2953 		// number mode
2954 		for (i = 0; i < num; i++) {
2955 			uint32 value;
2956 
2957 			if ((i % displayWidth) == 0) {
2958 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2959 				if (i != 0)
2960 					kprintf("\n");
2961 
2962 				kprintf("[0x%lx]  ", address + i * itemSize);
2963 
2964 				for (j = 0; j < displayed; j++) {
2965 					char c;
2966 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2967 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2968 						displayed = j;
2969 						break;
2970 					}
2971 					if (!isprint(c))
2972 						c = '.';
2973 
2974 					kprintf("%c", c);
2975 				}
2976 				if (num > displayWidth) {
2977 					// make sure the spacing in the last line is correct
2978 					for (j = displayed; j < displayWidth * itemSize; j++)
2979 						kprintf(" ");
2980 				}
2981 				kprintf("  ");
2982 			}
2983 
2984 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2985 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2986 				kprintf("read fault");
2987 				break;
2988 			}
2989 
2990 			switch (itemSize) {
2991 				case 1:
2992 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
2993 					break;
2994 				case 2:
2995 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
2996 					break;
2997 				case 4:
2998 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
2999 					break;
3000 				case 8:
3001 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3002 					break;
3003 			}
3004 		}
3005 
3006 		kprintf("\n");
3007 	}
3008 
3009 	if (physical) {
3010 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3011 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3012 	}
3013 	return 0;
3014 }
3015 
3016 
3017 static void
3018 dump_cache_tree_recursively(VMCache* cache, int level,
3019 	VMCache* highlightCache)
3020 {
3021 	// print this cache
3022 	for (int i = 0; i < level; i++)
3023 		kprintf("  ");
3024 	if (cache == highlightCache)
3025 		kprintf("%p <--\n", cache);
3026 	else
3027 		kprintf("%p\n", cache);
3028 
3029 	// recursively print its consumers
3030 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3031 			VMCache* consumer = it.Next();) {
3032 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3033 	}
3034 }
3035 
3036 
3037 static int
3038 dump_cache_tree(int argc, char** argv)
3039 {
3040 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3041 		kprintf("usage: %s <address>\n", argv[0]);
3042 		return 0;
3043 	}
3044 
3045 	addr_t address = parse_expression(argv[1]);
3046 	if (address == 0)
3047 		return 0;
3048 
3049 	VMCache* cache = (VMCache*)address;
3050 	VMCache* root = cache;
3051 
3052 	// find the root cache (the transitive source)
3053 	while (root->source != NULL)
3054 		root = root->source;
3055 
3056 	dump_cache_tree_recursively(root, 0, cache);
3057 
3058 	return 0;
3059 }
3060 
3061 
3062 const char*
3063 vm_cache_type_to_string(int32 type)
3064 {
3065 	switch (type) {
3066 		case CACHE_TYPE_RAM:
3067 			return "RAM";
3068 		case CACHE_TYPE_DEVICE:
3069 			return "device";
3070 		case CACHE_TYPE_VNODE:
3071 			return "vnode";
3072 		case CACHE_TYPE_NULL:
3073 			return "null";
3074 
3075 		default:
3076 			return "unknown";
3077 	}
3078 }
3079 
3080 
3081 #if DEBUG_CACHE_LIST
3082 
3083 static void
3084 update_cache_info_recursively(VMCache* cache, cache_info& info)
3085 {
3086 	info.page_count += cache->page_count;
3087 	if (cache->type == CACHE_TYPE_RAM)
3088 		info.committed += cache->committed_size;
3089 
3090 	// recurse
3091 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3092 			VMCache* consumer = it.Next();) {
3093 		update_cache_info_recursively(consumer, info);
3094 	}
3095 }
3096 
3097 
3098 static int
3099 cache_info_compare_page_count(const void* _a, const void* _b)
3100 {
3101 	const cache_info* a = (const cache_info*)_a;
3102 	const cache_info* b = (const cache_info*)_b;
3103 	if (a->page_count == b->page_count)
3104 		return 0;
3105 	return a->page_count < b->page_count ? 1 : -1;
3106 }
3107 
3108 
3109 static int
3110 cache_info_compare_committed(const void* _a, const void* _b)
3111 {
3112 	const cache_info* a = (const cache_info*)_a;
3113 	const cache_info* b = (const cache_info*)_b;
3114 	if (a->committed == b->committed)
3115 		return 0;
3116 	return a->committed < b->committed ? 1 : -1;
3117 }
3118 
3119 
3120 static void
3121 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3122 {
3123 	for (int i = 0; i < level; i++)
3124 		kprintf("  ");
3125 
3126 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3127 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3128 		cache->virtual_base, cache->virtual_end, cache->page_count);
3129 
3130 	if (level == 0)
3131 		kprintf("/%lu", info.page_count);
3132 
3133 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3134 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3135 
3136 		if (level == 0)
3137 			kprintf("/%lu", info.committed);
3138 	}
3139 
3140 	// areas
3141 	if (cache->areas != NULL) {
3142 		VMArea* area = cache->areas;
3143 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3144 			area->name, area->address_space->ID());
3145 
3146 		while (area->cache_next != NULL) {
3147 			area = area->cache_next;
3148 			kprintf(", %" B_PRId32, area->id);
3149 		}
3150 	}
3151 
3152 	kputs("\n");
3153 
3154 	// recurse
3155 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3156 			VMCache* consumer = it.Next();) {
3157 		dump_caches_recursively(consumer, info, level + 1);
3158 	}
3159 }
3160 
3161 
3162 static int
3163 dump_caches(int argc, char** argv)
3164 {
3165 	if (sCacheInfoTable == NULL) {
3166 		kprintf("No cache info table!\n");
3167 		return 0;
3168 	}
3169 
3170 	bool sortByPageCount = true;
3171 
3172 	for (int32 i = 1; i < argc; i++) {
3173 		if (strcmp(argv[i], "-c") == 0) {
3174 			sortByPageCount = false;
3175 		} else {
3176 			print_debugger_command_usage(argv[0]);
3177 			return 0;
3178 		}
3179 	}
3180 
3181 	uint32 totalCount = 0;
3182 	uint32 rootCount = 0;
3183 	off_t totalCommitted = 0;
3184 	page_num_t totalPages = 0;
3185 
3186 	VMCache* cache = gDebugCacheList;
3187 	while (cache) {
3188 		totalCount++;
3189 		if (cache->source == NULL) {
3190 			cache_info stackInfo;
3191 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3192 				? sCacheInfoTable[rootCount] : stackInfo;
3193 			rootCount++;
3194 			info.cache = cache;
3195 			info.page_count = 0;
3196 			info.committed = 0;
3197 			update_cache_info_recursively(cache, info);
3198 			totalCommitted += info.committed;
3199 			totalPages += info.page_count;
3200 		}
3201 
3202 		cache = cache->debug_next;
3203 	}
3204 
3205 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3206 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3207 			sortByPageCount
3208 				? &cache_info_compare_page_count
3209 				: &cache_info_compare_committed);
3210 	}
3211 
3212 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3213 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3214 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3215 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3216 			"page count" : "committed size");
3217 
3218 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3219 		for (uint32 i = 0; i < rootCount; i++) {
3220 			cache_info& info = sCacheInfoTable[i];
3221 			dump_caches_recursively(info.cache, info, 0);
3222 		}
3223 	} else
3224 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3225 
3226 	return 0;
3227 }
3228 
3229 #endif	// DEBUG_CACHE_LIST
3230 
3231 
3232 static int
3233 dump_cache(int argc, char** argv)
3234 {
3235 	VMCache* cache;
3236 	bool showPages = false;
3237 	int i = 1;
3238 
3239 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3240 		kprintf("usage: %s [-ps] <address>\n"
3241 			"  if -p is specified, all pages are shown; if -s is used,\n"
3242 			"  only the cache info is shown.\n", argv[0]);
3243 		return 0;
3244 	}
3245 	while (argv[i] != NULL && argv[i][0] == '-') {
3246 		char* arg = argv[i] + 1;
3247 		while (arg[0]) {
3248 			if (arg[0] == 'p')
3249 				showPages = true;
3250 			arg++;
3251 		}
3252 		i++;
3253 	}
3254 	if (argv[i] == NULL) {
3255 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3256 		return 0;
3257 	}
3258 
3259 	addr_t address = parse_expression(argv[i]);
3260 	if (address == 0)
3261 		return 0;
3262 
3263 	cache = (VMCache*)address;
3264 
3265 	cache->Dump(showPages);
3266 
3267 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3268 
3269 	return 0;
3270 }
3271 
3272 
3273 static void
3274 dump_area_struct(VMArea* area, bool mappings)
3275 {
3276 	kprintf("AREA: %p\n", area);
3277 	kprintf("name:\t\t'%s'\n", area->name);
3278 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3279 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3280 	kprintf("base:\t\t0x%lx\n", area->Base());
3281 	kprintf("size:\t\t0x%lx\n", area->Size());
3282 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3283 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3284 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3285 	kprintf("cache:\t\t%p\n", area->cache);
3286 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3287 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3288 	kprintf("cache_next:\t%p\n", area->cache_next);
3289 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3290 
3291 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3292 	if (mappings) {
3293 		kprintf("page mappings:\n");
3294 		while (iterator.HasNext()) {
3295 			vm_page_mapping* mapping = iterator.Next();
3296 			kprintf("  %p", mapping->page);
3297 		}
3298 		kprintf("\n");
3299 	} else {
3300 		uint32 count = 0;
3301 		while (iterator.Next() != NULL) {
3302 			count++;
3303 		}
3304 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3305 	}
3306 }
3307 
3308 
3309 static int
3310 dump_area(int argc, char** argv)
3311 {
3312 	bool mappings = false;
3313 	bool found = false;
3314 	int32 index = 1;
3315 	VMArea* area;
3316 	addr_t num;
3317 
3318 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3319 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3320 			"All areas matching either id/address/name are listed. You can\n"
3321 			"restrict the check to one specific attribute by prefixing the\n"
3322 			"specifier with one of the id/contains/address/name keywords.\n"
3323 			"-m shows the area's mappings as well.\n");
3324 		return 0;
3325 	}
3326 
3327 	if (!strcmp(argv[1], "-m")) {
3328 		mappings = true;
3329 		index++;
3330 	}
3331 
3332 	int32 mode = 0xf;
3333 	if (!strcmp(argv[index], "id"))
3334 		mode = 1;
3335 	else if (!strcmp(argv[index], "contains"))
3336 		mode = 2;
3337 	else if (!strcmp(argv[index], "name"))
3338 		mode = 4;
3339 	else if (!strcmp(argv[index], "address"))
3340 		mode = 0;
3341 	if (mode != 0xf)
3342 		index++;
3343 
3344 	if (index >= argc) {
3345 		kprintf("No area specifier given.\n");
3346 		return 0;
3347 	}
3348 
3349 	num = parse_expression(argv[index]);
3350 
3351 	if (mode == 0) {
3352 		dump_area_struct((struct VMArea*)num, mappings);
3353 	} else {
3354 		// walk through the area list, looking for the arguments as a name
3355 
3356 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3357 		while ((area = it.Next()) != NULL) {
3358 			if (((mode & 4) != 0 && area->name != NULL
3359 					&& !strcmp(argv[index], area->name))
3360 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3361 					|| (((mode & 2) != 0 && area->Base() <= num
3362 						&& area->Base() + area->Size() > num))))) {
3363 				dump_area_struct(area, mappings);
3364 				found = true;
3365 			}
3366 		}
3367 
3368 		if (!found)
3369 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3370 	}
3371 
3372 	return 0;
3373 }
3374 
3375 
3376 static int
3377 dump_area_list(int argc, char** argv)
3378 {
3379 	VMArea* area;
3380 	const char* name = NULL;
3381 	int32 id = 0;
3382 
3383 	if (argc > 1) {
3384 		id = parse_expression(argv[1]);
3385 		if (id == 0)
3386 			name = argv[1];
3387 	}
3388 
3389 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3390 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3391 		B_PRINTF_POINTER_WIDTH, "size");
3392 
3393 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3394 	while ((area = it.Next()) != NULL) {
3395 		if ((id != 0 && area->address_space->ID() != id)
3396 			|| (name != NULL && strstr(area->name, name) == NULL))
3397 			continue;
3398 
3399 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3400 			area->id, (void*)area->Base(), (void*)area->Size(),
3401 			area->protection, area->wiring, area->name);
3402 	}
3403 	return 0;
3404 }
3405 
3406 
3407 static int
3408 dump_available_memory(int argc, char** argv)
3409 {
3410 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3411 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3412 	return 0;
3413 }
3414 
3415 
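/*!	Debugger command: without options it prints the translation map info for
	the given virtual address; with -r (physical address) or -p (vm_page) it
	prints the reverse mappings instead, optionally restricted to one team.
*/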
3416 static int
3417 dump_mapping_info(int argc, char** argv)
3418 {
3419 	bool reverseLookup = false;
3420 	bool pageLookup = false;
3421 
3422 	int argi = 1;
3423 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3424 		const char* arg = argv[argi];
3425 		if (strcmp(arg, "-r") == 0) {
3426 			reverseLookup = true;
3427 		} else if (strcmp(arg, "-p") == 0) {
3428 			reverseLookup = true;
3429 			pageLookup = true;
3430 		} else {
3431 			print_debugger_command_usage(argv[0]);
3432 			return 0;
3433 		}
3434 	}
3435 
3436 	// We need at least one argument, the address. Optionally a thread ID can be
3437 	// specified.
3438 	if (argi >= argc || argi + 2 < argc) {
3439 		print_debugger_command_usage(argv[0]);
3440 		return 0;
3441 	}
3442 
3443 	uint64 addressValue;
3444 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3445 		return 0;
3446 
3447 	Team* team = NULL;
3448 	if (argi < argc) {
3449 		uint64 threadID;
3450 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3451 			return 0;
3452 
3453 		Thread* thread = Thread::GetDebug(threadID);
3454 		if (thread == NULL) {
3455 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3456 			return 0;
3457 		}
3458 
3459 		team = thread->team;
3460 	}
3461 
3462 	if (reverseLookup) {
3463 		phys_addr_t physicalAddress;
3464 		if (pageLookup) {
3465 			vm_page* page = (vm_page*)(addr_t)addressValue;
3466 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3467 		} else {
3468 			physicalAddress = (phys_addr_t)addressValue;
3469 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3470 		}
3471 
3472 		kprintf("    Team     Virtual Address      Area\n");
3473 		kprintf("--------------------------------------\n");
3474 
3475 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3476 			Callback()
3477 				:
3478 				fAddressSpace(NULL)
3479 			{
3480 			}
3481 
3482 			void SetAddressSpace(VMAddressSpace* addressSpace)
3483 			{
3484 				fAddressSpace = addressSpace;
3485 			}
3486 
3487 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3488 			{
3489 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3490 					virtualAddress);
3491 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3492 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3493 				else
3494 					kprintf("\n");
3495 				return false;
3496 			}
3497 
3498 		private:
3499 			VMAddressSpace*	fAddressSpace;
3500 		} callback;
3501 
3502 		if (team != NULL) {
3503 			// team specified -- get its address space
3504 			VMAddressSpace* addressSpace = team->address_space;
3505 			if (addressSpace == NULL) {
3506 				kprintf("Failed to get address space!\n");
3507 				return 0;
3508 			}
3509 
3510 			callback.SetAddressSpace(addressSpace);
3511 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3512 				physicalAddress, callback);
3513 		} else {
3514 			// no team specified -- iterate through all address spaces
3515 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3516 				addressSpace != NULL;
3517 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3518 				callback.SetAddressSpace(addressSpace);
3519 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3520 					physicalAddress, callback);
3521 			}
3522 		}
3523 	} else {
3524 		// get the address space
3525 		addr_t virtualAddress = (addr_t)addressValue;
3526 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3527 		VMAddressSpace* addressSpace;
3528 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3529 			addressSpace = VMAddressSpace::Kernel();
3530 		} else if (team != NULL) {
3531 			addressSpace = team->address_space;
3532 		} else {
3533 			Thread* thread = debug_get_debugged_thread();
3534 			if (thread == NULL || thread->team == NULL) {
3535 				kprintf("Failed to get team!\n");
3536 				return 0;
3537 			}
3538 
3539 			addressSpace = thread->team->address_space;
3540 		}
3541 
3542 		if (addressSpace == NULL) {
3543 			kprintf("Failed to get address space!\n");
3544 			return 0;
3545 		}
3546 
3547 		// let the translation map implementation do the job
3548 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3549 	}
3550 
3551 	return 0;
3552 }
3553 
3554 
3555 /*!	Deletes all areas and reserved regions in the given address space.
3556 
3557 	The caller must ensure that none of the areas has any wired ranges.
3558 
3559 	\param addressSpace The address space.
3560 	\param deletingAddressSpace \c true, if the address space is in the process
3561 		of being deleted.
3562 */
3563 void
3564 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3565 {
3566 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3567 		addressSpace->ID()));
3568 
3569 	addressSpace->WriteLock();
3570 
3571 	// remove all reserved areas in this address space
3572 	addressSpace->UnreserveAllAddressRanges(0);
3573 
3574 	// delete all the areas in this address space
3575 	while (VMArea* area = addressSpace->FirstArea()) {
3576 		ASSERT(!area->IsWired());
3577 		delete_area(addressSpace, area, deletingAddressSpace);
3578 	}
3579 
3580 	addressSpace->WriteUnlock();
3581 }
3582 
3583 
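/*!	Returns the ID of the area covering the given address. The address is
	looked up in the current team's address space for user addresses and in
	the kernel address space otherwise. If \a kernel is \c false, areas that
	grant neither \c B_READ_AREA nor \c B_WRITE_AREA to userland are not
	reported. Returns \c B_ERROR if no (accessible) area covers the address.
*/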
3584 static area_id
3585 vm_area_for(addr_t address, bool kernel)
3586 {
3587 	team_id team;
3588 	if (IS_USER_ADDRESS(address)) {
3589 		// we try the user team address space, if any
3590 		team = VMAddressSpace::CurrentID();
3591 		if (team < 0)
3592 			return team;
3593 	} else
3594 		team = VMAddressSpace::KernelID();
3595 
3596 	AddressSpaceReadLocker locker(team);
3597 	if (!locker.IsLocked())
3598 		return B_BAD_TEAM_ID;
3599 
3600 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3601 	if (area != NULL) {
3602 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3603 			return B_ERROR;
3604 
3605 		return area->id;
3606 	}
3607 
3608 	return B_ERROR;
3609 }
3610 
3611 
3612 /*!	Frees physical pages that were used during the boot process.
3613 	\a end is inclusive.
3614 */
3615 static void
3616 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3617 {
3618 	// free all physical pages in the specified range
3619 
3620 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3621 		phys_addr_t physicalAddress;
3622 		uint32 flags;
3623 
3624 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3625 			&& (flags & PAGE_PRESENT) != 0) {
3626 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3627 			if (page != NULL && page->State() != PAGE_STATE_FREE
3628 					 && page->State() != PAGE_STATE_CLEAR
3629 					 && page->State() != PAGE_STATE_UNUSED) {
3630 				DEBUG_PAGE_ACCESS_START(page);
3631 				vm_page_set_state(page, PAGE_STATE_FREE);
3632 			}
3633 		}
3634 	}
3635 
3636 	// unmap the memory
3637 	map->Unmap(start, end);
3638 }
3639 
3640 
3641 void
3642 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3643 {
3644 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3645 	addr_t end = start + (size - 1);
3646 	addr_t lastEnd = start;
3647 
3648 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3649 		(void*)start, (void*)end));
3650 
3651 	// The areas are sorted in virtual address space order, so
3652 	// we just have to find the holes between them that fall
3653 	// into the area we should dispose
3654 
3655 	map->Lock();
3656 
3657 	for (VMAddressSpace::AreaIterator it
3658 				= VMAddressSpace::Kernel()->GetAreaIterator();
3659 			VMArea* area = it.Next();) {
3660 		addr_t areaStart = area->Base();
3661 		addr_t areaEnd = areaStart + (area->Size() - 1);
3662 
3663 		if (areaEnd < start)
3664 			continue;
3665 
3666 		if (areaStart > end) {
3667 			// we are done, the area is already beyond what we have to free
3668 			break;
3669 		}
3670 
3671 		if (areaStart > lastEnd) {
3672 			// this is something we can free
3673 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3674 				(void*)areaStart));
3675 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3676 		}
3677 
3678 		if (areaEnd >= end) {
3679 			lastEnd = areaEnd;
3680 				// no +1 to prevent potential overflow
3681 			break;
3682 		}
3683 
3684 		lastEnd = areaEnd + 1;
3685 	}
3686 
3687 	if (lastEnd < end) {
3688 		// we can also get rid of some space at the end of the area
3689 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3690 			(void*)end));
3691 		unmap_and_free_physical_pages(map, lastEnd, end);
3692 	}
3693 
3694 	map->Unlock();
3695 }
3696 
3697 
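/*!	Creates areas for the text and data segments of an image preloaded by the
	boot loader. The area names are derived from the image's file name with
	"_text" and "_data" suffixes; the pages are already wired and mapped, so
	the areas merely wrap them (B_ALREADY_WIRED).
*/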
3698 static void
3699 create_preloaded_image_areas(struct preloaded_image* _image)
3700 {
3701 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3702 	char name[B_OS_NAME_LENGTH];
3703 	void* address;
3704 	int32 length;
3705 
3706 	// use file name to create a good area name
3707 	char* fileName = strrchr(image->name, '/');
3708 	if (fileName == NULL)
3709 		fileName = image->name;
3710 	else
3711 		fileName++;
3712 
3713 	length = strlen(fileName);
3714 	// make sure there is enough space for the suffix
3715 	if (length > 25)
3716 		length = 25;
3717 
3718 	memcpy(name, fileName, length);
3719 	strcpy(name + length, "_text");
3720 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3721 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3722 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3723 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3724 		// this will later be remapped read-only/executable by the
3725 		// ELF initialization code
3726 
3727 	strcpy(name + length, "_data");
3728 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3729 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3730 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3731 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3732 }
3733 
3734 
3735 /*!	Frees all areas that were previously created to hold the kernel arguments.
3736 	Any boot loader resources contained in those arguments must not be accessed
3737 	anymore past this point.
3738 */
3739 void
3740 vm_free_kernel_args(kernel_args* args)
3741 {
3742 	uint32 i;
3743 
3744 	TRACE(("vm_free_kernel_args()\n"));
3745 
3746 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3747 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3748 		if (area >= B_OK)
3749 			delete_area(area);
3750 	}
3751 }
3752 
3753 
3754 static void
3755 allocate_kernel_args(kernel_args* args)
3756 {
3757 	TRACE(("allocate_kernel_args()\n"));
3758 
3759 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3760 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3761 
3762 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3763 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3764 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3765 	}
3766 }
3767 
3768 
3769 static void
3770 unreserve_boot_loader_ranges(kernel_args* args)
3771 {
3772 	TRACE(("unreserve_boot_loader_ranges()\n"));
3773 
3774 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3775 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3776 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3777 			args->virtual_allocated_range[i].size);
3778 	}
3779 }
3780 
3781 
3782 static void
3783 reserve_boot_loader_ranges(kernel_args* args)
3784 {
3785 	TRACE(("reserve_boot_loader_ranges()\n"));
3786 
3787 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3788 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3789 
3790 		// If the address is not a kernel address, we just skip it. The
3791 		// architecture specific code has to deal with it.
3792 		if (!IS_KERNEL_ADDRESS(address)) {
3793 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3794 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3795 			continue;
3796 		}
3797 
3798 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3799 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3800 		if (status < B_OK)
3801 			panic("could not reserve boot loader ranges\n");
3802 	}
3803 }
3804 
3805 
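/*!	Reserves \a size bytes of kernel virtual address space for early (pre-VM)
	allocations, using the boot loader's virtual_allocated_range array for
	book-keeping. It first looks for a sufficiently large gap between two
	existing ranges, then after the last range, and finally before the first
	one, extending the neighboring range entry to cover the allocation.
	Returns the base address, or 0 if no suitable gap was found.
*/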
3806 static addr_t
3807 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3808 {
3809 	size = PAGE_ALIGN(size);
3810 
3811 	// find a slot in the virtual allocation addr range
3812 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3813 		// check to see if the space between this one and the last is big enough
3814 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3815 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3816 			+ args->virtual_allocated_range[i - 1].size;
3817 
3818 		addr_t base = alignment > 0
3819 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3820 
3821 		if (base >= KERNEL_BASE && base < rangeStart
3822 				&& rangeStart - base >= size) {
3823 			args->virtual_allocated_range[i - 1].size
3824 				+= base + size - previousRangeEnd;
3825 			return base;
3826 		}
3827 	}
3828 
3829 	// We didn't find a gap between the existing allocation ranges. That's OK;
3830 	// see if there's a gap after the last one.
3831 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3832 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3833 		+ args->virtual_allocated_range[lastEntryIndex].size;
3834 	addr_t base = alignment > 0
3835 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3836 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3837 		args->virtual_allocated_range[lastEntryIndex].size
3838 			+= base + size - lastRangeEnd;
3839 		return base;
3840 	}
3841 
3842 	// see if there's a gap before the first one
3843 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3844 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3845 		base = rangeStart - size;
3846 		if (alignment > 0)
3847 			base = ROUNDDOWN(base, alignment);
3848 
3849 		if (base >= KERNEL_BASE) {
3850 			args->virtual_allocated_range[0].start = base;
3851 			args->virtual_allocated_range[0].size += rangeStart - base;
3852 			return base;
3853 		}
3854 	}
3855 
3856 	return 0;
3857 }
3858 
3859 
3860 static bool
3861 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3862 {
3863 	// TODO: horrible brute-force method of determining if the page can be
3864 	// allocated
3865 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3866 		if (address >= args->physical_memory_range[i].start
3867 			&& address < args->physical_memory_range[i].start
3868 				+ args->physical_memory_range[i].size)
3869 			return true;
3870 	}
3871 	return false;
3872 }
3873 
3874 
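/*!	Allocates a single physical page for use before the VM is fully up, by
	growing one of the boot loader's physical_allocated_range entries by one
	page -- first trying to extend a range upwards, then downwards. The page
	must lie within one of the physical_memory_ranges. Returns the page
	number, or 0 if no page could be allocated.
*/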
3875 page_num_t
3876 vm_allocate_early_physical_page(kernel_args* args)
3877 {
3878 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3879 		phys_addr_t nextPage;
3880 
3881 		nextPage = args->physical_allocated_range[i].start
3882 			+ args->physical_allocated_range[i].size;
3883 		// see if the page after this allocated paddr run can be allocated
3884 		if (i + 1 < args->num_physical_allocated_ranges
3885 			&& args->physical_allocated_range[i + 1].size != 0) {
3886 			// see if the next page will collide with the next allocated range
3887 			if (nextPage >= args->physical_allocated_range[i+1].start)
3888 				continue;
3889 		}
3890 		// see if the next physical page fits in the memory block
3891 		if (is_page_in_physical_memory_range(args, nextPage)) {
3892 			// we got one!
3893 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3894 			return nextPage / B_PAGE_SIZE;
3895 		}
3896 	}
3897 
3898 	// Expanding upwards didn't work, try going downwards.
3899 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3900 		phys_addr_t nextPage;
3901 
3902 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
3903 		// see if the page just before this allocated paddr run can be allocated
3904 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
3905 			// see if that page would collide with the previous allocated range
3906 			if (nextPage < args->physical_allocated_range[i-1].start
3907 				+ args->physical_allocated_range[i-1].size)
3908 				continue;
3909 		}
3910 		// see if the next physical page fits in the memory block
3911 		if (is_page_in_physical_memory_range(args, nextPage)) {
3912 			// we got one!
3913 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
3914 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3915 			return nextPage / B_PAGE_SIZE;
3916 		}
3917 	}
3918 
3939 	return 0;
3940 		// could not allocate a block
3941 }
3942 
3943 
3944 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3945 	allocate some pages before the VM is completely up.
3946 */
3947 addr_t
3948 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3949 	uint32 attributes, addr_t alignment)
3950 {
3951 	if (physicalSize > virtualSize)
3952 		physicalSize = virtualSize;
3953 
3954 	// find the vaddr to allocate at
3955 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3956 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3957 	if (virtualBase == 0) {
3958 		panic("vm_allocate_early: could not allocate virtual address\n");
3959 		return 0;
3960 	}
3961 
3962 	// map the pages
3963 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3964 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3965 		if (physicalAddress == 0)
3966 			panic("error allocating early page!\n");
3967 
3968 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3969 
3970 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3971 			physicalAddress * B_PAGE_SIZE, attributes,
3972 			&vm_allocate_early_physical_page);
3973 	}
3974 
3975 	return virtualBase;
3976 }
3977 
3978 
3979 /*!	The main entrance point to initialize the VM. */
3980 status_t
3981 vm_init(kernel_args* args)
3982 {
3983 	struct preloaded_image* image;
3984 	void* address;
3985 	status_t err = 0;
3986 	uint32 i;
3987 
3988 	TRACE(("vm_init: entry\n"));
3989 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3990 	err = arch_vm_init(args);
3991 
3992 	// initialize some globals
3993 	vm_page_init_num_pages(args);
3994 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3995 
3996 	slab_init(args);
3997 
3998 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3999 	off_t heapSize = INITIAL_HEAP_SIZE;
4000 	// try to accommodate low-memory systems
4001 	while (heapSize > sAvailableMemory / 8)
4002 		heapSize /= 2;
4003 	if (heapSize < 1024 * 1024)
4004 		panic("vm_init: go buy some RAM please.");
4005 
4006 	// map in the new heap and initialize it
4007 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4008 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4009 	TRACE(("heap at 0x%lx\n", heapBase));
4010 	heap_init(heapBase, heapSize);
4011 #endif
4012 
4013 	// initialize the free page list and physical page mapper
4014 	vm_page_init(args);
4015 
4016 	// initialize the cache allocators
4017 	vm_cache_init(args);
4018 
4019 	{
4020 		status_t error = VMAreaHash::Init();
4021 		if (error != B_OK)
4022 			panic("vm_init: error initializing area hash table\n");
4023 	}
4024 
4025 	VMAddressSpace::Init();
4026 	reserve_boot_loader_ranges(args);
4027 
4028 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4029 	heap_init_post_area();
4030 #endif
4031 
4032 	// Do any further initialization that the architecture-dependent layers may
4033 	// need now
4034 	arch_vm_translation_map_init_post_area(args);
4035 	arch_vm_init_post_area(args);
4036 	vm_page_init_post_area(args);
4037 	slab_init_post_area();
4038 
4039 	// allocate areas to represent stuff that already exists
4040 
4041 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4042 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4043 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4044 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4045 #endif
4046 
4047 	allocate_kernel_args(args);
4048 
4049 	create_preloaded_image_areas(args->kernel_image);
4050 
4051 	// allocate areas for preloaded images
4052 	for (image = args->preloaded_images; image != NULL; image = image->next)
4053 		create_preloaded_image_areas(image);
4054 
4055 	// allocate kernel stacks
4056 	for (i = 0; i < args->num_cpus; i++) {
4057 		char name[64];
4058 
4059 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4060 		address = (void*)args->cpu_kstack[i].start;
4061 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4062 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4063 	}
4064 
4065 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4066 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4067 
4068 #if PARANOID_KERNEL_MALLOC
4069 	vm_block_address_range("uninitialized heap memory",
4070 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4071 #endif
4072 #if PARANOID_KERNEL_FREE
4073 	vm_block_address_range("freed heap memory",
4074 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4075 #endif
4076 
4077 	// create the object cache for the page mappings
4078 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4079 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4080 		NULL, NULL);
4081 	if (gPageMappingsObjectCache == NULL)
4082 		panic("failed to create page mappings object cache");
4083 
4084 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4085 
4086 #if DEBUG_CACHE_LIST
4087 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4088 		virtual_address_restrictions virtualRestrictions = {};
4089 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4090 		physical_address_restrictions physicalRestrictions = {};
4091 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4092 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4093 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4094 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4095 			&physicalRestrictions, (void**)&sCacheInfoTable);
4096 	}
4097 #endif	// DEBUG_CACHE_LIST
4098 
4099 	// add some debugger commands
4100 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4101 	add_debugger_command("area", &dump_area,
4102 		"Dump info about a particular area");
4103 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4104 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4105 #if DEBUG_CACHE_LIST
4106 	if (sCacheInfoTable != NULL) {
4107 		add_debugger_command_etc("caches", &dump_caches,
4108 			"List all VMCache trees",
4109 			"[ \"-c\" ]\n"
4110 			"All cache trees are listed sorted in decreasing order by number "
4111 				"of\n"
4112 			"used pages or, if \"-c\" is specified, by size of committed "
4113 				"memory.\n",
4114 			0);
4115 	}
4116 #endif
4117 	add_debugger_command("avail", &dump_available_memory,
4118 		"Dump available memory");
4119 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4120 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4121 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4122 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4123 	add_debugger_command("string", &display_mem, "dump strings");
4124 
4125 	add_debugger_command_etc("mapping", &dump_mapping_info,
4126 		"Print address mapping information",
4127 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4128 		"Prints low-level page mapping information for a given address. If\n"
4129 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4130 		"address that is looked up in the translation map of the current\n"
4131 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4132 		"\"-r\" is specified, <address> is a physical address that is\n"
4133 		"searched in the translation map of all teams, respectively the team\n"
4134 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4135 		"<address> is the address of a vm_page structure. The behavior is\n"
4136 		"equivalent to specifying \"-r\" with the physical address of that\n"
4137 		"page.\n",
4138 		0);
4139 
4140 	TRACE(("vm_init: exit\n"));
4141 
4142 	vm_cache_init_post_heap();
4143 
4144 	return err;
4145 }
4146 
4147 
4148 status_t
4149 vm_init_post_sem(kernel_args* args)
4150 {
4151 	// This frees all unused boot loader resources and makes their space
4152 	// available again
4153 	arch_vm_init_end(args);
4154 	unreserve_boot_loader_ranges(args);
4155 
4156 	// fill in all of the semaphores that were not allocated before
4157 	// since we're still single threaded and only the kernel address space
4158 	// exists, it isn't that hard to find all of the ones we need to create
4159 
4160 	arch_vm_translation_map_init_post_sem(args);
4161 
4162 	slab_init_post_sem();
4163 
4164 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4165 	heap_init_post_sem();
4166 #endif
4167 
4168 	return B_OK;
4169 }
4170 
4171 
4172 status_t
4173 vm_init_post_thread(kernel_args* args)
4174 {
4175 	vm_page_init_post_thread(args);
4176 	slab_init_post_thread();
4177 	return heap_init_post_thread();
4178 }
4179 
4180 
4181 status_t
4182 vm_init_post_modules(kernel_args* args)
4183 {
4184 	return arch_vm_init_post_modules(args);
4185 }
4186 
4187 
4188 void
4189 permit_page_faults(void)
4190 {
4191 	Thread* thread = thread_get_current_thread();
4192 	if (thread != NULL)
4193 		atomic_add(&thread->page_faults_allowed, 1);
4194 }
4195 
4196 
4197 void
4198 forbid_page_faults(void)
4199 {
4200 	Thread* thread = thread_get_current_thread();
4201 	if (thread != NULL)
4202 		atomic_add(&thread->page_faults_allowed, -1);
4203 }
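
// Note: permit_page_faults()/forbid_page_faults() only adjust the per-thread
// page_faults_allowed counter and are meant to be used in matching pairs.
// A minimal sketch of a hypothetical caller (not taken from this file):
//
//	permit_page_faults();
//	// ... access that may legitimately cause a page fault ...
//	forbid_page_faults();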
4204 
4205 
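/*!	The generic part of the page fault handling, invoked from the architecture
	specific fault handling code. It determines the affected address space,
	delegates the actual work to vm_soft_fault(), and, if that fails, either
	lets a kernel thread continue at its fault handler via \a *newIP or sends
	a SIGSEGV to the faulting userland thread (notifying the user debugger
	first, if appropriate). Always returns \c B_HANDLED_INTERRUPT.
*/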
4206 status_t
4207 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4208 	bool isUser, addr_t* newIP)
4209 {
4210 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4211 		faultAddress));
4212 
4213 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4214 
4215 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4216 	VMAddressSpace* addressSpace = NULL;
4217 
4218 	status_t status = B_OK;
4219 	*newIP = 0;
4220 	atomic_add((int32*)&sPageFaults, 1);
4221 
4222 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4223 		addressSpace = VMAddressSpace::GetKernel();
4224 	} else if (IS_USER_ADDRESS(pageAddress)) {
4225 		addressSpace = VMAddressSpace::GetCurrent();
4226 		if (addressSpace == NULL) {
4227 			if (!isUser) {
4228 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4229 					"memory!\n");
4230 				status = B_BAD_ADDRESS;
4231 				TPF(PageFaultError(-1,
4232 					VMPageFaultTracing
4233 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4234 			} else {
4235 				// XXX weird state.
4236 				panic("vm_page_fault: non kernel thread accessing user memory "
4237 					"that doesn't exist!\n");
4238 				status = B_BAD_ADDRESS;
4239 			}
4240 		}
4241 	} else {
4242 		// the hit was probably in the 64k DMZ between kernel and user space
4243 		// this keeps a user space thread from passing a buffer that crosses
4244 		// into kernel space
4245 		status = B_BAD_ADDRESS;
4246 		TPF(PageFaultError(-1,
4247 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4248 	}
4249 
4250 	if (status == B_OK) {
4251 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4252 			isUser, NULL);
4253 	}
4254 
4255 	if (status < B_OK) {
4256 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4257 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4258 			strerror(status), address, faultAddress, isWrite, isUser,
4259 			thread_get_current_thread_id());
4260 		if (!isUser) {
4261 			Thread* thread = thread_get_current_thread();
4262 			if (thread != NULL && thread->fault_handler != 0) {
4263 				// this will cause the arch-dependent page fault handler to
4264 				// modify the IP on the interrupt frame or whatever to return
4265 				// to this address
4266 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4267 			} else {
4268 				// unhandled page fault in the kernel
4269 				panic("vm_page_fault: unhandled page fault in kernel space at "
4270 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4271 			}
4272 		} else {
4273 #if 1
4274 			// TODO: remove me once we have proper userland debugging support
4275 			// (and tools)
4276 			VMArea* area = NULL;
4277 			if (addressSpace != NULL) {
4278 				addressSpace->ReadLock();
4279 				area = addressSpace->LookupArea(faultAddress);
4280 			}
4281 
4282 			Thread* thread = thread_get_current_thread();
4283 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4284 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4285 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4286 				thread->team->Name(), thread->team->id,
4287 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4288 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4289 					area->Base() : 0x0));
4290 
4291 			// We can print a stack trace of the userland thread here.
4292 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4293 // fault and someone is already waiting for a write lock on the same address
4294 // space. This thread will then try to acquire the lock again and will
4295 // be queued after the writer.
4296 #	if 0
4297 			if (area) {
4298 				struct stack_frame {
4299 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4300 						struct stack_frame*	previous;
4301 						void*				return_address;
4302 					#else
4303 						// ...
4304 					#warning writeme
4305 					#endif
4306 				} frame;
4307 #		ifdef __INTEL__
4308 				struct iframe* iframe = x86_get_user_iframe();
4309 				if (iframe == NULL)
4310 					panic("iframe is NULL!");
4311 
4312 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4313 					sizeof(struct stack_frame));
4314 #		elif defined(__POWERPC__)
4315 				struct iframe* iframe = ppc_get_user_iframe();
4316 				if (iframe == NULL)
4317 					panic("iframe is NULL!");
4318 
4319 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4320 					sizeof(struct stack_frame));
4321 #		else
4322 #			warning "vm_page_fault() stack trace won't work"
4323 				status = B_ERROR;
4324 #		endif
4325 
4326 				dprintf("stack trace:\n");
4327 				int32 maxFrames = 50;
4328 				while (status == B_OK && --maxFrames >= 0
4329 						&& frame.return_address != NULL) {
4330 					dprintf("  %p", frame.return_address);
4331 					area = addressSpace->LookupArea(
4332 						(addr_t)frame.return_address);
4333 					if (area) {
4334 						dprintf(" (%s + %#lx)", area->name,
4335 							(addr_t)frame.return_address - area->Base());
4336 					}
4337 					dprintf("\n");
4338 
4339 					status = user_memcpy(&frame, frame.previous,
4340 						sizeof(struct stack_frame));
4341 				}
4342 			}
4343 #	endif	// 0 (stack trace)
4344 
4345 			if (addressSpace != NULL)
4346 				addressSpace->ReadUnlock();
4347 #endif
4348 
4349 			// If the thread has a signal handler for SIGSEGV, we simply
4350 			// send it the signal. Otherwise we notify the user debugger
4351 			// first.
4352 			struct sigaction action;
4353 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4354 					&& action.sa_handler != SIG_DFL
4355 					&& action.sa_handler != SIG_IGN)
4356 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4357 					SIGSEGV)) {
4358 				Signal signal(SIGSEGV,
4359 					status == B_PERMISSION_DENIED
4360 						? SEGV_ACCERR : SEGV_MAPERR,
4361 					EFAULT, thread->team->id);
4362 				signal.SetAddress((void*)address);
4363 				send_signal_to_thread(thread, signal, 0);
4364 			}
4365 		}
4366 	}
4367 
4368 	if (addressSpace != NULL)
4369 		addressSpace->Put();
4370 
4371 	return B_HANDLED_INTERRUPT;
4372 }
4373 
4374 
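/*!	Bundles the state that vm_soft_fault() and fault_get_page() pass around
	while resolving a fault: the address space and cache chain locks, the page
	reservation, and the resulting page. Prepare() is called for each attempt,
	UnlockAll() releases the locks, and the destructor additionally returns
	the reserved pages.
*/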
4375 struct PageFaultContext {
4376 	AddressSpaceReadLocker	addressSpaceLocker;
4377 	VMCacheChainLocker		cacheChainLocker;
4378 
4379 	VMTranslationMap*		map;
4380 	VMCache*				topCache;
4381 	off_t					cacheOffset;
4382 	vm_page_reservation		reservation;
4383 	bool					isWrite;
4384 
4385 	// return values
4386 	vm_page*				page;
4387 	bool					restart;
4388 	bool					pageAllocated;
4389 
4390 
4391 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4392 		:
4393 		addressSpaceLocker(addressSpace, true),
4394 		map(addressSpace->TranslationMap()),
4395 		isWrite(isWrite)
4396 	{
4397 	}
4398 
4399 	~PageFaultContext()
4400 	{
4401 		UnlockAll();
4402 		vm_page_unreserve_pages(&reservation);
4403 	}
4404 
4405 	void Prepare(VMCache* topCache, off_t cacheOffset)
4406 	{
4407 		this->topCache = topCache;
4408 		this->cacheOffset = cacheOffset;
4409 		page = NULL;
4410 		restart = false;
4411 		pageAllocated = false;
4412 
4413 		cacheChainLocker.SetTo(topCache);
4414 	}
4415 
4416 	void UnlockAll(VMCache* exceptCache = NULL)
4417 	{
4418 		topCache = NULL;
4419 		addressSpaceLocker.Unlock();
4420 		cacheChainLocker.Unlock(exceptCache);
4421 	}
4422 };
4423 
4424 
4425 /*!	Gets the page that should be mapped into the area.
4426 	Returns an error code other than \c B_OK, if the page couldn't be found or
4427 	paged in. The locking state of the address space and the caches is undefined
4428 	in that case.
4429 	Returns \c B_OK with \c context.restart set to \c true, if the function
4430 	had to unlock the address space and all caches and is supposed to be called
4431 	again.
4432 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4433 	found. It is returned in \c context.page. The address space will still be
4434 	locked as well as all caches starting from the top cache to at least the
4435 	cache the page lives in.
4436 */
4437 static status_t
4438 fault_get_page(PageFaultContext& context)
4439 {
4440 	VMCache* cache = context.topCache;
4441 	VMCache* lastCache = NULL;
4442 	vm_page* page = NULL;
4443 
4444 	while (cache != NULL) {
4445 		// We already hold the lock of the cache at this point.
4446 
4447 		lastCache = cache;
4448 
4449 		page = cache->LookupPage(context.cacheOffset);
4450 		if (page != NULL && page->busy) {
4451 			// page must be busy -- wait for it to become unbusy
4452 			context.UnlockAll(cache);
4453 			cache->ReleaseRefLocked();
4454 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4455 
4456 			// restart the whole process
4457 			context.restart = true;
4458 			return B_OK;
4459 		}
4460 
4461 		if (page != NULL)
4462 			break;
4463 
4464 		// The current cache does not contain the page we're looking for.
4465 
4466 		// see if the backing store has it
4467 		if (cache->HasPage(context.cacheOffset)) {
4468 			// insert a fresh page and mark it busy -- we're going to read it in
4469 			page = vm_page_allocate_page(&context.reservation,
4470 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4471 			cache->InsertPage(page, context.cacheOffset);
4472 
4473 			// We need to unlock all caches and the address space while reading
4474 			// the page in. Keep a reference to the cache around.
4475 			cache->AcquireRefLocked();
4476 			context.UnlockAll();
4477 
4478 			// read the page in
4479 			generic_io_vec vec;
4480 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4481 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4482 
4483 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4484 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4485 
4486 			cache->Lock();
4487 
4488 			if (status < B_OK) {
4489 				// on error remove and free the page
4490 				dprintf("reading page from cache %p returned: %s!\n",
4491 					cache, strerror(status));
4492 
4493 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4494 				cache->RemovePage(page);
4495 				vm_page_set_state(page, PAGE_STATE_FREE);
4496 
4497 				cache->ReleaseRefAndUnlock();
4498 				return status;
4499 			}
4500 
4501 			// mark the page unbusy again
4502 			cache->MarkPageUnbusy(page);
4503 
4504 			DEBUG_PAGE_ACCESS_END(page);
4505 
4506 			// Since we needed to unlock everything temporarily, the area
4507 			// situation might have changed. So we need to restart the whole
4508 			// process.
4509 			cache->ReleaseRefAndUnlock();
4510 			context.restart = true;
4511 			return B_OK;
4512 		}
4513 
4514 		cache = context.cacheChainLocker.LockSourceCache();
4515 	}
4516 
4517 	if (page == NULL) {
4518 		// There was no adequate page, determine the cache for a clean one.
4519 		// Read-only pages come in the deepest cache, only the top most cache
4520 		// may have direct write access.
4521 		cache = context.isWrite ? context.topCache : lastCache;
4522 
4523 		// allocate a clean page
4524 		page = vm_page_allocate_page(&context.reservation,
4525 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4526 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4527 			page->physical_page_number));
4528 
4529 		// insert the new page into our cache
4530 		cache->InsertPage(page, context.cacheOffset);
4531 		context.pageAllocated = true;
4532 	} else if (page->Cache() != context.topCache && context.isWrite) {
4533 		// We have a page that has the data we want, but in the wrong cache
4534 		// object so we need to copy it and stick it into the top cache.
4535 		vm_page* sourcePage = page;
4536 
4537 		// TODO: If memory is low, it might be a good idea to steal the page
4538 		// from our source cache -- if possible, that is.
4539 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4540 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4541 
4542 		// To not needlessly kill concurrency we unlock all caches but the top
4543 		// one while copying the page. Lacking another mechanism to ensure that
4544 		// the source page doesn't disappear, we mark it busy.
4545 		sourcePage->busy = true;
4546 		context.cacheChainLocker.UnlockKeepRefs(true);
4547 
4548 		// copy the page
4549 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4550 			sourcePage->physical_page_number * B_PAGE_SIZE);
4551 
4552 		context.cacheChainLocker.RelockCaches(true);
4553 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4554 
4555 		// insert the new page into our cache
4556 		context.topCache->InsertPage(page, context.cacheOffset);
4557 		context.pageAllocated = true;
4558 	} else
4559 		DEBUG_PAGE_ACCESS_START(page);
4560 
4561 	context.page = page;
4562 	return B_OK;
4563 }
4564 
4565 
4566 /*!	Makes sure the address in the given address space is mapped.
4567 
4568 	\param addressSpace The address space.
4569 	\param originalAddress The address. Doesn't need to be page aligned.
4570 	\param isWrite If \c true the address shall be write-accessible.
4571 	\param isUser If \c true the access is requested by a userland team.
4572 	\param wirePage On success, if non \c NULL, the wired count of the page
4573 		mapped at the given address is incremented and the page is returned
4574 		via this parameter.
4575 	\return \c B_OK on success, another error code otherwise.
4576 */
4577 static status_t
4578 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4579 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4580 {
4581 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4582 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4583 		originalAddress, isWrite, isUser));
4584 
4585 	PageFaultContext context(addressSpace, isWrite);
4586 
4587 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4588 	status_t status = B_OK;
4589 
4590 	addressSpace->IncrementFaultCount();
4591 
4592 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4593 	// the pages upfront makes sure we don't have any cache locked, so that the
4594 	// page daemon/thief can do their job without problems.
4595 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4596 		originalAddress);
4597 	context.addressSpaceLocker.Unlock();
4598 	vm_page_reserve_pages(&context.reservation, reservePages,
4599 		addressSpace == VMAddressSpace::Kernel()
4600 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4601 
4602 	while (true) {
4603 		context.addressSpaceLocker.Lock();
4604 
4605 		// get the area the fault was in
4606 		VMArea* area = addressSpace->LookupArea(address);
4607 		if (area == NULL) {
4608 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4609 				"space\n", originalAddress);
4610 			TPF(PageFaultError(-1,
4611 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4612 			status = B_BAD_ADDRESS;
4613 			break;
4614 		}
4615 
4616 		// check permissions
4617 		uint32 protection = get_area_page_protection(area, address);
4618 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4619 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4620 				area->id, (void*)originalAddress);
4621 			TPF(PageFaultError(area->id,
4622 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4623 			status = B_PERMISSION_DENIED;
4624 			break;
4625 		}
4626 		if (isWrite && (protection
4627 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4628 			dprintf("write access attempted on write-protected area 0x%"
4629 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4630 			TPF(PageFaultError(area->id,
4631 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4632 			status = B_PERMISSION_DENIED;
4633 			break;
4634 		} else if (isExecute && (protection
4635 				& (B_EXECUTE_AREA
4636 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4637 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4638 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4639 			TPF(PageFaultError(area->id,
4640 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4641 			status = B_PERMISSION_DENIED;
4642 			break;
4643 		} else if (!isWrite && !isExecute && (protection
4644 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4645 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4646 				" at %p\n", area->id, (void*)originalAddress);
4647 			TPF(PageFaultError(area->id,
4648 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4649 			status = B_PERMISSION_DENIED;
4650 			break;
4651 		}
4652 
4653 		// We have the area, it was a valid access, so let's try to resolve the
4654 		// page fault now.
4655 		// At first, the top most cache from the area is investigated.
4656 
4657 		context.Prepare(vm_area_get_locked_cache(area),
4658 			address - area->Base() + area->cache_offset);
4659 
4660 		// See if this cache has a fault handler -- this will do all the work
4661 		// for us.
4662 		{
4663 			// Note, since the page fault is resolved with interrupts enabled,
4664 			// the fault handler could be called more than once for the same
4665 			// reason -- the store must take this into account.
4666 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4667 			if (status != B_BAD_HANDLER)
4668 				break;
4669 		}
4670 
4671 		// The top most cache has no fault handler, so let's see if the cache or
4672 		// its sources already have the page we're searching for (we're going
4673 		// from top to bottom).
4674 		status = fault_get_page(context);
4675 		if (status != B_OK) {
4676 			TPF(PageFaultError(area->id, status));
4677 			break;
4678 		}
4679 
4680 		if (context.restart)
4681 			continue;
4682 
4683 		// All went fine, all there is left to do is to map the page into the
4684 		// address space.
4685 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4686 			context.page));
4687 
4688 		// If the page doesn't reside in the area's cache, we need to make sure
4689 		// it's mapped in read-only, so that we cannot overwrite someone else's
4690 		// data (copy-on-write)
4691 		uint32 newProtection = protection;
4692 		if (context.page->Cache() != context.topCache && !isWrite)
4693 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4694 
4695 		bool unmapPage = false;
4696 		bool mapPage = true;
4697 
4698 		// check whether there's already a page mapped at the address
4699 		context.map->Lock();
4700 
4701 		phys_addr_t physicalAddress;
4702 		uint32 flags;
4703 		vm_page* mappedPage = NULL;
4704 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4705 			&& (flags & PAGE_PRESENT) != 0
4706 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4707 				!= NULL) {
4708 			// Yep there's already a page. If it's ours, we can simply adjust
4709 			// its protection. Otherwise we have to unmap it.
4710 			if (mappedPage == context.page) {
4711 				context.map->ProtectPage(area, address, newProtection);
4712 					// Note: We assume that ProtectPage() is atomic (i.e.
4713 					// the page isn't temporarily unmapped), otherwise we'd have
4714 					// to make sure it isn't wired.
4715 				mapPage = false;
4716 			} else
4717 				unmapPage = true;
4718 		}
4719 
4720 		context.map->Unlock();
4721 
4722 		if (unmapPage) {
4723 			// If the page is wired, we can't unmap it. Wait until it is unwired
4724 			// again and restart. Note that the page cannot be wired for
4725 			// writing, since it isn't in the topmost cache. So we can safely
4726 			// ignore ranges wired for writing (our own and other concurrent
4727 			// wiring attempts in progress) and in fact have to do that to avoid
4728 			// a deadlock.
4729 			VMAreaUnwiredWaiter waiter;
4730 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4731 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4732 				// unlock everything and wait
4733 				if (context.pageAllocated) {
4734 					// ... but since we allocated a page and inserted it into
4735 					// the top cache, remove and free it first. Otherwise we'd
4736 					// have a page from a lower cache mapped while an upper
4737 					// cache has a page that would shadow it.
4738 					context.topCache->RemovePage(context.page);
4739 					vm_page_free_etc(context.topCache, context.page,
4740 						&context.reservation);
4741 				} else
4742 					DEBUG_PAGE_ACCESS_END(context.page);
4743 
4744 				context.UnlockAll();
4745 				waiter.waitEntry.Wait();
4746 				continue;
4747 			}
4748 
4749 			// Note: The mapped page is a page of a lower cache. We are
4750 			// guaranteed to have that cache locked, our new page is a copy of
4751 			// that page, and the page is not busy. The logic for that guarantee
4752 			// is as follows: Since the page is mapped, it must live in the top
4753 			// cache (ruled out above) or any of its lower caches, and there is
4754 			// (was before the new page was inserted) no other page in any
4755 			// cache between the top cache and the page's cache (otherwise that
4756 			// would be mapped instead). That in turn means that our algorithm
4757 			// must have found it and therefore it cannot be busy either.
4758 			DEBUG_PAGE_ACCESS_START(mappedPage);
4759 			unmap_page(area, address);
4760 			DEBUG_PAGE_ACCESS_END(mappedPage);
4761 		}
4762 
4763 		if (mapPage) {
4764 			if (map_page(area, context.page, address, newProtection,
4765 					&context.reservation) != B_OK) {
4766 				// Mapping can only fail when the page mapping object couldn't
4767 				// be allocated. Save for the missing mapping everything is
4768 				// fine, though. If this was a regular page fault, we'll simply
4769 				// leave and probably fault again. To make sure we'll have more
4770 				// luck then, we ensure that the minimum object reserve is
4771 				// available.
4772 				DEBUG_PAGE_ACCESS_END(context.page);
4773 
4774 				context.UnlockAll();
4775 
4776 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4777 						!= B_OK) {
4778 					// Apparently the situation is serious. Let's get ourselves
4779 					// killed.
4780 					status = B_NO_MEMORY;
4781 				} else if (wirePage != NULL) {
4782 					// The caller expects us to wire the page. Since
4783 					// object_cache_reserve() succeeded, we should now be able
4784 					// to allocate a mapping structure. Restart.
4785 					continue;
4786 				}
4787 
4788 				break;
4789 			}
4790 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4791 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4792 
4793 		// also wire the page, if requested
4794 		if (wirePage != NULL && status == B_OK) {
4795 			increment_page_wired_count(context.page);
4796 			*wirePage = context.page;
4797 		}
4798 
4799 		DEBUG_PAGE_ACCESS_END(context.page);
4800 
4801 		break;
4802 	}
4803 
4804 	return status;
4805 }
4806 
4807 
4808 status_t
4809 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4810 {
4811 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4812 }
4813 
4814 status_t
4815 vm_put_physical_page(addr_t vaddr, void* handle)
4816 {
4817 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4818 }
4819 
4820 
4821 status_t
4822 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4823 	void** _handle)
4824 {
4825 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4826 }
4827 
4828 status_t
4829 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4830 {
4831 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4832 }
4833 
4834 
4835 status_t
4836 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4837 {
4838 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4839 }
4840 
4841 status_t
4842 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4843 {
4844 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4845 }
4846 
4847 
4848 void
4849 vm_get_info(system_info* info)
4850 {
4851 	swap_get_info(info);
4852 
4853 	MutexLocker locker(sAvailableMemoryLock);
4854 	info->needed_memory = sNeededMemory;
4855 	info->free_memory = sAvailableMemory;
4856 }
4857 
4858 
4859 uint32
4860 vm_num_page_faults(void)
4861 {
4862 	return sPageFaults;
4863 }
4864 
4865 
4866 off_t
4867 vm_available_memory(void)
4868 {
4869 	MutexLocker locker(sAvailableMemoryLock);
4870 	return sAvailableMemory;
4871 }
4872 
4873 
4874 off_t
4875 vm_available_not_needed_memory(void)
4876 {
4877 	MutexLocker locker(sAvailableMemoryLock);
4878 	return sAvailableMemory - sNeededMemory;
4879 }
4880 
4881 
4882 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4883 	debugger.
4884 */
4885 off_t
4886 vm_available_not_needed_memory_debug(void)
4887 {
4888 	return sAvailableMemory - sNeededMemory;
4889 }
4890 
4891 
4892 size_t
4893 vm_kernel_address_space_left(void)
4894 {
4895 	return VMAddressSpace::Kernel()->FreeSpace();
4896 }
4897 
4898 
4899 void
4900 vm_unreserve_memory(size_t amount)
4901 {
4902 	mutex_lock(&sAvailableMemoryLock);
4903 
4904 	sAvailableMemory += amount;
4905 
4906 	mutex_unlock(&sAvailableMemoryLock);
4907 }
4908 
4909 
4910 status_t
4911 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4912 {
4913 	size_t reserve = kMemoryReserveForPriority[priority];
4914 
4915 	MutexLocker locker(sAvailableMemoryLock);
4916 
4917 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4918 
4919 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4920 		sAvailableMemory -= amount;
4921 		return B_OK;
4922 	}
4923 
4924 	if (timeout <= 0)
4925 		return B_NO_MEMORY;
4926 
4927 	// turn timeout into an absolute timeout
4928 	timeout += system_time();
4929 
4930 	// loop until we've got the memory or the timeout occurs
4931 	do {
4932 		sNeededMemory += amount;
4933 
4934 		// call the low resource manager
4935 		locker.Unlock();
4936 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4937 			B_ABSOLUTE_TIMEOUT, timeout);
4938 		locker.Lock();
4939 
4940 		sNeededMemory -= amount;
4941 
4942 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4943 			sAvailableMemory -= amount;
4944 			return B_OK;
4945 		}
4946 	} while (timeout > system_time());
4947 
4948 	return B_NO_MEMORY;
4949 }
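
// A minimal usage sketch (hypothetical caller, not from this file): a
// successful reservation is expected to be balanced by vm_unreserve_memory()
// once the memory is no longer needed:
//
//	if (vm_try_reserve_memory(size, VM_PRIORITY_USER, 1000000) == B_OK) {
//		// ... commit and use the memory ...
//		vm_unreserve_memory(size);
//	}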
4950 
4951 
4952 status_t
4953 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4954 {
4955 	// NOTE: The caller is responsible for synchronizing calls to this function!
4956 
4957 	AddressSpaceReadLocker locker;
4958 	VMArea* area;
4959 	status_t status = locker.SetFromArea(id, area);
4960 	if (status != B_OK)
4961 		return status;
4962 
4963 	// nothing to do, if the type doesn't change
4964 	uint32 oldType = area->MemoryType();
4965 	if (type == oldType)
4966 		return B_OK;
4967 
4968 	// set the memory type of the area and the mapped pages
4969 	VMTranslationMap* map = area->address_space->TranslationMap();
4970 	map->Lock();
4971 	area->SetMemoryType(type);
4972 	map->ProtectArea(area, area->protection);
4973 	map->Unlock();
4974 
4975 	// set the physical memory type
4976 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4977 	if (error != B_OK) {
4978 		// reset the memory type of the area and the mapped pages
4979 		map->Lock();
4980 		area->SetMemoryType(oldType);
4981 		map->ProtectArea(area, area->protection);
4982 		map->Unlock();
4983 		return error;
4984 	}
4985 
4986 	return B_OK;
4988 }
4989 
4990 
4991 /*!	This function enforces some protection properties:
4992 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4993 	 - if B_EXECUTE_AREA is set, B_KERNEL_EXECUTE_AREA is set as well
4994 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4995 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4996 	   and B_KERNEL_WRITE_AREA.
4997 */
4998 static void
4999 fix_protection(uint32* protection)
5000 {
5001 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5002 		if ((*protection & B_USER_PROTECTION) == 0
5003 			|| (*protection & B_WRITE_AREA) != 0)
5004 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5005 		else
5006 			*protection |= B_KERNEL_READ_AREA;
5007 		if ((*protection & B_EXECUTE_AREA) != 0)
5008 			*protection |= B_KERNEL_EXECUTE_AREA;
5009 	}
5010 }
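
// For example, given the rules above: B_READ_AREA alone becomes
// B_READ_AREA | B_KERNEL_READ_AREA, B_READ_AREA | B_WRITE_AREA additionally
// gets B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, and a protection of 0
// defaults to B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA.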
5011 
5012 
5013 static void
5014 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5015 {
5016 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5017 	info->area = area->id;
5018 	info->address = (void*)area->Base();
5019 	info->size = area->Size();
5020 	info->protection = area->protection;
5021 	info->lock = B_FULL_LOCK;
5022 	info->team = area->address_space->ID();
5023 	info->copy_count = 0;
5024 	info->in_count = 0;
5025 	info->out_count = 0;
5026 		// TODO: retrieve real values here!
5027 
5028 	VMCache* cache = vm_area_get_locked_cache(area);
5029 
5030 	// Note, this is a simplification; the cache could be larger than this area
5031 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5032 
5033 	vm_area_put_locked_cache(cache);
5034 }
5035 
5036 
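/*!	Resizes the area with the given ID (and all other areas attached to the
	same cache) to \a newSize. Only areas backed by a CACHE_TYPE_RAM cache can
	be resized. When shrinking, the function first waits for wired ranges in
	the cut-off region to become unwired; if anything fails, it tries to
	restore the areas to their original size.
*/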
5037 static status_t
5038 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5039 {
5040 	// is newSize a multiple of B_PAGE_SIZE?
5041 	if (newSize & (B_PAGE_SIZE - 1))
5042 		return B_BAD_VALUE;
5043 
5044 	// lock all affected address spaces and the cache
5045 	VMArea* area;
5046 	VMCache* cache;
5047 
5048 	MultiAddressSpaceLocker locker;
5049 	AreaCacheLocker cacheLocker;
5050 
5051 	status_t status;
5052 	size_t oldSize;
5053 	bool anyKernelArea;
5054 	bool restart;
5055 
5056 	do {
5057 		anyKernelArea = false;
5058 		restart = false;
5059 
5060 		locker.Unset();
5061 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5062 		if (status != B_OK)
5063 			return status;
5064 		cacheLocker.SetTo(cache, true);	// already locked
5065 
5066 		// enforce restrictions
5067 		if (!kernel) {
5068 			if ((area->protection & B_KERNEL_AREA) != 0)
5069 				return B_NOT_ALLOWED;
5070 			// TODO: Enforce all restrictions (team, etc.)!
5071 		}
5072 
5073 		oldSize = area->Size();
5074 		if (newSize == oldSize)
5075 			return B_OK;
5076 
5077 		if (cache->type != CACHE_TYPE_RAM)
5078 			return B_NOT_ALLOWED;
5079 
5080 		if (oldSize < newSize) {
5081 			// We need to check if all areas of this cache can be resized.
5082 			for (VMArea* current = cache->areas; current != NULL;
5083 					current = current->cache_next) {
5084 				if (!current->address_space->CanResizeArea(current, newSize))
5085 					return B_ERROR;
5086 				anyKernelArea
5087 					|= current->address_space == VMAddressSpace::Kernel();
5088 			}
5089 		} else {
5090 			// We're shrinking the areas, so we must make sure the affected
5091 			// ranges are not wired.
5092 			for (VMArea* current = cache->areas; current != NULL;
5093 					current = current->cache_next) {
5094 				anyKernelArea
5095 					|= current->address_space == VMAddressSpace::Kernel();
5096 
5097 				if (wait_if_area_range_is_wired(current,
5098 						current->Base() + newSize, oldSize - newSize, &locker,
5099 						&cacheLocker)) {
5100 					restart = true;
5101 					break;
5102 				}
5103 			}
5104 		}
5105 	} while (restart);
5106 
5107 	// Okay, looks good so far, so let's do it
5108 
5109 	int priority = kernel && anyKernelArea
5110 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5111 	uint32 allocationFlags = kernel && anyKernelArea
5112 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5113 
5114 	if (oldSize < newSize) {
5115 		// Growing the cache can fail, so we do it first.
5116 		status = cache->Resize(cache->virtual_base + newSize, priority);
5117 		if (status != B_OK)
5118 			return status;
5119 	}
5120 
5121 	for (VMArea* current = cache->areas; current != NULL;
5122 			current = current->cache_next) {
5123 		status = current->address_space->ResizeArea(current, newSize,
5124 			allocationFlags);
5125 		if (status != B_OK)
5126 			break;
5127 
5128 		// We also need to unmap all pages beyond the new size, if the area has
5129 		// shrunk
5130 		if (newSize < oldSize) {
5131 			VMCacheChainLocker cacheChainLocker(cache);
5132 			cacheChainLocker.LockAllSourceCaches();
5133 
5134 			unmap_pages(current, current->Base() + newSize,
5135 				oldSize - newSize);
5136 
5137 			cacheChainLocker.Unlock(cache);
5138 		}
5139 	}
5140 
5141 	if (status == B_OK) {
5142 		// Shrink or grow individual page protections if in use.
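		// (Each byte packs the 4-bit protections of two consecutive pages:
		// the low nibble for the even-indexed page, the high nibble for the
		// odd-indexed one -- hence the "+ 1) / 2" size computations below.)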
5143 		if (area->page_protections != NULL) {
5144 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5145 			uint8* newProtections
5146 				= (uint8*)realloc(area->page_protections, bytes);
5147 			if (newProtections == NULL)
5148 				status = B_NO_MEMORY;
5149 			else {
5150 				area->page_protections = newProtections;
5151 
5152 				if (oldSize < newSize) {
5153 					// init the additional page protections to that of the area
5154 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5155 					uint32 areaProtection = area->protection
5156 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5157 					memset(area->page_protections + offset,
5158 						areaProtection | (areaProtection << 4), bytes - offset);
5159 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5160 						uint8& entry = area->page_protections[offset - 1];
5161 						entry = (entry & 0x0f) | (areaProtection << 4);
5162 					}
5163 				}
5164 			}
5165 		}
5166 	}
5167 
5168 	// shrinking the cache can't fail, so we do it now
5169 	if (status == B_OK && newSize < oldSize)
5170 		status = cache->Resize(cache->virtual_base + newSize, priority);
5171 
5172 	if (status != B_OK) {
5173 		// Something failed -- resize the areas back to their original size.
5174 		// This can fail, too, in which case we're seriously screwed.
5175 		for (VMArea* current = cache->areas; current != NULL;
5176 				current = current->cache_next) {
5177 			if (current->address_space->ResizeArea(current, oldSize,
5178 					allocationFlags) != B_OK) {
5179 				panic("vm_resize_area(): Failed and not being able to restore "
5180 					"original state.");
5181 			}
5182 		}
5183 
5184 		cache->Resize(cache->virtual_base + oldSize, priority);
5185 	}
5186 
5187 	// TODO: we must honour the lock restrictions of this area
5188 	return status;
5189 }
5190 
5191 
5192 status_t
5193 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5194 {
5195 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5196 }
5197 
5198 
5199 status_t
5200 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5201 {
5202 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5203 }
5204 
5205 
5206 status_t
5207 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5208 	bool user)
5209 {
5210 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5211 }
5212 
5213 
5214 void
5215 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5216 {
5217 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5218 }
5219 
5220 
5221 /*!	Copies a range of memory directly from/to a page that might not be mapped
5222 	at the moment.
5223 
5224 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5225 	walks through the respective area's cache chain to find the physical page
5226 	and copies from/to it directly.
5227 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5228 	must not cross a page boundary.
5229 
5230 	\param teamID The team ID identifying the address space \a unsafeMemory is
5231 		to be interpreted in. Ignored if \a unsafeMemory is a kernel address
5232 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5233 		is passed, the address space of the thread returned by
5234 		debug_get_debugged_thread() is used.
5235 	\param unsafeMemory The start of the unsafe memory range to be copied
5236 		from/to.
5237 	\param buffer A safely accessible kernel buffer to be copied from/to.
5238 	\param size The number of bytes to be copied.
5239 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5240 		\a unsafeMemory, the other way around otherwise.
5241 */
5242 status_t
5243 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5244 	size_t size, bool copyToUnsafe)
5245 {
5246 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5247 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5248 		return B_BAD_VALUE;
5249 	}
5250 
5251 	// get the address space for the debugged thread
5252 	VMAddressSpace* addressSpace;
5253 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5254 		addressSpace = VMAddressSpace::Kernel();
5255 	} else if (teamID == B_CURRENT_TEAM) {
5256 		Thread* thread = debug_get_debugged_thread();
5257 		if (thread == NULL || thread->team == NULL)
5258 			return B_BAD_ADDRESS;
5259 
5260 		addressSpace = thread->team->address_space;
5261 	} else
5262 		addressSpace = VMAddressSpace::DebugGet(teamID);
5263 
5264 	if (addressSpace == NULL)
5265 		return B_BAD_ADDRESS;
5266 
5267 	// get the area
5268 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5269 	if (area == NULL)
5270 		return B_BAD_ADDRESS;
5271 
5272 	// search the page
5273 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5274 		+ area->cache_offset;
5275 	VMCache* cache = area->cache;
5276 	vm_page* page = NULL;
5277 	while (cache != NULL) {
5278 		page = cache->DebugLookupPage(cacheOffset);
5279 		if (page != NULL)
5280 			break;
5281 
5282 		// Page not found in this cache -- if it is paged out, we must not try
5283 		// to get it from lower caches.
5284 		if (cache->DebugHasPage(cacheOffset))
5285 			break;
5286 
5287 		cache = cache->source;
5288 	}
5289 
5290 	if (page == NULL)
5291 		return B_UNSUPPORTED;
5292 
5293 	// copy from/to physical memory
5294 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5295 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5296 
5297 	if (copyToUnsafe) {
5298 		if (page->Cache() != area->cache)
5299 			return B_UNSUPPORTED;
5300 
5301 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5302 	}
5303 
5304 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5305 }
5306 
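/*	Editor's note (illustrative, not part of the original vm.cpp): a kernel
	debugger command might use vm_debug_copy_page_memory() roughly like this
	to peek at a word of the debugged team's memory; the helper name and the
	origin of "userAddress" are hypothetical.

		static void
		debug_peek_word(void* userAddress)
		{
			uint32 value;
			if (vm_debug_copy_page_memory(B_CURRENT_TEAM, userAddress, &value,
					sizeof(value), false) == B_OK)
				kprintf("%p: %#" B_PRIx32 "\n", userAddress, value);
			else
				kprintf("failed to read %p\n", userAddress);
		}
*/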
5307 
5308 //	#pragma mark - kernel public API
5309 
5310 
5311 status_t
5312 user_memcpy(void* to, const void* from, size_t size)
5313 {
5314 	// don't allow address overflows
5315 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5316 		return B_BAD_ADDRESS;
5317 
5318 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5319 		return B_BAD_ADDRESS;
5320 
5321 	return B_OK;
5322 }
5323 
5324 
5325 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5326 	the string in \a to, NULL-terminating the result.
5327 
5328 	\param to Pointer to the destination C-string.
5329 	\param from Pointer to the source C-string.
5330 	\param size Size in bytes of the string buffer pointed to by \a to.
5331 
5332 	\return strlen(\a from).
5333 	\return \c strlen(\a from), or a negative error code on failure.
5334 ssize_t
5335 user_strlcpy(char* to, const char* from, size_t size)
5336 {
5337 	if (to == NULL && size != 0)
5338 		return B_BAD_VALUE;
5339 	if (from == NULL)
5340 		return B_BAD_ADDRESS;
5341 
5342 	// limit size to avoid address overflows
5343 	size_t maxSize = std::min((addr_t)size,
5344 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5345 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5346 		// the source address might still overflow.
5347 
5348 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5349 
5350 	// If we hit the address overflow boundary, fail.
5351 	if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5352 			&& maxSize < size)) {
5353 		return B_BAD_ADDRESS;
5354 	}
5355 
5356 	return result;
5357 }
5358 
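/*	Editor's note (illustrative, not part of the original vm.cpp): the return
	value of user_strlcpy() distinguishes faults (negative) from truncation
	(>= the buffer size), so a caller that wants to reject overlong names
	could check both; "userName" is a hypothetical userland pointer.

		char name[B_OS_NAME_LENGTH];
		ssize_t length = user_strlcpy(name, userName, sizeof(name));
		if (length < 0)
			return length;
		if ((size_t)length >= sizeof(name))
			return B_NAME_TOO_LONG;
*/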
5359 
5360 status_t
5361 user_memset(void* s, char c, size_t count)
5362 {
5363 	// don't allow address overflows
5364 	if ((addr_t)s + count < (addr_t)s)
5365 		return B_BAD_ADDRESS;
5366 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5367 		return B_BAD_ADDRESS;
5368 
5369 	return B_OK;
5370 }
5371 
5372 
5373 /*!	Wires a single page at the given address.
5374 
5375 	\param team The team whose address space the address belongs to. Supports
5376 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5377 		parameter is ignored.
5378 	\param address address The virtual address to wire down. Does not need to
5379 	\param address The virtual address to wire down. Does not need to
5380 	\param writable If \c true the page shall be writable.
5381 	\param info On success the info is filled in, among other things
5382 		containing the physical address the given virtual one translates to.
5383 	\return \c B_OK, when the page could be wired, another error code otherwise.
5384 	\return \c B_OK if the page could be wired, another error code otherwise.
5385 status_t
5386 vm_wire_page(team_id team, addr_t address, bool writable,
5387 	VMPageWiringInfo* info)
5388 {
5389 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5390 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5391 
5392 	// compute the page protection that is required
5393 	bool isUser = IS_USER_ADDRESS(address);
5394 	uint32 requiredProtection = PAGE_PRESENT
5395 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5396 	if (writable)
5397 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5398 
5399 	// get and read lock the address space
5400 	VMAddressSpace* addressSpace = NULL;
5401 	if (isUser) {
5402 		if (team == B_CURRENT_TEAM)
5403 			addressSpace = VMAddressSpace::GetCurrent();
5404 		else
5405 			addressSpace = VMAddressSpace::Get(team);
5406 	} else
5407 		addressSpace = VMAddressSpace::GetKernel();
5408 	if (addressSpace == NULL)
5409 		return B_ERROR;
5410 
5411 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5412 
5413 	VMTranslationMap* map = addressSpace->TranslationMap();
5414 	status_t error = B_OK;
5415 
5416 	// get the area
5417 	VMArea* area = addressSpace->LookupArea(pageAddress);
5418 	if (area == NULL) {
5419 		addressSpace->Put();
5420 		return B_BAD_ADDRESS;
5421 	}
5422 
5423 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5424 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5425 
5426 	// mark the area range wired
5427 	area->Wire(&info->range);
5428 
5429 	// Lock the area's cache chain and the translation map. Needed to look
5430 	// up the page and play with its wired count.
5431 	cacheChainLocker.LockAllSourceCaches();
5432 	map->Lock();
5433 
5434 	phys_addr_t physicalAddress;
5435 	uint32 flags;
5436 	vm_page* page;
5437 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5438 		&& (flags & requiredProtection) == requiredProtection
5439 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5440 			!= NULL) {
5441 		// Already mapped with the correct permissions -- just increment
5442 		// the page's wired count.
5443 		increment_page_wired_count(page);
5444 
5445 		map->Unlock();
5446 		cacheChainLocker.Unlock();
5447 		addressSpaceLocker.Unlock();
5448 	} else {
5449 		// Let vm_soft_fault() map the page for us, if possible. We need
5450 		// to fully unlock to avoid deadlocks. Since we have already
5451 		// wired the area itself, nothing disturbing will happen with it
5452 		// in the meantime.
5453 		map->Unlock();
5454 		cacheChainLocker.Unlock();
5455 		addressSpaceLocker.Unlock();
5456 
5457 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5458 			isUser, &page);
5459 
5460 		if (error != B_OK) {
5461 			// The page could not be mapped -- clean up.
5462 			VMCache* cache = vm_area_get_locked_cache(area);
5463 			area->Unwire(&info->range);
5464 			cache->ReleaseRefAndUnlock();
5465 			addressSpace->Put();
5466 			return error;
5467 		}
5468 	}
5469 
5470 	info->physicalAddress
5471 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5472 			+ address % B_PAGE_SIZE;
5473 	info->page = page;
5474 
5475 	return B_OK;
5476 }
5477 
5478 
5479 /*!	Unwires a single page previously wired via vm_wire_page().
5480 
5481 	\param info The same object passed to vm_wire_page() before.
5482 */
5483 void
5484 vm_unwire_page(VMPageWiringInfo* info)
5485 {
5486 	// lock the address space
5487 	VMArea* area = info->range.area;
5488 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5489 		// takes over our reference
5490 
5491 	// lock the top cache
5492 	VMCache* cache = vm_area_get_locked_cache(area);
5493 	VMCacheChainLocker cacheChainLocker(cache);
5494 
5495 	if (info->page->Cache() != cache) {
5496 		// The page is not in the top cache, so we lock the whole cache chain
5497 		// before touching the page's wired count.
5498 		cacheChainLocker.LockAllSourceCaches();
5499 	}
5500 
5501 	decrement_page_wired_count(info->page);
5502 
5503 	// remove the wired range from the area
5504 	area->Unwire(&info->range);
5505 
5506 	cacheChainLocker.Unlock();
5507 }
5508 
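/*	Editor's note (illustrative, not part of the original vm.cpp): a typical
	wire/unwire pairing, e.g. to obtain the physical address of a single user
	page for a small device descriptor; "buffer" and the device programming
	step are hypothetical.

		VMPageWiringInfo wiringInfo;
		status_t status = vm_wire_page(B_CURRENT_TEAM, (addr_t)buffer, true,
			&wiringInfo);
		if (status != B_OK)
			return status;

		phys_addr_t physicalAddress = wiringInfo.physicalAddress;
		// ... hand physicalAddress to the device ...

		vm_unwire_page(&wiringInfo);
*/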
5509 
5510 /*!	Wires down the given address range in the specified team's address space.
5511 
5512 	If successful the function
5513 	- acquires a reference to the specified team's address space,
5514 	- adds respective wired ranges to all areas that intersect with the given
5515 	  address range,
5516 	- makes sure all pages in the given address range are mapped with the
5517 	  requested access permissions and increments their wired count.
5518 
5519 	It fails, when \a team doesn't specify a valid address space, when any part
5520 	It fails when \a team doesn't specify a valid address space, when any part
5521 	areas don't allow mapping with the requested permissions, or when mapping
5522 	failed for another reason.
5523 
5524 	When successful the call must be balanced by an unlock_memory_etc() call with
5525 	the exact same parameters.
5526 
5527 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5528 		supported.
5529 	\param address The start of the address range to be wired.
5530 	\param numBytes The size of the address range to be wired.
5531 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5532 		requests that the range must be wired writable ("read from device
5533 		into memory").
5534 	\return \c B_OK on success, another error code otherwise.
5535 */
5536 status_t
5537 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5538 {
5539 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5540 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5541 
5542 	// compute the page protection that is required
5543 	bool isUser = IS_USER_ADDRESS(address);
5544 	bool writable = (flags & B_READ_DEVICE) == 0;
5545 	uint32 requiredProtection = PAGE_PRESENT
5546 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5547 	if (writable)
5548 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5549 
5550 	uint32 mallocFlags = isUser
5551 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5552 
5553 	// get and read lock the address space
5554 	VMAddressSpace* addressSpace = NULL;
5555 	if (isUser) {
5556 		if (team == B_CURRENT_TEAM)
5557 			addressSpace = VMAddressSpace::GetCurrent();
5558 		else
5559 			addressSpace = VMAddressSpace::Get(team);
5560 	} else
5561 		addressSpace = VMAddressSpace::GetKernel();
5562 	if (addressSpace == NULL)
5563 		return B_ERROR;
5564 
5565 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5566 		// We get a new address space reference here. The one we got above will
5567 		// be freed by unlock_memory_etc().
5568 
5569 	VMTranslationMap* map = addressSpace->TranslationMap();
5570 	status_t error = B_OK;
5571 
5572 	// iterate through all concerned areas
5573 	addr_t nextAddress = lockBaseAddress;
5574 	while (nextAddress != lockEndAddress) {
5575 		// get the next area
5576 		VMArea* area = addressSpace->LookupArea(nextAddress);
5577 		if (area == NULL) {
5578 			error = B_BAD_ADDRESS;
5579 			break;
5580 		}
5581 
5582 		addr_t areaStart = nextAddress;
5583 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5584 
5585 		// allocate the wired range (do that before locking the cache to avoid
5586 		// deadlocks)
5587 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5588 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5589 		if (range == NULL) {
5590 			error = B_NO_MEMORY;
5591 			break;
5592 		}
5593 
5594 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5595 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5596 
5597 		// mark the area range wired
5598 		area->Wire(range);
5599 
5600 		// Depending on the area cache type and the wiring, we may not need to
5601 		// look at the individual pages.
5602 		if (area->cache_type == CACHE_TYPE_NULL
5603 			|| area->cache_type == CACHE_TYPE_DEVICE
5604 			|| area->wiring == B_FULL_LOCK
5605 			|| area->wiring == B_CONTIGUOUS) {
5606 			nextAddress = areaEnd;
5607 			continue;
5608 		}
5609 
5610 		// Lock the area's cache chain and the translation map. Needed to look
5611 		// up pages and play with their wired count.
5612 		cacheChainLocker.LockAllSourceCaches();
5613 		map->Lock();
5614 
5615 		// iterate through the pages and wire them
5616 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5617 			phys_addr_t physicalAddress;
5618 			uint32 flags;
5619 
5620 			vm_page* page;
5621 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5622 				&& (flags & requiredProtection) == requiredProtection
5623 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5624 					!= NULL) {
5625 				// Already mapped with the correct permissions -- just increment
5626 				// the page's wired count.
5627 				increment_page_wired_count(page);
5628 			} else {
5629 				// Let vm_soft_fault() map the page for us, if possible. We need
5630 				// to fully unlock to avoid deadlocks. Since we have already
5631 				// wired the area itself, nothing disturbing will happen with it
5632 				// in the meantime.
5633 				map->Unlock();
5634 				cacheChainLocker.Unlock();
5635 				addressSpaceLocker.Unlock();
5636 
5637 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5638 					false, isUser, &page);
5639 
5640 				addressSpaceLocker.Lock();
5641 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5642 				cacheChainLocker.LockAllSourceCaches();
5643 				map->Lock();
5644 			}
5645 
5646 			if (error != B_OK)
5647 				break;
5648 		}
5649 
5650 		map->Unlock();
5651 
5652 		if (error == B_OK) {
5653 			cacheChainLocker.Unlock();
5654 		} else {
5655 			// An error occurred, so abort right here. If the current address
5656 			// is the first in this area, unwire the area, since we won't get
5657 			// to it when reverting what we've done so far.
5658 			if (nextAddress == areaStart) {
5659 				area->Unwire(range);
5660 				cacheChainLocker.Unlock();
5661 				range->~VMAreaWiredRange();
5662 				free_etc(range, mallocFlags);
5663 			} else
5664 				cacheChainLocker.Unlock();
5665 
5666 			break;
5667 		}
5668 	}
5669 
5670 	if (error != B_OK) {
5671 		// An error occurred, so unwire all that we've already wired. Note that
5672 		// even if not a single page was wired, unlock_memory_etc() is called
5673 		// to put the address space reference.
5674 		addressSpaceLocker.Unlock();
5675 		unlock_memory_etc(team, (void*)lockBaseAddress,
5676 			nextAddress - lockBaseAddress, flags);
5677 	}
5678 
5679 	return error;
5680 }
5681 
5682 
5683 status_t
5684 lock_memory(void* address, size_t numBytes, uint32 flags)
5685 {
5686 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5687 }
5688 
5689 
5690 /*!	Unwires an address range previously wired with lock_memory_etc().
5691 
5692 	Note that a call to this function must balance a previous lock_memory_etc()
5693 	call with exactly the same parameters.
5694 */
5695 status_t
5696 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5697 {
5698 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5699 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5700 
5701 	// compute the page protection that is required
5702 	bool isUser = IS_USER_ADDRESS(address);
5703 	bool writable = (flags & B_READ_DEVICE) == 0;
5704 	uint32 requiredProtection = PAGE_PRESENT
5705 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5706 	if (writable)
5707 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5708 
5709 	uint32 mallocFlags = isUser
5710 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5711 
5712 	// get and read lock the address space
5713 	VMAddressSpace* addressSpace = NULL;
5714 	if (isUser) {
5715 		if (team == B_CURRENT_TEAM)
5716 			addressSpace = VMAddressSpace::GetCurrent();
5717 		else
5718 			addressSpace = VMAddressSpace::Get(team);
5719 	} else
5720 		addressSpace = VMAddressSpace::GetKernel();
5721 	if (addressSpace == NULL)
5722 		return B_ERROR;
5723 
5724 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5725 		// Take over the address space reference. We don't unlock until we're
5726 		// done.
5727 
5728 	VMTranslationMap* map = addressSpace->TranslationMap();
5729 	status_t error = B_OK;
5730 
5731 	// iterate through all concerned areas
5732 	addr_t nextAddress = lockBaseAddress;
5733 	while (nextAddress != lockEndAddress) {
5734 		// get the next area
5735 		VMArea* area = addressSpace->LookupArea(nextAddress);
5736 		if (area == NULL) {
5737 			error = B_BAD_ADDRESS;
5738 			break;
5739 		}
5740 
5741 		addr_t areaStart = nextAddress;
5742 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5743 
5744 		// Lock the area's top cache. This is a requirement for
5745 		// VMArea::Unwire().
5746 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5747 
5748 		// Depending on the area cache type and the wiring, we may not need to
5749 		// look at the individual pages.
5750 		if (area->cache_type == CACHE_TYPE_NULL
5751 			|| area->cache_type == CACHE_TYPE_DEVICE
5752 			|| area->wiring == B_FULL_LOCK
5753 			|| area->wiring == B_CONTIGUOUS) {
5754 			// unwire the range (to avoid deadlocks we delete the range after
5755 			// unlocking the cache)
5756 			nextAddress = areaEnd;
5757 			VMAreaWiredRange* range = area->Unwire(areaStart,
5758 				areaEnd - areaStart, writable);
5759 			cacheChainLocker.Unlock();
5760 			if (range != NULL) {
5761 				range->~VMAreaWiredRange();
5762 				free_etc(range, mallocFlags);
5763 			}
5764 			continue;
5765 		}
5766 
5767 		// Lock the area's cache chain and the translation map. Needed to look
5768 		// up pages and play with their wired count.
5769 		cacheChainLocker.LockAllSourceCaches();
5770 		map->Lock();
5771 
5772 		// iterate through the pages and unwire them
5773 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5774 			phys_addr_t physicalAddress;
5775 			uint32 flags;
5776 
5777 			vm_page* page;
5778 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5779 				&& (flags & PAGE_PRESENT) != 0
5780 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5781 					!= NULL) {
5782 				// The page is still mapped -- just decrement its wired
5783 				// count.
5784 				decrement_page_wired_count(page);
5785 			} else {
5786 				panic("unlock_memory_etc(): Failed to unwire page: address "
5787 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5788 					nextAddress);
5789 				error = B_BAD_VALUE;
5790 				break;
5791 			}
5792 		}
5793 
5794 		map->Unlock();
5795 
5796 		// All pages are unwired. Remove the area's wired range as well (to
5797 		// avoid deadlocks we delete the range after unlocking the cache).
5798 		VMAreaWiredRange* range = area->Unwire(areaStart,
5799 			areaEnd - areaStart, writable);
5800 
5801 		cacheChainLocker.Unlock();
5802 
5803 		if (range != NULL) {
5804 			range->~VMAreaWiredRange();
5805 			free_etc(range, mallocFlags);
5806 		}
5807 
5808 		if (error != B_OK)
5809 			break;
5810 	}
5811 
5812 	// get rid of the address space reference lock_memory_etc() acquired
5813 	addressSpace->Put();
5814 
5815 	return error;
5816 }
5817 
5818 
5819 status_t
5820 unlock_memory(void* address, size_t numBytes, uint32 flags)
5821 {
5822 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5823 }
5824 
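/*	Editor's note (illustrative, not part of the original vm.cpp): the
	balanced wiring pattern required by lock_memory_etc(), as a driver might
	use it around an I/O transfer; "buffer", "length" and the transfer itself
	are hypothetical, and the flags must match in both calls.

		status_t status = lock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
		if (status != B_OK)
			return status;

		// ... perform the transfer using the now wired buffer ...

		unlock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
*/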
5825 
5826 /*!	Similar to get_memory_map(), but also allows specifying the address space
5827 	for the memory in question and has saner semantics.
5828 	Returns \c B_OK when the complete range could be translated or
5829 	\c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5830 	case the actual number of entries is written to \c *_numEntries. Any other
5831 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5832 	in this case.
5833 */
5834 status_t
5835 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5836 	physical_entry* table, uint32* _numEntries)
5837 {
5838 	uint32 numEntries = *_numEntries;
5839 	*_numEntries = 0;
5840 
5841 	VMAddressSpace* addressSpace;
5842 	addr_t virtualAddress = (addr_t)address;
5843 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5844 	phys_addr_t physicalAddress;
5845 	status_t status = B_OK;
5846 	int32 index = -1;
5847 	addr_t offset = 0;
5848 	bool interrupts = are_interrupts_enabled();
5849 
5850 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5851 		"entries)\n", team, address, numBytes, numEntries));
5852 
5853 	if (numEntries == 0 || numBytes == 0)
5854 		return B_BAD_VALUE;
5855 
5856 	// in which address space is the address to be found?
5857 	if (IS_USER_ADDRESS(virtualAddress)) {
5858 		if (team == B_CURRENT_TEAM)
5859 			addressSpace = VMAddressSpace::GetCurrent();
5860 		else
5861 			addressSpace = VMAddressSpace::Get(team);
5862 	} else
5863 		addressSpace = VMAddressSpace::GetKernel();
5864 
5865 	if (addressSpace == NULL)
5866 		return B_ERROR;
5867 
5868 	VMTranslationMap* map = addressSpace->TranslationMap();
5869 
5870 	if (interrupts)
5871 		map->Lock();
5872 
5873 	while (offset < numBytes) {
5874 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5875 		uint32 flags;
5876 
5877 		if (interrupts) {
5878 			status = map->Query((addr_t)address + offset, &physicalAddress,
5879 				&flags);
5880 		} else {
5881 			status = map->QueryInterrupt((addr_t)address + offset,
5882 				&physicalAddress, &flags);
5883 		}
5884 		if (status < B_OK)
5885 			break;
5886 		if ((flags & PAGE_PRESENT) == 0) {
5887 			panic("get_memory_map() called on unmapped memory!");
5888 			return B_BAD_ADDRESS;
5889 		}
5890 
5891 		if (index < 0 && pageOffset > 0) {
5892 			physicalAddress += pageOffset;
5893 			if (bytes > B_PAGE_SIZE - pageOffset)
5894 				bytes = B_PAGE_SIZE - pageOffset;
5895 		}
5896 
5897 		// need to switch to the next physical_entry?
5898 		if (index < 0 || table[index].address
5899 				!= physicalAddress - table[index].size) {
5900 			if ((uint32)++index + 1 > numEntries) {
5901 				// table too small
5902 				break;
5903 			}
5904 			table[index].address = physicalAddress;
5905 			table[index].size = bytes;
5906 		} else {
5907 			// the page is contiguous with the current entry
5908 			table[index].size += bytes;
5909 		}
5910 
5911 		offset += bytes;
5912 	}
5913 
5914 	if (interrupts)
5915 		map->Unlock();
5916 
5917 	if (status != B_OK)
5918 		return status;
5919 
5920 	if ((uint32)index + 1 > numEntries) {
5921 		*_numEntries = index;
5922 		return B_BUFFER_OVERFLOW;
5923 	}
5924 
5925 	*_numEntries = index + 1;
5926 	return B_OK;
5927 }
5928 
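/*	Editor's note (illustrative, not part of the original vm.cpp): translating
	a (previously wired) buffer into its physical runs, e.g. to build a
	scatter/gather list; "buffer" and "length" are hypothetical, and the fixed
	table size is arbitrary.

		physical_entry table[8];
		uint32 entryCount = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
			table, &entryCount);
		if (status != B_OK && status != B_BUFFER_OVERFLOW)
			return status;

		for (uint32 i = 0; i < entryCount; i++) {
			// program one scatter/gather element from table[i].address and
			// table[i].size
		}
*/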
5929 
5930 /*!	According to the BeBook, this function should always succeed.
5931 	This is no longer the case.
5932 */
5933 extern "C" int32
5934 __get_memory_map_haiku(const void* address, size_t numBytes,
5935 	physical_entry* table, int32 numEntries)
5936 {
5937 	uint32 entriesRead = numEntries;
5938 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5939 		table, &entriesRead);
5940 	if (error != B_OK)
5941 		return error;
5942 
5943 	// close the entry list
5944 
5945 	// if it's only one entry, we will silently accept the missing ending
5946 	if (numEntries == 1)
5947 		return B_OK;
5948 
5949 	if (entriesRead + 1 > (uint32)numEntries)
5950 		return B_BUFFER_OVERFLOW;
5951 
5952 	table[entriesRead].address = 0;
5953 	table[entriesRead].size = 0;
5954 
5955 	return B_OK;
5956 }
5957 
5958 
5959 area_id
5960 area_for(void* address)
5961 {
5962 	return vm_area_for((addr_t)address, true);
5963 }
5964 
5965 
5966 area_id
5967 find_area(const char* name)
5968 {
5969 	return VMAreaHash::Find(name);
5970 }
5971 
5972 
5973 status_t
5974 _get_area_info(area_id id, area_info* info, size_t size)
5975 {
5976 	if (size != sizeof(area_info) || info == NULL)
5977 		return B_BAD_VALUE;
5978 
5979 	AddressSpaceReadLocker locker;
5980 	VMArea* area;
5981 	status_t status = locker.SetFromArea(id, area);
5982 	if (status != B_OK)
5983 		return status;
5984 
5985 	fill_area_info(area, info, size);
5986 	return B_OK;
5987 }
5988 
5989 
5990 status_t
5991 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5992 {
5993 	addr_t nextBase = *(addr_t*)cookie;
5994 
5995 	// we're already through the list
5996 	if (nextBase == (addr_t)-1)
5997 		return B_ENTRY_NOT_FOUND;
5998 
5999 	if (team == B_CURRENT_TEAM)
6000 		team = team_get_current_team_id();
6001 
6002 	AddressSpaceReadLocker locker(team);
6003 	if (!locker.IsLocked())
6004 		return B_BAD_TEAM_ID;
6005 
6006 	VMArea* area;
6007 	for (VMAddressSpace::AreaIterator it
6008 				= locker.AddressSpace()->GetAreaIterator();
6009 			(area = it.Next()) != NULL;) {
6010 		if (area->Base() > nextBase)
6011 			break;
6012 	}
6013 
6014 	if (area == NULL) {
6015 		nextBase = (addr_t)-1;
6016 		return B_ENTRY_NOT_FOUND;
6017 	}
6018 
6019 	fill_area_info(area, info, size);
6020 	*cookie = (ssize_t)(area->Base());
6021 
6022 	return B_OK;
6023 }
6024 
6025 
6026 status_t
6027 set_area_protection(area_id area, uint32 newProtection)
6028 {
6029 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6030 		newProtection, true);
6031 }
6032 
6033 
6034 status_t
6035 resize_area(area_id areaID, size_t newSize)
6036 {
6037 	return vm_resize_area(areaID, newSize, true);
6038 }
6039 
6040 
6041 /*!	Transfers the specified area to a new team. The caller must be the owner
6042 	of the area.
6043 */
6044 area_id
6045 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6046 	bool kernel)
6047 {
6048 	area_info info;
6049 	status_t status = get_area_info(id, &info);
6050 	if (status != B_OK)
6051 		return status;
6052 
6053 	if (info.team != thread_get_current_thread()->team->id)
6054 		return B_PERMISSION_DENIED;
6055 
6056 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6057 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6058 	if (clonedArea < 0)
6059 		return clonedArea;
6060 
6061 	status = vm_delete_area(info.team, id, kernel);
6062 	if (status != B_OK) {
6063 		vm_delete_area(target, clonedArea, kernel);
6064 		return status;
6065 	}
6066 
6067 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6068 
6069 	return clonedArea;
6070 }
6071 
6072 
6073 extern "C" area_id
6074 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6075 	size_t numBytes, uint32 addressSpec, uint32 protection,
6076 	void** _virtualAddress)
6077 {
6078 	if (!arch_vm_supports_protection(protection))
6079 		return B_NOT_SUPPORTED;
6080 
6081 	fix_protection(&protection);
6082 
6083 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6084 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6085 		false);
6086 }
6087 
6088 
6089 area_id
6090 clone_area(const char* name, void** _address, uint32 addressSpec,
6091 	uint32 protection, area_id source)
6092 {
6093 	if ((protection & B_KERNEL_PROTECTION) == 0)
6094 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6095 
6096 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6097 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6098 }
6099 
6100 
6101 area_id
6102 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock,
6103 	uint32 protection, uint32 flags, uint32 guardSize,
6104 	const virtual_address_restrictions* virtualAddressRestrictions,
6105 	const physical_address_restrictions* physicalAddressRestrictions,
6106 	void** _address)
6107 {
6108 	fix_protection(&protection);
6109 
6110 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6111 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6112 		true, _address);
6113 }
6114 
6115 
6116 extern "C" area_id
6117 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6118 	size_t size, uint32 lock, uint32 protection)
6119 {
6120 	fix_protection(&protection);
6121 
6122 	virtual_address_restrictions virtualRestrictions = {};
6123 	virtualRestrictions.address = *_address;
6124 	virtualRestrictions.address_specification = addressSpec;
6125 	physical_address_restrictions physicalRestrictions = {};
6126 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6127 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6128 		true, _address);
6129 }
6130 
6131 
6132 status_t
6133 delete_area(area_id area)
6134 {
6135 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6136 }
6137 
6138 
6139 //	#pragma mark - Userland syscalls
6140 
6141 
6142 status_t
6143 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6144 	addr_t size)
6145 {
6146 	// filter out some unavailable values (for userland)
6147 	switch (addressSpec) {
6148 		case B_ANY_KERNEL_ADDRESS:
6149 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6150 			return B_BAD_VALUE;
6151 	}
6152 
6153 	addr_t address;
6154 
6155 	if (!IS_USER_ADDRESS(userAddress)
6156 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6157 		return B_BAD_ADDRESS;
6158 
6159 	status_t status = vm_reserve_address_range(
6160 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6161 		RESERVED_AVOID_BASE);
6162 	if (status != B_OK)
6163 		return status;
6164 
6165 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6166 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6167 			(void*)address, size);
6168 		return B_BAD_ADDRESS;
6169 	}
6170 
6171 	return B_OK;
6172 }
6173 
6174 
6175 status_t
6176 _user_unreserve_address_range(addr_t address, addr_t size)
6177 {
6178 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6179 		(void*)address, size);
6180 }
6181 
6182 
6183 area_id
6184 _user_area_for(void* address)
6185 {
6186 	return vm_area_for((addr_t)address, false);
6187 }
6188 
6189 
6190 area_id
6191 _user_find_area(const char* userName)
6192 {
6193 	char name[B_OS_NAME_LENGTH];
6194 
6195 	if (!IS_USER_ADDRESS(userName)
6196 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6197 		return B_BAD_ADDRESS;
6198 
6199 	return find_area(name);
6200 }
6201 
6202 
6203 status_t
6204 _user_get_area_info(area_id area, area_info* userInfo)
6205 {
6206 	if (!IS_USER_ADDRESS(userInfo))
6207 		return B_BAD_ADDRESS;
6208 
6209 	area_info info;
6210 	status_t status = get_area_info(area, &info);
6211 	if (status < B_OK)
6212 		return status;
6213 
6214 	// TODO: do we want to prevent userland from seeing kernel protections?
6215 	//info.protection &= B_USER_PROTECTION;
6216 
6217 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6218 		return B_BAD_ADDRESS;
6219 
6220 	return status;
6221 }
6222 
6223 
6224 status_t
6225 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6226 {
6227 	ssize_t cookie;
6228 
6229 	if (!IS_USER_ADDRESS(userCookie)
6230 		|| !IS_USER_ADDRESS(userInfo)
6231 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6232 		return B_BAD_ADDRESS;
6233 
6234 	area_info info;
6235 	status_t status = _get_next_area_info(team, &cookie, &info,
6236 		sizeof(area_info));
6237 	if (status != B_OK)
6238 		return status;
6239 
6240 	//info.protection &= B_USER_PROTECTION;
6241 
6242 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6243 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6244 		return B_BAD_ADDRESS;
6245 
6246 	return status;
6247 }
6248 
6249 
6250 status_t
6251 _user_set_area_protection(area_id area, uint32 newProtection)
6252 {
6253 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6254 		return B_BAD_VALUE;
6255 
6256 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6257 		newProtection, false);
6258 }
6259 
6260 
6261 status_t
6262 _user_resize_area(area_id area, size_t newSize)
6263 {
6264 	// TODO: Since we restrict deleting of areas to those owned by the team,
6265 	// we should also do that for resizing (check other functions, too).
6266 	return vm_resize_area(area, newSize, false);
6267 }
6268 
6269 
6270 area_id
6271 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6272 	team_id target)
6273 {
6274 	// filter out some unavailable values (for userland)
6275 	switch (addressSpec) {
6276 		case B_ANY_KERNEL_ADDRESS:
6277 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6278 			return B_BAD_VALUE;
6279 	}
6280 
6281 	void* address;
6282 	if (!IS_USER_ADDRESS(userAddress)
6283 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6284 		return B_BAD_ADDRESS;
6285 
6286 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6287 	if (newArea < B_OK)
6288 		return newArea;
6289 
6290 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6291 		return B_BAD_ADDRESS;
6292 
6293 	return newArea;
6294 }
6295 
6296 
6297 area_id
6298 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6299 	uint32 protection, area_id sourceArea)
6300 {
6301 	char name[B_OS_NAME_LENGTH];
6302 	void* address;
6303 
6304 	// filter out some unavailable values (for userland)
6305 	switch (addressSpec) {
6306 		case B_ANY_KERNEL_ADDRESS:
6307 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6308 			return B_BAD_VALUE;
6309 	}
6310 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6311 		return B_BAD_VALUE;
6312 
6313 	if (!IS_USER_ADDRESS(userName)
6314 		|| !IS_USER_ADDRESS(userAddress)
6315 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6316 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6317 		return B_BAD_ADDRESS;
6318 
6319 	fix_protection(&protection);
6320 
6321 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6322 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6323 		false);
6324 	if (clonedArea < B_OK)
6325 		return clonedArea;
6326 
6327 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6328 		delete_area(clonedArea);
6329 		return B_BAD_ADDRESS;
6330 	}
6331 
6332 	return clonedArea;
6333 }
6334 
6335 
6336 area_id
6337 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6338 	size_t size, uint32 lock, uint32 protection)
6339 {
6340 	char name[B_OS_NAME_LENGTH];
6341 	void* address;
6342 
6343 	// filter out some unavailable values (for userland)
6344 	switch (addressSpec) {
6345 		case B_ANY_KERNEL_ADDRESS:
6346 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6347 			return B_BAD_VALUE;
6348 	}
6349 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6350 		return B_BAD_VALUE;
6351 
6352 	if (!IS_USER_ADDRESS(userName)
6353 		|| !IS_USER_ADDRESS(userAddress)
6354 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6355 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6356 		return B_BAD_ADDRESS;
6357 
6358 	if (addressSpec == B_EXACT_ADDRESS
6359 		&& IS_KERNEL_ADDRESS(address))
6360 		return B_BAD_VALUE;
6361 
6362 	if (addressSpec == B_ANY_ADDRESS)
6363 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6364 	if (addressSpec == B_BASE_ADDRESS)
6365 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6366 
6367 	fix_protection(&protection);
6368 
6369 	virtual_address_restrictions virtualRestrictions = {};
6370 	virtualRestrictions.address = address;
6371 	virtualRestrictions.address_specification = addressSpec;
6372 	physical_address_restrictions physicalRestrictions = {};
6373 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6374 		size, lock, protection, 0, 0, &virtualRestrictions,
6375 		&physicalRestrictions, false, &address);
6376 
6377 	if (area >= B_OK
6378 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6379 		delete_area(area);
6380 		return B_BAD_ADDRESS;
6381 	}
6382 
6383 	return area;
6384 }
6385 
6386 
6387 status_t
6388 _user_delete_area(area_id area)
6389 {
6390 	// Unlike the BeOS implementation, you can now only delete areas
6391 	// that you have created yourself from userland.
6392 	// The documentation to delete_area() explicitly states that this
6393 	// will be restricted in the future, and so it will.
6394 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6395 }
6396 
6397 
6398 // TODO: create a BeOS style call for this!
6399 
6400 area_id
6401 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6402 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6403 	int fd, off_t offset)
6404 {
6405 	char name[B_OS_NAME_LENGTH];
6406 	void* address;
6407 	area_id area;
6408 
6409 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6410 		return B_BAD_VALUE;
6411 
6412 	fix_protection(&protection);
6413 
6414 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6415 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6416 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6417 		return B_BAD_ADDRESS;
6418 
6419 	if (addressSpec == B_EXACT_ADDRESS) {
6420 		if ((addr_t)address + size < (addr_t)address
6421 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6422 			return B_BAD_VALUE;
6423 		}
6424 		if (!IS_USER_ADDRESS(address)
6425 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6426 			return B_BAD_ADDRESS;
6427 		}
6428 	}
6429 
6430 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6431 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6432 		false);
6433 	if (area < B_OK)
6434 		return area;
6435 
6436 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6437 		return B_BAD_ADDRESS;
6438 
6439 	return area;
6440 }
6441 
6442 
6443 status_t
6444 _user_unmap_memory(void* _address, size_t size)
6445 {
6446 	addr_t address = (addr_t)_address;
6447 
6448 	// check params
6449 	if (size == 0 || (addr_t)address + size < (addr_t)address
6450 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6451 		return B_BAD_VALUE;
6452 	}
6453 
6454 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6455 		return B_BAD_ADDRESS;
6456 
6457 	// Write lock the address space and ensure the address range is not wired.
6458 	AddressSpaceWriteLocker locker;
6459 	do {
6460 		status_t status = locker.SetTo(team_get_current_team_id());
6461 		if (status != B_OK)
6462 			return status;
6463 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6464 			size, &locker));
6465 
6466 	// unmap
6467 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6468 }
6469 
6470 
6471 status_t
6472 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6473 {
6474 	// check address range
6475 	addr_t address = (addr_t)_address;
6476 	size = PAGE_ALIGN(size);
6477 
6478 	if ((address % B_PAGE_SIZE) != 0)
6479 		return B_BAD_VALUE;
6480 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6481 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6482 		// weird error code required by POSIX
6483 		return ENOMEM;
6484 	}
6485 
6486 	// extend and check protection
6487 	if ((protection & ~B_USER_PROTECTION) != 0)
6488 		return B_BAD_VALUE;
6489 
6490 	fix_protection(&protection);
6491 
6492 	// We need to write lock the address space, since we're going to play with
6493 	// the areas. Also make sure that none of the areas is wired and that we're
6494 	// actually allowed to change the protection.
6495 	AddressSpaceWriteLocker locker;
6496 
6497 	bool restart;
6498 	do {
6499 		restart = false;
6500 
6501 		status_t status = locker.SetTo(team_get_current_team_id());
6502 		if (status != B_OK)
6503 			return status;
6504 
6505 		// First round: Check whether the whole range is covered by areas and we
6506 		// are allowed to modify them.
6507 		addr_t currentAddress = address;
6508 		size_t sizeLeft = size;
6509 		while (sizeLeft > 0) {
6510 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6511 			if (area == NULL)
6512 				return B_NO_MEMORY;
6513 
6514 			if ((area->protection & B_KERNEL_AREA) != 0)
6515 				return B_NOT_ALLOWED;
6516 
6517 			// TODO: For (shared) mapped files we should check whether the new
6518 			// protections are compatible with the file permissions. We don't
6519 			// have a way to do that yet, though.
6520 
6521 			addr_t offset = currentAddress - area->Base();
6522 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6523 
6524 			AreaCacheLocker cacheLocker(area);
6525 
6526 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6527 					&locker, &cacheLocker)) {
6528 				restart = true;
6529 				break;
6530 			}
6531 
6532 			cacheLocker.Unlock();
6533 
6534 			currentAddress += rangeSize;
6535 			sizeLeft -= rangeSize;
6536 		}
6537 	} while (restart);
6538 
6539 	// Second round: If the protections differ from that of the area, create a
6540 	// page protection array and re-map mapped pages.
6541 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6542 	addr_t currentAddress = address;
6543 	size_t sizeLeft = size;
6544 	while (sizeLeft > 0) {
6545 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6546 		if (area == NULL)
6547 			return B_NO_MEMORY;
6548 
6549 		addr_t offset = currentAddress - area->Base();
6550 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6551 
6552 		currentAddress += rangeSize;
6553 		sizeLeft -= rangeSize;
6554 
6555 		if (area->page_protections == NULL) {
6556 			if (area->protection == protection)
6557 				continue;
6558 
6559 			status_t status = allocate_area_page_protections(area);
6560 			if (status != B_OK)
6561 				return status;
6562 		}
6563 
6564 		// We need to lock the complete cache chain, since we potentially unmap
6565 		// pages of lower caches.
6566 		VMCache* topCache = vm_area_get_locked_cache(area);
6567 		VMCacheChainLocker cacheChainLocker(topCache);
6568 		cacheChainLocker.LockAllSourceCaches();
6569 
6570 		for (addr_t pageAddress = area->Base() + offset;
6571 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6572 			map->Lock();
6573 
6574 			set_area_page_protection(area, pageAddress, protection);
6575 
6576 			phys_addr_t physicalAddress;
6577 			uint32 flags;
6578 
6579 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6580 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6581 				map->Unlock();
6582 				continue;
6583 			}
6584 
6585 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6586 			if (page == NULL) {
6587 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6588 					"\n", area, physicalAddress);
6589 				map->Unlock();
6590 				return B_ERROR;
6591 			}
6592 
6593 			// If the page is not in the topmost cache and write access is
6594 			// requested, we have to unmap it. Otherwise we can re-map it with
6595 			// the new protection.
6596 			bool unmapPage = page->Cache() != topCache
6597 				&& (protection & B_WRITE_AREA) != 0;
6598 
6599 			if (!unmapPage)
6600 				map->ProtectPage(area, pageAddress, protection);
6601 
6602 			map->Unlock();
6603 
6604 			if (unmapPage) {
6605 				DEBUG_PAGE_ACCESS_START(page);
6606 				unmap_page(area, pageAddress);
6607 				DEBUG_PAGE_ACCESS_END(page);
6608 			}
6609 		}
6610 	}
6611 
6612 	return B_OK;
6613 }
6614 
6615 
6616 status_t
6617 _user_sync_memory(void* _address, size_t size, uint32 flags)
6618 {
6619 	addr_t address = (addr_t)_address;
6620 	size = PAGE_ALIGN(size);
6621 
6622 	// check params
6623 	if ((address % B_PAGE_SIZE) != 0)
6624 		return B_BAD_VALUE;
6625 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6626 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6627 		// weird error code required by POSIX
6628 		return ENOMEM;
6629 	}
6630 
6631 	bool writeSync = (flags & MS_SYNC) != 0;
6632 	bool writeAsync = (flags & MS_ASYNC) != 0;
6633 	if (writeSync && writeAsync)
6634 		return B_BAD_VALUE;
6635 
6636 	if (size == 0 || (!writeSync && !writeAsync))
6637 		return B_OK;
6638 
6639 	// iterate through the range and sync all concerned areas
6640 	while (size > 0) {
6641 		// read lock the address space
6642 		AddressSpaceReadLocker locker;
6643 		status_t error = locker.SetTo(team_get_current_team_id());
6644 		if (error != B_OK)
6645 			return error;
6646 
6647 		// get the first area
6648 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6649 		if (area == NULL)
6650 			return B_NO_MEMORY;
6651 
6652 		uint32 offset = address - area->Base();
6653 		size_t rangeSize = min_c(area->Size() - offset, size);
6654 		offset += area->cache_offset;
6655 
6656 		// lock the cache
6657 		AreaCacheLocker cacheLocker(area);
6658 		if (!cacheLocker)
6659 			return B_BAD_VALUE;
6660 		VMCache* cache = area->cache;
6661 
6662 		locker.Unlock();
6663 
6664 		uint32 firstPage = offset >> PAGE_SHIFT;
6665 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6666 
6667 		// write the pages
6668 		if (cache->type == CACHE_TYPE_VNODE) {
6669 			if (writeSync) {
6670 				// synchronous
6671 				error = vm_page_write_modified_page_range(cache, firstPage,
6672 					endPage);
6673 				if (error != B_OK)
6674 					return error;
6675 			} else {
6676 				// asynchronous
6677 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6678 				// TODO: This is probably not quite what is supposed to happen.
6679 				// Especially when a lot has to be written, it might take ages
6680 				// until it really hits the disk.
6681 			}
6682 		}
6683 
6684 		address += rangeSize;
6685 		size -= rangeSize;
6686 	}
6687 
6688 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6689 	// synchronize multiple mappings of the same file. In our VM they never get
6690 	// out of sync, though, so we don't have to do anything.
6691 
6692 	return B_OK;
6693 }
6694 
6695 
6696 status_t
6697 _user_memory_advice(void* address, size_t size, uint32 advice)
6698 {
6699 	// TODO: Implement!
6700 	return B_OK;
6701 }
6702 
6703 
6704 status_t
6705 _user_get_memory_properties(team_id teamID, const void* address,
6706 	uint32* _protected, uint32* _lock)
6707 {
6708 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6709 		return B_BAD_ADDRESS;
6710 
6711 	AddressSpaceReadLocker locker;
6712 	status_t error = locker.SetTo(teamID);
6713 	if (error != B_OK)
6714 		return error;
6715 
6716 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6717 	if (area == NULL)
6718 		return B_NO_MEMORY;
6719 
6720 
6721 	uint32 protection = area->protection;
6722 	if (area->page_protections != NULL)
6723 		protection = get_area_page_protection(area, (addr_t)address);
6724 
6725 	uint32 wiring = area->wiring;
6726 
6727 	locker.Unlock();
6728 
6729 	error = user_memcpy(_protected, &protection, sizeof(protection));
6730 	if (error != B_OK)
6731 		return error;
6732 
6733 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6734 
6735 	return error;
6736 }
6737 
6738 
6739 // #pragma mark -- compatibility
6740 
6741 
6742 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6743 
6744 
6745 struct physical_entry_beos {
6746 	uint32	address;
6747 	uint32	size;
6748 };
6749 
6750 
6751 /*!	The physical_entry structure has changed. We need to translate it to the
6752 	old one.
6753 */
6754 extern "C" int32
6755 __get_memory_map_beos(const void* _address, size_t numBytes,
6756 	physical_entry_beos* table, int32 numEntries)
6757 {
6758 	if (numEntries <= 0)
6759 		return B_BAD_VALUE;
6760 
6761 	const uint8* address = (const uint8*)_address;
6762 
6763 	int32 count = 0;
6764 	while (numBytes > 0 && count < numEntries) {
6765 		physical_entry entry;
6766 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6767 		if (result < 0) {
6768 			if (result != B_BUFFER_OVERFLOW)
6769 				return result;
6770 		}
6771 
6772 		if (entry.address >= (phys_addr_t)1 << 32) {
6773 			panic("get_memory_map(): Address is greater than 4 GB!");
6774 			return B_ERROR;
6775 		}
6776 
6777 		table[count].address = entry.address;
6778 		table[count++].size = entry.size;
6779 
6780 		address += entry.size;
6781 		numBytes -= entry.size;
6782 	}
6783 
6784 	// null-terminate the table, if possible
6785 	if (count < numEntries) {
6786 		table[count].address = 0;
6787 		table[count].size = 0;
6788 	}
6789 
6790 	return B_OK;
6791 }
6792 
6793 
6794 /*!	The type of the \a physicalAddress parameter has changed from void* to
6795 	phys_addr_t.
6796 */
6797 extern "C" area_id
6798 __map_physical_memory_beos(const char* name, void* physicalAddress,
6799 	size_t numBytes, uint32 addressSpec, uint32 protection,
6800 	void** _virtualAddress)
6801 {
6802 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6803 		addressSpec, protection, _virtualAddress);
6804 }
6805 
6806 
6807 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6808 	we meddle with the \a lock parameter to force 32 bit.
6809 */
6810 extern "C" area_id
6811 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6812 	size_t size, uint32 lock, uint32 protection)
6813 {
6814 	switch (lock) {
6815 		case B_NO_LOCK:
6816 			break;
6817 		case B_FULL_LOCK:
6818 		case B_LAZY_LOCK:
6819 			lock = B_32_BIT_FULL_LOCK;
6820 			break;
6821 		case B_CONTIGUOUS:
6822 			lock = B_32_BIT_CONTIGUOUS;
6823 			break;
6824 	}
6825 
6826 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6827 		protection);
6828 }
6829 
6830 
6831 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6832 	"BASE");
6833 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6834 	"map_physical_memory@", "BASE");
6835 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6836 	"BASE");
6837 
6838 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6839 	"get_memory_map@@", "1_ALPHA3");
6840 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6841 	"map_physical_memory@@", "1_ALPHA3");
6842 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6843 	"1_ALPHA3");
6844 
6845 
6846 #else
6847 
6848 
6849 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6850 	"get_memory_map@@", "BASE");
6851 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6852 	"map_physical_memory@@", "BASE");
6853 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6854 	"BASE");
6855 
6856 
6857 #endif	// defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6858