xref: /haiku/src/system/kernel/vm/vm.cpp (revision 03e5dd5273ae9bcef15db099630c4c8cf8b7bbdc)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 namespace {
77 
78 class AreaCacheLocking {
79 public:
80 	inline bool Lock(VMCache* lockable)
81 	{
82 		return false;
83 	}
84 
85 	inline void Unlock(VMCache* lockable)
86 	{
87 		vm_area_put_locked_cache(lockable);
88 	}
89 };
90 
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 	inline AreaCacheLocker(VMCache* cache = NULL)
94 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95 	{
96 	}
97 
98 	inline AreaCacheLocker(VMArea* area)
99 		: AutoLocker<VMCache, AreaCacheLocking>()
100 	{
101 		SetTo(area);
102 	}
103 
104 	inline void SetTo(VMCache* cache, bool alreadyLocked)
105 	{
106 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107 	}
108 
109 	inline void SetTo(VMArea* area)
110 	{
111 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113 	}
114 };
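
// Note: AreaCacheLocker::SetTo(VMArea*) adopts both the reference and the
// lock obtained via vm_area_get_locked_cache(); the locker never locks a
// cache itself (Lock() returns false) and releases reference and lock again
// through vm_area_put_locked_cache() when it is unset or goes out of scope.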
115 
116 
117 class VMCacheChainLocker {
118 public:
119 	VMCacheChainLocker()
120 		:
121 		fTopCache(NULL),
122 		fBottomCache(NULL)
123 	{
124 	}
125 
126 	VMCacheChainLocker(VMCache* topCache)
127 		:
128 		fTopCache(topCache),
129 		fBottomCache(topCache)
130 	{
131 	}
132 
133 	~VMCacheChainLocker()
134 	{
135 		Unlock();
136 	}
137 
138 	void SetTo(VMCache* topCache)
139 	{
140 		fTopCache = topCache;
141 		fBottomCache = topCache;
142 
143 		if (topCache != NULL)
144 			topCache->SetUserData(NULL);
145 	}
146 
147 	VMCache* LockSourceCache()
148 	{
149 		if (fBottomCache == NULL || fBottomCache->source == NULL)
150 			return NULL;
151 
152 		VMCache* previousCache = fBottomCache;
153 
154 		fBottomCache = fBottomCache->source;
155 		fBottomCache->Lock();
156 		fBottomCache->AcquireRefLocked();
157 		fBottomCache->SetUserData(previousCache);
158 
159 		return fBottomCache;
160 	}
161 
162 	void LockAllSourceCaches()
163 	{
164 		while (LockSourceCache() != NULL) {
165 		}
166 	}
167 
168 	void Unlock(VMCache* exceptCache = NULL)
169 	{
170 		if (fTopCache == NULL)
171 			return;
172 
173 		// Unlock caches in source -> consumer direction. This is important to
174 		// avoid double-locking and a reversal of locking order in case a cache
175 		// is eligible for merging.
176 		VMCache* cache = fBottomCache;
177 		while (cache != NULL) {
178 			VMCache* nextCache = (VMCache*)cache->UserData();
179 			if (cache != exceptCache)
180 				cache->ReleaseRefAndUnlock(cache != fTopCache);
181 
182 			if (cache == fTopCache)
183 				break;
184 
185 			cache = nextCache;
186 		}
187 
188 		fTopCache = NULL;
189 		fBottomCache = NULL;
190 	}
191 
192 	void UnlockKeepRefs(bool keepTopCacheLocked)
193 	{
194 		if (fTopCache == NULL)
195 			return;
196 
197 		VMCache* nextCache = fBottomCache;
198 		VMCache* cache = NULL;
199 
200 		while (keepTopCacheLocked
201 				? nextCache != fTopCache : cache != fTopCache) {
202 			cache = nextCache;
203 			nextCache = (VMCache*)cache->UserData();
204 			cache->Unlock(cache != fTopCache);
205 		}
206 	}
207 
208 	void RelockCaches(bool topCacheLocked)
209 	{
210 		if (fTopCache == NULL)
211 			return;
212 
213 		VMCache* nextCache = fTopCache;
214 		VMCache* cache = NULL;
215 		if (topCacheLocked) {
216 			cache = nextCache;
217 			nextCache = cache->source;
218 		}
219 
220 		while (cache != fBottomCache && nextCache != NULL) {
221 			VMCache* consumer = cache;
222 			cache = nextCache;
223 			nextCache = cache->source;
224 			cache->Lock();
225 			cache->SetUserData(consumer);
226 		}
227 	}
228 
229 private:
230 	VMCache*	fTopCache;
231 	VMCache*	fBottomCache;
232 };
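
// Note: the chain is locked from the top (consumer) cache towards the bottom
// (source) cache via LockSourceCache()/LockAllSourceCaches(), with each
// cache's user data pointing back to its consumer. Unlock() then follows
// those back pointers and releases the locks in the opposite,
// source -> consumer order, as described in the comment above.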
233 
234 } // namespace
235 
236 
237 // The memory reserve that an allocation of a given priority must not touch.
238 static const size_t kMemoryReserveForPriority[] = {
239 	VM_MEMORY_RESERVE_USER,		// user
240 	VM_MEMORY_RESERVE_SYSTEM,	// system
241 	0							// VIP
242 };
243 
244 
245 ObjectCache* gPageMappingsObjectCache;
246 
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248 
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
253 
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
255 
256 #if DEBUG_CACHE_LIST
257 
258 struct cache_info {
259 	VMCache*	cache;
260 	addr_t		page_count;
261 	addr_t		committed;
262 };
263 
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
266 
267 #endif	// DEBUG_CACHE_LIST
268 
269 
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 	bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 	int protection, int mapping, uint32 flags,
278 	const virtual_address_restrictions* addressRestrictions, bool kernel,
279 	VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
281 
282 
283 //	#pragma mark -
284 
285 
286 #if VM_PAGE_FAULT_TRACING
287 
288 namespace VMPageFaultTracing {
289 
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293 		:
294 		fAddress(address),
295 		fPC(pc),
296 		fWrite(write),
297 		fUser(user)
298 	{
299 		Initialized();
300 	}
301 
302 	virtual void AddDump(TraceOutput& out)
303 	{
304 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306 	}
307 
308 private:
309 	addr_t	fAddress;
310 	addr_t	fPC;
311 	bool	fWrite;
312 	bool	fUser;
313 };
314 
315 
316 // page fault errors
317 enum {
318 	PAGE_FAULT_ERROR_NO_AREA		= 0,
319 	PAGE_FAULT_ERROR_KERNEL_ONLY,
320 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 	PAGE_FAULT_ERROR_READ_PROTECTED,
322 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325 };
326 
327 
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 	PageFaultError(area_id area, status_t error)
331 		:
332 		fArea(area),
333 		fError(error)
334 	{
335 		Initialized();
336 	}
337 
338 	virtual void AddDump(TraceOutput& out)
339 	{
340 		switch (fError) {
341 			case PAGE_FAULT_ERROR_NO_AREA:
342 				out.Print("page fault error: no area");
343 				break;
344 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 				out.Print("page fault error: area: %ld, kernel only", fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 				out.Print("page fault error: area: %ld, write protected",
349 					fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_READ_PROTECTED:
352 				out.Print("page fault error: area: %ld, read protected", fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 				out.Print("page fault error: area: %ld, execute protected",
356 					fArea);
357 				break;
358 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 				out.Print("page fault error: kernel touching bad user memory");
360 				break;
361 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 				out.Print("page fault error: no address space");
363 				break;
364 			default:
365 				out.Print("page fault error: area: %ld, error: %s", fArea,
366 					strerror(fError));
367 				break;
368 		}
369 	}
370 
371 private:
372 	area_id		fArea;
373 	status_t	fError;
374 };
375 
376 
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 			vm_page* page)
381 		:
382 		fArea(area),
383 		fTopCache(topCache),
384 		fCache(cache),
385 		fPage(page)
386 	{
387 		Initialized();
388 	}
389 
390 	virtual void AddDump(TraceOutput& out)
391 	{
392 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 			"page: %p", fArea, fTopCache, fCache, fPage);
394 	}
395 
396 private:
397 	area_id		fArea;
398 	VMCache*	fTopCache;
399 	VMCache*	fCache;
400 	vm_page*	fPage;
401 };
402 
403 }	// namespace VMPageFaultTracing
404 
405 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 #	define TPF(x) ;
408 #endif	// VM_PAGE_FAULT_TRACING
409 
410 
411 //	#pragma mark -
412 
413 
414 /*!	The page's cache must be locked.
415 */
416 static inline void
417 increment_page_wired_count(vm_page* page)
418 {
419 	if (!page->IsMapped())
420 		atomic_add(&gMappedPagesCount, 1);
421 	page->IncrementWiredCount();
422 }
423 
424 
425 /*!	The page's cache must be locked.
426 */
427 static inline void
428 decrement_page_wired_count(vm_page* page)
429 {
430 	page->DecrementWiredCount();
431 	if (!page->IsMapped())
432 		atomic_add(&gMappedPagesCount, -1);
433 }
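
// Note: both helpers keep gMappedPagesCount consistent -- the counter is only
// adjusted when a page transitions between "not mapped at all" and "mapped or
// wired", which is what vm_page::IsMapped() reports.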
434 
435 
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
438 {
439 	return area->Base()
440 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441 }
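
// Note: page->cache_offset is measured in pages (hence the PAGE_SHIFT shift),
// while area->cache_offset is a byte offset, so the difference yields the
// page's byte offset within the area.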
442 
443 
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
447 {
448 	VMAreaHash::ReadLock();
449 
450 	VMArea* area = VMAreaHash::LookupLocked(id);
451 	if (area != NULL && area->address_space != addressSpace)
452 		area = NULL;
453 
454 	VMAreaHash::ReadUnlock();
455 
456 	return area;
457 }
458 
459 
460 static status_t
461 allocate_area_page_protections(VMArea* area)
462 {
463 	// In the page protections we store only the three user protections,
464 	// so we use 4 bits per page.
465 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 	area->page_protections = (uint8*)malloc_etc(bytes,
467 		HEAP_DONT_LOCK_KERNEL_SPACE);
468 	if (area->page_protections == NULL)
469 		return B_NO_MEMORY;
470 
471 	// init the page protections for all pages to that of the area
472 	uint32 areaProtection = area->protection
473 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 	memset(area->page_protections, areaProtection | (areaProtection << 4),
475 		bytes);
476 	return B_OK;
477 }
478 
479 
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482 {
483 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 	uint8& entry = area->page_protections[pageIndex / 2];
486 	if (pageIndex % 2 == 0)
487 		entry = (entry & 0xf0) | protection;
488 	else
489 		entry = (entry & 0x0f) | (protection << 4);
490 }
491 
492 
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
495 {
496 	if (area->page_protections == NULL)
497 		return area->protection;
498 
499 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 	uint32 protection = area->page_protections[pageIndex / 2];
501 	if (pageIndex % 2 == 0)
502 		protection &= 0x0f;
503 	else
504 		protection >>= 4;
505 
506 	// If this is a kernel area we translate the user flags to kernel flags.
507 	if (area->address_space == VMAddressSpace::Kernel()) {
508 		uint32 kernelProtection = 0;
509 		if ((protection & B_READ_AREA) != 0)
510 			kernelProtection |= B_KERNEL_READ_AREA;
511 		if ((protection & B_WRITE_AREA) != 0)
512 			kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 		return kernelProtection;
515 	}
516 
517 	return protection | B_KERNEL_READ_AREA
518 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519 }
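
// Illustrative example of the nibble packing used above (not part of the
// original code): for page index i the protection nibble lives in
// page_protections[i / 2]; even indices use the low nibble, odd indices the
// high one. Assuming B_READ_AREA == 1 and B_WRITE_AREA == 2, a byte value of
// 0x31 would mean page 2*k is read-only and page 2*k + 1 is read/write.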
520 
521 
522 /*!	The caller must have reserved as many pages as the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue,
575 		// since otherwise the page daemon (in idle mode) wouldn't keep track of
576 		// it -- if the page isn't touched, it will be deactivated after a full
577 		// iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
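
// Note: the two branches above differ in bookkeeping only. For B_NO_LOCK areas
// a vm_page_mapping object is allocated and linked into both the page's and
// the area's mapping lists, so the mapping can later be found and torn down
// individually; for wired areas no such object is needed and only the page's
// wired count is bumped.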
586 
587 
588 /*!	Since \c UnmapPage() is called with \c preserveModified set to \c true,
589 	the caller must hold the lock of the page's cache.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	Since \c UnmapPages() is called with \c preserveModified set to \c true,
600 	the caller must hold the lock of all mapped pages' caches.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 /*!	Cuts a piece out of an area. If the given cut range covers the complete
610 	area, it is deleted. If it covers the beginning or the end, the area is
611 	resized accordingly. If the range covers some part in the middle of the
612 	area, it is split in two; in this case the second area is returned via
613 	\a _secondArea (the variable is left untouched in the other cases).
614 	The address space must be write locked.
615 	The caller must ensure that no part of the given range is wired.
616 */
617 static status_t
618 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
619 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
620 {
621 	// Does the cut range intersect with the area at all?
622 	addr_t areaLast = area->Base() + (area->Size() - 1);
623 	if (area->Base() > lastAddress || areaLast < address)
624 		return B_OK;
625 
626 	// Is the area fully covered?
627 	if (area->Base() >= address && areaLast <= lastAddress) {
628 		delete_area(addressSpace, area, false);
629 		return B_OK;
630 	}
631 
632 	int priority;
633 	uint32 allocationFlags;
634 	if (addressSpace == VMAddressSpace::Kernel()) {
635 		priority = VM_PRIORITY_SYSTEM;
636 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
637 			| HEAP_DONT_LOCK_KERNEL_SPACE;
638 	} else {
639 		priority = VM_PRIORITY_USER;
640 		allocationFlags = 0;
641 	}
642 
643 	VMCache* cache = vm_area_get_locked_cache(area);
644 	VMCacheChainLocker cacheChainLocker(cache);
645 	cacheChainLocker.LockAllSourceCaches();
646 
647 	// Cut the end only?
648 	if (areaLast <= lastAddress) {
649 		size_t oldSize = area->Size();
650 		size_t newSize = address - area->Base();
651 
652 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
653 			allocationFlags);
654 		if (error != B_OK)
655 			return error;
656 
657 		// unmap pages
658 		unmap_pages(area, address, oldSize - newSize);
659 
660 		// If no one else uses the area's cache, we can resize it, too.
661 		if (cache->areas == area && area->cache_next == NULL
662 			&& cache->consumers.IsEmpty()
663 			&& cache->type == CACHE_TYPE_RAM) {
664 			// Since VMCache::Resize() can temporarily drop the lock, we must
665 			// unlock all lower caches to prevent locking order inversion.
666 			cacheChainLocker.Unlock(cache);
667 			cache->Resize(cache->virtual_base + newSize, priority);
668 			cache->ReleaseRefAndUnlock();
669 		}
670 
671 		return B_OK;
672 	}
673 
674 	// Cut the beginning only?
675 	if (area->Base() >= address) {
676 		addr_t oldBase = area->Base();
677 		addr_t newBase = lastAddress + 1;
678 		size_t newSize = areaLast - lastAddress;
679 
680 		// unmap pages
681 		unmap_pages(area, oldBase, newBase - oldBase);
682 
683 		// resize the area
684 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
685 			allocationFlags);
686 		if (error != B_OK)
687 			return error;
688 
689 		// TODO: If no one else uses the area's cache, we should resize it, too!
690 
691 		area->cache_offset += newBase - oldBase;
692 
693 		return B_OK;
694 	}
695 
696 	// The tough part -- cut a piece out of the middle of the area.
697 	// We do that by shrinking the area to the beginning section and creating a
698 	// new area for the end section.
699 
700 	addr_t firstNewSize = address - area->Base();
701 	addr_t secondBase = lastAddress + 1;
702 	addr_t secondSize = areaLast - lastAddress;
703 
704 	// unmap pages
705 	unmap_pages(area, address, area->Size() - firstNewSize);
706 
707 	// resize the area
708 	addr_t oldSize = area->Size();
709 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
710 		allocationFlags);
711 	if (error != B_OK)
712 		return error;
713 
714 	// TODO: If no one else uses the area's cache, we might want to create a
715 	// new cache for the second area, transfer the concerned pages from the
716 	// first cache to it and resize the first cache.
717 
718 	// map the second area
719 	virtual_address_restrictions addressRestrictions = {};
720 	addressRestrictions.address = (void*)secondBase;
721 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
722 	VMArea* secondArea;
723 	error = map_backing_store(addressSpace, cache,
724 		area->cache_offset + (secondBase - area->Base()), area->name,
725 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
726 		&addressRestrictions, kernel, &secondArea, NULL);
727 	if (error != B_OK) {
728 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
729 		return error;
730 	}
731 
732 	// We need a cache reference for the new area.
733 	cache->AcquireRefLocked();
734 
735 	if (_secondArea != NULL)
736 		*_secondArea = secondArea;
737 
738 	return B_OK;
739 }
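
// Illustrative example (not from the original source): for an area covering
// [0x100000, 0x140000), cutting [0x130000, 0x140000) shrinks its tail,
// cutting [0x100000, 0x110000) shrinks its head, and cutting
// [0x110000, 0x120000) splits it into two areas, the second of which is
// returned via _secondArea.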
740 
741 
742 /*!	Deletes all areas in the given address range.
743 	The address space must be write-locked.
744 	The caller must ensure that no part of the given range is wired.
745 */
746 static status_t
747 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
748 	bool kernel)
749 {
750 	size = PAGE_ALIGN(size);
751 	addr_t lastAddress = address + (size - 1);
752 
753 	// Check whether the caller is allowed to modify the concerned areas.
754 	if (!kernel) {
755 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
756 				VMArea* area = it.Next();) {
757 			addr_t areaLast = area->Base() + (area->Size() - 1);
758 			if (area->Base() < lastAddress && address < areaLast) {
759 				if ((area->protection & B_KERNEL_AREA) != 0)
760 					return B_NOT_ALLOWED;
761 			}
762 		}
763 	}
764 
765 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
766 			VMArea* area = it.Next();) {
767 		addr_t areaLast = area->Base() + (area->Size() - 1);
768 		if (area->Base() < lastAddress && address < areaLast) {
769 			status_t error = cut_area(addressSpace, area, address,
770 				lastAddress, NULL, kernel);
771 			if (error != B_OK)
772 				return error;
773 				// Failing after already messing with areas is ugly, but we
774 				// can't do anything about it.
775 		}
776 	}
777 
778 	return B_OK;
779 }
780 
781 
782 /*! You need to hold the lock of the cache and the write lock of the address
783 	space when calling this function.
784 	Note that in case of error the cache will be temporarily unlocked.
785 	If \a addressSpec is \c B_EXACT_ADDRESS and the
786 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
787 	that no part of the specified address range (base \c *_virtualAddress, size
788 	\a size) is wired.
789 */
790 static status_t
791 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
792 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
793 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
794 	bool kernel, VMArea** _area, void** _virtualAddress)
795 {
796 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
797 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
798 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
799 		addressRestrictions->address, offset, size,
800 		addressRestrictions->address_specification, wiring, protection,
801 		_area, areaName));
802 	cache->AssertLocked();
803 
804 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
805 		| HEAP_DONT_LOCK_KERNEL_SPACE;
806 	int priority;
807 	if (addressSpace != VMAddressSpace::Kernel()) {
808 		priority = VM_PRIORITY_USER;
809 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
810 		priority = VM_PRIORITY_VIP;
811 		allocationFlags |= HEAP_PRIORITY_VIP;
812 	} else
813 		priority = VM_PRIORITY_SYSTEM;
814 
815 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
816 		allocationFlags);
817 	if (area == NULL)
818 		return B_NO_MEMORY;
819 
820 	status_t status;
821 
822 	// if this is a private map, we need to create a new cache
823 	// to handle the private copies of pages as they are written to
824 	VMCache* sourceCache = cache;
825 	if (mapping == REGION_PRIVATE_MAP) {
826 		VMCache* newCache;
827 
828 		// create an anonymous cache
829 		status = VMCacheFactory::CreateAnonymousCache(newCache,
830 			(protection & B_STACK_AREA) != 0
831 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
832 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
833 		if (status != B_OK)
834 			goto err1;
835 
836 		newCache->Lock();
837 		newCache->temporary = 1;
838 		newCache->virtual_base = offset;
839 		newCache->virtual_end = offset + size;
840 
841 		cache->AddConsumer(newCache);
842 
843 		cache = newCache;
844 	}
845 
846 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
847 		status = cache->SetMinimalCommitment(size, priority);
848 		if (status != B_OK)
849 			goto err2;
850 	}
851 
852 	// check to see if this address space has entered DELETE state
853 	if (addressSpace->IsBeingDeleted()) {
854 		// Someone is trying to delete this address space now, so we can't
855 		// insert the area; back out.
856 		status = B_BAD_TEAM_ID;
857 		goto err2;
858 	}
859 
860 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
861 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
862 		status = unmap_address_range(addressSpace,
863 			(addr_t)addressRestrictions->address, size, kernel);
864 		if (status != B_OK)
865 			goto err2;
866 	}
867 
868 	status = addressSpace->InsertArea(area, size, addressRestrictions,
869 		allocationFlags, _virtualAddress);
870 	if (status != B_OK) {
871 		// TODO: wait and try again once this is working in the backend
872 #if 0
873 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
874 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
875 				0, 0);
876 		}
877 #endif
878 		goto err2;
879 	}
880 
881 	// attach the cache to the area
882 	area->cache = cache;
883 	area->cache_offset = offset;
884 
885 	// point the cache back to the area
886 	cache->InsertAreaLocked(area);
887 	if (mapping == REGION_PRIVATE_MAP)
888 		cache->Unlock();
889 
890 	// insert the area in the global area hash table
891 	VMAreaHash::Insert(area);
892 
893 	// grab a ref to the address space (the area holds this)
894 	addressSpace->Get();
895 
896 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
897 //		cache, sourceCache, areaName, area);
898 
899 	*_area = area;
900 	return B_OK;
901 
902 err2:
903 	if (mapping == REGION_PRIVATE_MAP) {
904 		// We created this cache, so we must delete it again. Note that we
905 		// need to temporarily unlock the source cache or we'll otherwise
906 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
907 		sourceCache->Unlock();
908 		cache->ReleaseRefAndUnlock();
909 		sourceCache->Lock();
910 	}
911 err1:
912 	addressSpace->DeleteArea(area, allocationFlags);
913 	return status;
914 }
915 
916 
917 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
918 	  locker1, locker2).
919 */
920 template<typename LockerType1, typename LockerType2>
921 static inline bool
922 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
923 {
924 	area->cache->AssertLocked();
925 
926 	VMAreaUnwiredWaiter waiter;
927 	if (!area->AddWaiterIfWired(&waiter))
928 		return false;
929 
930 	// unlock everything and wait
931 	if (locker1 != NULL)
932 		locker1->Unlock();
933 	if (locker2 != NULL)
934 		locker2->Unlock();
935 
936 	waiter.waitEntry.Wait();
937 
938 	return true;
939 }
940 
941 
942 /*!	Checks whether the given area has any wired ranges intersecting with the
943 	specified range and waits, if so.
944 
945 	When it has to wait, the function calls \c Unlock() on both \a locker1
946 	and \a locker2, if given.
947 	The area's top cache must be locked and must be unlocked as a side effect
948 	of calling \c Unlock() on either \a locker1 or \a locker2.
949 
950 	If the function does not have to wait it does not modify or unlock any
951 	object.
952 
953 	\param area The area to be checked.
954 	\param base The base address of the range to check.
955 	\param size The size of the address range to check.
956 	\param locker1 An object to be unlocked before starting to wait (may
957 		be \c NULL).
958 	\param locker2 An object to be unlocked before starting to wait (may
959 		be \c NULL).
960 	\return \c true, if the function had to wait, \c false otherwise.
961 */
962 template<typename LockerType1, typename LockerType2>
963 static inline bool
964 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
965 	LockerType1* locker1, LockerType2* locker2)
966 {
967 	area->cache->AssertLocked();
968 
969 	VMAreaUnwiredWaiter waiter;
970 	if (!area->AddWaiterIfWired(&waiter, base, size))
971 		return false;
972 
973 	// unlock everything and wait
974 	if (locker1 != NULL)
975 		locker1->Unlock();
976 	if (locker2 != NULL)
977 		locker2->Unlock();
978 
979 	waiter.waitEntry.Wait();
980 
981 	return true;
982 }
983 
984 
985 /*!	Checks whether the given address space has any wired ranges intersecting
986 	with the specified range and waits, if so.
987 
988 	Similar to wait_if_area_range_is_wired(), with the following differences:
989 	- All areas intersecting with the range are checked (respectively all until
990 	  one is found that contains a wired range intersecting with the given
991 	  range).
992 	- The given address space must at least be read-locked and must be unlocked
993 	  when \c Unlock() is called on \a locker.
994 	- None of the areas' caches are allowed to be locked.
995 */
996 template<typename LockerType>
997 static inline bool
998 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
999 	size_t size, LockerType* locker)
1000 {
1001 	addr_t end = base + size - 1;
1002 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
1003 			VMArea* area = it.Next();) {
1004 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
1005 		if (area->Base() > end)
1006 			return false;
1007 
1008 		if (base >= area->Base() + area->Size() - 1)
1009 			continue;
1010 
1011 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1012 
1013 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1014 			return true;
1015 	}
1016 
1017 	return false;
1018 }
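
// Note: since this function returns with all locks released when it had to
// wait, callers are expected to retry -- see the do/while loops in
// vm_create_anonymous_area() and vm_create_null_area() below, which re-acquire
// the address space lock and check again.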
1019 
1020 
1021 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1022 	It must be called in a situation where the kernel address space may be
1023 	locked.
1024 */
1025 status_t
1026 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1027 {
1028 	AddressSpaceReadLocker locker;
1029 	VMArea* area;
1030 	status_t status = locker.SetFromArea(id, area);
1031 	if (status != B_OK)
1032 		return status;
1033 
1034 	if (area->page_protections == NULL) {
1035 		status = allocate_area_page_protections(area);
1036 		if (status != B_OK)
1037 			return status;
1038 	}
1039 
1040 	*cookie = (void*)area;
1041 	return B_OK;
1042 }
1043 
1044 
1045 /*!	This is a debug helper function that is only usable in very specific
1046 	situations.
1047 	Sets protection for the given address range to the protection specified.
1048 	If \a protection is 0 then the involved pages will be marked non-present
1049 	in the translation map to cause a fault on access. The pages aren't
1050 	actually unmapped however so that they can be marked present again with
1051 	additional calls to this function. For this to work the area must be
1052 	fully locked in memory so that the pages aren't otherwise touched.
1053 	This function does not lock the kernel address space and needs to be
1054 	supplied with a \a cookie retrieved from a successful call to
1055 	vm_prepare_kernel_area_debug_protection().
1056 */
1057 status_t
1058 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1059 	uint32 protection)
1060 {
1061 	// check address range
1062 	addr_t address = (addr_t)_address;
1063 	size = PAGE_ALIGN(size);
1064 
1065 	if ((address % B_PAGE_SIZE) != 0
1066 		|| (addr_t)address + size < (addr_t)address
1067 		|| !IS_KERNEL_ADDRESS(address)
1068 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1069 		return B_BAD_VALUE;
1070 	}
1071 
1072 	// Translate the kernel protection to user protection as we only store that.
1073 	if ((protection & B_KERNEL_READ_AREA) != 0)
1074 		protection |= B_READ_AREA;
1075 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1076 		protection |= B_WRITE_AREA;
1077 
1078 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1079 	VMTranslationMap* map = addressSpace->TranslationMap();
1080 	VMArea* area = (VMArea*)cookie;
1081 
1082 	addr_t offset = address - area->Base();
1083 	if (area->Size() - offset < size) {
1084 		panic("protect range not fully within supplied area");
1085 		return B_BAD_VALUE;
1086 	}
1087 
1088 	if (area->page_protections == NULL) {
1089 		panic("area has no page protections");
1090 		return B_BAD_VALUE;
1091 	}
1092 
1093 	// Invalidate the mapping entries so any access to them will fault, or
1094 	// restore the mapping entries unchanged so that lookup will succeed again.
1095 	map->Lock();
1096 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1097 	map->Unlock();
1098 
1099 	// And set the proper page protections so that the fault case will actually
1100 	// fail and not simply try to map a new page.
1101 	for (addr_t pageAddress = address; pageAddress < address + size;
1102 			pageAddress += B_PAGE_SIZE) {
1103 		set_area_page_protection(area, pageAddress, protection);
1104 	}
1105 
1106 	return B_OK;
1107 }
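
// Minimal usage sketch (illustrative only, not from the original source;
// areaID and pageAddress are hypothetical):
//
//	void* cookie;
//	if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
//		// make one page fault on any access ...
//		vm_set_kernel_area_debug_protection(cookie, pageAddress, B_PAGE_SIZE, 0);
//		// ... and later restore full kernel access again
//		vm_set_kernel_area_debug_protection(cookie, pageAddress, B_PAGE_SIZE,
//			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	}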
1108 
1109 
1110 status_t
1111 vm_block_address_range(const char* name, void* address, addr_t size)
1112 {
1113 	if (!arch_vm_supports_protection(0))
1114 		return B_NOT_SUPPORTED;
1115 
1116 	AddressSpaceWriteLocker locker;
1117 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1118 	if (status != B_OK)
1119 		return status;
1120 
1121 	VMAddressSpace* addressSpace = locker.AddressSpace();
1122 
1123 	// create an anonymous cache
1124 	VMCache* cache;
1125 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1126 		VM_PRIORITY_SYSTEM);
1127 	if (status != B_OK)
1128 		return status;
1129 
1130 	cache->temporary = 1;
1131 	cache->virtual_end = size;
1132 	cache->Lock();
1133 
1134 	VMArea* area;
1135 	virtual_address_restrictions addressRestrictions = {};
1136 	addressRestrictions.address = address;
1137 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1138 	status = map_backing_store(addressSpace, cache, 0, name, size,
1139 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1140 		true, &area, NULL);
1141 	if (status != B_OK) {
1142 		cache->ReleaseRefAndUnlock();
1143 		return status;
1144 	}
1145 
1146 	cache->Unlock();
1147 	area->cache_type = CACHE_TYPE_RAM;
1148 	return area->id;
1149 }
1150 
1151 
1152 status_t
1153 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1154 {
1155 	AddressSpaceWriteLocker locker(team);
1156 	if (!locker.IsLocked())
1157 		return B_BAD_TEAM_ID;
1158 
1159 	VMAddressSpace* addressSpace = locker.AddressSpace();
1160 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1161 		addressSpace == VMAddressSpace::Kernel()
1162 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1163 }
1164 
1165 
1166 status_t
1167 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1168 	addr_t size, uint32 flags)
1169 {
1170 	if (size == 0)
1171 		return B_BAD_VALUE;
1172 
1173 	AddressSpaceWriteLocker locker(team);
1174 	if (!locker.IsLocked())
1175 		return B_BAD_TEAM_ID;
1176 
1177 	virtual_address_restrictions addressRestrictions = {};
1178 	addressRestrictions.address = *_address;
1179 	addressRestrictions.address_specification = addressSpec;
1180 	VMAddressSpace* addressSpace = locker.AddressSpace();
1181 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1182 		addressSpace == VMAddressSpace::Kernel()
1183 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1184 		_address);
1185 }
1186 
1187 
1188 area_id
1189 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1190 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1191 	const virtual_address_restrictions* virtualAddressRestrictions,
1192 	const physical_address_restrictions* physicalAddressRestrictions,
1193 	bool kernel, void** _address)
1194 {
1195 	VMArea* area;
1196 	VMCache* cache;
1197 	vm_page* page = NULL;
1198 	bool isStack = (protection & B_STACK_AREA) != 0;
1199 	page_num_t guardPages;
1200 	bool canOvercommit = false;
1201 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1202 		? VM_PAGE_ALLOC_CLEAR : 0;
1203 
1204 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1205 		team, name, size));
1206 
1207 	size = PAGE_ALIGN(size);
1208 	guardSize = PAGE_ALIGN(guardSize);
1209 	guardPages = guardSize / B_PAGE_SIZE;
1210 
1211 	if (size == 0 || size < guardSize)
1212 		return B_BAD_VALUE;
1213 	if (!arch_vm_supports_protection(protection))
1214 		return B_NOT_SUPPORTED;
1215 
1216 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1217 		canOvercommit = true;
1218 
1219 #ifdef DEBUG_KERNEL_STACKS
1220 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1221 		isStack = true;
1222 #endif
1223 
1224 	// check parameters
1225 	switch (virtualAddressRestrictions->address_specification) {
1226 		case B_ANY_ADDRESS:
1227 		case B_EXACT_ADDRESS:
1228 		case B_BASE_ADDRESS:
1229 		case B_ANY_KERNEL_ADDRESS:
1230 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1231 		case B_RANDOMIZED_ANY_ADDRESS:
1232 		case B_RANDOMIZED_BASE_ADDRESS:
1233 			break;
1234 
1235 		default:
1236 			return B_BAD_VALUE;
1237 	}
1238 
1239 	// If low or high physical address restrictions are given, we force
1240 	// B_CONTIGUOUS wiring, since only then we'll use
1241 	// vm_page_allocate_page_run() which deals with those restrictions.
1242 	if (physicalAddressRestrictions->low_address != 0
1243 		|| physicalAddressRestrictions->high_address != 0) {
1244 		wiring = B_CONTIGUOUS;
1245 	}
1246 
1247 	physical_address_restrictions stackPhysicalRestrictions;
1248 	bool doReserveMemory = false;
1249 	switch (wiring) {
1250 		case B_NO_LOCK:
1251 			break;
1252 		case B_FULL_LOCK:
1253 		case B_LAZY_LOCK:
1254 		case B_CONTIGUOUS:
1255 			doReserveMemory = true;
1256 			break;
1257 		case B_ALREADY_WIRED:
1258 			break;
1259 		case B_LOMEM:
1260 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1261 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1262 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1263 			wiring = B_CONTIGUOUS;
1264 			doReserveMemory = true;
1265 			break;
1266 		case B_32_BIT_FULL_LOCK:
1267 			if (B_HAIKU_PHYSICAL_BITS <= 32
1268 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1269 				wiring = B_FULL_LOCK;
1270 				doReserveMemory = true;
1271 				break;
1272 			}
1273 			// TODO: We don't really support this mode efficiently. Just fall
1274 			// through for now ...
1275 		case B_32_BIT_CONTIGUOUS:
1276 			#if B_HAIKU_PHYSICAL_BITS > 32
1277 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1278 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1279 					stackPhysicalRestrictions.high_address
1280 						= (phys_addr_t)1 << 32;
1281 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1282 				}
1283 			#endif
1284 			wiring = B_CONTIGUOUS;
1285 			doReserveMemory = true;
1286 			break;
1287 		default:
1288 			return B_BAD_VALUE;
1289 	}
1290 
1291 	// Optimization: For a single-page contiguous allocation without low/high
1292 	// memory restriction, B_FULL_LOCK wiring suffices.
1293 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1294 		&& physicalAddressRestrictions->low_address == 0
1295 		&& physicalAddressRestrictions->high_address == 0) {
1296 		wiring = B_FULL_LOCK;
1297 	}
1298 
1299 	// For full lock or contiguous areas we're also going to map the pages and
1300 	// thus need to reserve pages for the mapping backend upfront.
1301 	addr_t reservedMapPages = 0;
1302 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1303 		AddressSpaceWriteLocker locker;
1304 		status_t status = locker.SetTo(team);
1305 		if (status != B_OK)
1306 			return status;
1307 
1308 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1309 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1310 	}
1311 
1312 	int priority;
1313 	if (team != VMAddressSpace::KernelID())
1314 		priority = VM_PRIORITY_USER;
1315 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1316 		priority = VM_PRIORITY_VIP;
1317 	else
1318 		priority = VM_PRIORITY_SYSTEM;
1319 
1320 	// Reserve memory before acquiring the address space lock. This reduces the
1321 	// chances of failure, since while we hold the write lock to the address
1322 	// space (at least when it is the kernel address space), the low memory
1323 	// handler won't be able to free anything for us.
1324 	addr_t reservedMemory = 0;
1325 	if (doReserveMemory) {
1326 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1327 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1328 			return B_NO_MEMORY;
1329 		reservedMemory = size;
1330 		// TODO: We don't reserve the memory for the pages for the page
1331 		// directories/tables. We actually need to do so, since we currently
1332 		// don't reclaim them (and probably can't reclaim all of them anyway).
1333 		// Thus there are actually fewer physical pages than there should be,
1334 		// which can get the VM into trouble in low memory situations.
1335 	}
1336 
1337 	AddressSpaceWriteLocker locker;
1338 	VMAddressSpace* addressSpace;
1339 	status_t status;
1340 
1341 	// For full lock areas reserve the pages before locking the address
1342 	// space. E.g. block caches can't release their memory while we hold the
1343 	// address space lock.
1344 	page_num_t reservedPages = reservedMapPages;
1345 	if (wiring == B_FULL_LOCK)
1346 		reservedPages += size / B_PAGE_SIZE;
1347 
1348 	vm_page_reservation reservation;
1349 	if (reservedPages > 0) {
1350 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1351 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1352 					priority)) {
1353 				reservedPages = 0;
1354 				status = B_WOULD_BLOCK;
1355 				goto err0;
1356 			}
1357 		} else
1358 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1359 	}
1360 
1361 	if (wiring == B_CONTIGUOUS) {
1362 		// we try to allocate the page run here upfront as this may easily
1363 		// fail for obvious reasons
1364 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1365 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1366 		if (page == NULL) {
1367 			status = B_NO_MEMORY;
1368 			goto err0;
1369 		}
1370 	}
1371 
1372 	// Lock the address space and, if B_EXACT_ADDRESS and
1373 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1374 	// is not wired.
1375 	do {
1376 		status = locker.SetTo(team);
1377 		if (status != B_OK)
1378 			goto err1;
1379 
1380 		addressSpace = locker.AddressSpace();
1381 	} while (virtualAddressRestrictions->address_specification
1382 			== B_EXACT_ADDRESS
1383 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1384 		&& wait_if_address_range_is_wired(addressSpace,
1385 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1386 
1387 	// create an anonymous cache
1388 	// if it's a stack, make sure that two pages are available at least
1389 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1390 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1391 		wiring == B_NO_LOCK, priority);
1392 	if (status != B_OK)
1393 		goto err1;
1394 
1395 	cache->temporary = 1;
1396 	cache->virtual_end = size;
1397 	cache->committed_size = reservedMemory;
1398 		// TODO: This should be done via a method.
1399 	reservedMemory = 0;
1400 
1401 	cache->Lock();
1402 
1403 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1404 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1405 		kernel, &area, _address);
1406 
1407 	if (status != B_OK) {
1408 		cache->ReleaseRefAndUnlock();
1409 		goto err1;
1410 	}
1411 
1412 	locker.DegradeToReadLock();
1413 
1414 	switch (wiring) {
1415 		case B_NO_LOCK:
1416 		case B_LAZY_LOCK:
1417 			// do nothing - the pages are mapped in as needed
1418 			break;
1419 
1420 		case B_FULL_LOCK:
1421 		{
1422 			// Allocate and map all pages for this area
1423 
1424 			off_t offset = 0;
1425 			for (addr_t address = area->Base();
1426 					address < area->Base() + (area->Size() - 1);
1427 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1428 #ifdef DEBUG_KERNEL_STACKS
1429 #	ifdef STACK_GROWS_DOWNWARDS
1430 				if (isStack && address < area->Base()
1431 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1432 #	else
1433 				if (isStack && address >= area->Base() + area->Size()
1434 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1435 #	endif
1436 					continue;
1437 #endif
1438 				vm_page* page = vm_page_allocate_page(&reservation,
1439 					PAGE_STATE_WIRED | pageAllocFlags);
1440 				cache->InsertPage(page, offset);
1441 				map_page(area, page, address, protection, &reservation);
1442 
1443 				DEBUG_PAGE_ACCESS_END(page);
1444 			}
1445 
1446 			break;
1447 		}
1448 
1449 		case B_ALREADY_WIRED:
1450 		{
1451 			// The pages should already be mapped. This is only really useful
1452 			// during boot time. Find the appropriate vm_page objects and stick
1453 			// them in the cache object.
1454 			VMTranslationMap* map = addressSpace->TranslationMap();
1455 			off_t offset = 0;
1456 
1457 			if (!gKernelStartup)
1458 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1459 
1460 			map->Lock();
1461 
1462 			for (addr_t virtualAddress = area->Base();
1463 					virtualAddress < area->Base() + (area->Size() - 1);
1464 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1465 				phys_addr_t physicalAddress;
1466 				uint32 flags;
1467 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1468 				if (status < B_OK) {
1469 					panic("looking up mapping failed for va 0x%lx\n",
1470 						virtualAddress);
1471 				}
1472 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1473 				if (page == NULL) {
1474 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1475 						"\n", physicalAddress);
1476 				}
1477 
1478 				DEBUG_PAGE_ACCESS_START(page);
1479 
1480 				cache->InsertPage(page, offset);
1481 				increment_page_wired_count(page);
1482 				vm_page_set_state(page, PAGE_STATE_WIRED);
1483 				page->busy = false;
1484 
1485 				DEBUG_PAGE_ACCESS_END(page);
1486 			}
1487 
1488 			map->Unlock();
1489 			break;
1490 		}
1491 
1492 		case B_CONTIGUOUS:
1493 		{
1494 			// We have already allocated our contiguous page run, so we can now
1495 			// just map the pages in the address space.
1496 			VMTranslationMap* map = addressSpace->TranslationMap();
1497 			phys_addr_t physicalAddress
1498 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1499 			addr_t virtualAddress = area->Base();
1500 			off_t offset = 0;
1501 
1502 			map->Lock();
1503 
1504 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1505 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1506 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1507 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1508 				if (page == NULL)
1509 					panic("couldn't lookup physical page just allocated\n");
1510 
1511 				status = map->Map(virtualAddress, physicalAddress, protection,
1512 					area->MemoryType(), &reservation);
1513 				if (status < B_OK)
1514 					panic("couldn't map physical page in page run\n");
1515 
1516 				cache->InsertPage(page, offset);
1517 				increment_page_wired_count(page);
1518 
1519 				DEBUG_PAGE_ACCESS_END(page);
1520 			}
1521 
1522 			map->Unlock();
1523 			break;
1524 		}
1525 
1526 		default:
1527 			break;
1528 	}
1529 
1530 	cache->Unlock();
1531 
1532 	if (reservedPages > 0)
1533 		vm_page_unreserve_pages(&reservation);
1534 
1535 	TRACE(("vm_create_anonymous_area: done\n"));
1536 
1537 	area->cache_type = CACHE_TYPE_RAM;
1538 	return area->id;
1539 
1540 err1:
1541 	if (wiring == B_CONTIGUOUS) {
1542 		// we had reserved the area space upfront...
1543 		phys_addr_t pageNumber = page->physical_page_number;
1544 		int32 i;
1545 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1546 			page = vm_lookup_page(pageNumber);
1547 			if (page == NULL)
1548 				panic("couldn't lookup physical page just allocated\n");
1549 
1550 			vm_page_set_state(page, PAGE_STATE_FREE);
1551 		}
1552 	}
1553 
1554 err0:
1555 	if (reservedPages > 0)
1556 		vm_page_unreserve_pages(&reservation);
1557 	if (reservedMemory > 0)
1558 		vm_unreserve_memory(reservedMemory);
1559 
1560 	return status;
1561 }
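
// Illustrative call sketch (not from the original source): creating a fully
// locked kernel area with no particular address or physical placement
// requirements could look roughly like this:
//
//	virtual_address_restrictions virtualRestrictions = {};
//	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
//	physical_address_restrictions physicalRestrictions = {};
//	void* address;
//	area_id area = vm_create_anonymous_area(VMAddressSpace::KernelID(),
//		"example area", B_PAGE_SIZE, B_FULL_LOCK,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
//		&physicalRestrictions, true, &address);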
1562 
1563 
1564 area_id
1565 vm_map_physical_memory(team_id team, const char* name, void** _address,
1566 	uint32 addressSpec, addr_t size, uint32 protection,
1567 	phys_addr_t physicalAddress, bool alreadyWired)
1568 {
1569 	VMArea* area;
1570 	VMCache* cache;
1571 	addr_t mapOffset;
1572 
1573 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1574 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1575 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1576 		addressSpec, size, protection, physicalAddress));
1577 
1578 	if (!arch_vm_supports_protection(protection))
1579 		return B_NOT_SUPPORTED;
1580 
1581 	AddressSpaceWriteLocker locker(team);
1582 	if (!locker.IsLocked())
1583 		return B_BAD_TEAM_ID;
1584 
1585 	// if the physical address is not page aligned,
1586 	// move the actual area down to align on a page boundary
1587 	mapOffset = physicalAddress % B_PAGE_SIZE;
1588 	size += mapOffset;
1589 	physicalAddress -= mapOffset;
1590 
1591 	size = PAGE_ALIGN(size);
1592 
1593 	// create a device cache
1594 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1595 	if (status != B_OK)
1596 		return status;
1597 
1598 	cache->virtual_end = size;
1599 
1600 	cache->Lock();
1601 
1602 	virtual_address_restrictions addressRestrictions = {};
1603 	addressRestrictions.address = *_address;
1604 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1605 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1606 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1607 		true, &area, _address);
1608 
1609 	if (status < B_OK)
1610 		cache->ReleaseRefLocked();
1611 
1612 	cache->Unlock();
1613 
1614 	if (status == B_OK) {
1615 		// set requested memory type -- use uncached, if not given
1616 		uint32 memoryType = addressSpec & B_MTR_MASK;
1617 		if (memoryType == 0)
1618 			memoryType = B_MTR_UC;
1619 
1620 		area->SetMemoryType(memoryType);
1621 
1622 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1623 		if (status != B_OK)
1624 			delete_area(locker.AddressSpace(), area, false);
1625 	}
1626 
1627 	if (status != B_OK)
1628 		return status;
1629 
1630 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1631 
1632 	if (alreadyWired) {
1633 		// The area is already mapped, but possibly not with the right
1634 		// memory type.
1635 		map->Lock();
1636 		map->ProtectArea(area, area->protection);
1637 		map->Unlock();
1638 	} else {
1639 		// Map the area completely.
1640 
1641 		// reserve pages needed for the mapping
1642 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1643 			area->Base() + (size - 1));
1644 		vm_page_reservation reservation;
1645 		vm_page_reserve_pages(&reservation, reservePages,
1646 			team == VMAddressSpace::KernelID()
1647 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1648 
1649 		map->Lock();
1650 
1651 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1652 			map->Map(area->Base() + offset, physicalAddress + offset,
1653 				protection, area->MemoryType(), &reservation);
1654 		}
1655 
1656 		map->Unlock();
1657 
1658 		vm_page_unreserve_pages(&reservation);
1659 	}
1660 
1661 	// modify the pointer returned to be offset back into the new area
1662 	// the same way the physical address passed in was offset
1663 	*_address = (void*)((addr_t)*_address + mapOffset);
1664 
1665 	area->cache_type = CACHE_TYPE_DEVICE;
1666 	return area->id;
1667 }
1668 
1669 
1670 /*!	Don't use!
1671 	TODO: This function was introduced to map physical page vecs to
1672 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1673 	use a device cache and does not track vm_page::wired_count!
1674 */
1675 area_id
1676 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1677 	uint32 addressSpec, addr_t* _size, uint32 protection,
1678 	struct generic_io_vec* vecs, uint32 vecCount)
1679 {
1680 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1681 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1682 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1683 		addressSpec, _size, protection, vecs, vecCount));
1684 
1685 	if (!arch_vm_supports_protection(protection)
1686 		|| (addressSpec & B_MTR_MASK) != 0) {
1687 		return B_NOT_SUPPORTED;
1688 	}
1689 
1690 	AddressSpaceWriteLocker locker(team);
1691 	if (!locker.IsLocked())
1692 		return B_BAD_TEAM_ID;
1693 
1694 	if (vecCount == 0)
1695 		return B_BAD_VALUE;
1696 
1697 	addr_t size = 0;
1698 	for (uint32 i = 0; i < vecCount; i++) {
1699 		if (vecs[i].base % B_PAGE_SIZE != 0
1700 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1701 			return B_BAD_VALUE;
1702 		}
1703 
1704 		size += vecs[i].length;
1705 	}
1706 
1707 	// create a device cache
1708 	VMCache* cache;
1709 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1710 	if (result != B_OK)
1711 		return result;
1712 
1713 	cache->virtual_end = size;
1714 
1715 	cache->Lock();
1716 
1717 	VMArea* area;
1718 	virtual_address_restrictions addressRestrictions = {};
1719 	addressRestrictions.address = *_address;
1720 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1721 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1722 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1723 		&addressRestrictions, true, &area, _address);
1724 
1725 	if (result != B_OK)
1726 		cache->ReleaseRefLocked();
1727 
1728 	cache->Unlock();
1729 
1730 	if (result != B_OK)
1731 		return result;
1732 
1733 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1734 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1735 		area->Base() + (size - 1));
1736 
1737 	vm_page_reservation reservation;
1738 	vm_page_reserve_pages(&reservation, reservePages,
1739 			team == VMAddressSpace::KernelID()
1740 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1741 	map->Lock();
1742 
1743 	uint32 vecIndex = 0;
1744 	size_t vecOffset = 0;
1745 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1746 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1747 			vecOffset = 0;
1748 			vecIndex++;
1749 		}
1750 
1751 		if (vecIndex >= vecCount)
1752 			break;
1753 
1754 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1755 			protection, area->MemoryType(), &reservation);
1756 
1757 		vecOffset += B_PAGE_SIZE;
1758 	}
1759 
1760 	map->Unlock();
1761 	vm_page_unreserve_pages(&reservation);
1762 
1763 	if (_size != NULL)
1764 		*_size = size;
1765 
1766 	area->cache_type = CACHE_TYPE_DEVICE;
1767 	return area->id;
1768 }
1769 
1770 
1771 area_id
1772 vm_create_null_area(team_id team, const char* name, void** address,
1773 	uint32 addressSpec, addr_t size, uint32 flags)
1774 {
1775 	size = PAGE_ALIGN(size);
1776 
1777 	// Lock the address space and, if B_EXACT_ADDRESS and
1778 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1779 	// is not wired.
1780 	AddressSpaceWriteLocker locker;
1781 	do {
1782 		if (locker.SetTo(team) != B_OK)
1783 			return B_BAD_TEAM_ID;
1784 	} while (addressSpec == B_EXACT_ADDRESS
1785 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1786 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1787 			(addr_t)*address, size, &locker));
1788 
1789 	// create a null cache
1790 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1791 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1792 	VMCache* cache;
1793 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1794 	if (status != B_OK)
1795 		return status;
1796 
1797 	cache->temporary = 1;
1798 	cache->virtual_end = size;
1799 
1800 	cache->Lock();
1801 
1802 	VMArea* area;
1803 	virtual_address_restrictions addressRestrictions = {};
1804 	addressRestrictions.address = *address;
1805 	addressRestrictions.address_specification = addressSpec;
1806 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1807 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1808 		&addressRestrictions, true, &area, address);
1809 
1810 	if (status < B_OK) {
1811 		cache->ReleaseRefAndUnlock();
1812 		return status;
1813 	}
1814 
1815 	cache->Unlock();
1816 
1817 	area->cache_type = CACHE_TYPE_NULL;
1818 	return area->id;
1819 }
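
/*	Usage sketch (illustrative only, assuming a caller running in the kernel):

		void* address = NULL;
		area_id area = vm_create_null_area(VMAddressSpace::KernelID(),
			"guard area", &address, B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE, 0);

	The resulting area is backed by a null cache, i.e. it occupies address
	space without committing any memory.
*/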
1820 
1821 
1822 /*!	Creates the vnode cache for the specified \a vnode.
1823 	The vnode has to be marked busy when calling this function.
1824 */
1825 status_t
1826 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1827 {
1828 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1829 }
1830 
1831 
1832 /*!	\a cache must be locked. The area's address space must be read-locked.
1833 */
1834 static void
1835 pre_map_area_pages(VMArea* area, VMCache* cache,
1836 	vm_page_reservation* reservation)
1837 {
1838 	addr_t baseAddress = area->Base();
1839 	addr_t cacheOffset = area->cache_offset;
1840 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1841 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1842 
1843 	for (VMCachePagesTree::Iterator it
1844 				= cache->pages.GetIterator(firstPage, true, true);
1845 			vm_page* page = it.Next();) {
1846 		if (page->cache_offset >= endPage)
1847 			break;
1848 
1849 		// skip busy and inactive pages
1850 		if (page->busy || page->usage_count == 0)
1851 			continue;
1852 
1853 		DEBUG_PAGE_ACCESS_START(page);
1854 		map_page(area, page,
1855 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1856 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1857 		DEBUG_PAGE_ACCESS_END(page);
1858 	}
1859 }
1860 
1861 
1862 /*!	Will map the file specified by \a fd to an area in memory.
1863 	The file will be mirrored beginning at the specified \a offset. The
1864 	\a offset is rounded down and \a size is rounded up to page boundaries.
1865 */
1866 static area_id
1867 _vm_map_file(team_id team, const char* name, void** _address,
1868 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1869 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1870 {
1871 	// TODO: for binary files, we want to make sure that they get a
1872 	//	snapshot of the file at mapping time, i.e. later changes should not
1873 	//	make it into the mapped copy -- this will need quite some changes
1874 	//	to be done in a nice way
1875 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1876 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1877 
1878 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1879 	size = PAGE_ALIGN(size);
1880 
1881 	if (mapping == REGION_NO_PRIVATE_MAP)
1882 		protection |= B_SHARED_AREA;
1883 	if (addressSpec != B_EXACT_ADDRESS)
1884 		unmapAddressRange = false;
1885 
1886 	if (fd < 0) {
1887 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1888 		virtual_address_restrictions virtualRestrictions = {};
1889 		virtualRestrictions.address = *_address;
1890 		virtualRestrictions.address_specification = addressSpec;
1891 		physical_address_restrictions physicalRestrictions = {};
1892 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1893 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1894 			_address);
1895 	}
1896 
1897 	// get the open flags of the FD
1898 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1899 	if (descriptor == NULL)
1900 		return EBADF;
1901 	int32 openMode = descriptor->open_mode;
1902 	put_fd(descriptor);
1903 
1904 	// The FD must be open for reading in any case. For a shared mapping with
1905 	// write access, it must additionally be open for writing.
1906 	if ((openMode & O_ACCMODE) == O_WRONLY
1907 		|| (mapping == REGION_NO_PRIVATE_MAP
1908 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1909 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1910 		return EACCES;
1911 	}
1912 
1913 	// get the vnode for the object, this also grabs a ref to it
1914 	struct vnode* vnode = NULL;
1915 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1916 	if (status < B_OK)
1917 		return status;
1918 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1919 
1920 	// If we're going to pre-map pages, we need to reserve the pages needed by
1921 	// the mapping backend upfront.
1922 	page_num_t reservedPreMapPages = 0;
1923 	vm_page_reservation reservation;
1924 	if ((protection & B_READ_AREA) != 0) {
1925 		AddressSpaceWriteLocker locker;
1926 		status = locker.SetTo(team);
1927 		if (status != B_OK)
1928 			return status;
1929 
1930 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1931 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1932 
1933 		locker.Unlock();
1934 
1935 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1936 			team == VMAddressSpace::KernelID()
1937 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1938 	}
1939 
1940 	struct PageUnreserver {
1941 		PageUnreserver(vm_page_reservation* reservation)
1942 			:
1943 			fReservation(reservation)
1944 		{
1945 		}
1946 
1947 		~PageUnreserver()
1948 		{
1949 			if (fReservation != NULL)
1950 				vm_page_unreserve_pages(fReservation);
1951 		}
1952 
1953 		vm_page_reservation* fReservation;
1954 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1955 
1956 	// Lock the address space and, if the specified address range shall be
1957 	// unmapped, ensure it is not wired.
1958 	AddressSpaceWriteLocker locker;
1959 	do {
1960 		if (locker.SetTo(team) != B_OK)
1961 			return B_BAD_TEAM_ID;
1962 	} while (unmapAddressRange
1963 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1964 			(addr_t)*_address, size, &locker));
1965 
1966 	// TODO: this only works for file systems that use the file cache
1967 	VMCache* cache;
1968 	status = vfs_get_vnode_cache(vnode, &cache, false);
1969 	if (status < B_OK)
1970 		return status;
1971 
1972 	cache->Lock();
1973 
1974 	VMArea* area;
1975 	virtual_address_restrictions addressRestrictions = {};
1976 	addressRestrictions.address = *_address;
1977 	addressRestrictions.address_specification = addressSpec;
1978 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1979 		0, protection, mapping,
1980 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1981 		&addressRestrictions, kernel, &area, _address);
1982 
1983 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1984 		// map_backing_store() cannot know we no longer need the ref
1985 		cache->ReleaseRefLocked();
1986 	}
1987 
1988 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1989 		pre_map_area_pages(area, cache, &reservation);
1990 
1991 	cache->Unlock();
1992 
1993 	if (status == B_OK) {
1994 		// TODO: this probably deserves a smarter solution, i.e. don't always
1995 		// prefetch, and probably don't trigger the prefetch here either.
1996 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1997 			// prefetches at max 10 MB starting from "offset"
1998 	}
1999 
2000 	if (status != B_OK)
2001 		return status;
2002 
2003 	area->cache_type = CACHE_TYPE_VNODE;
2004 	return area->id;
2005 }
2006 
2007 
2008 area_id
2009 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2010 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2011 	int fd, off_t offset)
2012 {
2013 	if (!arch_vm_supports_protection(protection))
2014 		return B_NOT_SUPPORTED;
2015 
2016 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2017 		mapping, unmapAddressRange, fd, offset, true);
2018 }
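
/*	Usage sketch (illustrative only; "fd" stands for a file descriptor that is
	open for reading in the kernel's I/O context):

		void* address = NULL;
		area_id area = vm_map_file(VMAddressSpace::KernelID(), "mapped file",
			&address, B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE,
			B_KERNEL_READ_AREA, REGION_PRIVATE_MAP, false, fd, 0);

	This maps the first 16 pages of the file copy-on-write. Passing
	REGION_NO_PRIVATE_MAP instead creates a shared mapping, which additionally
	requires the FD to be open for writing if write access is requested.
*/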
2019 
2020 
2021 VMCache*
2022 vm_area_get_locked_cache(VMArea* area)
2023 {
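	// The area-to-cache link is guarded by sAreaCacheLock, but the cache has
	// to be returned locked. SwitchFromReadLock() exchanges the read lock for
	// the cache lock and fails if the cache has been deleted in the meantime.
	// Since the area's cache may have changed while sAreaCacheLock was not
	// held, re-check afterwards and retry if necessary.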
2024 	rw_lock_read_lock(&sAreaCacheLock);
2025 
2026 	while (true) {
2027 		VMCache* cache = area->cache;
2028 
2029 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2030 			// cache has been deleted
2031 			rw_lock_read_lock(&sAreaCacheLock);
2032 			continue;
2033 		}
2034 
2035 		rw_lock_read_lock(&sAreaCacheLock);
2036 
2037 		if (cache == area->cache) {
2038 			cache->AcquireRefLocked();
2039 			rw_lock_read_unlock(&sAreaCacheLock);
2040 			return cache;
2041 		}
2042 
2043 		// the cache changed in the meantime
2044 		cache->Unlock();
2045 	}
2046 }
2047 
2048 
2049 void
2050 vm_area_put_locked_cache(VMCache* cache)
2051 {
2052 	cache->ReleaseRefAndUnlock();
2053 }
2054 
2055 
2056 area_id
2057 vm_clone_area(team_id team, const char* name, void** address,
2058 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2059 	bool kernel)
2060 {
2061 	VMArea* newArea = NULL;
2062 	VMArea* sourceArea;
2063 
2064 	// Check whether the source area exists and is cloneable. If so, mark it
2065 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2066 	{
2067 		AddressSpaceWriteLocker locker;
2068 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2069 		if (status != B_OK)
2070 			return status;
2071 
2072 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2073 			return B_NOT_ALLOWED;
2074 
2075 		sourceArea->protection |= B_SHARED_AREA;
2076 		protection |= B_SHARED_AREA;
2077 	}
2078 
2079 	// Now lock both address spaces and actually do the cloning.
2080 
2081 	MultiAddressSpaceLocker locker;
2082 	VMAddressSpace* sourceAddressSpace;
2083 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2084 	if (status != B_OK)
2085 		return status;
2086 
2087 	VMAddressSpace* targetAddressSpace;
2088 	status = locker.AddTeam(team, true, &targetAddressSpace);
2089 	if (status != B_OK)
2090 		return status;
2091 
2092 	status = locker.Lock();
2093 	if (status != B_OK)
2094 		return status;
2095 
2096 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2097 	if (sourceArea == NULL)
2098 		return B_BAD_VALUE;
2099 
2100 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2101 		return B_NOT_ALLOWED;
2102 
2103 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2104 
2105 	if (!kernel && sourceAddressSpace == VMAddressSpace::Kernel()
2106 		&& targetAddressSpace != VMAddressSpace::Kernel()
2107 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2108 		// kernel areas must not be cloned in userland, unless explicitly
2109 		// declared user-cloneable upon construction
2110 #if KDEBUG_LEVEL_2
2111 		panic("attempting to clone non-user-cloneable kernel area!");
2112 #endif
2113 		status = B_NOT_ALLOWED;
2114 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2115 		status = B_NOT_ALLOWED;
2116 	} else {
2117 		virtual_address_restrictions addressRestrictions = {};
2118 		addressRestrictions.address = *address;
2119 		addressRestrictions.address_specification = addressSpec;
2120 		status = map_backing_store(targetAddressSpace, cache,
2121 			sourceArea->cache_offset, name, sourceArea->Size(),
2122 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2123 			kernel, &newArea, address);
2124 	}
2125 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2126 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2127 		// to create a new cache, and has therefore already acquired a reference
2128 		// to the source cache - but otherwise it has no idea that we need
2129 		// one.
2130 		cache->AcquireRefLocked();
2131 	}
2132 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2133 		// we need to map in everything at this point
2134 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2135 			// we don't have actual pages to map but a physical area
2136 			VMTranslationMap* map
2137 				= sourceArea->address_space->TranslationMap();
2138 			map->Lock();
2139 
2140 			phys_addr_t physicalAddress;
2141 			uint32 oldProtection;
2142 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2143 
2144 			map->Unlock();
2145 
2146 			map = targetAddressSpace->TranslationMap();
2147 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2148 				newArea->Base() + (newArea->Size() - 1));
2149 
2150 			vm_page_reservation reservation;
2151 			vm_page_reserve_pages(&reservation, reservePages,
2152 				targetAddressSpace == VMAddressSpace::Kernel()
2153 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2154 			map->Lock();
2155 
2156 			for (addr_t offset = 0; offset < newArea->Size();
2157 					offset += B_PAGE_SIZE) {
2158 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2159 					protection, newArea->MemoryType(), &reservation);
2160 			}
2161 
2162 			map->Unlock();
2163 			vm_page_unreserve_pages(&reservation);
2164 		} else {
2165 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2166 			size_t reservePages = map->MaxPagesNeededToMap(
2167 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2168 			vm_page_reservation reservation;
2169 			vm_page_reserve_pages(&reservation, reservePages,
2170 				targetAddressSpace == VMAddressSpace::Kernel()
2171 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2172 
2173 			// map in all pages from source
2174 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2175 					vm_page* page  = it.Next();) {
2176 				if (!page->busy) {
2177 					DEBUG_PAGE_ACCESS_START(page);
2178 					map_page(newArea, page,
2179 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2180 							- newArea->cache_offset),
2181 						protection, &reservation);
2182 					DEBUG_PAGE_ACCESS_END(page);
2183 				}
2184 			}
2185 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2186 			// ensuring that!
2187 
2188 			vm_page_unreserve_pages(&reservation);
2189 		}
2190 	}
2191 	if (status == B_OK)
2192 		newArea->cache_type = sourceArea->cache_type;
2193 
2194 	vm_area_put_locked_cache(cache);
2195 
2196 	if (status < B_OK)
2197 		return status;
2198 
2199 	return newArea->id;
2200 }
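
/*	Usage sketch (illustrative only; "sourceID" stands for a valid, cloneable
	area):

		void* address = NULL;
		area_id clone = vm_clone_area(VMAddressSpace::KernelID(),
			"cloned area", &address, B_ANY_KERNEL_ADDRESS,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, REGION_NO_PRIVATE_MAP,
			sourceID, true);

	With REGION_NO_PRIVATE_MAP both areas share the same cache; the source
	area is marked B_SHARED_AREA so that copy-on-write does not separate them
	later.
*/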
2201 
2202 
2203 /*!	Deletes the specified area of the given address space.
2204 
2205 	The address space must be write-locked.
2206 	The caller must ensure that the area does not have any wired ranges.
2207 
2208 	\param addressSpace The address space containing the area.
2209 	\param area The area to be deleted.
2210 	\param deletingAddressSpace \c true, if the address space is in the process
2211 		of being deleted.
2212 */
2213 static void
2214 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2215 	bool deletingAddressSpace)
2216 {
2217 	ASSERT(!area->IsWired());
2218 
2219 	VMAreaHash::Remove(area);
2220 
2221 	// At this point the area is removed from the global hash table, but
2222 	// still exists in the area list.
2223 
2224 	// Unmap the virtual address space the area occupied.
2225 	{
2226 		// We need to lock the complete cache chain.
2227 		VMCache* topCache = vm_area_get_locked_cache(area);
2228 		VMCacheChainLocker cacheChainLocker(topCache);
2229 		cacheChainLocker.LockAllSourceCaches();
2230 
2231 		// If the area's top cache is a temporary cache and the area is the only
2232 		// one referencing it (besides us currently holding a second reference),
2233 		// the unmapping code doesn't need to care about preserving the accessed
2234 		// and dirty flags of the top cache page mappings.
2235 		bool ignoreTopCachePageFlags
2236 			= topCache->temporary && topCache->RefCount() == 2;
2237 
2238 		area->address_space->TranslationMap()->UnmapArea(area,
2239 			deletingAddressSpace, ignoreTopCachePageFlags);
2240 	}
2241 
2242 	if (!area->cache->temporary)
2243 		area->cache->WriteModified();
2244 
2245 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2246 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2247 
2248 	arch_vm_unset_memory_type(area);
2249 	addressSpace->RemoveArea(area, allocationFlags);
2250 	addressSpace->Put();
2251 
2252 	area->cache->RemoveArea(area);
2253 	area->cache->ReleaseRef();
2254 
2255 	addressSpace->DeleteArea(area, allocationFlags);
2256 }
2257 
2258 
2259 status_t
2260 vm_delete_area(team_id team, area_id id, bool kernel)
2261 {
2262 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2263 		team, id));
2264 
2265 	// lock the address space and make sure the area isn't wired
2266 	AddressSpaceWriteLocker locker;
2267 	VMArea* area;
2268 	AreaCacheLocker cacheLocker;
2269 
2270 	do {
2271 		status_t status = locker.SetFromArea(team, id, area);
2272 		if (status != B_OK)
2273 			return status;
2274 
2275 		cacheLocker.SetTo(area);
2276 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2277 
2278 	cacheLocker.Unlock();
2279 
2280 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2281 		return B_NOT_ALLOWED;
2282 
2283 	delete_area(locker.AddressSpace(), area, false);
2284 	return B_OK;
2285 }
2286 
2287 
2288 /*!	Creates a new cache on top of given cache, moves all areas from
2289 	the old cache to the new one, and changes the protection of all affected
2290 	areas' pages to read-only. If requested, wired pages are moved up to the
2291 	new cache and copies are added to the old cache in their place.
2292 	Preconditions:
2293 	- The given cache must be locked.
2294 	- All of the cache's areas' address spaces must be read locked.
2295 	- Either the cache must not have any wired ranges or a page reservation for
2296 	  all wired pages must be provided, so they can be copied.
2297 
2298 	\param lowerCache The cache on top of which a new cache shall be created.
2299 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2300 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2301 		has wired pages. The wired pages are copied in this case.
2302 */
2303 static status_t
2304 vm_copy_on_write_area(VMCache* lowerCache,
2305 	vm_page_reservation* wiredPagesReservation)
2306 {
2307 	VMCache* upperCache;
2308 
2309 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2310 
2311 	// We need to separate the cache from its areas. The cache goes one level
2312 	// deeper and we create a new cache in between.
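	//
	// Illustrative sketch of the resulting cache chain:
	//
	//	before:  areas -> lowerCache -> ... -> source
	//	after:   areas -> upperCache (new, anonymous, temporary)
	//	                      -> lowerCache -> ... -> source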
2313 
2314 	// create an anonymous cache
2315 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2316 		lowerCache->GuardSize() / B_PAGE_SIZE,
2317 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2318 		VM_PRIORITY_USER);
2319 	if (status != B_OK)
2320 		return status;
2321 
2322 	upperCache->Lock();
2323 
2324 	upperCache->temporary = 1;
2325 	upperCache->virtual_base = lowerCache->virtual_base;
2326 	upperCache->virtual_end = lowerCache->virtual_end;
2327 
2328 	// transfer the lower cache areas to the upper cache
2329 	rw_lock_write_lock(&sAreaCacheLock);
2330 	upperCache->TransferAreas(lowerCache);
2331 	rw_lock_write_unlock(&sAreaCacheLock);
2332 
2333 	lowerCache->AddConsumer(upperCache);
2334 
2335 	// We now need to remap all pages from all of the cache's areas read-only,
2336 	// so that a copy will be created on next write access. If there are wired
2337 	// pages, we keep their protection, move them to the upper cache and create
2338 	// copies for the lower cache.
2339 	if (wiredPagesReservation != NULL) {
2340 		// We need to handle wired pages -- iterate through the cache's pages.
2341 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2342 				vm_page* page = it.Next();) {
2343 			if (page->WiredCount() > 0) {
2344 				// allocate a new page and copy the wired one
2345 				vm_page* copiedPage = vm_page_allocate_page(
2346 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2347 
2348 				vm_memcpy_physical_page(
2349 					copiedPage->physical_page_number * B_PAGE_SIZE,
2350 					page->physical_page_number * B_PAGE_SIZE);
2351 
2352 				// move the wired page to the upper cache (note: removing is OK
2353 				// with the SplayTree iterator) and insert the copy
2354 				upperCache->MovePage(page);
2355 				lowerCache->InsertPage(copiedPage,
2356 					page->cache_offset * B_PAGE_SIZE);
2357 
2358 				DEBUG_PAGE_ACCESS_END(copiedPage);
2359 			} else {
2360 				// Change the protection of this page in all areas.
2361 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2362 						tempArea = tempArea->cache_next) {
2363 					// The area must be readable in the same way it was
2364 					// previously writable.
2365 					uint32 protection = B_KERNEL_READ_AREA;
2366 					if ((tempArea->protection & B_READ_AREA) != 0)
2367 						protection |= B_READ_AREA;
2368 
2369 					VMTranslationMap* map
2370 						= tempArea->address_space->TranslationMap();
2371 					map->Lock();
2372 					map->ProtectPage(tempArea,
2373 						virtual_page_address(tempArea, page), protection);
2374 					map->Unlock();
2375 				}
2376 			}
2377 		}
2378 	} else {
2379 		ASSERT(lowerCache->WiredPagesCount() == 0);
2380 
2381 		// just change the protection of all areas
2382 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2383 				tempArea = tempArea->cache_next) {
2384 			// The area must be readable in the same way it was previously
2385 			// writable.
2386 			uint32 protection = B_KERNEL_READ_AREA;
2387 			if ((tempArea->protection & B_READ_AREA) != 0)
2388 				protection |= B_READ_AREA;
2389 
2390 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2391 			map->Lock();
2392 			map->ProtectArea(tempArea, protection);
2393 			map->Unlock();
2394 		}
2395 	}
2396 
2397 	vm_area_put_locked_cache(upperCache);
2398 
2399 	return B_OK;
2400 }
2401 
2402 
2403 area_id
2404 vm_copy_area(team_id team, const char* name, void** _address,
2405 	uint32 addressSpec, uint32 protection, area_id sourceID)
2406 {
2407 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2408 
2409 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2410 		// set the same protection for the kernel as for userland
2411 		protection |= B_KERNEL_READ_AREA;
2412 		if (writableCopy)
2413 			protection |= B_KERNEL_WRITE_AREA;
2414 	}
2415 
2416 	// Do the locking: target address space, all address spaces associated with
2417 	// the source cache, and the cache itself.
2418 	MultiAddressSpaceLocker locker;
2419 	VMAddressSpace* targetAddressSpace;
2420 	VMCache* cache;
2421 	VMArea* source;
2422 	AreaCacheLocker cacheLocker;
2423 	status_t status;
2424 	bool sharedArea;
2425 
2426 	page_num_t wiredPages = 0;
2427 	vm_page_reservation wiredPagesReservation;
2428 
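	// Reserving pages for the wired pages requires dropping the locks; after
	// re-locking, the number of wired pages may have changed, so repeat until
	// the reservation matches the current count.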
2429 	bool restart;
2430 	do {
2431 		restart = false;
2432 
2433 		locker.Unset();
2434 		status = locker.AddTeam(team, true, &targetAddressSpace);
2435 		if (status == B_OK) {
2436 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2437 				&cache);
2438 		}
2439 		if (status != B_OK)
2440 			return status;
2441 
2442 		cacheLocker.SetTo(cache, true);	// already locked
2443 
2444 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2445 
2446 		page_num_t oldWiredPages = wiredPages;
2447 		wiredPages = 0;
2448 
2449 		// If the source area isn't shared, count the number of wired pages in
2450 		// the cache and reserve as many pages.
2451 		if (!sharedArea) {
2452 			wiredPages = cache->WiredPagesCount();
2453 
2454 			if (wiredPages > oldWiredPages) {
2455 				cacheLocker.Unlock();
2456 				locker.Unlock();
2457 
2458 				if (oldWiredPages > 0)
2459 					vm_page_unreserve_pages(&wiredPagesReservation);
2460 
2461 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2462 					VM_PRIORITY_USER);
2463 
2464 				restart = true;
2465 			}
2466 		} else if (oldWiredPages > 0)
2467 			vm_page_unreserve_pages(&wiredPagesReservation);
2468 	} while (restart);
2469 
2470 	// unreserve pages later
2471 	struct PagesUnreserver {
2472 		PagesUnreserver(vm_page_reservation* reservation)
2473 			:
2474 			fReservation(reservation)
2475 		{
2476 		}
2477 
2478 		~PagesUnreserver()
2479 		{
2480 			if (fReservation != NULL)
2481 				vm_page_unreserve_pages(fReservation);
2482 		}
2483 
2484 	private:
2485 		vm_page_reservation*	fReservation;
2486 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2487 
2488 	if (addressSpec == B_CLONE_ADDRESS) {
2489 		addressSpec = B_EXACT_ADDRESS;
2490 		*_address = (void*)source->Base();
2491 	}
2492 
2493 	// First, create a cache on top of the source area, or use the existing
2494 	// one, if this is a shared area.
2495 
2496 	VMArea* target;
2497 	virtual_address_restrictions addressRestrictions = {};
2498 	addressRestrictions.address = *_address;
2499 	addressRestrictions.address_specification = addressSpec;
2500 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2501 		name, source->Size(), source->wiring, protection,
2502 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2503 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2504 		&addressRestrictions, true, &target, _address);
2505 	if (status < B_OK)
2506 		return status;
2507 
2508 	if (sharedArea) {
2509 		// The new area uses the old area's cache, but map_backing_store()
2510 		// hasn't acquired a ref. So we have to do that now.
2511 		cache->AcquireRefLocked();
2512 	}
2513 
2514 	// If the source area is writable, we need to move it one layer up as well
2515 
2516 	if (!sharedArea) {
2517 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2518 			// TODO: do something more useful if this fails!
2519 			if (vm_copy_on_write_area(cache,
2520 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2521 				panic("vm_copy_on_write_area() failed!\n");
2522 			}
2523 		}
2524 	}
2525 
2526 	// we return the ID of the newly created area
2527 	return target->id;
2528 }
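
/*	Usage sketch (illustrative only; "team" and "sourceID" stand for the
	target team and a valid source area):

		void* address = NULL;
		area_id copy = vm_copy_area(team, "area copy", &address,
			B_ANY_ADDRESS, B_READ_AREA | B_WRITE_AREA, sourceID);

	For a non-shared, writable source both areas end up referencing the same
	pages mapped read-only, and private copies are created lazily on the next
	write fault.
*/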
2529 
2530 
2531 status_t
2532 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2533 	bool kernel)
2534 {
2535 	fix_protection(&newProtection);
2536 
2537 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2538 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2539 
2540 	if (!arch_vm_supports_protection(newProtection))
2541 		return B_NOT_SUPPORTED;
2542 
2543 	bool becomesWritable
2544 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2545 
2546 	// lock address spaces and cache
2547 	MultiAddressSpaceLocker locker;
2548 	VMCache* cache;
2549 	VMArea* area;
2550 	status_t status;
2551 	AreaCacheLocker cacheLocker;
2552 	bool isWritable;
2553 
2554 	bool restart;
2555 	do {
2556 		restart = false;
2557 
2558 		locker.Unset();
2559 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2560 		if (status != B_OK)
2561 			return status;
2562 
2563 		cacheLocker.SetTo(cache, true);	// already locked
2564 
2565 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2566 			return B_NOT_ALLOWED;
2567 
2568 		if (area->protection == newProtection)
2569 			return B_OK;
2570 
2571 		if (team != VMAddressSpace::KernelID()
2572 			&& area->address_space->ID() != team) {
2573 			// unless you're the kernel, you are only allowed to set
2574 			// the protection of your own areas
2575 			return B_NOT_ALLOWED;
2576 		}
2577 
2578 		isWritable
2579 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2580 
2581 		// Make sure the area (or, if we're going to call
2582 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2583 		// wired ranges.
2584 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2585 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2586 					otherArea = otherArea->cache_next) {
2587 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2588 					restart = true;
2589 					break;
2590 				}
2591 			}
2592 		} else {
2593 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2594 				restart = true;
2595 		}
2596 	} while (restart);
2597 
2598 	bool changePageProtection = true;
2599 	bool changeTopCachePagesOnly = false;
2600 
2601 	if (isWritable && !becomesWritable) {
2602 		// writable -> !writable
2603 
2604 		if (cache->source != NULL && cache->temporary) {
2605 			if (cache->CountWritableAreas(area) == 0) {
2606 				// Since this cache now lives from the pages in its source cache,
2607 				// Since this cache is now backed by the pages of its source
2608 				// cache, we can reduce the cache's commitment to cover only the
2609 				// pages that are actually in this cache.
2610 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2611 					team == VMAddressSpace::KernelID()
2612 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2613 
2614 				// TODO: we may be able to join with our source cache, if
2615 				// count == 0
2616 			}
2617 		}
2618 
2619 		// If only the writability changes, we can just remap the pages of the
2620 		// top cache, since the pages of lower caches are mapped read-only
2621 		// anyway. That's only advantageous if the number of pages in the cache
2622 		// is significantly smaller than the number of pages in the area,
2623 		// though.
2624 		if (newProtection
2625 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2626 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2627 			changeTopCachePagesOnly = true;
2628 		}
2629 	} else if (!isWritable && becomesWritable) {
2630 		// !writable -> writable
2631 
2632 		if (!cache->consumers.IsEmpty()) {
2633 			// There are consumers -- we have to insert a new cache. Fortunately
2634 			// vm_copy_on_write_area() does everything that's needed.
2635 			changePageProtection = false;
2636 			status = vm_copy_on_write_area(cache, NULL);
2637 		} else {
2638 			// No consumers, so we don't need to insert a new one.
2639 			if (cache->source != NULL && cache->temporary) {
2640 				// the cache's commitment must contain all possible pages
2641 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2642 					team == VMAddressSpace::KernelID()
2643 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2644 			}
2645 
2646 			if (status == B_OK && cache->source != NULL) {
2647 				// There's a source cache, hence we can't just change all pages'
2648 				// protection or we might allow writing into pages belonging to
2649 				// a lower cache.
2650 				changeTopCachePagesOnly = true;
2651 			}
2652 		}
2653 	} else {
2654 		// we don't have anything special to do in all other cases
2655 	}
2656 
2657 	if (status == B_OK) {
2658 		// remap existing pages in this cache
2659 		if (changePageProtection) {
2660 			VMTranslationMap* map = area->address_space->TranslationMap();
2661 			map->Lock();
2662 
2663 			if (changeTopCachePagesOnly) {
2664 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2665 				page_num_t lastPageOffset
2666 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2667 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2668 						vm_page* page = it.Next();) {
2669 					if (page->cache_offset >= firstPageOffset
2670 						&& page->cache_offset < lastPageOffset) {
2671 						addr_t address = virtual_page_address(area, page);
2672 						map->ProtectPage(area, address, newProtection);
2673 					}
2674 				}
2675 			} else
2676 				map->ProtectArea(area, newProtection);
2677 
2678 			map->Unlock();
2679 		}
2680 
2681 		area->protection = newProtection;
2682 	}
2683 
2684 	return status;
2685 }
2686 
2687 
2688 status_t
2689 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2690 {
2691 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2692 	if (addressSpace == NULL)
2693 		return B_BAD_TEAM_ID;
2694 
2695 	VMTranslationMap* map = addressSpace->TranslationMap();
2696 
2697 	map->Lock();
2698 	uint32 dummyFlags;
2699 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2700 	map->Unlock();
2701 
2702 	addressSpace->Put();
2703 	return status;
2704 }
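
/*	Usage sketch (illustrative only; "virtualAddress" stands for an address of
	interest):

		phys_addr_t physicalAddress;
		if (vm_get_page_mapping(VMAddressSpace::KernelID(), virtualAddress,
				&physicalAddress) == B_OK) {
			// virtualAddress is currently mapped at physicalAddress
		}
*/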
2705 
2706 
2707 /*!	The page's cache must be locked.
2708 */
2709 bool
2710 vm_test_map_modification(vm_page* page)
2711 {
2712 	if (page->modified)
2713 		return true;
2714 
2715 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2716 	vm_page_mapping* mapping;
2717 	while ((mapping = iterator.Next()) != NULL) {
2718 		VMArea* area = mapping->area;
2719 		VMTranslationMap* map = area->address_space->TranslationMap();
2720 
2721 		phys_addr_t physicalAddress;
2722 		uint32 flags;
2723 		map->Lock();
2724 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2725 		map->Unlock();
2726 
2727 		if ((flags & PAGE_MODIFIED) != 0)
2728 			return true;
2729 	}
2730 
2731 	return false;
2732 }
2733 
2734 
2735 /*!	The page's cache must be locked.
2736 */
2737 void
2738 vm_clear_map_flags(vm_page* page, uint32 flags)
2739 {
2740 	if ((flags & PAGE_ACCESSED) != 0)
2741 		page->accessed = false;
2742 	if ((flags & PAGE_MODIFIED) != 0)
2743 		page->modified = false;
2744 
2745 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2746 	vm_page_mapping* mapping;
2747 	while ((mapping = iterator.Next()) != NULL) {
2748 		VMArea* area = mapping->area;
2749 		VMTranslationMap* map = area->address_space->TranslationMap();
2750 
2751 		map->Lock();
2752 		map->ClearFlags(virtual_page_address(area, page), flags);
2753 		map->Unlock();
2754 	}
2755 }
2756 
2757 
2758 /*!	Removes all mappings from a page.
2759 	After you've called this function, the page is unmapped from memory and
2760 	the page's \c accessed and \c modified flags have been updated according
2761 	to the state of the mappings.
2762 	The page's cache must be locked.
2763 */
2764 void
2765 vm_remove_all_page_mappings(vm_page* page)
2766 {
2767 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2768 		VMArea* area = mapping->area;
2769 		VMTranslationMap* map = area->address_space->TranslationMap();
2770 		addr_t address = virtual_page_address(area, page);
2771 		map->UnmapPage(area, address, false);
2772 	}
2773 }
2774 
2775 
2776 int32
2777 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2778 {
2779 	int32 count = 0;
2780 
2781 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2782 	vm_page_mapping* mapping;
2783 	while ((mapping = iterator.Next()) != NULL) {
2784 		VMArea* area = mapping->area;
2785 		VMTranslationMap* map = area->address_space->TranslationMap();
2786 
2787 		bool modified;
2788 		if (map->ClearAccessedAndModified(area,
2789 				virtual_page_address(area, page), false, modified)) {
2790 			count++;
2791 		}
2792 
2793 		page->modified |= modified;
2794 	}
2795 
2796 
2797 	if (page->accessed) {
2798 		count++;
2799 		page->accessed = false;
2800 	}
2801 
2802 	return count;
2803 }
2804 
2805 
2806 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2807 	mappings.
2808 	The function iterates through the page mappings and removes them until
2809 	encountering one that has been accessed. From then on it will continue to
2810 	iterate, but only clear the accessed flag of the mapping. The page's
2811 	\c modified bit will be updated accordingly, the \c accessed bit will be
2812 	cleared.
2813 	\return The number of mapping accessed bits encountered, including the
2814 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2815 		of the page have been removed.
2816 */
2817 int32
2818 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2819 {
2820 	ASSERT(page->WiredCount() == 0);
2821 
2822 	if (page->accessed)
2823 		return vm_clear_page_mapping_accessed_flags(page);
2824 
2825 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2826 		VMArea* area = mapping->area;
2827 		VMTranslationMap* map = area->address_space->TranslationMap();
2828 		addr_t address = virtual_page_address(area, page);
2829 		bool modified = false;
2830 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2831 			page->accessed = true;
2832 			page->modified |= modified;
2833 			return vm_clear_page_mapping_accessed_flags(page);
2834 		}
2835 		page->modified |= modified;
2836 	}
2837 
2838 	return 0;
2839 }
2840 
2841 
2842 static int
2843 display_mem(int argc, char** argv)
2844 {
2845 	bool physical = false;
2846 	addr_t copyAddress;
2847 	int32 displayWidth;
2848 	int32 itemSize;
2849 	int32 num = -1;
2850 	addr_t address;
2851 	int i = 1, j;
2852 
2853 	if (argc > 1 && argv[1][0] == '-') {
2854 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2855 			physical = true;
2856 			i++;
2857 		} else
2858 			i = 99;
2859 	}
2860 
2861 	if (argc < i + 1 || argc > i + 2) {
2862 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2863 			"\tdl - 8 bytes\n"
2864 			"\tdw - 4 bytes\n"
2865 			"\tds - 2 bytes\n"
2866 			"\tdb - 1 byte\n"
2867 			"\tstring - a whole string\n"
2868 			"  -p or --physical only allows memory from a single page to be "
2869 			"displayed.\n");
2870 		return 0;
2871 	}
2872 
2873 	address = parse_expression(argv[i]);
2874 
2875 	if (argc > i + 1)
2876 		num = parse_expression(argv[i + 1]);
2877 
2878 	// build the format string
2879 	if (strcmp(argv[0], "db") == 0) {
2880 		itemSize = 1;
2881 		displayWidth = 16;
2882 	} else if (strcmp(argv[0], "ds") == 0) {
2883 		itemSize = 2;
2884 		displayWidth = 8;
2885 	} else if (strcmp(argv[0], "dw") == 0) {
2886 		itemSize = 4;
2887 		displayWidth = 4;
2888 	} else if (strcmp(argv[0], "dl") == 0) {
2889 		itemSize = 8;
2890 		displayWidth = 2;
2891 	} else if (strcmp(argv[0], "string") == 0) {
2892 		itemSize = 1;
2893 		displayWidth = -1;
2894 	} else {
2895 		kprintf("display_mem called in an invalid way!\n");
2896 		return 0;
2897 	}
2898 
2899 	if (num <= 0)
2900 		num = displayWidth;
2901 
2902 	void* physicalPageHandle = NULL;
2903 
2904 	if (physical) {
2905 		int32 offset = address & (B_PAGE_SIZE - 1);
2906 		if (num * itemSize + offset > B_PAGE_SIZE) {
2907 			num = (B_PAGE_SIZE - offset) / itemSize;
2908 			kprintf("NOTE: number of bytes has been cut to page size\n");
2909 		}
2910 
2911 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2912 
2913 		if (vm_get_physical_page_debug(address, &copyAddress,
2914 				&physicalPageHandle) != B_OK) {
2915 			kprintf("getting the hardware page failed.\n");
2916 			return 0;
2917 		}
2918 
2919 		address += offset;
2920 		copyAddress += offset;
2921 	} else
2922 		copyAddress = address;
2923 
2924 	if (!strcmp(argv[0], "string")) {
2925 		kprintf("%p \"", (char*)copyAddress);
2926 
2927 		// string mode
2928 		for (i = 0; true; i++) {
2929 			char c;
2930 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2931 					!= B_OK
2932 				|| c == '\0') {
2933 				break;
2934 			}
2935 
2936 			if (c == '\n')
2937 				kprintf("\\n");
2938 			else if (c == '\t')
2939 				kprintf("\\t");
2940 			else {
2941 				if (!isprint(c))
2942 					c = '.';
2943 
2944 				kprintf("%c", c);
2945 			}
2946 		}
2947 
2948 		kprintf("\"\n");
2949 	} else {
2950 		// number mode
2951 		for (i = 0; i < num; i++) {
2952 			uint32 value;
2953 
2954 			if ((i % displayWidth) == 0) {
2955 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2956 				if (i != 0)
2957 					kprintf("\n");
2958 
2959 				kprintf("[0x%lx]  ", address + i * itemSize);
2960 
2961 				for (j = 0; j < displayed; j++) {
2962 					char c;
2963 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2964 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2965 						displayed = j;
2966 						break;
2967 					}
2968 					if (!isprint(c))
2969 						c = '.';
2970 
2971 					kprintf("%c", c);
2972 				}
2973 				if (num > displayWidth) {
2974 					// make sure the spacing in the last line is correct
2975 					for (j = displayed; j < displayWidth * itemSize; j++)
2976 						kprintf(" ");
2977 				}
2978 				kprintf("  ");
2979 			}
2980 
2981 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2982 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2983 				kprintf("read fault");
2984 				break;
2985 			}
2986 
2987 			switch (itemSize) {
2988 				case 1:
2989 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
2990 					break;
2991 				case 2:
2992 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
2993 					break;
2994 				case 4:
2995 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
2996 					break;
2997 				case 8:
2998 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
2999 					break;
3000 			}
3001 		}
3002 
3003 		kprintf("\n");
3004 	}
3005 
3006 	if (physical) {
3007 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3008 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3009 	}
3010 	return 0;
3011 }
3012 
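
/*	Examples (in the kernel debugger, illustrative): "dw 0x80000000 8" dumps
	eight 32-bit words starting at 0x80000000, and "string 0x80000000" prints
	the NUL-terminated string at that address. With -p/--physical the address
	is interpreted as a physical address and the output is limited to a single
	page.
*/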
3013 
3014 static void
3015 dump_cache_tree_recursively(VMCache* cache, int level,
3016 	VMCache* highlightCache)
3017 {
3018 	// print this cache
3019 	for (int i = 0; i < level; i++)
3020 		kprintf("  ");
3021 	if (cache == highlightCache)
3022 		kprintf("%p <--\n", cache);
3023 	else
3024 		kprintf("%p\n", cache);
3025 
3026 	// recursively print its consumers
3027 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3028 			VMCache* consumer = it.Next();) {
3029 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3030 	}
3031 }
3032 
3033 
3034 static int
3035 dump_cache_tree(int argc, char** argv)
3036 {
3037 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3038 		kprintf("usage: %s <address>\n", argv[0]);
3039 		return 0;
3040 	}
3041 
3042 	addr_t address = parse_expression(argv[1]);
3043 	if (address == 0)
3044 		return 0;
3045 
3046 	VMCache* cache = (VMCache*)address;
3047 	VMCache* root = cache;
3048 
3049 	// find the root cache (the transitive source)
3050 	while (root->source != NULL)
3051 		root = root->source;
3052 
3053 	dump_cache_tree_recursively(root, 0, cache);
3054 
3055 	return 0;
3056 }
3057 
3058 
3059 const char*
3060 vm_cache_type_to_string(int32 type)
3061 {
3062 	switch (type) {
3063 		case CACHE_TYPE_RAM:
3064 			return "RAM";
3065 		case CACHE_TYPE_DEVICE:
3066 			return "device";
3067 		case CACHE_TYPE_VNODE:
3068 			return "vnode";
3069 		case CACHE_TYPE_NULL:
3070 			return "null";
3071 
3072 		default:
3073 			return "unknown";
3074 	}
3075 }
3076 
3077 
3078 #if DEBUG_CACHE_LIST
3079 
3080 static void
3081 update_cache_info_recursively(VMCache* cache, cache_info& info)
3082 {
3083 	info.page_count += cache->page_count;
3084 	if (cache->type == CACHE_TYPE_RAM)
3085 		info.committed += cache->committed_size;
3086 
3087 	// recurse
3088 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3089 			VMCache* consumer = it.Next();) {
3090 		update_cache_info_recursively(consumer, info);
3091 	}
3092 }
3093 
3094 
3095 static int
3096 cache_info_compare_page_count(const void* _a, const void* _b)
3097 {
3098 	const cache_info* a = (const cache_info*)_a;
3099 	const cache_info* b = (const cache_info*)_b;
3100 	if (a->page_count == b->page_count)
3101 		return 0;
3102 	return a->page_count < b->page_count ? 1 : -1;
3103 }
3104 
3105 
3106 static int
3107 cache_info_compare_committed(const void* _a, const void* _b)
3108 {
3109 	const cache_info* a = (const cache_info*)_a;
3110 	const cache_info* b = (const cache_info*)_b;
3111 	if (a->committed == b->committed)
3112 		return 0;
3113 	return a->committed < b->committed ? 1 : -1;
3114 }
3115 
3116 
3117 static void
3118 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3119 {
3120 	for (int i = 0; i < level; i++)
3121 		kprintf("  ");
3122 
3123 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3124 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3125 		cache->virtual_base, cache->virtual_end, cache->page_count);
3126 
3127 	if (level == 0)
3128 		kprintf("/%lu", info.page_count);
3129 
3130 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3131 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3132 
3133 		if (level == 0)
3134 			kprintf("/%lu", info.committed);
3135 	}
3136 
3137 	// areas
3138 	if (cache->areas != NULL) {
3139 		VMArea* area = cache->areas;
3140 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3141 			area->name, area->address_space->ID());
3142 
3143 		while (area->cache_next != NULL) {
3144 			area = area->cache_next;
3145 			kprintf(", %" B_PRId32, area->id);
3146 		}
3147 	}
3148 
3149 	kputs("\n");
3150 
3151 	// recurse
3152 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3153 			VMCache* consumer = it.Next();) {
3154 		dump_caches_recursively(consumer, info, level + 1);
3155 	}
3156 }
3157 
3158 
3159 static int
3160 dump_caches(int argc, char** argv)
3161 {
3162 	if (sCacheInfoTable == NULL) {
3163 		kprintf("No cache info table!\n");
3164 		return 0;
3165 	}
3166 
3167 	bool sortByPageCount = true;
3168 
3169 	for (int32 i = 1; i < argc; i++) {
3170 		if (strcmp(argv[i], "-c") == 0) {
3171 			sortByPageCount = false;
3172 		} else {
3173 			print_debugger_command_usage(argv[0]);
3174 			return 0;
3175 		}
3176 	}
3177 
3178 	uint32 totalCount = 0;
3179 	uint32 rootCount = 0;
3180 	off_t totalCommitted = 0;
3181 	page_num_t totalPages = 0;
3182 
3183 	VMCache* cache = gDebugCacheList;
3184 	while (cache) {
3185 		totalCount++;
3186 		if (cache->source == NULL) {
3187 			cache_info stackInfo;
3188 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3189 				? sCacheInfoTable[rootCount] : stackInfo;
3190 			rootCount++;
3191 			info.cache = cache;
3192 			info.page_count = 0;
3193 			info.committed = 0;
3194 			update_cache_info_recursively(cache, info);
3195 			totalCommitted += info.committed;
3196 			totalPages += info.page_count;
3197 		}
3198 
3199 		cache = cache->debug_next;
3200 	}
3201 
3202 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3203 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3204 			sortByPageCount
3205 				? &cache_info_compare_page_count
3206 				: &cache_info_compare_committed);
3207 	}
3208 
3209 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3210 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3211 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3212 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3213 			"page count" : "committed size");
3214 
3215 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3216 		for (uint32 i = 0; i < rootCount; i++) {
3217 			cache_info& info = sCacheInfoTable[i];
3218 			dump_caches_recursively(info.cache, info, 0);
3219 		}
3220 	} else
3221 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3222 
3223 	return 0;
3224 }
3225 
3226 #endif	// DEBUG_CACHE_LIST
3227 
3228 
3229 static int
3230 dump_cache(int argc, char** argv)
3231 {
3232 	VMCache* cache;
3233 	bool showPages = false;
3234 	int i = 1;
3235 
3236 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3237 		kprintf("usage: %s [-ps] <address>\n"
3238 			"  if -p is specified, all pages are shown; if -s is used,\n"
3239 			"  only the cache info is shown.\n", argv[0]);
3240 		return 0;
3241 	}
3242 	while (argv[i][0] == '-') {
3243 		char* arg = argv[i] + 1;
3244 		while (arg[0]) {
3245 			if (arg[0] == 'p')
3246 				showPages = true;
3247 			arg++;
3248 		}
3249 		i++;
3250 	}
3251 	if (argv[i] == NULL) {
3252 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3253 		return 0;
3254 	}
3255 
3256 	addr_t address = parse_expression(argv[i]);
3257 	if (address == 0)
3258 		return 0;
3259 
3260 	cache = (VMCache*)address;
3261 
3262 	cache->Dump(showPages);
3263 
3264 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3265 
3266 	return 0;
3267 }
3268 
3269 
3270 static void
3271 dump_area_struct(VMArea* area, bool mappings)
3272 {
3273 	kprintf("AREA: %p\n", area);
3274 	kprintf("name:\t\t'%s'\n", area->name);
3275 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3276 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3277 	kprintf("base:\t\t0x%lx\n", area->Base());
3278 	kprintf("size:\t\t0x%lx\n", area->Size());
3279 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3280 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3281 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3282 	kprintf("cache:\t\t%p\n", area->cache);
3283 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3284 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3285 	kprintf("cache_next:\t%p\n", area->cache_next);
3286 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3287 
3288 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3289 	if (mappings) {
3290 		kprintf("page mappings:\n");
3291 		while (iterator.HasNext()) {
3292 			vm_page_mapping* mapping = iterator.Next();
3293 			kprintf("  %p", mapping->page);
3294 		}
3295 		kprintf("\n");
3296 	} else {
3297 		uint32 count = 0;
3298 		while (iterator.Next() != NULL) {
3299 			count++;
3300 		}
3301 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3302 	}
3303 }
3304 
3305 
3306 static int
3307 dump_area(int argc, char** argv)
3308 {
3309 	bool mappings = false;
3310 	bool found = false;
3311 	int32 index = 1;
3312 	VMArea* area;
3313 	addr_t num;
3314 
3315 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3316 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3317 			"All areas matching either id/address/name are listed. You can\n"
3318 			"restrict the match to a specific field by prefixing the specifier\n"
3319 			"with one of the id/contains/address/name keywords.\n"
3320 			"-m shows the area's mappings as well.\n");
3321 		return 0;
3322 	}
3323 
3324 	if (!strcmp(argv[1], "-m")) {
3325 		mappings = true;
3326 		index++;
3327 	}
3328 
3329 	int32 mode = 0xf;
3330 	if (!strcmp(argv[index], "id"))
3331 		mode = 1;
3332 	else if (!strcmp(argv[index], "contains"))
3333 		mode = 2;
3334 	else if (!strcmp(argv[index], "name"))
3335 		mode = 4;
3336 	else if (!strcmp(argv[index], "address"))
3337 		mode = 0;
3338 	if (mode != 0xf)
3339 		index++;
3340 
3341 	if (index >= argc) {
3342 		kprintf("No area specifier given.\n");
3343 		return 0;
3344 	}
3345 
3346 	num = parse_expression(argv[index]);
3347 
3348 	if (mode == 0) {
3349 		dump_area_struct((struct VMArea*)num, mappings);
3350 	} else {
3351 		// walk through the area list, looking for the arguments as a name
3352 
3353 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3354 		while ((area = it.Next()) != NULL) {
3355 			if (((mode & 4) != 0 && area->name != NULL
3356 					&& !strcmp(argv[index], area->name))
3357 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3358 					|| (((mode & 2) != 0 && area->Base() <= num
3359 						&& area->Base() + area->Size() > num))))) {
3360 				dump_area_struct(area, mappings);
3361 				found = true;
3362 			}
3363 		}
3364 
3365 		if (!found)
3366 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3367 	}
3368 
3369 	return 0;
3370 }
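
/*	Examples (in the kernel debugger, illustrative): "area id 0x2f" dumps the
	area with that ID, "area contains 0x80001234" dumps every area whose range
	contains that address, and "area name <name>" dumps the areas whose name
	matches exactly.
*/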
3371 
3372 
3373 static int
3374 dump_area_list(int argc, char** argv)
3375 {
3376 	VMArea* area;
3377 	const char* name = NULL;
3378 	int32 id = 0;
3379 
3380 	if (argc > 1) {
3381 		id = parse_expression(argv[1]);
3382 		if (id == 0)
3383 			name = argv[1];
3384 	}
3385 
3386 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3387 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3388 		B_PRINTF_POINTER_WIDTH, "size");
3389 
3390 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3391 	while ((area = it.Next()) != NULL) {
3392 		if ((id != 0 && area->address_space->ID() != id)
3393 			|| (name != NULL && strstr(area->name, name) == NULL))
3394 			continue;
3395 
3396 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3397 			area->id, (void*)area->Base(), (void*)area->Size(),
3398 			area->protection, area->wiring, area->name);
3399 	}
3400 	return 0;
3401 }
3402 
3403 
3404 static int
3405 dump_available_memory(int argc, char** argv)
3406 {
3407 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3408 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3409 	return 0;
3410 }
3411 
3412 
3413 static int
3414 dump_mapping_info(int argc, char** argv)
3415 {
3416 	bool reverseLookup = false;
3417 	bool pageLookup = false;
3418 
3419 	int argi = 1;
3420 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3421 		const char* arg = argv[argi];
3422 		if (strcmp(arg, "-r") == 0) {
3423 			reverseLookup = true;
3424 		} else if (strcmp(arg, "-p") == 0) {
3425 			reverseLookup = true;
3426 			pageLookup = true;
3427 		} else {
3428 			print_debugger_command_usage(argv[0]);
3429 			return 0;
3430 		}
3431 	}
3432 
3433 	// We need at least one argument, the address. Optionally a thread ID can be
3434 	// specified.
3435 	if (argi >= argc || argi + 2 < argc) {
3436 		print_debugger_command_usage(argv[0]);
3437 		return 0;
3438 	}
3439 
3440 	uint64 addressValue;
3441 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3442 		return 0;
3443 
3444 	Team* team = NULL;
3445 	if (argi < argc) {
3446 		uint64 threadID;
3447 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3448 			return 0;
3449 
3450 		Thread* thread = Thread::GetDebug(threadID);
3451 		if (thread == NULL) {
3452 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3453 			return 0;
3454 		}
3455 
3456 		team = thread->team;
3457 	}
3458 
3459 	if (reverseLookup) {
3460 		phys_addr_t physicalAddress;
3461 		if (pageLookup) {
3462 			vm_page* page = (vm_page*)(addr_t)addressValue;
3463 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3464 		} else {
3465 			physicalAddress = (phys_addr_t)addressValue;
3466 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3467 		}
3468 
3469 		kprintf("    Team     Virtual Address      Area\n");
3470 		kprintf("--------------------------------------\n");
3471 
3472 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3473 			Callback()
3474 				:
3475 				fAddressSpace(NULL)
3476 			{
3477 			}
3478 
3479 			void SetAddressSpace(VMAddressSpace* addressSpace)
3480 			{
3481 				fAddressSpace = addressSpace;
3482 			}
3483 
3484 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3485 			{
3486 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3487 					virtualAddress);
3488 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3489 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3490 				else
3491 					kprintf("\n");
3492 				return false;
3493 			}
3494 
3495 		private:
3496 			VMAddressSpace*	fAddressSpace;
3497 		} callback;
3498 
3499 		if (team != NULL) {
3500 			// team specified -- get its address space
3501 			VMAddressSpace* addressSpace = team->address_space;
3502 			if (addressSpace == NULL) {
3503 				kprintf("Failed to get address space!\n");
3504 				return 0;
3505 			}
3506 
3507 			callback.SetAddressSpace(addressSpace);
3508 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3509 				physicalAddress, callback);
3510 		} else {
3511 			// no team specified -- iterate through all address spaces
3512 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3513 				addressSpace != NULL;
3514 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3515 				callback.SetAddressSpace(addressSpace);
3516 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3517 					physicalAddress, callback);
3518 			}
3519 		}
3520 	} else {
3521 		// get the address space
3522 		addr_t virtualAddress = (addr_t)addressValue;
3523 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3524 		VMAddressSpace* addressSpace;
3525 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3526 			addressSpace = VMAddressSpace::Kernel();
3527 		} else if (team != NULL) {
3528 			addressSpace = team->address_space;
3529 		} else {
3530 			Thread* thread = debug_get_debugged_thread();
3531 			if (thread == NULL || thread->team == NULL) {
3532 				kprintf("Failed to get team!\n");
3533 				return 0;
3534 			}
3535 
3536 			addressSpace = thread->team->address_space;
3537 		}
3538 
3539 		if (addressSpace == NULL) {
3540 			kprintf("Failed to get address space!\n");
3541 			return 0;
3542 		}
3543 
3544 		// let the translation map implementation do the job
3545 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3546 	}
3547 
3548 	return 0;
3549 }
3550 
3551 
3552 /*!	Deletes all areas and reserved regions in the given address space.
3553 
3554 	The caller must ensure that none of the areas has any wired ranges.
3555 
3556 	\param addressSpace The address space.
3557 	\param deletingAddressSpace \c true, if the address space is in the process
3558 		of being deleted.
3559 */
3560 void
3561 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3562 {
3563 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3564 		addressSpace->ID()));
3565 
3566 	addressSpace->WriteLock();
3567 
3568 	// remove all reserved areas in this address space
3569 	addressSpace->UnreserveAllAddressRanges(0);
3570 
3571 	// delete all the areas in this address space
3572 	while (VMArea* area = addressSpace->FirstArea()) {
3573 		ASSERT(!area->IsWired());
3574 		delete_area(addressSpace, area, deletingAddressSpace);
3575 	}
3576 
3577 	addressSpace->WriteUnlock();
3578 }
3579 
3580 
3581 static area_id
3582 vm_area_for(addr_t address, bool kernel)
3583 {
3584 	team_id team;
3585 	if (IS_USER_ADDRESS(address)) {
3586 		// we try the user team address space, if any
3587 		team = VMAddressSpace::CurrentID();
3588 		if (team < 0)
3589 			return team;
3590 	} else
3591 		team = VMAddressSpace::KernelID();
3592 
3593 	AddressSpaceReadLocker locker(team);
3594 	if (!locker.IsLocked())
3595 		return B_BAD_TEAM_ID;
3596 
3597 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3598 	if (area != NULL) {
3599 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3600 			return B_ERROR;
3601 
3602 		return area->id;
3603 	}
3604 
3605 	return B_ERROR;
3606 }
3607 
3608 
3609 /*!	Frees physical pages that were used during the boot process.
3610 	\a end is inclusive.
3611 */
3612 static void
3613 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3614 {
3615 	// free all physical pages in the specified range
3616 
3617 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3618 		phys_addr_t physicalAddress;
3619 		uint32 flags;
3620 
3621 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3622 			&& (flags & PAGE_PRESENT) != 0) {
3623 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3624 			if (page != NULL && page->State() != PAGE_STATE_FREE
3625 					 && page->State() != PAGE_STATE_CLEAR
3626 					 && page->State() != PAGE_STATE_UNUSED) {
3627 				DEBUG_PAGE_ACCESS_START(page);
3628 				vm_page_set_state(page, PAGE_STATE_FREE);
3629 			}
3630 		}
3631 	}
3632 
3633 	// unmap the memory
3634 	map->Unmap(start, end);
3635 }
3636 
3637 
3638 void
3639 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3640 {
3641 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3642 	addr_t end = start + (size - 1);
3643 	addr_t lastEnd = start;
3644 
3645 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3646 		(void*)start, (void*)end));
3647 
3648 	// The areas are sorted in virtual address space order, so
3649 	// we just have to find the holes between them that fall
3650 	// into the range we should dispose of
3651 
3652 	map->Lock();
3653 
3654 	for (VMAddressSpace::AreaIterator it
3655 				= VMAddressSpace::Kernel()->GetAreaIterator();
3656 			VMArea* area = it.Next();) {
3657 		addr_t areaStart = area->Base();
3658 		addr_t areaEnd = areaStart + (area->Size() - 1);
3659 
3660 		if (areaEnd < start)
3661 			continue;
3662 
3663 		if (areaStart > end) {
3664 			// we are done, the area is already beyond what we have to free
3665 			break;
3666 		}
3667 
3668 		if (areaStart > lastEnd) {
3669 			// this is something we can free
3670 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3671 				(void*)areaStart));
3672 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3673 		}
3674 
3675 		if (areaEnd >= end) {
3676 			lastEnd = areaEnd;
3677 				// no +1 to prevent potential overflow
3678 			break;
3679 		}
3680 
3681 		lastEnd = areaEnd + 1;
3682 	}
3683 
3684 	if (lastEnd < end) {
3685 		// we can also get rid of some space at the end of the area
3686 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3687 			(void*)end));
3688 		unmap_and_free_physical_pages(map, lastEnd, end);
3689 	}
3690 
3691 	map->Unlock();
3692 }
3693 
3694 
3695 static void
3696 create_preloaded_image_areas(struct preloaded_image* _image)
3697 {
3698 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3699 	char name[B_OS_NAME_LENGTH];
3700 	void* address;
3701 	int32 length;
3702 
3703 	// use file name to create a good area name
3704 	char* fileName = strrchr(image->name, '/');
3705 	if (fileName == NULL)
3706 		fileName = image->name;
3707 	else
3708 		fileName++;
3709 
3710 	length = strlen(fileName);
3711 	// make sure there is enough space for the suffix
3712 	if (length > 25)
3713 		length = 25;
3714 
3715 	memcpy(name, fileName, length);
3716 	strcpy(name + length, "_text");
3717 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3718 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3719 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3720 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3721 		// this will later be remapped read-only/executable by the
3722 		// ELF initialization code
3723 
3724 	strcpy(name + length, "_data");
3725 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3726 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3727 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3728 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3729 }
3730 
3731 
3732 /*!	Frees all previously allocated kernel args areas from the kernel_args
3733 	structure. Any boot loader resources contained in those arguments must not
3734 	be accessed anymore past this point.
3735 */
3736 void
3737 vm_free_kernel_args(kernel_args* args)
3738 {
3739 	uint32 i;
3740 
3741 	TRACE(("vm_free_kernel_args()\n"));
3742 
3743 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3744 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3745 		if (area >= B_OK)
3746 			delete_area(area);
3747 	}
3748 }
3749 
3750 
3751 static void
3752 allocate_kernel_args(kernel_args* args)
3753 {
3754 	TRACE(("allocate_kernel_args()\n"));
3755 
3756 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3757 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3758 
3759 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3760 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3761 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3762 	}
3763 }
3764 
3765 
3766 static void
3767 unreserve_boot_loader_ranges(kernel_args* args)
3768 {
3769 	TRACE(("unreserve_boot_loader_ranges()\n"));
3770 
3771 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3772 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3773 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3774 			args->virtual_allocated_range[i].size);
3775 	}
3776 }
3777 
3778 
3779 static void
3780 reserve_boot_loader_ranges(kernel_args* args)
3781 {
3782 	TRACE(("reserve_boot_loader_ranges()\n"));
3783 
3784 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3785 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3786 
3787 		// If the address is not a kernel address, we just skip it. The
3788 		// architecture-specific code has to deal with it.
3789 		if (!IS_KERNEL_ADDRESS(address)) {
3790 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3791 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3792 			continue;
3793 		}
3794 
3795 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3796 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3797 		if (status < B_OK)
3798 			panic("could not reserve boot loader ranges\n");
3799 	}
3800 }
3801 
3802 
3803 static addr_t
3804 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3805 {
3806 	size = PAGE_ALIGN(size);
3807 
3808 	// find a slot in the virtual allocation addr range
3809 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3810 		// check to see if the space between this one and the last is big enough
3811 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3812 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3813 			+ args->virtual_allocated_range[i - 1].size;
3814 
3815 		addr_t base = alignment > 0
3816 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3817 
3818 		if (base >= KERNEL_BASE && base < rangeStart
3819 				&& rangeStart - base >= size) {
3820 			args->virtual_allocated_range[i - 1].size
3821 				+= base + size - previousRangeEnd;
3822 			return base;
3823 		}
3824 	}
3825 
3826 	// we haven't found one between the allocation ranges; that's OK.
3827 	// see if there's a gap after the last one
3828 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3829 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3830 		+ args->virtual_allocated_range[lastEntryIndex].size;
3831 	addr_t base = alignment > 0
3832 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3833 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3834 		args->virtual_allocated_range[lastEntryIndex].size
3835 			+= base + size - lastRangeEnd;
3836 		return base;
3837 	}
3838 
3839 	// see if there's a gap before the first one
3840 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3841 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3842 		base = rangeStart - size;
3843 		if (alignment > 0)
3844 			base = ROUNDDOWN(base, alignment);
3845 
3846 		if (base >= KERNEL_BASE) {
3847 			args->virtual_allocated_range[0].start = base;
3848 			args->virtual_allocated_range[0].size += rangeStart - base;
3849 			return base;
3850 		}
3851 	}
3852 
3853 	return 0;
3854 }
3855 
3856 
3857 static bool
3858 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3859 {
3860 	// TODO: horrible brute-force method of determining if the page can be
3861 	// allocated
3862 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3863 		if (address >= args->physical_memory_range[i].start
3864 			&& address < args->physical_memory_range[i].start
3865 				+ args->physical_memory_range[i].size)
3866 			return true;
3867 	}
3868 	return false;
3869 }
3870 
3871 
3872 page_num_t
3873 vm_allocate_early_physical_page(kernel_args* args)
3874 {
3875 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3876 		phys_addr_t nextPage;
3877 
3878 		nextPage = args->physical_allocated_range[i].start
3879 			+ args->physical_allocated_range[i].size;
3880 		// see if the page just after this allocated paddr run can be allocated
3881 		if (i + 1 < args->num_physical_allocated_ranges
3882 			&& args->physical_allocated_range[i + 1].size != 0) {
3883 			// see if the next page will collide with the next allocated range
3884 			if (nextPage >= args->physical_allocated_range[i+1].start)
3885 				continue;
3886 		}
3887 		// see if the next physical page fits in the memory block
3888 		if (is_page_in_physical_memory_range(args, nextPage)) {
3889 			// we got one!
3890 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3891 			return nextPage / B_PAGE_SIZE;
3892 		}
3893 	}
3894 
3895 	// Expanding upwards didn't work, try going downwards.
3896 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3897 		phys_addr_t nextPage;
3898 
3899 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
3900 		// see if the page just before this allocated paddr run can be allocated
3901 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
3902 			// see if this page would collide with the previous allocated range
3903 			if (nextPage < args->physical_allocated_range[i-1].start
3904 				+ args->physical_allocated_range[i-1].size)
3905 				continue;
3906 		}
3907 		// see if the next physical page fits in the memory block
3908 		if (is_page_in_physical_memory_range(args, nextPage)) {
3909 			// we got one!
3910 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
3911 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3912 			return nextPage / B_PAGE_SIZE;
3913 		}
3914 	}
3915 
3916 	return 0;
3917 		// could not allocate a block
3918 }
3919 
3920 
3921 /*!	Uses the kernel_args' physical and virtual memory ranges to allocate some
3922 	pages before the VM is completely up.
3923 */
3924 addr_t
3925 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3926 	uint32 attributes, addr_t alignment)
3927 {
3928 	if (physicalSize > virtualSize)
3929 		physicalSize = virtualSize;
3930 
3931 	// find the vaddr to allocate at
3932 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3933 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3934 	if (virtualBase == 0) {
3935 		panic("vm_allocate_early: could not allocate virtual address\n");
3936 		return 0;
3937 	}
3938 
3939 	// map the pages
3940 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3941 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3942 		if (physicalAddress == 0)
3943 			panic("error allocating early page!\n");
3944 
3945 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3946 
3947 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3948 			physicalAddress * B_PAGE_SIZE, attributes,
3949 			&vm_allocate_early_physical_page);
3950 	}
3951 
3952 	return virtualBase;
3953 }
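

// Illustrative usage (a sketch, not part of the original source): early boot
// code typically grabs a wired, kernel-accessible block much like vm_init()
// below does for the initial heap:
//
//	addr_t base = vm_allocate_early(args, size, size,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
//	if (base == 0)
//		panic("early allocation failed");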
3954 
3955 
3956 /*!	The main entry point for initializing the VM. */
3957 status_t
3958 vm_init(kernel_args* args)
3959 {
3960 	struct preloaded_image* image;
3961 	void* address;
3962 	status_t err = 0;
3963 	uint32 i;
3964 
3965 	TRACE(("vm_init: entry\n"));
3966 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3967 	err = arch_vm_init(args);
3968 
3969 	// initialize some globals
3970 	vm_page_init_num_pages(args);
3971 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3972 
3973 	slab_init(args);
3974 
3975 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3976 	off_t heapSize = INITIAL_HEAP_SIZE;
3977 	// try to accommodate low-memory systems
3978 	while (heapSize > sAvailableMemory / 8)
3979 		heapSize /= 2;
3980 	if (heapSize < 1024 * 1024)
3981 		panic("vm_init: go buy some RAM please.");
3982 
3983 	// map in the new heap and initialize it
3984 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3985 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3986 	TRACE(("heap at 0x%lx\n", heapBase));
3987 	heap_init(heapBase, heapSize);
3988 #endif
3989 
3990 	// initialize the free page list and physical page mapper
3991 	vm_page_init(args);
3992 
3993 	// initialize the cache allocators
3994 	vm_cache_init(args);
3995 
3996 	{
3997 		status_t error = VMAreaHash::Init();
3998 		if (error != B_OK)
3999 			panic("vm_init: error initializing area hash table\n");
4000 	}
4001 
4002 	VMAddressSpace::Init();
4003 	reserve_boot_loader_ranges(args);
4004 
4005 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4006 	heap_init_post_area();
4007 #endif
4008 
4009 	// Do any further initialization that the architecture-dependent layers may
4010 	// need now
4011 	arch_vm_translation_map_init_post_area(args);
4012 	arch_vm_init_post_area(args);
4013 	vm_page_init_post_area(args);
4014 	slab_init_post_area();
4015 
4016 	// allocate areas to represent stuff that already exists
4017 
4018 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4019 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4020 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4021 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4022 #endif
4023 
4024 	allocate_kernel_args(args);
4025 
4026 	create_preloaded_image_areas(args->kernel_image);
4027 
4028 	// allocate areas for preloaded images
4029 	for (image = args->preloaded_images; image != NULL; image = image->next)
4030 		create_preloaded_image_areas(image);
4031 
4032 	// allocate kernel stacks
4033 	for (i = 0; i < args->num_cpus; i++) {
4034 		char name[64];
4035 
4036 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4037 		address = (void*)args->cpu_kstack[i].start;
4038 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4039 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4040 	}
4041 
4042 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4043 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4044 
4045 #if PARANOID_KERNEL_MALLOC
4046 	vm_block_address_range("uninitialized heap memory",
4047 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4048 #endif
4049 #if PARANOID_KERNEL_FREE
4050 	vm_block_address_range("freed heap memory",
4051 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4052 #endif
4053 
4054 	// create the object cache for the page mappings
4055 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4056 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4057 		NULL, NULL);
4058 	if (gPageMappingsObjectCache == NULL)
4059 		panic("failed to create page mappings object cache");
4060 
4061 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4062 
4063 #if DEBUG_CACHE_LIST
4064 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4065 		virtual_address_restrictions virtualRestrictions = {};
4066 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4067 		physical_address_restrictions physicalRestrictions = {};
4068 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4069 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4070 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4071 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4072 			&physicalRestrictions, (void**)&sCacheInfoTable);
4073 	}
4074 #endif	// DEBUG_CACHE_LIST
4075 
4076 	// add some debugger commands
4077 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4078 	add_debugger_command("area", &dump_area,
4079 		"Dump info about a particular area");
4080 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4081 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4082 #if DEBUG_CACHE_LIST
4083 	if (sCacheInfoTable != NULL) {
4084 		add_debugger_command_etc("caches", &dump_caches,
4085 			"List all VMCache trees",
4086 			"[ \"-c\" ]\n"
4087 			"All cache trees are listed sorted in decreasing order by number "
4088 				"of\n"
4089 			"used pages or, if \"-c\" is specified, by size of committed "
4090 				"memory.\n",
4091 			0);
4092 	}
4093 #endif
4094 	add_debugger_command("avail", &dump_available_memory,
4095 		"Dump available memory");
4096 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4097 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4098 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4099 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4100 	add_debugger_command("string", &display_mem, "dump strings");
4101 
4102 	add_debugger_command_etc("mapping", &dump_mapping_info,
4103 		"Print address mapping information",
4104 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4105 		"Prints low-level page mapping information for a given address. If\n"
4106 		"neither \"-r\" nor \"-p\" is specified, <address> is a virtual\n"
4107 		"address that is looked up in the translation map of the current\n"
4108 		"team, or of the team specified by thread ID <thread ID>. If \"-r\"\n"
4109 		"is specified, <address> is a physical address that is searched\n"
4110 		"for in the translation maps of all teams, or of the team\n"
4111 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4112 		"<address> is the address of a vm_page structure. The behavior is\n"
4113 		"equivalent to specifying \"-r\" with the physical address of that\n"
4114 		"page.\n",
4115 		0);
4116 
4117 	TRACE(("vm_init: exit\n"));
4118 
4119 	vm_cache_init_post_heap();
4120 
4121 	return err;
4122 }
4123 
4124 
4125 status_t
4126 vm_init_post_sem(kernel_args* args)
4127 {
4128 	// This frees all unused boot loader resources and makes their space
4129 	// available again
4130 	arch_vm_init_end(args);
4131 	unreserve_boot_loader_ranges(args);
4132 
4133 	// fill in all of the semaphores that were not allocated before. Since
4134 	// we're still single-threaded and only the kernel address space exists,
4135 	// it isn't that hard to find all of the ones we need to create.
4136 
4137 	arch_vm_translation_map_init_post_sem(args);
4138 
4139 	slab_init_post_sem();
4140 
4141 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4142 	heap_init_post_sem();
4143 #endif
4144 
4145 	return B_OK;
4146 }
4147 
4148 
4149 status_t
4150 vm_init_post_thread(kernel_args* args)
4151 {
4152 	vm_page_init_post_thread(args);
4153 	slab_init_post_thread();
4154 	return heap_init_post_thread();
4155 }
4156 
4157 
4158 status_t
4159 vm_init_post_modules(kernel_args* args)
4160 {
4161 	return arch_vm_init_post_modules(args);
4162 }
4163 
4164 
4165 void
4166 permit_page_faults(void)
4167 {
4168 	Thread* thread = thread_get_current_thread();
4169 	if (thread != NULL)
4170 		atomic_add(&thread->page_faults_allowed, 1);
4171 }
4172 
4173 
4174 void
4175 forbid_page_faults(void)
4176 {
4177 	Thread* thread = thread_get_current_thread();
4178 	if (thread != NULL)
4179 		atomic_add(&thread->page_faults_allowed, -1);
4180 }
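

// permit_page_faults()/forbid_page_faults() above bracket code that may
// legitimately fault while touching user memory. A sketch of the intended
// pairing (an assumption based on the counter semantics; the helper name is
// hypothetical, not a verbatim call site):
//
//	permit_page_faults();
//	status_t error = copy_that_may_fault(to, from, size);
//	forbid_page_faults();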
4181 
4182 
4183 status_t
4184 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4185 	bool isUser, addr_t* newIP)
4186 {
4187 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4188 		faultAddress));
4189 
4190 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4191 
4192 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4193 	VMAddressSpace* addressSpace = NULL;
4194 
4195 	status_t status = B_OK;
4196 	*newIP = 0;
4197 	atomic_add((int32*)&sPageFaults, 1);
4198 
4199 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4200 		addressSpace = VMAddressSpace::GetKernel();
4201 	} else if (IS_USER_ADDRESS(pageAddress)) {
4202 		addressSpace = VMAddressSpace::GetCurrent();
4203 		if (addressSpace == NULL) {
4204 			if (!isUser) {
4205 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4206 					"memory!\n");
4207 				status = B_BAD_ADDRESS;
4208 				TPF(PageFaultError(-1,
4209 					VMPageFaultTracing
4210 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4211 			} else {
4212 				// XXX weird state.
4213 				panic("vm_page_fault: non kernel thread accessing user memory "
4214 					"that doesn't exist!\n");
4215 				status = B_BAD_ADDRESS;
4216 			}
4217 		}
4218 	} else {
4219 		// the hit was probably in the 64k DMZ between kernel and user space
4220 		// this keeps a user space thread from passing a buffer that crosses
4221 		// into kernel space
4222 		status = B_BAD_ADDRESS;
4223 		TPF(PageFaultError(-1,
4224 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4225 	}
4226 
4227 	if (status == B_OK) {
4228 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4229 			isUser, NULL);
4230 	}
4231 
4232 	if (status < B_OK) {
4233 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4234 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4235 			strerror(status), address, faultAddress, isWrite, isUser,
4236 			thread_get_current_thread_id());
4237 		if (!isUser) {
4238 			Thread* thread = thread_get_current_thread();
4239 			if (thread != NULL && thread->fault_handler != 0) {
4240 				// this will cause the arch-dependent page fault handler to
4241 				// modify the IP on the interrupt frame or whatever to return
4242 				// to this address
4243 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4244 			} else {
4245 				// unhandled page fault in the kernel
4246 				panic("vm_page_fault: unhandled page fault in kernel space at "
4247 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4248 			}
4249 		} else {
4250 #if 1
4251 			// TODO: remove me once we have proper userland debugging support
4252 			// (and tools)
4253 			VMArea* area = NULL;
4254 			if (addressSpace != NULL) {
4255 				addressSpace->ReadLock();
4256 				area = addressSpace->LookupArea(faultAddress);
4257 			}
4258 
4259 			Thread* thread = thread_get_current_thread();
4260 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4261 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4262 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4263 				thread->team->Name(), thread->team->id,
4264 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4265 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4266 					area->Base() : 0x0));
4267 
4268 			// We can print a stack trace of the userland thread here.
4269 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4270 // fault and someone is already waiting for a write lock on the same address
4271 // space. This thread will then try to acquire the lock again and will
4272 // be queued after the writer.
4273 #	if 0
4274 			if (area) {
4275 				struct stack_frame {
4276 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4277 						struct stack_frame*	previous;
4278 						void*				return_address;
4279 					#else
4280 						// ...
4281 					#warning writeme
4282 					#endif
4283 				} frame;
4284 #		ifdef __INTEL__
4285 				struct iframe* iframe = x86_get_user_iframe();
4286 				if (iframe == NULL)
4287 					panic("iframe is NULL!");
4288 
4289 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4290 					sizeof(struct stack_frame));
4291 #		elif defined(__POWERPC__)
4292 				struct iframe* iframe = ppc_get_user_iframe();
4293 				if (iframe == NULL)
4294 					panic("iframe is NULL!");
4295 
4296 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4297 					sizeof(struct stack_frame));
4298 #		else
4299 #			warning "vm_page_fault() stack trace won't work"
4300 				status = B_ERROR;
4301 #		endif
4302 
4303 				dprintf("stack trace:\n");
4304 				int32 maxFrames = 50;
4305 				while (status == B_OK && --maxFrames >= 0
4306 						&& frame.return_address != NULL) {
4307 					dprintf("  %p", frame.return_address);
4308 					area = addressSpace->LookupArea(
4309 						(addr_t)frame.return_address);
4310 					if (area) {
4311 						dprintf(" (%s + %#lx)", area->name,
4312 							(addr_t)frame.return_address - area->Base());
4313 					}
4314 					dprintf("\n");
4315 
4316 					status = user_memcpy(&frame, frame.previous,
4317 						sizeof(struct stack_frame));
4318 				}
4319 			}
4320 #	endif	// 0 (stack trace)
4321 
4322 			if (addressSpace != NULL)
4323 				addressSpace->ReadUnlock();
4324 #endif
4325 
4326 			// If the thread has a signal handler for SIGSEGV, we simply
4327 			// send it the signal. Otherwise we notify the user debugger
4328 			// first.
4329 			struct sigaction action;
4330 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4331 					&& action.sa_handler != SIG_DFL
4332 					&& action.sa_handler != SIG_IGN)
4333 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4334 					SIGSEGV)) {
4335 				Signal signal(SIGSEGV,
4336 					status == B_PERMISSION_DENIED
4337 						? SEGV_ACCERR : SEGV_MAPERR,
4338 					EFAULT, thread->team->id);
4339 				signal.SetAddress((void*)address);
4340 				send_signal_to_thread(thread, signal, 0);
4341 			}
4342 		}
4343 	}
4344 
4345 	if (addressSpace != NULL)
4346 		addressSpace->Put();
4347 
4348 	return B_HANDLED_INTERRUPT;
4349 }
4350 
4351 
4352 struct PageFaultContext {
4353 	AddressSpaceReadLocker	addressSpaceLocker;
4354 	VMCacheChainLocker		cacheChainLocker;
4355 
4356 	VMTranslationMap*		map;
4357 	VMCache*				topCache;
4358 	off_t					cacheOffset;
4359 	vm_page_reservation		reservation;
4360 	bool					isWrite;
4361 
4362 	// return values
4363 	vm_page*				page;
4364 	bool					restart;
4365 	bool					pageAllocated;
4366 
4367 
4368 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4369 		:
4370 		addressSpaceLocker(addressSpace, true),
4371 		map(addressSpace->TranslationMap()),
4372 		isWrite(isWrite)
4373 	{
4374 	}
4375 
4376 	~PageFaultContext()
4377 	{
4378 		UnlockAll();
4379 		vm_page_unreserve_pages(&reservation);
4380 	}
4381 
4382 	void Prepare(VMCache* topCache, off_t cacheOffset)
4383 	{
4384 		this->topCache = topCache;
4385 		this->cacheOffset = cacheOffset;
4386 		page = NULL;
4387 		restart = false;
4388 		pageAllocated = false;
4389 
4390 		cacheChainLocker.SetTo(topCache);
4391 	}
4392 
4393 	void UnlockAll(VMCache* exceptCache = NULL)
4394 	{
4395 		topCache = NULL;
4396 		addressSpaceLocker.Unlock();
4397 		cacheChainLocker.Unlock(exceptCache);
4398 	}
4399 };
4400 
4401 
4402 /*!	Gets the page that should be mapped into the area.
4403 	Returns an error code other than \c B_OK, if the page couldn't be found or
4404 	paged in. The locking state of the address space and the caches is undefined
4405 	in that case.
4406 	Returns \c B_OK with \c context.restart set to \c true, if the function
4407 	had to unlock the address space and all caches and is supposed to be called
4408 	again.
4409 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4410 	found. It is returned in \c context.page. The address space will still be
4411 	locked as well as all caches starting from the top cache to at least the
4412 	cache the page lives in.
4413 */
4414 static status_t
4415 fault_get_page(PageFaultContext& context)
4416 {
4417 	VMCache* cache = context.topCache;
4418 	VMCache* lastCache = NULL;
4419 	vm_page* page = NULL;
4420 
4421 	while (cache != NULL) {
4422 		// We already hold the lock of the cache at this point.
4423 
4424 		lastCache = cache;
4425 
4426 		page = cache->LookupPage(context.cacheOffset);
4427 		if (page != NULL && page->busy) {
4428 			// the page is busy -- wait for it to become unbusy
4429 			context.UnlockAll(cache);
4430 			cache->ReleaseRefLocked();
4431 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4432 
4433 			// restart the whole process
4434 			context.restart = true;
4435 			return B_OK;
4436 		}
4437 
4438 		if (page != NULL)
4439 			break;
4440 
4441 		// The current cache does not contain the page we're looking for.
4442 
4443 		// see if the backing store has it
4444 		if (cache->HasPage(context.cacheOffset)) {
4445 			// insert a fresh page and mark it busy -- we're going to read it in
4446 			page = vm_page_allocate_page(&context.reservation,
4447 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4448 			cache->InsertPage(page, context.cacheOffset);
4449 
4450 			// We need to unlock all caches and the address space while reading
4451 			// the page in. Keep a reference to the cache around.
4452 			cache->AcquireRefLocked();
4453 			context.UnlockAll();
4454 
4455 			// read the page in
4456 			generic_io_vec vec;
4457 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4458 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4459 
4460 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4461 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4462 
4463 			cache->Lock();
4464 
4465 			if (status < B_OK) {
4466 				// on error remove and free the page
4467 				dprintf("reading page from cache %p returned: %s!\n",
4468 					cache, strerror(status));
4469 
4470 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4471 				cache->RemovePage(page);
4472 				vm_page_set_state(page, PAGE_STATE_FREE);
4473 
4474 				cache->ReleaseRefAndUnlock();
4475 				return status;
4476 			}
4477 
4478 			// mark the page unbusy again
4479 			cache->MarkPageUnbusy(page);
4480 
4481 			DEBUG_PAGE_ACCESS_END(page);
4482 
4483 			// Since we needed to unlock everything temporarily, the area
4484 			// situation might have changed. So we need to restart the whole
4485 			// process.
4486 			cache->ReleaseRefAndUnlock();
4487 			context.restart = true;
4488 			return B_OK;
4489 		}
4490 
4491 		cache = context.cacheChainLocker.LockSourceCache();
4492 	}
4493 
4494 	if (page == NULL) {
4495 		// There was no adequate page; determine the cache for a clean one.
4496 		// Read-only pages go into the deepest cache; only the topmost cache
4497 		// may have direct write access.
4498 		cache = context.isWrite ? context.topCache : lastCache;
4499 
4500 		// allocate a clean page
4501 		page = vm_page_allocate_page(&context.reservation,
4502 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4503 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4504 			page->physical_page_number));
4505 
4506 		// insert the new page into our cache
4507 		cache->InsertPage(page, context.cacheOffset);
4508 		context.pageAllocated = true;
4509 	} else if (page->Cache() != context.topCache && context.isWrite) {
4510 		// We have a page that has the data we want, but in the wrong cache
4511 		// object so we need to copy it and stick it into the top cache.
4512 		vm_page* sourcePage = page;
4513 
4514 		// TODO: If memory is low, it might be a good idea to steal the page
4515 		// from our source cache -- if possible, that is.
4516 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4517 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4518 
4519 		// To not needlessly kill concurrency we unlock all caches but the top
4520 		// one while copying the page. Lacking another mechanism to ensure that
4521 		// the source page doesn't disappear, we mark it busy.
4522 		sourcePage->busy = true;
4523 		context.cacheChainLocker.UnlockKeepRefs(true);
4524 
4525 		// copy the page
4526 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4527 			sourcePage->physical_page_number * B_PAGE_SIZE);
4528 
4529 		context.cacheChainLocker.RelockCaches(true);
4530 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4531 
4532 		// insert the new page into our cache
4533 		context.topCache->InsertPage(page, context.cacheOffset);
4534 		context.pageAllocated = true;
4535 	} else
4536 		DEBUG_PAGE_ACCESS_START(page);
4537 
4538 	context.page = page;
4539 	return B_OK;
4540 }
4541 
4542 
4543 /*!	Makes sure the address in the given address space is mapped.
4544 
4545 	\param addressSpace The address space.
4546 	\param originalAddress The address. Doesn't need to be page aligned.
4547 	\param isWrite If \c true the address shall be write-accessible.
	\param isExecute If \c true the address shall be executable.
4548 	\param isUser If \c true the access is requested by a userland team.
4549 	\param wirePage On success, if non \c NULL, the wired count of the page
4550 		mapped at the given address is incremented and the page is returned
4551 		via this parameter.
4552 	\return \c B_OK on success, another error code otherwise.
4553 */
4554 static status_t
4555 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4556 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4557 {
4558 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4559 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4560 		originalAddress, isWrite, isUser));
4561 
4562 	PageFaultContext context(addressSpace, isWrite);
4563 
4564 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4565 	status_t status = B_OK;
4566 
4567 	addressSpace->IncrementFaultCount();
4568 
4569 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4570 	// the pages upfront makes sure we don't have any cache locked, so that the
4571 	// page daemon/thief can do their job without problems.
4572 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4573 		originalAddress);
4574 	context.addressSpaceLocker.Unlock();
4575 	vm_page_reserve_pages(&context.reservation, reservePages,
4576 		addressSpace == VMAddressSpace::Kernel()
4577 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4578 
4579 	while (true) {
4580 		context.addressSpaceLocker.Lock();
4581 
4582 		// get the area the fault was in
4583 		VMArea* area = addressSpace->LookupArea(address);
4584 		if (area == NULL) {
4585 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4586 				"space\n", originalAddress);
4587 			TPF(PageFaultError(-1,
4588 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4589 			status = B_BAD_ADDRESS;
4590 			break;
4591 		}
4592 
4593 		// check permissions
4594 		uint32 protection = get_area_page_protection(area, address);
4595 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4596 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4597 				area->id, (void*)originalAddress);
4598 			TPF(PageFaultError(area->id,
4599 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4600 			status = B_PERMISSION_DENIED;
4601 			break;
4602 		}
4603 		if (isWrite && (protection
4604 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4605 			dprintf("write access attempted on write-protected area 0x%"
4606 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4607 			TPF(PageFaultError(area->id,
4608 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4609 			status = B_PERMISSION_DENIED;
4610 			break;
4611 		} else if (isExecute && (protection
4612 				& (B_EXECUTE_AREA
4613 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4614 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4615 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4616 			TPF(PageFaultError(area->id,
4617 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4618 			status = B_PERMISSION_DENIED;
4619 			break;
4620 		} else if (!isWrite && !isExecute && (protection
4621 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4622 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4623 				" at %p\n", area->id, (void*)originalAddress);
4624 			TPF(PageFaultError(area->id,
4625 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4626 			status = B_PERMISSION_DENIED;
4627 			break;
4628 		}
4629 
4630 		// We have the area, it was a valid access, so let's try to resolve the
4631 		// page fault now.
4632 		// At first, the top most cache from the area is investigated.
4633 
4634 		context.Prepare(vm_area_get_locked_cache(area),
4635 			address - area->Base() + area->cache_offset);
4636 
4637 		// See if this cache has a fault handler -- this will do all the work
4638 		// for us.
4639 		{
4640 			// Note, since the page fault is resolved with interrupts enabled,
4641 			// the fault handler could be called more than once for the same
4642 			// reason -- the store must take this into account.
4643 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4644 			if (status != B_BAD_HANDLER)
4645 				break;
4646 		}
4647 
4648 		// The top most cache has no fault handler, so let's see if the cache or
4649 		// its sources already have the page we're searching for (we're going
4650 		// from top to bottom).
4651 		status = fault_get_page(context);
4652 		if (status != B_OK) {
4653 			TPF(PageFaultError(area->id, status));
4654 			break;
4655 		}
4656 
4657 		if (context.restart)
4658 			continue;
4659 
4660 		// All went fine, all there is left to do is to map the page into the
4661 		// address space.
4662 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4663 			context.page));
4664 
4665 		// If the page doesn't reside in the area's cache, we need to make sure
4666 		// it's mapped in read-only, so that we cannot overwrite someone else's
4667 		// data (copy-on-write)
4668 		uint32 newProtection = protection;
4669 		if (context.page->Cache() != context.topCache && !isWrite)
4670 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4671 
4672 		bool unmapPage = false;
4673 		bool mapPage = true;
4674 
4675 		// check whether there's already a page mapped at the address
4676 		context.map->Lock();
4677 
4678 		phys_addr_t physicalAddress;
4679 		uint32 flags;
4680 		vm_page* mappedPage = NULL;
4681 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4682 			&& (flags & PAGE_PRESENT) != 0
4683 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4684 				!= NULL) {
4685 			// Yep there's already a page. If it's ours, we can simply adjust
4686 			// its protection. Otherwise we have to unmap it.
4687 			if (mappedPage == context.page) {
4688 				context.map->ProtectPage(area, address, newProtection);
4689 					// Note: We assume that ProtectPage() is atomic (i.e.
4690 					// the page isn't temporarily unmapped), otherwise we'd have
4691 					// to make sure it isn't wired.
4692 				mapPage = false;
4693 			} else
4694 				unmapPage = true;
4695 		}
4696 
4697 		context.map->Unlock();
4698 
4699 		if (unmapPage) {
4700 			// If the page is wired, we can't unmap it. Wait until it is unwired
4701 			// again and restart. Note that the page cannot be wired for
4702 			// writing, since it isn't in the topmost cache. So we can safely
4703 			// ignore ranges wired for writing (our own and other concurrent
4704 			// wiring attempts in progress) and in fact have to do that to avoid
4705 			// a deadlock.
4706 			VMAreaUnwiredWaiter waiter;
4707 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4708 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4709 				// unlock everything and wait
4710 				if (context.pageAllocated) {
4711 					// ... but since we allocated a page and inserted it into
4712 					// the top cache, remove and free it first. Otherwise we'd
4713 					// have a page from a lower cache mapped while an upper
4714 					// cache has a page that would shadow it.
4715 					context.topCache->RemovePage(context.page);
4716 					vm_page_free_etc(context.topCache, context.page,
4717 						&context.reservation);
4718 				} else
4719 					DEBUG_PAGE_ACCESS_END(context.page);
4720 
4721 				context.UnlockAll();
4722 				waiter.waitEntry.Wait();
4723 				continue;
4724 			}
4725 
4726 			// Note: The mapped page is a page of a lower cache. We are
4727 			// guaranteed to have that cache locked; our new page is a copy of
4728 			// that page, and the page is not busy. The logic for that guarantee
4729 			// is as follows: Since the page is mapped, it must live in the top
4730 			// cache (ruled out above) or any of its lower caches, and there is
4731 			// (was before the new page was inserted) no other page in any
4732 			// cache between the top cache and the page's cache (otherwise that
4733 			// would be mapped instead). That in turn means that our algorithm
4734 			// must have found it and therefore it cannot be busy either.
4735 			DEBUG_PAGE_ACCESS_START(mappedPage);
4736 			unmap_page(area, address);
4737 			DEBUG_PAGE_ACCESS_END(mappedPage);
4738 		}
4739 
4740 		if (mapPage) {
4741 			if (map_page(area, context.page, address, newProtection,
4742 					&context.reservation) != B_OK) {
4743 				// Mapping can only fail when the page mapping object couldn't
4744 				// be allocated. Aside from the missing mapping, everything is
4745 				// fine, though. If this was a regular page fault, we'll simply
4746 				// leave and probably fault again. To make sure we'll have more
4747 				// luck then, we ensure that the minimum object reserve is
4748 				// available.
4749 				DEBUG_PAGE_ACCESS_END(context.page);
4750 
4751 				context.UnlockAll();
4752 
4753 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4754 						!= B_OK) {
4755 					// Apparently the situation is serious. Let's get ourselves
4756 					// killed.
4757 					status = B_NO_MEMORY;
4758 				} else if (wirePage != NULL) {
4759 					// The caller expects us to wire the page. Since
4760 					// object_cache_reserve() succeeded, we should now be able
4761 					// to allocate a mapping structure. Restart.
4762 					continue;
4763 				}
4764 
4765 				break;
4766 			}
4767 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4768 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4769 
4770 		// also wire the page, if requested
4771 		if (wirePage != NULL && status == B_OK) {
4772 			increment_page_wired_count(context.page);
4773 			*wirePage = context.page;
4774 		}
4775 
4776 		DEBUG_PAGE_ACCESS_END(context.page);
4777 
4778 		break;
4779 	}
4780 
4781 	return status;
4782 }
4783 
4784 
4785 status_t
4786 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4787 {
4788 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4789 }
4790 
4791 status_t
4792 vm_put_physical_page(addr_t vaddr, void* handle)
4793 {
4794 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4795 }
4796 
4797 
4798 status_t
4799 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4800 	void** _handle)
4801 {
4802 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4803 }
4804 
4805 status_t
4806 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4807 {
4808 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4809 }
4810 
4811 
4812 status_t
4813 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4814 {
4815 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4816 }
4817 
4818 status_t
4819 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4820 {
4821 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4822 }
4823 
4824 
4825 void
4826 vm_get_info(system_info* info)
4827 {
4828 	swap_get_info(info);
4829 
4830 	MutexLocker locker(sAvailableMemoryLock);
4831 	info->needed_memory = sNeededMemory;
4832 	info->free_memory = sAvailableMemory;
4833 }
4834 
4835 
4836 uint32
4837 vm_num_page_faults(void)
4838 {
4839 	return sPageFaults;
4840 }
4841 
4842 
4843 off_t
4844 vm_available_memory(void)
4845 {
4846 	MutexLocker locker(sAvailableMemoryLock);
4847 	return sAvailableMemory;
4848 }
4849 
4850 
4851 off_t
4852 vm_available_not_needed_memory(void)
4853 {
4854 	MutexLocker locker(sAvailableMemoryLock);
4855 	return sAvailableMemory - sNeededMemory;
4856 }
4857 
4858 
4859 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4860 	debugger.
4861 */
4862 off_t
4863 vm_available_not_needed_memory_debug(void)
4864 {
4865 	return sAvailableMemory - sNeededMemory;
4866 }
4867 
4868 
4869 size_t
4870 vm_kernel_address_space_left(void)
4871 {
4872 	return VMAddressSpace::Kernel()->FreeSpace();
4873 }
4874 
4875 
4876 void
4877 vm_unreserve_memory(size_t amount)
4878 {
4879 	mutex_lock(&sAvailableMemoryLock);
4880 
4881 	sAvailableMemory += amount;
4882 
4883 	mutex_unlock(&sAvailableMemoryLock);
4884 }
4885 
4886 
4887 status_t
4888 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4889 {
4890 	size_t reserve = kMemoryReserveForPriority[priority];
4891 
4892 	MutexLocker locker(sAvailableMemoryLock);
4893 
4894 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4895 
4896 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4897 		sAvailableMemory -= amount;
4898 		return B_OK;
4899 	}
4900 
4901 	if (timeout <= 0)
4902 		return B_NO_MEMORY;
4903 
4904 	// turn timeout into an absolute timeout
4905 	timeout += system_time();
4906 
4907 	// loop until we've got the memory or the timeout occurs
4908 	do {
4909 		sNeededMemory += amount;
4910 
4911 		// call the low resource manager
4912 		locker.Unlock();
4913 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4914 			B_ABSOLUTE_TIMEOUT, timeout);
4915 		locker.Lock();
4916 
4917 		sNeededMemory -= amount;
4918 
4919 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4920 			sAvailableMemory -= amount;
4921 			return B_OK;
4922 		}
4923 	} while (timeout > system_time());
4924 
4925 	return B_NO_MEMORY;
4926 }
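

// Illustrative usage (a sketch, not part of the original source): a caller
// that wants to commit memory reserves it first and gives it back via
// vm_unreserve_memory() when the commitment is dropped; commitSize and the
// one-second timeout are hypothetical values:
//
//	if (vm_try_reserve_memory(commitSize, VM_PRIORITY_USER, 1000000) != B_OK)
//		return B_NO_MEMORY;
//	// ... later, when the commitment is released:
//	vm_unreserve_memory(commitSize);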
4927 
4928 
4929 status_t
4930 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4931 {
4932 	// NOTE: The caller is responsible for synchronizing calls to this function!
4933 
4934 	AddressSpaceReadLocker locker;
4935 	VMArea* area;
4936 	status_t status = locker.SetFromArea(id, area);
4937 	if (status != B_OK)
4938 		return status;
4939 
4940 	// nothing to do, if the type doesn't change
4941 	uint32 oldType = area->MemoryType();
4942 	if (type == oldType)
4943 		return B_OK;
4944 
4945 	// set the memory type of the area and the mapped pages
4946 	VMTranslationMap* map = area->address_space->TranslationMap();
4947 	map->Lock();
4948 	area->SetMemoryType(type);
4949 	map->ProtectArea(area, area->protection);
4950 	map->Unlock();
4951 
4952 	// set the physical memory type
4953 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4954 	if (error != B_OK) {
4955 		// reset the memory type of the area and the mapped pages
4956 		map->Lock();
4957 		area->SetMemoryType(oldType);
4958 		map->ProtectArea(area, area->protection);
4959 		map->Unlock();
4960 		return error;
4961 	}
4962 
4963 	return B_OK;
4964 
4965 }
4966 
4967 
4968 /*!	This function enforces some protection properties:
4969 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4970 	 - if B_EXECUTE_AREA is set, B_KERNEL_EXECUTE_AREA is set as well
4971 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4972 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4973 	   and B_KERNEL_WRITE_AREA.
4974 */
4975 static void
4976 fix_protection(uint32* protection)
4977 {
4978 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4979 		if ((*protection & B_USER_PROTECTION) == 0
4980 			|| (*protection & B_WRITE_AREA) != 0)
4981 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4982 		else
4983 			*protection |= B_KERNEL_READ_AREA;
4984 		if ((*protection & B_EXECUTE_AREA) != 0)
4985 			*protection |= B_KERNEL_EXECUTE_AREA;
4986 	}
4987 }
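

// For example (per the rules above, assuming the caller set no kernel bits):
// B_READ_AREA | B_WRITE_AREA becomes
// B_READ_AREA | B_WRITE_AREA | B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
// while plain B_READ_AREA becomes B_READ_AREA | B_KERNEL_READ_AREA.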
4988 
4989 
4990 static void
4991 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4992 {
4993 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4994 	info->area = area->id;
4995 	info->address = (void*)area->Base();
4996 	info->size = area->Size();
4997 	info->protection = area->protection;
4998 	info->lock = B_FULL_LOCK;
4999 	info->team = area->address_space->ID();
5000 	info->copy_count = 0;
5001 	info->in_count = 0;
5002 	info->out_count = 0;
5003 		// TODO: retrieve real values here!
5004 
5005 	VMCache* cache = vm_area_get_locked_cache(area);
5006 
5007 	// Note, this is a simplification; the cache could be larger than this area
5008 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5009 
5010 	vm_area_put_locked_cache(cache);
5011 }
5012 
5013 
5014 static status_t
5015 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5016 {
5017 	// is newSize a multiple of B_PAGE_SIZE?
5018 	if (newSize & (B_PAGE_SIZE - 1))
5019 		return B_BAD_VALUE;
5020 
5021 	// lock all affected address spaces and the cache
5022 	VMArea* area;
5023 	VMCache* cache;
5024 
5025 	MultiAddressSpaceLocker locker;
5026 	AreaCacheLocker cacheLocker;
5027 
5028 	status_t status;
5029 	size_t oldSize;
5030 	bool anyKernelArea;
5031 	bool restart;
5032 
5033 	do {
5034 		anyKernelArea = false;
5035 		restart = false;
5036 
5037 		locker.Unset();
5038 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5039 		if (status != B_OK)
5040 			return status;
5041 		cacheLocker.SetTo(cache, true);	// already locked
5042 
5043 		// enforce restrictions
5044 		if (!kernel) {
5045 			if ((area->protection & B_KERNEL_AREA) != 0)
5046 				return B_NOT_ALLOWED;
5047 			// TODO: Enforce all restrictions (team, etc.)!
5048 		}
5049 
5050 		oldSize = area->Size();
5051 		if (newSize == oldSize)
5052 			return B_OK;
5053 
5054 		if (cache->type != CACHE_TYPE_RAM)
5055 			return B_NOT_ALLOWED;
5056 
5057 		if (oldSize < newSize) {
5058 			// We need to check if all areas of this cache can be resized.
5059 			for (VMArea* current = cache->areas; current != NULL;
5060 					current = current->cache_next) {
5061 				if (!current->address_space->CanResizeArea(current, newSize))
5062 					return B_ERROR;
5063 				anyKernelArea
5064 					|= current->address_space == VMAddressSpace::Kernel();
5065 			}
5066 		} else {
5067 			// We're shrinking the areas, so we must make sure the affected
5068 			// ranges are not wired.
5069 			for (VMArea* current = cache->areas; current != NULL;
5070 					current = current->cache_next) {
5071 				anyKernelArea
5072 					|= current->address_space == VMAddressSpace::Kernel();
5073 
5074 				if (wait_if_area_range_is_wired(current,
5075 						current->Base() + newSize, oldSize - newSize, &locker,
5076 						&cacheLocker)) {
5077 					restart = true;
5078 					break;
5079 				}
5080 			}
5081 		}
5082 	} while (restart);
5083 
5084 	// Okay, looks good so far, so let's do it
5085 
5086 	int priority = kernel && anyKernelArea
5087 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5088 	uint32 allocationFlags = kernel && anyKernelArea
5089 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5090 
5091 	if (oldSize < newSize) {
5092 		// Growing the cache can fail, so we do it first.
5093 		status = cache->Resize(cache->virtual_base + newSize, priority);
5094 		if (status != B_OK)
5095 			return status;
5096 	}
5097 
5098 	for (VMArea* current = cache->areas; current != NULL;
5099 			current = current->cache_next) {
5100 		status = current->address_space->ResizeArea(current, newSize,
5101 			allocationFlags);
5102 		if (status != B_OK)
5103 			break;
5104 
5105 		// We also need to unmap all pages beyond the new size, if the area has
5106 		// shrunk
5107 		if (newSize < oldSize) {
5108 			VMCacheChainLocker cacheChainLocker(cache);
5109 			cacheChainLocker.LockAllSourceCaches();
5110 
5111 			unmap_pages(current, current->Base() + newSize,
5112 				oldSize - newSize);
5113 
5114 			cacheChainLocker.Unlock(cache);
5115 		}
5116 	}
5117 
5118 	if (status == B_OK) {
5119 		// Shrink or grow individual page protections if in use.
5120 		if (area->page_protections != NULL) {
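			// Page protections are stored as one nibble (4 bits) per page,
			// i.e. two pages per byte, so the buffer needs
			// (pageCount + 1) / 2 bytes.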
5121 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5122 			uint8* newProtections
5123 				= (uint8*)realloc(area->page_protections, bytes);
5124 			if (newProtections == NULL)
5125 				status = B_NO_MEMORY;
5126 			else {
5127 				area->page_protections = newProtections;
5128 
5129 				if (oldSize < newSize) {
5130 					// init the additional page protections to that of the area
5131 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5132 					uint32 areaProtection = area->protection
5133 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5134 					memset(area->page_protections + offset,
5135 						areaProtection | (areaProtection << 4), bytes - offset);
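					// If the old page count was odd, the byte at offset - 1
					// is shared between an old and a new page: keep its low
					// nibble and set the high nibble to the area's protection.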
5136 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5137 						uint8& entry = area->page_protections[offset - 1];
5138 						entry = (entry & 0x0f) | (areaProtection << 4);
5139 					}
5140 				}
5141 			}
5142 		}
5143 	}
5144 
5145 	// shrinking the cache can't fail, so we do it now
5146 	if (status == B_OK && newSize < oldSize)
5147 		status = cache->Resize(cache->virtual_base + newSize, priority);
5148 
5149 	if (status != B_OK) {
5150 		// Something failed -- resize the areas back to their original size.
5151 		// This can fail, too, in which case we're seriously screwed.
5152 		for (VMArea* current = cache->areas; current != NULL;
5153 				current = current->cache_next) {
5154 			if (current->address_space->ResizeArea(current, oldSize,
5155 					allocationFlags) != B_OK) {
5156 				panic("vm_resize_area(): Failed and could not restore the "
5157 					"original state.");
5158 			}
5159 		}
5160 
5161 		cache->Resize(cache->virtual_base + oldSize, priority);
5162 	}
5163 
5164 	// TODO: we must honour the lock restrictions of this area
5165 	return status;
5166 }
5167 
5168 
5169 status_t
5170 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5171 {
5172 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5173 }
5174 
5175 
5176 status_t
5177 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5178 {
5179 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5180 }
5181 
5182 
5183 status_t
5184 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5185 	bool user)
5186 {
5187 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5188 }
5189 
5190 
5191 void
5192 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5193 {
5194 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5195 }
5196 
5197 
5198 /*!	Copies a range of memory directly from/to a page that might not be mapped
5199 	at the moment.
5200 
5201 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5202 	walks through the respective area's cache chain to find the physical page
5203 	and copies from/to it directly.
5204 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5205 	must not cross a page boundary.
5206 
5207 	\param teamID The team ID identifying the address space \a unsafeMemory is
5208 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5209 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5210 		is passed, the address space of the thread returned by
5211 		debug_get_debugged_thread() is used.
5212 	\param unsafeMemory The start of the unsafe memory range to be copied
5213 		from/to.
5214 	\param buffer A safely accessible kernel buffer to be copied from/to.
5215 	\param size The number of bytes to be copied.
5216 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5217 		\a unsafeMemory, the other way around otherwise.
5218 */
5219 status_t
5220 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5221 	size_t size, bool copyToUnsafe)
5222 {
5223 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5224 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5225 		return B_BAD_VALUE;
5226 	}
5227 
5228 	// get the address space for the debugged thread
5229 	VMAddressSpace* addressSpace;
5230 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5231 		addressSpace = VMAddressSpace::Kernel();
5232 	} else if (teamID == B_CURRENT_TEAM) {
5233 		Thread* thread = debug_get_debugged_thread();
5234 		if (thread == NULL || thread->team == NULL)
5235 			return B_BAD_ADDRESS;
5236 
5237 		addressSpace = thread->team->address_space;
5238 	} else
5239 		addressSpace = VMAddressSpace::DebugGet(teamID);
5240 
5241 	if (addressSpace == NULL)
5242 		return B_BAD_ADDRESS;
5243 
5244 	// get the area
5245 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5246 	if (area == NULL)
5247 		return B_BAD_ADDRESS;
5248 
5249 	// search the page
5250 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5251 		+ area->cache_offset;
5252 	VMCache* cache = area->cache;
5253 	vm_page* page = NULL;
5254 	while (cache != NULL) {
5255 		page = cache->DebugLookupPage(cacheOffset);
5256 		if (page != NULL)
5257 			break;
5258 
5259 		// Page not found in this cache -- if it is paged out, we must not try
5260 		// to get it from lower caches.
5261 		if (cache->DebugHasPage(cacheOffset))
5262 			break;
5263 
5264 		cache = cache->source;
5265 	}
5266 
5267 	if (page == NULL)
5268 		return B_UNSUPPORTED;
5269 
5270 	// copy from/to physical memory
5271 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5272 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5273 
5274 	if (copyToUnsafe) {
5275 		if (page->Cache() != area->cache)
5276 			return B_UNSUPPORTED;
5277 
5278 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5279 	}
5280 
5281 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5282 }
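

/*	Usage sketch (illustrative only, not taken from an actual caller): a
	kernel debugger command could read a 32 bit value from a possibly
	unmapped address of the debugged thread like this. "userAddress" is a
	hypothetical addr_t whose 4 byte range stays within one page.

		uint32 value;
		if (vm_debug_copy_page_memory(B_CURRENT_TEAM, (void*)userAddress,
				&value, sizeof(value), false) == B_OK) {
			// copied from the unsafe memory into our kernel buffer
			kprintf("%#" B_PRIx32 "\n", value);
		}
*/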
5283 
5284 
5285 //	#pragma mark - kernel public API
5286 
5287 
5288 status_t
5289 user_memcpy(void* to, const void* from, size_t size)
5290 {
5291 	// don't allow address overflows
5292 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5293 		return B_BAD_ADDRESS;
5294 
5295 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5296 		return B_BAD_ADDRESS;
5297 
5298 	return B_OK;
5299 }
5300 
5301 
5302 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5303 	the string in \a to, NULL-terminating the result.
5304 
5305 	\param to Pointer to the destination C-string.
5306 	\param from Pointer to the source C-string.
5307 	\param size Size in bytes of the string buffer pointed to by \a to.
5308 
5309 	\return strlen(\a from) on success, or an error code on failure.
5310 */
5311 ssize_t
5312 user_strlcpy(char* to, const char* from, size_t size)
5313 {
5314 	if (to == NULL && size != 0)
5315 		return B_BAD_VALUE;
5316 	if (from == NULL)
5317 		return B_BAD_ADDRESS;
5318 
5319 	// limit size to avoid address overflows
5320 	size_t maxSize = std::min((addr_t)size,
5321 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5322 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5323 		// the source address might still overflow.
5324 
5325 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5326 
5327 	// If we hit the address overflow boundary, fail.
5328 	if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5329 			&& maxSize < size)) {
5330 		return B_BAD_ADDRESS;
5331 	}
5332 
5333 	return result;
5334 }
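

/*	Usage sketch (illustrative only): the typical pattern for copying a
	string argument in from userland, as also used by the syscalls further
	below. "userName" is a hypothetical userland pointer.

		char name[B_OS_NAME_LENGTH];
		if (!IS_USER_ADDRESS(userName)
			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
			return B_BAD_ADDRESS;
		// "name" now holds a NULL-terminated (possibly truncated) copy
*/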
5335 
5336 
5337 status_t
5338 user_memset(void* s, char c, size_t count)
5339 {
5340 	// don't allow address overflows
5341 	if ((addr_t)s + count < (addr_t)s)
5342 		return B_BAD_ADDRESS;
5343 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5344 		return B_BAD_ADDRESS;
5345 
5346 	return B_OK;
5347 }
5348 
5349 
5350 /*!	Wires a single page at the given address.
5351 
5352 	\param team The team whose address space the address belongs to. Supports
5353 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5354 		parameter is ignored.
5355 	\param address The virtual address to wire down. Does not need to
5356 		be page aligned.
5357 	\param writable If \c true the page shall be writable.
5358 	\param info On success the structure is filled in, containing among other
5359 		things the physical address the given virtual one translates to.
5360 	\return \c B_OK if the page could be wired, another error code otherwise.
5361 */
5362 status_t
5363 vm_wire_page(team_id team, addr_t address, bool writable,
5364 	VMPageWiringInfo* info)
5365 {
5366 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5367 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5368 
5369 	// compute the page protection that is required
5370 	bool isUser = IS_USER_ADDRESS(address);
5371 	uint32 requiredProtection = PAGE_PRESENT
5372 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5373 	if (writable)
5374 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5375 
5376 	// get and read lock the address space
5377 	VMAddressSpace* addressSpace = NULL;
5378 	if (isUser) {
5379 		if (team == B_CURRENT_TEAM)
5380 			addressSpace = VMAddressSpace::GetCurrent();
5381 		else
5382 			addressSpace = VMAddressSpace::Get(team);
5383 	} else
5384 		addressSpace = VMAddressSpace::GetKernel();
5385 	if (addressSpace == NULL)
5386 		return B_ERROR;
5387 
5388 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5389 
5390 	VMTranslationMap* map = addressSpace->TranslationMap();
5391 	status_t error = B_OK;
5392 
5393 	// get the area
5394 	VMArea* area = addressSpace->LookupArea(pageAddress);
5395 	if (area == NULL) {
5396 		addressSpace->Put();
5397 		return B_BAD_ADDRESS;
5398 	}
5399 
5400 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5401 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5402 
5403 	// mark the area range wired
5404 	area->Wire(&info->range);
5405 
5406 	// Lock the area's cache chain and the translation map. Needed to look
5407 	// up the page and play with its wired count.
5408 	cacheChainLocker.LockAllSourceCaches();
5409 	map->Lock();
5410 
5411 	phys_addr_t physicalAddress;
5412 	uint32 flags;
5413 	vm_page* page;
5414 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5415 		&& (flags & requiredProtection) == requiredProtection
5416 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5417 			!= NULL) {
5418 		// Already mapped with the correct permissions -- just increment
5419 		// the page's wired count.
5420 		increment_page_wired_count(page);
5421 
5422 		map->Unlock();
5423 		cacheChainLocker.Unlock();
5424 		addressSpaceLocker.Unlock();
5425 	} else {
5426 		// Let vm_soft_fault() map the page for us, if possible. We need
5427 		// to fully unlock to avoid deadlocks. Since we have already
5428 		// wired the area itself, nothing disturbing will happen with it
5429 		// in the meantime.
5430 		map->Unlock();
5431 		cacheChainLocker.Unlock();
5432 		addressSpaceLocker.Unlock();
5433 
5434 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5435 			isUser, &page);
5436 
5437 		if (error != B_OK) {
5438 			// The page could not be mapped -- clean up.
5439 			VMCache* cache = vm_area_get_locked_cache(area);
5440 			area->Unwire(&info->range);
5441 			cache->ReleaseRefAndUnlock();
5442 			addressSpace->Put();
5443 			return error;
5444 		}
5445 	}
5446 
5447 	info->physicalAddress
5448 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5449 			+ address % B_PAGE_SIZE;
5450 	info->page = page;
5451 
5452 	return B_OK;
5453 }
5454 
5455 
5456 /*!	Unwires a single page previously wired via vm_wire_page().
5457 
5458 	\param info The same object passed to vm_wire_page() before.
5459 */
5460 void
5461 vm_unwire_page(VMPageWiringInfo* info)
5462 {
5463 	// lock the address space
5464 	VMArea* area = info->range.area;
5465 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5466 		// takes over our reference
5467 
5468 	// lock the top cache
5469 	VMCache* cache = vm_area_get_locked_cache(area);
5470 	VMCacheChainLocker cacheChainLocker(cache);
5471 
5472 	if (info->page->Cache() != cache) {
5473 		// The page is not in the top cache, so we lock the whole cache chain
5474 		// before touching the page's wired count.
5475 		cacheChainLocker.LockAllSourceCaches();
5476 	}
5477 
5478 	decrement_page_wired_count(info->page);
5479 
5480 	// remove the wired range from the area
5481 	area->Unwire(&info->range);
5482 
5483 	cacheChainLocker.Unlock();
5484 }
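

/*	Usage sketch (illustrative only): the intended pairing of vm_wire_page()
	and vm_unwire_page(). "userAddress" and use_physical_address() are
	hypothetical.

		VMPageWiringInfo info;
		if (vm_wire_page(B_CURRENT_TEAM, userAddress, true, &info) == B_OK) {
			// info.physicalAddress stays valid while the page is wired
			use_physical_address(info.physicalAddress);
			vm_unwire_page(&info);
		}
*/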
5485 
5486 
5487 /*!	Wires down the given address range in the specified team's address space.
5488 
5489 	If successful the function
5490 	- acquires a reference to the specified team's address space,
5491 	- adds respective wired ranges to all areas that intersect with the given
5492 	  address range,
5493 	- makes sure all pages in the given address range are mapped with the
5494 	  requested access permissions and increments their wired count.
5495 
5496 	It fails when \a team doesn't specify a valid address space, when any part
5497 	of the specified address range is not covered by areas, when the concerned
5498 	areas don't allow mapping with the requested permissions, or when mapping
5499 	failed for another reason.
5500 
5501 	When successful the call must be balanced by an unlock_memory_etc() call
5502 	with the exact same parameters.
5503 
5504 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
5505 		is supported.
5506 	\param address The start of the address range to be wired.
5507 	\param numBytes The size of the address range to be wired.
5508 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5509 		requests that the range must be wired writable ("read from device
5510 		into memory").
5511 	\return \c B_OK on success, another error code otherwise.
5512 */
5513 status_t
5514 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5515 {
5516 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5517 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5518 
5519 	// compute the page protection that is required
5520 	bool isUser = IS_USER_ADDRESS(address);
5521 	bool writable = (flags & B_READ_DEVICE) == 0;
5522 	uint32 requiredProtection = PAGE_PRESENT
5523 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5524 	if (writable)
5525 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5526 
5527 	uint32 mallocFlags = isUser
5528 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5529 
5530 	// get and read lock the address space
5531 	VMAddressSpace* addressSpace = NULL;
5532 	if (isUser) {
5533 		if (team == B_CURRENT_TEAM)
5534 			addressSpace = VMAddressSpace::GetCurrent();
5535 		else
5536 			addressSpace = VMAddressSpace::Get(team);
5537 	} else
5538 		addressSpace = VMAddressSpace::GetKernel();
5539 	if (addressSpace == NULL)
5540 		return B_ERROR;
5541 
5542 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5543 		// We get a new address space reference here. The one we got above will
5544 		// be freed by unlock_memory_etc().
5545 
5546 	VMTranslationMap* map = addressSpace->TranslationMap();
5547 	status_t error = B_OK;
5548 
5549 	// iterate through all concerned areas
5550 	addr_t nextAddress = lockBaseAddress;
5551 	while (nextAddress != lockEndAddress) {
5552 		// get the next area
5553 		VMArea* area = addressSpace->LookupArea(nextAddress);
5554 		if (area == NULL) {
5555 			error = B_BAD_ADDRESS;
5556 			break;
5557 		}
5558 
5559 		addr_t areaStart = nextAddress;
5560 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5561 
5562 		// allocate the wired range (do that before locking the cache to avoid
5563 		// deadlocks)
5564 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5565 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5566 		if (range == NULL) {
5567 			error = B_NO_MEMORY;
5568 			break;
5569 		}
5570 
5571 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5572 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5573 
5574 		// mark the area range wired
5575 		area->Wire(range);
5576 
5577 		// Depending on the area cache type and the wiring, we may not need to
5578 		// look at the individual pages.
5579 		if (area->cache_type == CACHE_TYPE_NULL
5580 			|| area->cache_type == CACHE_TYPE_DEVICE
5581 			|| area->wiring == B_FULL_LOCK
5582 			|| area->wiring == B_CONTIGUOUS) {
5583 			nextAddress = areaEnd;
5584 			continue;
5585 		}
5586 
5587 		// Lock the area's cache chain and the translation map. Needed to look
5588 		// up pages and play with their wired count.
5589 		cacheChainLocker.LockAllSourceCaches();
5590 		map->Lock();
5591 
5592 		// iterate through the pages and wire them
5593 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5594 			phys_addr_t physicalAddress;
5595 			uint32 flags;
5596 
5597 			vm_page* page;
5598 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5599 				&& (flags & requiredProtection) == requiredProtection
5600 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5601 					!= NULL) {
5602 				// Already mapped with the correct permissions -- just increment
5603 				// the page's wired count.
5604 				increment_page_wired_count(page);
5605 			} else {
5606 				// Let vm_soft_fault() map the page for us, if possible. We need
5607 				// to fully unlock to avoid deadlocks. Since we have already
5608 				// wired the area itself, nothing disturbing will happen with it
5609 				// in the meantime.
5610 				map->Unlock();
5611 				cacheChainLocker.Unlock();
5612 				addressSpaceLocker.Unlock();
5613 
5614 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5615 					false, isUser, &page);
5616 
5617 				addressSpaceLocker.Lock();
5618 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5619 				cacheChainLocker.LockAllSourceCaches();
5620 				map->Lock();
5621 			}
5622 
5623 			if (error != B_OK)
5624 				break;
5625 		}
5626 
5627 		map->Unlock();
5628 
5629 		if (error == B_OK) {
5630 			cacheChainLocker.Unlock();
5631 		} else {
5632 			// An error occurred, so abort right here. If the current address
5633 			// is the first in this area, unwire the area, since we won't get
5634 			// to it when reverting what we've done so far.
5635 			if (nextAddress == areaStart) {
5636 				area->Unwire(range);
5637 				cacheChainLocker.Unlock();
5638 				range->~VMAreaWiredRange();
5639 				free_etc(range, mallocFlags);
5640 			} else
5641 				cacheChainLocker.Unlock();
5642 
5643 			break;
5644 		}
5645 	}
5646 
5647 	if (error != B_OK) {
5648 		// An error occurred, so unwire all that we've already wired. Note that
5649 		// even if not a single page was wired, unlock_memory_etc() is called
5650 		// to put the address space reference.
5651 		addressSpaceLocker.Unlock();
5652 		unlock_memory_etc(team, (void*)lockBaseAddress,
5653 			nextAddress - lockBaseAddress, flags);
5654 	}
5655 
5656 	return error;
5657 }
5658 
5659 
5660 status_t
5661 lock_memory(void* address, size_t numBytes, uint32 flags)
5662 {
5663 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5664 }
5665 
5666 
5667 /*!	Unwires an address range previously wired with lock_memory_etc().
5668 
5669 	Note that a call to this function must balance a previous lock_memory_etc()
5670 	call with exactly the same parameters.
5671 */
5672 status_t
5673 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5674 {
5675 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5676 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5677 
5678 	// compute the page protection that is required
5679 	bool isUser = IS_USER_ADDRESS(address);
5680 	bool writable = (flags & B_READ_DEVICE) == 0;
5681 	uint32 requiredProtection = PAGE_PRESENT
5682 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5683 	if (writable)
5684 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5685 
5686 	uint32 mallocFlags = isUser
5687 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5688 
5689 	// get and read lock the address space
5690 	VMAddressSpace* addressSpace = NULL;
5691 	if (isUser) {
5692 		if (team == B_CURRENT_TEAM)
5693 			addressSpace = VMAddressSpace::GetCurrent();
5694 		else
5695 			addressSpace = VMAddressSpace::Get(team);
5696 	} else
5697 		addressSpace = VMAddressSpace::GetKernel();
5698 	if (addressSpace == NULL)
5699 		return B_ERROR;
5700 
5701 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5702 		// Take over the address space reference. We don't unlock until we're
5703 		// done.
5704 
5705 	VMTranslationMap* map = addressSpace->TranslationMap();
5706 	status_t error = B_OK;
5707 
5708 	// iterate through all concerned areas
5709 	addr_t nextAddress = lockBaseAddress;
5710 	while (nextAddress != lockEndAddress) {
5711 		// get the next area
5712 		VMArea* area = addressSpace->LookupArea(nextAddress);
5713 		if (area == NULL) {
5714 			error = B_BAD_ADDRESS;
5715 			break;
5716 		}
5717 
5718 		addr_t areaStart = nextAddress;
5719 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5720 
5721 		// Lock the area's top cache. This is a requirement for
5722 		// VMArea::Unwire().
5723 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5724 
5725 		// Depending on the area cache type and the wiring, we may not need to
5726 		// look at the individual pages.
5727 		if (area->cache_type == CACHE_TYPE_NULL
5728 			|| area->cache_type == CACHE_TYPE_DEVICE
5729 			|| area->wiring == B_FULL_LOCK
5730 			|| area->wiring == B_CONTIGUOUS) {
5731 			// unwire the range (to avoid deadlocks we delete the range after
5732 			// unlocking the cache)
5733 			nextAddress = areaEnd;
5734 			VMAreaWiredRange* range = area->Unwire(areaStart,
5735 				areaEnd - areaStart, writable);
5736 			cacheChainLocker.Unlock();
5737 			if (range != NULL) {
5738 				range->~VMAreaWiredRange();
5739 				free_etc(range, mallocFlags);
5740 			}
5741 			continue;
5742 		}
5743 
5744 		// Lock the area's cache chain and the translation map. Needed to look
5745 		// up pages and play with their wired count.
5746 		cacheChainLocker.LockAllSourceCaches();
5747 		map->Lock();
5748 
5749 		// iterate through the pages and unwire them
5750 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5751 			phys_addr_t physicalAddress;
5752 			uint32 flags;
5753 
5754 			vm_page* page;
5755 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5756 				&& (flags & PAGE_PRESENT) != 0
5757 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5758 					!= NULL) {
5759 				// The page is present and mapped -- just decrement the
5760 				// page's wired count.
5761 				decrement_page_wired_count(page);
5762 			} else {
5763 				panic("unlock_memory_etc(): Failed to unwire page: address "
5764 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5765 					nextAddress);
5766 				error = B_BAD_VALUE;
5767 				break;
5768 			}
5769 		}
5770 
5771 		map->Unlock();
5772 
5773 		// All pages are unwired. Remove the area's wired range as well (to
5774 		// avoid deadlocks we delete the range after unlocking the cache).
5775 		VMAreaWiredRange* range = area->Unwire(areaStart,
5776 			areaEnd - areaStart, writable);
5777 
5778 		cacheChainLocker.Unlock();
5779 
5780 		if (range != NULL) {
5781 			range->~VMAreaWiredRange();
5782 			free_etc(range, mallocFlags);
5783 		}
5784 
5785 		if (error != B_OK)
5786 			break;
5787 	}
5788 
5789 	// get rid of the address space reference lock_memory_etc() acquired
5790 	addressSpace->Put();
5791 
5792 	return error;
5793 }
5794 
5795 
5796 status_t
5797 unlock_memory(void* address, size_t numBytes, uint32 flags)
5798 {
5799 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5800 }
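

/*	Usage sketch (illustrative only): lock_memory_etc() and
	unlock_memory_etc() have to be balanced with identical parameters, e.g.
	around a device I/O transfer on a user buffer. "buffer", "length", and
	"lockFlags" are hypothetical.

		if (lock_memory_etc(B_CURRENT_TEAM, buffer, length, lockFlags)
				== B_OK) {
			// ... perform the transfer against the now wired pages ...
			unlock_memory_etc(B_CURRENT_TEAM, buffer, length, lockFlags);
		}
*/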
5801 
5802 
5803 /*!	Similar to get_memory_map(), but also allows specifying the address space
5804 	for the memory in question and has saner semantics.
5805 	Returns \c B_OK when the complete range could be translated or
5806 	\c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5807 	case the actual number of entries is written to \c *_numEntries. Any other
5808 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5809 	in this case.
5810 */
5811 status_t
5812 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5813 	physical_entry* table, uint32* _numEntries)
5814 {
5815 	uint32 numEntries = *_numEntries;
5816 	*_numEntries = 0;
5817 
5818 	VMAddressSpace* addressSpace;
5819 	addr_t virtualAddress = (addr_t)address;
5820 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5821 	phys_addr_t physicalAddress;
5822 	status_t status = B_OK;
5823 	int32 index = -1;
5824 	addr_t offset = 0;
5825 	bool interrupts = are_interrupts_enabled();
5826 
5827 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5828 		"entries)\n", team, address, numBytes, numEntries));
5829 
5830 	if (numEntries == 0 || numBytes == 0)
5831 		return B_BAD_VALUE;
5832 
5833 	// in which address space is the address to be found?
5834 	if (IS_USER_ADDRESS(virtualAddress)) {
5835 		if (team == B_CURRENT_TEAM)
5836 			addressSpace = VMAddressSpace::GetCurrent();
5837 		else
5838 			addressSpace = VMAddressSpace::Get(team);
5839 	} else
5840 		addressSpace = VMAddressSpace::GetKernel();
5841 
5842 	if (addressSpace == NULL)
5843 		return B_ERROR;
5844 
5845 	VMTranslationMap* map = addressSpace->TranslationMap();
5846 
5847 	if (interrupts)
5848 		map->Lock();
5849 
5850 	while (offset < numBytes) {
5851 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5852 		uint32 flags;
5853 
5854 		if (interrupts) {
5855 			status = map->Query((addr_t)address + offset, &physicalAddress,
5856 				&flags);
5857 		} else {
5858 			status = map->QueryInterrupt((addr_t)address + offset,
5859 				&physicalAddress, &flags);
5860 		}
5861 		if (status < B_OK)
5862 			break;
5863 		if ((flags & PAGE_PRESENT) == 0) {
5864 			panic("get_memory_map() called on unmapped memory!");
5865 			return B_BAD_ADDRESS;
5866 		}
5867 
5868 		if (index < 0 && pageOffset > 0) {
5869 			physicalAddress += pageOffset;
5870 			if (bytes > B_PAGE_SIZE - pageOffset)
5871 				bytes = B_PAGE_SIZE - pageOffset;
5872 		}
5873 
5874 		// need to switch to the next physical_entry?
5875 		if (index < 0 || table[index].address
5876 				!= physicalAddress - table[index].size) {
5877 			if ((uint32)++index + 1 > numEntries) {
5878 				// table too small
5879 				break;
5880 			}
5881 			table[index].address = physicalAddress;
5882 			table[index].size = bytes;
5883 		} else {
5884 			// page fits into the current entry
5885 			table[index].size += bytes;
5886 		}
5887 
5888 		offset += bytes;
5889 	}
5890 
5891 	if (interrupts)
5892 		map->Unlock();
5893 
5894 	if (status != B_OK)
5895 		return status;
5896 
5897 	if ((uint32)index + 1 > numEntries) {
5898 		*_numEntries = index;
5899 		return B_BUFFER_OVERFLOW;
5900 	}
5901 
5902 	*_numEntries = index + 1;
5903 	return B_OK;
5904 }
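

/*	Usage sketch (illustrative only): translating a virtual buffer into runs
	of physical memory. On B_BUFFER_OVERFLOW the table was too small, but
	*_numEntries still reports how many entries were filled in. "buffer" and
	"length" are hypothetical.

		physical_entry entries[8];
		uint32 count = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
			entries, &count);
		if (status == B_OK || status == B_BUFFER_OVERFLOW) {
			for (uint32 i = 0; i < count; i++) {
				dprintf("%#" B_PRIxPHYSADDR ", %" B_PRIu64 " bytes\n",
					entries[i].address, (uint64)entries[i].size);
			}
		}
*/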
5905 
5906 
5907 /*!	According to the BeBook, this function should always succeed.
5908 	This is no longer the case.
5909 */
5910 extern "C" int32
5911 __get_memory_map_haiku(const void* address, size_t numBytes,
5912 	physical_entry* table, int32 numEntries)
5913 {
5914 	uint32 entriesRead = numEntries;
5915 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5916 		table, &entriesRead);
5917 	if (error != B_OK)
5918 		return error;
5919 
5920 	// close the entry list
5921 
5922 	// if it's only one entry, we will silently accept the missing ending
5923 	if (numEntries == 1)
5924 		return B_OK;
5925 
5926 	if (entriesRead + 1 > (uint32)numEntries)
5927 		return B_BUFFER_OVERFLOW;
5928 
5929 	table[entriesRead].address = 0;
5930 	table[entriesRead].size = 0;
5931 
5932 	return B_OK;
5933 }
5934 
5935 
5936 area_id
5937 area_for(void* address)
5938 {
5939 	return vm_area_for((addr_t)address, true);
5940 }
5941 
5942 
5943 area_id
5944 find_area(const char* name)
5945 {
5946 	return VMAreaHash::Find(name);
5947 }
5948 
5949 
5950 status_t
5951 _get_area_info(area_id id, area_info* info, size_t size)
5952 {
5953 	if (size != sizeof(area_info) || info == NULL)
5954 		return B_BAD_VALUE;
5955 
5956 	AddressSpaceReadLocker locker;
5957 	VMArea* area;
5958 	status_t status = locker.SetFromArea(id, area);
5959 	if (status != B_OK)
5960 		return status;
5961 
5962 	fill_area_info(area, info, size);
5963 	return B_OK;
5964 }
5965 
5966 
5967 status_t
5968 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5969 {
5970 	addr_t nextBase = *(addr_t*)cookie;
5971 
5972 	// we're already through the list
5973 	if (nextBase == (addr_t)-1)
5974 		return B_ENTRY_NOT_FOUND;
5975 
5976 	if (team == B_CURRENT_TEAM)
5977 		team = team_get_current_team_id();
5978 
5979 	AddressSpaceReadLocker locker(team);
5980 	if (!locker.IsLocked())
5981 		return B_BAD_TEAM_ID;
5982 
5983 	VMArea* area;
5984 	for (VMAddressSpace::AreaIterator it
5985 				= locker.AddressSpace()->GetAreaIterator();
5986 			(area = it.Next()) != NULL;) {
5987 		if (area->Base() > nextBase)
5988 			break;
5989 	}
5990 
5991 	if (area == NULL) {
5992 		nextBase = (addr_t)-1;
5993 		return B_ENTRY_NOT_FOUND;
5994 	}
5995 
5996 	fill_area_info(area, info, size);
5997 	*cookie = (ssize_t)(area->Base());
5998 
5999 	return B_OK;
6000 }
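

/*	Usage sketch (illustrative only): iterating all areas of a team with the
	cookie protocol used above. The cookie has to start at 0 and holds the
	base address of the last area returned. "team" is a hypothetical team ID.

		ssize_t cookie = 0;
		area_info info;
		while (_get_next_area_info(team, &cookie, &info, sizeof(info))
				== B_OK) {
			dprintf("%" B_PRId32 ": %s\n", info.area, info.name);
		}
*/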
6001 
6002 
6003 status_t
6004 set_area_protection(area_id area, uint32 newProtection)
6005 {
6006 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6007 		newProtection, true);
6008 }
6009 
6010 
6011 status_t
6012 resize_area(area_id areaID, size_t newSize)
6013 {
6014 	return vm_resize_area(areaID, newSize, true);
6015 }
6016 
6017 
6018 /*!	Transfers the specified area to a new team. The caller must be the owner
6019 	of the area.
6020 */
6021 area_id
6022 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6023 	bool kernel)
6024 {
6025 	area_info info;
6026 	status_t status = get_area_info(id, &info);
6027 	if (status != B_OK)
6028 		return status;
6029 
6030 	if (info.team != thread_get_current_thread()->team->id)
6031 		return B_PERMISSION_DENIED;
6032 
6033 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6034 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6035 	if (clonedArea < 0)
6036 		return clonedArea;
6037 
6038 	status = vm_delete_area(info.team, id, kernel);
6039 	if (status != B_OK) {
6040 		vm_delete_area(target, clonedArea, kernel);
6041 		return status;
6042 	}
6043 
6044 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6045 
6046 	return clonedArea;
6047 }
6048 
6049 
6050 extern "C" area_id
6051 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6052 	size_t numBytes, uint32 addressSpec, uint32 protection,
6053 	void** _virtualAddress)
6054 {
6055 	if (!arch_vm_supports_protection(protection))
6056 		return B_NOT_SUPPORTED;
6057 
6058 	fix_protection(&protection);
6059 
6060 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6061 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6062 		false);
6063 }
6064 
6065 
6066 area_id
6067 clone_area(const char* name, void** _address, uint32 addressSpec,
6068 	uint32 protection, area_id source)
6069 {
6070 	if ((protection & B_KERNEL_PROTECTION) == 0)
6071 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6072 
6073 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6074 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6075 }
6076 
6077 
6078 area_id
6079 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6080 	uint32 protection, uint32 flags, uint32 guardSize,
6081 	const virtual_address_restrictions* virtualAddressRestrictions,
6082 	const physical_address_restrictions* physicalAddressRestrictions,
6083 	void** _address)
6084 {
6085 	fix_protection(&protection);
6086 
6087 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6088 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6089 		true, _address);
6090 }
6091 
6092 
6093 extern "C" area_id
6094 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6095 	size_t size, uint32 lock, uint32 protection)
6096 {
6097 	fix_protection(&protection);
6098 
6099 	virtual_address_restrictions virtualRestrictions = {};
6100 	virtualRestrictions.address = *_address;
6101 	virtualRestrictions.address_specification = addressSpec;
6102 	physical_address_restrictions physicalRestrictions = {};
6103 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6104 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6105 		true, _address);
6106 }
6107 
6108 
6109 status_t
6110 delete_area(area_id area)
6111 {
6112 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6113 }
6114 
6115 
6116 //	#pragma mark - Userland syscalls
6117 
6118 
6119 status_t
6120 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6121 	addr_t size)
6122 {
6123 	// filter out some unavailable values (for userland)
6124 	switch (addressSpec) {
6125 		case B_ANY_KERNEL_ADDRESS:
6126 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6127 			return B_BAD_VALUE;
6128 	}
6129 
6130 	addr_t address;
6131 
6132 	if (!IS_USER_ADDRESS(userAddress)
6133 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6134 		return B_BAD_ADDRESS;
6135 
6136 	status_t status = vm_reserve_address_range(
6137 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6138 		RESERVED_AVOID_BASE);
6139 	if (status != B_OK)
6140 		return status;
6141 
6142 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6143 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6144 			(void*)address, size);
6145 		return B_BAD_ADDRESS;
6146 	}
6147 
6148 	return B_OK;
6149 }
6150 
6151 
6152 status_t
6153 _user_unreserve_address_range(addr_t address, addr_t size)
6154 {
6155 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6156 		(void*)address, size);
6157 }
6158 
6159 
6160 area_id
6161 _user_area_for(void* address)
6162 {
6163 	return vm_area_for((addr_t)address, false);
6164 }
6165 
6166 
6167 area_id
6168 _user_find_area(const char* userName)
6169 {
6170 	char name[B_OS_NAME_LENGTH];
6171 
6172 	if (!IS_USER_ADDRESS(userName)
6173 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6174 		return B_BAD_ADDRESS;
6175 
6176 	return find_area(name);
6177 }
6178 
6179 
6180 status_t
6181 _user_get_area_info(area_id area, area_info* userInfo)
6182 {
6183 	if (!IS_USER_ADDRESS(userInfo))
6184 		return B_BAD_ADDRESS;
6185 
6186 	area_info info;
6187 	status_t status = get_area_info(area, &info);
6188 	if (status < B_OK)
6189 		return status;
6190 
6191 	// TODO: do we want to prevent userland from seeing kernel protections?
6192 	//info.protection &= B_USER_PROTECTION;
6193 
6194 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6195 		return B_BAD_ADDRESS;
6196 
6197 	return status;
6198 }
6199 
6200 
6201 status_t
6202 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6203 {
6204 	ssize_t cookie;
6205 
6206 	if (!IS_USER_ADDRESS(userCookie)
6207 		|| !IS_USER_ADDRESS(userInfo)
6208 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6209 		return B_BAD_ADDRESS;
6210 
6211 	area_info info;
6212 	status_t status = _get_next_area_info(team, &cookie, &info,
6213 		sizeof(area_info));
6214 	if (status != B_OK)
6215 		return status;
6216 
6217 	//info.protection &= B_USER_PROTECTION;
6218 
6219 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6220 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6221 		return B_BAD_ADDRESS;
6222 
6223 	return status;
6224 }
6225 
6226 
6227 status_t
6228 _user_set_area_protection(area_id area, uint32 newProtection)
6229 {
6230 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6231 		return B_BAD_VALUE;
6232 
6233 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6234 		newProtection, false);
6235 }
6236 
6237 
6238 status_t
6239 _user_resize_area(area_id area, size_t newSize)
6240 {
6241 	// TODO: Since we restrict deleting of areas to those owned by the team,
6242 	// we should also do that for resizing (check other functions, too).
6243 	return vm_resize_area(area, newSize, false);
6244 }
6245 
6246 
6247 area_id
6248 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6249 	team_id target)
6250 {
6251 	// filter out some unavailable values (for userland)
6252 	switch (addressSpec) {
6253 		case B_ANY_KERNEL_ADDRESS:
6254 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6255 			return B_BAD_VALUE;
6256 	}
6257 
6258 	void* address;
6259 	if (!IS_USER_ADDRESS(userAddress)
6260 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6261 		return B_BAD_ADDRESS;
6262 
6263 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6264 	if (newArea < B_OK)
6265 		return newArea;
6266 
6267 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6268 		return B_BAD_ADDRESS;
6269 
6270 	return newArea;
6271 }
6272 
6273 
6274 area_id
6275 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6276 	uint32 protection, area_id sourceArea)
6277 {
6278 	char name[B_OS_NAME_LENGTH];
6279 	void* address;
6280 
6281 	// filter out some unavailable values (for userland)
6282 	switch (addressSpec) {
6283 		case B_ANY_KERNEL_ADDRESS:
6284 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6285 			return B_BAD_VALUE;
6286 	}
6287 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6288 		return B_BAD_VALUE;
6289 
6290 	if (!IS_USER_ADDRESS(userName)
6291 		|| !IS_USER_ADDRESS(userAddress)
6292 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6293 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6294 		return B_BAD_ADDRESS;
6295 
6296 	fix_protection(&protection);
6297 
6298 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6299 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6300 		false);
6301 	if (clonedArea < B_OK)
6302 		return clonedArea;
6303 
6304 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6305 		delete_area(clonedArea);
6306 		return B_BAD_ADDRESS;
6307 	}
6308 
6309 	return clonedArea;
6310 }
6311 
6312 
6313 area_id
6314 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6315 	size_t size, uint32 lock, uint32 protection)
6316 {
6317 	char name[B_OS_NAME_LENGTH];
6318 	void* address;
6319 
6320 	// filter out some unavailable values (for userland)
6321 	switch (addressSpec) {
6322 		case B_ANY_KERNEL_ADDRESS:
6323 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6324 			return B_BAD_VALUE;
6325 	}
6326 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6327 		return B_BAD_VALUE;
6328 
6329 	if (!IS_USER_ADDRESS(userName)
6330 		|| !IS_USER_ADDRESS(userAddress)
6331 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6332 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6333 		return B_BAD_ADDRESS;
6334 
6335 	if (addressSpec == B_EXACT_ADDRESS
6336 		&& IS_KERNEL_ADDRESS(address))
6337 		return B_BAD_VALUE;
6338 
6339 	if (addressSpec == B_ANY_ADDRESS)
6340 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6341 	if (addressSpec == B_BASE_ADDRESS)
6342 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6343 
6344 	fix_protection(&protection);
6345 
6346 	virtual_address_restrictions virtualRestrictions = {};
6347 	virtualRestrictions.address = address;
6348 	virtualRestrictions.address_specification = addressSpec;
6349 	physical_address_restrictions physicalRestrictions = {};
6350 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6351 		size, lock, protection, 0, 0, &virtualRestrictions,
6352 		&physicalRestrictions, false, &address);
6353 
6354 	if (area >= B_OK
6355 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6356 		delete_area(area);
6357 		return B_BAD_ADDRESS;
6358 	}
6359 
6360 	return area;
6361 }
6362 
6363 
6364 status_t
6365 _user_delete_area(area_id area)
6366 {
6367 	// Unlike the BeOS implementation, you can now only delete areas
6368 	// that you have created yourself from userland.
6369 	// The documentation for delete_area() explicitly states that this
6370 	// will be restricted in the future, and so it will.
6371 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6372 }
6373 
6374 
6375 // TODO: create a BeOS style call for this!
6376 
6377 area_id
6378 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6379 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6380 	int fd, off_t offset)
6381 {
6382 	char name[B_OS_NAME_LENGTH];
6383 	void* address;
6384 	area_id area;
6385 
6386 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6387 		return B_BAD_VALUE;
6388 
6389 	fix_protection(&protection);
6390 
6391 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6392 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6393 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6394 		return B_BAD_ADDRESS;
6395 
6396 	if (addressSpec == B_EXACT_ADDRESS) {
6397 		if ((addr_t)address + size < (addr_t)address
6398 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6399 			return B_BAD_VALUE;
6400 		}
6401 		if (!IS_USER_ADDRESS(address)
6402 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6403 			return B_BAD_ADDRESS;
6404 		}
6405 	}
6406 
6407 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6408 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6409 		false);
6410 	if (area < B_OK)
6411 		return area;
6412 
6413 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6414 		return B_BAD_ADDRESS;
6415 
6416 	return area;
6417 }
6418 
6419 
6420 status_t
6421 _user_unmap_memory(void* _address, size_t size)
6422 {
6423 	addr_t address = (addr_t)_address;
6424 
6425 	// check params
6426 	if (size == 0 || (addr_t)address + size < (addr_t)address
6427 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6428 		return B_BAD_VALUE;
6429 	}
6430 
6431 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6432 		return B_BAD_ADDRESS;
6433 
6434 	// Write lock the address space and ensure the address range is not wired.
6435 	AddressSpaceWriteLocker locker;
6436 	do {
6437 		status_t status = locker.SetTo(team_get_current_team_id());
6438 		if (status != B_OK)
6439 			return status;
6440 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6441 			size, &locker));
6442 
6443 	// unmap
6444 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6445 }
6446 
6447 
6448 status_t
6449 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6450 {
6451 	// check address range
6452 	addr_t address = (addr_t)_address;
6453 	size = PAGE_ALIGN(size);
6454 
6455 	if ((address % B_PAGE_SIZE) != 0)
6456 		return B_BAD_VALUE;
6457 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6458 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6459 		// weird error code required by POSIX
6460 		return ENOMEM;
6461 	}
6462 
6463 	// extend and check protection
6464 	if ((protection & ~B_USER_PROTECTION) != 0)
6465 		return B_BAD_VALUE;
6466 
6467 	fix_protection(&protection);
6468 
6469 	// We need to write lock the address space, since we're going to play with
6470 	// the areas. Also make sure that none of the areas is wired and that we're
6471 	// actually allowed to change the protection.
6472 	AddressSpaceWriteLocker locker;
6473 
6474 	bool restart;
6475 	do {
6476 		restart = false;
6477 
6478 		status_t status = locker.SetTo(team_get_current_team_id());
6479 		if (status != B_OK)
6480 			return status;
6481 
6482 		// First round: Check whether the whole range is covered by areas and we
6483 		// are allowed to modify them.
6484 		addr_t currentAddress = address;
6485 		size_t sizeLeft = size;
6486 		while (sizeLeft > 0) {
6487 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6488 			if (area == NULL)
6489 				return B_NO_MEMORY;
6490 
6491 			if ((area->protection & B_KERNEL_AREA) != 0)
6492 				return B_NOT_ALLOWED;
6493 
6494 			// TODO: For (shared) mapped files we should check whether the new
6495 			// protections are compatible with the file permissions. We don't
6496 			// have a way to do that yet, though.
6497 
6498 			addr_t offset = currentAddress - area->Base();
6499 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6500 
6501 			AreaCacheLocker cacheLocker(area);
6502 
6503 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6504 					&locker, &cacheLocker)) {
6505 				restart = true;
6506 				break;
6507 			}
6508 
6509 			cacheLocker.Unlock();
6510 
6511 			currentAddress += rangeSize;
6512 			sizeLeft -= rangeSize;
6513 		}
6514 	} while (restart);
6515 
6516 	// Second round: If the protections differ from that of the area, create a
6517 	// page protection array and re-map mapped pages.
6518 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6519 	addr_t currentAddress = address;
6520 	size_t sizeLeft = size;
6521 	while (sizeLeft > 0) {
6522 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6523 		if (area == NULL)
6524 			return B_NO_MEMORY;
6525 
6526 		addr_t offset = currentAddress - area->Base();
6527 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6528 
6529 		currentAddress += rangeSize;
6530 		sizeLeft -= rangeSize;
6531 
6532 		if (area->page_protections == NULL) {
6533 			if (area->protection == protection)
6534 				continue;
6535 
6536 			status_t status = allocate_area_page_protections(area);
6537 			if (status != B_OK)
6538 				return status;
6539 		}
6540 
6541 		// We need to lock the complete cache chain, since we potentially unmap
6542 		// pages of lower caches.
6543 		VMCache* topCache = vm_area_get_locked_cache(area);
6544 		VMCacheChainLocker cacheChainLocker(topCache);
6545 		cacheChainLocker.LockAllSourceCaches();
6546 
6547 		for (addr_t pageAddress = area->Base() + offset;
6548 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6549 			map->Lock();
6550 
6551 			set_area_page_protection(area, pageAddress, protection);
6552 
6553 			phys_addr_t physicalAddress;
6554 			uint32 flags;
6555 
6556 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6557 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6558 				map->Unlock();
6559 				continue;
6560 			}
6561 
6562 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6563 			if (page == NULL) {
6564 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6565 					"\n", area, physicalAddress);
6566 				map->Unlock();
6567 				return B_ERROR;
6568 			}
6569 
6570 			// If the page is not in the topmost cache and write access is
6571 			// requested, we have to unmap it. Otherwise we can re-map it with
6572 			// the new protection.
6573 			bool unmapPage = page->Cache() != topCache
6574 				&& (protection & B_WRITE_AREA) != 0;
6575 
6576 			if (!unmapPage)
6577 				map->ProtectPage(area, pageAddress, protection);
6578 
6579 			map->Unlock();
6580 
6581 			if (unmapPage) {
6582 				DEBUG_PAGE_ACCESS_START(page);
6583 				unmap_page(area, pageAddress);
6584 				DEBUG_PAGE_ACCESS_END(page);
6585 			}
6586 		}
6587 	}
6588 
6589 	return B_OK;
6590 }
6591 
6592 
6593 status_t
6594 _user_sync_memory(void* _address, size_t size, uint32 flags)
6595 {
6596 	addr_t address = (addr_t)_address;
6597 	size = PAGE_ALIGN(size);
6598 
6599 	// check params
6600 	if ((address % B_PAGE_SIZE) != 0)
6601 		return B_BAD_VALUE;
6602 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6603 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6604 		// weird error code required by POSIX
6605 		return ENOMEM;
6606 	}
6607 
6608 	bool writeSync = (flags & MS_SYNC) != 0;
6609 	bool writeAsync = (flags & MS_ASYNC) != 0;
6610 	if (writeSync && writeAsync)
6611 		return B_BAD_VALUE;
6612 
6613 	if (size == 0 || (!writeSync && !writeAsync))
6614 		return B_OK;
6615 
6616 	// iterate through the range and sync all concerned areas
6617 	while (size > 0) {
6618 		// read lock the address space
6619 		AddressSpaceReadLocker locker;
6620 		status_t error = locker.SetTo(team_get_current_team_id());
6621 		if (error != B_OK)
6622 			return error;
6623 
6624 		// get the first area
6625 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6626 		if (area == NULL)
6627 			return B_NO_MEMORY;
6628 
6629 		uint32 offset = address - area->Base();
6630 		size_t rangeSize = min_c(area->Size() - offset, size);
6631 		offset += area->cache_offset;
6632 
6633 		// lock the cache
6634 		AreaCacheLocker cacheLocker(area);
6635 		if (!cacheLocker)
6636 			return B_BAD_VALUE;
6637 		VMCache* cache = area->cache;
6638 
6639 		locker.Unlock();
6640 
6641 		uint32 firstPage = offset >> PAGE_SHIFT;
6642 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6643 
6644 		// write the pages
6645 		if (cache->type == CACHE_TYPE_VNODE) {
6646 			if (writeSync) {
6647 				// synchronous
6648 				error = vm_page_write_modified_page_range(cache, firstPage,
6649 					endPage);
6650 				if (error != B_OK)
6651 					return error;
6652 			} else {
6653 				// asynchronous
6654 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6655 				// TODO: This is probably not quite what is supposed to happen.
6656 				// Especially when a lot has to be written, it might take ages
6657 				// until it really hits the disk.
6658 			}
6659 		}
6660 
6661 		address += rangeSize;
6662 		size -= rangeSize;
6663 	}
6664 
6665 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6666 	// synchronize multiple mappings of the same file. In our VM they never get
6667 	// out of sync, though, so we don't have to do anything.
6668 
6669 	return B_OK;
6670 }
6671 
6672 
6673 status_t
6674 _user_memory_advice(void* address, size_t size, uint32 advice)
6675 {
6676 	// TODO: Implement!
6677 	return B_OK;
6678 }
6679 
6680 
6681 status_t
6682 _user_get_memory_properties(team_id teamID, const void* address,
6683 	uint32* _protected, uint32* _lock)
6684 {
6685 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6686 		return B_BAD_ADDRESS;
6687 
6688 	AddressSpaceReadLocker locker;
6689 	status_t error = locker.SetTo(teamID);
6690 	if (error != B_OK)
6691 		return error;
6692 
6693 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6694 	if (area == NULL)
6695 		return B_NO_MEMORY;
6696 
6697 
6698 	uint32 protection = area->protection;
6699 	if (area->page_protections != NULL)
6700 		protection = get_area_page_protection(area, (addr_t)address);
6701 
6702 	uint32 wiring = area->wiring;
6703 
6704 	locker.Unlock();
6705 
6706 	error = user_memcpy(_protected, &protection, sizeof(protection));
6707 	if (error != B_OK)
6708 		return error;
6709 
6710 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6711 
6712 	return error;
6713 }
6714 
6715 
6716 // #pragma mark -- compatibility
6717 
6718 
6719 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6720 
6721 
6722 struct physical_entry_beos {
6723 	uint32	address;
6724 	uint32	size;
6725 };
6726 
6727 
6728 /*!	The physical_entry structure has changed. We need to translate it to the
6729 	old one.
6730 */
6731 extern "C" int32
6732 __get_memory_map_beos(const void* _address, size_t numBytes,
6733 	physical_entry_beos* table, int32 numEntries)
6734 {
6735 	if (numEntries <= 0)
6736 		return B_BAD_VALUE;
6737 
6738 	const uint8* address = (const uint8*)_address;
6739 
6740 	int32 count = 0;
6741 	while (numBytes > 0 && count < numEntries) {
6742 		physical_entry entry;
6743 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6744 		if (result < 0) {
6745 			if (result != B_BUFFER_OVERFLOW)
6746 				return result;
6747 		}
6748 
6749 		if (entry.address >= (phys_addr_t)1 << 32) {
6750 			panic("get_memory_map(): Address is greater than 4 GB!");
6751 			return B_ERROR;
6752 		}
6753 
6754 		table[count].address = entry.address;
6755 		table[count++].size = entry.size;
6756 
6757 		address += entry.size;
6758 		numBytes -= entry.size;
6759 	}
6760 
6761 	// null-terminate the table, if possible
6762 	if (count < numEntries) {
6763 		table[count].address = 0;
6764 		table[count].size = 0;
6765 	}
6766 
6767 	return B_OK;
6768 }
6769 
6770 
6771 /*!	The type of the \a physicalAddress parameter has changed from void* to
6772 	phys_addr_t.
6773 */
6774 extern "C" area_id
6775 __map_physical_memory_beos(const char* name, void* physicalAddress,
6776 	size_t numBytes, uint32 addressSpec, uint32 protection,
6777 	void** _virtualAddress)
6778 {
6779 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6780 		addressSpec, protection, _virtualAddress);
6781 }
6782 
6783 
6784 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6785 	we meddle with the \a lock parameter to force 32 bit.
6786 */
6787 extern "C" area_id
6788 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6789 	size_t size, uint32 lock, uint32 protection)
6790 {
6791 	switch (lock) {
6792 		case B_NO_LOCK:
6793 			break;
6794 		case B_FULL_LOCK:
6795 		case B_LAZY_LOCK:
6796 			lock = B_32_BIT_FULL_LOCK;
6797 			break;
6798 		case B_CONTIGUOUS:
6799 			lock = B_32_BIT_CONTIGUOUS;
6800 			break;
6801 	}
6802 
6803 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6804 		protection);
6805 }
6806 
6807 
6808 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6809 	"BASE");
6810 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6811 	"map_physical_memory@", "BASE");
6812 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6813 	"BASE");
6814 
6815 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6816 	"get_memory_map@@", "1_ALPHA3");
6817 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6818 	"map_physical_memory@@", "1_ALPHA3");
6819 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6820 	"1_ALPHA3");
6821 
6822 
6823 #else
6824 
6825 
6826 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6827 	"get_memory_map@@", "BASE");
6828 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6829 	"map_physical_memory@@", "BASE");
6830 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6831 	"BASE");
6832 
6833 
6834 #endif	// defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6835