xref: /haiku/src/system/kernel/vm/vm.cpp (revision 5b189b0e1e2f51f367bfcb126b2f00a3702f352d)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 namespace {
77 
78 class AreaCacheLocking {
79 public:
80 	inline bool Lock(VMCache* lockable)
81 	{
82 		return false;
83 	}
84 
85 	inline void Unlock(VMCache* lockable)
86 	{
87 		vm_area_put_locked_cache(lockable);
88 	}
89 };
90 
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 	inline AreaCacheLocker(VMCache* cache = NULL)
94 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95 	{
96 	}
97 
98 	inline AreaCacheLocker(VMArea* area)
99 		: AutoLocker<VMCache, AreaCacheLocking>()
100 	{
101 		SetTo(area);
102 	}
103 
104 	inline void SetTo(VMCache* cache, bool alreadyLocked)
105 	{
106 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107 	}
108 
109 	inline void SetTo(VMArea* area)
110 	{
111 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113 	}
114 };
115 
116 
117 class VMCacheChainLocker {
118 public:
119 	VMCacheChainLocker()
120 		:
121 		fTopCache(NULL),
122 		fBottomCache(NULL)
123 	{
124 	}
125 
126 	VMCacheChainLocker(VMCache* topCache)
127 		:
128 		fTopCache(topCache),
129 		fBottomCache(topCache)
130 	{
131 	}
132 
133 	~VMCacheChainLocker()
134 	{
135 		Unlock();
136 	}
137 
138 	void SetTo(VMCache* topCache)
139 	{
140 		fTopCache = topCache;
141 		fBottomCache = topCache;
142 
143 		if (topCache != NULL)
144 			topCache->SetUserData(NULL);
145 	}
146 
147 	VMCache* LockSourceCache()
148 	{
149 		if (fBottomCache == NULL || fBottomCache->source == NULL)
150 			return NULL;
151 
152 		VMCache* previousCache = fBottomCache;
153 
154 		fBottomCache = fBottomCache->source;
155 		fBottomCache->Lock();
156 		fBottomCache->AcquireRefLocked();
157 		fBottomCache->SetUserData(previousCache);
158 
159 		return fBottomCache;
160 	}
161 
162 	void LockAllSourceCaches()
163 	{
164 		while (LockSourceCache() != NULL) {
165 		}
166 	}
167 
168 	void Unlock(VMCache* exceptCache = NULL)
169 	{
170 		if (fTopCache == NULL)
171 			return;
172 
173 		// Unlock caches in source -> consumer direction. This is important to
174 		// avoid double-locking and a reversal of locking order in case a cache
175 		// is eligible for merging.
176 		VMCache* cache = fBottomCache;
177 		while (cache != NULL) {
178 			VMCache* nextCache = (VMCache*)cache->UserData();
179 			if (cache != exceptCache)
180 				cache->ReleaseRefAndUnlock(cache != fTopCache);
181 
182 			if (cache == fTopCache)
183 				break;
184 
185 			cache = nextCache;
186 		}
187 
188 		fTopCache = NULL;
189 		fBottomCache = NULL;
190 	}
191 
192 	void UnlockKeepRefs(bool keepTopCacheLocked)
193 	{
194 		if (fTopCache == NULL)
195 			return;
196 
197 		VMCache* nextCache = fBottomCache;
198 		VMCache* cache = NULL;
199 
200 		while (keepTopCacheLocked
201 				? nextCache != fTopCache : cache != fTopCache) {
202 			cache = nextCache;
203 			nextCache = (VMCache*)cache->UserData();
204 			cache->Unlock(cache != fTopCache);
205 		}
206 	}
207 
208 	void RelockCaches(bool topCacheLocked)
209 	{
210 		if (fTopCache == NULL)
211 			return;
212 
213 		VMCache* nextCache = fTopCache;
214 		VMCache* cache = NULL;
215 		if (topCacheLocked) {
216 			cache = nextCache;
217 			nextCache = cache->source;
218 		}
219 
220 		while (cache != fBottomCache && nextCache != NULL) {
221 			VMCache* consumer = cache;
222 			cache = nextCache;
223 			nextCache = cache->source;
224 			cache->Lock();
225 			cache->SetUserData(consumer);
226 		}
227 	}
228 
229 private:
230 	VMCache*	fTopCache;
231 	VMCache*	fBottomCache;
232 };
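
// Usage sketch (illustrative only; the variable names are hypothetical): a
// caller typically locks an area's top cache first and then lets the chain
// locker grab all source caches, the same pattern cut_area() uses further
// down in this file.
#if 0
	VMCache* cache = vm_area_get_locked_cache(area);
	VMCacheChainLocker cacheChainLocker(cache);
	cacheChainLocker.LockAllSourceCaches();
	// ... work on the fully locked chain ...
	// the destructor unlocks the chain in source -> consumer direction
#endif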
233 
234 } // namespace
235 
236 
237 // The memory reserve an allocation of a certain priority must not touch.
238 static const size_t kMemoryReserveForPriority[] = {
239 	VM_MEMORY_RESERVE_USER,		// user
240 	VM_MEMORY_RESERVE_SYSTEM,	// system
241 	0							// VIP
242 };
243 
244 
245 ObjectCache* gPageMappingsObjectCache;
246 
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248 
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
253 
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
255 
256 #if DEBUG_CACHE_LIST
257 
258 struct cache_info {
259 	VMCache*	cache;
260 	addr_t		page_count;
261 	addr_t		committed;
262 };
263 
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
266 
267 #endif	// DEBUG_CACHE_LIST
268 
269 
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 	bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 	int protection, int mapping, uint32 flags,
278 	const virtual_address_restrictions* addressRestrictions, bool kernel,
279 	VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
281 
282 
283 //	#pragma mark -
284 
285 
286 #if VM_PAGE_FAULT_TRACING
287 
288 namespace VMPageFaultTracing {
289 
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293 		:
294 		fAddress(address),
295 		fPC(pc),
296 		fWrite(write),
297 		fUser(user)
298 	{
299 		Initialized();
300 	}
301 
302 	virtual void AddDump(TraceOutput& out)
303 	{
304 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306 	}
307 
308 private:
309 	addr_t	fAddress;
310 	addr_t	fPC;
311 	bool	fWrite;
312 	bool	fUser;
313 };
314 
315 
316 // page fault errors
317 enum {
318 	PAGE_FAULT_ERROR_NO_AREA		= 0,
319 	PAGE_FAULT_ERROR_KERNEL_ONLY,
320 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 	PAGE_FAULT_ERROR_READ_PROTECTED,
322 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325 };
326 
327 
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 	PageFaultError(area_id area, status_t error)
331 		:
332 		fArea(area),
333 		fError(error)
334 	{
335 		Initialized();
336 	}
337 
338 	virtual void AddDump(TraceOutput& out)
339 	{
340 		switch (fError) {
341 			case PAGE_FAULT_ERROR_NO_AREA:
342 				out.Print("page fault error: no area");
343 				break;
344 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 				out.Print("page fault error: area: %ld, kernel only", fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 				out.Print("page fault error: area: %ld, write protected",
349 					fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_READ_PROTECTED:
352 				out.Print("page fault error: area: %ld, read protected", fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 				out.Print("page fault error: area: %ld, execute protected",
356 					fArea);
357 				break;
358 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 				out.Print("page fault error: kernel touching bad user memory");
360 				break;
361 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 				out.Print("page fault error: no address space");
363 				break;
364 			default:
365 				out.Print("page fault error: area: %ld, error: %s", fArea,
366 					strerror(fError));
367 				break;
368 		}
369 	}
370 
371 private:
372 	area_id		fArea;
373 	status_t	fError;
374 };
375 
376 
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 			vm_page* page)
381 		:
382 		fArea(area),
383 		fTopCache(topCache),
384 		fCache(cache),
385 		fPage(page)
386 	{
387 		Initialized();
388 	}
389 
390 	virtual void AddDump(TraceOutput& out)
391 	{
392 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 			"page: %p", fArea, fTopCache, fCache, fPage);
394 	}
395 
396 private:
397 	area_id		fArea;
398 	VMCache*	fTopCache;
399 	VMCache*	fCache;
400 	vm_page*	fPage;
401 };
402 
403 }	// namespace VMPageFaultTracing
404 
405 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 #	define TPF(x) ;
408 #endif	// VM_PAGE_FAULT_TRACING
409 
410 
411 //	#pragma mark -
412 
413 
414 /*!	The page's cache must be locked.
415 */
416 static inline void
417 increment_page_wired_count(vm_page* page)
418 {
419 	if (!page->IsMapped())
420 		atomic_add(&gMappedPagesCount, 1);
421 	page->IncrementWiredCount();
422 }
423 
424 
425 /*!	The page's cache must be locked.
426 */
427 static inline void
428 decrement_page_wired_count(vm_page* page)
429 {
430 	page->DecrementWiredCount();
431 	if (!page->IsMapped())
432 		atomic_add(&gMappedPagesCount, -1);
433 }
434 
435 
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
438 {
439 	return area->Base()
440 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441 }
442 
443 
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
447 {
448 	VMAreaHash::ReadLock();
449 
450 	VMArea* area = VMAreaHash::LookupLocked(id);
451 	if (area != NULL && area->address_space != addressSpace)
452 		area = NULL;
453 
454 	VMAreaHash::ReadUnlock();
455 
456 	return area;
457 }
458 
459 
460 static status_t
461 allocate_area_page_protections(VMArea* area)
462 {
463 	// In the page protections we store only the three user protections,
464 	// so we use 4 bits per page.
465 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 	area->page_protections = (uint8*)malloc_etc(bytes,
467 		HEAP_DONT_LOCK_KERNEL_SPACE);
468 	if (area->page_protections == NULL)
469 		return B_NO_MEMORY;
470 
471 	// init the page protections for all pages to that of the area
472 	uint32 areaProtection = area->protection
473 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 	memset(area->page_protections, areaProtection | (areaProtection << 4),
475 		bytes);
476 	return B_OK;
477 }
478 
479 
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482 {
483 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 	uint8& entry = area->page_protections[pageIndex / 2];
486 	if (pageIndex % 2 == 0)
487 		entry = (entry & 0xf0) | protection;
488 	else
489 		entry = (entry & 0x0f) | (protection << 4);
490 }
491 
492 
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
495 {
496 	if (area->page_protections == NULL)
497 		return area->protection;
498 
499 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 	uint32 protection = area->page_protections[pageIndex / 2];
501 	if (pageIndex % 2 == 0)
502 		protection &= 0x0f;
503 	else
504 		protection >>= 4;
505 
506 	// If this is a kernel area we translate the user flags to kernel flags.
507 	if (area->address_space == VMAddressSpace::Kernel()) {
508 		uint32 kernelProtection = 0;
509 		if ((protection & B_READ_AREA) != 0)
510 			kernelProtection |= B_KERNEL_READ_AREA;
511 		if ((protection & B_WRITE_AREA) != 0)
512 			kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 		return kernelProtection;
515 	}
516 
517 	return protection | B_KERNEL_READ_AREA
518 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519 }
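
// Worked example for the packing used above (the page index is hypothetical):
// two pages share one byte of page_protections. For page index 5 with
// B_READ_AREA | B_WRITE_AREA, the odd index selects the high nibble:
//
//	uint8& entry = area->page_protections[5 / 2];		// byte index 2
//	entry = (entry & 0x0f) | ((B_READ_AREA | B_WRITE_AREA) << 4);
//
// Even page indices use the low nibble instead, as set_area_page_protection()
// shows.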
520 
521 
522 /*!	The caller must have reserved enough pages that the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue,
575 		// since otherwise the page daemon wouldn't keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
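
// Usage sketch (hypothetical names): the reservation mentioned in the comment
// above is usually obtained the same way vm_map_physical_memory() does it
// further down in this file. The page's cache must be locked as well.
#if 0
	VMTranslationMap* map = area->address_space->TranslationMap();
	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation,
		map->MaxPagesNeededToMap(address, address + B_PAGE_SIZE - 1),
		VM_PRIORITY_USER);
	map_page(area, page, address, protection, &reservation);
	vm_page_unreserve_pages(&reservation);
#endif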
586 
587 
588 /*!	If \a preserveModified is \c true, the caller must hold the lock of the
589 	page's cache.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	If \a preserveModified is \c true, the caller must hold the lock of all
600 	mapped pages' caches.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 /*!	Cuts a piece out of an area. If the given cut range covers the complete
610 	area, it is deleted. If it covers the beginning or the end, the area is
611 	resized accordingly. If the range covers some part in the middle of the
612 	area, it is split in two; in this case the second area is returned via
613 	\a _secondArea (the variable is left untouched in the other cases).
614 	The address space must be write locked.
615 	The caller must ensure that no part of the given range is wired.
616 */
617 static status_t
618 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
619 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
620 {
621 	// Does the cut range intersect with the area at all?
622 	addr_t areaLast = area->Base() + (area->Size() - 1);
623 	if (area->Base() > lastAddress || areaLast < address)
624 		return B_OK;
625 
626 	// Is the area fully covered?
627 	if (area->Base() >= address && areaLast <= lastAddress) {
628 		delete_area(addressSpace, area, false);
629 		return B_OK;
630 	}
631 
632 	int priority;
633 	uint32 allocationFlags;
634 	if (addressSpace == VMAddressSpace::Kernel()) {
635 		priority = VM_PRIORITY_SYSTEM;
636 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
637 			| HEAP_DONT_LOCK_KERNEL_SPACE;
638 	} else {
639 		priority = VM_PRIORITY_USER;
640 		allocationFlags = 0;
641 	}
642 
643 	VMCache* cache = vm_area_get_locked_cache(area);
644 	VMCacheChainLocker cacheChainLocker(cache);
645 	cacheChainLocker.LockAllSourceCaches();
646 
647 	// Cut the end only?
648 	if (areaLast <= lastAddress) {
649 		size_t oldSize = area->Size();
650 		size_t newSize = address - area->Base();
651 
652 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
653 			allocationFlags);
654 		if (error != B_OK)
655 			return error;
656 
657 		// unmap pages
658 		unmap_pages(area, address, oldSize - newSize);
659 
660 		// If no one else uses the area's cache, we can resize it, too.
661 		if (cache->areas == area && area->cache_next == NULL
662 			&& cache->consumers.IsEmpty()
663 			&& cache->type == CACHE_TYPE_RAM) {
664 			// Since VMCache::Resize() can temporarily drop the lock, we must
665 			// unlock all lower caches to prevent locking order inversion.
666 			cacheChainLocker.Unlock(cache);
667 			cache->Resize(cache->virtual_base + newSize, priority);
668 			cache->ReleaseRefAndUnlock();
669 		}
670 
671 		return B_OK;
672 	}
673 
674 	// Cut the beginning only?
675 	if (area->Base() >= address) {
676 		addr_t oldBase = area->Base();
677 		addr_t newBase = lastAddress + 1;
678 		size_t newSize = areaLast - lastAddress;
679 
680 		// unmap pages
681 		unmap_pages(area, oldBase, newBase - oldBase);
682 
683 		// resize the area
684 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
685 			allocationFlags);
686 		if (error != B_OK)
687 			return error;
688 
689 		// TODO: If no one else uses the area's cache, we should resize it, too!
690 
691 		area->cache_offset += newBase - oldBase;
692 
693 		return B_OK;
694 	}
695 
696 	// The tough part -- cut a piece out of the middle of the area.
697 	// We do that by shrinking the area to the beginning section and creating a
698 	// new area for the end section.
699 
700 	addr_t firstNewSize = address - area->Base();
701 	addr_t secondBase = lastAddress + 1;
702 	addr_t secondSize = areaLast - lastAddress;
703 
704 	// unmap pages
705 	unmap_pages(area, address, area->Size() - firstNewSize);
706 
707 	// resize the area
708 	addr_t oldSize = area->Size();
709 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
710 		allocationFlags);
711 	if (error != B_OK)
712 		return error;
713 
714 	// TODO: If no one else uses the area's cache, we might want to create a
715 	// new cache for the second area, transfer the concerned pages from the
716 	// first cache to it and resize the first cache.
717 
718 	// map the second area
719 	virtual_address_restrictions addressRestrictions = {};
720 	addressRestrictions.address = (void*)secondBase;
721 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
722 	VMArea* secondArea;
723 	error = map_backing_store(addressSpace, cache,
724 		area->cache_offset + (secondBase - area->Base()), area->name,
725 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
726 		&addressRestrictions, kernel, &secondArea, NULL);
727 	if (error != B_OK) {
728 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
729 		return error;
730 	}
731 
732 	// We need a cache reference for the new area.
733 	cache->AcquireRefLocked();
734 
735 	if (_secondArea != NULL)
736 		*_secondArea = secondArea;
737 
738 	return B_OK;
739 }
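
// Worked example for the cases above, for a hypothetical area spanning
// 0x10000 - 0x17fff:
//	cut 0x10000 - 0x17fff: the whole area is deleted
//	cut 0x14000 - 0x17fff: tail cut, the area shrinks to 0x10000 - 0x13fff
//	cut 0x10000 - 0x11fff: head cut, the area shrinks to 0x12000 - 0x17fff
//	cut 0x12000 - 0x12fff: middle cut, the area shrinks to 0x10000 - 0x11fff
//		and a second area covering 0x13000 - 0x17fff is returned via
//		\a _secondArea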
740 
741 
742 /*!	Deletes all areas in the given address range.
743 	The address space must be write-locked.
744 	The caller must ensure that no part of the given range is wired.
745 */
746 static status_t
747 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
748 	bool kernel)
749 {
750 	size = PAGE_ALIGN(size);
751 	addr_t lastAddress = address + (size - 1);
752 
753 	// Check whether the caller is allowed to modify the concerned areas.
754 	if (!kernel) {
755 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
756 				VMArea* area = it.Next();) {
757 			addr_t areaLast = area->Base() + (area->Size() - 1);
758 			if (area->Base() < lastAddress && address < areaLast) {
759 				if (area->address_space == VMAddressSpace::Kernel()) {
760 					dprintf("unmap_address_range: team %" B_PRId32 " tried to "
761 						"unmap range of kernel area %" B_PRId32 " (%s)\n",
762 						team_get_current_team_id(), area->id, area->name);
763 					return B_NOT_ALLOWED;
764 				}
765 			}
766 		}
767 	}
768 
769 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
770 			VMArea* area = it.Next();) {
771 		addr_t areaLast = area->Base() + (area->Size() - 1);
772 		if (area->Base() < lastAddress && address < areaLast) {
773 			status_t error = cut_area(addressSpace, area, address,
774 				lastAddress, NULL, kernel);
775 			if (error != B_OK)
776 				return error;
777 				// Failing after already messing with areas is ugly, but we
778 				// can't do anything about it.
779 		}
780 	}
781 
782 	return B_OK;
783 }
784 
785 
786 /*! You need to hold the lock of the cache and the write lock of the address
787 	space when calling this function.
788 	Note that in case of error your cache will be temporarily unlocked.
789 	If \a addressSpec is \c B_EXACT_ADDRESS and the
790 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
791 	that no part of the specified address range (base \c *_virtualAddress, size
792 	\a size) is wired.
793 */
794 static status_t
795 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
796 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
797 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
798 	bool kernel, VMArea** _area, void** _virtualAddress)
799 {
800 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
801 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
802 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
803 		addressRestrictions->address, offset, size,
804 		addressRestrictions->address_specification, wiring, protection,
805 		_area, areaName));
806 	cache->AssertLocked();
807 
808 	if (size == 0) {
809 #if KDEBUG
810 		panic("map_backing_store(): called with size=0 for area '%s'!",
811 			areaName);
812 #endif
813 		return B_BAD_VALUE;
814 	}
815 
816 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
817 		| HEAP_DONT_LOCK_KERNEL_SPACE;
818 	int priority;
819 	if (addressSpace != VMAddressSpace::Kernel()) {
820 		priority = VM_PRIORITY_USER;
821 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
822 		priority = VM_PRIORITY_VIP;
823 		allocationFlags |= HEAP_PRIORITY_VIP;
824 	} else
825 		priority = VM_PRIORITY_SYSTEM;
826 
827 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
828 		allocationFlags);
829 	if (area == NULL)
830 		return B_NO_MEMORY;
831 
832 	status_t status;
833 
834 	// if this is a private map, we need to create a new cache
835 	// to handle the private copies of pages as they are written to
836 	VMCache* sourceCache = cache;
837 	if (mapping == REGION_PRIVATE_MAP) {
838 		VMCache* newCache;
839 
840 		// create an anonymous cache
841 		status = VMCacheFactory::CreateAnonymousCache(newCache,
842 			(protection & B_STACK_AREA) != 0
843 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
844 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
845 		if (status != B_OK)
846 			goto err1;
847 
848 		newCache->Lock();
849 		newCache->temporary = 1;
850 		newCache->virtual_base = offset;
851 		newCache->virtual_end = offset + size;
852 
853 		cache->AddConsumer(newCache);
854 
855 		cache = newCache;
856 	}
857 
858 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
859 		status = cache->SetMinimalCommitment(size, priority);
860 		if (status != B_OK)
861 			goto err2;
862 	}
863 
864 	// check to see if this address space has entered DELETE state
865 	if (addressSpace->IsBeingDeleted()) {
866 		// okay, someone is trying to delete this address space now, so we can't
867 		// insert the area -- back out
868 		status = B_BAD_TEAM_ID;
869 		goto err2;
870 	}
871 
872 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
873 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
874 		status = unmap_address_range(addressSpace,
875 			(addr_t)addressRestrictions->address, size, kernel);
876 		if (status != B_OK)
877 			goto err2;
878 	}
879 
880 	status = addressSpace->InsertArea(area, size, addressRestrictions,
881 		allocationFlags, _virtualAddress);
882 	if (status == B_NO_MEMORY
883 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
884 		// TODO: At present, there is no way to notify the low_resource monitor
885 		// that kernel address space is fragmented, nor does it check for this
886 		// automatically. Due to how many locks are held, we cannot wait here
887 		// for space to be freed up, but it would be good to at least notify
888 		// that we tried and failed to allocate some amount.
889 	}
890 	if (status != B_OK)
891 		goto err2;
892 
893 	// attach the cache to the area
894 	area->cache = cache;
895 	area->cache_offset = offset;
896 
897 	// point the cache back to the area
898 	cache->InsertAreaLocked(area);
899 	if (mapping == REGION_PRIVATE_MAP)
900 		cache->Unlock();
901 
902 	// insert the area in the global area hash table
903 	VMAreaHash::Insert(area);
904 
905 	// grab a ref to the address space (the area holds this)
906 	addressSpace->Get();
907 
908 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
909 //		cache, sourceCache, areaName, area);
910 
911 	*_area = area;
912 	return B_OK;
913 
914 err2:
915 	if (mapping == REGION_PRIVATE_MAP) {
916 		// We created this cache, so we must delete it again. Note that we
917 		// need to temporarily unlock the source cache or we'll otherwise
918 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
919 		sourceCache->Unlock();
920 		cache->ReleaseRefAndUnlock();
921 		sourceCache->Lock();
922 	}
923 err1:
924 	addressSpace->DeleteArea(area, allocationFlags);
925 	return status;
926 }
927 
928 
929 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
930 	  locker1, locker2).
931 */
932 template<typename LockerType1, typename LockerType2>
933 static inline bool
934 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
935 {
936 	area->cache->AssertLocked();
937 
938 	VMAreaUnwiredWaiter waiter;
939 	if (!area->AddWaiterIfWired(&waiter))
940 		return false;
941 
942 	// unlock everything and wait
943 	if (locker1 != NULL)
944 		locker1->Unlock();
945 	if (locker2 != NULL)
946 		locker2->Unlock();
947 
948 	waiter.waitEntry.Wait();
949 
950 	return true;
951 }
952 
953 
954 /*!	Checks whether the given area has any wired ranges intersecting with the
955 	specified range and waits, if so.
956 
957 	When it has to wait, the function calls \c Unlock() on both \a locker1
958 	and \a locker2, if given.
959 	The area's top cache must be locked and must be unlocked as a side effect
960 	of calling \c Unlock() on either \a locker1 or \a locker2.
961 
962 	If the function does not have to wait it does not modify or unlock any
963 	object.
964 
965 	\param area The area to be checked.
966 	\param base The base address of the range to check.
967 	\param size The size of the address range to check.
968 	\param locker1 An object to be unlocked before starting to wait (may
969 		be \c NULL).
970 	\param locker2 An object to be unlocked before starting to wait (may
971 		be \c NULL).
972 	\return \c true, if the function had to wait, \c false otherwise.
973 */
974 template<typename LockerType1, typename LockerType2>
975 static inline bool
976 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
977 	LockerType1* locker1, LockerType2* locker2)
978 {
979 	area->cache->AssertLocked();
980 
981 	VMAreaUnwiredWaiter waiter;
982 	if (!area->AddWaiterIfWired(&waiter, base, size))
983 		return false;
984 
985 	// unlock everything and wait
986 	if (locker1 != NULL)
987 		locker1->Unlock();
988 	if (locker2 != NULL)
989 		locker2->Unlock();
990 
991 	waiter.waitEntry.Wait();
992 
993 	return true;
994 }
995 
996 
997 /*!	Checks whether the given address space has any wired ranges intersecting
998 	with the specified range and waits, if so.
999 
1000 	Similar to wait_if_area_range_is_wired(), with the following differences:
1001 	- All areas intersecting with the range are checked (respectively all until
1002 	  one is found that contains a wired range intersecting with the given
1003 	  range).
1004 	- The given address space must at least be read-locked and must be unlocked
1005 	  when \c Unlock() is called on \a locker.
1006 	- None of the areas' caches are allowed to be locked.
1007 */
1008 template<typename LockerType>
1009 static inline bool
1010 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1011 	size_t size, LockerType* locker)
1012 {
1013 	addr_t end = base + size - 1;
1014 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
1015 			VMArea* area = it.Next();) {
1016 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
1017 		if (area->Base() > end)
1018 			return false;
1019 
1020 		if (base >= area->Base() + area->Size() - 1)
1021 			continue;
1022 
1023 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1024 
1025 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1026 			return true;
1027 	}
1028 
1029 	return false;
1030 }
1031 
1032 
1033 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1034 	It must be called in a situation where the kernel address space may be
1035 	locked.
1036 */
1037 status_t
1038 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1039 {
1040 	AddressSpaceReadLocker locker;
1041 	VMArea* area;
1042 	status_t status = locker.SetFromArea(id, area);
1043 	if (status != B_OK)
1044 		return status;
1045 
1046 	if (area->page_protections == NULL) {
1047 		status = allocate_area_page_protections(area);
1048 		if (status != B_OK)
1049 			return status;
1050 	}
1051 
1052 	*cookie = (void*)area;
1053 	return B_OK;
1054 }
1055 
1056 
1057 /*!	This is a debug helper function that can only be used with very specific
1058 	use cases.
1059 	Sets protection for the given address range to the protection specified.
1060 	If \a protection is 0 then the involved pages will be marked non-present
1061 	in the translation map to cause a fault on access. The pages aren't
1062 	actually unmapped however so that they can be marked present again with
1063 	additional calls to this function. For this to work the area must be
1064 	fully locked in memory so that the pages aren't otherwise touched.
1065 	This function does not lock the kernel address space and needs to be
1066 	supplied with a \a cookie retrieved from a successful call to
1067 	vm_prepare_kernel_area_debug_protection().
1068 */
1069 status_t
1070 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1071 	uint32 protection)
1072 {
1073 	// check address range
1074 	addr_t address = (addr_t)_address;
1075 	size = PAGE_ALIGN(size);
1076 
1077 	if ((address % B_PAGE_SIZE) != 0
1078 		|| (addr_t)address + size < (addr_t)address
1079 		|| !IS_KERNEL_ADDRESS(address)
1080 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1081 		return B_BAD_VALUE;
1082 	}
1083 
1084 	// Translate the kernel protection to user protection as we only store that.
1085 	if ((protection & B_KERNEL_READ_AREA) != 0)
1086 		protection |= B_READ_AREA;
1087 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1088 		protection |= B_WRITE_AREA;
1089 
1090 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1091 	VMTranslationMap* map = addressSpace->TranslationMap();
1092 	VMArea* area = (VMArea*)cookie;
1093 
1094 	addr_t offset = address - area->Base();
1095 	if (area->Size() - offset < size) {
1096 		panic("protect range not fully within supplied area");
1097 		return B_BAD_VALUE;
1098 	}
1099 
1100 	if (area->page_protections == NULL) {
1101 		panic("area has no page protections");
1102 		return B_BAD_VALUE;
1103 	}
1104 
1105 	// Invalidate the mapping entries so any access to them will fault, or
1106 	// restore the mapping entries unchanged so that lookups will succeed again.
1107 	map->Lock();
1108 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1109 	map->Unlock();
1110 
1111 	// And set the proper page protections so that the fault case will actually
1112 	// fail and not simply try to map a new page.
1113 	for (addr_t pageAddress = address; pageAddress < address + size;
1114 			pageAddress += B_PAGE_SIZE) {
1115 		set_area_page_protection(area, pageAddress, protection);
1116 	}
1117 
1118 	return B_OK;
1119 }
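
// Usage sketch (the area id and address are hypothetical): prepare the cookie
// once while it is safe to lock the kernel address space, then toggle the
// presence of page-aligned ranges to catch stray accesses.
#if 0
	void* cookie;
	if (vm_prepare_kernel_area_debug_protection(id, &cookie) == B_OK) {
		// make the range fault on any access
		vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE, 0);
		// ... later, make it accessible again
		vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	}
#endif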
1120 
1121 
1122 status_t
1123 vm_block_address_range(const char* name, void* address, addr_t size)
1124 {
1125 	if (!arch_vm_supports_protection(0))
1126 		return B_NOT_SUPPORTED;
1127 
1128 	AddressSpaceWriteLocker locker;
1129 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1130 	if (status != B_OK)
1131 		return status;
1132 
1133 	VMAddressSpace* addressSpace = locker.AddressSpace();
1134 
1135 	// create an anonymous cache
1136 	VMCache* cache;
1137 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1138 		VM_PRIORITY_SYSTEM);
1139 	if (status != B_OK)
1140 		return status;
1141 
1142 	cache->temporary = 1;
1143 	cache->virtual_end = size;
1144 	cache->Lock();
1145 
1146 	VMArea* area;
1147 	virtual_address_restrictions addressRestrictions = {};
1148 	addressRestrictions.address = address;
1149 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1150 	status = map_backing_store(addressSpace, cache, 0, name, size,
1151 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1152 		true, &area, NULL);
1153 	if (status != B_OK) {
1154 		cache->ReleaseRefAndUnlock();
1155 		return status;
1156 	}
1157 
1158 	cache->Unlock();
1159 	area->cache_type = CACHE_TYPE_RAM;
1160 	return area->id;
1161 }
1162 
1163 
1164 status_t
1165 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1166 {
1167 	AddressSpaceWriteLocker locker(team);
1168 	if (!locker.IsLocked())
1169 		return B_BAD_TEAM_ID;
1170 
1171 	VMAddressSpace* addressSpace = locker.AddressSpace();
1172 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1173 		addressSpace == VMAddressSpace::Kernel()
1174 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1175 }
1176 
1177 
1178 status_t
1179 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1180 	addr_t size, uint32 flags)
1181 {
1182 	if (size == 0)
1183 		return B_BAD_VALUE;
1184 
1185 	AddressSpaceWriteLocker locker(team);
1186 	if (!locker.IsLocked())
1187 		return B_BAD_TEAM_ID;
1188 
1189 	virtual_address_restrictions addressRestrictions = {};
1190 	addressRestrictions.address = *_address;
1191 	addressRestrictions.address_specification = addressSpec;
1192 	VMAddressSpace* addressSpace = locker.AddressSpace();
1193 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1194 		addressSpace == VMAddressSpace::Kernel()
1195 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1196 		_address);
1197 }
1198 
1199 
1200 area_id
1201 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1202 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1203 	const virtual_address_restrictions* virtualAddressRestrictions,
1204 	const physical_address_restrictions* physicalAddressRestrictions,
1205 	bool kernel, void** _address)
1206 {
1207 	VMArea* area;
1208 	VMCache* cache;
1209 	vm_page* page = NULL;
1210 	bool isStack = (protection & B_STACK_AREA) != 0;
1211 	page_num_t guardPages;
1212 	bool canOvercommit = false;
1213 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1214 		? VM_PAGE_ALLOC_CLEAR : 0;
1215 
1216 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1217 		team, name, size));
1218 
1219 	size = PAGE_ALIGN(size);
1220 	guardSize = PAGE_ALIGN(guardSize);
1221 	guardPages = guardSize / B_PAGE_SIZE;
1222 
1223 	if (size == 0 || size < guardSize)
1224 		return B_BAD_VALUE;
1225 	if (!arch_vm_supports_protection(protection))
1226 		return B_NOT_SUPPORTED;
1227 
1228 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1229 		canOvercommit = true;
1230 
1231 #ifdef DEBUG_KERNEL_STACKS
1232 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1233 		isStack = true;
1234 #endif
1235 
1236 	// check parameters
1237 	switch (virtualAddressRestrictions->address_specification) {
1238 		case B_ANY_ADDRESS:
1239 		case B_EXACT_ADDRESS:
1240 		case B_BASE_ADDRESS:
1241 		case B_ANY_KERNEL_ADDRESS:
1242 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1243 		case B_RANDOMIZED_ANY_ADDRESS:
1244 		case B_RANDOMIZED_BASE_ADDRESS:
1245 			break;
1246 
1247 		default:
1248 			return B_BAD_VALUE;
1249 	}
1250 
1251 	// If low or high physical address restrictions are given, we force
1252 	// B_CONTIGUOUS wiring, since only then do we use
1253 	// vm_page_allocate_page_run(), which deals with those restrictions.
1254 	if (physicalAddressRestrictions->low_address != 0
1255 		|| physicalAddressRestrictions->high_address != 0) {
1256 		wiring = B_CONTIGUOUS;
1257 	}
1258 
1259 	physical_address_restrictions stackPhysicalRestrictions;
1260 	bool doReserveMemory = false;
1261 	switch (wiring) {
1262 		case B_NO_LOCK:
1263 			break;
1264 		case B_FULL_LOCK:
1265 		case B_LAZY_LOCK:
1266 		case B_CONTIGUOUS:
1267 			doReserveMemory = true;
1268 			break;
1269 		case B_ALREADY_WIRED:
1270 			break;
1271 		case B_LOMEM:
1272 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1273 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1274 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1275 			wiring = B_CONTIGUOUS;
1276 			doReserveMemory = true;
1277 			break;
1278 		case B_32_BIT_FULL_LOCK:
1279 			if (B_HAIKU_PHYSICAL_BITS <= 32
1280 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1281 				wiring = B_FULL_LOCK;
1282 				doReserveMemory = true;
1283 				break;
1284 			}
1285 			// TODO: We don't really support this mode efficiently. Just fall
1286 			// through for now ...
1287 		case B_32_BIT_CONTIGUOUS:
1288 			#if B_HAIKU_PHYSICAL_BITS > 32
1289 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1290 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1291 					stackPhysicalRestrictions.high_address
1292 						= (phys_addr_t)1 << 32;
1293 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1294 				}
1295 			#endif
1296 			wiring = B_CONTIGUOUS;
1297 			doReserveMemory = true;
1298 			break;
1299 		default:
1300 			return B_BAD_VALUE;
1301 	}
1302 
1303 	// Optimization: For a single-page contiguous allocation without low/high
1304 	// memory restrictions, B_FULL_LOCK wiring suffices.
1305 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1306 		&& physicalAddressRestrictions->low_address == 0
1307 		&& physicalAddressRestrictions->high_address == 0) {
1308 		wiring = B_FULL_LOCK;
1309 	}
1310 
1311 	// For full lock or contiguous areas we're also going to map the pages and
1312 	// thus need to reserve pages for the mapping backend upfront.
1313 	addr_t reservedMapPages = 0;
1314 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1315 		AddressSpaceWriteLocker locker;
1316 		status_t status = locker.SetTo(team);
1317 		if (status != B_OK)
1318 			return status;
1319 
1320 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1321 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1322 	}
1323 
1324 	int priority;
1325 	if (team != VMAddressSpace::KernelID())
1326 		priority = VM_PRIORITY_USER;
1327 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1328 		priority = VM_PRIORITY_VIP;
1329 	else
1330 		priority = VM_PRIORITY_SYSTEM;
1331 
1332 	// Reserve memory before acquiring the address space lock. This reduces the
1333 	// chances of failure, since while holding the write lock to the address
1334 	// space (if it is the kernel address space, that is), the low memory handler
1335 	// won't be able to free anything for us.
1336 	addr_t reservedMemory = 0;
1337 	if (doReserveMemory) {
1338 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1339 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1340 			return B_NO_MEMORY;
1341 		reservedMemory = size;
1342 		// TODO: We don't reserve the memory for the pages for the page
1343 		// directories/tables. We actually need to, since we currently don't
1344 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1345 		// there are actually fewer physical pages than there should be, which
1346 		// can get the VM into trouble in low memory situations.
1347 	}
1348 
1349 	AddressSpaceWriteLocker locker;
1350 	VMAddressSpace* addressSpace;
1351 	status_t status;
1352 
1353 	// For full lock areas reserve the pages before locking the address
1354 	// space. E.g. block caches can't release their memory while we hold the
1355 	// address space lock.
1356 	page_num_t reservedPages = reservedMapPages;
1357 	if (wiring == B_FULL_LOCK)
1358 		reservedPages += size / B_PAGE_SIZE;
1359 
1360 	vm_page_reservation reservation;
1361 	if (reservedPages > 0) {
1362 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1363 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1364 					priority)) {
1365 				reservedPages = 0;
1366 				status = B_WOULD_BLOCK;
1367 				goto err0;
1368 			}
1369 		} else
1370 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1371 	}
1372 
1373 	if (wiring == B_CONTIGUOUS) {
1374 		// we try to allocate the page run here upfront as this may easily
1375 		// fail for obvious reasons
1376 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1377 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1378 		if (page == NULL) {
1379 			status = B_NO_MEMORY;
1380 			goto err0;
1381 		}
1382 	}
1383 
1384 	// Lock the address space and, if B_EXACT_ADDRESS and
1385 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1386 	// is not wired.
1387 	do {
1388 		status = locker.SetTo(team);
1389 		if (status != B_OK)
1390 			goto err1;
1391 
1392 		addressSpace = locker.AddressSpace();
1393 	} while (virtualAddressRestrictions->address_specification
1394 			== B_EXACT_ADDRESS
1395 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1396 		&& wait_if_address_range_is_wired(addressSpace,
1397 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1398 
1399 	// create an anonymous cache
1400 	// if it's a stack, make sure that at least two pages are available
1401 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1402 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1403 		wiring == B_NO_LOCK, priority);
1404 	if (status != B_OK)
1405 		goto err1;
1406 
1407 	cache->temporary = 1;
1408 	cache->virtual_end = size;
1409 	cache->committed_size = reservedMemory;
1410 		// TODO: This should be done via a method.
1411 	reservedMemory = 0;
1412 
1413 	cache->Lock();
1414 
1415 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1416 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1417 		kernel, &area, _address);
1418 
1419 	if (status != B_OK) {
1420 		cache->ReleaseRefAndUnlock();
1421 		goto err1;
1422 	}
1423 
1424 	locker.DegradeToReadLock();
1425 
1426 	switch (wiring) {
1427 		case B_NO_LOCK:
1428 		case B_LAZY_LOCK:
1429 			// do nothing - the pages are mapped in as needed
1430 			break;
1431 
1432 		case B_FULL_LOCK:
1433 		{
1434 			// Allocate and map all pages for this area
1435 
1436 			off_t offset = 0;
1437 			for (addr_t address = area->Base();
1438 					address < area->Base() + (area->Size() - 1);
1439 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1440 #ifdef DEBUG_KERNEL_STACKS
1441 #	ifdef STACK_GROWS_DOWNWARDS
1442 				if (isStack && address < area->Base()
1443 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1444 #	else
1445 				if (isStack && address >= area->Base() + area->Size()
1446 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1447 #	endif
1448 					continue;
1449 #endif
1450 				vm_page* page = vm_page_allocate_page(&reservation,
1451 					PAGE_STATE_WIRED | pageAllocFlags);
1452 				cache->InsertPage(page, offset);
1453 				map_page(area, page, address, protection, &reservation);
1454 
1455 				DEBUG_PAGE_ACCESS_END(page);
1456 			}
1457 
1458 			break;
1459 		}
1460 
1461 		case B_ALREADY_WIRED:
1462 		{
1463 			// The pages should already be mapped. This is only really useful
1464 			// during boot time. Find the appropriate vm_page objects and stick
1465 			// them in the cache object.
1466 			VMTranslationMap* map = addressSpace->TranslationMap();
1467 			off_t offset = 0;
1468 
1469 			if (!gKernelStartup)
1470 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1471 
1472 			map->Lock();
1473 
1474 			for (addr_t virtualAddress = area->Base();
1475 					virtualAddress < area->Base() + (area->Size() - 1);
1476 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1477 				phys_addr_t physicalAddress;
1478 				uint32 flags;
1479 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1480 				if (status < B_OK) {
1481 					panic("looking up mapping failed for va 0x%lx\n",
1482 						virtualAddress);
1483 				}
1484 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1485 				if (page == NULL) {
1486 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1487 						"\n", physicalAddress);
1488 				}
1489 
1490 				DEBUG_PAGE_ACCESS_START(page);
1491 
1492 				cache->InsertPage(page, offset);
1493 				increment_page_wired_count(page);
1494 				vm_page_set_state(page, PAGE_STATE_WIRED);
1495 				page->busy = false;
1496 
1497 				DEBUG_PAGE_ACCESS_END(page);
1498 			}
1499 
1500 			map->Unlock();
1501 			break;
1502 		}
1503 
1504 		case B_CONTIGUOUS:
1505 		{
1506 			// We have already allocated our contiguous page run, so we can now
1507 			// just map them in the address space
1508 			VMTranslationMap* map = addressSpace->TranslationMap();
1509 			phys_addr_t physicalAddress
1510 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1511 			addr_t virtualAddress = area->Base();
1512 			off_t offset = 0;
1513 
1514 			map->Lock();
1515 
1516 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1517 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1518 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1519 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1520 				if (page == NULL)
1521 					panic("couldn't lookup physical page just allocated\n");
1522 
1523 				status = map->Map(virtualAddress, physicalAddress, protection,
1524 					area->MemoryType(), &reservation);
1525 				if (status < B_OK)
1526 					panic("couldn't map physical page in page run\n");
1527 
1528 				cache->InsertPage(page, offset);
1529 				increment_page_wired_count(page);
1530 
1531 				DEBUG_PAGE_ACCESS_END(page);
1532 			}
1533 
1534 			map->Unlock();
1535 			break;
1536 		}
1537 
1538 		default:
1539 			break;
1540 	}
1541 
1542 	cache->Unlock();
1543 
1544 	if (reservedPages > 0)
1545 		vm_page_unreserve_pages(&reservation);
1546 
1547 	TRACE(("vm_create_anonymous_area: done\n"));
1548 
1549 	area->cache_type = CACHE_TYPE_RAM;
1550 	return area->id;
1551 
1552 err1:
1553 	if (wiring == B_CONTIGUOUS) {
1554 		// we had reserved the area space upfront...
1555 		phys_addr_t pageNumber = page->physical_page_number;
1556 		int32 i;
1557 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1558 			page = vm_lookup_page(pageNumber);
1559 			if (page == NULL)
1560 				panic("couldn't lookup physical page just allocated\n");
1561 
1562 			vm_page_set_state(page, PAGE_STATE_FREE);
1563 		}
1564 	}
1565 
1566 err0:
1567 	if (reservedPages > 0)
1568 		vm_page_unreserve_pages(&reservation);
1569 	if (reservedMemory > 0)
1570 		vm_unreserve_memory(reservedMemory);
1571 
1572 	return status;
1573 }
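
// Usage sketch (the name, size, and protection are made up): how an in-kernel
// caller typically creates a fully locked anonymous area with
// zero-initialized address restrictions.
#if 0
	void* address;
	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
	physical_address_restrictions physicalRestrictions = {};
	area_id area = vm_create_anonymous_area(VMAddressSpace::KernelID(),
		"example area", 4 * B_PAGE_SIZE, B_FULL_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
		&physicalRestrictions, true, &address);
#endif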
1574 
1575 
1576 area_id
1577 vm_map_physical_memory(team_id team, const char* name, void** _address,
1578 	uint32 addressSpec, addr_t size, uint32 protection,
1579 	phys_addr_t physicalAddress, bool alreadyWired)
1580 {
1581 	VMArea* area;
1582 	VMCache* cache;
1583 	addr_t mapOffset;
1584 
1585 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1586 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1587 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1588 		addressSpec, size, protection, physicalAddress));
1589 
1590 	if (!arch_vm_supports_protection(protection))
1591 		return B_NOT_SUPPORTED;
1592 
1593 	AddressSpaceWriteLocker locker(team);
1594 	if (!locker.IsLocked())
1595 		return B_BAD_TEAM_ID;
1596 
1597 	// if the physical address is not page aligned,
1598 	// move the actual area down to align on a page boundary
1599 	mapOffset = physicalAddress % B_PAGE_SIZE;
1600 	size += mapOffset;
1601 	physicalAddress -= mapOffset;
1602 
1603 	size = PAGE_ALIGN(size);
1604 
1605 	// create a device cache
1606 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1607 	if (status != B_OK)
1608 		return status;
1609 
1610 	cache->virtual_end = size;
1611 
1612 	cache->Lock();
1613 
1614 	virtual_address_restrictions addressRestrictions = {};
1615 	addressRestrictions.address = *_address;
1616 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1617 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1618 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1619 		true, &area, _address);
1620 
1621 	if (status < B_OK)
1622 		cache->ReleaseRefLocked();
1623 
1624 	cache->Unlock();
1625 
1626 	if (status == B_OK) {
1627 		// set requested memory type -- use uncached, if not given
1628 		uint32 memoryType = addressSpec & B_MTR_MASK;
1629 		if (memoryType == 0)
1630 			memoryType = B_MTR_UC;
1631 
1632 		area->SetMemoryType(memoryType);
1633 
1634 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1635 		if (status != B_OK)
1636 			delete_area(locker.AddressSpace(), area, false);
1637 	}
1638 
1639 	if (status != B_OK)
1640 		return status;
1641 
1642 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1643 
1644 	if (alreadyWired) {
1645 		// The area is already mapped, but possibly not with the right
1646 		// memory type.
1647 		map->Lock();
1648 		map->ProtectArea(area, area->protection);
1649 		map->Unlock();
1650 	} else {
1651 		// Map the area completely.
1652 
1653 		// reserve pages needed for the mapping
1654 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1655 			area->Base() + (size - 1));
1656 		vm_page_reservation reservation;
1657 		vm_page_reserve_pages(&reservation, reservePages,
1658 			team == VMAddressSpace::KernelID()
1659 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1660 
1661 		map->Lock();
1662 
1663 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1664 			map->Map(area->Base() + offset, physicalAddress + offset,
1665 				protection, area->MemoryType(), &reservation);
1666 		}
1667 
1668 		map->Unlock();
1669 
1670 		vm_page_unreserve_pages(&reservation);
1671 	}
1672 
1673 	// modify the pointer returned to be offset back into the new area
1674 	// the same way the physical address passed in was offset
1675 	*_address = (void*)((addr_t)*_address + mapOffset);
1676 
1677 	area->cache_type = CACHE_TYPE_DEVICE;
1678 	return area->id;
1679 }
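
// Usage sketch (the register window address is made up): a driver-style
// caller mapping a physical range uncached into the kernel address space;
// the B_MTR_* bits ride along in the address specification, as handled above.
#if 0
	void* virtualBase = NULL;
	area_id area = vm_map_physical_memory(VMAddressSpace::KernelID(),
		"example regs", &virtualBase, B_ANY_KERNEL_ADDRESS | B_MTR_UC,
		B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
		(phys_addr_t)0xfe000000, false);
#endif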
1680 
1681 
1682 /*!	Don't use!
1683 	TODO: This function was introduced to map physical page vecs to
1684 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1685 	use a device cache and does not track vm_page::wired_count!
1686 */
1687 area_id
1688 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1689 	uint32 addressSpec, addr_t* _size, uint32 protection,
1690 	struct generic_io_vec* vecs, uint32 vecCount)
1691 {
1692 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1693 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1694 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1695 		addressSpec, _size, protection, vecs, vecCount));
1696 
1697 	if (!arch_vm_supports_protection(protection)
1698 		|| (addressSpec & B_MTR_MASK) != 0) {
1699 		return B_NOT_SUPPORTED;
1700 	}
1701 
1702 	AddressSpaceWriteLocker locker(team);
1703 	if (!locker.IsLocked())
1704 		return B_BAD_TEAM_ID;
1705 
1706 	if (vecCount == 0)
1707 		return B_BAD_VALUE;
1708 
1709 	addr_t size = 0;
1710 	for (uint32 i = 0; i < vecCount; i++) {
1711 		if (vecs[i].base % B_PAGE_SIZE != 0
1712 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1713 			return B_BAD_VALUE;
1714 		}
1715 
1716 		size += vecs[i].length;
1717 	}
1718 
1719 	// create a device cache
1720 	VMCache* cache;
1721 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1722 	if (result != B_OK)
1723 		return result;
1724 
1725 	cache->virtual_end = size;
1726 
1727 	cache->Lock();
1728 
1729 	VMArea* area;
1730 	virtual_address_restrictions addressRestrictions = {};
1731 	addressRestrictions.address = *_address;
1732 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1733 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1734 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1735 		&addressRestrictions, true, &area, _address);
1736 
1737 	if (result != B_OK)
1738 		cache->ReleaseRefLocked();
1739 
1740 	cache->Unlock();
1741 
1742 	if (result != B_OK)
1743 		return result;
1744 
1745 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1746 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1747 		area->Base() + (size - 1));
1748 
1749 	vm_page_reservation reservation;
1750 	vm_page_reserve_pages(&reservation, reservePages,
1751 			team == VMAddressSpace::KernelID()
1752 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1753 	map->Lock();
1754 
1755 	uint32 vecIndex = 0;
1756 	size_t vecOffset = 0;
1757 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1758 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1759 			vecOffset = 0;
1760 			vecIndex++;
1761 		}
1762 
1763 		if (vecIndex >= vecCount)
1764 			break;
1765 
1766 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1767 			protection, area->MemoryType(), &reservation);
1768 
1769 		vecOffset += B_PAGE_SIZE;
1770 	}
1771 
1772 	map->Unlock();
1773 	vm_page_unreserve_pages(&reservation);
1774 
1775 	if (_size != NULL)
1776 		*_size = size;
1777 
1778 	area->cache_type = CACHE_TYPE_DEVICE;
1779 	return area->id;
1780 }
1781 
1782 
1783 area_id
1784 vm_create_null_area(team_id team, const char* name, void** address,
1785 	uint32 addressSpec, addr_t size, uint32 flags)
1786 {
1787 	size = PAGE_ALIGN(size);
1788 
1789 	// Lock the address space and, if B_EXACT_ADDRESS and
1790 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1791 	// is not wired.
1792 	AddressSpaceWriteLocker locker;
1793 	do {
1794 		if (locker.SetTo(team) != B_OK)
1795 			return B_BAD_TEAM_ID;
1796 	} while (addressSpec == B_EXACT_ADDRESS
1797 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1798 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1799 			(addr_t)*address, size, &locker));
1800 
1801 	// create a null cache
1802 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1803 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1804 	VMCache* cache;
1805 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1806 	if (status != B_OK)
1807 		return status;
1808 
1809 	cache->temporary = 1;
1810 	cache->virtual_end = size;
1811 
1812 	cache->Lock();
1813 
1814 	VMArea* area;
1815 	virtual_address_restrictions addressRestrictions = {};
1816 	addressRestrictions.address = *address;
1817 	addressRestrictions.address_specification = addressSpec;
1818 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1819 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1820 		&addressRestrictions, true, &area, address);
1821 
1822 	if (status < B_OK) {
1823 		cache->ReleaseRefAndUnlock();
1824 		return status;
1825 	}
1826 
1827 	cache->Unlock();
1828 
1829 	area->cache_type = CACHE_TYPE_NULL;
1830 	return area->id;
1831 }
1832 
1833 
1834 /*!	Creates the vnode cache for the specified \a vnode.
1835 	The vnode has to be marked busy when calling this function.
1836 */
1837 status_t
1838 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1839 {
1840 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1841 }
1842 
1843 
1844 /*!	\a cache must be locked. The area's address space must be read-locked.
1845 */
1846 static void
1847 pre_map_area_pages(VMArea* area, VMCache* cache,
1848 	vm_page_reservation* reservation)
1849 {
1850 	addr_t baseAddress = area->Base();
1851 	addr_t cacheOffset = area->cache_offset;
1852 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1853 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1854 
1855 	for (VMCachePagesTree::Iterator it
1856 				= cache->pages.GetIterator(firstPage, true, true);
1857 			vm_page* page = it.Next();) {
1858 		if (page->cache_offset >= endPage)
1859 			break;
1860 
1861 		// skip busy and inactive pages
1862 		if (page->busy || page->usage_count == 0)
1863 			continue;
1864 
1865 		DEBUG_PAGE_ACCESS_START(page);
1866 		map_page(area, page,
1867 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1868 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1869 		DEBUG_PAGE_ACCESS_END(page);
1870 	}
1871 }
1872 
1873 
1874 /*!	Maps the file specified by \a fd into an area in memory.
1875 	The file will be mirrored beginning at the specified \a offset. The
1876 	\a offset and \a size arguments have to be page aligned.
1877 */
1878 static area_id
1879 _vm_map_file(team_id team, const char* name, void** _address,
1880 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1881 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1882 {
1883 	// TODO: for binary files, we want to make sure that they get a
1884 	//	snapshot of the file at mapping time, i.e. later changes should not
1885 	//	make it into the mapped copy -- this will need quite some changes
1886 	//	to be done in a nice way
1887 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1888 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1889 
1890 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1891 	size = PAGE_ALIGN(size);
1892 
1893 	if (mapping == REGION_NO_PRIVATE_MAP)
1894 		protection |= B_SHARED_AREA;
1895 	if (addressSpec != B_EXACT_ADDRESS)
1896 		unmapAddressRange = false;
1897 
1898 	if (fd < 0) {
1899 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1900 		virtual_address_restrictions virtualRestrictions = {};
1901 		virtualRestrictions.address = *_address;
1902 		virtualRestrictions.address_specification = addressSpec;
1903 		physical_address_restrictions physicalRestrictions = {};
1904 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1905 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1906 			_address);
1907 	}
1908 
1909 	// get the open flags of the FD
1910 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1911 	if (descriptor == NULL)
1912 		return EBADF;
1913 	int32 openMode = descriptor->open_mode;
1914 	put_fd(descriptor);
1915 
1916 	// The FD must be open for reading in any case. For a shared mapping with
1917 	// write access, the FD must additionally be open for writing.
1918 	if ((openMode & O_ACCMODE) == O_WRONLY
1919 		|| (mapping == REGION_NO_PRIVATE_MAP
1920 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1921 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1922 		return EACCES;
1923 	}
1924 
1925 	// get the vnode for the object, this also grabs a ref to it
1926 	struct vnode* vnode = NULL;
1927 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1928 	if (status < B_OK)
1929 		return status;
1930 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1931 
1932 	// If we're going to pre-map pages, we need to reserve the pages needed by
1933 	// the mapping backend upfront.
1934 	page_num_t reservedPreMapPages = 0;
1935 	vm_page_reservation reservation;
1936 	if ((protection & B_READ_AREA) != 0) {
1937 		AddressSpaceWriteLocker locker;
1938 		status = locker.SetTo(team);
1939 		if (status != B_OK)
1940 			return status;
1941 
1942 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1943 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1944 
1945 		locker.Unlock();
1946 
1947 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1948 			team == VMAddressSpace::KernelID()
1949 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1950 	}
1951 
1952 	struct PageUnreserver {
1953 		PageUnreserver(vm_page_reservation* reservation)
1954 			:
1955 			fReservation(reservation)
1956 		{
1957 		}
1958 
1959 		~PageUnreserver()
1960 		{
1961 			if (fReservation != NULL)
1962 				vm_page_unreserve_pages(fReservation);
1963 		}
1964 
1965 		vm_page_reservation* fReservation;
1966 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1967 
1968 	// Lock the address space and, if the specified address range shall be
1969 	// unmapped, ensure it is not wired.
1970 	AddressSpaceWriteLocker locker;
1971 	do {
1972 		if (locker.SetTo(team) != B_OK)
1973 			return B_BAD_TEAM_ID;
1974 	} while (unmapAddressRange
1975 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1976 			(addr_t)*_address, size, &locker));
1977 
1978 	// TODO: this only works for file systems that use the file cache
1979 	VMCache* cache;
1980 	status = vfs_get_vnode_cache(vnode, &cache, false);
1981 	if (status < B_OK)
1982 		return status;
1983 
1984 	cache->Lock();
1985 
1986 	VMArea* area;
1987 	virtual_address_restrictions addressRestrictions = {};
1988 	addressRestrictions.address = *_address;
1989 	addressRestrictions.address_specification = addressSpec;
1990 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1991 		0, protection, mapping,
1992 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1993 		&addressRestrictions, kernel, &area, _address);
1994 
1995 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1996 		// map_backing_store() cannot know we no longer need the ref
1997 		cache->ReleaseRefLocked();
1998 	}
1999 
2000 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2001 		pre_map_area_pages(area, cache, &reservation);
2002 
2003 	cache->Unlock();
2004 
2005 	if (status == B_OK) {
2006 		// TODO: this probably deserves a smarter solution, ie. don't always
2007 		// prefetch stuff, and also, probably don't trigger it at this place.
2008 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2009 			// prefetches at max 10 MB starting from "offset"
2010 	}
2011 
2012 	if (status != B_OK)
2013 		return status;
2014 
2015 	area->cache_type = CACHE_TYPE_VNODE;
2016 	return area->id;
2017 }
2018 
2019 
2020 area_id
2021 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2022 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2023 	int fd, off_t offset)
2024 {
2025 	if (!arch_vm_supports_protection(protection))
2026 		return B_NOT_SUPPORTED;
2027 
2028 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2029 		mapping, unmapAddressRange, fd, offset, true);
2030 }
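
// Illustrative usage sketch (not part of the original source): mapping an
// already opened file read-only into the kernel address space. The "fd" and
// "fileSize" variables are hypothetical; _vm_map_file() page-aligns the
// offset and size.
//
//	void* fileBase = NULL;
//	area_id mappedFile = vm_map_file(VMAddressSpace::KernelID(), "mapped file",
//		&fileBase, B_ANY_KERNEL_ADDRESS, fileSize, B_KERNEL_READ_AREA,
//		REGION_NO_PRIVATE_MAP, false, fd, 0);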
2031 
2032 
2033 VMCache*
2034 vm_area_get_locked_cache(VMArea* area)
2035 {
2036 	rw_lock_read_lock(&sAreaCacheLock);
2037 
2038 	while (true) {
2039 		VMCache* cache = area->cache;
2040 
2041 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2042 			// cache has been deleted
2043 			rw_lock_read_lock(&sAreaCacheLock);
2044 			continue;
2045 		}
2046 
2047 		rw_lock_read_lock(&sAreaCacheLock);
2048 
2049 		if (cache == area->cache) {
2050 			cache->AcquireRefLocked();
2051 			rw_lock_read_unlock(&sAreaCacheLock);
2052 			return cache;
2053 		}
2054 
2055 		// the cache changed in the meantime
2056 		cache->Unlock();
2057 	}
2058 }
2059 
2060 
2061 void
2062 vm_area_put_locked_cache(VMCache* cache)
2063 {
2064 	cache->ReleaseRefAndUnlock();
2065 }
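
// The two functions above form a get/put pair. A minimal, illustrative usage
// pattern (assuming "area" is a valid VMArea* the caller may access):
//
//	VMCache* cache = vm_area_get_locked_cache(area);
//	// ... inspect or modify the cache while it is locked and referenced ...
//	vm_area_put_locked_cache(cache);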
2066 
2067 
2068 area_id
2069 vm_clone_area(team_id team, const char* name, void** address,
2070 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2071 	bool kernel)
2072 {
2073 	VMArea* newArea = NULL;
2074 	VMArea* sourceArea;
2075 
2076 	// Check whether the source area exists and is cloneable. If so, mark it
2077 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2078 	{
2079 		AddressSpaceWriteLocker locker;
2080 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2081 		if (status != B_OK)
2082 			return status;
2083 
2084 		sourceArea->protection |= B_SHARED_AREA;
2085 		protection |= B_SHARED_AREA;
2086 	}
2087 
2088 	// Now lock both address spaces and actually do the cloning.
2089 
2090 	MultiAddressSpaceLocker locker;
2091 	VMAddressSpace* sourceAddressSpace;
2092 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2093 	if (status != B_OK)
2094 		return status;
2095 
2096 	VMAddressSpace* targetAddressSpace;
2097 	status = locker.AddTeam(team, true, &targetAddressSpace);
2098 	if (status != B_OK)
2099 		return status;
2100 
2101 	status = locker.Lock();
2102 	if (status != B_OK)
2103 		return status;
2104 
2105 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2106 	if (sourceArea == NULL)
2107 		return B_BAD_VALUE;
2108 
2109 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2110 
2111 	if (!kernel && sourceAddressSpace == VMAddressSpace::Kernel()
2112 		&& targetAddressSpace != VMAddressSpace::Kernel()
2113 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2114 		// kernel areas must not be cloned in userland, unless explicitly
2115 		// declared user-cloneable upon construction
2116 #if KDEBUG
2117 		panic("attempting to clone kernel area \"%s\" (%" B_PRId32 ")!",
2118 			sourceArea->name, sourceID);
2119 #endif
2120 		status = B_NOT_ALLOWED;
2121 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2122 		status = B_NOT_ALLOWED;
2123 	} else {
2124 		virtual_address_restrictions addressRestrictions = {};
2125 		addressRestrictions.address = *address;
2126 		addressRestrictions.address_specification = addressSpec;
2127 		status = map_backing_store(targetAddressSpace, cache,
2128 			sourceArea->cache_offset, name, sourceArea->Size(),
2129 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2130 			kernel, &newArea, address);
2131 	}
2132 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2133 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2134 		// to create a new cache, and has therefore already acquired a reference
2135 		// to the source cache - but otherwise it has no idea that we need
2136 		// one.
2137 		cache->AcquireRefLocked();
2138 	}
2139 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2140 		// we need to map in everything at this point
2141 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2142 			// we don't have actual pages to map but a physical area
2143 			VMTranslationMap* map
2144 				= sourceArea->address_space->TranslationMap();
2145 			map->Lock();
2146 
2147 			phys_addr_t physicalAddress;
2148 			uint32 oldProtection;
2149 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2150 
2151 			map->Unlock();
2152 
2153 			map = targetAddressSpace->TranslationMap();
2154 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2155 				newArea->Base() + (newArea->Size() - 1));
2156 
2157 			vm_page_reservation reservation;
2158 			vm_page_reserve_pages(&reservation, reservePages,
2159 				targetAddressSpace == VMAddressSpace::Kernel()
2160 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2161 			map->Lock();
2162 
2163 			for (addr_t offset = 0; offset < newArea->Size();
2164 					offset += B_PAGE_SIZE) {
2165 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2166 					protection, newArea->MemoryType(), &reservation);
2167 			}
2168 
2169 			map->Unlock();
2170 			vm_page_unreserve_pages(&reservation);
2171 		} else {
2172 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2173 			size_t reservePages = map->MaxPagesNeededToMap(
2174 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2175 			vm_page_reservation reservation;
2176 			vm_page_reserve_pages(&reservation, reservePages,
2177 				targetAddressSpace == VMAddressSpace::Kernel()
2178 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2179 
2180 			// map in all pages from source
2181 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2182 					vm_page* page  = it.Next();) {
2183 				if (!page->busy) {
2184 					DEBUG_PAGE_ACCESS_START(page);
2185 					map_page(newArea, page,
2186 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2187 							- newArea->cache_offset),
2188 						protection, &reservation);
2189 					DEBUG_PAGE_ACCESS_END(page);
2190 				}
2191 			}
2192 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2193 			// ensuring that!
2194 
2195 			vm_page_unreserve_pages(&reservation);
2196 		}
2197 	}
2198 	if (status == B_OK)
2199 		newArea->cache_type = sourceArea->cache_type;
2200 
2201 	vm_area_put_locked_cache(cache);
2202 
2203 	if (status < B_OK)
2204 		return status;
2205 
2206 	return newArea->id;
2207 }
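
// Illustrative usage sketch (not part of the original source): cloning an
// existing area, e.g. a frame buffer, into the kernel address space. The
// "sourceAreaID" value is hypothetical.
//
//	void* clonedBase = NULL;
//	area_id clone = vm_clone_area(VMAddressSpace::KernelID(), "fb clone",
//		&clonedBase, B_ANY_KERNEL_ADDRESS,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, REGION_NO_PRIVATE_MAP,
//		sourceAreaID, true);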
2208 
2209 
2210 /*!	Deletes the specified area of the given address space.
2211 
2212 	The address space must be write-locked.
2213 	The caller must ensure that the area does not have any wired ranges.
2214 
2215 	\param addressSpace The address space containing the area.
2216 	\param area The area to be deleted.
2217 	\param deletingAddressSpace \c true, if the address space is in the process
2218 		of being deleted.
2219 */
2220 static void
2221 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2222 	bool deletingAddressSpace)
2223 {
2224 	ASSERT(!area->IsWired());
2225 
2226 	VMAreaHash::Remove(area);
2227 
2228 	// At this point the area is removed from the global hash table, but
2229 	// still exists in the area list.
2230 
2231 	// Unmap the virtual address space the area occupied.
2232 	{
2233 		// We need to lock the complete cache chain.
2234 		VMCache* topCache = vm_area_get_locked_cache(area);
2235 		VMCacheChainLocker cacheChainLocker(topCache);
2236 		cacheChainLocker.LockAllSourceCaches();
2237 
2238 		// If the area's top cache is a temporary cache and the area is the only
2239 		// one referencing it (besides us currently holding a second reference),
2240 		// the unmapping code doesn't need to care about preserving the accessed
2241 		// and dirty flags of the top cache page mappings.
2242 		bool ignoreTopCachePageFlags
2243 			= topCache->temporary && topCache->RefCount() == 2;
2244 
2245 		area->address_space->TranslationMap()->UnmapArea(area,
2246 			deletingAddressSpace, ignoreTopCachePageFlags);
2247 	}
2248 
2249 	if (!area->cache->temporary)
2250 		area->cache->WriteModified();
2251 
2252 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2253 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2254 
2255 	arch_vm_unset_memory_type(area);
2256 	addressSpace->RemoveArea(area, allocationFlags);
2257 	addressSpace->Put();
2258 
2259 	area->cache->RemoveArea(area);
2260 	area->cache->ReleaseRef();
2261 
2262 	addressSpace->DeleteArea(area, allocationFlags);
2263 }
2264 
2265 
2266 status_t
2267 vm_delete_area(team_id team, area_id id, bool kernel)
2268 {
2269 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2270 		team, id));
2271 
2272 	// lock the address space and make sure the area isn't wired
2273 	AddressSpaceWriteLocker locker;
2274 	VMArea* area;
2275 	AreaCacheLocker cacheLocker;
2276 
2277 	do {
2278 		status_t status = locker.SetFromArea(team, id, area);
2279 		if (status != B_OK)
2280 			return status;
2281 
2282 		cacheLocker.SetTo(area);
2283 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2284 
2285 	cacheLocker.Unlock();
2286 
2287 	// SetFromArea will have returned an error if the area's owning team is not
2288 	// the same as the passed team, so we don't need to do those checks here.
2289 
2290 	delete_area(locker.AddressSpace(), area, false);
2291 	return B_OK;
2292 }
2293 
2294 
2295 /*!	Creates a new cache on top of the given cache, moves all areas from
2296 	the old cache to the new one, and changes the protection of all affected
2297 	areas' pages to read-only. If requested, wired pages are moved up to the
2298 	new cache and copies are added to the old cache in their place.
2299 	Preconditions:
2300 	- The given cache must be locked.
2301 	- All of the cache's areas' address spaces must be read locked.
2302 	- Either the cache must not have any wired ranges or a page reservation for
2303 	  all wired pages must be provided, so they can be copied.
2304 
2305 	\param lowerCache The cache on top of which a new cache shall be created.
2306 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2307 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2308 		has wired pages. The wired pages are copied in this case.
2309 */
2310 static status_t
2311 vm_copy_on_write_area(VMCache* lowerCache,
2312 	vm_page_reservation* wiredPagesReservation)
2313 {
2314 	VMCache* upperCache;
2315 
2316 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2317 
2318 	// We need to separate the cache from its areas. The cache goes one level
2319 	// deeper and we create a new cache in between.
2320 
2321 	// create an anonymous cache
2322 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2323 		lowerCache->GuardSize() / B_PAGE_SIZE,
2324 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2325 		VM_PRIORITY_USER);
2326 	if (status != B_OK)
2327 		return status;
2328 
2329 	upperCache->Lock();
2330 
2331 	upperCache->temporary = 1;
2332 	upperCache->virtual_base = lowerCache->virtual_base;
2333 	upperCache->virtual_end = lowerCache->virtual_end;
2334 
2335 	// transfer the lower cache areas to the upper cache
2336 	rw_lock_write_lock(&sAreaCacheLock);
2337 	upperCache->TransferAreas(lowerCache);
2338 	rw_lock_write_unlock(&sAreaCacheLock);
2339 
2340 	lowerCache->AddConsumer(upperCache);
2341 
2342 	// We now need to remap all pages from all of the cache's areas read-only,
2343 	// so that a copy will be created on next write access. If there are wired
2344 	// pages, we keep their protection, move them to the upper cache and create
2345 	// copies for the lower cache.
2346 	if (wiredPagesReservation != NULL) {
2347 		// We need to handle wired pages -- iterate through the cache's pages.
2348 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2349 				vm_page* page = it.Next();) {
2350 			if (page->WiredCount() > 0) {
2351 				// allocate a new page and copy the wired one
2352 				vm_page* copiedPage = vm_page_allocate_page(
2353 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2354 
2355 				vm_memcpy_physical_page(
2356 					copiedPage->physical_page_number * B_PAGE_SIZE,
2357 					page->physical_page_number * B_PAGE_SIZE);
2358 
2359 				// move the wired page to the upper cache (note: removing is OK
2360 				// with the SplayTree iterator) and insert the copy
2361 				upperCache->MovePage(page);
2362 				lowerCache->InsertPage(copiedPage,
2363 					page->cache_offset * B_PAGE_SIZE);
2364 
2365 				DEBUG_PAGE_ACCESS_END(copiedPage);
2366 			} else {
2367 				// Change the protection of this page in all areas.
2368 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2369 						tempArea = tempArea->cache_next) {
2370 					// The area must be readable in the same way it was
2371 					// previously writable.
2372 					uint32 protection = B_KERNEL_READ_AREA;
2373 					if ((tempArea->protection & B_READ_AREA) != 0)
2374 						protection |= B_READ_AREA;
2375 
2376 					VMTranslationMap* map
2377 						= tempArea->address_space->TranslationMap();
2378 					map->Lock();
2379 					map->ProtectPage(tempArea,
2380 						virtual_page_address(tempArea, page), protection);
2381 					map->Unlock();
2382 				}
2383 			}
2384 		}
2385 	} else {
2386 		ASSERT(lowerCache->WiredPagesCount() == 0);
2387 
2388 		// just change the protection of all areas
2389 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2390 				tempArea = tempArea->cache_next) {
2391 			// The area must be readable in the same way it was previously
2392 			// writable.
2393 			uint32 protection = B_KERNEL_READ_AREA;
2394 			if ((tempArea->protection & B_READ_AREA) != 0)
2395 				protection |= B_READ_AREA;
2396 
2397 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2398 			map->Lock();
2399 			map->ProtectArea(tempArea, protection);
2400 			map->Unlock();
2401 		}
2402 	}
2403 
2404 	vm_area_put_locked_cache(upperCache);
2405 
2406 	return B_OK;
2407 }
2408 
2409 
2410 area_id
2411 vm_copy_area(team_id team, const char* name, void** _address,
2412 	uint32 addressSpec, uint32 protection, area_id sourceID)
2413 {
2414 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2415 
2416 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2417 		// set the same protection for the kernel as for userland
2418 		protection |= B_KERNEL_READ_AREA;
2419 		if (writableCopy)
2420 			protection |= B_KERNEL_WRITE_AREA;
2421 	}
2422 
2423 	// Do the locking: target address space, all address spaces associated with
2424 	// the source cache, and the cache itself.
2425 	MultiAddressSpaceLocker locker;
2426 	VMAddressSpace* targetAddressSpace;
2427 	VMCache* cache;
2428 	VMArea* source;
2429 	AreaCacheLocker cacheLocker;
2430 	status_t status;
2431 	bool sharedArea;
2432 
2433 	page_num_t wiredPages = 0;
2434 	vm_page_reservation wiredPagesReservation;
2435 
2436 	bool restart;
2437 	do {
2438 		restart = false;
2439 
2440 		locker.Unset();
2441 		status = locker.AddTeam(team, true, &targetAddressSpace);
2442 		if (status == B_OK) {
2443 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2444 				&cache);
2445 		}
2446 		if (status != B_OK)
2447 			return status;
2448 
2449 		cacheLocker.SetTo(cache, true);	// already locked
2450 
2451 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2452 
2453 		page_num_t oldWiredPages = wiredPages;
2454 		wiredPages = 0;
2455 
2456 		// If the source area isn't shared, count the number of wired pages in
2457 		// the cache and reserve as many pages.
2458 		if (!sharedArea) {
2459 			wiredPages = cache->WiredPagesCount();
2460 
2461 			if (wiredPages > oldWiredPages) {
2462 				cacheLocker.Unlock();
2463 				locker.Unlock();
2464 
2465 				if (oldWiredPages > 0)
2466 					vm_page_unreserve_pages(&wiredPagesReservation);
2467 
2468 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2469 					VM_PRIORITY_USER);
2470 
2471 				restart = true;
2472 			}
2473 		} else if (oldWiredPages > 0)
2474 			vm_page_unreserve_pages(&wiredPagesReservation);
2475 	} while (restart);
2476 
2477 	// unreserve pages later
2478 	struct PagesUnreserver {
2479 		PagesUnreserver(vm_page_reservation* reservation)
2480 			:
2481 			fReservation(reservation)
2482 		{
2483 		}
2484 
2485 		~PagesUnreserver()
2486 		{
2487 			if (fReservation != NULL)
2488 				vm_page_unreserve_pages(fReservation);
2489 		}
2490 
2491 	private:
2492 		vm_page_reservation*	fReservation;
2493 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2494 
2495 	if (addressSpec == B_CLONE_ADDRESS) {
2496 		addressSpec = B_EXACT_ADDRESS;
2497 		*_address = (void*)source->Base();
2498 	}
2499 
2500 	// First, create a cache on top of the source area, or use the existing
2501 	// one if this is a shared area.
2502 
2503 	VMArea* target;
2504 	virtual_address_restrictions addressRestrictions = {};
2505 	addressRestrictions.address = *_address;
2506 	addressRestrictions.address_specification = addressSpec;
2507 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2508 		name, source->Size(), source->wiring, protection,
2509 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2510 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2511 		&addressRestrictions, true, &target, _address);
2512 	if (status < B_OK)
2513 		return status;
2514 
2515 	if (sharedArea) {
2516 		// The new area uses the old area's cache, but map_backing_store()
2517 		// hasn't acquired a ref. So we have to do that now.
2518 		cache->AcquireRefLocked();
2519 	}
2520 
2521 	// If the source area is writable, we need to move it one layer up as well
2522 
2523 	if (!sharedArea) {
2524 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2525 			// TODO: do something more useful if this fails!
2526 			if (vm_copy_on_write_area(cache,
2527 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2528 				panic("vm_copy_on_write_area() failed!\n");
2529 			}
2530 		}
2531 	}
2532 
2533 	// we return the ID of the newly created area
2534 	return target->id;
2535 }
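
// Illustrative usage sketch (not part of the original source): creating a
// copy-on-write copy of an area in another team. "targetTeam" and
// "sourceAreaID" are hypothetical.
//
//	void* copyBase = NULL;
//	area_id copy = vm_copy_area(targetTeam, "data copy", &copyBase,
//		B_ANY_ADDRESS, B_READ_AREA | B_WRITE_AREA, sourceAreaID);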
2536 
2537 
2538 status_t
2539 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2540 	bool kernel)
2541 {
2542 	fix_protection(&newProtection);
2543 
2544 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2545 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2546 
2547 	if (!arch_vm_supports_protection(newProtection))
2548 		return B_NOT_SUPPORTED;
2549 
2550 	bool becomesWritable
2551 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2552 
2553 	// lock address spaces and cache
2554 	MultiAddressSpaceLocker locker;
2555 	VMCache* cache;
2556 	VMArea* area;
2557 	status_t status;
2558 	AreaCacheLocker cacheLocker;
2559 	bool isWritable;
2560 
2561 	bool restart;
2562 	do {
2563 		restart = false;
2564 
2565 		locker.Unset();
2566 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2567 		if (status != B_OK)
2568 			return status;
2569 
2570 		cacheLocker.SetTo(cache, true);	// already locked
2571 
2572 		if (!kernel && area->address_space == VMAddressSpace::Kernel()) {
2573 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2574 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2575 				" (%s)\n", team, newProtection, areaID, area->name);
2576 			return B_NOT_ALLOWED;
2577 		}
2578 
2579 		if (area->protection == newProtection)
2580 			return B_OK;
2581 
2582 		if (team != VMAddressSpace::KernelID()
2583 			&& area->address_space->ID() != team) {
2584 			// unless you're the kernel, you are only allowed to set
2585 			// the protection of your own areas
2586 			return B_NOT_ALLOWED;
2587 		}
2588 
2589 		isWritable
2590 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2591 
2592 		// Make sure the area (respectively, if we're going to call
2593 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2594 		// wired ranges.
2595 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2596 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2597 					otherArea = otherArea->cache_next) {
2598 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2599 					restart = true;
2600 					break;
2601 				}
2602 			}
2603 		} else {
2604 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2605 				restart = true;
2606 		}
2607 	} while (restart);
2608 
2609 	bool changePageProtection = true;
2610 	bool changeTopCachePagesOnly = false;
2611 
2612 	if (isWritable && !becomesWritable) {
2613 		// writable -> !writable
2614 
2615 		if (cache->source != NULL && cache->temporary) {
2616 			if (cache->CountWritableAreas(area) == 0) {
2617 				// Since this cache now lives off the pages in its source
2618 				// cache, we can change the cache's commitment to take only
2619 				// those pages into account that really are in this cache.
2620 
2621 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2622 					team == VMAddressSpace::KernelID()
2623 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2624 
2625 				// TODO: we may be able to join with our source cache, if
2626 				// count == 0
2627 			}
2628 		}
2629 
2630 		// If only the writability changes, we can just remap the pages of the
2631 		// top cache, since the pages of lower caches are mapped read-only
2632 		// anyway. That is only advantageous, though, if the number of pages
2633 		// in the cache is significantly smaller than the number of pages in
2634 		// the area.
2635 		if (newProtection
2636 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2637 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2638 			changeTopCachePagesOnly = true;
2639 		}
2640 	} else if (!isWritable && becomesWritable) {
2641 		// !writable -> writable
2642 
2643 		if (!cache->consumers.IsEmpty()) {
2644 			// There are consumers -- we have to insert a new cache. Fortunately
2645 			// vm_copy_on_write_area() does everything that's needed.
2646 			changePageProtection = false;
2647 			status = vm_copy_on_write_area(cache, NULL);
2648 		} else {
2649 			// No consumers, so we don't need to insert a new one.
2650 			if (cache->source != NULL && cache->temporary) {
2651 				// the cache's commitment must contain all possible pages
2652 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2653 					team == VMAddressSpace::KernelID()
2654 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2655 			}
2656 
2657 			if (status == B_OK && cache->source != NULL) {
2658 				// There's a source cache, hence we can't just change all pages'
2659 				// protection or we might allow writing into pages belonging to
2660 				// a lower cache.
2661 				changeTopCachePagesOnly = true;
2662 			}
2663 		}
2664 	} else {
2665 		// we don't have anything special to do in all other cases
2666 	}
2667 
2668 	if (status == B_OK) {
2669 		// remap existing pages in this cache
2670 		if (changePageProtection) {
2671 			VMTranslationMap* map = area->address_space->TranslationMap();
2672 			map->Lock();
2673 
2674 			if (changeTopCachePagesOnly) {
2675 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2676 				page_num_t lastPageOffset
2677 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2678 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2679 						vm_page* page = it.Next();) {
2680 					if (page->cache_offset >= firstPageOffset
2681 						&& page->cache_offset < lastPageOffset) {
2682 						addr_t address = virtual_page_address(area, page);
2683 						map->ProtectPage(area, address, newProtection);
2684 					}
2685 				}
2686 			} else
2687 				map->ProtectArea(area, newProtection);
2688 
2689 			map->Unlock();
2690 		}
2691 
2692 		area->protection = newProtection;
2693 	}
2694 
2695 	return status;
2696 }
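
// Illustrative usage sketch (not part of the original source): revoking write
// access from a kernel area. "areaID" is hypothetical.
//
//	status_t error = vm_set_area_protection(VMAddressSpace::KernelID(), areaID,
//		B_KERNEL_READ_AREA, true);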
2697 
2698 
2699 status_t
2700 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2701 {
2702 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2703 	if (addressSpace == NULL)
2704 		return B_BAD_TEAM_ID;
2705 
2706 	VMTranslationMap* map = addressSpace->TranslationMap();
2707 
2708 	map->Lock();
2709 	uint32 dummyFlags;
2710 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2711 	map->Unlock();
2712 
2713 	addressSpace->Put();
2714 	return status;
2715 }
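
// Illustrative usage sketch (not part of the original source): translating a
// kernel virtual address to its physical address. "virtualAddress" is
// hypothetical and must be mapped for the query to succeed.
//
//	phys_addr_t physicalAddress;
//	status_t error = vm_get_page_mapping(VMAddressSpace::KernelID(),
//		virtualAddress, &physicalAddress);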
2716 
2717 
2718 /*!	The page's cache must be locked.
2719 */
2720 bool
2721 vm_test_map_modification(vm_page* page)
2722 {
2723 	if (page->modified)
2724 		return true;
2725 
2726 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2727 	vm_page_mapping* mapping;
2728 	while ((mapping = iterator.Next()) != NULL) {
2729 		VMArea* area = mapping->area;
2730 		VMTranslationMap* map = area->address_space->TranslationMap();
2731 
2732 		phys_addr_t physicalAddress;
2733 		uint32 flags;
2734 		map->Lock();
2735 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2736 		map->Unlock();
2737 
2738 		if ((flags & PAGE_MODIFIED) != 0)
2739 			return true;
2740 	}
2741 
2742 	return false;
2743 }
2744 
2745 
2746 /*!	The page's cache must be locked.
2747 */
2748 void
2749 vm_clear_map_flags(vm_page* page, uint32 flags)
2750 {
2751 	if ((flags & PAGE_ACCESSED) != 0)
2752 		page->accessed = false;
2753 	if ((flags & PAGE_MODIFIED) != 0)
2754 		page->modified = false;
2755 
2756 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2757 	vm_page_mapping* mapping;
2758 	while ((mapping = iterator.Next()) != NULL) {
2759 		VMArea* area = mapping->area;
2760 		VMTranslationMap* map = area->address_space->TranslationMap();
2761 
2762 		map->Lock();
2763 		map->ClearFlags(virtual_page_address(area, page), flags);
2764 		map->Unlock();
2765 	}
2766 }
2767 
2768 
2769 /*!	Removes all mappings from a page.
2770 	After you've called this function, the page is unmapped from memory and
2771 	the page's \c accessed and \c modified flags have been updated according
2772 	to the state of the mappings.
2773 	The page's cache must be locked.
2774 */
2775 void
2776 vm_remove_all_page_mappings(vm_page* page)
2777 {
2778 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2779 		VMArea* area = mapping->area;
2780 		VMTranslationMap* map = area->address_space->TranslationMap();
2781 		addr_t address = virtual_page_address(area, page);
2782 		map->UnmapPage(area, address, false);
2783 	}
2784 }
2785 
2786 
2787 int32
2788 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2789 {
2790 	int32 count = 0;
2791 
2792 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2793 	vm_page_mapping* mapping;
2794 	while ((mapping = iterator.Next()) != NULL) {
2795 		VMArea* area = mapping->area;
2796 		VMTranslationMap* map = area->address_space->TranslationMap();
2797 
2798 		bool modified;
2799 		if (map->ClearAccessedAndModified(area,
2800 				virtual_page_address(area, page), false, modified)) {
2801 			count++;
2802 		}
2803 
2804 		page->modified |= modified;
2805 	}
2806 
2807 
2808 	if (page->accessed) {
2809 		count++;
2810 		page->accessed = false;
2811 	}
2812 
2813 	return count;
2814 }
2815 
2816 
2817 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2818 	mappings.
2819 	The function iterates through the page mappings and removes them until
2820 	encountering one that has been accessed. From then on it will continue to
2821 	iterate, but only clear the accessed flag of the mapping. The page's
2822 	\c modified bit will be updated accordingly, the \c accessed bit will be
2823 	cleared.
2824 	\return The number of mapping accessed bits encountered, including the
2825 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2826 		of the page have been removed.
2827 */
2828 int32
2829 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2830 {
2831 	ASSERT(page->WiredCount() == 0);
2832 
2833 	if (page->accessed)
2834 		return vm_clear_page_mapping_accessed_flags(page);
2835 
2836 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2837 		VMArea* area = mapping->area;
2838 		VMTranslationMap* map = area->address_space->TranslationMap();
2839 		addr_t address = virtual_page_address(area, page);
2840 		bool modified = false;
2841 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2842 			page->accessed = true;
2843 			page->modified |= modified;
2844 			return vm_clear_page_mapping_accessed_flags(page);
2845 		}
2846 		page->modified |= modified;
2847 	}
2848 
2849 	return 0;
2850 }
2851 
2852 
2853 static int
2854 display_mem(int argc, char** argv)
2855 {
2856 	bool physical = false;
2857 	addr_t copyAddress;
2858 	int32 displayWidth;
2859 	int32 itemSize;
2860 	int32 num = -1;
2861 	addr_t address;
2862 	int i = 1, j;
2863 
2864 	if (argc > 1 && argv[1][0] == '-') {
2865 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2866 			physical = true;
2867 			i++;
2868 		} else
2869 			i = 99;
2870 	}
2871 
2872 	if (argc < i + 1 || argc > i + 2) {
2873 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2874 			"\tdl - 8 bytes\n"
2875 			"\tdw - 4 bytes\n"
2876 			"\tds - 2 bytes\n"
2877 			"\tdb - 1 byte\n"
2878 			"\tstring - a whole string\n"
2879 			"  -p or --physical only allows memory from a single page to be "
2880 			"displayed.\n");
2881 		return 0;
2882 	}
2883 
2884 	address = parse_expression(argv[i]);
2885 
2886 	if (argc > i + 1)
2887 		num = parse_expression(argv[i + 1]);
2888 
2889 	// build the format string
2890 	if (strcmp(argv[0], "db") == 0) {
2891 		itemSize = 1;
2892 		displayWidth = 16;
2893 	} else if (strcmp(argv[0], "ds") == 0) {
2894 		itemSize = 2;
2895 		displayWidth = 8;
2896 	} else if (strcmp(argv[0], "dw") == 0) {
2897 		itemSize = 4;
2898 		displayWidth = 4;
2899 	} else if (strcmp(argv[0], "dl") == 0) {
2900 		itemSize = 8;
2901 		displayWidth = 2;
2902 	} else if (strcmp(argv[0], "string") == 0) {
2903 		itemSize = 1;
2904 		displayWidth = -1;
2905 	} else {
2906 		kprintf("display_mem called in an invalid way!\n");
2907 		return 0;
2908 	}
2909 
2910 	if (num <= 0)
2911 		num = displayWidth;
2912 
2913 	void* physicalPageHandle = NULL;
2914 
2915 	if (physical) {
2916 		int32 offset = address & (B_PAGE_SIZE - 1);
2917 		if (num * itemSize + offset > B_PAGE_SIZE) {
2918 			num = (B_PAGE_SIZE - offset) / itemSize;
2919 			kprintf("NOTE: number of bytes has been cut to page size\n");
2920 		}
2921 
2922 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2923 
2924 		if (vm_get_physical_page_debug(address, &copyAddress,
2925 				&physicalPageHandle) != B_OK) {
2926 			kprintf("getting the hardware page failed.");
2927 			return 0;
2928 		}
2929 
2930 		address += offset;
2931 		copyAddress += offset;
2932 	} else
2933 		copyAddress = address;
2934 
2935 	if (!strcmp(argv[0], "string")) {
2936 		kprintf("%p \"", (char*)copyAddress);
2937 
2938 		// string mode
2939 		for (i = 0; true; i++) {
2940 			char c;
2941 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2942 					!= B_OK
2943 				|| c == '\0') {
2944 				break;
2945 			}
2946 
2947 			if (c == '\n')
2948 				kprintf("\\n");
2949 			else if (c == '\t')
2950 				kprintf("\\t");
2951 			else {
2952 				if (!isprint(c))
2953 					c = '.';
2954 
2955 				kprintf("%c", c);
2956 			}
2957 		}
2958 
2959 		kprintf("\"\n");
2960 	} else {
2961 		// number mode
2962 		for (i = 0; i < num; i++) {
2963 			uint64 value;
2964 
2965 			if ((i % displayWidth) == 0) {
2966 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2967 				if (i != 0)
2968 					kprintf("\n");
2969 
2970 				kprintf("[0x%lx]  ", address + i * itemSize);
2971 
2972 				for (j = 0; j < displayed; j++) {
2973 					char c;
2974 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2975 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2976 						displayed = j;
2977 						break;
2978 					}
2979 					if (!isprint(c))
2980 						c = '.';
2981 
2982 					kprintf("%c", c);
2983 				}
2984 				if (num > displayWidth) {
2985 					// make sure the spacing in the last line is correct
2986 					for (j = displayed; j < displayWidth * itemSize; j++)
2987 						kprintf(" ");
2988 				}
2989 				kprintf("  ");
2990 			}
2991 
2992 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2993 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2994 				kprintf("read fault");
2995 				break;
2996 			}
2997 
2998 			switch (itemSize) {
2999 				case 1:
3000 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3001 					break;
3002 				case 2:
3003 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3004 					break;
3005 				case 4:
3006 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3007 					break;
3008 				case 8:
3009 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3010 					break;
3011 			}
3012 		}
3013 
3014 		kprintf("\n");
3015 	}
3016 
3017 	if (physical) {
3018 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3019 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3020 	}
3021 	return 0;
3022 }
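
// Illustrative KDL invocations (addresses are hypothetical):
//	dw 0x80125000 8        - dumps eight 32-bit values at that virtual address
//	db -p 0x9f000 16       - dumps 16 bytes of physical memory from that page
//	string 0x80125000      - prints the NUL-terminated string at that address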
3023 
3024 
3025 static void
3026 dump_cache_tree_recursively(VMCache* cache, int level,
3027 	VMCache* highlightCache)
3028 {
3029 	// print this cache
3030 	for (int i = 0; i < level; i++)
3031 		kprintf("  ");
3032 	if (cache == highlightCache)
3033 		kprintf("%p <--\n", cache);
3034 	else
3035 		kprintf("%p\n", cache);
3036 
3037 	// recursively print its consumers
3038 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3039 			VMCache* consumer = it.Next();) {
3040 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3041 	}
3042 }
3043 
3044 
3045 static int
3046 dump_cache_tree(int argc, char** argv)
3047 {
3048 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3049 		kprintf("usage: %s <address>\n", argv[0]);
3050 		return 0;
3051 	}
3052 
3053 	addr_t address = parse_expression(argv[1]);
3054 	if (address == 0)
3055 		return 0;
3056 
3057 	VMCache* cache = (VMCache*)address;
3058 	VMCache* root = cache;
3059 
3060 	// find the root cache (the transitive source)
3061 	while (root->source != NULL)
3062 		root = root->source;
3063 
3064 	dump_cache_tree_recursively(root, 0, cache);
3065 
3066 	return 0;
3067 }
3068 
3069 
3070 const char*
3071 vm_cache_type_to_string(int32 type)
3072 {
3073 	switch (type) {
3074 		case CACHE_TYPE_RAM:
3075 			return "RAM";
3076 		case CACHE_TYPE_DEVICE:
3077 			return "device";
3078 		case CACHE_TYPE_VNODE:
3079 			return "vnode";
3080 		case CACHE_TYPE_NULL:
3081 			return "null";
3082 
3083 		default:
3084 			return "unknown";
3085 	}
3086 }
3087 
3088 
3089 #if DEBUG_CACHE_LIST
3090 
3091 static void
3092 update_cache_info_recursively(VMCache* cache, cache_info& info)
3093 {
3094 	info.page_count += cache->page_count;
3095 	if (cache->type == CACHE_TYPE_RAM)
3096 		info.committed += cache->committed_size;
3097 
3098 	// recurse
3099 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3100 			VMCache* consumer = it.Next();) {
3101 		update_cache_info_recursively(consumer, info);
3102 	}
3103 }
3104 
3105 
3106 static int
3107 cache_info_compare_page_count(const void* _a, const void* _b)
3108 {
3109 	const cache_info* a = (const cache_info*)_a;
3110 	const cache_info* b = (const cache_info*)_b;
3111 	if (a->page_count == b->page_count)
3112 		return 0;
3113 	return a->page_count < b->page_count ? 1 : -1;
3114 }
3115 
3116 
3117 static int
3118 cache_info_compare_committed(const void* _a, const void* _b)
3119 {
3120 	const cache_info* a = (const cache_info*)_a;
3121 	const cache_info* b = (const cache_info*)_b;
3122 	if (a->committed == b->committed)
3123 		return 0;
3124 	return a->committed < b->committed ? 1 : -1;
3125 }
3126 
3127 
3128 static void
3129 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3130 {
3131 	for (int i = 0; i < level; i++)
3132 		kprintf("  ");
3133 
3134 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3135 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3136 		cache->virtual_base, cache->virtual_end, cache->page_count);
3137 
3138 	if (level == 0)
3139 		kprintf("/%lu", info.page_count);
3140 
3141 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3142 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3143 
3144 		if (level == 0)
3145 			kprintf("/%lu", info.committed);
3146 	}
3147 
3148 	// areas
3149 	if (cache->areas != NULL) {
3150 		VMArea* area = cache->areas;
3151 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3152 			area->name, area->address_space->ID());
3153 
3154 		while (area->cache_next != NULL) {
3155 			area = area->cache_next;
3156 			kprintf(", %" B_PRId32, area->id);
3157 		}
3158 	}
3159 
3160 	kputs("\n");
3161 
3162 	// recurse
3163 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3164 			VMCache* consumer = it.Next();) {
3165 		dump_caches_recursively(consumer, info, level + 1);
3166 	}
3167 }
3168 
3169 
3170 static int
3171 dump_caches(int argc, char** argv)
3172 {
3173 	if (sCacheInfoTable == NULL) {
3174 		kprintf("No cache info table!\n");
3175 		return 0;
3176 	}
3177 
3178 	bool sortByPageCount = true;
3179 
3180 	for (int32 i = 1; i < argc; i++) {
3181 		if (strcmp(argv[i], "-c") == 0) {
3182 			sortByPageCount = false;
3183 		} else {
3184 			print_debugger_command_usage(argv[0]);
3185 			return 0;
3186 		}
3187 	}
3188 
3189 	uint32 totalCount = 0;
3190 	uint32 rootCount = 0;
3191 	off_t totalCommitted = 0;
3192 	page_num_t totalPages = 0;
3193 
3194 	VMCache* cache = gDebugCacheList;
3195 	while (cache) {
3196 		totalCount++;
3197 		if (cache->source == NULL) {
3198 			cache_info stackInfo;
3199 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3200 				? sCacheInfoTable[rootCount] : stackInfo;
3201 			rootCount++;
3202 			info.cache = cache;
3203 			info.page_count = 0;
3204 			info.committed = 0;
3205 			update_cache_info_recursively(cache, info);
3206 			totalCommitted += info.committed;
3207 			totalPages += info.page_count;
3208 		}
3209 
3210 		cache = cache->debug_next;
3211 	}
3212 
3213 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3214 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3215 			sortByPageCount
3216 				? &cache_info_compare_page_count
3217 				: &cache_info_compare_committed);
3218 	}
3219 
3220 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3221 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3222 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3223 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3224 			"page count" : "committed size");
3225 
3226 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3227 		for (uint32 i = 0; i < rootCount; i++) {
3228 			cache_info& info = sCacheInfoTable[i];
3229 			dump_caches_recursively(info.cache, info, 0);
3230 		}
3231 	} else
3232 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3233 
3234 	return 0;
3235 }
3236 
3237 #endif	// DEBUG_CACHE_LIST
3238 
3239 
3240 static int
3241 dump_cache(int argc, char** argv)
3242 {
3243 	VMCache* cache;
3244 	bool showPages = false;
3245 	int i = 1;
3246 
3247 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3248 		kprintf("usage: %s [-ps] <address>\n"
3249 			"  if -p is specified, all pages are shown, if -s is used\n"
3250 			"  only the cache info is shown respectively.\n", argv[0]);
3251 		return 0;
3252 	}
3253 	while (argv[i] != NULL && argv[i][0] == '-') {
3254 		char* arg = argv[i] + 1;
3255 		while (arg[0]) {
3256 			if (arg[0] == 'p')
3257 				showPages = true;
3258 			arg++;
3259 		}
3260 		i++;
3261 	}
3262 	if (argv[i] == NULL) {
3263 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3264 		return 0;
3265 	}
3266 
3267 	addr_t address = parse_expression(argv[i]);
3268 	if (address == 0)
3269 		return 0;
3270 
3271 	cache = (VMCache*)address;
3272 
3273 	cache->Dump(showPages);
3274 
3275 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3276 
3277 	return 0;
3278 }
3279 
3280 
3281 static void
3282 dump_area_struct(VMArea* area, bool mappings)
3283 {
3284 	kprintf("AREA: %p\n", area);
3285 	kprintf("name:\t\t'%s'\n", area->name);
3286 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3287 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3288 	kprintf("base:\t\t0x%lx\n", area->Base());
3289 	kprintf("size:\t\t0x%lx\n", area->Size());
3290 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3291 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3292 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3293 	kprintf("cache:\t\t%p\n", area->cache);
3294 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3295 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3296 	kprintf("cache_next:\t%p\n", area->cache_next);
3297 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3298 
3299 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3300 	if (mappings) {
3301 		kprintf("page mappings:\n");
3302 		while (iterator.HasNext()) {
3303 			vm_page_mapping* mapping = iterator.Next();
3304 			kprintf("  %p", mapping->page);
3305 		}
3306 		kprintf("\n");
3307 	} else {
3308 		uint32 count = 0;
3309 		while (iterator.Next() != NULL) {
3310 			count++;
3311 		}
3312 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3313 	}
3314 }
3315 
3316 
3317 static int
3318 dump_area(int argc, char** argv)
3319 {
3320 	bool mappings = false;
3321 	bool found = false;
3322 	int32 index = 1;
3323 	VMArea* area;
3324 	addr_t num;
3325 
3326 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3327 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3328 			"All areas matching either id/address/name are listed. You can\n"
3329 			"force to check only a specific item by prefixing the specifier\n"
3330 			"with the id/contains/address/name keywords.\n"
3331 			"-m shows the area's mappings as well.\n");
3332 		return 0;
3333 	}
3334 
3335 	if (!strcmp(argv[1], "-m")) {
3336 		mappings = true;
3337 		index++;
3338 	}
3339 
3340 	int32 mode = 0xf;
3341 	if (!strcmp(argv[index], "id"))
3342 		mode = 1;
3343 	else if (!strcmp(argv[index], "contains"))
3344 		mode = 2;
3345 	else if (!strcmp(argv[index], "name"))
3346 		mode = 4;
3347 	else if (!strcmp(argv[index], "address"))
3348 		mode = 0;
3349 	if (mode != 0xf)
3350 		index++;
3351 
3352 	if (index >= argc) {
3353 		kprintf("No area specifier given.\n");
3354 		return 0;
3355 	}
3356 
3357 	num = parse_expression(argv[index]);
3358 
3359 	if (mode == 0) {
3360 		dump_area_struct((struct VMArea*)num, mappings);
3361 	} else {
3362 		// walk through the area list, looking for the arguments as a name
3363 
3364 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3365 		while ((area = it.Next()) != NULL) {
3366 			if (((mode & 4) != 0 && area->name != NULL
3367 					&& !strcmp(argv[index], area->name))
3368 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3369 					|| (((mode & 2) != 0 && area->Base() <= num
3370 						&& area->Base() + area->Size() > num))))) {
3371 				dump_area_struct(area, mappings);
3372 				found = true;
3373 			}
3374 		}
3375 
3376 		if (!found)
3377 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3378 	}
3379 
3380 	return 0;
3381 }
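
// Illustrative KDL invocations (values are hypothetical):
//	area contains 0x80125000   - lists the area covering that address
//	area -m id 0x1d2           - dumps the area with that ID plus its mappings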
3382 
3383 
3384 static int
3385 dump_area_list(int argc, char** argv)
3386 {
3387 	VMArea* area;
3388 	const char* name = NULL;
3389 	int32 id = 0;
3390 
3391 	if (argc > 1) {
3392 		id = parse_expression(argv[1]);
3393 		if (id == 0)
3394 			name = argv[1];
3395 	}
3396 
3397 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3398 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3399 		B_PRINTF_POINTER_WIDTH, "size");
3400 
3401 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3402 	while ((area = it.Next()) != NULL) {
3403 		if ((id != 0 && area->address_space->ID() != id)
3404 			|| (name != NULL && strstr(area->name, name) == NULL))
3405 			continue;
3406 
3407 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3408 			area->id, (void*)area->Base(), (void*)area->Size(),
3409 			area->protection, area->wiring, area->name);
3410 	}
3411 	return 0;
3412 }
3413 
3414 
3415 static int
3416 dump_available_memory(int argc, char** argv)
3417 {
3418 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3419 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3420 	return 0;
3421 }
3422 
3423 
3424 static int
3425 dump_mapping_info(int argc, char** argv)
3426 {
3427 	bool reverseLookup = false;
3428 	bool pageLookup = false;
3429 
3430 	int argi = 1;
3431 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3432 		const char* arg = argv[argi];
3433 		if (strcmp(arg, "-r") == 0) {
3434 			reverseLookup = true;
3435 		} else if (strcmp(arg, "-p") == 0) {
3436 			reverseLookup = true;
3437 			pageLookup = true;
3438 		} else {
3439 			print_debugger_command_usage(argv[0]);
3440 			return 0;
3441 		}
3442 	}
3443 
3444 	// We need at least one argument, the address. Optionally a thread ID can be
3445 	// specified.
3446 	if (argi >= argc || argi + 2 < argc) {
3447 		print_debugger_command_usage(argv[0]);
3448 		return 0;
3449 	}
3450 
3451 	uint64 addressValue;
3452 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3453 		return 0;
3454 
3455 	Team* team = NULL;
3456 	if (argi < argc) {
3457 		uint64 threadID;
3458 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3459 			return 0;
3460 
3461 		Thread* thread = Thread::GetDebug(threadID);
3462 		if (thread == NULL) {
3463 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3464 			return 0;
3465 		}
3466 
3467 		team = thread->team;
3468 	}
3469 
3470 	if (reverseLookup) {
3471 		phys_addr_t physicalAddress;
3472 		if (pageLookup) {
3473 			vm_page* page = (vm_page*)(addr_t)addressValue;
3474 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3475 		} else {
3476 			physicalAddress = (phys_addr_t)addressValue;
3477 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3478 		}
3479 
3480 		kprintf("    Team     Virtual Address      Area\n");
3481 		kprintf("--------------------------------------\n");
3482 
3483 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3484 			Callback()
3485 				:
3486 				fAddressSpace(NULL)
3487 			{
3488 			}
3489 
3490 			void SetAddressSpace(VMAddressSpace* addressSpace)
3491 			{
3492 				fAddressSpace = addressSpace;
3493 			}
3494 
3495 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3496 			{
3497 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3498 					virtualAddress);
3499 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3500 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3501 				else
3502 					kprintf("\n");
3503 				return false;
3504 			}
3505 
3506 		private:
3507 			VMAddressSpace*	fAddressSpace;
3508 		} callback;
3509 
3510 		if (team != NULL) {
3511 			// team specified -- get its address space
3512 			VMAddressSpace* addressSpace = team->address_space;
3513 			if (addressSpace == NULL) {
3514 				kprintf("Failed to get address space!\n");
3515 				return 0;
3516 			}
3517 
3518 			callback.SetAddressSpace(addressSpace);
3519 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3520 				physicalAddress, callback);
3521 		} else {
3522 			// no team specified -- iterate through all address spaces
3523 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3524 				addressSpace != NULL;
3525 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3526 				callback.SetAddressSpace(addressSpace);
3527 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3528 					physicalAddress, callback);
3529 			}
3530 		}
3531 	} else {
3532 		// get the address space
3533 		addr_t virtualAddress = (addr_t)addressValue;
3534 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3535 		VMAddressSpace* addressSpace;
3536 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3537 			addressSpace = VMAddressSpace::Kernel();
3538 		} else if (team != NULL) {
3539 			addressSpace = team->address_space;
3540 		} else {
3541 			Thread* thread = debug_get_debugged_thread();
3542 			if (thread == NULL || thread->team == NULL) {
3543 				kprintf("Failed to get team!\n");
3544 				return 0;
3545 			}
3546 
3547 			addressSpace = thread->team->address_space;
3548 		}
3549 
3550 		if (addressSpace == NULL) {
3551 			kprintf("Failed to get address space!\n");
3552 			return 0;
3553 		}
3554 
3555 		// let the translation map implementation do the job
3556 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3557 	}
3558 
3559 	return 0;
3560 }
3561 
3562 
3563 /*!	Deletes all areas and reserved regions in the given address space.
3564 
3565 	The caller must ensure that none of the areas has any wired ranges.
3566 
3567 	\param addressSpace The address space.
3568 	\param deletingAddressSpace \c true, if the address space is in the process
3569 		of being deleted.
3570 */
3571 void
3572 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3573 {
3574 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3575 		addressSpace->ID()));
3576 
3577 	addressSpace->WriteLock();
3578 
3579 	// remove all reserved areas in this address space
3580 	addressSpace->UnreserveAllAddressRanges(0);
3581 
3582 	// delete all the areas in this address space
3583 	while (VMArea* area = addressSpace->FirstArea()) {
3584 		ASSERT(!area->IsWired());
3585 		delete_area(addressSpace, area, deletingAddressSpace);
3586 	}
3587 
3588 	addressSpace->WriteUnlock();
3589 }
3590 
3591 
3592 static area_id
3593 vm_area_for(addr_t address, bool kernel)
3594 {
3595 	team_id team;
3596 	if (IS_USER_ADDRESS(address)) {
3597 		// we try the user team address space, if any
3598 		team = VMAddressSpace::CurrentID();
3599 		if (team < 0)
3600 			return team;
3601 	} else
3602 		team = VMAddressSpace::KernelID();
3603 
3604 	AddressSpaceReadLocker locker(team);
3605 	if (!locker.IsLocked())
3606 		return B_BAD_TEAM_ID;
3607 
3608 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3609 	if (area != NULL) {
3610 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3611 			return B_ERROR;
3612 
3613 		return area->id;
3614 	}
3615 
3616 	return B_ERROR;
3617 }
3618 
3619 
3620 /*!	Frees physical pages that were used during the boot process.
3621 	\a end is inclusive.
3622 */
3623 static void
3624 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3625 {
3626 	// free all physical pages in the specified range
3627 
3628 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3629 		phys_addr_t physicalAddress;
3630 		uint32 flags;
3631 
3632 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3633 			&& (flags & PAGE_PRESENT) != 0) {
3634 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3635 			if (page != NULL && page->State() != PAGE_STATE_FREE
3636 					 && page->State() != PAGE_STATE_CLEAR
3637 					 && page->State() != PAGE_STATE_UNUSED) {
3638 				DEBUG_PAGE_ACCESS_START(page);
3639 				vm_page_set_state(page, PAGE_STATE_FREE);
3640 			}
3641 		}
3642 	}
3643 
3644 	// unmap the memory
3645 	map->Unmap(start, end);
3646 }
3647 
3648 
3649 void
3650 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3651 {
3652 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3653 	addr_t end = start + (size - 1);
3654 	addr_t lastEnd = start;
3655 
3656 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3657 		(void*)start, (void*)end));
3658 
3659 	// The areas are sorted in virtual address space order, so
3660 	// we just have to find the holes between them that fall
3661 	// into the range we should dispose of
3662 
3663 	map->Lock();
3664 
3665 	for (VMAddressSpace::AreaIterator it
3666 				= VMAddressSpace::Kernel()->GetAreaIterator();
3667 			VMArea* area = it.Next();) {
3668 		addr_t areaStart = area->Base();
3669 		addr_t areaEnd = areaStart + (area->Size() - 1);
3670 
3671 		if (areaEnd < start)
3672 			continue;
3673 
3674 		if (areaStart > end) {
3675 			// we are done, the area is already beyond what we have to free
3676 			break;
3677 		}
3678 
3679 		if (areaStart > lastEnd) {
3680 			// this is something we can free
3681 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3682 				(void*)areaStart));
3683 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3684 		}
3685 
3686 		if (areaEnd >= end) {
3687 			lastEnd = areaEnd;
3688 				// no +1 to prevent potential overflow
3689 			break;
3690 		}
3691 
3692 		lastEnd = areaEnd + 1;
3693 	}
3694 
3695 	if (lastEnd < end) {
3696 		// we can also get rid of some space at the end of the area
3697 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3698 			(void*)end));
3699 		unmap_and_free_physical_pages(map, lastEnd, end);
3700 	}
3701 
3702 	map->Unlock();
3703 }
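

// Worked example (added for illustration; not part of the original source):
// assume the request covers [S, S + 0x8000) and the kernel areas inside that
// range are [S + 0x1000, S + 0x3000) and [S + 0x6000, S + 0x7000). The loop
// above then frees [S, S + 0x1000) and [S + 0x3000, S + 0x6000), and the
// final check after the loop frees the remaining tail [S + 0x7000, S + 0x8000).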
3704 
3705 
3706 static void
3707 create_preloaded_image_areas(struct preloaded_image* _image)
3708 {
3709 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3710 	char name[B_OS_NAME_LENGTH];
3711 	void* address;
3712 	int32 length;
3713 
3714 	// use file name to create a good area name
3715 	char* fileName = strrchr(image->name, '/');
3716 	if (fileName == NULL)
3717 		fileName = image->name;
3718 	else
3719 		fileName++;
3720 
3721 	length = strlen(fileName);
3722 	// make sure there is enough space for the suffix
3723 	if (length > 25)
3724 		length = 25;
3725 
3726 	memcpy(name, fileName, length);
3727 	strcpy(name + length, "_text");
3728 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3729 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3730 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3731 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3732 		// this will later be remapped read-only/executable by the
3733 		// ELF initialization code
3734 
3735 	strcpy(name + length, "_data");
3736 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3737 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3738 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3739 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3740 }
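

// Example (added for illustration; not part of the original source): for a
// preloaded image named "/boot/system/kernel_x86" the code above derives the
// area names "kernel_x86_text" and "kernel_x86_data". File names longer than
// 25 characters are truncated first, so the "_text"/"_data" suffix always
// fits into the B_OS_NAME_LENGTH sized name buffer.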
3741 
3742 
3743 /*!	Frees all kernel args areas that were previously allocated from the
3744 	kernel_args structure. Any boot loader resources contained in those
3745 	arguments must not be accessed anymore past this point.
3746 */
3747 void
3748 vm_free_kernel_args(kernel_args* args)
3749 {
3750 	uint32 i;
3751 
3752 	TRACE(("vm_free_kernel_args()\n"));
3753 
3754 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3755 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3756 		if (area >= B_OK)
3757 			delete_area(area);
3758 	}
3759 }
3760 
3761 
3762 static void
3763 allocate_kernel_args(kernel_args* args)
3764 {
3765 	TRACE(("allocate_kernel_args()\n"));
3766 
3767 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3768 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3769 
3770 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3771 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3772 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3773 	}
3774 }
3775 
3776 
3777 static void
3778 unreserve_boot_loader_ranges(kernel_args* args)
3779 {
3780 	TRACE(("unreserve_boot_loader_ranges()\n"));
3781 
3782 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3783 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3784 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3785 			args->virtual_allocated_range[i].size);
3786 	}
3787 }
3788 
3789 
3790 static void
3791 reserve_boot_loader_ranges(kernel_args* args)
3792 {
3793 	TRACE(("reserve_boot_loader_ranges()\n"));
3794 
3795 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3796 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3797 
3798 		// If the address is not a kernel address, we just skip it. The
3799 		// architecture specific code has to deal with it.
3800 		if (!IS_KERNEL_ADDRESS(address)) {
3801 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3802 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3803 			continue;
3804 		}
3805 
3806 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3807 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3808 		if (status < B_OK)
3809 			panic("could not reserve boot loader ranges\n");
3810 	}
3811 }
3812 
3813 
3814 static addr_t
3815 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3816 {
3817 	size = PAGE_ALIGN(size);
3818 
3819 	// find a slot in the virtual allocation addr range
3820 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3821 		// check to see if the space between this one and the last is big enough
3822 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3823 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3824 			+ args->virtual_allocated_range[i - 1].size;
3825 
3826 		addr_t base = alignment > 0
3827 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3828 
3829 		if (base >= KERNEL_BASE && base < rangeStart
3830 				&& rangeStart - base >= size) {
3831 			args->virtual_allocated_range[i - 1].size
3832 				+= base + size - previousRangeEnd;
3833 			return base;
3834 		}
3835 	}
3836 
3837 	// We didn't find a gap between the allocation ranges; that's OK.
3838 	// See if there's a gap after the last one.
3839 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3840 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3841 		+ args->virtual_allocated_range[lastEntryIndex].size;
3842 	addr_t base = alignment > 0
3843 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3844 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3845 		args->virtual_allocated_range[lastEntryIndex].size
3846 			+= base + size - lastRangeEnd;
3847 		return base;
3848 	}
3849 
3850 	// see if there's a gap before the first one
3851 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3852 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3853 		base = rangeStart - size;
3854 		if (alignment > 0)
3855 			base = ROUNDDOWN(base, alignment);
3856 
3857 		if (base >= KERNEL_BASE) {
3858 			args->virtual_allocated_range[0].start = base;
3859 			args->virtual_allocated_range[0].size += rangeStart - base;
3860 			return base;
3861 		}
3862 	}
3863 
3864 	return 0;
3865 }
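

// Worked example (added for illustration; not part of the original source):
// with allocated ranges [KERNEL_BASE + 0x1000, + 0x3000) and
// [KERNEL_BASE + 0x8000, + 0x9000), a request for 0x2000 bytes with an
// alignment of 0x1000 is satisfied from the gap between them: base becomes
// ROUNDUP(KERNEL_BASE + 0x3000, 0x1000) = KERNEL_BASE + 0x3000 and the first
// range is extended to cover [KERNEL_BASE + 0x1000, + 0x5000).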
3866 
3867 
3868 static bool
3869 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3870 {
3871 	// TODO: horrible brute-force method of determining if the page can be
3872 	// allocated
3873 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3874 		if (address >= args->physical_memory_range[i].start
3875 			&& address < args->physical_memory_range[i].start
3876 				+ args->physical_memory_range[i].size)
3877 			return true;
3878 	}
3879 	return false;
3880 }
3881 
3882 
3883 page_num_t
3884 vm_allocate_early_physical_page(kernel_args* args)
3885 {
3886 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3887 		phys_addr_t nextPage;
3888 
3889 		nextPage = args->physical_allocated_range[i].start
3890 			+ args->physical_allocated_range[i].size;
3891 		// see if the page after this allocated paddr run can be allocated
3892 		if (i + 1 < args->num_physical_allocated_ranges
3893 			&& args->physical_allocated_range[i + 1].size != 0) {
3894 			// see if the next page will collide with the next allocated range
3895 			if (nextPage >= args->physical_allocated_range[i+1].start)
3896 				continue;
3897 		}
3898 		// see if the next physical page fits in the memory block
3899 		if (is_page_in_physical_memory_range(args, nextPage)) {
3900 			// we got one!
3901 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3902 			return nextPage / B_PAGE_SIZE;
3903 		}
3904 	}
3905 
3906 	// Expanding upwards didn't work, try going downwards.
3907 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3908 		phys_addr_t nextPage;
3909 
3910 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
3911 		// see if the page before this allocated paddr run can be allocated
3912 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
3913 			// see if this page would collide with the previous allocated range
3914 			if (nextPage < args->physical_allocated_range[i-1].start
3915 				+ args->physical_allocated_range[i-1].size)
3916 				continue;
3917 		}
3918 		// see if the next physical page fits in the memory block
3919 		if (is_page_in_physical_memory_range(args, nextPage)) {
3920 			// we got one!
3921 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
3922 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3923 			return nextPage / B_PAGE_SIZE;
3924 		}
3925 	}
3926 
3927 	return 0;
3928 		// could not allocate a block
3929 }
3930 
3931 
3932 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3933 	allocate some pages before the VM is completely up.
3934 */
3935 addr_t
3936 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3937 	uint32 attributes, addr_t alignment)
3938 {
3939 	if (physicalSize > virtualSize)
3940 		physicalSize = virtualSize;
3941 
3942 	// find the vaddr to allocate at
3943 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3944 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3945 	if (virtualBase == 0) {
3946 		panic("vm_allocate_early: could not allocate virtual address\n");
3947 		return 0;
3948 	}
3949 
3950 	// map the pages
3951 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3952 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3953 		if (physicalAddress == 0)
3954 			panic("error allocating early page!\n");
3955 
3956 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3957 
3958 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3959 			physicalAddress * B_PAGE_SIZE, attributes,
3960 			&vm_allocate_early_physical_page);
3961 	}
3962 
3963 	return virtualBase;
3964 }
3965 
3966 
3967 /*!	The main entrance point to initialize the VM. */
3968 status_t
3969 vm_init(kernel_args* args)
3970 {
3971 	struct preloaded_image* image;
3972 	void* address;
3973 	status_t err = 0;
3974 	uint32 i;
3975 
3976 	TRACE(("vm_init: entry\n"));
3977 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3978 	err = arch_vm_init(args);
3979 
3980 	// initialize some globals
3981 	vm_page_init_num_pages(args);
3982 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3983 
3984 	slab_init(args);
3985 
3986 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3987 	off_t heapSize = INITIAL_HEAP_SIZE;
3988 	// try to accommodate low memory systems
3989 	while (heapSize > sAvailableMemory / 8)
3990 		heapSize /= 2;
3991 	if (heapSize < 1024 * 1024)
3992 		panic("vm_init: go buy some RAM please.");
3993 
3994 	// map in the new heap and initialize it
3995 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3996 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3997 	TRACE(("heap at 0x%lx\n", heapBase));
3998 	heap_init(heapBase, heapSize);
3999 #endif
4000 
4001 	// initialize the free page list and physical page mapper
4002 	vm_page_init(args);
4003 
4004 	// initialize the cache allocators
4005 	vm_cache_init(args);
4006 
4007 	{
4008 		status_t error = VMAreaHash::Init();
4009 		if (error != B_OK)
4010 			panic("vm_init: error initializing area hash table\n");
4011 	}
4012 
4013 	VMAddressSpace::Init();
4014 	reserve_boot_loader_ranges(args);
4015 
4016 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4017 	heap_init_post_area();
4018 #endif
4019 
4020 	// Do any further initialization that the architecture dependent layers may
4021 	// need now
4022 	arch_vm_translation_map_init_post_area(args);
4023 	arch_vm_init_post_area(args);
4024 	vm_page_init_post_area(args);
4025 	slab_init_post_area();
4026 
4027 	// allocate areas to represent stuff that already exists
4028 
4029 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4030 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4031 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4032 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4033 #endif
4034 
4035 	allocate_kernel_args(args);
4036 
4037 	create_preloaded_image_areas(args->kernel_image);
4038 
4039 	// allocate areas for preloaded images
4040 	for (image = args->preloaded_images; image != NULL; image = image->next)
4041 		create_preloaded_image_areas(image);
4042 
4043 	// allocate kernel stacks
4044 	for (i = 0; i < args->num_cpus; i++) {
4045 		char name[64];
4046 
4047 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4048 		address = (void*)args->cpu_kstack[i].start;
4049 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4050 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4051 	}
4052 
4053 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4054 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4055 
4056 #if PARANOID_KERNEL_MALLOC
4057 	vm_block_address_range("uninitialized heap memory",
4058 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4059 #endif
4060 #if PARANOID_KERNEL_FREE
4061 	vm_block_address_range("freed heap memory",
4062 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4063 #endif
4064 
4065 	// create the object cache for the page mappings
4066 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4067 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4068 		NULL, NULL);
4069 	if (gPageMappingsObjectCache == NULL)
4070 		panic("failed to create page mappings object cache");
4071 
4072 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4073 
4074 #if DEBUG_CACHE_LIST
4075 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4076 		virtual_address_restrictions virtualRestrictions = {};
4077 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4078 		physical_address_restrictions physicalRestrictions = {};
4079 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4080 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4081 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4082 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4083 			&physicalRestrictions, (void**)&sCacheInfoTable);
4084 	}
4085 #endif	// DEBUG_CACHE_LIST
4086 
4087 	// add some debugger commands
4088 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4089 	add_debugger_command("area", &dump_area,
4090 		"Dump info about a particular area");
4091 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4092 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4093 #if DEBUG_CACHE_LIST
4094 	if (sCacheInfoTable != NULL) {
4095 		add_debugger_command_etc("caches", &dump_caches,
4096 			"List all VMCache trees",
4097 			"[ \"-c\" ]\n"
4098 			"All cache trees are listed sorted in decreasing order by number "
4099 				"of\n"
4100 			"used pages or, if \"-c\" is specified, by size of committed "
4101 				"memory.\n",
4102 			0);
4103 	}
4104 #endif
4105 	add_debugger_command("avail", &dump_available_memory,
4106 		"Dump available memory");
4107 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4108 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4109 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4110 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4111 	add_debugger_command("string", &display_mem, "dump strings");
4112 
4113 	add_debugger_command_etc("mapping", &dump_mapping_info,
4114 		"Print address mapping information",
4115 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4116 		"Prints low-level page mapping information for a given address. If\n"
4117 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4118 		"address that is looked up in the translation map of the current\n"
4119 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4120 		"\"-r\" is specified, <address> is a physical address that is\n"
4121 		"searched in the translation map of all teams, respectively the team\n"
4122 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4123 		"<address> is the address of a vm_page structure. The behavior is\n"
4124 		"equivalent to specifying \"-r\" with the physical address of that\n"
4125 		"page.\n",
4126 		0);
4127 
4128 	TRACE(("vm_init: exit\n"));
4129 
4130 	vm_cache_init_post_heap();
4131 
4132 	return err;
4133 }
4134 
4135 
4136 status_t
4137 vm_init_post_sem(kernel_args* args)
4138 {
4139 	// This frees all unused boot loader resources and makes their space
4140 	// available again.
4141 	arch_vm_init_end(args);
4142 	unreserve_boot_loader_ranges(args);
4143 
4144 	// Fill in all of the semaphores that were not allocated before.
4145 	// Since we're still single threaded and only the kernel address space
4146 	// exists, it isn't that hard to find all of the ones we need to create.
4147 
4148 	arch_vm_translation_map_init_post_sem(args);
4149 
4150 	slab_init_post_sem();
4151 
4152 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4153 	heap_init_post_sem();
4154 #endif
4155 
4156 	return B_OK;
4157 }
4158 
4159 
4160 status_t
4161 vm_init_post_thread(kernel_args* args)
4162 {
4163 	vm_page_init_post_thread(args);
4164 	slab_init_post_thread();
4165 	return heap_init_post_thread();
4166 }
4167 
4168 
4169 status_t
4170 vm_init_post_modules(kernel_args* args)
4171 {
4172 	return arch_vm_init_post_modules(args);
4173 }
4174 
4175 
4176 void
4177 permit_page_faults(void)
4178 {
4179 	Thread* thread = thread_get_current_thread();
4180 	if (thread != NULL)
4181 		atomic_add(&thread->page_faults_allowed, 1);
4182 }
4183 
4184 
4185 void
4186 forbid_page_faults(void)
4187 {
4188 	Thread* thread = thread_get_current_thread();
4189 	if (thread != NULL)
4190 		atomic_add(&thread->page_faults_allowed, -1);
4191 }
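

#if 0
// Illustrative sketch (not part of the original source): the two functions
// above maintain a per-thread nesting counter, so a hypothetical caller that
// touches pageable memory can bracket the access like this; such sections
// may nest.
static status_t
example_copy_from_user(void* kernelBuffer, const void* userBuffer, size_t size)
{
	permit_page_faults();
	status_t error = user_memcpy(kernelBuffer, userBuffer, size);
	forbid_page_faults();
	return error;
}
#endif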
4192 
4193 
4194 status_t
4195 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4196 	bool isUser, addr_t* newIP)
4197 {
4198 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4199 		faultAddress));
4200 
4201 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4202 
4203 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4204 	VMAddressSpace* addressSpace = NULL;
4205 
4206 	status_t status = B_OK;
4207 	*newIP = 0;
4208 	atomic_add((int32*)&sPageFaults, 1);
4209 
4210 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4211 		addressSpace = VMAddressSpace::GetKernel();
4212 	} else if (IS_USER_ADDRESS(pageAddress)) {
4213 		addressSpace = VMAddressSpace::GetCurrent();
4214 		if (addressSpace == NULL) {
4215 			if (!isUser) {
4216 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4217 					"memory!\n");
4218 				status = B_BAD_ADDRESS;
4219 				TPF(PageFaultError(-1,
4220 					VMPageFaultTracing
4221 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4222 			} else {
4223 				// XXX weird state.
4224 				panic("vm_page_fault: non kernel thread accessing user memory "
4225 					"that doesn't exist!\n");
4226 				status = B_BAD_ADDRESS;
4227 			}
4228 		}
4229 	} else {
4230 		// The hit was probably in the 64k DMZ between kernel and user space;
4231 		// this keeps a user space thread from passing a buffer that crosses
4232 		// into kernel space.
4233 		status = B_BAD_ADDRESS;
4234 		TPF(PageFaultError(-1,
4235 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4236 	}
4237 
4238 	if (status == B_OK) {
4239 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4240 			isUser, NULL);
4241 	}
4242 
4243 	if (status < B_OK) {
4244 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4245 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4246 			strerror(status), address, faultAddress, isWrite, isUser,
4247 			thread_get_current_thread_id());
4248 		if (!isUser) {
4249 			Thread* thread = thread_get_current_thread();
4250 			if (thread != NULL && thread->fault_handler != 0) {
4251 				// this will cause the arch dependent page fault handler to
4252 				// modify the IP on the interrupt frame or whatever to return
4253 				// to this address
4254 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4255 			} else {
4256 				// unhandled page fault in the kernel
4257 				panic("vm_page_fault: unhandled page fault in kernel space at "
4258 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4259 			}
4260 		} else {
4261 			Thread* thread = thread_get_current_thread();
4262 
4263 #ifdef TRACE_FAULTS
4264 			VMArea* area = NULL;
4265 			if (addressSpace != NULL) {
4266 				addressSpace->ReadLock();
4267 				area = addressSpace->LookupArea(faultAddress);
4268 			}
4269 
4270 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4271 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4272 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4273 				thread->team->Name(), thread->team->id,
4274 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4275 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4276 					area->Base() : 0x0));
4277 
4278 			if (addressSpace != NULL)
4279 				addressSpace->ReadUnlock();
4280 #endif
4281 
4282 			// If the thread has a signal handler for SIGSEGV, we simply
4283 			// send it the signal. Otherwise we notify the user debugger
4284 			// first.
4285 			struct sigaction action;
4286 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4287 					&& action.sa_handler != SIG_DFL
4288 					&& action.sa_handler != SIG_IGN)
4289 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4290 					SIGSEGV)) {
4291 				Signal signal(SIGSEGV,
4292 					status == B_PERMISSION_DENIED
4293 						? SEGV_ACCERR : SEGV_MAPERR,
4294 					EFAULT, thread->team->id);
4295 				signal.SetAddress((void*)address);
4296 				send_signal_to_thread(thread, signal, 0);
4297 			}
4298 		}
4299 	}
4300 
4301 	if (addressSpace != NULL)
4302 		addressSpace->Put();
4303 
4304 	return B_HANDLED_INTERRUPT;
4305 }
4306 
4307 
4308 struct PageFaultContext {
4309 	AddressSpaceReadLocker	addressSpaceLocker;
4310 	VMCacheChainLocker		cacheChainLocker;
4311 
4312 	VMTranslationMap*		map;
4313 	VMCache*				topCache;
4314 	off_t					cacheOffset;
4315 	vm_page_reservation		reservation;
4316 	bool					isWrite;
4317 
4318 	// return values
4319 	vm_page*				page;
4320 	bool					restart;
4321 	bool					pageAllocated;
4322 
4323 
4324 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4325 		:
4326 		addressSpaceLocker(addressSpace, true),
4327 		map(addressSpace->TranslationMap()),
4328 		isWrite(isWrite)
4329 	{
4330 	}
4331 
4332 	~PageFaultContext()
4333 	{
4334 		UnlockAll();
4335 		vm_page_unreserve_pages(&reservation);
4336 	}
4337 
4338 	void Prepare(VMCache* topCache, off_t cacheOffset)
4339 	{
4340 		this->topCache = topCache;
4341 		this->cacheOffset = cacheOffset;
4342 		page = NULL;
4343 		restart = false;
4344 		pageAllocated = false;
4345 
4346 		cacheChainLocker.SetTo(topCache);
4347 	}
4348 
4349 	void UnlockAll(VMCache* exceptCache = NULL)
4350 	{
4351 		topCache = NULL;
4352 		addressSpaceLocker.Unlock();
4353 		cacheChainLocker.Unlock(exceptCache);
4354 	}
4355 };
4356 
4357 
4358 /*!	Gets the page that should be mapped into the area.
4359 	Returns an error code other than \c B_OK, if the page couldn't be found or
4360 	paged in. The locking state of the address space and the caches is undefined
4361 	in that case.
4362 	Returns \c B_OK with \c context.restart set to \c true, if the functions
4363 	had to unlock the address space and all caches and is supposed to be called
4364 	again.
4365 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4366 	found. It is returned in \c context.page. The address space will still be
4367 	locked as well as all caches starting from the top cache to at least the
4368 	cache the page lives in.
4369 */
4370 static status_t
4371 fault_get_page(PageFaultContext& context)
4372 {
4373 	VMCache* cache = context.topCache;
4374 	VMCache* lastCache = NULL;
4375 	vm_page* page = NULL;
4376 
4377 	while (cache != NULL) {
4378 		// We already hold the lock of the cache at this point.
4379 
4380 		lastCache = cache;
4381 
4382 		page = cache->LookupPage(context.cacheOffset);
4383 		if (page != NULL && page->busy) {
4384 			// the page is busy -- wait for it to become unbusy
4385 			context.UnlockAll(cache);
4386 			cache->ReleaseRefLocked();
4387 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4388 
4389 			// restart the whole process
4390 			context.restart = true;
4391 			return B_OK;
4392 		}
4393 
4394 		if (page != NULL)
4395 			break;
4396 
4397 		// The current cache does not contain the page we're looking for.
4398 
4399 		// see if the backing store has it
4400 		if (cache->HasPage(context.cacheOffset)) {
4401 			// insert a fresh page and mark it busy -- we're going to read it in
4402 			page = vm_page_allocate_page(&context.reservation,
4403 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4404 			cache->InsertPage(page, context.cacheOffset);
4405 
4406 			// We need to unlock all caches and the address space while reading
4407 			// the page in. Keep a reference to the cache around.
4408 			cache->AcquireRefLocked();
4409 			context.UnlockAll();
4410 
4411 			// read the page in
4412 			generic_io_vec vec;
4413 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4414 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4415 
4416 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4417 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4418 
4419 			cache->Lock();
4420 
4421 			if (status < B_OK) {
4422 				// on error remove and free the page
4423 				dprintf("reading page from cache %p returned: %s!\n",
4424 					cache, strerror(status));
4425 
4426 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4427 				cache->RemovePage(page);
4428 				vm_page_set_state(page, PAGE_STATE_FREE);
4429 
4430 				cache->ReleaseRefAndUnlock();
4431 				return status;
4432 			}
4433 
4434 			// mark the page unbusy again
4435 			cache->MarkPageUnbusy(page);
4436 
4437 			DEBUG_PAGE_ACCESS_END(page);
4438 
4439 			// Since we needed to unlock everything temporarily, the area
4440 			// situation might have changed. So we need to restart the whole
4441 			// process.
4442 			cache->ReleaseRefAndUnlock();
4443 			context.restart = true;
4444 			return B_OK;
4445 		}
4446 
4447 		cache = context.cacheChainLocker.LockSourceCache();
4448 	}
4449 
4450 	if (page == NULL) {
4451 		// There was no adequate page; determine the cache for a clean one.
4452 		// Read-only pages go into the deepest cache; only the topmost cache
4453 		// may have direct write access.
4454 		cache = context.isWrite ? context.topCache : lastCache;
4455 
4456 		// allocate a clean page
4457 		page = vm_page_allocate_page(&context.reservation,
4458 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4459 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4460 			page->physical_page_number));
4461 
4462 		// insert the new page into our cache
4463 		cache->InsertPage(page, context.cacheOffset);
4464 		context.pageAllocated = true;
4465 	} else if (page->Cache() != context.topCache && context.isWrite) {
4466 		// We have a page that has the data we want, but in the wrong cache
4467 		// object so we need to copy it and stick it into the top cache.
4468 		vm_page* sourcePage = page;
4469 
4470 		// TODO: If memory is low, it might be a good idea to steal the page
4471 		// from our source cache -- if possible, that is.
4472 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4473 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4474 
4475 		// To not needlessly kill concurrency we unlock all caches but the top
4476 		// one while copying the page. Lacking another mechanism to ensure that
4477 		// the source page doesn't disappear, we mark it busy.
4478 		sourcePage->busy = true;
4479 		context.cacheChainLocker.UnlockKeepRefs(true);
4480 
4481 		// copy the page
4482 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4483 			sourcePage->physical_page_number * B_PAGE_SIZE);
4484 
4485 		context.cacheChainLocker.RelockCaches(true);
4486 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4487 
4488 		// insert the new page into our cache
4489 		context.topCache->InsertPage(page, context.cacheOffset);
4490 		context.pageAllocated = true;
4491 	} else
4492 		DEBUG_PAGE_ACCESS_START(page);
4493 
4494 	context.page = page;
4495 	return B_OK;
4496 }
4497 
4498 
4499 /*!	Makes sure the address in the given address space is mapped.
4500 
4501 	\param addressSpace The address space.
4502 	\param originalAddress The address. Doesn't need to be page aligned.
4503 	\param isWrite If \c true the address shall be write-accessible.
4504 	\param isUser If \c true the access is requested by a userland team.
4505 	\param wirePage On success, if non \c NULL, the wired count of the page
4506 		mapped at the given address is incremented and the page is returned
4507 		via this parameter.
4508 	\return \c B_OK on success, another error code otherwise.
4509 */
4510 static status_t
4511 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4512 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4513 {
4514 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4515 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4516 		originalAddress, isWrite, isUser));
4517 
4518 	PageFaultContext context(addressSpace, isWrite);
4519 
4520 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4521 	status_t status = B_OK;
4522 
4523 	addressSpace->IncrementFaultCount();
4524 
4525 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4526 	// the pages upfront makes sure we don't have any cache locked, so that the
4527 	// page daemon/thief can do their job without problems.
4528 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4529 		originalAddress);
4530 	context.addressSpaceLocker.Unlock();
4531 	vm_page_reserve_pages(&context.reservation, reservePages,
4532 		addressSpace == VMAddressSpace::Kernel()
4533 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4534 
4535 	while (true) {
4536 		context.addressSpaceLocker.Lock();
4537 
4538 		// get the area the fault was in
4539 		VMArea* area = addressSpace->LookupArea(address);
4540 		if (area == NULL) {
4541 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4542 				"space\n", originalAddress);
4543 			TPF(PageFaultError(-1,
4544 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4545 			status = B_BAD_ADDRESS;
4546 			break;
4547 		}
4548 
4549 		// check permissions
4550 		uint32 protection = get_area_page_protection(area, address);
4551 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4552 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4553 				area->id, (void*)originalAddress);
4554 			TPF(PageFaultError(area->id,
4555 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4556 			status = B_PERMISSION_DENIED;
4557 			break;
4558 		}
4559 		if (isWrite && (protection
4560 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4561 			dprintf("write access attempted on write-protected area 0x%"
4562 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4563 			TPF(PageFaultError(area->id,
4564 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4565 			status = B_PERMISSION_DENIED;
4566 			break;
4567 		} else if (isExecute && (protection
4568 				& (B_EXECUTE_AREA
4569 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4570 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4571 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4572 			TPF(PageFaultError(area->id,
4573 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4574 			status = B_PERMISSION_DENIED;
4575 			break;
4576 		} else if (!isWrite && !isExecute && (protection
4577 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4578 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4579 				" at %p\n", area->id, (void*)originalAddress);
4580 			TPF(PageFaultError(area->id,
4581 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4582 			status = B_PERMISSION_DENIED;
4583 			break;
4584 		}
4585 
4586 		// We have the area, it was a valid access, so let's try to resolve the
4587 		// page fault now.
4588 		// At first, the top most cache from the area is investigated.
4589 
4590 		context.Prepare(vm_area_get_locked_cache(area),
4591 			address - area->Base() + area->cache_offset);
4592 
4593 		// See if this cache has a fault handler -- this will do all the work
4594 		// for us.
4595 		{
4596 			// Note, since the page fault is resolved with interrupts enabled,
4597 			// the fault handler could be called more than once for the same
4598 			// reason -- the store must take this into account.
4599 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4600 			if (status != B_BAD_HANDLER)
4601 				break;
4602 		}
4603 
4604 		// The top most cache has no fault handler, so let's see if the cache or
4605 		// its sources already have the page we're searching for (we're going
4606 		// from top to bottom).
4607 		status = fault_get_page(context);
4608 		if (status != B_OK) {
4609 			TPF(PageFaultError(area->id, status));
4610 			break;
4611 		}
4612 
4613 		if (context.restart)
4614 			continue;
4615 
4616 		// All went fine, all there is left to do is to map the page into the
4617 		// address space.
4618 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4619 			context.page));
4620 
4621 		// If the page doesn't reside in the area's cache, we need to make sure
4622 		// it's mapped in read-only, so that we cannot overwrite someone else's
4623 		// data (copy-on-write)
4624 		uint32 newProtection = protection;
4625 		if (context.page->Cache() != context.topCache && !isWrite)
4626 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4627 
4628 		bool unmapPage = false;
4629 		bool mapPage = true;
4630 
4631 		// check whether there's already a page mapped at the address
4632 		context.map->Lock();
4633 
4634 		phys_addr_t physicalAddress;
4635 		uint32 flags;
4636 		vm_page* mappedPage = NULL;
4637 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4638 			&& (flags & PAGE_PRESENT) != 0
4639 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4640 				!= NULL) {
4641 			// Yep there's already a page. If it's ours, we can simply adjust
4642 			// its protection. Otherwise we have to unmap it.
4643 			if (mappedPage == context.page) {
4644 				context.map->ProtectPage(area, address, newProtection);
4645 					// Note: We assume that ProtectPage() is atomic (i.e.
4646 					// the page isn't temporarily unmapped), otherwise we'd have
4647 					// to make sure it isn't wired.
4648 				mapPage = false;
4649 			} else
4650 				unmapPage = true;
4651 		}
4652 
4653 		context.map->Unlock();
4654 
4655 		if (unmapPage) {
4656 			// If the page is wired, we can't unmap it. Wait until it is unwired
4657 			// again and restart. Note that the page cannot be wired for
4658 			// writing, since it isn't in the topmost cache. So we can safely
4659 			// ignore ranges wired for writing (our own and other concurrent
4660 			// wiring attempts in progress) and in fact have to do that to avoid
4661 			// a deadlock.
4662 			VMAreaUnwiredWaiter waiter;
4663 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4664 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4665 				// unlock everything and wait
4666 				if (context.pageAllocated) {
4667 					// ... but since we allocated a page and inserted it into
4668 					// the top cache, remove and free it first. Otherwise we'd
4669 					// have a page from a lower cache mapped while an upper
4670 					// cache has a page that would shadow it.
4671 					context.topCache->RemovePage(context.page);
4672 					vm_page_free_etc(context.topCache, context.page,
4673 						&context.reservation);
4674 				} else
4675 					DEBUG_PAGE_ACCESS_END(context.page);
4676 
4677 				context.UnlockAll();
4678 				waiter.waitEntry.Wait();
4679 				continue;
4680 			}
4681 
4682 			// Note: The mapped page is a page of a lower cache. We are
4683 			// guaranteed to have that cache locked, our new page is a copy of
4684 			// that page, and the page is not busy. The logic for that guarantee
4685 			// is as follows: Since the page is mapped, it must live in the top
4686 			// cache (ruled out above) or any of its lower caches, and there is
4687 			// (was before the new page was inserted) no other page in any
4688 			// cache between the top cache and the page's cache (otherwise that
4689 			// would be mapped instead). That in turn means that our algorithm
4690 			// must have found it and therefore it cannot be busy either.
4691 			DEBUG_PAGE_ACCESS_START(mappedPage);
4692 			unmap_page(area, address);
4693 			DEBUG_PAGE_ACCESS_END(mappedPage);
4694 		}
4695 
4696 		if (mapPage) {
4697 			if (map_page(area, context.page, address, newProtection,
4698 					&context.reservation) != B_OK) {
4699 				// Mapping can only fail when the page mapping object couldn't
4700 				// be allocated. Save for the missing mapping, everything is
4701 				// fine, though. If this was a regular page fault, we'll simply
4702 				// leave and probably fault again. To make sure we'll have more
4703 				// luck then, we ensure that the minimum object reserve is
4704 				// available.
4705 				DEBUG_PAGE_ACCESS_END(context.page);
4706 
4707 				context.UnlockAll();
4708 
4709 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4710 						!= B_OK) {
4711 					// Apparently the situation is serious. Let's get ourselves
4712 					// killed.
4713 					status = B_NO_MEMORY;
4714 				} else if (wirePage != NULL) {
4715 					// The caller expects us to wire the page. Since
4716 					// object_cache_reserve() succeeded, we should now be able
4717 					// to allocate a mapping structure. Restart.
4718 					continue;
4719 				}
4720 
4721 				break;
4722 			}
4723 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4724 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4725 
4726 		// also wire the page, if requested
4727 		if (wirePage != NULL && status == B_OK) {
4728 			increment_page_wired_count(context.page);
4729 			*wirePage = context.page;
4730 		}
4731 
4732 		DEBUG_PAGE_ACCESS_END(context.page);
4733 
4734 		break;
4735 	}
4736 
4737 	return status;
4738 }
4739 
4740 
4741 status_t
4742 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4743 {
4744 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4745 }
4746 
4747 status_t
4748 vm_put_physical_page(addr_t vaddr, void* handle)
4749 {
4750 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4751 }
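

#if 0
// Illustrative sketch (not part of the original source): temporarily map a
// physical page into the kernel address space, clear it, and release the
// mapping again. The address is expected to be page aligned.
static status_t
example_clear_physical_page(phys_addr_t physicalAddress)
{
	addr_t virtualAddress;
	void* handle;
	status_t error = vm_get_physical_page(physicalAddress, &virtualAddress,
		&handle);
	if (error != B_OK)
		return error;

	memset((void*)virtualAddress, 0, B_PAGE_SIZE);

	vm_put_physical_page(virtualAddress, handle);
	return B_OK;
}
#endif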
4752 
4753 
4754 status_t
4755 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4756 	void** _handle)
4757 {
4758 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4759 }
4760 
4761 status_t
4762 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4763 {
4764 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4765 }
4766 
4767 
4768 status_t
4769 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4770 {
4771 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4772 }
4773 
4774 status_t
4775 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4776 {
4777 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4778 }
4779 
4780 
4781 void
4782 vm_get_info(system_info* info)
4783 {
4784 	swap_get_info(info);
4785 
4786 	MutexLocker locker(sAvailableMemoryLock);
4787 	info->needed_memory = sNeededMemory;
4788 	info->free_memory = sAvailableMemory;
4789 }
4790 
4791 
4792 uint32
4793 vm_num_page_faults(void)
4794 {
4795 	return sPageFaults;
4796 }
4797 
4798 
4799 off_t
4800 vm_available_memory(void)
4801 {
4802 	MutexLocker locker(sAvailableMemoryLock);
4803 	return sAvailableMemory;
4804 }
4805 
4806 
4807 off_t
4808 vm_available_not_needed_memory(void)
4809 {
4810 	MutexLocker locker(sAvailableMemoryLock);
4811 	return sAvailableMemory - sNeededMemory;
4812 }
4813 
4814 
4815 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4816 	debugger.
4817 */
4818 off_t
4819 vm_available_not_needed_memory_debug(void)
4820 {
4821 	return sAvailableMemory - sNeededMemory;
4822 }
4823 
4824 
4825 size_t
4826 vm_kernel_address_space_left(void)
4827 {
4828 	return VMAddressSpace::Kernel()->FreeSpace();
4829 }
4830 
4831 
4832 void
4833 vm_unreserve_memory(size_t amount)
4834 {
4835 	mutex_lock(&sAvailableMemoryLock);
4836 
4837 	sAvailableMemory += amount;
4838 
4839 	mutex_unlock(&sAvailableMemoryLock);
4840 }
4841 
4842 
4843 status_t
4844 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4845 {
4846 	size_t reserve = kMemoryReserveForPriority[priority];
4847 
4848 	MutexLocker locker(sAvailableMemoryLock);
4849 
4850 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4851 
4852 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4853 		sAvailableMemory -= amount;
4854 		return B_OK;
4855 	}
4856 
4857 	if (timeout <= 0)
4858 		return B_NO_MEMORY;
4859 
4860 	// turn timeout into an absolute timeout
4861 	timeout += system_time();
4862 
4863 	// loop until we've got the memory or the timeout occurs
4864 	do {
4865 		sNeededMemory += amount;
4866 
4867 		// call the low resource manager
4868 		locker.Unlock();
4869 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4870 			B_ABSOLUTE_TIMEOUT, timeout);
4871 		locker.Lock();
4872 
4873 		sNeededMemory -= amount;
4874 
4875 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4876 			sAvailableMemory -= amount;
4877 			return B_OK;
4878 		}
4879 	} while (timeout > system_time());
4880 
4881 	return B_NO_MEMORY;
4882 }
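

#if 0
// Illustrative sketch (not part of the original source): a hypothetical
// caller reserves memory before committing it -- waiting up to one second
// here -- and returns the reservation when the commitment is released again.
static status_t
example_commit(size_t size)
{
	if (vm_try_reserve_memory(size, VM_PRIORITY_USER, 1000000) != B_OK)
		return B_NO_MEMORY;

	// ... commit and use the memory ...

	vm_unreserve_memory(size);
	return B_OK;
}
#endif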
4883 
4884 
4885 status_t
4886 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4887 {
4888 	// NOTE: The caller is responsible for synchronizing calls to this function!
4889 
4890 	AddressSpaceReadLocker locker;
4891 	VMArea* area;
4892 	status_t status = locker.SetFromArea(id, area);
4893 	if (status != B_OK)
4894 		return status;
4895 
4896 	// nothing to do, if the type doesn't change
4897 	uint32 oldType = area->MemoryType();
4898 	if (type == oldType)
4899 		return B_OK;
4900 
4901 	// set the memory type of the area and the mapped pages
4902 	VMTranslationMap* map = area->address_space->TranslationMap();
4903 	map->Lock();
4904 	area->SetMemoryType(type);
4905 	map->ProtectArea(area, area->protection);
4906 	map->Unlock();
4907 
4908 	// set the physical memory type
4909 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4910 	if (error != B_OK) {
4911 		// reset the memory type of the area and the mapped pages
4912 		map->Lock();
4913 		area->SetMemoryType(oldType);
4914 		map->ProtectArea(area, area->protection);
4915 		map->Unlock();
4916 		return error;
4917 	}
4918 
4919 	return B_OK;
4921 }
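

#if 0
// Illustrative sketch (assumption, not taken from this file): a graphics
// driver could mark the area mapping its frame buffer as write-combined.
static status_t
example_enable_write_combining(area_id frameBufferArea,
	phys_addr_t frameBufferPhysicalBase)
{
	return vm_set_area_memory_type(frameBufferArea, frameBufferPhysicalBase,
		B_MTR_WC);
}
#endif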
4922 
4923 
4924 /*!	This function enforces some protection properties:
4925 	 - kernel areas must be W^X (after kernel startup)
4926 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4927 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4928 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4929 	   and B_KERNEL_WRITE_AREA.
4930 */
4931 static void
4932 fix_protection(uint32* protection)
4933 {
4934 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
4935 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
4936 			|| (*protection & B_WRITE_AREA) != 0)
4937 		&& !gKernelStartup)
4938 		panic("kernel areas cannot be both writable and executable!");
4939 
4940 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4941 		if ((*protection & B_USER_PROTECTION) == 0
4942 			|| (*protection & B_WRITE_AREA) != 0)
4943 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4944 		else
4945 			*protection |= B_KERNEL_READ_AREA;
4946 	}
4947 }
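

// Examples (added for illustration; not part of the original source):
//  - B_READ_AREA                 becomes B_READ_AREA | B_KERNEL_READ_AREA
//  - B_READ_AREA | B_WRITE_AREA  additionally gets B_KERNEL_READ_AREA and
//                                B_KERNEL_WRITE_AREA
//  - 0 (nothing specified)       becomes B_KERNEL_READ_AREA
//                                | B_KERNEL_WRITE_AREA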
4948 
4949 
4950 static void
4951 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4952 {
4953 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4954 	info->area = area->id;
4955 	info->address = (void*)area->Base();
4956 	info->size = area->Size();
4957 	info->protection = area->protection;
4958 	info->lock = B_FULL_LOCK;
4959 	info->team = area->address_space->ID();
4960 	info->copy_count = 0;
4961 	info->in_count = 0;
4962 	info->out_count = 0;
4963 		// TODO: retrieve real values here!
4964 
4965 	VMCache* cache = vm_area_get_locked_cache(area);
4966 
4967 	// Note, this is a simplification; the cache could be larger than this area
4968 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4969 
4970 	vm_area_put_locked_cache(cache);
4971 }
4972 
4973 
4974 static status_t
4975 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4976 {
4977 	// is newSize a multiple of B_PAGE_SIZE?
4978 	if (newSize & (B_PAGE_SIZE - 1))
4979 		return B_BAD_VALUE;
4980 
4981 	// lock all affected address spaces and the cache
4982 	VMArea* area;
4983 	VMCache* cache;
4984 
4985 	MultiAddressSpaceLocker locker;
4986 	AreaCacheLocker cacheLocker;
4987 
4988 	status_t status;
4989 	size_t oldSize;
4990 	bool anyKernelArea;
4991 	bool restart;
4992 
4993 	do {
4994 		anyKernelArea = false;
4995 		restart = false;
4996 
4997 		locker.Unset();
4998 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4999 		if (status != B_OK)
5000 			return status;
5001 		cacheLocker.SetTo(cache, true);	// already locked
5002 
5003 		// enforce restrictions
5004 		if (!kernel && area->address_space == VMAddressSpace::Kernel()) {
5005 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5006 				"resize kernel area %" B_PRId32 " (%s)\n",
5007 				team_get_current_team_id(), areaID, area->name);
5008 			return B_NOT_ALLOWED;
5009 		}
5010 		// TODO: Enforce all restrictions (team, etc.)!
5011 
5012 		oldSize = area->Size();
5013 		if (newSize == oldSize)
5014 			return B_OK;
5015 
5016 		if (cache->type != CACHE_TYPE_RAM)
5017 			return B_NOT_ALLOWED;
5018 
5019 		if (oldSize < newSize) {
5020 			// We need to check if all areas of this cache can be resized.
5021 			for (VMArea* current = cache->areas; current != NULL;
5022 					current = current->cache_next) {
5023 				if (!current->address_space->CanResizeArea(current, newSize))
5024 					return B_ERROR;
5025 				anyKernelArea
5026 					|= current->address_space == VMAddressSpace::Kernel();
5027 			}
5028 		} else {
5029 			// We're shrinking the areas, so we must make sure the affected
5030 			// ranges are not wired.
5031 			for (VMArea* current = cache->areas; current != NULL;
5032 					current = current->cache_next) {
5033 				anyKernelArea
5034 					|= current->address_space == VMAddressSpace::Kernel();
5035 
5036 				if (wait_if_area_range_is_wired(current,
5037 						current->Base() + newSize, oldSize - newSize, &locker,
5038 						&cacheLocker)) {
5039 					restart = true;
5040 					break;
5041 				}
5042 			}
5043 		}
5044 	} while (restart);
5045 
5046 	// Okay, looks good so far, so let's do it
5047 
5048 	int priority = kernel && anyKernelArea
5049 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5050 	uint32 allocationFlags = kernel && anyKernelArea
5051 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5052 
5053 	if (oldSize < newSize) {
5054 		// Growing the cache can fail, so we do it first.
5055 		status = cache->Resize(cache->virtual_base + newSize, priority);
5056 		if (status != B_OK)
5057 			return status;
5058 	}
5059 
5060 	for (VMArea* current = cache->areas; current != NULL;
5061 			current = current->cache_next) {
5062 		status = current->address_space->ResizeArea(current, newSize,
5063 			allocationFlags);
5064 		if (status != B_OK)
5065 			break;
5066 
5067 		// We also need to unmap all pages beyond the new size, if the area has
5068 		// shrunk
5069 		if (newSize < oldSize) {
5070 			VMCacheChainLocker cacheChainLocker(cache);
5071 			cacheChainLocker.LockAllSourceCaches();
5072 
5073 			unmap_pages(current, current->Base() + newSize,
5074 				oldSize - newSize);
5075 
5076 			cacheChainLocker.Unlock(cache);
5077 		}
5078 	}
5079 
5080 	if (status == B_OK) {
5081 		// Shrink or grow individual page protections if in use.
5082 		if (area->page_protections != NULL) {
5083 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5084 			uint8* newProtections
5085 				= (uint8*)realloc(area->page_protections, bytes);
5086 			if (newProtections == NULL)
5087 				status = B_NO_MEMORY;
5088 			else {
5089 				area->page_protections = newProtections;
5090 
5091 				if (oldSize < newSize) {
5092 					// init the additional page protections to that of the area
5093 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5094 					uint32 areaProtection = area->protection
5095 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5096 					memset(area->page_protections + offset,
5097 						areaProtection | (areaProtection << 4), bytes - offset);
5098 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5099 						uint8& entry = area->page_protections[offset - 1];
5100 						entry = (entry & 0x0f) | (areaProtection << 4);
5101 					}
5102 				}
5103 			}
5104 		}
5105 	}
5106 
5107 	// shrinking the cache can't fail, so we do it now
5108 	if (status == B_OK && newSize < oldSize)
5109 		status = cache->Resize(cache->virtual_base + newSize, priority);
5110 
5111 	if (status != B_OK) {
5112 		// Something failed -- resize the areas back to their original size.
5113 		// This can fail, too, in which case we're seriously screwed.
5114 		for (VMArea* current = cache->areas; current != NULL;
5115 				current = current->cache_next) {
5116 			if (current->address_space->ResizeArea(current, oldSize,
5117 					allocationFlags) != B_OK) {
5118 				panic("vm_resize_area(): failed to resize the area and could "
5119 					"not restore the original state.");
5120 			}
5121 		}
5122 
5123 		cache->Resize(cache->virtual_base + oldSize, priority);
5124 	}
5125 
5126 	// TODO: we must honour the lock restrictions of this area
5127 	return status;
5128 }
5129 
5130 
5131 status_t
5132 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5133 {
5134 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5135 }
5136 
5137 
5138 status_t
5139 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5140 {
5141 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5142 }
5143 
5144 
5145 status_t
5146 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5147 	bool user)
5148 {
5149 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5150 }
5151 
5152 
5153 void
5154 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5155 {
5156 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5157 }
5158 
5159 
5160 /*!	Copies a range of memory directly from/to a page that might not be mapped
5161 	at the moment.
5162 
5163 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5164 	walks through the respective area's cache chain to find the physical page
5165 	and copies from/to it directly.
5166 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5167 	must not cross a page boundary.
5168 
5169 	\param teamID The team ID identifying the address space \a unsafeMemory is
5170 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5171 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5172 		is passed, the address space of the thread returned by
5173 		debug_get_debugged_thread() is used.
5174 	\param unsafeMemory The start of the unsafe memory range to be copied
5175 		from/to.
5176 	\param buffer A safely accessible kernel buffer to be copied from/to.
5177 	\param size The number of bytes to be copied.
5178 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5179 		\a unsafeMemory, the other way around otherwise.
5180 */
5181 status_t
5182 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5183 	size_t size, bool copyToUnsafe)
5184 {
5185 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5186 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5187 		return B_BAD_VALUE;
5188 	}
5189 
5190 	// get the address space for the debugged thread
5191 	VMAddressSpace* addressSpace;
5192 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5193 		addressSpace = VMAddressSpace::Kernel();
5194 	} else if (teamID == B_CURRENT_TEAM) {
5195 		Thread* thread = debug_get_debugged_thread();
5196 		if (thread == NULL || thread->team == NULL)
5197 			return B_BAD_ADDRESS;
5198 
5199 		addressSpace = thread->team->address_space;
5200 	} else
5201 		addressSpace = VMAddressSpace::DebugGet(teamID);
5202 
5203 	if (addressSpace == NULL)
5204 		return B_BAD_ADDRESS;
5205 
5206 	// get the area
5207 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5208 	if (area == NULL)
5209 		return B_BAD_ADDRESS;
5210 
5211 	// search the page
5212 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5213 		+ area->cache_offset;
5214 	VMCache* cache = area->cache;
5215 	vm_page* page = NULL;
5216 	while (cache != NULL) {
5217 		page = cache->DebugLookupPage(cacheOffset);
5218 		if (page != NULL)
5219 			break;
5220 
5221 		// Page not found in this cache -- if it is paged out, we must not try
5222 		// to get it from lower caches.
5223 		if (cache->DebugHasPage(cacheOffset))
5224 			break;
5225 
5226 		cache = cache->source;
5227 	}
5228 
5229 	if (page == NULL)
5230 		return B_UNSUPPORTED;
5231 
5232 	// copy from/to physical memory
5233 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5234 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5235 
5236 	if (copyToUnsafe) {
5237 		if (page->Cache() != area->cache)
5238 			return B_UNSUPPORTED;
5239 
5240 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5241 	}
5242 
5243 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5244 }
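

// Illustrative sketch (not part of the original source): reading a 32 bit
// value from the debugged team's address space with the function above. The
// helper name and the caller (a kernel debugger command) are hypothetical;
// note that the accessed range must not cross a page boundary.
#if 0
static status_t
debug_read_user_uint32(void* unsafeAddress, uint32* _value)
{
	// copyToUnsafe == false: copy from unsafeAddress into the kernel buffer
	return vm_debug_copy_page_memory(B_CURRENT_TEAM, unsafeAddress, _value,
		sizeof(uint32), false);
}
#endif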
5245 
5246 
5247 //	#pragma mark - kernel public API
5248 
5249 
5250 status_t
5251 user_memcpy(void* to, const void* from, size_t size)
5252 {
5253 	// don't allow address overflows
5254 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5255 		return B_BAD_ADDRESS;
5256 
5257 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5258 		return B_BAD_ADDRESS;
5259 
5260 	return B_OK;
5261 }
5262 
5263 
5264 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5265 	the string in \a to, NULL-terminating the result.
5266 
5267 	\param to Pointer to the destination C-string.
5268 	\param from Pointer to the source C-string.
5269 	\param size Size in bytes of the string buffer pointed to by \a to.
5270 
5271 	\return strlen(\a from), or a negative error code on failure.
5272 */
5273 ssize_t
5274 user_strlcpy(char* to, const char* from, size_t size)
5275 {
5276 	if (to == NULL && size != 0)
5277 		return B_BAD_VALUE;
5278 	if (from == NULL)
5279 		return B_BAD_ADDRESS;
5280 
5281 	// limit size to avoid address overflows
5282 	size_t maxSize = std::min((addr_t)size,
5283 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5284 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5285 		// the source address might still overflow.
5286 
5287 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5288 
5289 	// If we hit the address overflow boundary, fail.
5290 	if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5291 			&& maxSize < size)) {
5292 		return B_BAD_ADDRESS;
5293 	}
5294 
5295 	return result;
5296 }
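

// Illustrative sketch (not part of the original source): the usual syscall
// pattern for fetching a name string from userland -- validate the pointer,
// then user_strlcpy() into a fixed-size kernel buffer. The helper name is
// hypothetical, and unlike the syscalls below it also rejects truncation.
#if 0
static status_t
example_copy_name_from_user(const char* userName, char* name, size_t nameSize)
{
	if (!IS_USER_ADDRESS(userName))
		return B_BAD_ADDRESS;

	ssize_t length = user_strlcpy(name, userName, nameSize);
	if (length < 0)
		return B_BAD_ADDRESS;
	if ((size_t)length >= nameSize)
		return B_NAME_TOO_LONG;

	return B_OK;
}
#endif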
5297 
5298 
5299 status_t
5300 user_memset(void* s, char c, size_t count)
5301 {
5302 	// don't allow address overflows
5303 	if ((addr_t)s + count < (addr_t)s)
5304 		return B_BAD_ADDRESS;
5305 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5306 		return B_BAD_ADDRESS;
5307 
5308 	return B_OK;
5309 }
5310 
5311 
5312 /*!	Wires a single page at the given address.
5313 
5314 	\param team The team whose address space the address belongs to. Supports
5315 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5316 		parameter is ignored.
5317 	\param address The virtual address to wire down. Does not need to
5318 		be page aligned.
5319 	\param writable If \c true the page shall be writable.
5320 	\param info On success the info is filled in, among other things
5321 		containing the physical address the given virtual one translates to.
5322 	\return \c B_OK if the page could be wired, another error code otherwise.
5323 */
5324 status_t
5325 vm_wire_page(team_id team, addr_t address, bool writable,
5326 	VMPageWiringInfo* info)
5327 {
5328 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5329 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5330 
5331 	// compute the page protection that is required
5332 	bool isUser = IS_USER_ADDRESS(address);
5333 	uint32 requiredProtection = PAGE_PRESENT
5334 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5335 	if (writable)
5336 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5337 
5338 	// get and read lock the address space
5339 	VMAddressSpace* addressSpace = NULL;
5340 	if (isUser) {
5341 		if (team == B_CURRENT_TEAM)
5342 			addressSpace = VMAddressSpace::GetCurrent();
5343 		else
5344 			addressSpace = VMAddressSpace::Get(team);
5345 	} else
5346 		addressSpace = VMAddressSpace::GetKernel();
5347 	if (addressSpace == NULL)
5348 		return B_ERROR;
5349 
5350 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5351 
5352 	VMTranslationMap* map = addressSpace->TranslationMap();
5353 	status_t error = B_OK;
5354 
5355 	// get the area
5356 	VMArea* area = addressSpace->LookupArea(pageAddress);
5357 	if (area == NULL) {
5358 		addressSpace->Put();
5359 		return B_BAD_ADDRESS;
5360 	}
5361 
5362 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5363 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5364 
5365 	// mark the area range wired
5366 	area->Wire(&info->range);
5367 
5368 	// Lock the area's cache chain and the translation map. Needed to look
5369 	// up the page and play with its wired count.
5370 	cacheChainLocker.LockAllSourceCaches();
5371 	map->Lock();
5372 
5373 	phys_addr_t physicalAddress;
5374 	uint32 flags;
5375 	vm_page* page;
5376 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5377 		&& (flags & requiredProtection) == requiredProtection
5378 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5379 			!= NULL) {
5380 		// Already mapped with the correct permissions -- just increment
5381 		// the page's wired count.
5382 		increment_page_wired_count(page);
5383 
5384 		map->Unlock();
5385 		cacheChainLocker.Unlock();
5386 		addressSpaceLocker.Unlock();
5387 	} else {
5388 		// Let vm_soft_fault() map the page for us, if possible. We need
5389 		// to fully unlock to avoid deadlocks. Since we have already
5390 		// wired the area itself, nothing disturbing will happen with it
5391 		// in the meantime.
5392 		map->Unlock();
5393 		cacheChainLocker.Unlock();
5394 		addressSpaceLocker.Unlock();
5395 
5396 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5397 			isUser, &page);
5398 
5399 		if (error != B_OK) {
5400 			// The page could not be mapped -- clean up.
5401 			VMCache* cache = vm_area_get_locked_cache(area);
5402 			area->Unwire(&info->range);
5403 			cache->ReleaseRefAndUnlock();
5404 			addressSpace->Put();
5405 			return error;
5406 		}
5407 	}
5408 
5409 	info->physicalAddress
5410 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5411 			+ address % B_PAGE_SIZE;
5412 	info->page = page;
5413 
5414 	return B_OK;
5415 }
5416 
5417 
5418 /*!	Unwires a single page previously wired via vm_wire_page().
5419 
5420 	\param info The same object passed to vm_wire_page() before.
5421 */
5422 void
5423 vm_unwire_page(VMPageWiringInfo* info)
5424 {
5425 	// lock the address space
5426 	VMArea* area = info->range.area;
5427 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5428 		// takes over our reference
5429 
5430 	// lock the top cache
5431 	VMCache* cache = vm_area_get_locked_cache(area);
5432 	VMCacheChainLocker cacheChainLocker(cache);
5433 
5434 	if (info->page->Cache() != cache) {
5435 		// The page is not in the top cache, so we lock the whole cache chain
5436 		// before touching the page's wired count.
5437 		cacheChainLocker.LockAllSourceCaches();
5438 	}
5439 
5440 	decrement_page_wired_count(info->page);
5441 
5442 	// remove the wired range from the area
5443 	area->Unwire(&info->range);
5444 
5445 	cacheChainLocker.Unlock();
5446 }
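

// Illustrative sketch (not part of the original source): wiring a single
// page, using its physical address, and unwiring it again. The helper name
// and what is done with the physical address are hypothetical; while wired
// the page cannot be unmapped or paged out.
#if 0
static status_t
example_with_wired_page(addr_t userAddress, bool writable)
{
	VMPageWiringInfo info;
	status_t error = vm_wire_page(B_CURRENT_TEAM, userAddress, writable,
		&info);
	if (error != B_OK)
		return error;

	// info.physicalAddress is the physical address userAddress maps to;
	// e.g. hand it to a device or inspect the page
	dprintf("page at %#" B_PRIxADDR " -> %#" B_PRIxPHYSADDR "\n", userAddress,
		info.physicalAddress);

	vm_unwire_page(&info);
	return B_OK;
}
#endif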
5447 
5448 
5449 /*!	Wires down the given address range in the specified team's address space.
5450 
5451 	If successful the function
5452 	- acquires a reference to the specified team's address space,
5453 	- adds respective wired ranges to all areas that intersect with the given
5454 	  address range,
5455 	- makes sure all pages in the given address range are mapped with the
5456 	  requested access permissions and increments their wired count.
5457 
5458 	It fails when \a team doesn't specify a valid address space, when any part
5459 	of the specified address range is not covered by areas, when the concerned
5460 	areas don't allow mapping with the requested permissions, or when mapping
5461 	failed for another reason.
5462 
5463 	When successful the call must be balanced by an unlock_memory_etc() call with
5464 	the exact same parameters.
5465 
5466 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5467 		supported.
5468 	\param address The start of the address range to be wired.
5469 	\param numBytes The size of the address range to be wired.
5470 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5471 		requests that the range must be wired writable ("read from device
5472 		into memory").
5473 	\return \c B_OK on success, another error code otherwise.
5474 */
5475 status_t
5476 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5477 {
5478 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5479 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5480 
5481 	// compute the page protection that is required
5482 	bool isUser = IS_USER_ADDRESS(address);
5483 	bool writable = (flags & B_READ_DEVICE) == 0;
5484 	uint32 requiredProtection = PAGE_PRESENT
5485 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5486 	if (writable)
5487 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5488 
5489 	uint32 mallocFlags = isUser
5490 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5491 
5492 	// get and read lock the address space
5493 	VMAddressSpace* addressSpace = NULL;
5494 	if (isUser) {
5495 		if (team == B_CURRENT_TEAM)
5496 			addressSpace = VMAddressSpace::GetCurrent();
5497 		else
5498 			addressSpace = VMAddressSpace::Get(team);
5499 	} else
5500 		addressSpace = VMAddressSpace::GetKernel();
5501 	if (addressSpace == NULL)
5502 		return B_ERROR;
5503 
5504 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5505 		// We get a new address space reference here. The one we got above will
5506 		// be freed by unlock_memory_etc().
5507 
5508 	VMTranslationMap* map = addressSpace->TranslationMap();
5509 	status_t error = B_OK;
5510 
5511 	// iterate through all concerned areas
5512 	addr_t nextAddress = lockBaseAddress;
5513 	while (nextAddress != lockEndAddress) {
5514 		// get the next area
5515 		VMArea* area = addressSpace->LookupArea(nextAddress);
5516 		if (area == NULL) {
5517 			error = B_BAD_ADDRESS;
5518 			break;
5519 		}
5520 
5521 		addr_t areaStart = nextAddress;
5522 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5523 
5524 		// allocate the wired range (do that before locking the cache to avoid
5525 		// deadlocks)
5526 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5527 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5528 		if (range == NULL) {
5529 			error = B_NO_MEMORY;
5530 			break;
5531 		}
5532 
5533 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5534 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5535 
5536 		// mark the area range wired
5537 		area->Wire(range);
5538 
5539 		// Depending on the area cache type and the wiring, we may not need to
5540 		// look at the individual pages.
5541 		if (area->cache_type == CACHE_TYPE_NULL
5542 			|| area->cache_type == CACHE_TYPE_DEVICE
5543 			|| area->wiring == B_FULL_LOCK
5544 			|| area->wiring == B_CONTIGUOUS) {
5545 			nextAddress = areaEnd;
5546 			continue;
5547 		}
5548 
5549 		// Lock the area's cache chain and the translation map. Needed to look
5550 		// up pages and play with their wired count.
5551 		cacheChainLocker.LockAllSourceCaches();
5552 		map->Lock();
5553 
5554 		// iterate through the pages and wire them
5555 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5556 			phys_addr_t physicalAddress;
5557 			uint32 flags;
5558 
5559 			vm_page* page;
5560 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5561 				&& (flags & requiredProtection) == requiredProtection
5562 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5563 					!= NULL) {
5564 				// Already mapped with the correct permissions -- just increment
5565 				// the page's wired count.
5566 				increment_page_wired_count(page);
5567 			} else {
5568 				// Let vm_soft_fault() map the page for us, if possible. We need
5569 				// to fully unlock to avoid deadlocks. Since we have already
5570 				// wired the area itself, nothing disturbing will happen with it
5571 				// in the meantime.
5572 				map->Unlock();
5573 				cacheChainLocker.Unlock();
5574 				addressSpaceLocker.Unlock();
5575 
5576 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5577 					false, isUser, &page);
5578 
5579 				addressSpaceLocker.Lock();
5580 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5581 				cacheChainLocker.LockAllSourceCaches();
5582 				map->Lock();
5583 			}
5584 
5585 			if (error != B_OK)
5586 				break;
5587 		}
5588 
5589 		map->Unlock();
5590 
5591 		if (error == B_OK) {
5592 			cacheChainLocker.Unlock();
5593 		} else {
5594 			// An error occurred, so abort right here. If the current address
5595 			// is the first in this area, unwire the area, since we won't get
5596 			// to it when reverting what we've done so far.
5597 			if (nextAddress == areaStart) {
5598 				area->Unwire(range);
5599 				cacheChainLocker.Unlock();
5600 				range->~VMAreaWiredRange();
5601 				free_etc(range, mallocFlags);
5602 			} else
5603 				cacheChainLocker.Unlock();
5604 
5605 			break;
5606 		}
5607 	}
5608 
5609 	if (error != B_OK) {
5610 		// An error occurred, so unwire all that we've already wired. Note that
5611 		// even if not a single page was wired, unlock_memory_etc() is called
5612 		// to put the address space reference.
5613 		addressSpaceLocker.Unlock();
5614 		unlock_memory_etc(team, (void*)lockBaseAddress,
5615 			nextAddress - lockBaseAddress, flags);
5616 	}
5617 
5618 	return error;
5619 }
5620 
5621 
5622 status_t
5623 lock_memory(void* address, size_t numBytes, uint32 flags)
5624 {
5625 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5626 }
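

// Illustrative sketch (not part of the original source): wiring a userland
// I/O buffer for the duration of a device transfer. Buffer, length, and
// flags are hypothetical (flags would typically be 0 or B_READ_DEVICE, see
// the lock_memory_etc() documentation above); the unlock_memory() call must
// use exactly the same parameters as the lock_memory() call it balances.
#if 0
static status_t
example_transfer_with_locked_buffer(void* buffer, size_t length, uint32 flags)
{
	status_t error = lock_memory(buffer, length, flags);
	if (error != B_OK)
		return error;

	// ... perform the transfer; all pages of the range stay mapped and wired

	return unlock_memory(buffer, length, flags);
}
#endif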
5627 
5628 
5629 /*!	Unwires an address range previously wired with lock_memory_etc().
5630 
5631 	Note that a call to this function must balance a previous lock_memory_etc()
5632 	call with exactly the same parameters.
5633 */
5634 status_t
5635 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5636 {
5637 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5638 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5639 
5640 	// compute the page protection that is required
5641 	bool isUser = IS_USER_ADDRESS(address);
5642 	bool writable = (flags & B_READ_DEVICE) == 0;
5643 	uint32 requiredProtection = PAGE_PRESENT
5644 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5645 	if (writable)
5646 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5647 
5648 	uint32 mallocFlags = isUser
5649 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5650 
5651 	// get and read lock the address space
5652 	VMAddressSpace* addressSpace = NULL;
5653 	if (isUser) {
5654 		if (team == B_CURRENT_TEAM)
5655 			addressSpace = VMAddressSpace::GetCurrent();
5656 		else
5657 			addressSpace = VMAddressSpace::Get(team);
5658 	} else
5659 		addressSpace = VMAddressSpace::GetKernel();
5660 	if (addressSpace == NULL)
5661 		return B_ERROR;
5662 
5663 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5664 		// Take over the address space reference. We don't unlock until we're
5665 		// done.
5666 
5667 	VMTranslationMap* map = addressSpace->TranslationMap();
5668 	status_t error = B_OK;
5669 
5670 	// iterate through all concerned areas
5671 	addr_t nextAddress = lockBaseAddress;
5672 	while (nextAddress != lockEndAddress) {
5673 		// get the next area
5674 		VMArea* area = addressSpace->LookupArea(nextAddress);
5675 		if (area == NULL) {
5676 			error = B_BAD_ADDRESS;
5677 			break;
5678 		}
5679 
5680 		addr_t areaStart = nextAddress;
5681 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5682 
5683 		// Lock the area's top cache. This is a requirement for
5684 		// VMArea::Unwire().
5685 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5686 
5687 		// Depending on the area cache type and the wiring, we may not need to
5688 		// look at the individual pages.
5689 		if (area->cache_type == CACHE_TYPE_NULL
5690 			|| area->cache_type == CACHE_TYPE_DEVICE
5691 			|| area->wiring == B_FULL_LOCK
5692 			|| area->wiring == B_CONTIGUOUS) {
5693 			// unwire the range (to avoid deadlocks we delete the range after
5694 			// unlocking the cache)
5695 			nextAddress = areaEnd;
5696 			VMAreaWiredRange* range = area->Unwire(areaStart,
5697 				areaEnd - areaStart, writable);
5698 			cacheChainLocker.Unlock();
5699 			if (range != NULL) {
5700 				range->~VMAreaWiredRange();
5701 				free_etc(range, mallocFlags);
5702 			}
5703 			continue;
5704 		}
5705 
5706 		// Lock the area's cache chain and the translation map. Needed to look
5707 		// up pages and play with their wired count.
5708 		cacheChainLocker.LockAllSourceCaches();
5709 		map->Lock();
5710 
5711 		// iterate through the pages and unwire them
5712 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5713 			phys_addr_t physicalAddress;
5714 			uint32 flags;
5715 
5716 			vm_page* page;
5717 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5718 				&& (flags & PAGE_PRESENT) != 0
5719 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5720 					!= NULL) {
5721 				// The page is still mapped -- just decrement its wired
5722 				// count.
5723 				decrement_page_wired_count(page);
5724 			} else {
5725 				panic("unlock_memory_etc(): Failed to unwire page: address "
5726 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5727 					nextAddress);
5728 				error = B_BAD_VALUE;
5729 				break;
5730 			}
5731 		}
5732 
5733 		map->Unlock();
5734 
5735 		// All pages are unwired. Remove the area's wired range as well (to
5736 		// avoid deadlocks we delete the range after unlocking the cache).
5737 		VMAreaWiredRange* range = area->Unwire(areaStart,
5738 			areaEnd - areaStart, writable);
5739 
5740 		cacheChainLocker.Unlock();
5741 
5742 		if (range != NULL) {
5743 			range->~VMAreaWiredRange();
5744 			free_etc(range, mallocFlags);
5745 		}
5746 
5747 		if (error != B_OK)
5748 			break;
5749 	}
5750 
5751 	// get rid of the address space reference lock_memory_etc() acquired
5752 	addressSpace->Put();
5753 
5754 	return error;
5755 }
5756 
5757 
5758 status_t
5759 unlock_memory(void* address, size_t numBytes, uint32 flags)
5760 {
5761 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5762 }
5763 
5764 
5765 /*!	Similar to get_memory_map(), but also allows specifying the address space
5766 	for the memory in question and has saner semantics.
5767 	Returns \c B_OK when the complete range could be translated or
5768 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5769 	case the actual number of entries is written to \c *_numEntries. Any other
5770 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5771 	in this case.
5772 */
5773 status_t
5774 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5775 	physical_entry* table, uint32* _numEntries)
5776 {
5777 	uint32 numEntries = *_numEntries;
5778 	*_numEntries = 0;
5779 
5780 	VMAddressSpace* addressSpace;
5781 	addr_t virtualAddress = (addr_t)address;
5782 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5783 	phys_addr_t physicalAddress;
5784 	status_t status = B_OK;
5785 	int32 index = -1;
5786 	addr_t offset = 0;
5787 	bool interrupts = are_interrupts_enabled();
5788 
5789 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5790 		"entries)\n", team, address, numBytes, numEntries));
5791 
5792 	if (numEntries == 0 || numBytes == 0)
5793 		return B_BAD_VALUE;
5794 
5795 	// in which address space is the address to be found?
5796 	if (IS_USER_ADDRESS(virtualAddress)) {
5797 		if (team == B_CURRENT_TEAM)
5798 			addressSpace = VMAddressSpace::GetCurrent();
5799 		else
5800 			addressSpace = VMAddressSpace::Get(team);
5801 	} else
5802 		addressSpace = VMAddressSpace::GetKernel();
5803 
5804 	if (addressSpace == NULL)
5805 		return B_ERROR;
5806 
5807 	VMTranslationMap* map = addressSpace->TranslationMap();
5808 
5809 	if (interrupts)
5810 		map->Lock();
5811 
5812 	while (offset < numBytes) {
5813 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5814 		uint32 flags;
5815 
5816 		if (interrupts) {
5817 			status = map->Query((addr_t)address + offset, &physicalAddress,
5818 				&flags);
5819 		} else {
5820 			status = map->QueryInterrupt((addr_t)address + offset,
5821 				&physicalAddress, &flags);
5822 		}
5823 		if (status < B_OK)
5824 			break;
5825 		if ((flags & PAGE_PRESENT) == 0) {
5826 			panic("get_memory_map() called on unmapped memory!");
5827 			return B_BAD_ADDRESS;
5828 		}
5829 
5830 		if (index < 0 && pageOffset > 0) {
5831 			physicalAddress += pageOffset;
5832 			if (bytes > B_PAGE_SIZE - pageOffset)
5833 				bytes = B_PAGE_SIZE - pageOffset;
5834 		}
5835 
5836 		// need to switch to the next physical_entry?
5837 		if (index < 0 || table[index].address
5838 				!= physicalAddress - table[index].size) {
5839 			if ((uint32)++index + 1 > numEntries) {
5840 				// table too small
5841 				break;
5842 			}
5843 			table[index].address = physicalAddress;
5844 			table[index].size = bytes;
5845 		} else {
5846 			// the page fits into the current entry
5847 			table[index].size += bytes;
5848 		}
5849 
5850 		offset += bytes;
5851 	}
5852 
5853 	if (interrupts)
5854 		map->Unlock();
5855 
5856 	if (status != B_OK)
5857 		return status;
5858 
5859 	if ((uint32)index + 1 > numEntries) {
5860 		*_numEntries = index;
5861 		return B_BUFFER_OVERFLOW;
5862 	}
5863 
5864 	*_numEntries = index + 1;
5865 	return B_OK;
5866 }
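

// Illustrative sketch (not part of the original source): translating a
// (locked or otherwise fully mapped) buffer into physical runs with
// get_memory_map_etc(). The helper name and the table size of 8 are
// arbitrary; a real caller would retry with a larger table on
// B_BUFFER_OVERFLOW.
#if 0
static void
example_dump_physical_runs(const void* buffer, size_t length)
{
	physical_entry table[8];
	uint32 numEntries = 8;
	status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, length, table,
		&numEntries);
	if (error != B_OK && error != B_BUFFER_OVERFLOW)
		return;

	for (uint32 i = 0; i < numEntries; i++) {
		dprintf("run %" B_PRIu32 ": address %#" B_PRIxPHYSADDR ", %" B_PRIu64
			" bytes\n", i, table[i].address, (uint64)table[i].size);
	}
}
#endif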
5867 
5868 
5869 /*!	According to the BeBook, this function should always succeed.
5870 	This is no longer the case.
5871 */
5872 extern "C" int32
5873 __get_memory_map_haiku(const void* address, size_t numBytes,
5874 	physical_entry* table, int32 numEntries)
5875 {
5876 	uint32 entriesRead = numEntries;
5877 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5878 		table, &entriesRead);
5879 	if (error != B_OK)
5880 		return error;
5881 
5882 	// close the entry list
5883 
5884 	// if it's only one entry, we will silently accept the missing ending
5885 	if (numEntries == 1)
5886 		return B_OK;
5887 
5888 	if (entriesRead + 1 > (uint32)numEntries)
5889 		return B_BUFFER_OVERFLOW;
5890 
5891 	table[entriesRead].address = 0;
5892 	table[entriesRead].size = 0;
5893 
5894 	return B_OK;
5895 }
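

// Illustrative sketch (not part of the original source): the classic
// get_memory_map() calling convention -- when the table has room, the entry
// list is closed with a zero-sized entry, so callers may iterate until
// size == 0. Buffer and table size are hypothetical.
#if 0
static void
example_iterate_memory_map(const void* buffer, size_t length)
{
	physical_entry table[4];
	if (get_memory_map(buffer, length, table, 4) != B_OK)
		return;

	for (int32 i = 0; i < 4 && table[i].size != 0; i++) {
		dprintf("entry %" B_PRId32 ": %#" B_PRIxPHYSADDR "\n", i,
			table[i].address);
	}
}
#endif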
5896 
5897 
5898 area_id
5899 area_for(void* address)
5900 {
5901 	return vm_area_for((addr_t)address, true);
5902 }
5903 
5904 
5905 area_id
5906 find_area(const char* name)
5907 {
5908 	return VMAreaHash::Find(name);
5909 }
5910 
5911 
5912 status_t
5913 _get_area_info(area_id id, area_info* info, size_t size)
5914 {
5915 	if (size != sizeof(area_info) || info == NULL)
5916 		return B_BAD_VALUE;
5917 
5918 	AddressSpaceReadLocker locker;
5919 	VMArea* area;
5920 	status_t status = locker.SetFromArea(id, area);
5921 	if (status != B_OK)
5922 		return status;
5923 
5924 	fill_area_info(area, info, size);
5925 	return B_OK;
5926 }
5927 
5928 
5929 status_t
5930 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5931 {
5932 	addr_t nextBase = *(addr_t*)cookie;
5933 
5934 	// we're already through the list
5935 	if (nextBase == (addr_t)-1)
5936 		return B_ENTRY_NOT_FOUND;
5937 
5938 	if (team == B_CURRENT_TEAM)
5939 		team = team_get_current_team_id();
5940 
5941 	AddressSpaceReadLocker locker(team);
5942 	if (!locker.IsLocked())
5943 		return B_BAD_TEAM_ID;
5944 
5945 	VMArea* area;
5946 	for (VMAddressSpace::AreaIterator it
5947 				= locker.AddressSpace()->GetAreaIterator();
5948 			(area = it.Next()) != NULL;) {
5949 		if (area->Base() > nextBase)
5950 			break;
5951 	}
5952 
5953 	if (area == NULL) {
5954 		*cookie = -1;
5955 		return B_ENTRY_NOT_FOUND;
5956 	}
5957 
5958 	fill_area_info(area, info, size);
5959 	*cookie = (ssize_t)(area->Base());
5960 
5961 	return B_OK;
5962 }
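

// Illustrative sketch (not part of the original source): iterating over all
// areas of a team with the cookie-based public wrapper of the function
// above (get_next_area_info(), assumed to be available to the caller). The
// helper name and team are hypothetical.
#if 0
static void
example_list_areas(team_id team)
{
	ssize_t cookie = 0;
	area_info info;
	while (get_next_area_info(team, &cookie, &info) == B_OK) {
		dprintf("area %" B_PRId32 " \"%s\": base %p, size %" B_PRIu64 "\n",
			info.area, info.name, info.address, (uint64)info.size);
	}
}
#endif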
5963 
5964 
5965 status_t
5966 set_area_protection(area_id area, uint32 newProtection)
5967 {
5968 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5969 		newProtection, true);
5970 }
5971 
5972 
5973 status_t
5974 resize_area(area_id areaID, size_t newSize)
5975 {
5976 	return vm_resize_area(areaID, newSize, true);
5977 }
5978 
5979 
5980 /*!	Transfers the specified area to a new team. The caller must be the owner
5981 	of the area.
5982 */
5983 area_id
5984 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5985 	bool kernel)
5986 {
5987 	area_info info;
5988 	status_t status = get_area_info(id, &info);
5989 	if (status != B_OK)
5990 		return status;
5991 
5992 	if (info.team != thread_get_current_thread()->team->id)
5993 		return B_PERMISSION_DENIED;
5994 
5995 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5996 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5997 	if (clonedArea < 0)
5998 		return clonedArea;
5999 
6000 	status = vm_delete_area(info.team, id, kernel);
6001 	if (status != B_OK) {
6002 		vm_delete_area(target, clonedArea, kernel);
6003 		return status;
6004 	}
6005 
6006 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6007 
6008 	return clonedArea;
6009 }
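

// Illustrative sketch (not part of the original source): handing an area the
// caller owns over to another team. The helper name, area, and target team
// are hypothetical; on success the returned ID names the clone now owned by
// the target team (at an address of the kernel's choosing here) and the
// original area is gone.
#if 0
static area_id
example_give_area_to_team(area_id area, team_id target)
{
	void* address = NULL;
	return transfer_area(area, &address, B_ANY_ADDRESS, target, true);
}
#endif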
6010 
6011 
6012 extern "C" area_id
6013 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6014 	size_t numBytes, uint32 addressSpec, uint32 protection,
6015 	void** _virtualAddress)
6016 {
6017 	if (!arch_vm_supports_protection(protection))
6018 		return B_NOT_SUPPORTED;
6019 
6020 	fix_protection(&protection);
6021 
6022 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6023 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6024 		false);
6025 }
6026 
6027 
6028 area_id
6029 clone_area(const char* name, void** _address, uint32 addressSpec,
6030 	uint32 protection, area_id source)
6031 {
6032 	if ((protection & B_KERNEL_PROTECTION) == 0)
6033 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6034 
6035 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6036 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6037 }
6038 
6039 
6040 area_id
6041 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6042 	uint32 protection, uint32 flags, uint32 guardSize,
6043 	const virtual_address_restrictions* virtualAddressRestrictions,
6044 	const physical_address_restrictions* physicalAddressRestrictions,
6045 	void** _address)
6046 {
6047 	fix_protection(&protection);
6048 
6049 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6050 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6051 		true, _address);
6052 }
6053 
6054 
6055 extern "C" area_id
6056 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6057 	size_t size, uint32 lock, uint32 protection)
6058 {
6059 	fix_protection(&protection);
6060 
6061 	virtual_address_restrictions virtualRestrictions = {};
6062 	virtualRestrictions.address = *_address;
6063 	virtualRestrictions.address_specification = addressSpec;
6064 	physical_address_restrictions physicalRestrictions = {};
6065 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6066 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6067 		true, _address);
6068 }
6069 
6070 
6071 status_t
6072 delete_area(area_id area)
6073 {
6074 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6075 }
6076 
6077 
6078 //	#pragma mark - Userland syscalls
6079 
6080 
6081 status_t
6082 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6083 	addr_t size)
6084 {
6085 	// filter out some unavailable values (for userland)
6086 	switch (addressSpec) {
6087 		case B_ANY_KERNEL_ADDRESS:
6088 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6089 			return B_BAD_VALUE;
6090 	}
6091 
6092 	addr_t address;
6093 
6094 	if (!IS_USER_ADDRESS(userAddress)
6095 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6096 		return B_BAD_ADDRESS;
6097 
6098 	status_t status = vm_reserve_address_range(
6099 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6100 		RESERVED_AVOID_BASE);
6101 	if (status != B_OK)
6102 		return status;
6103 
6104 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6105 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6106 			(void*)address, size);
6107 		return B_BAD_ADDRESS;
6108 	}
6109 
6110 	return B_OK;
6111 }
6112 
6113 
6114 status_t
6115 _user_unreserve_address_range(addr_t address, addr_t size)
6116 {
6117 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6118 		(void*)address, size);
6119 }
6120 
6121 
6122 area_id
6123 _user_area_for(void* address)
6124 {
6125 	return vm_area_for((addr_t)address, false);
6126 }
6127 
6128 
6129 area_id
6130 _user_find_area(const char* userName)
6131 {
6132 	char name[B_OS_NAME_LENGTH];
6133 
6134 	if (!IS_USER_ADDRESS(userName)
6135 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6136 		return B_BAD_ADDRESS;
6137 
6138 	return find_area(name);
6139 }
6140 
6141 
6142 status_t
6143 _user_get_area_info(area_id area, area_info* userInfo)
6144 {
6145 	if (!IS_USER_ADDRESS(userInfo))
6146 		return B_BAD_ADDRESS;
6147 
6148 	area_info info;
6149 	status_t status = get_area_info(area, &info);
6150 	if (status < B_OK)
6151 		return status;
6152 
6153 	// TODO: do we want to prevent userland from seeing kernel protections?
6154 	//info.protection &= B_USER_PROTECTION;
6155 
6156 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6157 		return B_BAD_ADDRESS;
6158 
6159 	return status;
6160 }
6161 
6162 
6163 status_t
6164 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6165 {
6166 	ssize_t cookie;
6167 
6168 	if (!IS_USER_ADDRESS(userCookie)
6169 		|| !IS_USER_ADDRESS(userInfo)
6170 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6171 		return B_BAD_ADDRESS;
6172 
6173 	area_info info;
6174 	status_t status = _get_next_area_info(team, &cookie, &info,
6175 		sizeof(area_info));
6176 	if (status != B_OK)
6177 		return status;
6178 
6179 	//info.protection &= B_USER_PROTECTION;
6180 
6181 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6182 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6183 		return B_BAD_ADDRESS;
6184 
6185 	return status;
6186 }
6187 
6188 
6189 status_t
6190 _user_set_area_protection(area_id area, uint32 newProtection)
6191 {
6192 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6193 		return B_BAD_VALUE;
6194 
6195 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6196 		newProtection, false);
6197 }
6198 
6199 
6200 status_t
6201 _user_resize_area(area_id area, size_t newSize)
6202 {
6203 	// TODO: Since we restrict deleting of areas to those owned by the team,
6204 	// we should also do that for resizing (check other functions, too).
6205 	return vm_resize_area(area, newSize, false);
6206 }
6207 
6208 
6209 area_id
6210 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6211 	team_id target)
6212 {
6213 	// filter out some unavailable values (for userland)
6214 	switch (addressSpec) {
6215 		case B_ANY_KERNEL_ADDRESS:
6216 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6217 			return B_BAD_VALUE;
6218 	}
6219 
6220 	void* address;
6221 	if (!IS_USER_ADDRESS(userAddress)
6222 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6223 		return B_BAD_ADDRESS;
6224 
6225 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6226 	if (newArea < B_OK)
6227 		return newArea;
6228 
6229 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6230 		return B_BAD_ADDRESS;
6231 
6232 	return newArea;
6233 }
6234 
6235 
6236 area_id
6237 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6238 	uint32 protection, area_id sourceArea)
6239 {
6240 	char name[B_OS_NAME_LENGTH];
6241 	void* address;
6242 
6243 	// filter out some unavailable values (for userland)
6244 	switch (addressSpec) {
6245 		case B_ANY_KERNEL_ADDRESS:
6246 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6247 			return B_BAD_VALUE;
6248 	}
6249 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6250 		return B_BAD_VALUE;
6251 
6252 	if (!IS_USER_ADDRESS(userName)
6253 		|| !IS_USER_ADDRESS(userAddress)
6254 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6255 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6256 		return B_BAD_ADDRESS;
6257 
6258 	fix_protection(&protection);
6259 
6260 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6261 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6262 		false);
6263 	if (clonedArea < B_OK)
6264 		return clonedArea;
6265 
6266 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6267 		delete_area(clonedArea);
6268 		return B_BAD_ADDRESS;
6269 	}
6270 
6271 	return clonedArea;
6272 }
6273 
6274 
6275 area_id
6276 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6277 	size_t size, uint32 lock, uint32 protection)
6278 {
6279 	char name[B_OS_NAME_LENGTH];
6280 	void* address;
6281 
6282 	// filter out some unavailable values (for userland)
6283 	switch (addressSpec) {
6284 		case B_ANY_KERNEL_ADDRESS:
6285 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6286 			return B_BAD_VALUE;
6287 	}
6288 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6289 		return B_BAD_VALUE;
6290 
6291 	if (!IS_USER_ADDRESS(userName)
6292 		|| !IS_USER_ADDRESS(userAddress)
6293 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6294 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6295 		return B_BAD_ADDRESS;
6296 
6297 	if (addressSpec == B_EXACT_ADDRESS
6298 		&& IS_KERNEL_ADDRESS(address))
6299 		return B_BAD_VALUE;
6300 
6301 	if (addressSpec == B_ANY_ADDRESS)
6302 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6303 	if (addressSpec == B_BASE_ADDRESS)
6304 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6305 
6306 	fix_protection(&protection);
6307 
6308 	virtual_address_restrictions virtualRestrictions = {};
6309 	virtualRestrictions.address = address;
6310 	virtualRestrictions.address_specification = addressSpec;
6311 	physical_address_restrictions physicalRestrictions = {};
6312 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6313 		size, lock, protection, 0, 0, &virtualRestrictions,
6314 		&physicalRestrictions, false, &address);
6315 
6316 	if (area >= B_OK
6317 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6318 		delete_area(area);
6319 		return B_BAD_ADDRESS;
6320 	}
6321 
6322 	return area;
6323 }
6324 
6325 
6326 status_t
6327 _user_delete_area(area_id area)
6328 {
6329 	// Unlike the BeOS implementation, you can now only delete areas
6330 	// that you have created yourself from userland.
6331 	// The documentation for delete_area() explicitly states that this
6332 	// will be restricted in the future, and so it will.
6333 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6334 }
6335 
6336 
6337 // TODO: create a BeOS style call for this!
6338 
6339 area_id
6340 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6341 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6342 	int fd, off_t offset)
6343 {
6344 	char name[B_OS_NAME_LENGTH];
6345 	void* address;
6346 	area_id area;
6347 
6348 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6349 		return B_BAD_VALUE;
6350 
6351 	fix_protection(&protection);
6352 
6353 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6354 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6355 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6356 		return B_BAD_ADDRESS;
6357 
6358 	if (addressSpec == B_EXACT_ADDRESS) {
6359 		if ((addr_t)address + size < (addr_t)address
6360 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6361 			return B_BAD_VALUE;
6362 		}
6363 		if (!IS_USER_ADDRESS(address)
6364 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6365 			return B_BAD_ADDRESS;
6366 		}
6367 	}
6368 
6369 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6370 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6371 		false);
6372 	if (area < B_OK)
6373 		return area;
6374 
6375 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6376 		return B_BAD_ADDRESS;
6377 
6378 	return area;
6379 }
6380 
6381 
6382 status_t
6383 _user_unmap_memory(void* _address, size_t size)
6384 {
6385 	addr_t address = (addr_t)_address;
6386 
6387 	// check params
6388 	if (size == 0 || (addr_t)address + size < (addr_t)address
6389 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6390 		return B_BAD_VALUE;
6391 	}
6392 
6393 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6394 		return B_BAD_ADDRESS;
6395 
6396 	// Write lock the address space and ensure the address range is not wired.
6397 	AddressSpaceWriteLocker locker;
6398 	do {
6399 		status_t status = locker.SetTo(team_get_current_team_id());
6400 		if (status != B_OK)
6401 			return status;
6402 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6403 			size, &locker));
6404 
6405 	// unmap
6406 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6407 }
6408 
6409 
6410 status_t
6411 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6412 {
6413 	// check address range
6414 	addr_t address = (addr_t)_address;
6415 	size = PAGE_ALIGN(size);
6416 
6417 	if ((address % B_PAGE_SIZE) != 0)
6418 		return B_BAD_VALUE;
6419 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6420 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6421 		// weird error code required by POSIX
6422 		return ENOMEM;
6423 	}
6424 
6425 	// extend and check protection
6426 	if ((protection & ~B_USER_PROTECTION) != 0)
6427 		return B_BAD_VALUE;
6428 
6429 	fix_protection(&protection);
6430 
6431 	// We need to write lock the address space, since we're going to play with
6432 	// the areas. Also make sure that none of the areas is wired and that we're
6433 	// actually allowed to change the protection.
6434 	AddressSpaceWriteLocker locker;
6435 
6436 	bool restart;
6437 	do {
6438 		restart = false;
6439 
6440 		status_t status = locker.SetTo(team_get_current_team_id());
6441 		if (status != B_OK)
6442 			return status;
6443 
6444 		// First round: Check whether the whole range is covered by areas and we
6445 		// are allowed to modify them.
6446 		addr_t currentAddress = address;
6447 		size_t sizeLeft = size;
6448 		while (sizeLeft > 0) {
6449 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6450 			if (area == NULL)
6451 				return B_NO_MEMORY;
6452 
6453 			if (area->address_space == VMAddressSpace::Kernel())
6454 				return B_NOT_ALLOWED;
6455 
6456 			// TODO: For (shared) mapped files we should check whether the new
6457 			// protections are compatible with the file permissions. We don't
6458 			// have a way to do that yet, though.
6459 
6460 			addr_t offset = currentAddress - area->Base();
6461 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6462 
6463 			AreaCacheLocker cacheLocker(area);
6464 
6465 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6466 					&locker, &cacheLocker)) {
6467 				restart = true;
6468 				break;
6469 			}
6470 
6471 			cacheLocker.Unlock();
6472 
6473 			currentAddress += rangeSize;
6474 			sizeLeft -= rangeSize;
6475 		}
6476 	} while (restart);
6477 
6478 	// Second round: If the protections differ from that of the area, create a
6479 	// page protection array and re-map mapped pages.
6480 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6481 	addr_t currentAddress = address;
6482 	size_t sizeLeft = size;
6483 	while (sizeLeft > 0) {
6484 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6485 		if (area == NULL)
6486 			return B_NO_MEMORY;
6487 
6488 		addr_t offset = currentAddress - area->Base();
6489 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6490 
6491 		currentAddress += rangeSize;
6492 		sizeLeft -= rangeSize;
6493 
6494 		if (area->page_protections == NULL) {
6495 			if (area->protection == protection)
6496 				continue;
6497 
6498 			status_t status = allocate_area_page_protections(area);
6499 			if (status != B_OK)
6500 				return status;
6501 		}
6502 
6503 		// We need to lock the complete cache chain, since we potentially unmap
6504 		// pages of lower caches.
6505 		VMCache* topCache = vm_area_get_locked_cache(area);
6506 		VMCacheChainLocker cacheChainLocker(topCache);
6507 		cacheChainLocker.LockAllSourceCaches();
6508 
6509 		for (addr_t pageAddress = area->Base() + offset;
6510 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6511 			map->Lock();
6512 
6513 			set_area_page_protection(area, pageAddress, protection);
6514 
6515 			phys_addr_t physicalAddress;
6516 			uint32 flags;
6517 
6518 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6519 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6520 				map->Unlock();
6521 				continue;
6522 			}
6523 
6524 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6525 			if (page == NULL) {
6526 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6527 					"\n", area, physicalAddress);
6528 				map->Unlock();
6529 				return B_ERROR;
6530 			}
6531 
6532 			// If the page is not in the topmost cache and write access is
6533 			// requested, we have to unmap it. Otherwise we can re-map it with
6534 			// the new protection.
6535 			bool unmapPage = page->Cache() != topCache
6536 				&& (protection & B_WRITE_AREA) != 0;
6537 
6538 			if (!unmapPage)
6539 				map->ProtectPage(area, pageAddress, protection);
6540 
6541 			map->Unlock();
6542 
6543 			if (unmapPage) {
6544 				DEBUG_PAGE_ACCESS_START(page);
6545 				unmap_page(area, pageAddress);
6546 				DEBUG_PAGE_ACCESS_END(page);
6547 			}
6548 		}
6549 	}
6550 
6551 	return B_OK;
6552 }
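

// Illustrative sketch (not part of the original source, userland side): a
// POSIX mprotect() call, which is assumed to be backed by the syscall above.
// The helper name and address are hypothetical; the address must be page
// aligned, and a range not fully covered by areas yields ENOMEM, matching
// the checks above.
#if 0
#include <sys/mman.h>

static int
example_make_range_read_only(void* pageAlignedAddress, size_t length)
{
	// returns 0 on success, -1 with errno set on failure (POSIX semantics)
	return mprotect(pageAlignedAddress, length, PROT_READ);
}
#endif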
6553 
6554 
6555 status_t
6556 _user_sync_memory(void* _address, size_t size, uint32 flags)
6557 {
6558 	addr_t address = (addr_t)_address;
6559 	size = PAGE_ALIGN(size);
6560 
6561 	// check params
6562 	if ((address % B_PAGE_SIZE) != 0)
6563 		return B_BAD_VALUE;
6564 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6565 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6566 		// weird error code required by POSIX
6567 		return ENOMEM;
6568 	}
6569 
6570 	bool writeSync = (flags & MS_SYNC) != 0;
6571 	bool writeAsync = (flags & MS_ASYNC) != 0;
6572 	if (writeSync && writeAsync)
6573 		return B_BAD_VALUE;
6574 
6575 	if (size == 0 || (!writeSync && !writeAsync))
6576 		return B_OK;
6577 
6578 	// iterate through the range and sync all concerned areas
6579 	while (size > 0) {
6580 		// read lock the address space
6581 		AddressSpaceReadLocker locker;
6582 		status_t error = locker.SetTo(team_get_current_team_id());
6583 		if (error != B_OK)
6584 			return error;
6585 
6586 		// get the first area
6587 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6588 		if (area == NULL)
6589 			return B_NO_MEMORY;
6590 
6591 		uint32 offset = address - area->Base();
6592 		size_t rangeSize = min_c(area->Size() - offset, size);
6593 		offset += area->cache_offset;
6594 
6595 		// lock the cache
6596 		AreaCacheLocker cacheLocker(area);
6597 		if (!cacheLocker)
6598 			return B_BAD_VALUE;
6599 		VMCache* cache = area->cache;
6600 
6601 		locker.Unlock();
6602 
6603 		uint32 firstPage = offset >> PAGE_SHIFT;
6604 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6605 
6606 		// write the pages
6607 		if (cache->type == CACHE_TYPE_VNODE) {
6608 			if (writeSync) {
6609 				// synchronous
6610 				error = vm_page_write_modified_page_range(cache, firstPage,
6611 					endPage);
6612 				if (error != B_OK)
6613 					return error;
6614 			} else {
6615 				// asynchronous
6616 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6617 				// TODO: This is probably not quite what is supposed to happen.
6618 				// Especially when a lot has to be written, it might take ages
6619 				// until it really hits the disk.
6620 			}
6621 		}
6622 
6623 		address += rangeSize;
6624 		size -= rangeSize;
6625 	}
6626 
6627 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6628 	// synchronize multiple mappings of the same file. In our VM they never get
6629 	// out of sync, though, so we don't have to do anything.
6630 
6631 	return B_OK;
6632 }
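

// Illustrative sketch (not part of the original source, userland side): a
// POSIX msync() call, which is assumed to reach the syscall above. MS_SYNC
// writes modified pages back before returning, MS_ASYNC only schedules the
// write-back; passing both is rejected with B_BAD_VALUE. The helper name is
// hypothetical.
#if 0
#include <sys/mman.h>

static int
example_flush_file_mapping(void* pageAlignedAddress, size_t length)
{
	// synchronously write back modified pages of the mapping
	return msync(pageAlignedAddress, length, MS_SYNC);
}
#endif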
6633 
6634 
6635 status_t
6636 _user_memory_advice(void* address, size_t size, uint32 advice)
6637 {
6638 	// TODO: Implement!
6639 	return B_OK;
6640 }
6641 
6642 
6643 status_t
6644 _user_get_memory_properties(team_id teamID, const void* address,
6645 	uint32* _protected, uint32* _lock)
6646 {
6647 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6648 		return B_BAD_ADDRESS;
6649 
6650 	AddressSpaceReadLocker locker;
6651 	status_t error = locker.SetTo(teamID);
6652 	if (error != B_OK)
6653 		return error;
6654 
6655 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6656 	if (area == NULL)
6657 		return B_NO_MEMORY;
6658 
6660 	uint32 protection = area->protection;
6661 	if (area->page_protections != NULL)
6662 		protection = get_area_page_protection(area, (addr_t)address);
6663 
6664 	uint32 wiring = area->wiring;
6665 
6666 	locker.Unlock();
6667 
6668 	error = user_memcpy(_protected, &protection, sizeof(protection));
6669 	if (error != B_OK)
6670 		return error;
6671 
6672 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6673 
6674 	return error;
6675 }
6676 
6677 
6678 // #pragma mark -- compatibility
6679 
6680 
6681 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6682 
6683 
6684 struct physical_entry_beos {
6685 	uint32	address;
6686 	uint32	size;
6687 };
6688 
6689 
6690 /*!	The physical_entry structure has changed. We need to translate it to the
6691 	old one.
6692 */
6693 extern "C" int32
6694 __get_memory_map_beos(const void* _address, size_t numBytes,
6695 	physical_entry_beos* table, int32 numEntries)
6696 {
6697 	if (numEntries <= 0)
6698 		return B_BAD_VALUE;
6699 
6700 	const uint8* address = (const uint8*)_address;
6701 
6702 	int32 count = 0;
6703 	while (numBytes > 0 && count < numEntries) {
6704 		physical_entry entry;
6705 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6706 		if (result < 0) {
6707 			if (result != B_BUFFER_OVERFLOW)
6708 				return result;
6709 		}
6710 
6711 		if (entry.address >= (phys_addr_t)1 << 32) {
6712 			panic("get_memory_map(): Address is greater than 4 GB!");
6713 			return B_ERROR;
6714 		}
6715 
6716 		table[count].address = entry.address;
6717 		table[count++].size = entry.size;
6718 
6719 		address += entry.size;
6720 		numBytes -= entry.size;
6721 	}
6722 
6723 	// null-terminate the table, if possible
6724 	if (count < numEntries) {
6725 		table[count].address = 0;
6726 		table[count].size = 0;
6727 	}
6728 
6729 	return B_OK;
6730 }
6731 
6732 
6733 /*!	The type of the \a physicalAddress parameter has changed from void* to
6734 	phys_addr_t.
6735 */
6736 extern "C" area_id
6737 __map_physical_memory_beos(const char* name, void* physicalAddress,
6738 	size_t numBytes, uint32 addressSpec, uint32 protection,
6739 	void** _virtualAddress)
6740 {
6741 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6742 		addressSpec, protection, _virtualAddress);
6743 }
6744 
6745 
6746 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6747 	we meddle with the \a lock parameter to force 32 bit.
6748 */
6749 extern "C" area_id
6750 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6751 	size_t size, uint32 lock, uint32 protection)
6752 {
6753 	switch (lock) {
6754 		case B_NO_LOCK:
6755 			break;
6756 		case B_FULL_LOCK:
6757 		case B_LAZY_LOCK:
6758 			lock = B_32_BIT_FULL_LOCK;
6759 			break;
6760 		case B_CONTIGUOUS:
6761 			lock = B_32_BIT_CONTIGUOUS;
6762 			break;
6763 	}
6764 
6765 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6766 		protection);
6767 }
6768 
6769 
6770 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6771 	"BASE");
6772 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6773 	"map_physical_memory@", "BASE");
6774 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6775 	"BASE");
6776 
6777 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6778 	"get_memory_map@@", "1_ALPHA3");
6779 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6780 	"map_physical_memory@@", "1_ALPHA3");
6781 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6782 	"1_ALPHA3");
6783 
6784 
6785 #else
6786 
6787 
6788 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6789 	"get_memory_map@@", "BASE");
6790 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6791 	"map_physical_memory@@", "BASE");
6792 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6793 	"BASE");
6794 
6795 
6796 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6797