xref: /haiku/src/system/kernel/vm/vm.cpp (revision 1f52c921e27aa442370e1bd4adc021acf2b78b64)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
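
// Usage note (editorial sketch, not from the original source): since TRACE(x)
// expands to `dprintf x`, call sites pass the whole argument list in an extra
// pair of parentheses, for example
//
//	TRACE(("map_backing_store: aspace %p, size %" B_PRIuADDR "\n",
//		addressSpace, size));
//
// With TRACE_VM undefined the macro expands to a lone ';' and the call
// disappears from the build.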
74 
75 
76 namespace {
77 
78 class AreaCacheLocking {
79 public:
80 	inline bool Lock(VMCache* lockable)
81 	{
82 		return false;
83 	}
84 
85 	inline void Unlock(VMCache* lockable)
86 	{
87 		vm_area_put_locked_cache(lockable);
88 	}
89 };
90 
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 	inline AreaCacheLocker(VMCache* cache = NULL)
94 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95 	{
96 	}
97 
98 	inline AreaCacheLocker(VMArea* area)
99 		: AutoLocker<VMCache, AreaCacheLocking>()
100 	{
101 		SetTo(area);
102 	}
103 
104 	inline void SetTo(VMCache* cache, bool alreadyLocked)
105 	{
106 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107 	}
108 
109 	inline void SetTo(VMArea* area)
110 	{
111 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113 	}
114 };
115 
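
// A minimal usage sketch of the locker above (editorial addition, not
// compiled; the function name is illustrative only):
#if 0
static void
example_use_area_cache_locker(VMArea* area)
{
	// The constructor acquires a reference to the area's cache and locks it
	// via vm_area_get_locked_cache(); the destructor hands both back through
	// vm_area_put_locked_cache().
	AreaCacheLocker cacheLocker(area);

	// ... inspect or modify area->cache while it is locked ...
}
#endif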
116 
117 class VMCacheChainLocker {
118 public:
119 	VMCacheChainLocker()
120 		:
121 		fTopCache(NULL),
122 		fBottomCache(NULL)
123 	{
124 	}
125 
126 	VMCacheChainLocker(VMCache* topCache)
127 		:
128 		fTopCache(topCache),
129 		fBottomCache(topCache)
130 	{
131 	}
132 
133 	~VMCacheChainLocker()
134 	{
135 		Unlock();
136 	}
137 
138 	void SetTo(VMCache* topCache)
139 	{
140 		fTopCache = topCache;
141 		fBottomCache = topCache;
142 
143 		if (topCache != NULL)
144 			topCache->SetUserData(NULL);
145 	}
146 
147 	VMCache* LockSourceCache()
148 	{
149 		if (fBottomCache == NULL || fBottomCache->source == NULL)
150 			return NULL;
151 
152 		VMCache* previousCache = fBottomCache;
153 
154 		fBottomCache = fBottomCache->source;
155 		fBottomCache->Lock();
156 		fBottomCache->AcquireRefLocked();
157 		fBottomCache->SetUserData(previousCache);
158 
159 		return fBottomCache;
160 	}
161 
162 	void LockAllSourceCaches()
163 	{
164 		while (LockSourceCache() != NULL) {
165 		}
166 	}
167 
168 	void Unlock(VMCache* exceptCache = NULL)
169 	{
170 		if (fTopCache == NULL)
171 			return;
172 
173 		// Unlock caches in source -> consumer direction. This is important to
174 		// avoid double-locking and a reversal of locking order in case a cache
175 		// is eligible for merging.
176 		VMCache* cache = fBottomCache;
177 		while (cache != NULL) {
178 			VMCache* nextCache = (VMCache*)cache->UserData();
179 			if (cache != exceptCache)
180 				cache->ReleaseRefAndUnlock(cache != fTopCache);
181 
182 			if (cache == fTopCache)
183 				break;
184 
185 			cache = nextCache;
186 		}
187 
188 		fTopCache = NULL;
189 		fBottomCache = NULL;
190 	}
191 
192 	void UnlockKeepRefs(bool keepTopCacheLocked)
193 	{
194 		if (fTopCache == NULL)
195 			return;
196 
197 		VMCache* nextCache = fBottomCache;
198 		VMCache* cache = NULL;
199 
200 		while (keepTopCacheLocked
201 				? nextCache != fTopCache : cache != fTopCache) {
202 			cache = nextCache;
203 			nextCache = (VMCache*)cache->UserData();
204 			cache->Unlock(cache != fTopCache);
205 		}
206 	}
207 
208 	void RelockCaches(bool topCacheLocked)
209 	{
210 		if (fTopCache == NULL)
211 			return;
212 
213 		VMCache* nextCache = fTopCache;
214 		VMCache* cache = NULL;
215 		if (topCacheLocked) {
216 			cache = nextCache;
217 			nextCache = cache->source;
218 		}
219 
220 		while (cache != fBottomCache && nextCache != NULL) {
221 			VMCache* consumer = cache;
222 			cache = nextCache;
223 			nextCache = cache->source;
224 			cache->Lock();
225 			cache->SetUserData(consumer);
226 		}
227 	}
228 
229 private:
230 	VMCache*	fTopCache;
231 	VMCache*	fBottomCache;
232 };
233 
234 } // namespace
235 
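
// How the chain locker above is typically used in this file (editorial
// sketch, not compiled; mirrors the pattern in cut_area()):
#if 0
static void
example_lock_cache_chain(VMArea* area)
{
	VMCache* cache = vm_area_get_locked_cache(area);
	VMCacheChainLocker cacheChainLocker(cache);
	cacheChainLocker.LockAllSourceCaches();

	// ... operate on the fully locked chain ...

	// Leaving the scope unlocks every cache in source -> consumer order and
	// releases the references held on them.
}
#endif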
236 
237 // The memory reserve that an allocation of a certain priority must not touch.
238 static const size_t kMemoryReserveForPriority[] = {
239 	VM_MEMORY_RESERVE_USER,		// user
240 	VM_MEMORY_RESERVE_SYSTEM,	// system
241 	0							// VIP
242 };
243 
244 
245 ObjectCache* gPageMappingsObjectCache;
246 
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248 
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
253 
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
255 
256 #if DEBUG_CACHE_LIST
257 
258 struct cache_info {
259 	VMCache*	cache;
260 	addr_t		page_count;
261 	addr_t		committed;
262 };
263 
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
266 
267 #endif	// DEBUG_CACHE_LIST
268 
269 
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 	bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 	int protection, int mapping, uint32 flags,
278 	const virtual_address_restrictions* addressRestrictions, bool kernel,
279 	VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
281 
282 
283 //	#pragma mark -
284 
285 
286 #if VM_PAGE_FAULT_TRACING
287 
288 namespace VMPageFaultTracing {
289 
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293 		:
294 		fAddress(address),
295 		fPC(pc),
296 		fWrite(write),
297 		fUser(user)
298 	{
299 		Initialized();
300 	}
301 
302 	virtual void AddDump(TraceOutput& out)
303 	{
304 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306 	}
307 
308 private:
309 	addr_t	fAddress;
310 	addr_t	fPC;
311 	bool	fWrite;
312 	bool	fUser;
313 };
314 
315 
316 // page fault errors
317 enum {
318 	PAGE_FAULT_ERROR_NO_AREA		= 0,
319 	PAGE_FAULT_ERROR_KERNEL_ONLY,
320 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 	PAGE_FAULT_ERROR_READ_PROTECTED,
322 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325 };
326 
327 
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 	PageFaultError(area_id area, status_t error)
331 		:
332 		fArea(area),
333 		fError(error)
334 	{
335 		Initialized();
336 	}
337 
338 	virtual void AddDump(TraceOutput& out)
339 	{
340 		switch (fError) {
341 			case PAGE_FAULT_ERROR_NO_AREA:
342 				out.Print("page fault error: no area");
343 				break;
344 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 				out.Print("page fault error: area: %ld, kernel only", fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 				out.Print("page fault error: area: %ld, write protected",
349 					fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_READ_PROTECTED:
352 				out.Print("page fault error: area: %ld, read protected", fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 				out.Print("page fault error: area: %ld, execute protected",
356 					fArea);
357 				break;
358 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 				out.Print("page fault error: kernel touching bad user memory");
360 				break;
361 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 				out.Print("page fault error: no address space");
363 				break;
364 			default:
365 				out.Print("page fault error: area: %ld, error: %s", fArea,
366 					strerror(fError));
367 				break;
368 		}
369 	}
370 
371 private:
372 	area_id		fArea;
373 	status_t	fError;
374 };
375 
376 
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 			vm_page* page)
381 		:
382 		fArea(area),
383 		fTopCache(topCache),
384 		fCache(cache),
385 		fPage(page)
386 	{
387 		Initialized();
388 	}
389 
390 	virtual void AddDump(TraceOutput& out)
391 	{
392 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 			"page: %p", fArea, fTopCache, fCache, fPage);
394 	}
395 
396 private:
397 	area_id		fArea;
398 	VMCache*	fTopCache;
399 	VMCache*	fCache;
400 	vm_page*	fPage;
401 };
402 
403 }	// namespace VMPageFaultTracing
404 
405 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 #	define TPF(x) ;
408 #endif	// VM_PAGE_FAULT_TRACING
409 
410 
411 //	#pragma mark -
412 
413 
414 /*!	The page's cache must be locked.
415 */
416 static inline void
417 increment_page_wired_count(vm_page* page)
418 {
419 	if (!page->IsMapped())
420 		atomic_add(&gMappedPagesCount, 1);
421 	page->IncrementWiredCount();
422 }
423 
424 
425 /*!	The page's cache must be locked.
426 */
427 static inline void
428 decrement_page_wired_count(vm_page* page)
429 {
430 	page->DecrementWiredCount();
431 	if (!page->IsMapped())
432 		atomic_add(&gMappedPagesCount, -1);
433 }
434 
435 
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
438 {
439 	return area->Base()
440 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441 }
442 
443 
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
447 {
448 	VMAreaHash::ReadLock();
449 
450 	VMArea* area = VMAreaHash::LookupLocked(id);
451 	if (area != NULL && area->address_space != addressSpace)
452 		area = NULL;
453 
454 	VMAreaHash::ReadUnlock();
455 
456 	return area;
457 }
458 
459 
460 static status_t
461 allocate_area_page_protections(VMArea* area)
462 {
463 	// In the page protections we store only the three user protections,
464 	// so we use 4 bits per page.
465 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 	area->page_protections = (uint8*)malloc_etc(bytes,
467 		HEAP_DONT_LOCK_KERNEL_SPACE);
468 	if (area->page_protections == NULL)
469 		return B_NO_MEMORY;
470 
471 	// init the page protections for all pages to that of the area
472 	uint32 areaProtection = area->protection
473 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 	memset(area->page_protections, areaProtection | (areaProtection << 4),
475 		bytes);
476 	return B_OK;
477 }
478 
479 
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482 {
483 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 	uint8& entry = area->page_protections[pageIndex / 2];
486 	if (pageIndex % 2 == 0)
487 		entry = (entry & 0xf0) | protection;
488 	else
489 		entry = (entry & 0x0f) | (protection << 4);
490 }
491 
492 
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
495 {
496 	if (area->page_protections == NULL)
497 		return area->protection;
498 
499 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 	uint32 protection = area->page_protections[pageIndex / 2];
501 	if (pageIndex % 2 == 0)
502 		protection &= 0x0f;
503 	else
504 		protection >>= 4;
505 
506 	// If this is a kernel area we translate the user flags to kernel flags.
507 	if (area->address_space == VMAddressSpace::Kernel()) {
508 		uint32 kernelProtection = 0;
509 		if ((protection & B_READ_AREA) != 0)
510 			kernelProtection |= B_KERNEL_READ_AREA;
511 		if ((protection & B_WRITE_AREA) != 0)
512 			kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 		return kernelProtection;
515 	}
516 
517 	return protection | B_KERNEL_READ_AREA
518 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519 }
520 
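
// Worked example of the 4-bit-per-page encoding used above (editorial sketch,
// not compiled): page N of an area occupies the low nibble of
// page_protections[N / 2] when N is even and the high nibble when N is odd.
#if 0
static uint32
example_page_protection_packing(VMArea* area)
{
	// For page index 5: byte 5 / 2 == 2, odd index -> high nibble.
	uint8 entry = area->page_protections[5 / 2];
	uint32 protection = entry >> 4;
		// holds the user bits B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA

	// set_area_page_protection()/get_area_page_protection() perform exactly
	// this packing and unpacking for a page address within the area.
	return protection;
}
#endif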
521 
522 /*!	The caller must have reserved as many pages as the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue,
575 		// since otherwise the page daemon wouldn't keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
586 
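
// A condensed sketch of the reserve/lock/map sequence that callers of
// map_page() follow (editorial addition modeled on the B_FULL_LOCK path in
// vm_create_anonymous_area(); not compiled). The caller is assumed to hold at
// least a read lock on the area's address space; the reservation count here
// is a rough stand-in for MaxPagesNeededToMap() plus one.
#if 0
static status_t
example_map_one_page(VMArea* area, VMCache* cache, addr_t address, off_t offset)
{
	vm_page_reservation reservation;
	vm_page_reserve_pages(&reservation, 2, VM_PRIORITY_SYSTEM);

	cache->Lock();

	vm_page* page = vm_page_allocate_page(&reservation, PAGE_STATE_WIRED);
	cache->InsertPage(page, offset);
	status_t status = map_page(area, page, address,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, &reservation);
	DEBUG_PAGE_ACCESS_END(page);

	cache->Unlock();
	vm_page_unreserve_pages(&reservation);
	return status;
}
#endif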
587 
588 /*!	The caller must hold the lock of the page's cache.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	The caller must hold the locks of all mapped pages' caches.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 /*!	Cuts a piece out of an area. If the given cut range covers the complete
610 	area, it is deleted. If it covers the beginning or the end, the area is
611 	resized accordingly. If the range covers some part in the middle of the
612 	area, it is split in two; in this case the second area is returned via
613 	\a _secondArea (the variable is left untouched in the other cases).
614 	The address space must be write locked.
615 	The caller must ensure that no part of the given range is wired.
616 */
617 static status_t
618 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
619 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
620 {
621 	// Does the cut range intersect with the area at all?
622 	addr_t areaLast = area->Base() + (area->Size() - 1);
623 	if (area->Base() > lastAddress || areaLast < address)
624 		return B_OK;
625 
626 	// Is the area fully covered?
627 	if (area->Base() >= address && areaLast <= lastAddress) {
628 		delete_area(addressSpace, area, false);
629 		return B_OK;
630 	}
631 
632 	int priority;
633 	uint32 allocationFlags;
634 	if (addressSpace == VMAddressSpace::Kernel()) {
635 		priority = VM_PRIORITY_SYSTEM;
636 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
637 			| HEAP_DONT_LOCK_KERNEL_SPACE;
638 	} else {
639 		priority = VM_PRIORITY_USER;
640 		allocationFlags = 0;
641 	}
642 
643 	VMCache* cache = vm_area_get_locked_cache(area);
644 	VMCacheChainLocker cacheChainLocker(cache);
645 	cacheChainLocker.LockAllSourceCaches();
646 
647 	// Cut the end only?
648 	if (areaLast <= lastAddress) {
649 		size_t oldSize = area->Size();
650 		size_t newSize = address - area->Base();
651 
652 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
653 			allocationFlags);
654 		if (error != B_OK)
655 			return error;
656 
657 		// unmap pages
658 		unmap_pages(area, address, oldSize - newSize);
659 
660 		// If no one else uses the area's cache, we can resize it, too.
661 		if (cache->areas == area && area->cache_next == NULL
662 			&& cache->consumers.IsEmpty()
663 			&& cache->type == CACHE_TYPE_RAM) {
664 			// Since VMCache::Resize() can temporarily drop the lock, we must
665 			// unlock all lower caches to prevent locking order inversion.
666 			cacheChainLocker.Unlock(cache);
667 			cache->Resize(cache->virtual_base + newSize, priority);
668 			cache->ReleaseRefAndUnlock();
669 		}
670 
671 		return B_OK;
672 	}
673 
674 	// Cut the beginning only?
675 	if (area->Base() >= address) {
676 		addr_t oldBase = area->Base();
677 		addr_t newBase = lastAddress + 1;
678 		size_t newSize = areaLast - lastAddress;
679 
680 		// unmap pages
681 		unmap_pages(area, oldBase, newBase - oldBase);
682 
683 		// resize the area
684 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
685 			allocationFlags);
686 		if (error != B_OK)
687 			return error;
688 
689 		// TODO: If no one else uses the area's cache, we should resize it, too!
690 
691 		area->cache_offset += newBase - oldBase;
692 
693 		return B_OK;
694 	}
695 
696 	// The tough part -- cut a piece out of the middle of the area.
697 	// We do that by shrinking the area to the beginning section and creating a
698 	// new area for the end section.
699 
700 	addr_t firstNewSize = address - area->Base();
701 	addr_t secondBase = lastAddress + 1;
702 	addr_t secondSize = areaLast - lastAddress;
703 
704 	// unmap pages
705 	unmap_pages(area, address, area->Size() - firstNewSize);
706 
707 	// resize the area
708 	addr_t oldSize = area->Size();
709 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
710 		allocationFlags);
711 	if (error != B_OK)
712 		return error;
713 
714 	// TODO: If no one else uses the area's cache, we might want to create a
715 	// new cache for the second area, transfer the concerned pages from the
716 	// first cache to it and resize the first cache.
717 
718 	// map the second area
719 	virtual_address_restrictions addressRestrictions = {};
720 	addressRestrictions.address = (void*)secondBase;
721 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
722 	VMArea* secondArea;
723 	error = map_backing_store(addressSpace, cache,
724 		area->cache_offset + (secondBase - area->Base()), area->name,
725 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
726 		&addressRestrictions, kernel, &secondArea, NULL);
727 	if (error != B_OK) {
728 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
729 		return error;
730 	}
731 
732 	// We need a cache reference for the new area.
733 	cache->AcquireRefLocked();
734 
735 	if (_secondArea != NULL)
736 		*_secondArea = secondArea;
737 
738 	return B_OK;
739 }
740 
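
// Worked example of the three cut_area() cases (editorial sketch, not
// compiled; assumes 4 KiB pages): for an area covering [0x1000, 0x8fff],
// cutting [0x7000, 0x8fff] shrinks its tail, cutting [0x1000, 0x2fff] shrinks
// its head, and cutting [0x3000, 0x4fff] splits it in two. The caller must
// hold the address space write lock and ensure the range is not wired.
#if 0
static void
example_cut_middle(VMAddressSpace* addressSpace, VMArea* area)
{
	VMArea* secondArea;
	if (cut_area(addressSpace, area, 0x3000, 0x4fff, &secondArea, true)
			== B_OK) {
		// area now covers [0x1000, 0x2fff] and secondArea [0x5000, 0x8fff];
		// both are backed by the original area's cache.
	}
}
#endif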
741 
742 /*!	Deletes all areas in the given address range.
743 	The address space must be write-locked.
744 	The caller must ensure that no part of the given range is wired.
745 */
746 static status_t
747 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
748 	bool kernel)
749 {
750 	size = PAGE_ALIGN(size);
751 	addr_t lastAddress = address + (size - 1);
752 
753 	// Check whether the caller is allowed to modify the concerned areas.
754 	if (!kernel) {
755 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
756 				VMArea* area = it.Next();) {
757 			addr_t areaLast = area->Base() + (area->Size() - 1);
758 			if (area->Base() < lastAddress && address < areaLast) {
759 				if (area->address_space == VMAddressSpace::Kernel()) {
760 					dprintf("unmap_address_range: team %" B_PRId32 " tried to "
761 						"unmap range of kernel area %" B_PRId32 " (%s)\n",
762 						team_get_current_team_id(), area->id, area->name);
763 					return B_NOT_ALLOWED;
764 				}
765 			}
766 		}
767 	}
768 
769 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
770 			VMArea* area = it.Next();) {
771 		addr_t areaLast = area->Base() + (area->Size() - 1);
772 		if (area->Base() < lastAddress && address < areaLast) {
773 			status_t error = cut_area(addressSpace, area, address,
774 				lastAddress, NULL, kernel);
775 			if (error != B_OK)
776 				return error;
777 				// Failing after already messing with areas is ugly, but we
778 				// can't do anything about it.
779 		}
780 	}
781 
782 	return B_OK;
783 }
784 
785 
786 /*! You need to hold the lock of the cache and the write lock of the address
787 	space when calling this function.
788 	Note that in case of error your cache will be temporarily unlocked.
789 	If \a addressSpec is \c B_EXACT_ADDRESS and the
790 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
791 	that no part of the specified address range (base \c *_virtualAddress, size
792 	\a size) is wired.
793 */
794 static status_t
795 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
796 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
797 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
798 	bool kernel, VMArea** _area, void** _virtualAddress)
799 {
800 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
801 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
802 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
803 		addressRestrictions->address, offset, size,
804 		addressRestrictions->address_specification, wiring, protection,
805 		_area, areaName));
806 	cache->AssertLocked();
807 
808 	if (size == 0) {
809 #if KDEBUG
810 		panic("map_backing_store(): called with size=0 for area '%s'!",
811 			areaName);
812 #endif
813 		return B_BAD_VALUE;
814 	}
815 
816 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
817 		| HEAP_DONT_LOCK_KERNEL_SPACE;
818 	int priority;
819 	if (addressSpace != VMAddressSpace::Kernel()) {
820 		priority = VM_PRIORITY_USER;
821 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
822 		priority = VM_PRIORITY_VIP;
823 		allocationFlags |= HEAP_PRIORITY_VIP;
824 	} else
825 		priority = VM_PRIORITY_SYSTEM;
826 
827 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
828 		allocationFlags);
829 	if (area == NULL)
830 		return B_NO_MEMORY;
831 
832 	status_t status;
833 
834 	// if this is a private map, we need to create a new cache
835 	// to handle the private copies of pages as they are written to
836 	VMCache* sourceCache = cache;
837 	if (mapping == REGION_PRIVATE_MAP) {
838 		VMCache* newCache;
839 
840 		// create an anonymous cache
841 		status = VMCacheFactory::CreateAnonymousCache(newCache,
842 			(protection & B_STACK_AREA) != 0
843 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
844 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
845 		if (status != B_OK)
846 			goto err1;
847 
848 		newCache->Lock();
849 		newCache->temporary = 1;
850 		newCache->virtual_base = offset;
851 		newCache->virtual_end = offset + size;
852 
853 		cache->AddConsumer(newCache);
854 
855 		cache = newCache;
856 	}
857 
858 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
859 		status = cache->SetMinimalCommitment(size, priority);
860 		if (status != B_OK)
861 			goto err2;
862 	}
863 
864 	// check to see if this address space has entered DELETE state
865 	if (addressSpace->IsBeingDeleted()) {
866 		// okay, someone is trying to delete this address space now, so we can't
867 		// insert the area and have to back out
868 		status = B_BAD_TEAM_ID;
869 		goto err2;
870 	}
871 
872 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
873 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
874 		status = unmap_address_range(addressSpace,
875 			(addr_t)addressRestrictions->address, size, kernel);
876 		if (status != B_OK)
877 			goto err2;
878 	}
879 
880 	status = addressSpace->InsertArea(area, size, addressRestrictions,
881 		allocationFlags, _virtualAddress);
882 	if (status == B_NO_MEMORY
883 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
884 		// TODO: At present, there is no way to notify the low_resource monitor
885 		// that kernel address space is fragmented, nor does it check for this
886 		// automatically. Due to how many locks are held, we cannot wait here
887 		// for space to be freed up, but it would be good to at least notify
888 		// that we tried and failed to allocate some amount.
889 	}
890 	if (status != B_OK)
891 		goto err2;
892 
893 	// attach the cache to the area
894 	area->cache = cache;
895 	area->cache_offset = offset;
896 
897 	// point the cache back to the area
898 	cache->InsertAreaLocked(area);
899 	if (mapping == REGION_PRIVATE_MAP)
900 		cache->Unlock();
901 
902 	// insert the area in the global area hash table
903 	VMAreaHash::Insert(area);
904 
905 	// grab a ref to the address space (the area holds this)
906 	addressSpace->Get();
907 
908 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
909 //		cache, sourceCache, areaName, area);
910 
911 	*_area = area;
912 	return B_OK;
913 
914 err2:
915 	if (mapping == REGION_PRIVATE_MAP) {
916 		// We created this cache, so we must delete it again. Note that we
917 		// need to temporarily unlock the source cache or we'll otherwise
918 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
919 		sourceCache->Unlock();
920 		cache->ReleaseRefAndUnlock();
921 		sourceCache->Lock();
922 	}
923 err1:
924 	addressSpace->DeleteArea(area, allocationFlags);
925 	return status;
926 }
927 
928 
929 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
930 	  locker1, locker2).
931 */
932 template<typename LockerType1, typename LockerType2>
933 static inline bool
934 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
935 {
936 	area->cache->AssertLocked();
937 
938 	VMAreaUnwiredWaiter waiter;
939 	if (!area->AddWaiterIfWired(&waiter))
940 		return false;
941 
942 	// unlock everything and wait
943 	if (locker1 != NULL)
944 		locker1->Unlock();
945 	if (locker2 != NULL)
946 		locker2->Unlock();
947 
948 	waiter.waitEntry.Wait();
949 
950 	return true;
951 }
952 
953 
954 /*!	Checks whether the given area has any wired ranges intersecting with the
955 	specified range and waits, if so.
956 
957 	When it has to wait, the function calls \c Unlock() on both \a locker1
958 	and \a locker2, if given.
959 	The area's top cache must be locked and must be unlocked as a side effect
960 	of calling \c Unlock() on either \a locker1 or \a locker2.
961 
962 	If the function does not have to wait it does not modify or unlock any
963 	object.
964 
965 	\param area The area to be checked.
966 	\param base The base address of the range to check.
967 	\param size The size of the address range to check.
968 	\param locker1 An object to be unlocked before starting to wait (may
969 		be \c NULL).
970 	\param locker2 An object to be unlocked before starting to wait (may
971 		be \c NULL).
972 	\return \c true, if the function had to wait, \c false otherwise.
973 */
974 template<typename LockerType1, typename LockerType2>
975 static inline bool
976 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
977 	LockerType1* locker1, LockerType2* locker2)
978 {
979 	area->cache->AssertLocked();
980 
981 	VMAreaUnwiredWaiter waiter;
982 	if (!area->AddWaiterIfWired(&waiter, base, size))
983 		return false;
984 
985 	// unlock everything and wait
986 	if (locker1 != NULL)
987 		locker1->Unlock();
988 	if (locker2 != NULL)
989 		locker2->Unlock();
990 
991 	waiter.waitEntry.Wait();
992 
993 	return true;
994 }
995 
996 
997 /*!	Checks whether the given address space has any wired ranges intersecting
998 	with the specified range and waits, if so.
999 
1000 	Similar to wait_if_area_range_is_wired(), with the following differences:
1001 	- All areas intersecting with the range are checked (respectively all until
1002 	  one is found that contains a wired range intersecting with the given
1003 	  range).
1004 	- The given address space must at least be read-locked and must be unlocked
1005 	  when \c Unlock() is called on \a locker.
1006 	- None of the areas' caches are allowed to be locked.
1007 */
1008 template<typename LockerType>
1009 static inline bool
1010 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1011 	size_t size, LockerType* locker)
1012 {
1013 	addr_t end = base + size - 1;
1014 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
1015 			VMArea* area = it.Next();) {
1016 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
1017 		if (area->Base() > end)
1018 			return false;
1019 
1020 		if (base >= area->Base() + area->Size() - 1)
1021 			continue;
1022 
1023 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1024 
1025 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1026 			return true;
1027 	}
1028 
1029 	return false;
1030 }
1031 
1032 
1033 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1034 	It must be called in a situation where the kernel address space may be
1035 	locked.
1036 */
1037 status_t
1038 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1039 {
1040 	AddressSpaceReadLocker locker;
1041 	VMArea* area;
1042 	status_t status = locker.SetFromArea(id, area);
1043 	if (status != B_OK)
1044 		return status;
1045 
1046 	if (area->page_protections == NULL) {
1047 		status = allocate_area_page_protections(area);
1048 		if (status != B_OK)
1049 			return status;
1050 	}
1051 
1052 	*cookie = (void*)area;
1053 	return B_OK;
1054 }
1055 
1056 
1057 /*!	This is a debug helper function intended only for very specific use
1058 	cases.
1059 	Sets protection for the given address range to the protection specified.
1060 	If \a protection is 0 then the involved pages will be marked non-present
1061 	in the translation map to cause a fault on access. The pages aren't
1062 	actually unmapped, however, so that they can be marked present again with
1063 	additional calls to this function. For this to work the area must be
1064 	fully locked in memory so that the pages aren't otherwise touched.
1065 	This function does not lock the kernel address space and needs to be
1066 	supplied with a \a cookie retrieved from a successful call to
1067 	vm_prepare_kernel_area_debug_protection().
1068 */
1069 status_t
1070 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1071 	uint32 protection)
1072 {
1073 	// check address range
1074 	addr_t address = (addr_t)_address;
1075 	size = PAGE_ALIGN(size);
1076 
1077 	if ((address % B_PAGE_SIZE) != 0
1078 		|| (addr_t)address + size < (addr_t)address
1079 		|| !IS_KERNEL_ADDRESS(address)
1080 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1081 		return B_BAD_VALUE;
1082 	}
1083 
1084 	// Translate the kernel protection to user protection as we only store that.
1085 	if ((protection & B_KERNEL_READ_AREA) != 0)
1086 		protection |= B_READ_AREA;
1087 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1088 		protection |= B_WRITE_AREA;
1089 
1090 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1091 	VMTranslationMap* map = addressSpace->TranslationMap();
1092 	VMArea* area = (VMArea*)cookie;
1093 
1094 	addr_t offset = address - area->Base();
1095 	if (area->Size() - offset < size) {
1096 		panic("protect range not fully within supplied area");
1097 		return B_BAD_VALUE;
1098 	}
1099 
1100 	if (area->page_protections == NULL) {
1101 		panic("area has no page protections");
1102 		return B_BAD_VALUE;
1103 	}
1104 
1105 	// Invalidate the mapping entries so any access to them will fault, or
1106 	// restore the mapping entries unchanged so that lookups will succeed again.
1107 	map->Lock();
1108 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1109 	map->Unlock();
1110 
1111 	// And set the proper page protections so that the fault case will actually
1112 	// fail and not simply try to map a new page.
1113 	for (addr_t pageAddress = address; pageAddress < address + size;
1114 			pageAddress += B_PAGE_SIZE) {
1115 		set_area_page_protection(area, pageAddress, protection);
1116 	}
1117 
1118 	return B_OK;
1119 }
1120 
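
// Intended call sequence for the two debug helpers above (editorial sketch,
// not compiled): prepare the cookie once, while locking the kernel address
// space is still permissible, then toggle page presence later without taking
// that lock.
#if 0
static void
example_debug_protect(area_id id, void* address, size_t size)
{
	void* cookie;
	if (vm_prepare_kernel_area_debug_protection(id, &cookie) != B_OK)
		return;

	// Make the range fault on any access ...
	vm_set_kernel_area_debug_protection(cookie, address, size, 0);

	// ... and later restore full kernel access again.
	vm_set_kernel_area_debug_protection(cookie, address, size,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
}
#endif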
1121 
1122 status_t
1123 vm_block_address_range(const char* name, void* address, addr_t size)
1124 {
1125 	if (!arch_vm_supports_protection(0))
1126 		return B_NOT_SUPPORTED;
1127 
1128 	AddressSpaceWriteLocker locker;
1129 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1130 	if (status != B_OK)
1131 		return status;
1132 
1133 	VMAddressSpace* addressSpace = locker.AddressSpace();
1134 
1135 	// create an anonymous cache
1136 	VMCache* cache;
1137 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1138 		VM_PRIORITY_SYSTEM);
1139 	if (status != B_OK)
1140 		return status;
1141 
1142 	cache->temporary = 1;
1143 	cache->virtual_end = size;
1144 	cache->Lock();
1145 
1146 	VMArea* area;
1147 	virtual_address_restrictions addressRestrictions = {};
1148 	addressRestrictions.address = address;
1149 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1150 	status = map_backing_store(addressSpace, cache, 0, name, size,
1151 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1152 		true, &area, NULL);
1153 	if (status != B_OK) {
1154 		cache->ReleaseRefAndUnlock();
1155 		return status;
1156 	}
1157 
1158 	cache->Unlock();
1159 	area->cache_type = CACHE_TYPE_RAM;
1160 	return area->id;
1161 }
1162 
1163 
1164 status_t
1165 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1166 {
1167 	AddressSpaceWriteLocker locker(team);
1168 	if (!locker.IsLocked())
1169 		return B_BAD_TEAM_ID;
1170 
1171 	VMAddressSpace* addressSpace = locker.AddressSpace();
1172 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1173 		addressSpace == VMAddressSpace::Kernel()
1174 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1175 }
1176 
1177 
1178 status_t
1179 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1180 	addr_t size, uint32 flags)
1181 {
1182 	if (size == 0)
1183 		return B_BAD_VALUE;
1184 
1185 	AddressSpaceWriteLocker locker(team);
1186 	if (!locker.IsLocked())
1187 		return B_BAD_TEAM_ID;
1188 
1189 	virtual_address_restrictions addressRestrictions = {};
1190 	addressRestrictions.address = *_address;
1191 	addressRestrictions.address_specification = addressSpec;
1192 	VMAddressSpace* addressSpace = locker.AddressSpace();
1193 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1194 		addressSpace == VMAddressSpace::Kernel()
1195 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1196 		_address);
1197 }
1198 
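
// Editorial sketch (not compiled) of pairing the two calls above: reserve a
// range in the kernel address space and give it back later. The size is
// illustrative only.
#if 0
static void
example_reserve_address_range()
{
	void* address = NULL;
	if (vm_reserve_address_range(VMAddressSpace::KernelID(), &address,
			B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE, 0) == B_OK) {
		// ... later, release the reservation again:
		vm_unreserve_address_range(VMAddressSpace::KernelID(), address,
			16 * B_PAGE_SIZE);
	}
}
#endif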
1199 
1200 area_id
1201 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1202 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1203 	const virtual_address_restrictions* virtualAddressRestrictions,
1204 	const physical_address_restrictions* physicalAddressRestrictions,
1205 	bool kernel, void** _address)
1206 {
1207 	VMArea* area;
1208 	VMCache* cache;
1209 	vm_page* page = NULL;
1210 	bool isStack = (protection & B_STACK_AREA) != 0;
1211 	page_num_t guardPages;
1212 	bool canOvercommit = false;
1213 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1214 		? VM_PAGE_ALLOC_CLEAR : 0;
1215 
1216 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1217 		team, name, size));
1218 
1219 	size = PAGE_ALIGN(size);
1220 	guardSize = PAGE_ALIGN(guardSize);
1221 	guardPages = guardSize / B_PAGE_SIZE;
1222 
1223 	if (size == 0 || size < guardSize)
1224 		return B_BAD_VALUE;
1225 	if (!arch_vm_supports_protection(protection))
1226 		return B_NOT_SUPPORTED;
1227 
1228 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1229 		canOvercommit = true;
1230 
1231 #ifdef DEBUG_KERNEL_STACKS
1232 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1233 		isStack = true;
1234 #endif
1235 
1236 	// check parameters
1237 	switch (virtualAddressRestrictions->address_specification) {
1238 		case B_ANY_ADDRESS:
1239 		case B_EXACT_ADDRESS:
1240 		case B_BASE_ADDRESS:
1241 		case B_ANY_KERNEL_ADDRESS:
1242 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1243 		case B_RANDOMIZED_ANY_ADDRESS:
1244 		case B_RANDOMIZED_BASE_ADDRESS:
1245 			break;
1246 
1247 		default:
1248 			return B_BAD_VALUE;
1249 	}
1250 
1251 	// If low or high physical address restrictions are given, we force
1252 	// B_CONTIGUOUS wiring, since only then we'll use
1253 	// vm_page_allocate_page_run() which deals with those restrictions.
1254 	if (physicalAddressRestrictions->low_address != 0
1255 		|| physicalAddressRestrictions->high_address != 0) {
1256 		wiring = B_CONTIGUOUS;
1257 	}
1258 
1259 	physical_address_restrictions stackPhysicalRestrictions;
1260 	bool doReserveMemory = false;
1261 	switch (wiring) {
1262 		case B_NO_LOCK:
1263 			break;
1264 		case B_FULL_LOCK:
1265 		case B_LAZY_LOCK:
1266 		case B_CONTIGUOUS:
1267 			doReserveMemory = true;
1268 			break;
1269 		case B_ALREADY_WIRED:
1270 			break;
1271 		case B_LOMEM:
1272 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1273 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1274 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1275 			wiring = B_CONTIGUOUS;
1276 			doReserveMemory = true;
1277 			break;
1278 		case B_32_BIT_FULL_LOCK:
1279 			if (B_HAIKU_PHYSICAL_BITS <= 32
1280 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1281 				wiring = B_FULL_LOCK;
1282 				doReserveMemory = true;
1283 				break;
1284 			}
1285 			// TODO: We don't really support this mode efficiently. Just fall
1286 			// through for now ...
1287 		case B_32_BIT_CONTIGUOUS:
1288 			#if B_HAIKU_PHYSICAL_BITS > 32
1289 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1290 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1291 					stackPhysicalRestrictions.high_address
1292 						= (phys_addr_t)1 << 32;
1293 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1294 				}
1295 			#endif
1296 			wiring = B_CONTIGUOUS;
1297 			doReserveMemory = true;
1298 			break;
1299 		default:
1300 			return B_BAD_VALUE;
1301 	}
1302 
1303 	// Optimization: For a single-page contiguous allocation without low/high
1304 	// memory restriction B_FULL_LOCK wiring suffices.
1305 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1306 		&& physicalAddressRestrictions->low_address == 0
1307 		&& physicalAddressRestrictions->high_address == 0) {
1308 		wiring = B_FULL_LOCK;
1309 	}
1310 
1311 	// For full lock or contiguous areas we're also going to map the pages and
1312 	// thus need to reserve pages for the mapping backend upfront.
1313 	addr_t reservedMapPages = 0;
1314 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1315 		AddressSpaceWriteLocker locker;
1316 		status_t status = locker.SetTo(team);
1317 		if (status != B_OK)
1318 			return status;
1319 
1320 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1321 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1322 	}
1323 
1324 	int priority;
1325 	if (team != VMAddressSpace::KernelID())
1326 		priority = VM_PRIORITY_USER;
1327 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1328 		priority = VM_PRIORITY_VIP;
1329 	else
1330 		priority = VM_PRIORITY_SYSTEM;
1331 
1332 	// Reserve memory before acquiring the address space lock. This reduces the
1333 	// chances of failure, since while holding the write lock to the address
1334 	// space (if it is the kernel address space that is), the low memory handler
1335 	// won't be able to free anything for us.
1336 	addr_t reservedMemory = 0;
1337 	if (doReserveMemory) {
1338 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1339 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1340 			return B_NO_MEMORY;
1341 		reservedMemory = size;
1342 		// TODO: We don't reserve the memory for the pages for the page
1343 		// directories/tables. We actually need to, since we currently don't
1344 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1345 		// there are actually fewer physical pages than there should be, which
1346 		// can get the VM into trouble in low memory situations.
1347 	}
1348 
1349 	AddressSpaceWriteLocker locker;
1350 	VMAddressSpace* addressSpace;
1351 	status_t status;
1352 
1353 	// For full lock areas reserve the pages before locking the address
1354 	// space. E.g. block caches can't release their memory while we hold the
1355 	// address space lock.
1356 	page_num_t reservedPages = reservedMapPages;
1357 	if (wiring == B_FULL_LOCK)
1358 		reservedPages += size / B_PAGE_SIZE;
1359 
1360 	vm_page_reservation reservation;
1361 	if (reservedPages > 0) {
1362 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1363 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1364 					priority)) {
1365 				reservedPages = 0;
1366 				status = B_WOULD_BLOCK;
1367 				goto err0;
1368 			}
1369 		} else
1370 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1371 	}
1372 
1373 	if (wiring == B_CONTIGUOUS) {
1374 		// We try to allocate the page run upfront, as a contiguous allocation
1375 		// can easily fail when physical memory is fragmented or scarce.
1376 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1377 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1378 		if (page == NULL) {
1379 			status = B_NO_MEMORY;
1380 			goto err0;
1381 		}
1382 	}
1383 
1384 	// Lock the address space and, if B_EXACT_ADDRESS and
1385 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1386 	// is not wired.
1387 	do {
1388 		status = locker.SetTo(team);
1389 		if (status != B_OK)
1390 			goto err1;
1391 
1392 		addressSpace = locker.AddressSpace();
1393 	} while (virtualAddressRestrictions->address_specification
1394 			== B_EXACT_ADDRESS
1395 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1396 		&& wait_if_address_range_is_wired(addressSpace,
1397 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1398 
1399 	// create an anonymous cache
1400 	// if it's a stack, make sure that at least two pages are available
1401 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1402 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1403 		wiring == B_NO_LOCK, priority);
1404 	if (status != B_OK)
1405 		goto err1;
1406 
1407 	cache->temporary = 1;
1408 	cache->virtual_end = size;
1409 	cache->committed_size = reservedMemory;
1410 		// TODO: This should be done via a method.
1411 	reservedMemory = 0;
1412 
1413 	cache->Lock();
1414 
1415 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1416 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1417 		kernel, &area, _address);
1418 
1419 	if (status != B_OK) {
1420 		cache->ReleaseRefAndUnlock();
1421 		goto err1;
1422 	}
1423 
1424 	locker.DegradeToReadLock();
1425 
1426 	switch (wiring) {
1427 		case B_NO_LOCK:
1428 		case B_LAZY_LOCK:
1429 			// do nothing - the pages are mapped in as needed
1430 			break;
1431 
1432 		case B_FULL_LOCK:
1433 		{
1434 			// Allocate and map all pages for this area
1435 
1436 			off_t offset = 0;
1437 			for (addr_t address = area->Base();
1438 					address < area->Base() + (area->Size() - 1);
1439 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1440 #ifdef DEBUG_KERNEL_STACKS
1441 #	ifdef STACK_GROWS_DOWNWARDS
1442 				if (isStack && address < area->Base()
1443 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1444 #	else
1445 				if (isStack && address >= area->Base() + area->Size()
1446 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1447 #	endif
1448 					continue;
1449 #endif
1450 				vm_page* page = vm_page_allocate_page(&reservation,
1451 					PAGE_STATE_WIRED | pageAllocFlags);
1452 				cache->InsertPage(page, offset);
1453 				map_page(area, page, address, protection, &reservation);
1454 
1455 				DEBUG_PAGE_ACCESS_END(page);
1456 			}
1457 
1458 			break;
1459 		}
1460 
1461 		case B_ALREADY_WIRED:
1462 		{
1463 			// The pages should already be mapped. This is only really useful
1464 			// during boot time. Find the appropriate vm_page objects and stick
1465 			// them in the cache object.
1466 			VMTranslationMap* map = addressSpace->TranslationMap();
1467 			off_t offset = 0;
1468 
1469 			if (!gKernelStartup)
1470 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1471 
1472 			map->Lock();
1473 
1474 			for (addr_t virtualAddress = area->Base();
1475 					virtualAddress < area->Base() + (area->Size() - 1);
1476 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1477 				phys_addr_t physicalAddress;
1478 				uint32 flags;
1479 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1480 				if (status < B_OK) {
1481 					panic("looking up mapping failed for va 0x%lx\n",
1482 						virtualAddress);
1483 				}
1484 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1485 				if (page == NULL) {
1486 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1487 						"\n", physicalAddress);
1488 				}
1489 
1490 				DEBUG_PAGE_ACCESS_START(page);
1491 
1492 				cache->InsertPage(page, offset);
1493 				increment_page_wired_count(page);
1494 				vm_page_set_state(page, PAGE_STATE_WIRED);
1495 				page->busy = false;
1496 
1497 				DEBUG_PAGE_ACCESS_END(page);
1498 			}
1499 
1500 			map->Unlock();
1501 			break;
1502 		}
1503 
1504 		case B_CONTIGUOUS:
1505 		{
1506 			// We have already allocated our contiguous page run, so we can now
1507 			// just map the pages in the address space
1508 			VMTranslationMap* map = addressSpace->TranslationMap();
1509 			phys_addr_t physicalAddress
1510 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1511 			addr_t virtualAddress = area->Base();
1512 			off_t offset = 0;
1513 
1514 			map->Lock();
1515 
1516 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1517 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1518 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1519 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1520 				if (page == NULL)
1521 					panic("couldn't lookup physical page just allocated\n");
1522 
1523 				status = map->Map(virtualAddress, physicalAddress, protection,
1524 					area->MemoryType(), &reservation);
1525 				if (status < B_OK)
1526 					panic("couldn't map physical page in page run\n");
1527 
1528 				cache->InsertPage(page, offset);
1529 				increment_page_wired_count(page);
1530 
1531 				DEBUG_PAGE_ACCESS_END(page);
1532 			}
1533 
1534 			map->Unlock();
1535 			break;
1536 		}
1537 
1538 		default:
1539 			break;
1540 	}
1541 
1542 	cache->Unlock();
1543 
1544 	if (reservedPages > 0)
1545 		vm_page_unreserve_pages(&reservation);
1546 
1547 	TRACE(("vm_create_anonymous_area: done\n"));
1548 
1549 	area->cache_type = CACHE_TYPE_RAM;
1550 	return area->id;
1551 
1552 err1:
1553 	if (wiring == B_CONTIGUOUS) {
1554 		// we allocated the contiguous page run upfront, so free its pages again
1555 		phys_addr_t pageNumber = page->physical_page_number;
1556 		int32 i;
1557 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1558 			page = vm_lookup_page(pageNumber);
1559 			if (page == NULL)
1560 				panic("couldn't lookup physical page just allocated\n");
1561 
1562 			vm_page_set_state(page, PAGE_STATE_FREE);
1563 		}
1564 	}
1565 
1566 err0:
1567 	if (reservedPages > 0)
1568 		vm_page_unreserve_pages(&reservation);
1569 	if (reservedMemory > 0)
1570 		vm_unreserve_memory(reservedMemory);
1571 
1572 	return status;
1573 }
1574 
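
// Editorial sketch (not compiled) of a typical kernel-side call to
// vm_create_anonymous_area(): a fully locked, kernel-only RAM area placed
// anywhere in the kernel address space. The area name and size are
// illustrative.
#if 0
static area_id
example_create_kernel_buffer(void** _address)
{
	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
	physical_address_restrictions physicalRestrictions = {};

	return vm_create_anonymous_area(VMAddressSpace::KernelID(),
		"example buffer", 4 * B_PAGE_SIZE, B_FULL_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
		&physicalRestrictions, true, _address);
}
#endif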
1575 
1576 area_id
1577 vm_map_physical_memory(team_id team, const char* name, void** _address,
1578 	uint32 addressSpec, addr_t size, uint32 protection,
1579 	phys_addr_t physicalAddress, bool alreadyWired)
1580 {
1581 	VMArea* area;
1582 	VMCache* cache;
1583 	addr_t mapOffset;
1584 
1585 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1586 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1587 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1588 		addressSpec, size, protection, physicalAddress));
1589 
1590 	if (!arch_vm_supports_protection(protection))
1591 		return B_NOT_SUPPORTED;
1592 
1593 	AddressSpaceWriteLocker locker(team);
1594 	if (!locker.IsLocked())
1595 		return B_BAD_TEAM_ID;
1596 
1597 	// If the physical address is not page-aligned, move the start of the
1598 	// mapping down to a page boundary and enlarge the size accordingly.
1599 	mapOffset = physicalAddress % B_PAGE_SIZE;
1600 	size += mapOffset;
1601 	physicalAddress -= mapOffset;
1602 
1603 	size = PAGE_ALIGN(size);
1604 
1605 	// create a device cache
1606 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1607 	if (status != B_OK)
1608 		return status;
1609 
1610 	cache->virtual_end = size;
1611 
1612 	cache->Lock();
1613 
1614 	virtual_address_restrictions addressRestrictions = {};
1615 	addressRestrictions.address = *_address;
1616 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1617 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1618 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1619 		true, &area, _address);
1620 
1621 	if (status < B_OK)
1622 		cache->ReleaseRefLocked();
1623 
1624 	cache->Unlock();
1625 
1626 	if (status == B_OK) {
1627 		// set requested memory type -- use uncached, if not given
1628 		uint32 memoryType = addressSpec & B_MTR_MASK;
1629 		if (memoryType == 0)
1630 			memoryType = B_MTR_UC;
1631 
1632 		area->SetMemoryType(memoryType);
1633 
1634 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1635 		if (status != B_OK)
1636 			delete_area(locker.AddressSpace(), area, false);
1637 	}
1638 
1639 	if (status != B_OK)
1640 		return status;
1641 
1642 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1643 
1644 	if (alreadyWired) {
1645 		// The area is already mapped, but possibly not with the right
1646 		// memory type.
1647 		map->Lock();
1648 		map->ProtectArea(area, area->protection);
1649 		map->Unlock();
1650 	} else {
1651 		// Map the area completely.
1652 
1653 		// reserve pages needed for the mapping
1654 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1655 			area->Base() + (size - 1));
1656 		vm_page_reservation reservation;
1657 		vm_page_reserve_pages(&reservation, reservePages,
1658 			team == VMAddressSpace::KernelID()
1659 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1660 
1661 		map->Lock();
1662 
1663 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1664 			map->Map(area->Base() + offset, physicalAddress + offset,
1665 				protection, area->MemoryType(), &reservation);
1666 		}
1667 
1668 		map->Unlock();
1669 
1670 		vm_page_unreserve_pages(&reservation);
1671 	}
1672 
1673 	// Offset the returned pointer into the new area by the same amount the
1674 	// input physical address was offset.
1675 	*_address = (void*)((addr_t)*_address + mapOffset);
1676 
1677 	area->cache_type = CACHE_TYPE_DEVICE;
1678 	return area->id;
1679 }
1680 
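
// Editorial sketch (not compiled) of mapping a device's register block with
// vm_map_physical_memory(); the area name and size are placeholders. Note
// that the address returned in *_virtual is adjusted by the sub-page offset
// of the physical address, as done at the end of the function above.
#if 0
static area_id
example_map_mmio(phys_addr_t registersPhysical, void** _virtual)
{
	return vm_map_physical_memory(VMAddressSpace::KernelID(), "example mmio",
		_virtual, B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, registersPhysical, false);
}
#endif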
1681 
1682 /*!	Don't use!
1683 	TODO: This function was introduced to map physical page vecs to
1684 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1685 	use a device cache and does not track vm_page::wired_count!
1686 */
1687 area_id
1688 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1689 	uint32 addressSpec, addr_t* _size, uint32 protection,
1690 	struct generic_io_vec* vecs, uint32 vecCount)
1691 {
1692 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1693 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1694 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1695 		addressSpec, _size, protection, vecs, vecCount));
1696 
1697 	if (!arch_vm_supports_protection(protection)
1698 		|| (addressSpec & B_MTR_MASK) != 0) {
1699 		return B_NOT_SUPPORTED;
1700 	}
1701 
1702 	AddressSpaceWriteLocker locker(team);
1703 	if (!locker.IsLocked())
1704 		return B_BAD_TEAM_ID;
1705 
1706 	if (vecCount == 0)
1707 		return B_BAD_VALUE;
1708 
1709 	addr_t size = 0;
1710 	for (uint32 i = 0; i < vecCount; i++) {
1711 		if (vecs[i].base % B_PAGE_SIZE != 0
1712 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1713 			return B_BAD_VALUE;
1714 		}
1715 
1716 		size += vecs[i].length;
1717 	}
1718 
1719 	// create a device cache
1720 	VMCache* cache;
1721 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1722 	if (result != B_OK)
1723 		return result;
1724 
1725 	cache->virtual_end = size;
1726 
1727 	cache->Lock();
1728 
1729 	VMArea* area;
1730 	virtual_address_restrictions addressRestrictions = {};
1731 	addressRestrictions.address = *_address;
1732 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1733 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1734 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1735 		&addressRestrictions, true, &area, _address);
1736 
1737 	if (result != B_OK)
1738 		cache->ReleaseRefLocked();
1739 
1740 	cache->Unlock();
1741 
1742 	if (result != B_OK)
1743 		return result;
1744 
1745 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1746 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1747 		area->Base() + (size - 1));
1748 
1749 	vm_page_reservation reservation;
1750 	vm_page_reserve_pages(&reservation, reservePages,
1751 			team == VMAddressSpace::KernelID()
1752 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1753 	map->Lock();
1754 
1755 	uint32 vecIndex = 0;
1756 	size_t vecOffset = 0;
1757 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1758 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1759 			vecOffset = 0;
1760 			vecIndex++;
1761 		}
1762 
1763 		if (vecIndex >= vecCount)
1764 			break;
1765 
1766 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1767 			protection, area->MemoryType(), &reservation);
1768 
1769 		vecOffset += B_PAGE_SIZE;
1770 	}
1771 
1772 	map->Unlock();
1773 	vm_page_unreserve_pages(&reservation);
1774 
1775 	if (_size != NULL)
1776 		*_size = size;
1777 
1778 	area->cache_type = CACHE_TYPE_DEVICE;
1779 	return area->id;
1780 }
1781 
1782 
1783 area_id
1784 vm_create_null_area(team_id team, const char* name, void** address,
1785 	uint32 addressSpec, addr_t size, uint32 flags)
1786 {
1787 	size = PAGE_ALIGN(size);
1788 
1789 	// Lock the address space and, if B_EXACT_ADDRESS and
1790 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1791 	// is not wired.
1792 	AddressSpaceWriteLocker locker;
1793 	do {
1794 		if (locker.SetTo(team) != B_OK)
1795 			return B_BAD_TEAM_ID;
1796 	} while (addressSpec == B_EXACT_ADDRESS
1797 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1798 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1799 			(addr_t)*address, size, &locker));
1800 
1801 	// create a null cache
1802 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1803 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1804 	VMCache* cache;
1805 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1806 	if (status != B_OK)
1807 		return status;
1808 
1809 	cache->temporary = 1;
1810 	cache->virtual_end = size;
1811 
1812 	cache->Lock();
1813 
1814 	VMArea* area;
1815 	virtual_address_restrictions addressRestrictions = {};
1816 	addressRestrictions.address = *address;
1817 	addressRestrictions.address_specification = addressSpec;
1818 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1819 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1820 		&addressRestrictions, true, &area, address);
1821 
1822 	if (status < B_OK) {
1823 		cache->ReleaseRefAndUnlock();
1824 		return status;
1825 	}
1826 
1827 	cache->Unlock();
1828 
1829 	area->cache_type = CACHE_TYPE_NULL;
1830 	return area->id;
1831 }
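
// Illustrative sketch (disabled with #if 0): reserving a 16-page range of
// kernel address space with a null area created by vm_create_null_area()
// above. The area has no memory behind it, so accesses are expected to fault;
// the area name is hypothetical.
#if 0
	void* address = NULL;
	area_id area = vm_create_null_area(VMAddressSpace::KernelID(),
		"example reserved range", &address, B_ANY_KERNEL_ADDRESS,
		16 * B_PAGE_SIZE, 0);
#endif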
1832 
1833 
1834 /*!	Creates the vnode cache for the specified \a vnode.
1835 	The vnode has to be marked busy when calling this function.
1836 */
1837 status_t
1838 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1839 {
1840 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1841 }
1842 
1843 
1844 /*!	\a cache must be locked. The area's address space must be read-locked.
1845 */
1846 static void
1847 pre_map_area_pages(VMArea* area, VMCache* cache,
1848 	vm_page_reservation* reservation)
1849 {
1850 	addr_t baseAddress = area->Base();
1851 	addr_t cacheOffset = area->cache_offset;
1852 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1853 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1854 
1855 	for (VMCachePagesTree::Iterator it
1856 				= cache->pages.GetIterator(firstPage, true, true);
1857 			vm_page* page = it.Next();) {
1858 		if (page->cache_offset >= endPage)
1859 			break;
1860 
1861 		// skip busy and inactive pages
1862 		if (page->busy || page->usage_count == 0)
1863 			continue;
1864 
1865 		DEBUG_PAGE_ACCESS_START(page);
1866 		map_page(area, page,
1867 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1868 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1869 		DEBUG_PAGE_ACCESS_END(page);
1870 	}
1871 }
1872 
1873 
1874 /*!	Will map the file specified by \a fd to an area in memory.
1875 	The file will be mirrored beginning at the specified \a offset. The
1876 	\a offset and \a size arguments have to be page aligned.
1877 */
1878 static area_id
1879 _vm_map_file(team_id team, const char* name, void** _address,
1880 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1881 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1882 {
1883 	// TODO: for binary files, we want to make sure that they get a snapshot
1884 	//	of the file at mapping time, i.e. later changes should not make it
1885 	//	into the mapped copy -- this will need quite a few changes to be
1886 	//	done in a nice way
1887 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1888 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1889 
1890 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1891 	size = PAGE_ALIGN(size);
1892 
1893 	if (mapping == REGION_NO_PRIVATE_MAP)
1894 		protection |= B_SHARED_AREA;
1895 	if (addressSpec != B_EXACT_ADDRESS)
1896 		unmapAddressRange = false;
1897 
1898 	if (fd < 0) {
1899 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1900 		virtual_address_restrictions virtualRestrictions = {};
1901 		virtualRestrictions.address = *_address;
1902 		virtualRestrictions.address_specification = addressSpec;
1903 		physical_address_restrictions physicalRestrictions = {};
1904 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1905 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1906 			_address);
1907 	}
1908 
1909 	// get the open flags of the FD
1910 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1911 	if (descriptor == NULL)
1912 		return EBADF;
1913 	int32 openMode = descriptor->open_mode;
1914 	put_fd(descriptor);
1915 
1916 	// The FD must open for reading at any rate. For shared mapping with write
1917 	// The FD must be open for reading in any case. For a shared mapping with
1918 	// write access, the FD additionally has to be open for writing.
1919 		|| (mapping == REGION_NO_PRIVATE_MAP
1920 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1921 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1922 		return EACCES;
1923 	}
1924 
1925 	// get the vnode for the object, this also grabs a ref to it
1926 	struct vnode* vnode = NULL;
1927 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1928 	if (status < B_OK)
1929 		return status;
1930 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1931 
1932 	// If we're going to pre-map pages, we need to reserve the pages needed by
1933 	// the mapping backend upfront.
1934 	page_num_t reservedPreMapPages = 0;
1935 	vm_page_reservation reservation;
1936 	if ((protection & B_READ_AREA) != 0) {
1937 		AddressSpaceWriteLocker locker;
1938 		status = locker.SetTo(team);
1939 		if (status != B_OK)
1940 			return status;
1941 
1942 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1943 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1944 
1945 		locker.Unlock();
1946 
1947 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1948 			team == VMAddressSpace::KernelID()
1949 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1950 	}
1951 
1952 	struct PageUnreserver {
1953 		PageUnreserver(vm_page_reservation* reservation)
1954 			:
1955 			fReservation(reservation)
1956 		{
1957 		}
1958 
1959 		~PageUnreserver()
1960 		{
1961 			if (fReservation != NULL)
1962 				vm_page_unreserve_pages(fReservation);
1963 		}
1964 
1965 		vm_page_reservation* fReservation;
1966 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1967 
1968 	// Lock the address space and, if the specified address range shall be
1969 	// unmapped, ensure it is not wired.
1970 	AddressSpaceWriteLocker locker;
1971 	do {
1972 		if (locker.SetTo(team) != B_OK)
1973 			return B_BAD_TEAM_ID;
1974 	} while (unmapAddressRange
1975 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1976 			(addr_t)*_address, size, &locker));
1977 
1978 	// TODO: this only works for file systems that use the file cache
1979 	VMCache* cache;
1980 	status = vfs_get_vnode_cache(vnode, &cache, false);
1981 	if (status < B_OK)
1982 		return status;
1983 
1984 	cache->Lock();
1985 
1986 	VMArea* area;
1987 	virtual_address_restrictions addressRestrictions = {};
1988 	addressRestrictions.address = *_address;
1989 	addressRestrictions.address_specification = addressSpec;
1990 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1991 		0, protection, mapping,
1992 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1993 		&addressRestrictions, kernel, &area, _address);
1994 
1995 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1996 		// map_backing_store() cannot know we no longer need the ref
1997 		cache->ReleaseRefLocked();
1998 	}
1999 
2000 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2001 		pre_map_area_pages(area, cache, &reservation);
2002 
2003 	cache->Unlock();
2004 
2005 	if (status == B_OK) {
2006 		// TODO: this probably deserves a smarter solution, i.e. don't always
2007 		// prefetch stuff, and also, probably don't trigger it at this place.
2008 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2009 			// prefetches at max 10 MB starting from "offset"
2010 	}
2011 
2012 	if (status != B_OK)
2013 		return status;
2014 
2015 	area->cache_type = CACHE_TYPE_VNODE;
2016 	return area->id;
2017 }
2018 
2019 
2020 area_id
2021 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2022 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2023 	int fd, off_t offset)
2024 {
2025 	if (!arch_vm_supports_protection(protection))
2026 		return B_NOT_SUPPORTED;
2027 
2028 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2029 		mapping, unmapAddressRange, fd, offset, true);
2030 }
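
// Illustrative sketch (disabled with #if 0): mapping the first 64 KB of an
// already opened file read-only into the kernel team via vm_map_file() above.
// "fd" is assumed to be a valid file descriptor opened elsewhere.
#if 0
	void* address = NULL;
	area_id area = vm_map_file(VMAddressSpace::KernelID(),
		"example mapped file", &address, B_ANY_KERNEL_ADDRESS, 64 * 1024,
		B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, false,
		fd /* assumed to be open */, 0);
#endif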
2031 
2032 
2033 VMCache*
2034 vm_area_get_locked_cache(VMArea* area)
2035 {
2036 	rw_lock_read_lock(&sAreaCacheLock);
2037 
2038 	while (true) {
2039 		VMCache* cache = area->cache;
2040 
2041 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2042 			// cache has been deleted
2043 			rw_lock_read_lock(&sAreaCacheLock);
2044 			continue;
2045 		}
2046 
2047 		rw_lock_read_lock(&sAreaCacheLock);
2048 
2049 		if (cache == area->cache) {
2050 			cache->AcquireRefLocked();
2051 			rw_lock_read_unlock(&sAreaCacheLock);
2052 			return cache;
2053 		}
2054 
2055 		// the cache changed in the meantime
2056 		cache->Unlock();
2057 	}
2058 }
2059 
2060 
2061 void
2062 vm_area_put_locked_cache(VMCache* cache)
2063 {
2064 	cache->ReleaseRefAndUnlock();
2065 }
2066 
2067 
2068 area_id
2069 vm_clone_area(team_id team, const char* name, void** address,
2070 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2071 	bool kernel)
2072 {
2073 	VMArea* newArea = NULL;
2074 	VMArea* sourceArea;
2075 
2076 	// Check whether the source area exists and is cloneable. If so, mark it
2077 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2078 	{
2079 		AddressSpaceWriteLocker locker;
2080 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2081 		if (status != B_OK)
2082 			return status;
2083 
2084 		sourceArea->protection |= B_SHARED_AREA;
2085 		protection |= B_SHARED_AREA;
2086 	}
2087 
2088 	// Now lock both address spaces and actually do the cloning.
2089 
2090 	MultiAddressSpaceLocker locker;
2091 	VMAddressSpace* sourceAddressSpace;
2092 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2093 	if (status != B_OK)
2094 		return status;
2095 
2096 	VMAddressSpace* targetAddressSpace;
2097 	status = locker.AddTeam(team, true, &targetAddressSpace);
2098 	if (status != B_OK)
2099 		return status;
2100 
2101 	status = locker.Lock();
2102 	if (status != B_OK)
2103 		return status;
2104 
2105 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2106 	if (sourceArea == NULL)
2107 		return B_BAD_VALUE;
2108 
2109 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2110 
2111 	if (!kernel && sourceAddressSpace != targetAddressSpace
2112 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2113 #if KDEBUG
2114 		Team* team = thread_get_current_thread()->team;
2115 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2116 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2117 #endif
2118 		status = B_NOT_ALLOWED;
2119 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2120 		status = B_NOT_ALLOWED;
2121 	} else {
2122 		virtual_address_restrictions addressRestrictions = {};
2123 		addressRestrictions.address = *address;
2124 		addressRestrictions.address_specification = addressSpec;
2125 		status = map_backing_store(targetAddressSpace, cache,
2126 			sourceArea->cache_offset, name, sourceArea->Size(),
2127 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2128 			kernel, &newArea, address);
2129 	}
2130 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2131 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2132 		// to create a new cache, and has therefore already acquired a reference
2133 		// to the source cache - but otherwise it has no idea that we need
2134 		// one.
2135 		cache->AcquireRefLocked();
2136 	}
2137 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2138 		// we need to map in everything at this point
2139 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2140 			// we don't have actual pages to map but a physical area
2141 			VMTranslationMap* map
2142 				= sourceArea->address_space->TranslationMap();
2143 			map->Lock();
2144 
2145 			phys_addr_t physicalAddress;
2146 			uint32 oldProtection;
2147 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2148 
2149 			map->Unlock();
2150 
2151 			map = targetAddressSpace->TranslationMap();
2152 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2153 				newArea->Base() + (newArea->Size() - 1));
2154 
2155 			vm_page_reservation reservation;
2156 			vm_page_reserve_pages(&reservation, reservePages,
2157 				targetAddressSpace == VMAddressSpace::Kernel()
2158 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2159 			map->Lock();
2160 
2161 			for (addr_t offset = 0; offset < newArea->Size();
2162 					offset += B_PAGE_SIZE) {
2163 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2164 					protection, newArea->MemoryType(), &reservation);
2165 			}
2166 
2167 			map->Unlock();
2168 			vm_page_unreserve_pages(&reservation);
2169 		} else {
2170 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2171 			size_t reservePages = map->MaxPagesNeededToMap(
2172 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2173 			vm_page_reservation reservation;
2174 			vm_page_reserve_pages(&reservation, reservePages,
2175 				targetAddressSpace == VMAddressSpace::Kernel()
2176 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2177 
2178 			// map in all pages from source
2179 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2180 					vm_page* page  = it.Next();) {
2181 				if (!page->busy) {
2182 					DEBUG_PAGE_ACCESS_START(page);
2183 					map_page(newArea, page,
2184 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2185 							- newArea->cache_offset),
2186 						protection, &reservation);
2187 					DEBUG_PAGE_ACCESS_END(page);
2188 				}
2189 			}
2190 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2191 			// ensuring that!
2192 
2193 			vm_page_unreserve_pages(&reservation);
2194 		}
2195 	}
2196 	if (status == B_OK)
2197 		newArea->cache_type = sourceArea->cache_type;
2198 
2199 	vm_area_put_locked_cache(cache);
2200 
2201 	if (status < B_OK)
2202 		return status;
2203 
2204 	return newArea->id;
2205 }
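
// Illustrative sketch (disabled with #if 0): cloning an existing area into the
// kernel address space as a shared mapping, e.g. to access a buffer exported
// by another team. "sourceArea" is assumed to be a valid, cloneable area ID.
#if 0
	void* address = NULL;
	area_id clone = vm_clone_area(VMAddressSpace::KernelID(),
		"example buffer clone", &address, B_ANY_KERNEL_ADDRESS,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, REGION_NO_PRIVATE_MAP,
		sourceArea /* assumed area ID */, true);
#endif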
2206 
2207 
2208 /*!	Deletes the specified area of the given address space.
2209 
2210 	The address space must be write-locked.
2211 	The caller must ensure that the area does not have any wired ranges.
2212 
2213 	\param addressSpace The address space containing the area.
2214 	\param area The area to be deleted.
2215 	\param deletingAddressSpace \c true, if the address space is in the process
2216 		of being deleted.
2217 */
2218 static void
2219 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2220 	bool deletingAddressSpace)
2221 {
2222 	ASSERT(!area->IsWired());
2223 
2224 	VMAreaHash::Remove(area);
2225 
2226 	// At this point the area is removed from the global hash table, but
2227 	// still exists in the area list.
2228 
2229 	// Unmap the virtual address space the area occupied.
2230 	{
2231 		// We need to lock the complete cache chain.
2232 		VMCache* topCache = vm_area_get_locked_cache(area);
2233 		VMCacheChainLocker cacheChainLocker(topCache);
2234 		cacheChainLocker.LockAllSourceCaches();
2235 
2236 		// If the area's top cache is a temporary cache and the area is the only
2237 		// one referencing it (besides us currently holding a second reference),
2238 		// the unmapping code doesn't need to care about preserving the accessed
2239 		// and dirty flags of the top cache page mappings.
2240 		bool ignoreTopCachePageFlags
2241 			= topCache->temporary && topCache->RefCount() == 2;
2242 
2243 		area->address_space->TranslationMap()->UnmapArea(area,
2244 			deletingAddressSpace, ignoreTopCachePageFlags);
2245 	}
2246 
2247 	if (!area->cache->temporary)
2248 		area->cache->WriteModified();
2249 
2250 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2251 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2252 
2253 	arch_vm_unset_memory_type(area);
2254 	addressSpace->RemoveArea(area, allocationFlags);
2255 	addressSpace->Put();
2256 
2257 	area->cache->RemoveArea(area);
2258 	area->cache->ReleaseRef();
2259 
2260 	addressSpace->DeleteArea(area, allocationFlags);
2261 }
2262 
2263 
2264 status_t
2265 vm_delete_area(team_id team, area_id id, bool kernel)
2266 {
2267 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2268 		team, id));
2269 
2270 	// lock the address space and make sure the area isn't wired
2271 	AddressSpaceWriteLocker locker;
2272 	VMArea* area;
2273 	AreaCacheLocker cacheLocker;
2274 
2275 	do {
2276 		status_t status = locker.SetFromArea(team, id, area);
2277 		if (status != B_OK)
2278 			return status;
2279 
2280 		cacheLocker.SetTo(area);
2281 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2282 
2283 	cacheLocker.Unlock();
2284 
2285 	// SetFromArea will have returned an error if the area's owning team is not
2286 	// the same as the passed team, so we don't need to do those checks here.
2287 
2288 	delete_area(locker.AddressSpace(), area, false);
2289 	return B_OK;
2290 }
2291 
2292 
2293 /*!	Creates a new cache on top of given cache, moves all areas from
2294 	the old cache to the new one, and changes the protection of all affected
2295 	areas' pages to read-only. If requested, wired pages are moved up to the
2296 	new cache and copies are added to the old cache in their place.
2297 	Preconditions:
2298 	- The given cache must be locked.
2299 	- All of the cache's areas' address spaces must be read locked.
2300 	- Either the cache must not have any wired ranges or a page reservation for
2301 	  all wired pages must be provided, so they can be copied.
2302 
2303 	\param lowerCache The cache on top of which a new cache shall be created.
2304 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2305 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2306 		has wired pages. The wired pages are copied in this case.
2307 */
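// A rough picture of the transformation performed by this function
// (upperCache is the newly created anonymous cache):
//
//     before:  area1, area2 --> lowerCache (pages mapped writable)
//
//     after:   area1, area2 --> upperCache (temporary, initially empty)
//                                   |
//                                   v (source)
//                               lowerCache (pages now mapped read-only)
//
// A subsequent write fault on one of the areas copies the touched page from
// lowerCache into upperCache, leaving the original page intact for the other
// consumers of lowerCache.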
2308 static status_t
2309 vm_copy_on_write_area(VMCache* lowerCache,
2310 	vm_page_reservation* wiredPagesReservation)
2311 {
2312 	VMCache* upperCache;
2313 
2314 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2315 
2316 	// We need to separate the cache from its areas. The cache goes one level
2317 	// deeper and we create a new cache inbetween.
2318 	// deeper and we create a new cache in between.
2319 	// create an anonymous cache
2320 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2321 		lowerCache->GuardSize() / B_PAGE_SIZE,
2322 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2323 		VM_PRIORITY_USER);
2324 	if (status != B_OK)
2325 		return status;
2326 
2327 	upperCache->Lock();
2328 
2329 	upperCache->temporary = 1;
2330 	upperCache->virtual_base = lowerCache->virtual_base;
2331 	upperCache->virtual_end = lowerCache->virtual_end;
2332 
2333 	// transfer the lower cache areas to the upper cache
2334 	rw_lock_write_lock(&sAreaCacheLock);
2335 	upperCache->TransferAreas(lowerCache);
2336 	rw_lock_write_unlock(&sAreaCacheLock);
2337 
2338 	lowerCache->AddConsumer(upperCache);
2339 
2340 	// We now need to remap all pages from all of the cache's areas read-only,
2341 	// so that a copy will be created on next write access. If there are wired
2342 	// pages, we keep their protection, move them to the upper cache and create
2343 	// copies for the lower cache.
2344 	if (wiredPagesReservation != NULL) {
2345 		// We need to handle wired pages -- iterate through the cache's pages.
2346 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2347 				vm_page* page = it.Next();) {
2348 			if (page->WiredCount() > 0) {
2349 				// allocate a new page and copy the wired one
2350 				vm_page* copiedPage = vm_page_allocate_page(
2351 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2352 
2353 				vm_memcpy_physical_page(
2354 					copiedPage->physical_page_number * B_PAGE_SIZE,
2355 					page->physical_page_number * B_PAGE_SIZE);
2356 
2357 				// move the wired page to the upper cache (note: removing is OK
2358 				// with the SplayTree iterator) and insert the copy
2359 				upperCache->MovePage(page);
2360 				lowerCache->InsertPage(copiedPage,
2361 					page->cache_offset * B_PAGE_SIZE);
2362 
2363 				DEBUG_PAGE_ACCESS_END(copiedPage);
2364 			} else {
2365 				// Change the protection of this page in all areas.
2366 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2367 						tempArea = tempArea->cache_next) {
2368 					// Map the page read-only, keeping userland read access
2369 					// if the area has it.
2370 					uint32 protection = B_KERNEL_READ_AREA;
2371 					if ((tempArea->protection & B_READ_AREA) != 0)
2372 						protection |= B_READ_AREA;
2373 
2374 					VMTranslationMap* map
2375 						= tempArea->address_space->TranslationMap();
2376 					map->Lock();
2377 					map->ProtectPage(tempArea,
2378 						virtual_page_address(tempArea, page), protection);
2379 					map->Unlock();
2380 				}
2381 			}
2382 		}
2383 	} else {
2384 		ASSERT(lowerCache->WiredPagesCount() == 0);
2385 
2386 		// just change the protection of all areas
2387 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2388 				tempArea = tempArea->cache_next) {
2389 			// Map the area read-only, keeping userland read access if the
2390 			// area has it.
2391 			uint32 protection = B_KERNEL_READ_AREA;
2392 			if ((tempArea->protection & B_READ_AREA) != 0)
2393 				protection |= B_READ_AREA;
2394 
2395 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2396 			map->Lock();
2397 			map->ProtectArea(tempArea, protection);
2398 			map->Unlock();
2399 		}
2400 	}
2401 
2402 	vm_area_put_locked_cache(upperCache);
2403 
2404 	return B_OK;
2405 }
2406 
2407 
2408 area_id
2409 vm_copy_area(team_id team, const char* name, void** _address,
2410 	uint32 addressSpec, uint32 protection, area_id sourceID)
2411 {
2412 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2413 
2414 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2415 		// set the same protection for the kernel as for userland
2416 		protection |= B_KERNEL_READ_AREA;
2417 		if (writableCopy)
2418 			protection |= B_KERNEL_WRITE_AREA;
2419 	}
2420 
2421 	// Do the locking: target address space, all address spaces associated with
2422 	// the source cache, and the cache itself.
2423 	MultiAddressSpaceLocker locker;
2424 	VMAddressSpace* targetAddressSpace;
2425 	VMCache* cache;
2426 	VMArea* source;
2427 	AreaCacheLocker cacheLocker;
2428 	status_t status;
2429 	bool sharedArea;
2430 
2431 	page_num_t wiredPages = 0;
2432 	vm_page_reservation wiredPagesReservation;
2433 
2434 	bool restart;
2435 	do {
2436 		restart = false;
2437 
2438 		locker.Unset();
2439 		status = locker.AddTeam(team, true, &targetAddressSpace);
2440 		if (status == B_OK) {
2441 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2442 				&cache);
2443 		}
2444 		if (status != B_OK)
2445 			return status;
2446 
2447 		cacheLocker.SetTo(cache, true);	// already locked
2448 
2449 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2450 
2451 		page_num_t oldWiredPages = wiredPages;
2452 		wiredPages = 0;
2453 
2454 		// If the source area isn't shared, count the number of wired pages in
2455 		// the cache and reserve as many pages.
2456 		if (!sharedArea) {
2457 			wiredPages = cache->WiredPagesCount();
2458 
2459 			if (wiredPages > oldWiredPages) {
2460 				cacheLocker.Unlock();
2461 				locker.Unlock();
2462 
2463 				if (oldWiredPages > 0)
2464 					vm_page_unreserve_pages(&wiredPagesReservation);
2465 
2466 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2467 					VM_PRIORITY_USER);
2468 
2469 				restart = true;
2470 			}
2471 		} else if (oldWiredPages > 0)
2472 			vm_page_unreserve_pages(&wiredPagesReservation);
2473 	} while (restart);
2474 
2475 	// unreserve pages later
2476 	struct PagesUnreserver {
2477 		PagesUnreserver(vm_page_reservation* reservation)
2478 			:
2479 			fReservation(reservation)
2480 		{
2481 		}
2482 
2483 		~PagesUnreserver()
2484 		{
2485 			if (fReservation != NULL)
2486 				vm_page_unreserve_pages(fReservation);
2487 		}
2488 
2489 	private:
2490 		vm_page_reservation*	fReservation;
2491 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2492 
2493 	if (addressSpec == B_CLONE_ADDRESS) {
2494 		addressSpec = B_EXACT_ADDRESS;
2495 		*_address = (void*)source->Base();
2496 	}
2497 
2498 	// First, create a cache on top of the source area, or use the existing
2499 	// one if this is a shared area.
2500 
2501 	VMArea* target;
2502 	virtual_address_restrictions addressRestrictions = {};
2503 	addressRestrictions.address = *_address;
2504 	addressRestrictions.address_specification = addressSpec;
2505 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2506 		name, source->Size(), source->wiring, protection,
2507 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2508 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2509 		&addressRestrictions, true, &target, _address);
2510 	if (status < B_OK)
2511 		return status;
2512 
2513 	if (sharedArea) {
2514 		// The new area uses the old area's cache, but map_backing_store()
2515 		// hasn't acquired a ref. So we have to do that now.
2516 		cache->AcquireRefLocked();
2517 	}
2518 
2519 	// If the source area is writable, we need to move it one layer up as well
2520 
2521 	if (!sharedArea) {
2522 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2523 			// TODO: do something more useful if this fails!
2524 			if (vm_copy_on_write_area(cache,
2525 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2526 				panic("vm_copy_on_write_area() failed!\n");
2527 			}
2528 		}
2529 	}
2530 
2531 	// we return the ID of the newly created area
2532 	return target->id;
2533 }
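
// Illustrative sketch (disabled with #if 0): creating a writable copy-on-write
// copy of an existing area at the same base address in another team.
// "targetTeam" and "sourceArea" are assumed to be valid IDs.
#if 0
	void* address = NULL;
	area_id copy = vm_copy_area(targetTeam /* assumed */, "example area copy",
		&address, B_CLONE_ADDRESS, B_READ_AREA | B_WRITE_AREA,
		sourceArea /* assumed */);
#endif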
2534 
2535 
2536 status_t
2537 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2538 	bool kernel)
2539 {
2540 	fix_protection(&newProtection);
2541 
2542 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2543 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2544 
2545 	if (!arch_vm_supports_protection(newProtection))
2546 		return B_NOT_SUPPORTED;
2547 
2548 	bool becomesWritable
2549 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2550 
2551 	// lock address spaces and cache
2552 	MultiAddressSpaceLocker locker;
2553 	VMCache* cache;
2554 	VMArea* area;
2555 	status_t status;
2556 	AreaCacheLocker cacheLocker;
2557 	bool isWritable;
2558 
2559 	bool restart;
2560 	do {
2561 		restart = false;
2562 
2563 		locker.Unset();
2564 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2565 		if (status != B_OK)
2566 			return status;
2567 
2568 		cacheLocker.SetTo(cache, true);	// already locked
2569 
2570 		if (!kernel && area->address_space == VMAddressSpace::Kernel()) {
2571 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2572 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2573 				" (%s)\n", team, newProtection, areaID, area->name);
2574 			return B_NOT_ALLOWED;
2575 		}
2576 
2577 		if (area->protection == newProtection)
2578 			return B_OK;
2579 
2580 		if (team != VMAddressSpace::KernelID()
2581 			&& area->address_space->ID() != team) {
2582 			// unless you're the kernel, you are only allowed to set
2583 			// the protection of your own areas
2584 			return B_NOT_ALLOWED;
2585 		}
2586 
2587 		isWritable
2588 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2589 
2590 		// Make sure the area (respectively, if we're going to call
2591 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2592 		// wired ranges.
2593 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2594 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2595 					otherArea = otherArea->cache_next) {
2596 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2597 					restart = true;
2598 					break;
2599 				}
2600 			}
2601 		} else {
2602 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2603 				restart = true;
2604 		}
2605 	} while (restart);
2606 
2607 	bool changePageProtection = true;
2608 	bool changeTopCachePagesOnly = false;
2609 
2610 	if (isWritable && !becomesWritable) {
2611 		// writable -> !writable
2612 
2613 		if (cache->source != NULL && cache->temporary) {
2614 			if (cache->CountWritableAreas(area) == 0) {
2615 				// Since this cache is now backed by the pages of its source
2616 				// cache, we can reduce its commitment to cover only the pages
2617 				// that are actually in this cache.
2618 
2619 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2620 					team == VMAddressSpace::KernelID()
2621 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2622 
2623 				// TODO: we may be able to join with our source cache, if
2624 				// count == 0
2625 			}
2626 		}
2627 
2628 		// If only the writability changes, we can just remap the pages of the
2629 		// top cache, since the pages of lower caches are mapped read-only
2630 		// anyway. That is only advantageous if the number of pages in the
2631 		// cache is significantly smaller than the number of pages in the
2632 		// area, though.
2633 		if (newProtection
2634 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2635 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2636 			changeTopCachePagesOnly = true;
2637 		}
2638 	} else if (!isWritable && becomesWritable) {
2639 		// !writable -> writable
2640 
2641 		if (!cache->consumers.IsEmpty()) {
2642 			// There are consumers -- we have to insert a new cache. Fortunately
2643 			// vm_copy_on_write_area() does everything that's needed.
2644 			changePageProtection = false;
2645 			status = vm_copy_on_write_area(cache, NULL);
2646 		} else {
2647 			// No consumers, so we don't need to insert a new one.
2648 			if (cache->source != NULL && cache->temporary) {
2649 				// the cache's commitment must contain all possible pages
2650 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2651 					team == VMAddressSpace::KernelID()
2652 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2653 			}
2654 
2655 			if (status == B_OK && cache->source != NULL) {
2656 				// There's a source cache, hence we can't just change all pages'
2657 				// protection or we might allow writing into pages belonging to
2658 				// a lower cache.
2659 				changeTopCachePagesOnly = true;
2660 			}
2661 		}
2662 	} else {
2663 		// we don't have anything special to do in all other cases
2664 	}
2665 
2666 	if (status == B_OK) {
2667 		// remap existing pages in this cache
2668 		if (changePageProtection) {
2669 			VMTranslationMap* map = area->address_space->TranslationMap();
2670 			map->Lock();
2671 
2672 			if (changeTopCachePagesOnly) {
2673 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2674 				page_num_t lastPageOffset
2675 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2676 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2677 						vm_page* page = it.Next();) {
2678 					if (page->cache_offset >= firstPageOffset
2679 						&& page->cache_offset < lastPageOffset) {
2680 						addr_t address = virtual_page_address(area, page);
2681 						map->ProtectPage(area, address, newProtection);
2682 					}
2683 				}
2684 			} else
2685 				map->ProtectArea(area, newProtection);
2686 
2687 			map->Unlock();
2688 		}
2689 
2690 		area->protection = newProtection;
2691 	}
2692 
2693 	return status;
2694 }
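
// Illustrative sketch (disabled with #if 0): revoking write access from a
// userland area once its contents are final. The requested protection is
// passed through fix_protection() first (see the top of the function), which
// should add the matching kernel bits; "team" and "areaID" are assumed to be
// valid.
#if 0
	status_t error = vm_set_area_protection(team, areaID, B_READ_AREA, true);
#endif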
2695 
2696 
2697 status_t
2698 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2699 {
2700 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2701 	if (addressSpace == NULL)
2702 		return B_BAD_TEAM_ID;
2703 
2704 	VMTranslationMap* map = addressSpace->TranslationMap();
2705 
2706 	map->Lock();
2707 	uint32 dummyFlags;
2708 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2709 	map->Unlock();
2710 
2711 	addressSpace->Put();
2712 	return status;
2713 }
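
// Illustrative sketch (disabled with #if 0): looking up the physical address
// currently backing a kernel virtual address. "buffer" is assumed to point to
// memory that is actually mapped.
#if 0
	phys_addr_t physicalAddress;
	if (vm_get_page_mapping(VMAddressSpace::KernelID(),
			(addr_t)buffer /* assumed mapped */, &physicalAddress) == B_OK) {
		dprintf("backed by physical address %#" B_PRIxPHYSADDR "\n",
			physicalAddress);
	}
#endif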
2714 
2715 
2716 /*!	The page's cache must be locked.
2717 */
2718 bool
2719 vm_test_map_modification(vm_page* page)
2720 {
2721 	if (page->modified)
2722 		return true;
2723 
2724 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2725 	vm_page_mapping* mapping;
2726 	while ((mapping = iterator.Next()) != NULL) {
2727 		VMArea* area = mapping->area;
2728 		VMTranslationMap* map = area->address_space->TranslationMap();
2729 
2730 		phys_addr_t physicalAddress;
2731 		uint32 flags;
2732 		map->Lock();
2733 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2734 		map->Unlock();
2735 
2736 		if ((flags & PAGE_MODIFIED) != 0)
2737 			return true;
2738 	}
2739 
2740 	return false;
2741 }
2742 
2743 
2744 /*!	The page's cache must be locked.
2745 */
2746 void
2747 vm_clear_map_flags(vm_page* page, uint32 flags)
2748 {
2749 	if ((flags & PAGE_ACCESSED) != 0)
2750 		page->accessed = false;
2751 	if ((flags & PAGE_MODIFIED) != 0)
2752 		page->modified = false;
2753 
2754 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2755 	vm_page_mapping* mapping;
2756 	while ((mapping = iterator.Next()) != NULL) {
2757 		VMArea* area = mapping->area;
2758 		VMTranslationMap* map = area->address_space->TranslationMap();
2759 
2760 		map->Lock();
2761 		map->ClearFlags(virtual_page_address(area, page), flags);
2762 		map->Unlock();
2763 	}
2764 }
2765 
2766 
2767 /*!	Removes all mappings from a page.
2768 	After you've called this function, the page is unmapped from memory and
2769 	the page's \c accessed and \c modified flags have been updated according
2770 	to the state of the mappings.
2771 	The page's cache must be locked.
2772 */
2773 void
2774 vm_remove_all_page_mappings(vm_page* page)
2775 {
2776 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2777 		VMArea* area = mapping->area;
2778 		VMTranslationMap* map = area->address_space->TranslationMap();
2779 		addr_t address = virtual_page_address(area, page);
2780 		map->UnmapPage(area, address, false);
2781 	}
2782 }
2783 
2784 
2785 int32
2786 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2787 {
2788 	int32 count = 0;
2789 
2790 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2791 	vm_page_mapping* mapping;
2792 	while ((mapping = iterator.Next()) != NULL) {
2793 		VMArea* area = mapping->area;
2794 		VMTranslationMap* map = area->address_space->TranslationMap();
2795 
2796 		bool modified;
2797 		if (map->ClearAccessedAndModified(area,
2798 				virtual_page_address(area, page), false, modified)) {
2799 			count++;
2800 		}
2801 
2802 		page->modified |= modified;
2803 	}
2804 
2805 
2806 	if (page->accessed) {
2807 		count++;
2808 		page->accessed = false;
2809 	}
2810 
2811 	return count;
2812 }
2813 
2814 
2815 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2816 	mappings.
2817 	The function iterates through the page mappings and removes them until
2818 	encountering one that has been accessed. From then on it will continue to
2819 	iterate, but only clear the accessed flag of the mapping. The page's
2820 	\c modified bit will be updated accordingly, the \c accessed bit will be
2821 	cleared.
2822 	\return The number of mapping accessed bits encountered, including the
2823 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2824 		of the page have been removed.
2825 */
2826 int32
2827 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2828 {
2829 	ASSERT(page->WiredCount() == 0);
2830 
2831 	if (page->accessed)
2832 		return vm_clear_page_mapping_accessed_flags(page);
2833 
2834 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2835 		VMArea* area = mapping->area;
2836 		VMTranslationMap* map = area->address_space->TranslationMap();
2837 		addr_t address = virtual_page_address(area, page);
2838 		bool modified = false;
2839 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2840 			page->accessed = true;
2841 			page->modified |= modified;
2842 			return vm_clear_page_mapping_accessed_flags(page);
2843 		}
2844 		page->modified |= modified;
2845 	}
2846 
2847 	return 0;
2848 }
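
// Illustrative sketch (disabled with #if 0): how a page scanner might use the
// return value -- zero means the page is no longer mapped anywhere and can be
// deactivated. "page" is assumed; the real page daemon does additional
// bookkeeping (page access tracking, queue handling) that is omitted here.
#if 0
	if (vm_remove_all_page_mappings_if_unaccessed(page) == 0)
		vm_page_set_state(page, PAGE_STATE_INACTIVE);
#endif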
2849 
2850 
2851 static int
2852 display_mem(int argc, char** argv)
2853 {
2854 	bool physical = false;
2855 	addr_t copyAddress;
2856 	int32 displayWidth;
2857 	int32 itemSize;
2858 	int32 num = -1;
2859 	addr_t address;
2860 	int i = 1, j;
2861 
2862 	if (argc > 1 && argv[1][0] == '-') {
2863 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2864 			physical = true;
2865 			i++;
2866 		} else
2867 			i = 99;
2868 	}
2869 
2870 	if (argc < i + 1 || argc > i + 2) {
2871 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2872 			"\tdl - 8 bytes\n"
2873 			"\tdw - 4 bytes\n"
2874 			"\tds - 2 bytes\n"
2875 			"\tdb - 1 byte\n"
2876 			"\tstring - a whole string\n"
2877 			"  -p or --physical only allows memory from a single page to be "
2878 			"displayed.\n");
2879 		return 0;
2880 	}
2881 
2882 	address = parse_expression(argv[i]);
2883 
2884 	if (argc > i + 1)
2885 		num = parse_expression(argv[i + 1]);
2886 
2887 	// build the format string
2888 	if (strcmp(argv[0], "db") == 0) {
2889 		itemSize = 1;
2890 		displayWidth = 16;
2891 	} else if (strcmp(argv[0], "ds") == 0) {
2892 		itemSize = 2;
2893 		displayWidth = 8;
2894 	} else if (strcmp(argv[0], "dw") == 0) {
2895 		itemSize = 4;
2896 		displayWidth = 4;
2897 	} else if (strcmp(argv[0], "dl") == 0) {
2898 		itemSize = 8;
2899 		displayWidth = 2;
2900 	} else if (strcmp(argv[0], "string") == 0) {
2901 		itemSize = 1;
2902 		displayWidth = -1;
2903 	} else {
2904 		kprintf("display_mem called in an invalid way!\n");
2905 		return 0;
2906 	}
2907 
2908 	if (num <= 0)
2909 		num = displayWidth;
2910 
2911 	void* physicalPageHandle = NULL;
2912 
2913 	if (physical) {
2914 		int32 offset = address & (B_PAGE_SIZE - 1);
2915 		if (num * itemSize + offset > B_PAGE_SIZE) {
2916 			num = (B_PAGE_SIZE - offset) / itemSize;
2917 			kprintf("NOTE: number of bytes has been cut to page size\n");
2918 		}
2919 
2920 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2921 
2922 		if (vm_get_physical_page_debug(address, &copyAddress,
2923 				&physicalPageHandle) != B_OK) {
2924 			kprintf("getting the hardware page failed.\n");
2925 			return 0;
2926 		}
2927 
2928 		address += offset;
2929 		copyAddress += offset;
2930 	} else
2931 		copyAddress = address;
2932 
2933 	if (!strcmp(argv[0], "string")) {
2934 		kprintf("%p \"", (char*)copyAddress);
2935 
2936 		// string mode
2937 		for (i = 0; true; i++) {
2938 			char c;
2939 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2940 					!= B_OK
2941 				|| c == '\0') {
2942 				break;
2943 			}
2944 
2945 			if (c == '\n')
2946 				kprintf("\\n");
2947 			else if (c == '\t')
2948 				kprintf("\\t");
2949 			else {
2950 				if (!isprint(c))
2951 					c = '.';
2952 
2953 				kprintf("%c", c);
2954 			}
2955 		}
2956 
2957 		kprintf("\"\n");
2958 	} else {
2959 		// number mode
2960 		for (i = 0; i < num; i++) {
2961 			uint64 value;
2962 
2963 			if ((i % displayWidth) == 0) {
2964 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2965 				if (i != 0)
2966 					kprintf("\n");
2967 
2968 				kprintf("[0x%lx]  ", address + i * itemSize);
2969 
2970 				for (j = 0; j < displayed; j++) {
2971 					char c;
2972 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2973 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2974 						displayed = j;
2975 						break;
2976 					}
2977 					if (!isprint(c))
2978 						c = '.';
2979 
2980 					kprintf("%c", c);
2981 				}
2982 				if (num > displayWidth) {
2983 					// make sure the spacing in the last line is correct
2984 					for (j = displayed; j < displayWidth * itemSize; j++)
2985 						kprintf(" ");
2986 				}
2987 				kprintf("  ");
2988 			}
2989 
2990 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2991 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2992 				kprintf("read fault");
2993 				break;
2994 			}
2995 
2996 			switch (itemSize) {
2997 				case 1:
2998 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
2999 					break;
3000 				case 2:
3001 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3002 					break;
3003 				case 4:
3004 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3005 					break;
3006 				case 8:
3007 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3008 					break;
3009 			}
3010 		}
3011 
3012 		kprintf("\n");
3013 	}
3014 
3015 	if (physical) {
3016 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3017 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3018 	}
3019 	return 0;
3020 }
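
// Example invocations in the kernel debugger, derived from the option parsing
// above (addresses are placeholders):
//   dw 0x80000000 16     -- dump sixteen 4-byte values starting at the address
//   db -p 0x100000       -- dump bytes from the given physical address
//   string 0x80000000    -- print the NUL-terminated string at the address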
3021 
3022 
3023 static void
3024 dump_cache_tree_recursively(VMCache* cache, int level,
3025 	VMCache* highlightCache)
3026 {
3027 	// print this cache
3028 	for (int i = 0; i < level; i++)
3029 		kprintf("  ");
3030 	if (cache == highlightCache)
3031 		kprintf("%p <--\n", cache);
3032 	else
3033 		kprintf("%p\n", cache);
3034 
3035 	// recursively print its consumers
3036 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3037 			VMCache* consumer = it.Next();) {
3038 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3039 	}
3040 }
3041 
3042 
3043 static int
3044 dump_cache_tree(int argc, char** argv)
3045 {
3046 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3047 		kprintf("usage: %s <address>\n", argv[0]);
3048 		return 0;
3049 	}
3050 
3051 	addr_t address = parse_expression(argv[1]);
3052 	if (address == 0)
3053 		return 0;
3054 
3055 	VMCache* cache = (VMCache*)address;
3056 	VMCache* root = cache;
3057 
3058 	// find the root cache (the transitive source)
3059 	while (root->source != NULL)
3060 		root = root->source;
3061 
3062 	dump_cache_tree_recursively(root, 0, cache);
3063 
3064 	return 0;
3065 }
3066 
3067 
3068 const char*
3069 vm_cache_type_to_string(int32 type)
3070 {
3071 	switch (type) {
3072 		case CACHE_TYPE_RAM:
3073 			return "RAM";
3074 		case CACHE_TYPE_DEVICE:
3075 			return "device";
3076 		case CACHE_TYPE_VNODE:
3077 			return "vnode";
3078 		case CACHE_TYPE_NULL:
3079 			return "null";
3080 
3081 		default:
3082 			return "unknown";
3083 	}
3084 }
3085 
3086 
3087 #if DEBUG_CACHE_LIST
3088 
3089 static void
3090 update_cache_info_recursively(VMCache* cache, cache_info& info)
3091 {
3092 	info.page_count += cache->page_count;
3093 	if (cache->type == CACHE_TYPE_RAM)
3094 		info.committed += cache->committed_size;
3095 
3096 	// recurse
3097 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3098 			VMCache* consumer = it.Next();) {
3099 		update_cache_info_recursively(consumer, info);
3100 	}
3101 }
3102 
3103 
3104 static int
3105 cache_info_compare_page_count(const void* _a, const void* _b)
3106 {
3107 	const cache_info* a = (const cache_info*)_a;
3108 	const cache_info* b = (const cache_info*)_b;
3109 	if (a->page_count == b->page_count)
3110 		return 0;
3111 	return a->page_count < b->page_count ? 1 : -1;
3112 }
3113 
3114 
3115 static int
3116 cache_info_compare_committed(const void* _a, const void* _b)
3117 {
3118 	const cache_info* a = (const cache_info*)_a;
3119 	const cache_info* b = (const cache_info*)_b;
3120 	if (a->committed == b->committed)
3121 		return 0;
3122 	return a->committed < b->committed ? 1 : -1;
3123 }
3124 
3125 
3126 static void
3127 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3128 {
3129 	for (int i = 0; i < level; i++)
3130 		kprintf("  ");
3131 
3132 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3133 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3134 		cache->virtual_base, cache->virtual_end, cache->page_count);
3135 
3136 	if (level == 0)
3137 		kprintf("/%lu", info.page_count);
3138 
3139 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3140 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3141 
3142 		if (level == 0)
3143 			kprintf("/%lu", info.committed);
3144 	}
3145 
3146 	// areas
3147 	if (cache->areas != NULL) {
3148 		VMArea* area = cache->areas;
3149 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3150 			area->name, area->address_space->ID());
3151 
3152 		while (area->cache_next != NULL) {
3153 			area = area->cache_next;
3154 			kprintf(", %" B_PRId32, area->id);
3155 		}
3156 	}
3157 
3158 	kputs("\n");
3159 
3160 	// recurse
3161 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3162 			VMCache* consumer = it.Next();) {
3163 		dump_caches_recursively(consumer, info, level + 1);
3164 	}
3165 }
3166 
3167 
3168 static int
3169 dump_caches(int argc, char** argv)
3170 {
3171 	if (sCacheInfoTable == NULL) {
3172 		kprintf("No cache info table!\n");
3173 		return 0;
3174 	}
3175 
3176 	bool sortByPageCount = true;
3177 
3178 	for (int32 i = 1; i < argc; i++) {
3179 		if (strcmp(argv[i], "-c") == 0) {
3180 			sortByPageCount = false;
3181 		} else {
3182 			print_debugger_command_usage(argv[0]);
3183 			return 0;
3184 		}
3185 	}
3186 
3187 	uint32 totalCount = 0;
3188 	uint32 rootCount = 0;
3189 	off_t totalCommitted = 0;
3190 	page_num_t totalPages = 0;
3191 
3192 	VMCache* cache = gDebugCacheList;
3193 	while (cache) {
3194 		totalCount++;
3195 		if (cache->source == NULL) {
3196 			cache_info stackInfo;
3197 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3198 				? sCacheInfoTable[rootCount] : stackInfo;
3199 			rootCount++;
3200 			info.cache = cache;
3201 			info.page_count = 0;
3202 			info.committed = 0;
3203 			update_cache_info_recursively(cache, info);
3204 			totalCommitted += info.committed;
3205 			totalPages += info.page_count;
3206 		}
3207 
3208 		cache = cache->debug_next;
3209 	}
3210 
3211 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3212 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3213 			sortByPageCount
3214 				? &cache_info_compare_page_count
3215 				: &cache_info_compare_committed);
3216 	}
3217 
3218 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3219 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3220 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3221 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3222 			"page count" : "committed size");
3223 
3224 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3225 		for (uint32 i = 0; i < rootCount; i++) {
3226 			cache_info& info = sCacheInfoTable[i];
3227 			dump_caches_recursively(info.cache, info, 0);
3228 		}
3229 	} else
3230 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3231 
3232 	return 0;
3233 }
3234 
3235 #endif	// DEBUG_CACHE_LIST
3236 
3237 
3238 static int
3239 dump_cache(int argc, char** argv)
3240 {
3241 	VMCache* cache;
3242 	bool showPages = false;
3243 	int i = 1;
3244 
3245 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3246 		kprintf("usage: %s [-ps] <address>\n"
3247 			"  if -p is specified, all pages are shown; if -s is used,\n"
3248 			"  only the cache info is shown.\n", argv[0]);
3249 		return 0;
3250 	}
3251 	while (argv[i][0] == '-') {
3252 		char* arg = argv[i] + 1;
3253 		while (arg[0]) {
3254 			if (arg[0] == 'p')
3255 				showPages = true;
3256 			arg++;
3257 		}
3258 		i++;
3259 	}
3260 	if (argv[i] == NULL) {
3261 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3262 		return 0;
3263 	}
3264 
3265 	addr_t address = parse_expression(argv[i]);
3266 	if (address == 0)
3267 		return 0;
3268 
3269 	cache = (VMCache*)address;
3270 
3271 	cache->Dump(showPages);
3272 
3273 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3274 
3275 	return 0;
3276 }
3277 
3278 
3279 static void
3280 dump_area_struct(VMArea* area, bool mappings)
3281 {
3282 	kprintf("AREA: %p\n", area);
3283 	kprintf("name:\t\t'%s'\n", area->name);
3284 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3285 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3286 	kprintf("base:\t\t0x%lx\n", area->Base());
3287 	kprintf("size:\t\t0x%lx\n", area->Size());
3288 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3289 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3290 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3291 	kprintf("cache:\t\t%p\n", area->cache);
3292 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3293 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3294 	kprintf("cache_next:\t%p\n", area->cache_next);
3295 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3296 
3297 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3298 	if (mappings) {
3299 		kprintf("page mappings:\n");
3300 		while (iterator.HasNext()) {
3301 			vm_page_mapping* mapping = iterator.Next();
3302 			kprintf("  %p", mapping->page);
3303 		}
3304 		kprintf("\n");
3305 	} else {
3306 		uint32 count = 0;
3307 		while (iterator.Next() != NULL) {
3308 			count++;
3309 		}
3310 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3311 	}
3312 }
3313 
3314 
3315 static int
3316 dump_area(int argc, char** argv)
3317 {
3318 	bool mappings = false;
3319 	bool found = false;
3320 	int32 index = 1;
3321 	VMArea* area;
3322 	addr_t num;
3323 
3324 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3325 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3326 			"All areas matching either id/address/name are listed. You can\n"
3327 			"restrict the check to a single attribute by prefixing the specifier\n"
3328 			"with one of the id/contains/address/name keywords.\n"
3329 			"-m shows the area's mappings as well.\n");
3330 		return 0;
3331 	}
3332 
3333 	if (!strcmp(argv[1], "-m")) {
3334 		mappings = true;
3335 		index++;
3336 	}
3337 
3338 	int32 mode = 0xf;
3339 	if (!strcmp(argv[index], "id"))
3340 		mode = 1;
3341 	else if (!strcmp(argv[index], "contains"))
3342 		mode = 2;
3343 	else if (!strcmp(argv[index], "name"))
3344 		mode = 4;
3345 	else if (!strcmp(argv[index], "address"))
3346 		mode = 0;
3347 	if (mode != 0xf)
3348 		index++;
3349 
3350 	if (index >= argc) {
3351 		kprintf("No area specifier given.\n");
3352 		return 0;
3353 	}
3354 
3355 	num = parse_expression(argv[index]);
3356 
3357 	if (mode == 0) {
3358 		dump_area_struct((struct VMArea*)num, mappings);
3359 	} else {
3360 		// walk through the area list, looking for the arguments as a name
3361 
3362 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3363 		while ((area = it.Next()) != NULL) {
3364 			if (((mode & 4) != 0 && area->name != NULL
3365 					&& !strcmp(argv[index], area->name))
3366 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3367 					|| (((mode & 2) != 0 && area->Base() <= num
3368 						&& area->Base() + area->Size() > num))))) {
3369 				dump_area_struct(area, mappings);
3370 				found = true;
3371 			}
3372 		}
3373 
3374 		if (!found)
3375 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3376 	}
3377 
3378 	return 0;
3379 }
3380 
3381 
3382 static int
3383 dump_area_list(int argc, char** argv)
3384 {
3385 	VMArea* area;
3386 	const char* name = NULL;
3387 	int32 id = 0;
3388 
3389 	if (argc > 1) {
3390 		id = parse_expression(argv[1]);
3391 		if (id == 0)
3392 			name = argv[1];
3393 	}
3394 
3395 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3396 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3397 		B_PRINTF_POINTER_WIDTH, "size");
3398 
3399 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3400 	while ((area = it.Next()) != NULL) {
3401 		if ((id != 0 && area->address_space->ID() != id)
3402 			|| (name != NULL && strstr(area->name, name) == NULL))
3403 			continue;
3404 
3405 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3406 			area->id, (void*)area->Base(), (void*)area->Size(),
3407 			area->protection, area->wiring, area->name);
3408 	}
3409 	return 0;
3410 }
3411 
3412 
3413 static int
3414 dump_available_memory(int argc, char** argv)
3415 {
3416 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3417 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3418 	return 0;
3419 }
3420 
3421 
3422 static int
3423 dump_mapping_info(int argc, char** argv)
3424 {
3425 	bool reverseLookup = false;
3426 	bool pageLookup = false;
3427 
3428 	int argi = 1;
3429 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3430 		const char* arg = argv[argi];
3431 		if (strcmp(arg, "-r") == 0) {
3432 			reverseLookup = true;
3433 		} else if (strcmp(arg, "-p") == 0) {
3434 			reverseLookup = true;
3435 			pageLookup = true;
3436 		} else {
3437 			print_debugger_command_usage(argv[0]);
3438 			return 0;
3439 		}
3440 	}
3441 
3442 	// We need at least one argument, the address. Optionally a thread ID can be
3443 	// specified.
3444 	if (argi >= argc || argi + 2 < argc) {
3445 		print_debugger_command_usage(argv[0]);
3446 		return 0;
3447 	}
3448 
3449 	uint64 addressValue;
3450 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3451 		return 0;
3452 
3453 	Team* team = NULL;
3454 	if (argi < argc) {
3455 		uint64 threadID;
3456 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3457 			return 0;
3458 
3459 		Thread* thread = Thread::GetDebug(threadID);
3460 		if (thread == NULL) {
3461 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3462 			return 0;
3463 		}
3464 
3465 		team = thread->team;
3466 	}
3467 
3468 	if (reverseLookup) {
3469 		phys_addr_t physicalAddress;
3470 		if (pageLookup) {
3471 			vm_page* page = (vm_page*)(addr_t)addressValue;
3472 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3473 		} else {
3474 			physicalAddress = (phys_addr_t)addressValue;
3475 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3476 		}
3477 
3478 		kprintf("    Team     Virtual Address      Area\n");
3479 		kprintf("--------------------------------------\n");
3480 
3481 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3482 			Callback()
3483 				:
3484 				fAddressSpace(NULL)
3485 			{
3486 			}
3487 
3488 			void SetAddressSpace(VMAddressSpace* addressSpace)
3489 			{
3490 				fAddressSpace = addressSpace;
3491 			}
3492 
3493 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3494 			{
3495 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3496 					virtualAddress);
3497 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3498 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3499 				else
3500 					kprintf("\n");
3501 				return false;
3502 			}
3503 
3504 		private:
3505 			VMAddressSpace*	fAddressSpace;
3506 		} callback;
3507 
3508 		if (team != NULL) {
3509 			// team specified -- get its address space
3510 			VMAddressSpace* addressSpace = team->address_space;
3511 			if (addressSpace == NULL) {
3512 				kprintf("Failed to get address space!\n");
3513 				return 0;
3514 			}
3515 
3516 			callback.SetAddressSpace(addressSpace);
3517 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3518 				physicalAddress, callback);
3519 		} else {
3520 			// no team specified -- iterate through all address spaces
3521 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3522 				addressSpace != NULL;
3523 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3524 				callback.SetAddressSpace(addressSpace);
3525 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3526 					physicalAddress, callback);
3527 			}
3528 		}
3529 	} else {
3530 		// get the address space
3531 		addr_t virtualAddress = (addr_t)addressValue;
3532 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3533 		VMAddressSpace* addressSpace;
3534 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3535 			addressSpace = VMAddressSpace::Kernel();
3536 		} else if (team != NULL) {
3537 			addressSpace = team->address_space;
3538 		} else {
3539 			Thread* thread = debug_get_debugged_thread();
3540 			if (thread == NULL || thread->team == NULL) {
3541 				kprintf("Failed to get team!\n");
3542 				return 0;
3543 			}
3544 
3545 			addressSpace = thread->team->address_space;
3546 		}
3547 
3548 		if (addressSpace == NULL) {
3549 			kprintf("Failed to get address space!\n");
3550 			return 0;
3551 		}
3552 
3553 		// let the translation map implementation do the job
3554 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3555 	}
3556 
3557 	return 0;
3558 }
3559 
3560 
3561 /*!	Deletes all areas and reserved regions in the given address space.
3562 
3563 	The caller must ensure that none of the areas has any wired ranges.
3564 
3565 	\param addressSpace The address space.
3566 	\param deletingAddressSpace \c true, if the address space is in the process
3567 		of being deleted.
3568 */
3569 void
3570 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3571 {
3572 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3573 		addressSpace->ID()));
3574 
3575 	addressSpace->WriteLock();
3576 
3577 	// remove all reserved areas in this address space
3578 	addressSpace->UnreserveAllAddressRanges(0);
3579 
3580 	// delete all the areas in this address space
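	// (delete_area() detaches each area from the address space, so
	// FirstArea() eventually returns NULL and the loop terminates)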
3581 	while (VMArea* area = addressSpace->FirstArea()) {
3582 		ASSERT(!area->IsWired());
3583 		delete_area(addressSpace, area, deletingAddressSpace);
3584 	}
3585 
3586 	addressSpace->WriteUnlock();
3587 }
3588 
3589 
3590 static area_id
3591 vm_area_for(addr_t address, bool kernel)
3592 {
3593 	team_id team;
3594 	if (IS_USER_ADDRESS(address)) {
3595 		// we try the user team address space, if any
3596 		team = VMAddressSpace::CurrentID();
3597 		if (team < 0)
3598 			return team;
3599 	} else
3600 		team = VMAddressSpace::KernelID();
3601 
3602 	AddressSpaceReadLocker locker(team);
3603 	if (!locker.IsLocked())
3604 		return B_BAD_TEAM_ID;
3605 
3606 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3607 	if (area != NULL) {
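		// areas that aren't marked accessible from userland are hidden from
		// non-kernel callers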
3608 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3609 			return B_ERROR;
3610 
3611 		return area->id;
3612 	}
3613 
3614 	return B_ERROR;
3615 }
3616 
3617 
3618 /*!	Frees physical pages that were used during the boot process.
3619 	\a end is inclusive.
3620 */
3621 static void
3622 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3623 {
3624 	// free all physical pages in the specified range
3625 
3626 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3627 		phys_addr_t physicalAddress;
3628 		uint32 flags;
3629 
3630 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3631 			&& (flags & PAGE_PRESENT) != 0) {
3632 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3633 			if (page != NULL && page->State() != PAGE_STATE_FREE
3634 					 && page->State() != PAGE_STATE_CLEAR
3635 					 && page->State() != PAGE_STATE_UNUSED) {
3636 				DEBUG_PAGE_ACCESS_START(page);
3637 				vm_page_set_state(page, PAGE_STATE_FREE);
3638 			}
3639 		}
3640 	}
3641 
3642 	// unmap the memory
3643 	map->Unmap(start, end);
3644 }
3645 
3646 
3647 void
3648 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3649 {
3650 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3651 	addr_t end = start + (size - 1);
3652 	addr_t lastEnd = start;
3653 
3654 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3655 		(void*)start, (void*)end));
3656 
3657 	// The areas are sorted in virtual address space order, so
3658 	// we just have to find the holes between them that fall
3659 	// into the area we should dispose
3660 
3661 	map->Lock();
3662 
3663 	for (VMAddressSpace::AreaIterator it
3664 				= VMAddressSpace::Kernel()->GetAreaIterator();
3665 			VMArea* area = it.Next();) {
3666 		addr_t areaStart = area->Base();
3667 		addr_t areaEnd = areaStart + (area->Size() - 1);
3668 
3669 		if (areaEnd < start)
3670 			continue;
3671 
3672 		if (areaStart > end) {
3673 			// we are done, the area is already beyond what we have to free
3674 			break;
3675 		}
3676 
3677 		if (areaStart > lastEnd) {
3678 			// this is something we can free
3679 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3680 				(void*)areaStart));
3681 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3682 		}
3683 
3684 		if (areaEnd >= end) {
3685 			lastEnd = areaEnd;
3686 				// no +1 to prevent potential overflow
3687 			break;
3688 		}
3689 
3690 		lastEnd = areaEnd + 1;
3691 	}
3692 
3693 	if (lastEnd < end) {
3694 		// we can also get rid of some space at the end of the area
3695 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3696 			(void*)end));
3697 		unmap_and_free_physical_pages(map, lastEnd, end);
3698 	}
3699 
3700 	map->Unlock();
3701 }
3702 
3703 
3704 static void
3705 create_preloaded_image_areas(struct preloaded_image* _image)
3706 {
3707 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3708 	char name[B_OS_NAME_LENGTH];
3709 	void* address;
3710 	int32 length;
3711 
3712 	// use file name to create a good area name
3713 	char* fileName = strrchr(image->name, '/');
3714 	if (fileName == NULL)
3715 		fileName = image->name;
3716 	else
3717 		fileName++;
3718 
3719 	length = strlen(fileName);
3720 	// make sure there is enough space for the suffix
3721 	if (length > 25)
3722 		length = 25;
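	// (25 characters plus the 5 character "_text"/"_data" suffix and the
	// terminating null still fit into the B_OS_NAME_LENGTH sized buffer)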
3723 
3724 	memcpy(name, fileName, length);
3725 	strcpy(name + length, "_text");
3726 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3727 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3728 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3729 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3730 		// this will later be remapped read-only/executable by the
3731 		// ELF initialization code
3732 
3733 	strcpy(name + length, "_data");
3734 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3735 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3736 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3737 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3738 }
3739 
3740 
3741 /*!	Frees all previously kernel arguments areas from the kernel_args structure.
3742 /*!	Frees all kernel arguments areas that were previously allocated from the
3743 	kernel_args structure. Any boot loader resources contained in those
3744 	arguments must not be accessed anymore past this point.
3745 void
3746 vm_free_kernel_args(kernel_args* args)
3747 {
3748 	uint32 i;
3749 
3750 	TRACE(("vm_free_kernel_args()\n"));
3751 
3752 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3753 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3754 		if (area >= B_OK)
3755 			delete_area(area);
3756 	}
3757 }
3758 
3759 
3760 static void
3761 allocate_kernel_args(kernel_args* args)
3762 {
3763 	TRACE(("allocate_kernel_args()\n"));
3764 
3765 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3766 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3767 
3768 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3769 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3770 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3771 	}
3772 }
3773 
3774 
3775 static void
3776 unreserve_boot_loader_ranges(kernel_args* args)
3777 {
3778 	TRACE(("unreserve_boot_loader_ranges()\n"));
3779 
3780 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3781 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3782 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3783 			args->virtual_allocated_range[i].size);
3784 	}
3785 }
3786 
3787 
3788 static void
3789 reserve_boot_loader_ranges(kernel_args* args)
3790 {
3791 	TRACE(("reserve_boot_loader_ranges()\n"));
3792 
3793 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3794 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3795 
3796 		// If the address is not a kernel address, we just skip it. The
3797 		// architecture specific code has to deal with it.
3798 		if (!IS_KERNEL_ADDRESS(address)) {
3799 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3800 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3801 			continue;
3802 		}
3803 
3804 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3805 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3806 		if (status < B_OK)
3807 			panic("could not reserve boot loader ranges\n");
3808 	}
3809 }
3810 
3811 
3812 static addr_t
3813 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3814 {
3815 	size = PAGE_ALIGN(size);
3816 
3817 	// find a slot in the virtual allocation addr range
3818 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
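	// Strategy: first look for a sufficiently large (and suitably aligned)
	// gap between two consecutive allocated ranges, then for room after the
	// last range, and finally for room before the first one. The range the
	// allocation is attached to is simply grown to cover the new block.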
3819 		// check to see if the space between this one and the last is big enough
3820 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3821 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3822 			+ args->virtual_allocated_range[i - 1].size;
3823 
3824 		addr_t base = alignment > 0
3825 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3826 
3827 		if (base >= KERNEL_BASE && base < rangeStart
3828 				&& rangeStart - base >= size) {
3829 			args->virtual_allocated_range[i - 1].size
3830 				+= base + size - previousRangeEnd;
3831 			return base;
3832 		}
3833 	}
3834 
3835 	// We didn't find a gap between the allocated ranges. That's OK --
3836 	// see if there's a gap after the last one.
3837 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3838 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3839 		+ args->virtual_allocated_range[lastEntryIndex].size;
3840 	addr_t base = alignment > 0
3841 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3842 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3843 		args->virtual_allocated_range[lastEntryIndex].size
3844 			+= base + size - lastRangeEnd;
3845 		return base;
3846 	}
3847 
3848 	// see if there's a gap before the first one
3849 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3850 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3851 		base = rangeStart - size;
3852 		if (alignment > 0)
3853 			base = ROUNDDOWN(base, alignment);
3854 
3855 		if (base >= KERNEL_BASE) {
3856 			args->virtual_allocated_range[0].start = base;
3857 			args->virtual_allocated_range[0].size += rangeStart - base;
3858 			return base;
3859 		}
3860 	}
3861 
3862 	return 0;
3863 }
3864 
3865 
3866 static bool
3867 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3868 {
3869 	// TODO: horrible brute-force method of determining if the page can be
3870 	// allocated
3871 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3872 		if (address >= args->physical_memory_range[i].start
3873 			&& address < args->physical_memory_range[i].start
3874 				+ args->physical_memory_range[i].size)
3875 			return true;
3876 	}
3877 	return false;
3878 }
3879 
3880 
3881 page_num_t
3882 vm_allocate_early_physical_page(kernel_args* args)
3883 {
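	// First try to extend one of the already allocated physical ranges
	// upwards by one page; if none of them can grow that way, the second loop
	// below tries to extend them downwards instead.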
3884 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3885 		phys_addr_t nextPage;
3886 
3887 		nextPage = args->physical_allocated_range[i].start
3888 			+ args->physical_allocated_range[i].size;
3889 		// see if the page right after this allocated paddr run can be allocated
3890 		if (i + 1 < args->num_physical_allocated_ranges
3891 			&& args->physical_allocated_range[i + 1].size != 0) {
3892 			// see if the next page will collide with the next allocated range
3893 			if (nextPage >= args->physical_allocated_range[i+1].start)
3894 				continue;
3895 		}
3896 		// see if the next physical page fits in the memory block
3897 		if (is_page_in_physical_memory_range(args, nextPage)) {
3898 			// we got one!
3899 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3900 			return nextPage / B_PAGE_SIZE;
3901 		}
3902 	}
3903 
3904 	// Expanding upwards didn't work, try going downwards.
3905 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3906 		phys_addr_t nextPage;
3907 
3908 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
3909 		// see if the page right before this allocated paddr run can be allocated
3910 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
3911 			// see if this page would collide with the previous allocated range
3912 			if (nextPage < args->physical_allocated_range[i-1].start
3913 				+ args->physical_allocated_range[i-1].size)
3914 				continue;
3915 		}
3916 		// see if the next physical page fits in the memory block
3917 		if (is_page_in_physical_memory_range(args, nextPage)) {
3918 			// we got one!
3919 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
3920 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3921 			return nextPage / B_PAGE_SIZE;
3922 		}
3923 	}
3924 
3925 	return 0;
3926 		// could not allocate a block
3927 }
3928 
3929 
3930 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3931 	allocate some pages before the VM is completely up.
3932 */
3933 addr_t
3934 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3935 	uint32 attributes, addr_t alignment)
3936 {
3937 	if (physicalSize > virtualSize)
3938 		physicalSize = virtualSize;
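	// Note: if physicalSize is smaller than virtualSize, only the first
	// physicalSize bytes of the returned range are actually backed by pages;
	// the rest is merely allocated virtual address space.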
3939 
3940 	// find the vaddr to allocate at
3941 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3942 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3943 	if (virtualBase == 0) {
3944 		panic("vm_allocate_early: could not allocate virtual address\n");
3945 		return 0;
3946 	}
3947 
3948 	// map the pages
3949 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3950 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3951 		if (physicalAddress == 0)
3952 			panic("error allocating early page!\n");
3953 
3954 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3955 
3956 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3957 			physicalAddress * B_PAGE_SIZE, attributes,
3958 			&vm_allocate_early_physical_page);
3959 	}
3960 
3961 	return virtualBase;
3962 }
3963 
3964 
3965 /*!	The main entry point for initializing the VM. */
3966 status_t
3967 vm_init(kernel_args* args)
3968 {
3969 	struct preloaded_image* image;
3970 	void* address;
3971 	status_t err = 0;
3972 	uint32 i;
3973 
3974 	TRACE(("vm_init: entry\n"));
3975 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3976 	err = arch_vm_init(args);
3977 
3978 	// initialize some globals
3979 	vm_page_init_num_pages(args);
3980 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3981 
3982 	slab_init(args);
3983 
3984 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3985 	off_t heapSize = INITIAL_HEAP_SIZE;
3986 	// try to accommodate low memory systems
3987 	while (heapSize > sAvailableMemory / 8)
3988 		heapSize /= 2;
3989 	if (heapSize < 1024 * 1024)
3990 		panic("vm_init: go buy some RAM please.");
3991 
3992 	// map in the new heap and initialize it
3993 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3994 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3995 	TRACE(("heap at 0x%lx\n", heapBase));
3996 	heap_init(heapBase, heapSize);
3997 #endif
3998 
3999 	// initialize the free page list and physical page mapper
4000 	vm_page_init(args);
4001 
4002 	// initialize the cache allocators
4003 	vm_cache_init(args);
4004 
4005 	{
4006 		status_t error = VMAreaHash::Init();
4007 		if (error != B_OK)
4008 			panic("vm_init: error initializing area hash table\n");
4009 	}
4010 
4011 	VMAddressSpace::Init();
4012 	reserve_boot_loader_ranges(args);
4013 
4014 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4015 	heap_init_post_area();
4016 #endif
4017 
4018 	// Do any further initialization that the architecture dependent layers
4019 	// may need now
4020 	arch_vm_translation_map_init_post_area(args);
4021 	arch_vm_init_post_area(args);
4022 	vm_page_init_post_area(args);
4023 	slab_init_post_area();
4024 
4025 	// allocate areas to represent stuff that already exists
4026 
4027 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4028 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4029 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4030 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4031 #endif
4032 
4033 	allocate_kernel_args(args);
4034 
4035 	create_preloaded_image_areas(args->kernel_image);
4036 
4037 	// allocate areas for preloaded images
4038 	for (image = args->preloaded_images; image != NULL; image = image->next)
4039 		create_preloaded_image_areas(image);
4040 
4041 	// allocate kernel stacks
4042 	for (i = 0; i < args->num_cpus; i++) {
4043 		char name[64];
4044 
4045 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4046 		address = (void*)args->cpu_kstack[i].start;
4047 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4048 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4049 	}
4050 
4051 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4052 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
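	// Blocking the last virtual page ensures that no area can end at the very
	// top of the address space, so "address + size" computations on valid
	// ranges cannot wrap around to 0.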
4053 
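	// The following ranges match the 0xcccccccc/0xdeadbeef fill patterns used
	// for uninitialized and freed heap memory; blocking them turns a stray
	// dereference of such a pattern into an immediate page fault.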
4054 #if PARANOID_KERNEL_MALLOC
4055 	vm_block_address_range("uninitialized heap memory",
4056 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4057 #endif
4058 #if PARANOID_KERNEL_FREE
4059 	vm_block_address_range("freed heap memory",
4060 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4061 #endif
4062 
4063 	// create the object cache for the page mappings
4064 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4065 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4066 		NULL, NULL);
4067 	if (gPageMappingsObjectCache == NULL)
4068 		panic("failed to create page mappings object cache");
4069 
4070 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
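	// Keeping a minimum number of mapping structures in reserve allows page
	// faults to be resolved even when memory is tight (see the handling of
	// map_page() failures in vm_soft_fault()).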
4071 
4072 #if DEBUG_CACHE_LIST
4073 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4074 		virtual_address_restrictions virtualRestrictions = {};
4075 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4076 		physical_address_restrictions physicalRestrictions = {};
4077 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4078 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4079 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4080 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4081 			&physicalRestrictions, (void**)&sCacheInfoTable);
4082 	}
4083 #endif	// DEBUG_CACHE_LIST
4084 
4085 	// add some debugger commands
4086 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4087 	add_debugger_command("area", &dump_area,
4088 		"Dump info about a particular area");
4089 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4090 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4091 #if DEBUG_CACHE_LIST
4092 	if (sCacheInfoTable != NULL) {
4093 		add_debugger_command_etc("caches", &dump_caches,
4094 			"List all VMCache trees",
4095 			"[ \"-c\" ]\n"
4096 			"All cache trees are listed sorted in decreasing order by number "
4097 				"of\n"
4098 			"used pages or, if \"-c\" is specified, by size of committed "
4099 				"memory.\n",
4100 			0);
4101 	}
4102 #endif
4103 	add_debugger_command("avail", &dump_available_memory,
4104 		"Dump available memory");
4105 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4106 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4107 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4108 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4109 	add_debugger_command("string", &display_mem, "dump strings");
4110 
4111 	add_debugger_command_etc("mapping", &dump_mapping_info,
4112 		"Print address mapping information",
4113 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4114 		"Prints low-level page mapping information for a given address. If\n"
4115 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4116 		"address that is looked up in the translation map of the current\n"
4117 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4118 		"\"-r\" is specified, <address> is a physical address that is\n"
4119 		"searched in the translation map of all teams, respectively the team\n"
4120 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4121 		"<address> is the address of a vm_page structure. The behavior is\n"
4122 		"equivalent to specifying \"-r\" with the physical address of that\n"
4123 		"page.\n",
4124 		0);
4125 
4126 	TRACE(("vm_init: exit\n"));
4127 
4128 	vm_cache_init_post_heap();
4129 
4130 	return err;
4131 }
4132 
4133 
4134 status_t
4135 vm_init_post_sem(kernel_args* args)
4136 {
4137 	// This frees all unused boot loader resources and makes their space
4138 	// available again
4139 	arch_vm_init_end(args);
4140 	unreserve_boot_loader_ranges(args);
4141 
4142 	// Fill in all of the semaphores that were not allocated before. Since
4143 	// we're still single-threaded and only the kernel address space exists,
4144 	// it isn't that hard to find all of the ones we need to create.
4145 
4146 	arch_vm_translation_map_init_post_sem(args);
4147 
4148 	slab_init_post_sem();
4149 
4150 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4151 	heap_init_post_sem();
4152 #endif
4153 
4154 	return B_OK;
4155 }
4156 
4157 
4158 status_t
4159 vm_init_post_thread(kernel_args* args)
4160 {
4161 	vm_page_init_post_thread(args);
4162 	slab_init_post_thread();
4163 	return heap_init_post_thread();
4164 }
4165 
4166 
4167 status_t
4168 vm_init_post_modules(kernel_args* args)
4169 {
4170 	return arch_vm_init_post_modules(args);
4171 }
4172 
4173 
4174 void
4175 permit_page_faults(void)
4176 {
4177 	Thread* thread = thread_get_current_thread();
4178 	if (thread != NULL)
4179 		atomic_add(&thread->page_faults_allowed, 1);
4180 }
4181 
4182 
4183 void
4184 forbid_page_faults(void)
4185 {
4186 	Thread* thread = thread_get_current_thread();
4187 	if (thread != NULL)
4188 		atomic_add(&thread->page_faults_allowed, -1);
4189 }
4190 
4191 
4192 status_t
4193 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4194 	bool isUser, addr_t* newIP)
4195 {
4196 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4197 		faultAddress));
4198 
4199 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4200 
4201 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4202 	VMAddressSpace* addressSpace = NULL;
4203 
4204 	status_t status = B_OK;
4205 	*newIP = 0;
4206 	atomic_add((int32*)&sPageFaults, 1);
4207 
4208 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4209 		addressSpace = VMAddressSpace::GetKernel();
4210 	} else if (IS_USER_ADDRESS(pageAddress)) {
4211 		addressSpace = VMAddressSpace::GetCurrent();
4212 		if (addressSpace == NULL) {
4213 			if (!isUser) {
4214 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4215 					"memory!\n");
4216 				status = B_BAD_ADDRESS;
4217 				TPF(PageFaultError(-1,
4218 					VMPageFaultTracing
4219 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4220 			} else {
4221 				// XXX weird state.
4222 				panic("vm_page_fault: non kernel thread accessing user memory "
4223 					"that doesn't exist!\n");
4224 				status = B_BAD_ADDRESS;
4225 			}
4226 		}
4227 	} else {
4228 		// the hit was probably in the 64k DMZ between kernel and user space
4229 		// this keeps a user space thread from passing a buffer that crosses
4230 		// into kernel space
4231 		status = B_BAD_ADDRESS;
4232 		TPF(PageFaultError(-1,
4233 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4234 	}
4235 
4236 	if (status == B_OK) {
4237 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4238 			isUser, NULL);
4239 	}
4240 
4241 	if (status < B_OK) {
4242 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4243 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4244 			strerror(status), address, faultAddress, isWrite, isUser,
4245 			thread_get_current_thread_id());
4246 		if (!isUser) {
4247 			Thread* thread = thread_get_current_thread();
4248 			if (thread != NULL && thread->fault_handler != 0) {
4249 				// this will cause the arch dependent page fault handler to
4250 				// modify the IP on the interrupt frame or whatever to return
4251 				// to this address
4252 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4253 			} else {
4254 				// unhandled page fault in the kernel
4255 				panic("vm_page_fault: unhandled page fault in kernel space at "
4256 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4257 			}
4258 		} else {
4259 			Thread* thread = thread_get_current_thread();
4260 
4261 #ifdef TRACE_FAULTS
4262 			VMArea* area = NULL;
4263 			if (addressSpace != NULL) {
4264 				addressSpace->ReadLock();
4265 				area = addressSpace->LookupArea(faultAddress);
4266 			}
4267 
4268 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4269 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4270 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4271 				thread->team->Name(), thread->team->id,
4272 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4273 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4274 					area->Base() : 0x0));
4275 
4276 			if (addressSpace != NULL)
4277 				addressSpace->ReadUnlock();
4278 #endif
4279 
4280 			// If the thread has a signal handler for SIGSEGV, we simply
4281 			// send it the signal. Otherwise we notify the user debugger
4282 			// first.
4283 			struct sigaction action;
4284 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4285 					&& action.sa_handler != SIG_DFL
4286 					&& action.sa_handler != SIG_IGN)
4287 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4288 					SIGSEGV)) {
4289 				Signal signal(SIGSEGV,
4290 					status == B_PERMISSION_DENIED
4291 						? SEGV_ACCERR : SEGV_MAPERR,
4292 					EFAULT, thread->team->id);
4293 				signal.SetAddress((void*)address);
4294 				send_signal_to_thread(thread, signal, 0);
4295 			}
4296 		}
4297 	}
4298 
4299 	if (addressSpace != NULL)
4300 		addressSpace->Put();
4301 
4302 	return B_HANDLED_INTERRUPT;
4303 }
4304 
4305 
4306 struct PageFaultContext {
4307 	AddressSpaceReadLocker	addressSpaceLocker;
4308 	VMCacheChainLocker		cacheChainLocker;
4309 
4310 	VMTranslationMap*		map;
4311 	VMCache*				topCache;
4312 	off_t					cacheOffset;
4313 	vm_page_reservation		reservation;
4314 	bool					isWrite;
4315 
4316 	// return values
4317 	vm_page*				page;
4318 	bool					restart;
4319 	bool					pageAllocated;
4320 
4321 
4322 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4323 		:
4324 		addressSpaceLocker(addressSpace, true),
4325 		map(addressSpace->TranslationMap()),
4326 		isWrite(isWrite)
4327 	{
4328 	}
4329 
4330 	~PageFaultContext()
4331 	{
4332 		UnlockAll();
4333 		vm_page_unreserve_pages(&reservation);
4334 	}
4335 
4336 	void Prepare(VMCache* topCache, off_t cacheOffset)
4337 	{
4338 		this->topCache = topCache;
4339 		this->cacheOffset = cacheOffset;
4340 		page = NULL;
4341 		restart = false;
4342 		pageAllocated = false;
4343 
4344 		cacheChainLocker.SetTo(topCache);
4345 	}
4346 
4347 	void UnlockAll(VMCache* exceptCache = NULL)
4348 	{
4349 		topCache = NULL;
4350 		addressSpaceLocker.Unlock();
4351 		cacheChainLocker.Unlock(exceptCache);
4352 	}
4353 };
4354 
4355 
4356 /*!	Gets the page that should be mapped into the area.
4357 	Returns an error code other than \c B_OK, if the page couldn't be found or
4358 	paged in. The locking state of the address space and the caches is undefined
4359 	in that case.
4360 	Returns \c B_OK with \c context.restart set to \c true, if the function
4361 	had to unlock the address space and all caches and is supposed to be called
4362 	again.
4363 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4364 	found. It is returned in \c context.page. The address space will still be
4365 	locked as well as all caches starting from the top cache to at least the
4366 	cache the page lives in.
4367 */
4368 static status_t
4369 fault_get_page(PageFaultContext& context)
4370 {
4371 	VMCache* cache = context.topCache;
4372 	VMCache* lastCache = NULL;
4373 	vm_page* page = NULL;
4374 
4375 	while (cache != NULL) {
4376 		// We already hold the lock of the cache at this point.
4377 
4378 		lastCache = cache;
4379 
4380 		page = cache->LookupPage(context.cacheOffset);
4381 		if (page != NULL && page->busy) {
4382 			// the page is busy -- wait for it to become unbusy
4383 			context.UnlockAll(cache);
4384 			cache->ReleaseRefLocked();
4385 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4386 
4387 			// restart the whole process
4388 			context.restart = true;
4389 			return B_OK;
4390 		}
4391 
4392 		if (page != NULL)
4393 			break;
4394 
4395 		// The current cache does not contain the page we're looking for.
4396 
4397 		// see if the backing store has it
4398 		if (cache->HasPage(context.cacheOffset)) {
4399 			// insert a fresh page and mark it busy -- we're going to read it in
4400 			page = vm_page_allocate_page(&context.reservation,
4401 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4402 			cache->InsertPage(page, context.cacheOffset);
4403 
4404 			// We need to unlock all caches and the address space while reading
4405 			// the page in. Keep a reference to the cache around.
4406 			cache->AcquireRefLocked();
4407 			context.UnlockAll();
4408 
4409 			// read the page in
4410 			generic_io_vec vec;
4411 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4412 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4413 
4414 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4415 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4416 
4417 			cache->Lock();
4418 
4419 			if (status < B_OK) {
4420 				// on error remove and free the page
4421 				dprintf("reading page from cache %p returned: %s!\n",
4422 					cache, strerror(status));
4423 
4424 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4425 				cache->RemovePage(page);
4426 				vm_page_set_state(page, PAGE_STATE_FREE);
4427 
4428 				cache->ReleaseRefAndUnlock();
4429 				return status;
4430 			}
4431 
4432 			// mark the page unbusy again
4433 			cache->MarkPageUnbusy(page);
4434 
4435 			DEBUG_PAGE_ACCESS_END(page);
4436 
4437 			// Since we needed to unlock everything temporarily, the area
4438 			// situation might have changed. So we need to restart the whole
4439 			// process.
4440 			cache->ReleaseRefAndUnlock();
4441 			context.restart = true;
4442 			return B_OK;
4443 		}
4444 
4445 		cache = context.cacheChainLocker.LockSourceCache();
4446 	}
4447 
4448 	if (page == NULL) {
4449 		// There was no adequate page, determine the cache for a clean one.
4450 		// Read-only pages go into the deepest cache; only the topmost cache
4451 		// may have direct write access.
4452 		cache = context.isWrite ? context.topCache : lastCache;
4453 
4454 		// allocate a clean page
4455 		page = vm_page_allocate_page(&context.reservation,
4456 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4457 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4458 			page->physical_page_number));
4459 
4460 		// insert the new page into our cache
4461 		cache->InsertPage(page, context.cacheOffset);
4462 		context.pageAllocated = true;
4463 	} else if (page->Cache() != context.topCache && context.isWrite) {
4464 		// We have a page that has the data we want, but in the wrong cache
4465 		// object so we need to copy it and stick it into the top cache.
4466 		vm_page* sourcePage = page;
4467 
4468 		// TODO: If memory is low, it might be a good idea to steal the page
4469 		// from our source cache -- if possible, that is.
4470 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4471 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4472 
4473 		// To not needlessly kill concurrency we unlock all caches but the top
4474 		// one while copying the page. Lacking another mechanism to ensure that
4475 		// the source page doesn't disappear, we mark it busy.
4476 		sourcePage->busy = true;
4477 		context.cacheChainLocker.UnlockKeepRefs(true);
4478 
4479 		// copy the page
4480 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4481 			sourcePage->physical_page_number * B_PAGE_SIZE);
4482 
4483 		context.cacheChainLocker.RelockCaches(true);
4484 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4485 
4486 		// insert the new page into our cache
4487 		context.topCache->InsertPage(page, context.cacheOffset);
4488 		context.pageAllocated = true;
4489 	} else
4490 		DEBUG_PAGE_ACCESS_START(page);
4491 
4492 	context.page = page;
4493 	return B_OK;
4494 }
4495 
4496 
4497 /*!	Makes sure the address in the given address space is mapped.
4498 
4499 	\param addressSpace The address space.
4500 	\param originalAddress The address. Doesn't need to be page aligned.
4501 	\param isWrite If \c true the address shall be write-accessible.
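	\param isExecute If \c true the address shall be executable.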
4502 	\param isUser If \c true the access is requested by a userland team.
4503 	\param wirePage On success, if non \c NULL, the wired count of the page
4504 		mapped at the given address is incremented and the page is returned
4505 		via this parameter.
4506 	\return \c B_OK on success, another error code otherwise.
4507 */
4508 static status_t
4509 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4510 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4511 {
4512 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4513 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4514 		originalAddress, isWrite, isUser));
4515 
4516 	PageFaultContext context(addressSpace, isWrite);
4517 
4518 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4519 	status_t status = B_OK;
4520 
4521 	addressSpace->IncrementFaultCount();
4522 
4523 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4524 	// the pages upfront makes sure we don't have any cache locked, so that the
4525 	// page daemon/thief can do their job without problems.
4526 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4527 		originalAddress);
4528 	context.addressSpaceLocker.Unlock();
4529 	vm_page_reserve_pages(&context.reservation, reservePages,
4530 		addressSpace == VMAddressSpace::Kernel()
4531 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4532 
4533 	while (true) {
4534 		context.addressSpaceLocker.Lock();
4535 
4536 		// get the area the fault was in
4537 		VMArea* area = addressSpace->LookupArea(address);
4538 		if (area == NULL) {
4539 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4540 				"space\n", originalAddress);
4541 			TPF(PageFaultError(-1,
4542 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4543 			status = B_BAD_ADDRESS;
4544 			break;
4545 		}
4546 
4547 		// check permissions
4548 		uint32 protection = get_area_page_protection(area, address);
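		// (get_area_page_protection() returns the per-page protection if the
		// area uses page_protections and the area-wide protection otherwise)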
4549 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4550 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4551 				area->id, (void*)originalAddress);
4552 			TPF(PageFaultError(area->id,
4553 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4554 			status = B_PERMISSION_DENIED;
4555 			break;
4556 		}
4557 		if (isWrite && (protection
4558 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4559 			dprintf("write access attempted on write-protected area 0x%"
4560 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4561 			TPF(PageFaultError(area->id,
4562 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4563 			status = B_PERMISSION_DENIED;
4564 			break;
4565 		} else if (isExecute && (protection
4566 				& (B_EXECUTE_AREA
4567 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4568 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4569 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4570 			TPF(PageFaultError(area->id,
4571 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4572 			status = B_PERMISSION_DENIED;
4573 			break;
4574 		} else if (!isWrite && !isExecute && (protection
4575 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4576 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4577 				" at %p\n", area->id, (void*)originalAddress);
4578 			TPF(PageFaultError(area->id,
4579 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4580 			status = B_PERMISSION_DENIED;
4581 			break;
4582 		}
4583 
4584 		// We have the area, it was a valid access, so let's try to resolve the
4585 		// page fault now.
4586 		// At first, the top most cache from the area is investigated.
4587 
4588 		context.Prepare(vm_area_get_locked_cache(area),
4589 			address - area->Base() + area->cache_offset);
4590 
4591 		// See if this cache has a fault handler -- this will do all the work
4592 		// for us.
4593 		{
4594 			// Note, since the page fault is resolved with interrupts enabled,
4595 			// the fault handler could be called more than once for the same
4596 			// reason -- the store must take this into account.
4597 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4598 			if (status != B_BAD_HANDLER)
4599 				break;
4600 		}
4601 
4602 		// The top most cache has no fault handler, so let's see if the cache or
4603 		// its sources already have the page we're searching for (we're going
4604 		// from top to bottom).
4605 		status = fault_get_page(context);
4606 		if (status != B_OK) {
4607 			TPF(PageFaultError(area->id, status));
4608 			break;
4609 		}
4610 
4611 		if (context.restart)
4612 			continue;
4613 
4614 		// All went fine, all there is left to do is to map the page into the
4615 		// address space.
4616 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4617 			context.page));
4618 
4619 		// If the page doesn't reside in the area's cache, we need to make sure
4620 		// it's mapped in read-only, so that we cannot overwrite someone else's
4621 		// data (copy-on-write)
4622 		uint32 newProtection = protection;
4623 		if (context.page->Cache() != context.topCache && !isWrite)
4624 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4625 
4626 		bool unmapPage = false;
4627 		bool mapPage = true;
4628 
4629 		// check whether there's already a page mapped at the address
4630 		context.map->Lock();
4631 
4632 		phys_addr_t physicalAddress;
4633 		uint32 flags;
4634 		vm_page* mappedPage = NULL;
4635 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4636 			&& (flags & PAGE_PRESENT) != 0
4637 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4638 				!= NULL) {
4639 			// Yep there's already a page. If it's ours, we can simply adjust
4640 			// its protection. Otherwise we have to unmap it.
4641 			if (mappedPage == context.page) {
4642 				context.map->ProtectPage(area, address, newProtection);
4643 					// Note: We assume that ProtectPage() is atomic (i.e.
4644 					// the page isn't temporarily unmapped), otherwise we'd have
4645 					// to make sure it isn't wired.
4646 				mapPage = false;
4647 			} else
4648 				unmapPage = true;
4649 		}
4650 
4651 		context.map->Unlock();
4652 
4653 		if (unmapPage) {
4654 			// If the page is wired, we can't unmap it. Wait until it is unwired
4655 			// again and restart. Note that the page cannot be wired for
4656 			// writing, since it isn't in the topmost cache. So we can safely
4657 			// ignore ranges wired for writing (our own and other concurrent
4658 			// wiring attempts in progress) and in fact have to do that to avoid
4659 			// a deadlock.
4660 			VMAreaUnwiredWaiter waiter;
4661 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4662 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4663 				// unlock everything and wait
4664 				if (context.pageAllocated) {
4665 					// ... but since we allocated a page and inserted it into
4666 					// the top cache, remove and free it first. Otherwise we'd
4667 					// have a page from a lower cache mapped while an upper
4668 					// cache has a page that would shadow it.
4669 					context.topCache->RemovePage(context.page);
4670 					vm_page_free_etc(context.topCache, context.page,
4671 						&context.reservation);
4672 				} else
4673 					DEBUG_PAGE_ACCESS_END(context.page);
4674 
4675 				context.UnlockAll();
4676 				waiter.waitEntry.Wait();
4677 				continue;
4678 			}
4679 
4680 			// Note: The mapped page is a page of a lower cache. We are
4681 			// guaranteed to have that cache locked, our new page is a copy of
4682 			// that page, and the page is not busy. The logic for that guarantee
4683 			// is as follows: Since the page is mapped, it must live in the top
4684 			// cache (ruled out above) or any of its lower caches, and there is
4685 			// (was before the new page was inserted) no other page in any
4686 			// cache between the top cache and the page's cache (otherwise that
4687 			// would be mapped instead). That in turn means that our algorithm
4688 			// must have found it and therefore it cannot be busy either.
4689 			DEBUG_PAGE_ACCESS_START(mappedPage);
4690 			unmap_page(area, address);
4691 			DEBUG_PAGE_ACCESS_END(mappedPage);
4692 		}
4693 
4694 		if (mapPage) {
4695 			if (map_page(area, context.page, address, newProtection,
4696 					&context.reservation) != B_OK) {
4697 				// Mapping can only fail when the page mapping object couldn't
4698 				// be allocated. Apart from the missing mapping everything is
4699 				// fine, though. If this was a regular page fault, we'll simply
4700 				// leave and probably fault again. To make sure we'll have more
4701 				// luck then, we ensure that the minimum object reserve is
4702 				// available.
4703 				DEBUG_PAGE_ACCESS_END(context.page);
4704 
4705 				context.UnlockAll();
4706 
4707 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4708 						!= B_OK) {
4709 					// Apparently the situation is serious. Let's get ourselves
4710 					// killed.
4711 					status = B_NO_MEMORY;
4712 				} else if (wirePage != NULL) {
4713 					// The caller expects us to wire the page. Since
4714 					// object_cache_reserve() succeeded, we should now be able
4715 					// to allocate a mapping structure. Restart.
4716 					continue;
4717 				}
4718 
4719 				break;
4720 			}
4721 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4722 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4723 
4724 		// also wire the page, if requested
4725 		if (wirePage != NULL && status == B_OK) {
4726 			increment_page_wired_count(context.page);
4727 			*wirePage = context.page;
4728 		}
4729 
4730 		DEBUG_PAGE_ACCESS_END(context.page);
4731 
4732 		break;
4733 	}
4734 
4735 	return status;
4736 }
4737 
4738 
4739 status_t
4740 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4741 {
4742 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4743 }
4744 
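// Every mapping obtained via vm_get_physical_page() (or one of the CPU/debug
// variants below) is meant to be released again with the matching put
// function, passing back the handle returned by the get call.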
4745 status_t
4746 vm_put_physical_page(addr_t vaddr, void* handle)
4747 {
4748 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4749 }
4750 
4751 
4752 status_t
4753 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4754 	void** _handle)
4755 {
4756 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4757 }
4758 
4759 status_t
4760 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4761 {
4762 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4763 }
4764 
4765 
4766 status_t
4767 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4768 {
4769 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4770 }
4771 
4772 status_t
4773 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4774 {
4775 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4776 }
4777 
4778 
4779 void
4780 vm_get_info(system_info* info)
4781 {
4782 	swap_get_info(info);
4783 
4784 	MutexLocker locker(sAvailableMemoryLock);
4785 	info->needed_memory = sNeededMemory;
4786 	info->free_memory = sAvailableMemory;
4787 }
4788 
4789 
4790 uint32
4791 vm_num_page_faults(void)
4792 {
4793 	return sPageFaults;
4794 }
4795 
4796 
4797 off_t
4798 vm_available_memory(void)
4799 {
4800 	MutexLocker locker(sAvailableMemoryLock);
4801 	return sAvailableMemory;
4802 }
4803 
4804 
4805 off_t
4806 vm_available_not_needed_memory(void)
4807 {
4808 	MutexLocker locker(sAvailableMemoryLock);
4809 	return sAvailableMemory - sNeededMemory;
4810 }
4811 
4812 
4813 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4814 	debugger.
4815 */
4816 off_t
4817 vm_available_not_needed_memory_debug(void)
4818 {
4819 	return sAvailableMemory - sNeededMemory;
4820 }
4821 
4822 
4823 size_t
4824 vm_kernel_address_space_left(void)
4825 {
4826 	return VMAddressSpace::Kernel()->FreeSpace();
4827 }
4828 
4829 
4830 void
4831 vm_unreserve_memory(size_t amount)
4832 {
4833 	mutex_lock(&sAvailableMemoryLock);
4834 
4835 	sAvailableMemory += amount;
4836 
4837 	mutex_unlock(&sAvailableMemoryLock);
4838 }
4839 
4840 
4841 status_t
4842 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4843 {
4844 	size_t reserve = kMemoryReserveForPriority[priority];
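	// How much memory is kept in reserve depends on the caller's priority:
	// the request only succeeds if granting it still leaves that reserve
	// available (or becomes available before the timeout expires).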
4845 
4846 	MutexLocker locker(sAvailableMemoryLock);
4847 
4848 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4849 
4850 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4851 		sAvailableMemory -= amount;
4852 		return B_OK;
4853 	}
4854 
4855 	if (timeout <= 0)
4856 		return B_NO_MEMORY;
4857 
4858 	// turn timeout into an absolute timeout
4859 	timeout += system_time();
4860 
4861 	// loop until we've got the memory or the timeout occurs
4862 	do {
4863 		sNeededMemory += amount;
4864 
4865 		// call the low resource manager
4866 		locker.Unlock();
4867 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4868 			B_ABSOLUTE_TIMEOUT, timeout);
4869 		locker.Lock();
4870 
4871 		sNeededMemory -= amount;
4872 
4873 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4874 			sAvailableMemory -= amount;
4875 			return B_OK;
4876 		}
4877 	} while (timeout > system_time());
4878 
4879 	return B_NO_MEMORY;
4880 }
4881 
4882 
4883 status_t
4884 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4885 {
4886 	// NOTE: The caller is responsible for synchronizing calls to this function!
4887 
4888 	AddressSpaceReadLocker locker;
4889 	VMArea* area;
4890 	status_t status = locker.SetFromArea(id, area);
4891 	if (status != B_OK)
4892 		return status;
4893 
4894 	// nothing to do, if the type doesn't change
4895 	uint32 oldType = area->MemoryType();
4896 	if (type == oldType)
4897 		return B_OK;
4898 
4899 	// set the memory type of the area and the mapped pages
4900 	VMTranslationMap* map = area->address_space->TranslationMap();
4901 	map->Lock();
4902 	area->SetMemoryType(type);
4903 	map->ProtectArea(area, area->protection);
4904 	map->Unlock();
4905 
4906 	// set the physical memory type
4907 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4908 	if (error != B_OK) {
4909 		// reset the memory type of the area and the mapped pages
4910 		map->Lock();
4911 		area->SetMemoryType(oldType);
4912 		map->ProtectArea(area, area->protection);
4913 		map->Unlock();
4914 		return error;
4915 	}
4916 
4917 	return B_OK;
4919 }
4920 
4921 
4922 /*!	This function enforces some protection properties:
4923 	 - kernel areas must be W^X (after kernel startup)
4924 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4925 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4926 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4927 	   and B_KERNEL_WRITE_AREA.
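	For example, a request for just \c B_READ_AREA | \c B_WRITE_AREA ends up
	as \c B_READ_AREA | \c B_WRITE_AREA | \c B_KERNEL_READ_AREA
	| \c B_KERNEL_WRITE_AREA, while \c B_READ_AREA alone only gains
	\c B_KERNEL_READ_AREA.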
4928 */
4929 static void
4930 fix_protection(uint32* protection)
4931 {
4932 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
4933 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
4934 			|| (*protection & B_WRITE_AREA) != 0)
4935 		&& !gKernelStartup)
4936 		panic("kernel areas cannot be both writable and executable!");
4937 
4938 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4939 		if ((*protection & B_USER_PROTECTION) == 0
4940 			|| (*protection & B_WRITE_AREA) != 0)
4941 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4942 		else
4943 			*protection |= B_KERNEL_READ_AREA;
4944 	}
4945 }
4946 
4947 
4948 static void
4949 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4950 {
4951 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4952 	info->area = area->id;
4953 	info->address = (void*)area->Base();
4954 	info->size = area->Size();
4955 	info->protection = area->protection;
4956 	info->lock = B_FULL_LOCK;
4957 	info->team = area->address_space->ID();
4958 	info->copy_count = 0;
4959 	info->in_count = 0;
4960 	info->out_count = 0;
4961 		// TODO: retrieve real values here!
4962 
4963 	VMCache* cache = vm_area_get_locked_cache(area);
4964 
4965 	// Note, this is a simplification; the cache could be larger than this area
4966 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4967 
4968 	vm_area_put_locked_cache(cache);
4969 }
4970 
4971 
4972 static status_t
4973 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4974 {
4975 	// is newSize a multiple of B_PAGE_SIZE?
4976 	if (newSize & (B_PAGE_SIZE - 1))
4977 		return B_BAD_VALUE;
4978 
4979 	// lock all affected address spaces and the cache
4980 	VMArea* area;
4981 	VMCache* cache;
4982 
4983 	MultiAddressSpaceLocker locker;
4984 	AreaCacheLocker cacheLocker;
4985 
4986 	status_t status;
4987 	size_t oldSize;
4988 	bool anyKernelArea;
4989 	bool restart;
4990 
4991 	do {
4992 		anyKernelArea = false;
4993 		restart = false;
4994 
4995 		locker.Unset();
4996 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4997 		if (status != B_OK)
4998 			return status;
4999 		cacheLocker.SetTo(cache, true);	// already locked
5000 
5001 		// enforce restrictions
5002 		if (!kernel && area->address_space == VMAddressSpace::Kernel()) {
5003 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5004 				"resize kernel area %" B_PRId32 " (%s)\n",
5005 				team_get_current_team_id(), areaID, area->name);
5006 			return B_NOT_ALLOWED;
5007 		}
5008 		// TODO: Enforce all restrictions (team, etc.)!
5009 
5010 		oldSize = area->Size();
5011 		if (newSize == oldSize)
5012 			return B_OK;
5013 
5014 		if (cache->type != CACHE_TYPE_RAM)
5015 			return B_NOT_ALLOWED;
5016 
5017 		if (oldSize < newSize) {
5018 			// We need to check if all areas of this cache can be resized.
5019 			for (VMArea* current = cache->areas; current != NULL;
5020 					current = current->cache_next) {
5021 				if (!current->address_space->CanResizeArea(current, newSize))
5022 					return B_ERROR;
5023 				anyKernelArea
5024 					|= current->address_space == VMAddressSpace::Kernel();
5025 			}
5026 		} else {
5027 			// We're shrinking the areas, so we must make sure the affected
5028 			// ranges are not wired.
5029 			for (VMArea* current = cache->areas; current != NULL;
5030 					current = current->cache_next) {
5031 				anyKernelArea
5032 					|= current->address_space == VMAddressSpace::Kernel();
5033 
5034 				if (wait_if_area_range_is_wired(current,
5035 						current->Base() + newSize, oldSize - newSize, &locker,
5036 						&cacheLocker)) {
5037 					restart = true;
5038 					break;
5039 				}
5040 			}
5041 		}
5042 	} while (restart);
5043 
5044 	// Okay, looks good so far, so let's do it
5045 
5046 	int priority = kernel && anyKernelArea
5047 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5048 	uint32 allocationFlags = kernel && anyKernelArea
5049 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5050 
5051 	if (oldSize < newSize) {
5052 		// Growing the cache can fail, so we do it first.
5053 		status = cache->Resize(cache->virtual_base + newSize, priority);
5054 		if (status != B_OK)
5055 			return status;
5056 	}
5057 
5058 	for (VMArea* current = cache->areas; current != NULL;
5059 			current = current->cache_next) {
5060 		status = current->address_space->ResizeArea(current, newSize,
5061 			allocationFlags);
5062 		if (status != B_OK)
5063 			break;
5064 
5065 		// We also need to unmap all pages beyond the new size, if the area has
5066 		// shrunk
5067 		if (newSize < oldSize) {
5068 			VMCacheChainLocker cacheChainLocker(cache);
5069 			cacheChainLocker.LockAllSourceCaches();
5070 
5071 			unmap_pages(current, current->Base() + newSize,
5072 				oldSize - newSize);
5073 
5074 			cacheChainLocker.Unlock(cache);
5075 		}
5076 	}
5077 
5078 	if (status == B_OK) {
5079 		// Shrink or grow individual page protections if in use.
5080 		if (area->page_protections != NULL) {
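			// page protections are stored as 4 bits per page, hence the
			// (pageCount + 1) / 2 bytes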
5081 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5082 			uint8* newProtections
5083 				= (uint8*)realloc(area->page_protections, bytes);
5084 			if (newProtections == NULL)
5085 				status = B_NO_MEMORY;
5086 			else {
5087 				area->page_protections = newProtections;
5088 
5089 				if (oldSize < newSize) {
5090 					// init the additional page protections to that of the area
5091 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5092 					uint32 areaProtection = area->protection
5093 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5094 					memset(area->page_protections + offset,
5095 						areaProtection | (areaProtection << 4), bytes - offset);
5096 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5097 						uint8& entry = area->page_protections[offset - 1];
5098 						entry = (entry & 0x0f) | (areaProtection << 4);
5099 					}
5100 				}
5101 			}
5102 		}
5103 	}
5104 
5105 	// shrinking the cache can't fail, so we do it now
5106 	if (status == B_OK && newSize < oldSize)
5107 		status = cache->Resize(cache->virtual_base + newSize, priority);
5108 
5109 	if (status != B_OK) {
5110 		// Something failed -- resize the areas back to their original size.
5111 		// This can fail, too, in which case we're seriously screwed.
5112 		for (VMArea* current = cache->areas; current != NULL;
5113 				current = current->cache_next) {
5114 			if (current->address_space->ResizeArea(current, oldSize,
5115 					allocationFlags) != B_OK) {
5116 				panic("vm_resize_area(): Failed and unable to restore the "
5117 					"original state.");
5118 			}
5119 		}
5120 
5121 		cache->Resize(cache->virtual_base + oldSize, priority);
5122 	}
5123 
5124 	// TODO: we must honour the lock restrictions of this area
5125 	return status;
5126 }
5127 
5128 
5129 status_t
5130 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5131 {
5132 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5133 }
5134 
5135 
5136 status_t
5137 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5138 {
5139 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5140 }
5141 
5142 
5143 status_t
5144 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5145 	bool user)
5146 {
5147 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5148 }
5149 
5150 
5151 void
5152 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5153 {
5154 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5155 }
5156 
5157 
5158 /*!	Copies a range of memory directly from/to a page that might not be mapped
5159 	at the moment.
5160 
5161 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5162 	walks through the respective area's cache chain to find the physical page
5163 	and copies from/to it directly.
5164 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5165 	must not cross a page boundary.
5166 
5167 	\param teamID The team ID identifying the address space \a unsafeMemory is
5168 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5169 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5170 		is passed, the address space of the thread returned by
5171 		debug_get_debugged_thread() is used.
5172 	\param unsafeMemory The start of the unsafe memory range to be copied
5173 		from/to.
5174 	\param buffer A safely accessible kernel buffer to be copied from/to.
5175 	\param size The number of bytes to be copied.
5176 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5177 		\a unsafeMemory, the other way around otherwise.
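	A minimal usage sketch (illustrative only; \c someUserAddress is a
	hypothetical address in the debugged team):
	\code
	uint32 value;
	status_t status = vm_debug_copy_page_memory(B_CURRENT_TEAM,
		(void*)someUserAddress, &value, sizeof(value), false);
	\endcode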
5178 */
5179 status_t
5180 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5181 	size_t size, bool copyToUnsafe)
5182 {
5183 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5184 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5185 		return B_BAD_VALUE;
5186 	}
5187 
5188 	// get the address space for the debugged thread
5189 	VMAddressSpace* addressSpace;
5190 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5191 		addressSpace = VMAddressSpace::Kernel();
5192 	} else if (teamID == B_CURRENT_TEAM) {
5193 		Thread* thread = debug_get_debugged_thread();
5194 		if (thread == NULL || thread->team == NULL)
5195 			return B_BAD_ADDRESS;
5196 
5197 		addressSpace = thread->team->address_space;
5198 	} else
5199 		addressSpace = VMAddressSpace::DebugGet(teamID);
5200 
5201 	if (addressSpace == NULL)
5202 		return B_BAD_ADDRESS;
5203 
5204 	// get the area
5205 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5206 	if (area == NULL)
5207 		return B_BAD_ADDRESS;
5208 
5209 	// search the page
5210 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5211 		+ area->cache_offset;
5212 	VMCache* cache = area->cache;
5213 	vm_page* page = NULL;
5214 	while (cache != NULL) {
5215 		page = cache->DebugLookupPage(cacheOffset);
5216 		if (page != NULL)
5217 			break;
5218 
5219 		// Page not found in this cache -- if it is paged out, we must not try
5220 		// to get it from lower caches.
5221 		if (cache->DebugHasPage(cacheOffset))
5222 			break;
5223 
5224 		cache = cache->source;
5225 	}
5226 
5227 	if (page == NULL)
5228 		return B_UNSUPPORTED;
5229 
5230 	// copy from/to physical memory
5231 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5232 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5233 
5234 	if (copyToUnsafe) {
5235 		if (page->Cache() != area->cache)
5236 			return B_UNSUPPORTED;
5237 
5238 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5239 	}
5240 
5241 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5242 }
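

// Illustrative sketch (not part of the original source): how a kernel
// debugger command might use vm_debug_copy_page_memory() to read a 32 bit
// value from a debugged team's address space. The helper name and its use
// are hypothetical; the real KDL commands have their own infrastructure.
#if 0
static status_t
debug_read_user_uint32(team_id teamID, void* unsafeAddress, uint32* _value)
{
	// the copied range may be at most one page and must not cross a page
	// boundary; copyToUnsafe == false copies from the team's memory into
	// the kernel buffer
	return vm_debug_copy_page_memory(teamID, unsafeAddress, _value,
		sizeof(*_value), false);
}
#endif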
5243 
5244 
5245 //	#pragma mark - kernel public API
5246 
5247 
5248 status_t
5249 user_memcpy(void* to, const void* from, size_t size)
5250 {
5251 	// don't allow address overflows
5252 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5253 		return B_BAD_ADDRESS;
5254 
5255 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5256 		return B_BAD_ADDRESS;
5257 
5258 	return B_OK;
5259 }
5260 
5261 
5262 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5263 	the string in \a to, NULL-terminating the result.
5264 
5265 	\param to Pointer to the destination C-string.
5266 	\param from Pointer to the source C-string.
5267 	\param size Size in bytes of the string buffer pointed to by \a to.
5268 
5269 	\return strlen(\a from).
5270 */
5271 ssize_t
5272 user_strlcpy(char* to, const char* from, size_t size)
5273 {
5274 	if (to == NULL && size != 0)
5275 		return B_BAD_VALUE;
5276 	if (from == NULL)
5277 		return B_BAD_ADDRESS;
5278 
5279 	// limit size to avoid address overflows
5280 	size_t maxSize = std::min((addr_t)size,
5281 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5282 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5283 		// the source address might still overflow.
5284 
5285 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5286 
5287 	// If we hit the address overflow boundary, fail.
5288 	if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5289 			&& maxSize < size)) {
5290 		return B_BAD_ADDRESS;
5291 	}
5292 
5293 	return result;
5294 }
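

// Illustrative sketch (not part of the original source): the usual pattern
// for copying a NULL-terminated string from userland into a fixed-size
// kernel buffer. The function name and the B_NAME_TOO_LONG policy are
// assumptions made for the example.
#if 0
static status_t
example_copy_user_name(const char* userName, char* buffer, size_t bufferSize)
{
	if (!IS_USER_ADDRESS(userName))
		return B_BAD_ADDRESS;

	ssize_t length = user_strlcpy(buffer, userName, bufferSize);
	if (length < 0)
		return B_BAD_ADDRESS;
	if ((size_t)length >= bufferSize)
		return B_NAME_TOO_LONG;
			// the source string was truncated

	return B_OK;
}
#endif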
5295 
5296 
5297 status_t
5298 user_memset(void* s, char c, size_t count)
5299 {
5300 	// don't allow address overflows
5301 	if ((addr_t)s + count < (addr_t)s)
5302 		return B_BAD_ADDRESS;
5303 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5304 		return B_BAD_ADDRESS;
5305 
5306 	return B_OK;
5307 }
5308 
5309 
5310 /*!	Wires a single page at the given address.
5311 
5312 	\param team The team whose address space the address belongs to. Supports
5313 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5314 		parameter is ignored.
5315 	\param address The virtual address to wire down. Does not need to
5316 		be page aligned.
5317 	\param writable If \c true the page shall be writable.
5318 	\param info On success the info is filled in, among other things
5319 		containing the physical address the given virtual one translates to.
5320 	\return \c B_OK if the page could be wired, another error code otherwise.
5321 */
5322 status_t
5323 vm_wire_page(team_id team, addr_t address, bool writable,
5324 	VMPageWiringInfo* info)
5325 {
5326 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5327 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5328 
5329 	// compute the page protection that is required
5330 	bool isUser = IS_USER_ADDRESS(address);
5331 	uint32 requiredProtection = PAGE_PRESENT
5332 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5333 	if (writable)
5334 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5335 
5336 	// get and read lock the address space
5337 	VMAddressSpace* addressSpace = NULL;
5338 	if (isUser) {
5339 		if (team == B_CURRENT_TEAM)
5340 			addressSpace = VMAddressSpace::GetCurrent();
5341 		else
5342 			addressSpace = VMAddressSpace::Get(team);
5343 	} else
5344 		addressSpace = VMAddressSpace::GetKernel();
5345 	if (addressSpace == NULL)
5346 		return B_ERROR;
5347 
5348 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5349 
5350 	VMTranslationMap* map = addressSpace->TranslationMap();
5351 	status_t error = B_OK;
5352 
5353 	// get the area
5354 	VMArea* area = addressSpace->LookupArea(pageAddress);
5355 	if (area == NULL) {
5356 		addressSpace->Put();
5357 		return B_BAD_ADDRESS;
5358 	}
5359 
5360 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5361 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5362 
5363 	// mark the area range wired
5364 	area->Wire(&info->range);
5365 
5366 	// Lock the area's cache chain and the translation map. Needed to look
5367 	// up the page and play with its wired count.
5368 	cacheChainLocker.LockAllSourceCaches();
5369 	map->Lock();
5370 
5371 	phys_addr_t physicalAddress;
5372 	uint32 flags;
5373 	vm_page* page;
5374 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5375 		&& (flags & requiredProtection) == requiredProtection
5376 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5377 			!= NULL) {
5378 		// Already mapped with the correct permissions -- just increment
5379 		// the page's wired count.
5380 		increment_page_wired_count(page);
5381 
5382 		map->Unlock();
5383 		cacheChainLocker.Unlock();
5384 		addressSpaceLocker.Unlock();
5385 	} else {
5386 		// Let vm_soft_fault() map the page for us, if possible. We need
5387 		// to fully unlock to avoid deadlocks. Since we have already
5388 		// wired the area itself, nothing disturbing will happen with it
5389 		// in the meantime.
5390 		map->Unlock();
5391 		cacheChainLocker.Unlock();
5392 		addressSpaceLocker.Unlock();
5393 
5394 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5395 			isUser, &page);
5396 
5397 		if (error != B_OK) {
5398 			// The page could not be mapped -- clean up.
5399 			VMCache* cache = vm_area_get_locked_cache(area);
5400 			area->Unwire(&info->range);
5401 			cache->ReleaseRefAndUnlock();
5402 			addressSpace->Put();
5403 			return error;
5404 		}
5405 	}
5406 
5407 	info->physicalAddress
5408 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5409 			+ address % B_PAGE_SIZE;
5410 	info->page = page;
5411 
5412 	return B_OK;
5413 }
5414 
5415 
5416 /*!	Unwires a single page previously wired via vm_wire_page().
5417 
5418 	\param info The same object passed to vm_wire_page() before.
5419 */
5420 void
5421 vm_unwire_page(VMPageWiringInfo* info)
5422 {
5423 	// lock the address space
5424 	VMArea* area = info->range.area;
5425 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5426 		// takes over our reference
5427 
5428 	// lock the top cache
5429 	VMCache* cache = vm_area_get_locked_cache(area);
5430 	VMCacheChainLocker cacheChainLocker(cache);
5431 
5432 	if (info->page->Cache() != cache) {
5433 		// The page is not in the top cache, so we lock the whole cache chain
5434 		// before touching the page's wired count.
5435 		cacheChainLocker.LockAllSourceCaches();
5436 	}
5437 
5438 	decrement_page_wired_count(info->page);
5439 
5440 	// remove the wired range from the area
5441 	area->Unwire(&info->range);
5442 
5443 	cacheChainLocker.Unlock();
5444 }
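

// Illustrative sketch (not part of the original source): wiring a single
// page to learn its physical address and unwiring it again afterwards. The
// caller and what is done with the physical address are hypothetical.
#if 0
static status_t
example_with_wired_page(team_id team, addr_t userAddress)
{
	VMPageWiringInfo info;
	status_t status = vm_wire_page(team, userAddress, true, &info);
	if (status != B_OK)
		return status;

	// info.physicalAddress refers to the wired page and stays valid until
	// vm_unwire_page() is called
	dprintf("page at %#" B_PRIxADDR " -> %#" B_PRIxPHYSADDR "\n", userAddress,
		info.physicalAddress);

	vm_unwire_page(&info);
	return B_OK;
}
#endif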
5445 
5446 
5447 /*!	Wires down the given address range in the specified team's address space.
5448 
5449 	If successful the function
5450 	- acquires a reference to the specified team's address space,
5451 	- adds respective wired ranges to all areas that intersect with the given
5452 	  address range,
5453 	- makes sure all pages in the given address range are mapped with the
5454 	  requested access permissions and increments their wired count.
5455 
5456 	It fails when \a team doesn't specify a valid address space, when any part
5457 	of the specified address range is not covered by areas, when the concerned
5458 	areas don't allow mapping with the requested permissions, or when mapping
5459 	failed for another reason.
5460 
5461 	When successful the call must be balanced by an unlock_memory_etc() call with
5462 	the exact same parameters.
5463 
5464 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5465 		supported.
5466 	\param address The start of the address range to be wired.
5467 	\param numBytes The size of the address range to be wired.
5468 	\param flags Flags. Currently only \c B_READ_DEVICE is defined; if set,
5469 		the range is wired read-only (memory is read and written to the
5470 		device), otherwise it is wired writable (the device writes into memory).
5471 	\return \c B_OK on success, another error code otherwise.
5472 */
5473 status_t
5474 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5475 {
5476 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5477 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5478 
5479 	// compute the page protection that is required
5480 	bool isUser = IS_USER_ADDRESS(address);
5481 	bool writable = (flags & B_READ_DEVICE) == 0;
5482 	uint32 requiredProtection = PAGE_PRESENT
5483 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5484 	if (writable)
5485 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5486 
5487 	uint32 mallocFlags = isUser
5488 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5489 
5490 	// get and read lock the address space
5491 	VMAddressSpace* addressSpace = NULL;
5492 	if (isUser) {
5493 		if (team == B_CURRENT_TEAM)
5494 			addressSpace = VMAddressSpace::GetCurrent();
5495 		else
5496 			addressSpace = VMAddressSpace::Get(team);
5497 	} else
5498 		addressSpace = VMAddressSpace::GetKernel();
5499 	if (addressSpace == NULL)
5500 		return B_ERROR;
5501 
5502 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5503 		// We get a new address space reference here. The one we got above will
5504 		// be freed by unlock_memory_etc().
5505 
5506 	VMTranslationMap* map = addressSpace->TranslationMap();
5507 	status_t error = B_OK;
5508 
5509 	// iterate through all concerned areas
5510 	addr_t nextAddress = lockBaseAddress;
5511 	while (nextAddress != lockEndAddress) {
5512 		// get the next area
5513 		VMArea* area = addressSpace->LookupArea(nextAddress);
5514 		if (area == NULL) {
5515 			error = B_BAD_ADDRESS;
5516 			break;
5517 		}
5518 
5519 		addr_t areaStart = nextAddress;
5520 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5521 
5522 		// allocate the wired range (do that before locking the cache to avoid
5523 		// deadlocks)
5524 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5525 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5526 		if (range == NULL) {
5527 			error = B_NO_MEMORY;
5528 			break;
5529 		}
5530 
5531 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5532 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5533 
5534 		// mark the area range wired
5535 		area->Wire(range);
5536 
5537 		// Depending on the area cache type and the wiring, we may not need to
5538 		// look at the individual pages.
5539 		if (area->cache_type == CACHE_TYPE_NULL
5540 			|| area->cache_type == CACHE_TYPE_DEVICE
5541 			|| area->wiring == B_FULL_LOCK
5542 			|| area->wiring == B_CONTIGUOUS) {
5543 			nextAddress = areaEnd;
5544 			continue;
5545 		}
5546 
5547 		// Lock the area's cache chain and the translation map. Needed to look
5548 		// up pages and play with their wired count.
5549 		cacheChainLocker.LockAllSourceCaches();
5550 		map->Lock();
5551 
5552 		// iterate through the pages and wire them
5553 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5554 			phys_addr_t physicalAddress;
5555 			uint32 flags;
5556 
5557 			vm_page* page;
5558 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5559 				&& (flags & requiredProtection) == requiredProtection
5560 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5561 					!= NULL) {
5562 				// Already mapped with the correct permissions -- just increment
5563 				// the page's wired count.
5564 				increment_page_wired_count(page);
5565 			} else {
5566 				// Let vm_soft_fault() map the page for us, if possible. We need
5567 				// to fully unlock to avoid deadlocks. Since we have already
5568 				// wired the area itself, nothing disturbing will happen with it
5569 				// in the meantime.
5570 				map->Unlock();
5571 				cacheChainLocker.Unlock();
5572 				addressSpaceLocker.Unlock();
5573 
5574 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5575 					false, isUser, &page);
5576 
5577 				addressSpaceLocker.Lock();
5578 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5579 				cacheChainLocker.LockAllSourceCaches();
5580 				map->Lock();
5581 			}
5582 
5583 			if (error != B_OK)
5584 				break;
5585 		}
5586 
5587 		map->Unlock();
5588 
5589 		if (error == B_OK) {
5590 			cacheChainLocker.Unlock();
5591 		} else {
5592 			// An error occurred, so abort right here. If the current address
5593 			// is the first in this area, unwire the area, since we won't get
5594 			// to it when reverting what we've done so far.
5595 			if (nextAddress == areaStart) {
5596 				area->Unwire(range);
5597 				cacheChainLocker.Unlock();
5598 				range->~VMAreaWiredRange();
5599 				free_etc(range, mallocFlags);
5600 			} else
5601 				cacheChainLocker.Unlock();
5602 
5603 			break;
5604 		}
5605 	}
5606 
5607 	if (error != B_OK) {
5608 		// An error occurred, so unwire all that we've already wired. Note that
5609 		// even if not a single page was wired, unlock_memory_etc() is called
5610 		// to put the address space reference.
5611 		addressSpaceLocker.Unlock();
5612 		unlock_memory_etc(team, (void*)lockBaseAddress,
5613 			nextAddress - lockBaseAddress, flags);
5614 	}
5615 
5616 	return error;
5617 }
5618 
5619 
5620 status_t
5621 lock_memory(void* address, size_t numBytes, uint32 flags)
5622 {
5623 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5624 }
5625 
5626 
5627 /*!	Unwires an address range previously wired with lock_memory_etc().
5628 
5629 	Note that a call to this function must balance a previous lock_memory_etc()
5630 	call with exactly the same parameters.
5631 */
5632 status_t
5633 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5634 {
5635 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5636 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5637 
5638 	// compute the page protection that is required
5639 	bool isUser = IS_USER_ADDRESS(address);
5640 	bool writable = (flags & B_READ_DEVICE) == 0;
5641 	uint32 requiredProtection = PAGE_PRESENT
5642 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5643 	if (writable)
5644 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5645 
5646 	uint32 mallocFlags = isUser
5647 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5648 
5649 	// get and read lock the address space
5650 	VMAddressSpace* addressSpace = NULL;
5651 	if (isUser) {
5652 		if (team == B_CURRENT_TEAM)
5653 			addressSpace = VMAddressSpace::GetCurrent();
5654 		else
5655 			addressSpace = VMAddressSpace::Get(team);
5656 	} else
5657 		addressSpace = VMAddressSpace::GetKernel();
5658 	if (addressSpace == NULL)
5659 		return B_ERROR;
5660 
5661 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5662 		// Take over the address space reference. We don't unlock until we're
5663 		// done.
5664 
5665 	VMTranslationMap* map = addressSpace->TranslationMap();
5666 	status_t error = B_OK;
5667 
5668 	// iterate through all concerned areas
5669 	addr_t nextAddress = lockBaseAddress;
5670 	while (nextAddress != lockEndAddress) {
5671 		// get the next area
5672 		VMArea* area = addressSpace->LookupArea(nextAddress);
5673 		if (area == NULL) {
5674 			error = B_BAD_ADDRESS;
5675 			break;
5676 		}
5677 
5678 		addr_t areaStart = nextAddress;
5679 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5680 
5681 		// Lock the area's top cache. This is a requirement for
5682 		// VMArea::Unwire().
5683 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5684 
5685 		// Depending on the area cache type and the wiring, we may not need to
5686 		// look at the individual pages.
5687 		if (area->cache_type == CACHE_TYPE_NULL
5688 			|| area->cache_type == CACHE_TYPE_DEVICE
5689 			|| area->wiring == B_FULL_LOCK
5690 			|| area->wiring == B_CONTIGUOUS) {
5691 			// unwire the range (to avoid deadlocks we delete the range after
5692 			// unlocking the cache)
5693 			nextAddress = areaEnd;
5694 			VMAreaWiredRange* range = area->Unwire(areaStart,
5695 				areaEnd - areaStart, writable);
5696 			cacheChainLocker.Unlock();
5697 			if (range != NULL) {
5698 				range->~VMAreaWiredRange();
5699 				free_etc(range, mallocFlags);
5700 			}
5701 			continue;
5702 		}
5703 
5704 		// Lock the area's cache chain and the translation map. Needed to look
5705 		// up pages and play with their wired count.
5706 		cacheChainLocker.LockAllSourceCaches();
5707 		map->Lock();
5708 
5709 		// iterate through the pages and unwire them
5710 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5711 			phys_addr_t physicalAddress;
5712 			uint32 flags;
5713 
5714 			vm_page* page;
5715 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5716 				&& (flags & PAGE_PRESENT) != 0
5717 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5718 					!= NULL) {
5719 				// The page is still mapped and present -- just decrement
5720 				// the page's wired count.
5721 				decrement_page_wired_count(page);
5722 			} else {
5723 				panic("unlock_memory_etc(): Failed to unwire page: address "
5724 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5725 					nextAddress);
5726 				error = B_BAD_VALUE;
5727 				break;
5728 			}
5729 		}
5730 
5731 		map->Unlock();
5732 
5733 		// All pages are unwired. Remove the area's wired range as well (to
5734 		// avoid deadlocks we delete the range after unlocking the cache).
5735 		VMAreaWiredRange* range = area->Unwire(areaStart,
5736 			areaEnd - areaStart, writable);
5737 
5738 		cacheChainLocker.Unlock();
5739 
5740 		if (range != NULL) {
5741 			range->~VMAreaWiredRange();
5742 			free_etc(range, mallocFlags);
5743 		}
5744 
5745 		if (error != B_OK)
5746 			break;
5747 	}
5748 
5749 	// get rid of the address space reference lock_memory_etc() acquired
5750 	addressSpace->Put();
5751 
5752 	return error;
5753 }
5754 
5755 
5756 status_t
5757 unlock_memory(void* address, size_t numBytes, uint32 flags)
5758 {
5759 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5760 }
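

// Illustrative sketch (not part of the original source): wiring a userland
// buffer while a (hypothetical) device reads from it, and unwiring it with
// the exact same parameters afterwards, as the documentation above requires.
#if 0
static status_t
example_write_buffer_to_device(void* userBuffer, size_t length)
{
	status_t status = lock_memory_etc(B_CURRENT_TEAM, userBuffer, length,
		B_READ_DEVICE);
	if (status != B_OK)
		return status;

	// ... start the device transfer and wait for it to complete ...

	return unlock_memory_etc(B_CURRENT_TEAM, userBuffer, length,
		B_READ_DEVICE);
}
#endif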
5761 
5762 
5763 /*!	Similar to get_memory_map(), but also allows specifying the address space
5764 	for the memory in question and has saner semantics.
5765 	Returns \c B_OK when the complete range could be translated or
5766 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5767 	case the actual number of entries is written to \c *_numEntries. Any other
5768 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5769 	in this case.
5770 */
5771 status_t
5772 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5773 	physical_entry* table, uint32* _numEntries)
5774 {
5775 	uint32 numEntries = *_numEntries;
5776 	*_numEntries = 0;
5777 
5778 	VMAddressSpace* addressSpace;
5779 	addr_t virtualAddress = (addr_t)address;
5780 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5781 	phys_addr_t physicalAddress;
5782 	status_t status = B_OK;
5783 	int32 index = -1;
5784 	addr_t offset = 0;
5785 	bool interrupts = are_interrupts_enabled();
5786 
5787 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5788 		"entries)\n", team, address, numBytes, numEntries));
5789 
5790 	if (numEntries == 0 || numBytes == 0)
5791 		return B_BAD_VALUE;
5792 
5793 	// in which address space is the address to be found?
5794 	if (IS_USER_ADDRESS(virtualAddress)) {
5795 		if (team == B_CURRENT_TEAM)
5796 			addressSpace = VMAddressSpace::GetCurrent();
5797 		else
5798 			addressSpace = VMAddressSpace::Get(team);
5799 	} else
5800 		addressSpace = VMAddressSpace::GetKernel();
5801 
5802 	if (addressSpace == NULL)
5803 		return B_ERROR;
5804 
5805 	VMTranslationMap* map = addressSpace->TranslationMap();
5806 
5807 	if (interrupts)
5808 		map->Lock();
5809 
5810 	while (offset < numBytes) {
5811 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5812 		uint32 flags;
5813 
5814 		if (interrupts) {
5815 			status = map->Query((addr_t)address + offset, &physicalAddress,
5816 				&flags);
5817 		} else {
5818 			status = map->QueryInterrupt((addr_t)address + offset,
5819 				&physicalAddress, &flags);
5820 		}
5821 		if (status < B_OK)
5822 			break;
5823 		if ((flags & PAGE_PRESENT) == 0) {
5824 			panic("get_memory_map() called on unmapped memory!");
5825 			return B_BAD_ADDRESS;
5826 		}
5827 
5828 		if (index < 0 && pageOffset > 0) {
5829 			physicalAddress += pageOffset;
5830 			if (bytes > B_PAGE_SIZE - pageOffset)
5831 				bytes = B_PAGE_SIZE - pageOffset;
5832 		}
5833 
5834 		// need to switch to the next physical_entry?
5835 		if (index < 0 || table[index].address
5836 				!= physicalAddress - table[index].size) {
5837 			if ((uint32)++index + 1 > numEntries) {
5838 				// table too small
5839 				break;
5840 			}
5841 			table[index].address = physicalAddress;
5842 			table[index].size = bytes;
5843 		} else {
5844 			// the page still fits into the current entry
5845 			table[index].size += bytes;
5846 		}
5847 
5848 		offset += bytes;
5849 	}
5850 
5851 	if (interrupts)
5852 		map->Unlock();
5853 
5854 	if (status != B_OK)
5855 		return status;
5856 
5857 	if ((uint32)index + 1 > numEntries) {
5858 		*_numEntries = index;
5859 		return B_BUFFER_OVERFLOW;
5860 	}
5861 
5862 	*_numEntries = index + 1;
5863 	return B_OK;
5864 }
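

// Illustrative sketch (not part of the original source): translating an
// already wired/mapped buffer into a small scatter/gather style table. The
// table size of 8 entries is arbitrary.
#if 0
static status_t
example_dump_physical_runs(const void* buffer, size_t length)
{
	physical_entry table[8];
	uint32 entryCount = 8;

	// note that the pages must already be mapped -- get_memory_map_etc()
	// does not fault them in
	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
		table, &entryCount);
	if (status != B_OK && status != B_BUFFER_OVERFLOW)
		return status;

	for (uint32 i = 0; i < entryCount; i++) {
		dprintf("run %" B_PRIu32 ": address %#" B_PRIxPHYSADDR "\n", i,
			table[i].address);
	}

	return status;
}
#endif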
5865 
5866 
5867 /*!	According to the BeBook, this function should always succeed.
5868 	This is no longer the case.
5869 */
5870 extern "C" int32
5871 __get_memory_map_haiku(const void* address, size_t numBytes,
5872 	physical_entry* table, int32 numEntries)
5873 {
5874 	uint32 entriesRead = numEntries;
5875 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5876 		table, &entriesRead);
5877 	if (error != B_OK)
5878 		return error;
5879 
5880 	// close the entry list
5881 
5882 	// if it's only one entry, we will silently accept the missing ending
5883 	if (numEntries == 1)
5884 		return B_OK;
5885 
5886 	if (entriesRead + 1 > (uint32)numEntries)
5887 		return B_BUFFER_OVERFLOW;
5888 
5889 	table[entriesRead].address = 0;
5890 	table[entriesRead].size = 0;
5891 
5892 	return B_OK;
5893 }
5894 
5895 
5896 area_id
5897 area_for(void* address)
5898 {
5899 	return vm_area_for((addr_t)address, true);
5900 }
5901 
5902 
5903 area_id
5904 find_area(const char* name)
5905 {
5906 	return VMAreaHash::Find(name);
5907 }
5908 
5909 
5910 status_t
5911 _get_area_info(area_id id, area_info* info, size_t size)
5912 {
5913 	if (size != sizeof(area_info) || info == NULL)
5914 		return B_BAD_VALUE;
5915 
5916 	AddressSpaceReadLocker locker;
5917 	VMArea* area;
5918 	status_t status = locker.SetFromArea(id, area);
5919 	if (status != B_OK)
5920 		return status;
5921 
5922 	fill_area_info(area, info, size);
5923 	return B_OK;
5924 }
5925 
5926 
5927 status_t
5928 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5929 {
5930 	addr_t nextBase = *(addr_t*)cookie;
5931 
5932 	// we're already through the list
5933 	if (nextBase == (addr_t)-1)
5934 		return B_ENTRY_NOT_FOUND;
5935 
5936 	if (team == B_CURRENT_TEAM)
5937 		team = team_get_current_team_id();
5938 
5939 	AddressSpaceReadLocker locker(team);
5940 	if (!locker.IsLocked())
5941 		return B_BAD_TEAM_ID;
5942 
5943 	VMArea* area;
5944 	for (VMAddressSpace::AreaIterator it
5945 				= locker.AddressSpace()->GetAreaIterator();
5946 			(area = it.Next()) != NULL;) {
5947 		if (area->Base() > nextBase)
5948 			break;
5949 	}
5950 
5951 	if (area == NULL) {
5952 		nextBase = (addr_t)-1;
5953 		return B_ENTRY_NOT_FOUND;
5954 	}
5955 
5956 	fill_area_info(area, info, size);
5957 	*cookie = (ssize_t)(area->Base());
5958 
5959 	return B_OK;
5960 }
5961 
5962 
5963 status_t
5964 set_area_protection(area_id area, uint32 newProtection)
5965 {
5966 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5967 		newProtection, true);
5968 }
5969 
5970 
5971 status_t
5972 resize_area(area_id areaID, size_t newSize)
5973 {
5974 	return vm_resize_area(areaID, newSize, true);
5975 }
5976 
5977 
5978 /*!	Transfers the specified area to a new team. The caller must be the owner
5979 	of the area.
5980 */
5981 area_id
5982 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5983 	bool kernel)
5984 {
5985 	area_info info;
5986 	status_t status = get_area_info(id, &info);
5987 	if (status != B_OK)
5988 		return status;
5989 
5990 	if (info.team != thread_get_current_thread()->team->id)
5991 		return B_PERMISSION_DENIED;
5992 
5993 	// We need to mark the area cloneable so the following operations work.
5994 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
5995 	if (status != B_OK)
5996 		return status;
5997 
5998 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5999 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6000 	if (clonedArea < 0)
6001 		return clonedArea;
6002 
6003 	status = vm_delete_area(info.team, id, kernel);
6004 	if (status != B_OK) {
6005 		vm_delete_area(target, clonedArea, kernel);
6006 		return status;
6007 	}
6008 
6009 	// Now we can reset the protection to whatever it was before.
6010 	set_area_protection(clonedArea, info.protection);
6011 
6012 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6013 
6014 	return clonedArea;
6015 }
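

// Illustrative sketch (not part of the original source): handing an area the
// current team owns over to another team, letting the kernel pick a suitable
// address in the target address space. The target team ID is a placeholder.
#if 0
static area_id
example_give_area_away(area_id id, team_id targetTeam)
{
	void* address = NULL;
	return transfer_area(id, &address, B_ANY_ADDRESS, targetTeam, true);
}
#endif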
6016 
6017 
6018 extern "C" area_id
6019 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6020 	size_t numBytes, uint32 addressSpec, uint32 protection,
6021 	void** _virtualAddress)
6022 {
6023 	if (!arch_vm_supports_protection(protection))
6024 		return B_NOT_SUPPORTED;
6025 
6026 	fix_protection(&protection);
6027 
6028 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6029 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6030 		false);
6031 }
6032 
6033 
6034 area_id
6035 clone_area(const char* name, void** _address, uint32 addressSpec,
6036 	uint32 protection, area_id source)
6037 {
6038 	if ((protection & B_KERNEL_PROTECTION) == 0)
6039 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6040 
6041 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6042 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6043 }
6044 
6045 
6046 area_id
6047 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6048 	uint32 protection, uint32 flags, uint32 guardSize,
6049 	const virtual_address_restrictions* virtualAddressRestrictions,
6050 	const physical_address_restrictions* physicalAddressRestrictions,
6051 	void** _address)
6052 {
6053 	fix_protection(&protection);
6054 
6055 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6056 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6057 		true, _address);
6058 }
6059 
6060 
6061 extern "C" area_id
6062 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6063 	size_t size, uint32 lock, uint32 protection)
6064 {
6065 	fix_protection(&protection);
6066 
6067 	virtual_address_restrictions virtualRestrictions = {};
6068 	virtualRestrictions.address = *_address;
6069 	virtualRestrictions.address_specification = addressSpec;
6070 	physical_address_restrictions physicalRestrictions = {};
6071 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6072 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6073 		true, _address);
6074 }
6075 
6076 
6077 status_t
6078 delete_area(area_id area)
6079 {
6080 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6081 }
6082 
6083 
6084 //	#pragma mark - Userland syscalls
6085 
6086 
6087 status_t
6088 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6089 	addr_t size)
6090 {
6091 	// filter out some unavailable values (for userland)
6092 	switch (addressSpec) {
6093 		case B_ANY_KERNEL_ADDRESS:
6094 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6095 			return B_BAD_VALUE;
6096 	}
6097 
6098 	addr_t address;
6099 
6100 	if (!IS_USER_ADDRESS(userAddress)
6101 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6102 		return B_BAD_ADDRESS;
6103 
6104 	status_t status = vm_reserve_address_range(
6105 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6106 		RESERVED_AVOID_BASE);
6107 	if (status != B_OK)
6108 		return status;
6109 
6110 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6111 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6112 			(void*)address, size);
6113 		return B_BAD_ADDRESS;
6114 	}
6115 
6116 	return B_OK;
6117 }
6118 
6119 
6120 status_t
6121 _user_unreserve_address_range(addr_t address, addr_t size)
6122 {
6123 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6124 		(void*)address, size);
6125 }
6126 
6127 
6128 area_id
6129 _user_area_for(void* address)
6130 {
6131 	return vm_area_for((addr_t)address, false);
6132 }
6133 
6134 
6135 area_id
6136 _user_find_area(const char* userName)
6137 {
6138 	char name[B_OS_NAME_LENGTH];
6139 
6140 	if (!IS_USER_ADDRESS(userName)
6141 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6142 		return B_BAD_ADDRESS;
6143 
6144 	return find_area(name);
6145 }
6146 
6147 
6148 status_t
6149 _user_get_area_info(area_id area, area_info* userInfo)
6150 {
6151 	if (!IS_USER_ADDRESS(userInfo))
6152 		return B_BAD_ADDRESS;
6153 
6154 	area_info info;
6155 	status_t status = get_area_info(area, &info);
6156 	if (status < B_OK)
6157 		return status;
6158 
6159 	// TODO: do we want to prevent userland from seeing kernel protections?
6160 	//info.protection &= B_USER_PROTECTION;
6161 
6162 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6163 		return B_BAD_ADDRESS;
6164 
6165 	return status;
6166 }
6167 
6168 
6169 status_t
6170 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6171 {
6172 	ssize_t cookie;
6173 
6174 	if (!IS_USER_ADDRESS(userCookie)
6175 		|| !IS_USER_ADDRESS(userInfo)
6176 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6177 		return B_BAD_ADDRESS;
6178 
6179 	area_info info;
6180 	status_t status = _get_next_area_info(team, &cookie, &info,
6181 		sizeof(area_info));
6182 	if (status != B_OK)
6183 		return status;
6184 
6185 	//info.protection &= B_USER_PROTECTION;
6186 
6187 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6188 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6189 		return B_BAD_ADDRESS;
6190 
6191 	return status;
6192 }
6193 
6194 
6195 status_t
6196 _user_set_area_protection(area_id area, uint32 newProtection)
6197 {
6198 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6199 		return B_BAD_VALUE;
6200 
6201 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6202 		newProtection, false);
6203 }
6204 
6205 
6206 status_t
6207 _user_resize_area(area_id area, size_t newSize)
6208 {
6209 	// TODO: Since we restrict deleting of areas to those owned by the team,
6210 	// we should also do that for resizing (check other functions, too).
6211 	return vm_resize_area(area, newSize, false);
6212 }
6213 
6214 
6215 area_id
6216 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6217 	team_id target)
6218 {
6219 	// filter out some unavailable values (for userland)
6220 	switch (addressSpec) {
6221 		case B_ANY_KERNEL_ADDRESS:
6222 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6223 			return B_BAD_VALUE;
6224 	}
6225 
6226 	void* address;
6227 	if (!IS_USER_ADDRESS(userAddress)
6228 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6229 		return B_BAD_ADDRESS;
6230 
6231 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6232 	if (newArea < B_OK)
6233 		return newArea;
6234 
6235 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6236 		return B_BAD_ADDRESS;
6237 
6238 	return newArea;
6239 }
6240 
6241 
6242 area_id
6243 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6244 	uint32 protection, area_id sourceArea)
6245 {
6246 	char name[B_OS_NAME_LENGTH];
6247 	void* address;
6248 
6249 	// filter out some unavailable values (for userland)
6250 	switch (addressSpec) {
6251 		case B_ANY_KERNEL_ADDRESS:
6252 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6253 			return B_BAD_VALUE;
6254 	}
6255 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6256 		return B_BAD_VALUE;
6257 
6258 	if (!IS_USER_ADDRESS(userName)
6259 		|| !IS_USER_ADDRESS(userAddress)
6260 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6261 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6262 		return B_BAD_ADDRESS;
6263 
6264 	fix_protection(&protection);
6265 
6266 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6267 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6268 		false);
6269 	if (clonedArea < B_OK)
6270 		return clonedArea;
6271 
6272 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6273 		delete_area(clonedArea);
6274 		return B_BAD_ADDRESS;
6275 	}
6276 
6277 	return clonedArea;
6278 }
6279 
6280 
6281 area_id
6282 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6283 	size_t size, uint32 lock, uint32 protection)
6284 {
6285 	char name[B_OS_NAME_LENGTH];
6286 	void* address;
6287 
6288 	// filter out some unavailable values (for userland)
6289 	switch (addressSpec) {
6290 		case B_ANY_KERNEL_ADDRESS:
6291 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6292 			return B_BAD_VALUE;
6293 	}
6294 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6295 		return B_BAD_VALUE;
6296 
6297 	if (!IS_USER_ADDRESS(userName)
6298 		|| !IS_USER_ADDRESS(userAddress)
6299 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6300 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6301 		return B_BAD_ADDRESS;
6302 
6303 	if (addressSpec == B_EXACT_ADDRESS
6304 		&& IS_KERNEL_ADDRESS(address))
6305 		return B_BAD_VALUE;
6306 
6307 	if (addressSpec == B_ANY_ADDRESS)
6308 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6309 	if (addressSpec == B_BASE_ADDRESS)
6310 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6311 
6312 	fix_protection(&protection);
6313 
6314 	virtual_address_restrictions virtualRestrictions = {};
6315 	virtualRestrictions.address = address;
6316 	virtualRestrictions.address_specification = addressSpec;
6317 	physical_address_restrictions physicalRestrictions = {};
6318 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6319 		size, lock, protection, 0, 0, &virtualRestrictions,
6320 		&physicalRestrictions, false, &address);
6321 
6322 	if (area >= B_OK
6323 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6324 		delete_area(area);
6325 		return B_BAD_ADDRESS;
6326 	}
6327 
6328 	return area;
6329 }
6330 
6331 
6332 status_t
6333 _user_delete_area(area_id area)
6334 {
6335 	// Unlike the BeOS implementation, you can now only delete areas
6336 	// that you have created yourself from userland.
6337 	// The documentation to delete_area() explicitly states that this
6338 	// will be restricted in the future, and so it will.
6339 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6340 }
6341 
6342 
6343 // TODO: create a BeOS style call for this!
6344 
6345 area_id
6346 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6347 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6348 	int fd, off_t offset)
6349 {
6350 	char name[B_OS_NAME_LENGTH];
6351 	void* address;
6352 	area_id area;
6353 
6354 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6355 		return B_BAD_VALUE;
6356 
6357 	fix_protection(&protection);
6358 
6359 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6360 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6361 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6362 		return B_BAD_ADDRESS;
6363 
6364 	if (addressSpec == B_EXACT_ADDRESS) {
6365 		if ((addr_t)address + size < (addr_t)address
6366 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6367 			return B_BAD_VALUE;
6368 		}
6369 		if (!IS_USER_ADDRESS(address)
6370 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6371 			return B_BAD_ADDRESS;
6372 		}
6373 	}
6374 
6375 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6376 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6377 		false);
6378 	if (area < B_OK)
6379 		return area;
6380 
6381 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6382 		return B_BAD_ADDRESS;
6383 
6384 	return area;
6385 }
6386 
6387 
6388 status_t
6389 _user_unmap_memory(void* _address, size_t size)
6390 {
6391 	addr_t address = (addr_t)_address;
6392 
6393 	// check params
6394 	if (size == 0 || (addr_t)address + size < (addr_t)address
6395 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6396 		return B_BAD_VALUE;
6397 	}
6398 
6399 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6400 		return B_BAD_ADDRESS;
6401 
6402 	// Write lock the address space and ensure the address range is not wired.
6403 	AddressSpaceWriteLocker locker;
6404 	do {
6405 		status_t status = locker.SetTo(team_get_current_team_id());
6406 		if (status != B_OK)
6407 			return status;
6408 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6409 			size, &locker));
6410 
6411 	// unmap
6412 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6413 }
6414 
6415 
6416 status_t
6417 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6418 {
6419 	// check address range
6420 	addr_t address = (addr_t)_address;
6421 	size = PAGE_ALIGN(size);
6422 
6423 	if ((address % B_PAGE_SIZE) != 0)
6424 		return B_BAD_VALUE;
6425 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6426 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6427 		// weird error code required by POSIX
6428 		return ENOMEM;
6429 	}
6430 
6431 	// extend and check protection
6432 	if ((protection & ~B_USER_PROTECTION) != 0)
6433 		return B_BAD_VALUE;
6434 
6435 	fix_protection(&protection);
6436 
6437 	// We need to write lock the address space, since we're going to play with
6438 	// the areas. Also make sure that none of the areas is wired and that we're
6439 	// actually allowed to change the protection.
6440 	AddressSpaceWriteLocker locker;
6441 
6442 	bool restart;
6443 	do {
6444 		restart = false;
6445 
6446 		status_t status = locker.SetTo(team_get_current_team_id());
6447 		if (status != B_OK)
6448 			return status;
6449 
6450 		// First round: Check whether the whole range is covered by areas and we
6451 		// are allowed to modify them.
6452 		addr_t currentAddress = address;
6453 		size_t sizeLeft = size;
6454 		while (sizeLeft > 0) {
6455 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6456 			if (area == NULL)
6457 				return B_NO_MEMORY;
6458 
6459 			if (area->address_space == VMAddressSpace::Kernel())
6460 				return B_NOT_ALLOWED;
6461 
6462 			// TODO: For (shared) mapped files we should check whether the new
6463 			// protections are compatible with the file permissions. We don't
6464 			// have a way to do that yet, though.
6465 
6466 			addr_t offset = currentAddress - area->Base();
6467 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6468 
6469 			AreaCacheLocker cacheLocker(area);
6470 
6471 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6472 					&locker, &cacheLocker)) {
6473 				restart = true;
6474 				break;
6475 			}
6476 
6477 			cacheLocker.Unlock();
6478 
6479 			currentAddress += rangeSize;
6480 			sizeLeft -= rangeSize;
6481 		}
6482 	} while (restart);
6483 
6484 	// Second round: If the protections differ from that of the area, create a
6485 	// page protection array and re-map mapped pages.
6486 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6487 	addr_t currentAddress = address;
6488 	size_t sizeLeft = size;
6489 	while (sizeLeft > 0) {
6490 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6491 		if (area == NULL)
6492 			return B_NO_MEMORY;
6493 
6494 		addr_t offset = currentAddress - area->Base();
6495 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6496 
6497 		currentAddress += rangeSize;
6498 		sizeLeft -= rangeSize;
6499 
6500 		if (area->page_protections == NULL) {
6501 			if (area->protection == protection)
6502 				continue;
6503 
6504 			status_t status = allocate_area_page_protections(area);
6505 			if (status != B_OK)
6506 				return status;
6507 		}
6508 
6509 		// We need to lock the complete cache chain, since we potentially unmap
6510 		// pages of lower caches.
6511 		VMCache* topCache = vm_area_get_locked_cache(area);
6512 		VMCacheChainLocker cacheChainLocker(topCache);
6513 		cacheChainLocker.LockAllSourceCaches();
6514 
6515 		for (addr_t pageAddress = area->Base() + offset;
6516 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6517 			map->Lock();
6518 
6519 			set_area_page_protection(area, pageAddress, protection);
6520 
6521 			phys_addr_t physicalAddress;
6522 			uint32 flags;
6523 
6524 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6525 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6526 				map->Unlock();
6527 				continue;
6528 			}
6529 
6530 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6531 			if (page == NULL) {
6532 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6533 					"\n", area, physicalAddress);
6534 				map->Unlock();
6535 				return B_ERROR;
6536 			}
6537 
6538 			// If the page is not in the topmost cache and write access is
6539 			// requested, we have to unmap it. Otherwise we can re-map it with
6540 			// the new protection.
6541 			bool unmapPage = page->Cache() != topCache
6542 				&& (protection & B_WRITE_AREA) != 0;
6543 
6544 			if (!unmapPage)
6545 				map->ProtectPage(area, pageAddress, protection);
6546 
6547 			map->Unlock();
6548 
6549 			if (unmapPage) {
6550 				DEBUG_PAGE_ACCESS_START(page);
6551 				unmap_page(area, pageAddress);
6552 				DEBUG_PAGE_ACCESS_END(page);
6553 			}
6554 		}
6555 	}
6556 
6557 	return B_OK;
6558 }
6559 
6560 
6561 status_t
6562 _user_sync_memory(void* _address, size_t size, uint32 flags)
6563 {
6564 	addr_t address = (addr_t)_address;
6565 	size = PAGE_ALIGN(size);
6566 
6567 	// check params
6568 	if ((address % B_PAGE_SIZE) != 0)
6569 		return B_BAD_VALUE;
6570 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6571 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6572 		// weird error code required by POSIX
6573 		return ENOMEM;
6574 	}
6575 
6576 	bool writeSync = (flags & MS_SYNC) != 0;
6577 	bool writeAsync = (flags & MS_ASYNC) != 0;
6578 	if (writeSync && writeAsync)
6579 		return B_BAD_VALUE;
6580 
6581 	if (size == 0 || (!writeSync && !writeAsync))
6582 		return B_OK;
6583 
6584 	// iterate through the range and sync all concerned areas
6585 	while (size > 0) {
6586 		// read lock the address space
6587 		AddressSpaceReadLocker locker;
6588 		status_t error = locker.SetTo(team_get_current_team_id());
6589 		if (error != B_OK)
6590 			return error;
6591 
6592 		// get the first area
6593 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6594 		if (area == NULL)
6595 			return B_NO_MEMORY;
6596 
6597 		uint32 offset = address - area->Base();
6598 		size_t rangeSize = min_c(area->Size() - offset, size);
6599 		offset += area->cache_offset;
6600 
6601 		// lock the cache
6602 		AreaCacheLocker cacheLocker(area);
6603 		if (!cacheLocker)
6604 			return B_BAD_VALUE;
6605 		VMCache* cache = area->cache;
6606 
6607 		locker.Unlock();
6608 
6609 		uint32 firstPage = offset >> PAGE_SHIFT;
6610 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6611 
6612 		// write the pages
6613 		if (cache->type == CACHE_TYPE_VNODE) {
6614 			if (writeSync) {
6615 				// synchronous
6616 				error = vm_page_write_modified_page_range(cache, firstPage,
6617 					endPage);
6618 				if (error != B_OK)
6619 					return error;
6620 			} else {
6621 				// asynchronous
6622 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6623 				// TODO: This is probably not quite what is supposed to happen.
6624 				// Especially when a lot has to be written, it might take ages
6625 				// until it really hits the disk.
6626 			}
6627 		}
6628 
6629 		address += rangeSize;
6630 		size -= rangeSize;
6631 	}
6632 
6633 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6634 	// synchronize multiple mappings of the same file. In our VM they never get
6635 	// out of sync, though, so we don't have to do anything.
6636 
6637 	return B_OK;
6638 }
6639 
6640 
6641 status_t
6642 _user_memory_advice(void* address, size_t size, uint32 advice)
6643 {
6644 	// TODO: Implement!
6645 	return B_OK;
6646 }
6647 
6648 
6649 status_t
6650 _user_get_memory_properties(team_id teamID, const void* address,
6651 	uint32* _protected, uint32* _lock)
6652 {
6653 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6654 		return B_BAD_ADDRESS;
6655 
6656 	AddressSpaceReadLocker locker;
6657 	status_t error = locker.SetTo(teamID);
6658 	if (error != B_OK)
6659 		return error;
6660 
6661 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6662 	if (area == NULL)
6663 		return B_NO_MEMORY;
6664 
6665 
6666 	uint32 protection = area->protection;
6667 	if (area->page_protections != NULL)
6668 		protection = get_area_page_protection(area, (addr_t)address);
6669 
6670 	uint32 wiring = area->wiring;
6671 
6672 	locker.Unlock();
6673 
6674 	error = user_memcpy(_protected, &protection, sizeof(protection));
6675 	if (error != B_OK)
6676 		return error;
6677 
6678 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6679 
6680 	return error;
6681 }
6682 
6683 
6684 // #pragma mark -- compatibility
6685 
6686 
6687 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6688 
6689 
6690 struct physical_entry_beos {
6691 	uint32	address;
6692 	uint32	size;
6693 };
6694 
6695 
6696 /*!	The physical_entry structure has changed. We need to translate it to the
6697 	old one.
6698 */
6699 extern "C" int32
6700 __get_memory_map_beos(const void* _address, size_t numBytes,
6701 	physical_entry_beos* table, int32 numEntries)
6702 {
6703 	if (numEntries <= 0)
6704 		return B_BAD_VALUE;
6705 
6706 	const uint8* address = (const uint8*)_address;
6707 
6708 	int32 count = 0;
6709 	while (numBytes > 0 && count < numEntries) {
6710 		physical_entry entry;
6711 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6712 		if (result < 0) {
6713 			if (result != B_BUFFER_OVERFLOW)
6714 				return result;
6715 		}
6716 
6717 		if (entry.address >= (phys_addr_t)1 << 32) {
6718 			panic("get_memory_map(): Address is greater than 4 GB!");
6719 			return B_ERROR;
6720 		}
6721 
6722 		table[count].address = entry.address;
6723 		table[count++].size = entry.size;
6724 
6725 		address += entry.size;
6726 		numBytes -= entry.size;
6727 	}
6728 
6729 	// null-terminate the table, if possible
6730 	if (count < numEntries) {
6731 		table[count].address = 0;
6732 		table[count].size = 0;
6733 	}
6734 
6735 	return B_OK;
6736 }
6737 
6738 
6739 /*!	The type of the \a physicalAddress parameter has changed from void* to
6740 	phys_addr_t.
6741 */
6742 extern "C" area_id
6743 __map_physical_memory_beos(const char* name, void* physicalAddress,
6744 	size_t numBytes, uint32 addressSpec, uint32 protection,
6745 	void** _virtualAddress)
6746 {
6747 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6748 		addressSpec, protection, _virtualAddress);
6749 }
6750 
6751 
6752 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6753 	we meddle with the \a lock parameter to force 32 bit.
6754 */
6755 extern "C" area_id
6756 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6757 	size_t size, uint32 lock, uint32 protection)
6758 {
6759 	switch (lock) {
6760 		case B_NO_LOCK:
6761 			break;
6762 		case B_FULL_LOCK:
6763 		case B_LAZY_LOCK:
6764 			lock = B_32_BIT_FULL_LOCK;
6765 			break;
6766 		case B_CONTIGUOUS:
6767 			lock = B_32_BIT_CONTIGUOUS;
6768 			break;
6769 	}
6770 
6771 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6772 		protection);
6773 }
6774 
6775 
6776 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6777 	"BASE");
6778 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6779 	"map_physical_memory@", "BASE");
6780 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6781 	"BASE");
6782 
6783 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6784 	"get_memory_map@@", "1_ALPHA3");
6785 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6786 	"map_physical_memory@@", "1_ALPHA3");
6787 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6788 	"1_ALPHA3");
6789 
6790 
6791 #else
6792 
6793 
6794 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6795 	"get_memory_map@@", "BASE");
6796 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6797 	"map_physical_memory@@", "BASE");
6798 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6799 	"BASE");
6800 
6801 
6802 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6803