xref: /haiku/src/system/kernel/vm/vm.cpp (revision 04d1d2da0b27294f0f1e623071df310a0820d4b6)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
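
// Editor's note: the trace macros above expand their entire argument list into
// a single dprintf() call, which is why call sites in this file use double
// parentheses, for example:
//
//     TRACE(("vm_create_anonymous_area: done\n"));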
74 
75 
76 namespace {
77 
78 class AreaCacheLocking {
79 public:
80 	inline bool Lock(VMCache* lockable)
81 	{
82 		return false;
83 	}
84 
85 	inline void Unlock(VMCache* lockable)
86 	{
87 		vm_area_put_locked_cache(lockable);
88 	}
89 };
90 
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 	inline AreaCacheLocker(VMCache* cache = NULL)
94 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95 	{
96 	}
97 
98 	inline AreaCacheLocker(VMArea* area)
99 		: AutoLocker<VMCache, AreaCacheLocking>()
100 	{
101 		SetTo(area);
102 	}
103 
104 	inline void SetTo(VMCache* cache, bool alreadyLocked)
105 	{
106 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107 	}
108 
109 	inline void SetTo(VMArea* area)
110 	{
111 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113 	}
114 };
115 
116 
117 class VMCacheChainLocker {
118 public:
119 	VMCacheChainLocker()
120 		:
121 		fTopCache(NULL),
122 		fBottomCache(NULL)
123 	{
124 	}
125 
126 	VMCacheChainLocker(VMCache* topCache)
127 		:
128 		fTopCache(topCache),
129 		fBottomCache(topCache)
130 	{
131 	}
132 
133 	~VMCacheChainLocker()
134 	{
135 		Unlock();
136 	}
137 
138 	void SetTo(VMCache* topCache)
139 	{
140 		fTopCache = topCache;
141 		fBottomCache = topCache;
142 
143 		if (topCache != NULL)
144 			topCache->SetUserData(NULL);
145 	}
146 
147 	VMCache* LockSourceCache()
148 	{
149 		if (fBottomCache == NULL || fBottomCache->source == NULL)
150 			return NULL;
151 
152 		VMCache* previousCache = fBottomCache;
153 
154 		fBottomCache = fBottomCache->source;
155 		fBottomCache->Lock();
156 		fBottomCache->AcquireRefLocked();
157 		fBottomCache->SetUserData(previousCache);
158 
159 		return fBottomCache;
160 	}
161 
162 	void LockAllSourceCaches()
163 	{
164 		while (LockSourceCache() != NULL) {
165 		}
166 	}
167 
168 	void Unlock(VMCache* exceptCache = NULL)
169 	{
170 		if (fTopCache == NULL)
171 			return;
172 
173 		// Unlock caches in source -> consumer direction. This is important to
174 		// avoid double-locking and a reversal of locking order in case a cache
175 		// is eligible for merging.
176 		VMCache* cache = fBottomCache;
177 		while (cache != NULL) {
178 			VMCache* nextCache = (VMCache*)cache->UserData();
179 			if (cache != exceptCache)
180 				cache->ReleaseRefAndUnlock(cache != fTopCache);
181 
182 			if (cache == fTopCache)
183 				break;
184 
185 			cache = nextCache;
186 		}
187 
188 		fTopCache = NULL;
189 		fBottomCache = NULL;
190 	}
191 
192 	void UnlockKeepRefs(bool keepTopCacheLocked)
193 	{
194 		if (fTopCache == NULL)
195 			return;
196 
197 		VMCache* nextCache = fBottomCache;
198 		VMCache* cache = NULL;
199 
200 		while (keepTopCacheLocked
201 				? nextCache != fTopCache : cache != fTopCache) {
202 			cache = nextCache;
203 			nextCache = (VMCache*)cache->UserData();
204 			cache->Unlock(cache != fTopCache);
205 		}
206 	}
207 
208 	void RelockCaches(bool topCacheLocked)
209 	{
210 		if (fTopCache == NULL)
211 			return;
212 
213 		VMCache* nextCache = fTopCache;
214 		VMCache* cache = NULL;
215 		if (topCacheLocked) {
216 			cache = nextCache;
217 			nextCache = cache->source;
218 		}
219 
220 		while (cache != fBottomCache && nextCache != NULL) {
221 			VMCache* consumer = cache;
222 			cache = nextCache;
223 			nextCache = cache->source;
224 			cache->Lock();
225 			cache->SetUserData(consumer);
226 		}
227 	}
228 
229 private:
230 	VMCache*	fTopCache;
231 	VMCache*	fBottomCache;
232 };
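
// Editor's note: a minimal usage sketch of VMCacheChainLocker (not part of the
// original source); it mirrors how cut_area() below uses the class. The top
// cache is assumed to come from vm_area_get_locked_cache(), i.e. it is already
// locked and referenced:
//
//     VMCache* cache = vm_area_get_locked_cache(area);
//     VMCacheChainLocker cacheChainLocker(cache);
//     cacheChainLocker.LockAllSourceCaches();
//     // ... operate on the fully locked cache chain ...
//     // the destructor (or an explicit Unlock()) releases and unlocks all
//     // caches in source -> consumer order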
233 
234 } // namespace
235 
236 
237 // The memory reserve an allocation of a certain priority must not touch.
238 static const size_t kMemoryReserveForPriority[] = {
239 	VM_MEMORY_RESERVE_USER,		// user
240 	VM_MEMORY_RESERVE_SYSTEM,	// system
241 	0							// VIP
242 };
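
// Editor's note: this table appears to be indexed by the VM_PRIORITY_*
// constants (user, system, VIP, in that order), e.g. a hypothetical lookup:
//
//     size_t reserve = kMemoryReserveForPriority[VM_PRIORITY_USER];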
243 
244 
245 ObjectCache* gPageMappingsObjectCache;
246 
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248 
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
253 
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
255 
256 #if DEBUG_CACHE_LIST
257 
258 struct cache_info {
259 	VMCache*	cache;
260 	addr_t		page_count;
261 	addr_t		committed;
262 };
263 
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
266 
267 #endif	// DEBUG_CACHE_LIST
268 
269 
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 	bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 	int protection, int mapping, uint32 flags,
278 	const virtual_address_restrictions* addressRestrictions, bool kernel,
279 	VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
281 
282 
283 //	#pragma mark -
284 
285 
286 #if VM_PAGE_FAULT_TRACING
287 
288 namespace VMPageFaultTracing {
289 
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293 		:
294 		fAddress(address),
295 		fPC(pc),
296 		fWrite(write),
297 		fUser(user)
298 	{
299 		Initialized();
300 	}
301 
302 	virtual void AddDump(TraceOutput& out)
303 	{
304 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306 	}
307 
308 private:
309 	addr_t	fAddress;
310 	addr_t	fPC;
311 	bool	fWrite;
312 	bool	fUser;
313 };
314 
315 
316 // page fault errors
317 enum {
318 	PAGE_FAULT_ERROR_NO_AREA		= 0,
319 	PAGE_FAULT_ERROR_KERNEL_ONLY,
320 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 	PAGE_FAULT_ERROR_READ_PROTECTED,
322 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325 };
326 
327 
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 	PageFaultError(area_id area, status_t error)
331 		:
332 		fArea(area),
333 		fError(error)
334 	{
335 		Initialized();
336 	}
337 
338 	virtual void AddDump(TraceOutput& out)
339 	{
340 		switch (fError) {
341 			case PAGE_FAULT_ERROR_NO_AREA:
342 				out.Print("page fault error: no area");
343 				break;
344 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 				out.Print("page fault error: area: %ld, kernel only", fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 				out.Print("page fault error: area: %ld, write protected",
349 					fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_READ_PROTECTED:
352 				out.Print("page fault error: area: %ld, read protected", fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 				out.Print("page fault error: area: %ld, execute protected",
356 					fArea);
357 				break;
358 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 				out.Print("page fault error: kernel touching bad user memory");
360 				break;
361 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 				out.Print("page fault error: no address space");
363 				break;
364 			default:
365 				out.Print("page fault error: area: %ld, error: %s", fArea,
366 					strerror(fError));
367 				break;
368 		}
369 	}
370 
371 private:
372 	area_id		fArea;
373 	status_t	fError;
374 };
375 
376 
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 			vm_page* page)
381 		:
382 		fArea(area),
383 		fTopCache(topCache),
384 		fCache(cache),
385 		fPage(page)
386 	{
387 		Initialized();
388 	}
389 
390 	virtual void AddDump(TraceOutput& out)
391 	{
392 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 			"page: %p", fArea, fTopCache, fCache, fPage);
394 	}
395 
396 private:
397 	area_id		fArea;
398 	VMCache*	fTopCache;
399 	VMCache*	fCache;
400 	vm_page*	fPage;
401 };
402 
403 }	// namespace VMPageFaultTracing
404 
405 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 #	define TPF(x) ;
408 #endif	// VM_PAGE_FAULT_TRACING
409 
410 
411 //	#pragma mark -
412 
413 
414 /*!	The page's cache must be locked.
415 */
416 static inline void
417 increment_page_wired_count(vm_page* page)
418 {
419 	if (!page->IsMapped())
420 		atomic_add(&gMappedPagesCount, 1);
421 	page->IncrementWiredCount();
422 }
423 
424 
425 /*!	The page's cache must be locked.
426 */
427 static inline void
428 decrement_page_wired_count(vm_page* page)
429 {
430 	page->DecrementWiredCount();
431 	if (!page->IsMapped())
432 		atomic_add(&gMappedPagesCount, -1);
433 }
434 
435 
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
438 {
439 	return area->Base()
440 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441 }
442 
443 
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
447 {
448 	VMAreaHash::ReadLock();
449 
450 	VMArea* area = VMAreaHash::LookupLocked(id);
451 	if (area != NULL && area->address_space != addressSpace)
452 		area = NULL;
453 
454 	VMAreaHash::ReadUnlock();
455 
456 	return area;
457 }
458 
459 
460 static status_t
461 allocate_area_page_protections(VMArea* area)
462 {
463 	// In the page protections we store only the three user protections,
464 	// so we use 4 bits per page.
465 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 	area->page_protections = (uint8*)malloc_etc(bytes,
467 		HEAP_DONT_LOCK_KERNEL_SPACE);
468 	if (area->page_protections == NULL)
469 		return B_NO_MEMORY;
470 
471 	// init the page protections for all pages to that of the area
472 	uint32 areaProtection = area->protection
473 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 	memset(area->page_protections, areaProtection | (areaProtection << 4),
475 		bytes);
476 	return B_OK;
477 }
478 
479 
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482 {
483 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 	uint8& entry = area->page_protections[pageIndex / 2];
486 	if (pageIndex % 2 == 0)
487 		entry = (entry & 0xf0) | protection;
488 	else
489 		entry = (entry & 0x0f) | (protection << 4);
490 }
491 
492 
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
495 {
496 	if (area->page_protections == NULL)
497 		return area->protection;
498 
499 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 	uint32 protection = area->page_protections[pageIndex / 2];
501 	if (pageIndex % 2 == 0)
502 		protection &= 0x0f;
503 	else
504 		protection >>= 4;
505 
506 	// If this is a kernel area we translate the user flags to kernel flags.
507 	if (area->address_space == VMAddressSpace::Kernel()) {
508 		uint32 kernelProtection = 0;
509 		if ((protection & B_READ_AREA) != 0)
510 			kernelProtection |= B_KERNEL_READ_AREA;
511 		if ((protection & B_WRITE_AREA) != 0)
512 			kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 		return kernelProtection;
515 	}
516 
517 	return protection | B_KERNEL_READ_AREA
518 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519 }
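
// Editor's note: a worked example of the nibble packing used by the helpers
// above (illustrative only, assuming B_PAGE_SIZE == 4096). Two consecutive
// pages share one byte of page_protections: an even page index occupies the
// low nibble, an odd page index the high nibble. So for an area based at
// 0x100000, calling
//
//     set_area_page_protection(area, 0x101000, B_READ_AREA | B_WRITE_AREA);
//
// only modifies the high nibble of page_protections[0], leaving the
// protection of the page at 0x100000 untouched.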
520 
521 
522 /*!	The caller must have reserved as many pages as the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue,
575 		// since otherwise the page daemon wouldn't keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
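
// Editor's note: a minimal sketch of map_page()'s calling convention (not part
// of the original source); it mirrors the B_FULL_LOCK path in
// vm_create_anonymous_area() further down. The surrounding names (area, cache,
// address, offset, protection, reservedPages) stand in for an already created
// area, its locked cache, and a page-aligned address/offset:
//
//     vm_page_reservation reservation;
//     vm_page_reserve_pages(&reservation, reservedPages, VM_PRIORITY_SYSTEM);
//     vm_page* page = vm_page_allocate_page(&reservation, PAGE_STATE_WIRED);
//     cache->InsertPage(page, offset);
//     map_page(area, page, address, protection, &reservation);
//     DEBUG_PAGE_ACCESS_END(page);
//     vm_page_unreserve_pages(&reservation);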
586 
587 
588 /*!	The caller must hold the lock of the page's cache when calling this
589 	function.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	The caller must hold the lock of all mapped pages' caches when calling
600 	this function.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 /*!	Cuts a piece out of an area. If the given cut range covers the complete
610 	area, it is deleted. If it covers the beginning or the end, the area is
611 	resized accordingly. If the range covers some part in the middle of the
612 	area, it is split in two; in this case the second area is returned via
613 	\a _secondArea (the variable is left untouched in the other cases).
614 	The address space must be write locked.
615 	The caller must ensure that no part of the given range is wired.
616 */
617 static status_t
618 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
619 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
620 {
621 	// Does the cut range intersect with the area at all?
622 	addr_t areaLast = area->Base() + (area->Size() - 1);
623 	if (area->Base() > lastAddress || areaLast < address)
624 		return B_OK;
625 
626 	// Is the area fully covered?
627 	if (area->Base() >= address && areaLast <= lastAddress) {
628 		delete_area(addressSpace, area, false);
629 		return B_OK;
630 	}
631 
632 	int priority;
633 	uint32 allocationFlags;
634 	if (addressSpace == VMAddressSpace::Kernel()) {
635 		priority = VM_PRIORITY_SYSTEM;
636 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
637 			| HEAP_DONT_LOCK_KERNEL_SPACE;
638 	} else {
639 		priority = VM_PRIORITY_USER;
640 		allocationFlags = 0;
641 	}
642 
643 	VMCache* cache = vm_area_get_locked_cache(area);
644 	VMCacheChainLocker cacheChainLocker(cache);
645 	cacheChainLocker.LockAllSourceCaches();
646 
647 	// Cut the end only?
648 	if (areaLast <= lastAddress) {
649 		size_t oldSize = area->Size();
650 		size_t newSize = address - area->Base();
651 
652 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
653 			allocationFlags);
654 		if (error != B_OK)
655 			return error;
656 
657 		// unmap pages
658 		unmap_pages(area, address, oldSize - newSize);
659 
660 		// If no one else uses the area's cache, we can resize it, too.
661 		if (cache->areas == area && area->cache_next == NULL
662 			&& cache->consumers.IsEmpty()
663 			&& cache->type == CACHE_TYPE_RAM) {
664 			// Since VMCache::Resize() can temporarily drop the lock, we must
665 			// unlock all lower caches to prevent locking order inversion.
666 			cacheChainLocker.Unlock(cache);
667 			cache->Resize(cache->virtual_base + newSize, priority);
668 			cache->ReleaseRefAndUnlock();
669 		}
670 
671 		return B_OK;
672 	}
673 
674 	// Cut the beginning only?
675 	if (area->Base() >= address) {
676 		addr_t oldBase = area->Base();
677 		addr_t newBase = lastAddress + 1;
678 		size_t newSize = areaLast - lastAddress;
679 
680 		// unmap pages
681 		unmap_pages(area, oldBase, newBase - oldBase);
682 
683 		// resize the area
684 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
685 			allocationFlags);
686 		if (error != B_OK)
687 			return error;
688 
689 		// TODO: If no one else uses the area's cache, we should resize it, too!
690 
691 		area->cache_offset += newBase - oldBase;
692 
693 		return B_OK;
694 	}
695 
696 	// The tough part -- cut a piece out of the middle of the area.
697 	// We do that by shrinking the area to the beginning section and creating a
698 	// new area for the end section.
699 
700 	addr_t firstNewSize = address - area->Base();
701 	addr_t secondBase = lastAddress + 1;
702 	addr_t secondSize = areaLast - lastAddress;
703 
704 	// unmap pages
705 	unmap_pages(area, address, area->Size() - firstNewSize);
706 
707 	// resize the area
708 	addr_t oldSize = area->Size();
709 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
710 		allocationFlags);
711 	if (error != B_OK)
712 		return error;
713 
714 	// TODO: If no one else uses the area's cache, we might want to create a
715 	// new cache for the second area, transfer the concerned pages from the
716 	// first cache to it and resize the first cache.
717 
718 	// map the second area
719 	virtual_address_restrictions addressRestrictions = {};
720 	addressRestrictions.address = (void*)secondBase;
721 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
722 	VMArea* secondArea;
723 	error = map_backing_store(addressSpace, cache,
724 		area->cache_offset + (secondBase - area->Base()), area->name,
725 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
726 		&addressRestrictions, kernel, &secondArea, NULL);
727 	if (error != B_OK) {
728 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
729 		return error;
730 	}
731 
732 	// We need a cache reference for the new area.
733 	cache->AcquireRefLocked();
734 
735 	if (_secondArea != NULL)
736 		*_secondArea = secondArea;
737 
738 	return B_OK;
739 }
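
// Editor's note: an illustration of the four cases cut_area() handles above
// (addresses are made up). For an area covering [0x1000, 0x5000):
//   - cutting [0x4000, 0x5000) shrinks its tail to [0x1000, 0x4000)
//   - cutting [0x1000, 0x2000) shrinks its head to [0x2000, 0x5000)
//   - cutting [0x2000, 0x3000) splits it into [0x1000, 0x2000) plus a second
//     area [0x3000, 0x5000) returned via _secondArea
//   - cutting [0x0000, 0x6000) deletes the area entirely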
740 
741 
742 /*!	Deletes all areas in the given address range.
743 	The address space must be write-locked.
744 	The caller must ensure that no part of the given range is wired.
745 */
746 static status_t
747 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
748 	bool kernel)
749 {
750 	size = PAGE_ALIGN(size);
751 	addr_t lastAddress = address + (size - 1);
752 
753 	// Check whether the caller is allowed to modify the concerned areas.
754 	if (!kernel) {
755 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
756 				VMArea* area = it.Next();) {
757 			addr_t areaLast = area->Base() + (area->Size() - 1);
758 			if (area->Base() < lastAddress && address < areaLast) {
759 				if (area->address_space == VMAddressSpace::Kernel()) {
760 					dprintf("unmap_address_range: team %" B_PRId32 " tried to "
761 						"unmap range of kernel area %" B_PRId32 " (%s)\n",
762 						team_get_current_team_id(), area->id, area->name);
763 					return B_NOT_ALLOWED;
764 				}
765 			}
766 		}
767 	}
768 
769 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
770 			VMArea* area = it.Next();) {
771 		addr_t areaLast = area->Base() + (area->Size() - 1);
772 		if (area->Base() < lastAddress && address < areaLast) {
773 			status_t error = cut_area(addressSpace, area, address,
774 				lastAddress, NULL, kernel);
775 			if (error != B_OK)
776 				return error;
777 				// Failing after already messing with areas is ugly, but we
778 				// can't do anything about it.
779 		}
780 	}
781 
782 	return B_OK;
783 }
784 
785 
786 /*! You need to hold the lock of the cache and the write lock of the address
787 	space when calling this function.
788 	Note that in case of error your cache will be temporarily unlocked.
789 	If \a addressSpec is \c B_EXACT_ADDRESS and the
790 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
791 	that no part of the specified address range (base \c *_virtualAddress, size
792 	\a size) is wired.
793 */
794 static status_t
795 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
796 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
797 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
798 	bool kernel, VMArea** _area, void** _virtualAddress)
799 {
800 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
801 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
802 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
803 		addressRestrictions->address, offset, size,
804 		addressRestrictions->address_specification, wiring, protection,
805 		_area, areaName));
806 	cache->AssertLocked();
807 
808 	if (size == 0) {
809 #if KDEBUG
810 		panic("map_backing_store(): called with size=0 for area '%s'!",
811 			areaName);
812 #endif
813 		return B_BAD_VALUE;
814 	}
815 
816 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
817 		| HEAP_DONT_LOCK_KERNEL_SPACE;
818 	int priority;
819 	if (addressSpace != VMAddressSpace::Kernel()) {
820 		priority = VM_PRIORITY_USER;
821 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
822 		priority = VM_PRIORITY_VIP;
823 		allocationFlags |= HEAP_PRIORITY_VIP;
824 	} else
825 		priority = VM_PRIORITY_SYSTEM;
826 
827 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
828 		allocationFlags);
829 	if (area == NULL)
830 		return B_NO_MEMORY;
831 
832 	status_t status;
833 
834 	// if this is a private map, we need to create a new cache
835 	// to handle the private copies of pages as they are written to
836 	VMCache* sourceCache = cache;
837 	if (mapping == REGION_PRIVATE_MAP) {
838 		VMCache* newCache;
839 
840 		// create an anonymous cache
841 		status = VMCacheFactory::CreateAnonymousCache(newCache,
842 			(protection & B_STACK_AREA) != 0
843 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
844 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
845 		if (status != B_OK)
846 			goto err1;
847 
848 		newCache->Lock();
849 		newCache->temporary = 1;
850 		newCache->virtual_base = offset;
851 		newCache->virtual_end = offset + size;
852 
853 		cache->AddConsumer(newCache);
854 
855 		cache = newCache;
856 	}
857 
858 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
859 		status = cache->SetMinimalCommitment(size, priority);
860 		if (status != B_OK)
861 			goto err2;
862 	}
863 
864 	// check to see if this address space has entered DELETE state
865 	if (addressSpace->IsBeingDeleted()) {
866 		// okay, someone is trying to delete this address space now, so we can't
867 		// insert the area and have to back out
868 		status = B_BAD_TEAM_ID;
869 		goto err2;
870 	}
871 
872 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
873 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
874 		status = unmap_address_range(addressSpace,
875 			(addr_t)addressRestrictions->address, size, kernel);
876 		if (status != B_OK)
877 			goto err2;
878 	}
879 
880 	status = addressSpace->InsertArea(area, size, addressRestrictions,
881 		allocationFlags, _virtualAddress);
882 	if (status == B_NO_MEMORY
883 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
884 		// TODO: At present, there is no way to notify the low_resource monitor
885 		// that kernel address space is fragmented, nor does it check for this
886 		// automatically. Due to how many locks are held, we cannot wait here
887 		// for space to be freed up, but it would be good to at least notify
888 		// that we tried and failed to allocate some amount.
889 	}
890 	if (status != B_OK)
891 		goto err2;
892 
893 	// attach the cache to the area
894 	area->cache = cache;
895 	area->cache_offset = offset;
896 
897 	// point the cache back to the area
898 	cache->InsertAreaLocked(area);
899 	if (mapping == REGION_PRIVATE_MAP)
900 		cache->Unlock();
901 
902 	// insert the area in the global area hash table
903 	VMAreaHash::Insert(area);
904 
905 	// grab a ref to the address space (the area holds this)
906 	addressSpace->Get();
907 
908 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
909 //		cache, sourceCache, areaName, area);
910 
911 	*_area = area;
912 	return B_OK;
913 
914 err2:
915 	if (mapping == REGION_PRIVATE_MAP) {
916 		// We created this cache, so we must delete it again. Note that we
917 		// need to temporarily unlock the source cache or we'll otherwise
918 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
919 		sourceCache->Unlock();
920 		cache->ReleaseRefAndUnlock();
921 		sourceCache->Lock();
922 	}
923 err1:
924 	addressSpace->DeleteArea(area, allocationFlags);
925 	return status;
926 }
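
// Editor's note: a minimal sketch of the usual map_backing_store() calling
// pattern (not part of the original source); it mirrors vm_block_address_range()
// and vm_create_null_area() below. The cache comes from a VMCacheFactory call
// and must be locked; on failure the reference held for the area is dropped.
// Names such as addressSpace, name, size, protection and addressSpec are
// placeholders:
//
//     cache->Lock();
//     virtual_address_restrictions addressRestrictions = {};
//     addressRestrictions.address = *_address;
//     addressRestrictions.address_specification = addressSpec;
//     VMArea* area;
//     status_t status = map_backing_store(addressSpace, cache, 0, name, size,
//         B_NO_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
//         &addressRestrictions, true, &area, _address);
//     if (status != B_OK) {
//         cache->ReleaseRefAndUnlock();
//         return status;
//     }
//     cache->Unlock();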
927 
928 
929 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
930 	  locker1, locker2).
931 */
932 template<typename LockerType1, typename LockerType2>
933 static inline bool
934 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
935 {
936 	area->cache->AssertLocked();
937 
938 	VMAreaUnwiredWaiter waiter;
939 	if (!area->AddWaiterIfWired(&waiter))
940 		return false;
941 
942 	// unlock everything and wait
943 	if (locker1 != NULL)
944 		locker1->Unlock();
945 	if (locker2 != NULL)
946 		locker2->Unlock();
947 
948 	waiter.waitEntry.Wait();
949 
950 	return true;
951 }
952 
953 
954 /*!	Checks whether the given area has any wired ranges intersecting with the
955 	specified range and waits, if so.
956 
957 	When it has to wait, the function calls \c Unlock() on both \a locker1
958 	and \a locker2, if given.
959 	The area's top cache must be locked and must be unlocked as a side effect
960 	of calling \c Unlock() on either \a locker1 or \a locker2.
961 
962 	If the function does not have to wait it does not modify or unlock any
963 	object.
964 
965 	\param area The area to be checked.
966 	\param base The base address of the range to check.
967 	\param size The size of the address range to check.
968 	\param locker1 An object to be unlocked before starting to wait (may
969 		be \c NULL).
970 	\param locker2 An object to be unlocked before starting to wait (may
971 		be \c NULL).
972 	\return \c true, if the function had to wait, \c false otherwise.
973 */
974 template<typename LockerType1, typename LockerType2>
975 static inline bool
976 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
977 	LockerType1* locker1, LockerType2* locker2)
978 {
979 	area->cache->AssertLocked();
980 
981 	VMAreaUnwiredWaiter waiter;
982 	if (!area->AddWaiterIfWired(&waiter, base, size))
983 		return false;
984 
985 	// unlock everything and wait
986 	if (locker1 != NULL)
987 		locker1->Unlock();
988 	if (locker2 != NULL)
989 		locker2->Unlock();
990 
991 	waiter.waitEntry.Wait();
992 
993 	return true;
994 }
995 
996 
997 /*!	Checks whether the given address space has any wired ranges intersecting
998 	with the specified range and waits, if so.
999 
1000 	Similar to wait_if_area_range_is_wired(), with the following differences:
1001 	- All areas intersecting with the range are checked (respectively all until
1002 	  one is found that contains a wired range intersecting with the given
1003 	  range).
1004 	- The given address space must at least be read-locked and must be unlocked
1005 	  when \c Unlock() is called on \a locker.
1006 	- None of the areas' caches are allowed to be locked.
1007 */
1008 template<typename LockerType>
1009 static inline bool
1010 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1011 	size_t size, LockerType* locker)
1012 {
1013 	addr_t end = base + size - 1;
1014 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
1015 			VMArea* area = it.Next();) {
1016 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
1017 		if (area->Base() > end)
1018 			return false;
1019 
1020 		if (base >= area->Base() + area->Size() - 1)
1021 			continue;
1022 
1023 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1024 
1025 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1026 			return true;
1027 	}
1028 
1029 	return false;
1030 }
1031 
1032 
1033 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1034 	It must be called in a situation where locking the kernel address space is
1035 	still allowed (the function itself acquires the lock).
1036 */
1037 status_t
1038 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1039 {
1040 	AddressSpaceReadLocker locker;
1041 	VMArea* area;
1042 	status_t status = locker.SetFromArea(id, area);
1043 	if (status != B_OK)
1044 		return status;
1045 
1046 	if (area->page_protections == NULL) {
1047 		status = allocate_area_page_protections(area);
1048 		if (status != B_OK)
1049 			return status;
1050 	}
1051 
1052 	*cookie = (void*)area;
1053 	return B_OK;
1054 }
1055 
1056 
1057 /*!	This is a debug helper function that can only be used in very specific
1058 	situations.
1059 	Sets protection for the given address range to the protection specified.
1060 	If \a protection is 0 then the involved pages will be marked non-present
1061 	in the translation map to cause a fault on access. The pages aren't
1062 	actually unmapped, however, so that they can be marked present again with
1063 	additional calls to this function. For this to work the area must be
1064 	fully locked in memory so that the pages aren't otherwise touched.
1065 	This function does not lock the kernel address space and needs to be
1066 	supplied with a \a cookie retrieved from a successful call to
1067 	vm_prepare_kernel_area_debug_protection().
1068 */
1069 status_t
1070 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1071 	uint32 protection)
1072 {
1073 	// check address range
1074 	addr_t address = (addr_t)_address;
1075 	size = PAGE_ALIGN(size);
1076 
1077 	if ((address % B_PAGE_SIZE) != 0
1078 		|| (addr_t)address + size < (addr_t)address
1079 		|| !IS_KERNEL_ADDRESS(address)
1080 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1081 		return B_BAD_VALUE;
1082 	}
1083 
1084 	// Translate the kernel protection to user protection as we only store that.
1085 	if ((protection & B_KERNEL_READ_AREA) != 0)
1086 		protection |= B_READ_AREA;
1087 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1088 		protection |= B_WRITE_AREA;
1089 
1090 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1091 	VMTranslationMap* map = addressSpace->TranslationMap();
1092 	VMArea* area = (VMArea*)cookie;
1093 
1094 	addr_t offset = address - area->Base();
1095 	if (area->Size() - offset < size) {
1096 		panic("protect range not fully within supplied area");
1097 		return B_BAD_VALUE;
1098 	}
1099 
1100 	if (area->page_protections == NULL) {
1101 		panic("area has no page protections");
1102 		return B_BAD_VALUE;
1103 	}
1104 
1105 	// Invalidate the mapping entries so that any access to them will fault, or
1106 	// restore the mapping entries unchanged so that lookups will succeed again.
1107 	map->Lock();
1108 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1109 	map->Unlock();
1110 
1111 	// And set the proper page protections so that the fault case will actually
1112 	// fail and not simply try to map a new page.
1113 	for (addr_t pageAddress = address; pageAddress < address + size;
1114 			pageAddress += B_PAGE_SIZE) {
1115 		set_area_page_protection(area, pageAddress, protection);
1116 	}
1117 
1118 	return B_OK;
1119 }
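
// Editor's note: a hedged usage sketch combining the two debug functions above
// (not part of the original source). "id" names a fully locked kernel area and
// "address" a page-aligned address within it, as required by the documentation:
//
//     void* cookie;
//     if (vm_prepare_kernel_area_debug_protection(id, &cookie) == B_OK) {
//         // make the page fault on any access ...
//         vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE, 0);
//         // ... and later make it accessible again
//         vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE,
//             B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//     }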
1120 
1121 
1122 status_t
1123 vm_block_address_range(const char* name, void* address, addr_t size)
1124 {
1125 	if (!arch_vm_supports_protection(0))
1126 		return B_NOT_SUPPORTED;
1127 
1128 	AddressSpaceWriteLocker locker;
1129 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1130 	if (status != B_OK)
1131 		return status;
1132 
1133 	VMAddressSpace* addressSpace = locker.AddressSpace();
1134 
1135 	// create an anonymous cache
1136 	VMCache* cache;
1137 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1138 		VM_PRIORITY_SYSTEM);
1139 	if (status != B_OK)
1140 		return status;
1141 
1142 	cache->temporary = 1;
1143 	cache->virtual_end = size;
1144 	cache->Lock();
1145 
1146 	VMArea* area;
1147 	virtual_address_restrictions addressRestrictions = {};
1148 	addressRestrictions.address = address;
1149 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1150 	status = map_backing_store(addressSpace, cache, 0, name, size,
1151 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1152 		true, &area, NULL);
1153 	if (status != B_OK) {
1154 		cache->ReleaseRefAndUnlock();
1155 		return status;
1156 	}
1157 
1158 	cache->Unlock();
1159 	area->cache_type = CACHE_TYPE_RAM;
1160 	return area->id;
1161 }
1162 
1163 
1164 status_t
1165 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1166 {
1167 	AddressSpaceWriteLocker locker(team);
1168 	if (!locker.IsLocked())
1169 		return B_BAD_TEAM_ID;
1170 
1171 	VMAddressSpace* addressSpace = locker.AddressSpace();
1172 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1173 		addressSpace == VMAddressSpace::Kernel()
1174 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1175 }
1176 
1177 
1178 status_t
1179 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1180 	addr_t size, uint32 flags)
1181 {
1182 	if (size == 0)
1183 		return B_BAD_VALUE;
1184 
1185 	AddressSpaceWriteLocker locker(team);
1186 	if (!locker.IsLocked())
1187 		return B_BAD_TEAM_ID;
1188 
1189 	virtual_address_restrictions addressRestrictions = {};
1190 	addressRestrictions.address = *_address;
1191 	addressRestrictions.address_specification = addressSpec;
1192 	VMAddressSpace* addressSpace = locker.AddressSpace();
1193 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1194 		addressSpace == VMAddressSpace::Kernel()
1195 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1196 		_address);
1197 }
1198 
1199 
1200 area_id
1201 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1202 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1203 	const virtual_address_restrictions* virtualAddressRestrictions,
1204 	const physical_address_restrictions* physicalAddressRestrictions,
1205 	bool kernel, void** _address)
1206 {
1207 	VMArea* area;
1208 	VMCache* cache;
1209 	vm_page* page = NULL;
1210 	bool isStack = (protection & B_STACK_AREA) != 0;
1211 	page_num_t guardPages;
1212 	bool canOvercommit = false;
1213 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1214 		? VM_PAGE_ALLOC_CLEAR : 0;
1215 
1216 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1217 		team, name, size));
1218 
1219 	size = PAGE_ALIGN(size);
1220 	guardSize = PAGE_ALIGN(guardSize);
1221 	guardPages = guardSize / B_PAGE_SIZE;
1222 
1223 	if (size == 0 || size < guardSize)
1224 		return B_BAD_VALUE;
1225 	if (!arch_vm_supports_protection(protection))
1226 		return B_NOT_SUPPORTED;
1227 
1228 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1229 		canOvercommit = true;
1230 
1231 #ifdef DEBUG_KERNEL_STACKS
1232 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1233 		isStack = true;
1234 #endif
1235 
1236 	// check parameters
1237 	switch (virtualAddressRestrictions->address_specification) {
1238 		case B_ANY_ADDRESS:
1239 		case B_EXACT_ADDRESS:
1240 		case B_BASE_ADDRESS:
1241 		case B_ANY_KERNEL_ADDRESS:
1242 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1243 		case B_RANDOMIZED_ANY_ADDRESS:
1244 		case B_RANDOMIZED_BASE_ADDRESS:
1245 			break;
1246 
1247 		default:
1248 			return B_BAD_VALUE;
1249 	}
1250 
1251 	// If low or high physical address restrictions are given, we force
1252 	// B_CONTIGUOUS wiring, since only then we'll use
1253 	// vm_page_allocate_page_run() which deals with those restrictions.
1254 	if (physicalAddressRestrictions->low_address != 0
1255 		|| physicalAddressRestrictions->high_address != 0) {
1256 		wiring = B_CONTIGUOUS;
1257 	}
1258 
1259 	physical_address_restrictions stackPhysicalRestrictions;
1260 	bool doReserveMemory = false;
1261 	switch (wiring) {
1262 		case B_NO_LOCK:
1263 			break;
1264 		case B_FULL_LOCK:
1265 		case B_LAZY_LOCK:
1266 		case B_CONTIGUOUS:
1267 			doReserveMemory = true;
1268 			break;
1269 		case B_ALREADY_WIRED:
1270 			break;
1271 		case B_LOMEM:
1272 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1273 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1274 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1275 			wiring = B_CONTIGUOUS;
1276 			doReserveMemory = true;
1277 			break;
1278 		case B_32_BIT_FULL_LOCK:
1279 			if (B_HAIKU_PHYSICAL_BITS <= 32
1280 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1281 				wiring = B_FULL_LOCK;
1282 				doReserveMemory = true;
1283 				break;
1284 			}
1285 			// TODO: We don't really support this mode efficiently. Just fall
1286 			// through for now ...
1287 		case B_32_BIT_CONTIGUOUS:
1288 			#if B_HAIKU_PHYSICAL_BITS > 32
1289 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1290 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1291 					stackPhysicalRestrictions.high_address
1292 						= (phys_addr_t)1 << 32;
1293 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1294 				}
1295 			#endif
1296 			wiring = B_CONTIGUOUS;
1297 			doReserveMemory = true;
1298 			break;
1299 		default:
1300 			return B_BAD_VALUE;
1301 	}
1302 
1303 	// Optimization: For a single-page contiguous allocation without low/high
1304 	// memory restriction B_FULL_LOCK wiring suffices.
1305 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1306 		&& physicalAddressRestrictions->low_address == 0
1307 		&& physicalAddressRestrictions->high_address == 0) {
1308 		wiring = B_FULL_LOCK;
1309 	}
1310 
1311 	// For full lock or contiguous areas we're also going to map the pages and
1312 	// thus need to reserve pages for the mapping backend upfront.
1313 	addr_t reservedMapPages = 0;
1314 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1315 		AddressSpaceWriteLocker locker;
1316 		status_t status = locker.SetTo(team);
1317 		if (status != B_OK)
1318 			return status;
1319 
1320 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1321 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1322 	}
1323 
1324 	int priority;
1325 	if (team != VMAddressSpace::KernelID())
1326 		priority = VM_PRIORITY_USER;
1327 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1328 		priority = VM_PRIORITY_VIP;
1329 	else
1330 		priority = VM_PRIORITY_SYSTEM;
1331 
1332 	// Reserve memory before acquiring the address space lock. This reduces the
1333 	// chances of failure, since while holding the write lock to the address
1334 	// space (if it is the kernel address space, that is), the low memory handler
1335 	// won't be able to free anything for us.
1336 	addr_t reservedMemory = 0;
1337 	if (doReserveMemory) {
1338 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1339 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1340 			return B_NO_MEMORY;
1341 		reservedMemory = size;
1342 		// TODO: We don't reserve the memory for the pages for the page
1343 		// directories/tables. We actually need to, since we currently don't
1344 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1345 		// there are actually fewer physical pages than there should be, which
1346 		// can get the VM into trouble in low memory situations.
1347 	}
1348 
1349 	AddressSpaceWriteLocker locker;
1350 	VMAddressSpace* addressSpace;
1351 	status_t status;
1352 
1353 	// For full lock areas reserve the pages before locking the address
1354 	// space. E.g. block caches can't release their memory while we hold the
1355 	// address space lock.
1356 	page_num_t reservedPages = reservedMapPages;
1357 	if (wiring == B_FULL_LOCK)
1358 		reservedPages += size / B_PAGE_SIZE;
1359 
1360 	vm_page_reservation reservation;
1361 	if (reservedPages > 0) {
1362 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1363 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1364 					priority)) {
1365 				reservedPages = 0;
1366 				status = B_WOULD_BLOCK;
1367 				goto err0;
1368 			}
1369 		} else
1370 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1371 	}
1372 
1373 	if (wiring == B_CONTIGUOUS) {
1374 		// we try to allocate the page run here upfront as this may easily
1375 		// fail for obvious reasons
1376 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1377 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1378 		if (page == NULL) {
1379 			status = B_NO_MEMORY;
1380 			goto err0;
1381 		}
1382 	}
1383 
1384 	// Lock the address space and, if B_EXACT_ADDRESS and
1385 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1386 	// is not wired.
1387 	do {
1388 		status = locker.SetTo(team);
1389 		if (status != B_OK)
1390 			goto err1;
1391 
1392 		addressSpace = locker.AddressSpace();
1393 	} while (virtualAddressRestrictions->address_specification
1394 			== B_EXACT_ADDRESS
1395 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1396 		&& wait_if_address_range_is_wired(addressSpace,
1397 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1398 
1399 	// create an anonymous cache
1400 	// if it's a stack, make sure that two pages are available at least
1401 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1402 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1403 		wiring == B_NO_LOCK, priority);
1404 	if (status != B_OK)
1405 		goto err1;
1406 
1407 	cache->temporary = 1;
1408 	cache->virtual_end = size;
1409 	cache->committed_size = reservedMemory;
1410 		// TODO: This should be done via a method.
1411 	reservedMemory = 0;
1412 
1413 	cache->Lock();
1414 
1415 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1416 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1417 		kernel, &area, _address);
1418 
1419 	if (status != B_OK) {
1420 		cache->ReleaseRefAndUnlock();
1421 		goto err1;
1422 	}
1423 
1424 	locker.DegradeToReadLock();
1425 
1426 	switch (wiring) {
1427 		case B_NO_LOCK:
1428 		case B_LAZY_LOCK:
1429 			// do nothing - the pages are mapped in as needed
1430 			break;
1431 
1432 		case B_FULL_LOCK:
1433 		{
1434 			// Allocate and map all pages for this area
1435 
1436 			off_t offset = 0;
1437 			for (addr_t address = area->Base();
1438 					address < area->Base() + (area->Size() - 1);
1439 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1440 #ifdef DEBUG_KERNEL_STACKS
1441 #	ifdef STACK_GROWS_DOWNWARDS
1442 				if (isStack && address < area->Base()
1443 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1444 #	else
1445 				if (isStack && address >= area->Base() + area->Size()
1446 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1447 #	endif
1448 					continue;
1449 #endif
1450 				vm_page* page = vm_page_allocate_page(&reservation,
1451 					PAGE_STATE_WIRED | pageAllocFlags);
1452 				cache->InsertPage(page, offset);
1453 				map_page(area, page, address, protection, &reservation);
1454 
1455 				DEBUG_PAGE_ACCESS_END(page);
1456 			}
1457 
1458 			break;
1459 		}
1460 
1461 		case B_ALREADY_WIRED:
1462 		{
1463 			// The pages should already be mapped. This is only really useful
1464 			// during boot time. Find the appropriate vm_page objects and stick
1465 			// them in the cache object.
1466 			VMTranslationMap* map = addressSpace->TranslationMap();
1467 			off_t offset = 0;
1468 
1469 			if (!gKernelStartup)
1470 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1471 
1472 			map->Lock();
1473 
1474 			for (addr_t virtualAddress = area->Base();
1475 					virtualAddress < area->Base() + (area->Size() - 1);
1476 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1477 				phys_addr_t physicalAddress;
1478 				uint32 flags;
1479 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1480 				if (status < B_OK) {
1481 					panic("looking up mapping failed for va 0x%lx\n",
1482 						virtualAddress);
1483 				}
1484 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1485 				if (page == NULL) {
1486 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1487 						"\n", physicalAddress);
1488 				}
1489 
1490 				DEBUG_PAGE_ACCESS_START(page);
1491 
1492 				cache->InsertPage(page, offset);
1493 				increment_page_wired_count(page);
1494 				vm_page_set_state(page, PAGE_STATE_WIRED);
1495 				page->busy = false;
1496 
1497 				DEBUG_PAGE_ACCESS_END(page);
1498 			}
1499 
1500 			map->Unlock();
1501 			break;
1502 		}
1503 
1504 		case B_CONTIGUOUS:
1505 		{
1506 			// We have already allocated our contiguous page run, so we can now
1507 			// just map them in the address space
1508 			VMTranslationMap* map = addressSpace->TranslationMap();
1509 			phys_addr_t physicalAddress
1510 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1511 			addr_t virtualAddress = area->Base();
1512 			off_t offset = 0;
1513 
1514 			map->Lock();
1515 
1516 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1517 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1518 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1519 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1520 				if (page == NULL)
1521 					panic("couldn't lookup physical page just allocated\n");
1522 
1523 				status = map->Map(virtualAddress, physicalAddress, protection,
1524 					area->MemoryType(), &reservation);
1525 				if (status < B_OK)
1526 					panic("couldn't map physical page in page run\n");
1527 
1528 				cache->InsertPage(page, offset);
1529 				increment_page_wired_count(page);
1530 
1531 				DEBUG_PAGE_ACCESS_END(page);
1532 			}
1533 
1534 			map->Unlock();
1535 			break;
1536 		}
1537 
1538 		default:
1539 			break;
1540 	}
1541 
1542 	cache->Unlock();
1543 
1544 	if (reservedPages > 0)
1545 		vm_page_unreserve_pages(&reservation);
1546 
1547 	TRACE(("vm_create_anonymous_area: done\n"));
1548 
1549 	area->cache_type = CACHE_TYPE_RAM;
1550 	return area->id;
1551 
1552 err1:
1553 	if (wiring == B_CONTIGUOUS) {
1554 		// we had reserved the area space upfront...
1555 		phys_addr_t pageNumber = page->physical_page_number;
1556 		int32 i;
1557 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1558 			page = vm_lookup_page(pageNumber);
1559 			if (page == NULL)
1560 				panic("couldn't lookup physical page just allocated\n");
1561 
1562 			vm_page_set_state(page, PAGE_STATE_FREE);
1563 		}
1564 	}
1565 
1566 err0:
1567 	if (reservedPages > 0)
1568 		vm_page_unreserve_pages(&reservation);
1569 	if (reservedMemory > 0)
1570 		vm_unreserve_memory(reservedMemory);
1571 
1572 	return status;
1573 }
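
// Editor's note: a minimal sketch of a kernel-side caller of
// vm_create_anonymous_area() (not part of the original source; the area name
// and size are illustrative only):
//
//     virtual_address_restrictions virtualRestrictions = {};
//     virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
//     physical_address_restrictions physicalRestrictions = {};
//     void* address;
//     area_id id = vm_create_anonymous_area(VMAddressSpace::KernelID(),
//         "some buffer", 4 * B_PAGE_SIZE, B_FULL_LOCK,
//         B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
//         &virtualRestrictions, &physicalRestrictions, true, &address);
//     if (id < 0)
//         return id;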
1574 
1575 
1576 area_id
1577 vm_map_physical_memory(team_id team, const char* name, void** _address,
1578 	uint32 addressSpec, addr_t size, uint32 protection,
1579 	phys_addr_t physicalAddress, bool alreadyWired)
1580 {
1581 	VMArea* area;
1582 	VMCache* cache;
1583 	addr_t mapOffset;
1584 
1585 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1586 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1587 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1588 		addressSpec, size, protection, physicalAddress));
1589 
1590 	if (!arch_vm_supports_protection(protection))
1591 		return B_NOT_SUPPORTED;
1592 
1593 	AddressSpaceWriteLocker locker(team);
1594 	if (!locker.IsLocked())
1595 		return B_BAD_TEAM_ID;
1596 
1597 	// if the physical address is not page aligned,
1598 	// move the actual area down to align on a page boundary
1599 	mapOffset = physicalAddress % B_PAGE_SIZE;
1600 	size += mapOffset;
1601 	physicalAddress -= mapOffset;
1602 
1603 	size = PAGE_ALIGN(size);
1604 
1605 	// create a device cache
1606 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1607 	if (status != B_OK)
1608 		return status;
1609 
1610 	cache->virtual_end = size;
1611 
1612 	cache->Lock();
1613 
1614 	virtual_address_restrictions addressRestrictions = {};
1615 	addressRestrictions.address = *_address;
1616 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1617 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1618 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1619 		true, &area, _address);
1620 
1621 	if (status < B_OK)
1622 		cache->ReleaseRefLocked();
1623 
1624 	cache->Unlock();
1625 
1626 	if (status == B_OK) {
1627 		// set requested memory type -- use uncached, if not given
1628 		uint32 memoryType = addressSpec & B_MTR_MASK;
1629 		if (memoryType == 0)
1630 			memoryType = B_MTR_UC;
1631 
1632 		area->SetMemoryType(memoryType);
1633 
1634 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1635 		if (status != B_OK)
1636 			delete_area(locker.AddressSpace(), area, false);
1637 	}
1638 
1639 	if (status != B_OK)
1640 		return status;
1641 
1642 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1643 
1644 	if (alreadyWired) {
1645 		// The area is already mapped, but possibly not with the right
1646 		// memory type.
1647 		map->Lock();
1648 		map->ProtectArea(area, area->protection);
1649 		map->Unlock();
1650 	} else {
1651 		// Map the area completely.
1652 
1653 		// reserve pages needed for the mapping
1654 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1655 			area->Base() + (size - 1));
1656 		vm_page_reservation reservation;
1657 		vm_page_reserve_pages(&reservation, reservePages,
1658 			team == VMAddressSpace::KernelID()
1659 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1660 
1661 		map->Lock();
1662 
1663 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1664 			map->Map(area->Base() + offset, physicalAddress + offset,
1665 				protection, area->MemoryType(), &reservation);
1666 		}
1667 
1668 		map->Unlock();
1669 
1670 		vm_page_unreserve_pages(&reservation);
1671 	}
1672 
1673 	// modify the pointer returned to be offset back into the new area
1674 	// the same way the physical address passed in was offset
1675 	*_address = (void*)((addr_t)*_address + mapOffset);
1676 
1677 	area->cache_type = CACHE_TYPE_DEVICE;
1678 	return area->id;
1679 }
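
// Editor's note: a hedged sketch of mapping a hypothetical device register
// block with vm_map_physical_memory() (not part of the original source);
// "physicalBase" stands in for a device-specific physical address:
//
//     void* registers;
//     area_id id = vm_map_physical_memory(VMAddressSpace::KernelID(),
//         "some device registers", &registers, B_ANY_KERNEL_ADDRESS,
//         B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
//         physicalBase, false);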
1680 
1681 
1682 /*!	Don't use!
1683 	TODO: This function was introduced to map physical page vecs to
1684 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1685 	use a device cache and does not track vm_page::wired_count!
1686 */
1687 area_id
1688 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1689 	uint32 addressSpec, addr_t* _size, uint32 protection,
1690 	struct generic_io_vec* vecs, uint32 vecCount)
1691 {
1692 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1693 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1694 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1695 		addressSpec, _size, protection, vecs, vecCount));
1696 
1697 	if (!arch_vm_supports_protection(protection)
1698 		|| (addressSpec & B_MTR_MASK) != 0) {
1699 		return B_NOT_SUPPORTED;
1700 	}
1701 
1702 	AddressSpaceWriteLocker locker(team);
1703 	if (!locker.IsLocked())
1704 		return B_BAD_TEAM_ID;
1705 
1706 	if (vecCount == 0)
1707 		return B_BAD_VALUE;
1708 
1709 	addr_t size = 0;
1710 	for (uint32 i = 0; i < vecCount; i++) {
1711 		if (vecs[i].base % B_PAGE_SIZE != 0
1712 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1713 			return B_BAD_VALUE;
1714 		}
1715 
1716 		size += vecs[i].length;
1717 	}
1718 
1719 	// create a device cache
1720 	VMCache* cache;
1721 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1722 	if (result != B_OK)
1723 		return result;
1724 
1725 	cache->virtual_end = size;
1726 
1727 	cache->Lock();
1728 
1729 	VMArea* area;
1730 	virtual_address_restrictions addressRestrictions = {};
1731 	addressRestrictions.address = *_address;
1732 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1733 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1734 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1735 		&addressRestrictions, true, &area, _address);
1736 
1737 	if (result != B_OK)
1738 		cache->ReleaseRefLocked();
1739 
1740 	cache->Unlock();
1741 
1742 	if (result != B_OK)
1743 		return result;
1744 
1745 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1746 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1747 		area->Base() + (size - 1));
1748 
1749 	vm_page_reservation reservation;
1750 	vm_page_reserve_pages(&reservation, reservePages,
1751 			team == VMAddressSpace::KernelID()
1752 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1753 	map->Lock();
1754 
1755 	uint32 vecIndex = 0;
1756 	size_t vecOffset = 0;
1757 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1758 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1759 			vecOffset = 0;
1760 			vecIndex++;
1761 		}
1762 
1763 		if (vecIndex >= vecCount)
1764 			break;
1765 
1766 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1767 			protection, area->MemoryType(), &reservation);
1768 
1769 		vecOffset += B_PAGE_SIZE;
1770 	}
1771 
1772 	map->Unlock();
1773 	vm_page_unreserve_pages(&reservation);
1774 
1775 	if (_size != NULL)
1776 		*_size = size;
1777 
1778 	area->cache_type = CACHE_TYPE_DEVICE;
1779 	return area->id;
1780 }
1781 
1782 
1783 area_id
1784 vm_create_null_area(team_id team, const char* name, void** address,
1785 	uint32 addressSpec, addr_t size, uint32 flags)
1786 {
1787 	size = PAGE_ALIGN(size);
1788 
1789 	// Lock the address space and, if B_EXACT_ADDRESS and
1790 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1791 	// is not wired.
1792 	AddressSpaceWriteLocker locker;
1793 	do {
1794 		if (locker.SetTo(team) != B_OK)
1795 			return B_BAD_TEAM_ID;
1796 	} while (addressSpec == B_EXACT_ADDRESS
1797 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1798 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1799 			(addr_t)*address, size, &locker));
1800 
1801 	// create a null cache
1802 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1803 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1804 	VMCache* cache;
1805 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1806 	if (status != B_OK)
1807 		return status;
1808 
1809 	cache->temporary = 1;
1810 	cache->virtual_end = size;
1811 
1812 	cache->Lock();
1813 
1814 	VMArea* area;
1815 	virtual_address_restrictions addressRestrictions = {};
1816 	addressRestrictions.address = *address;
1817 	addressRestrictions.address_specification = addressSpec;
1818 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1819 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1820 		&addressRestrictions, true, &area, address);
1821 
1822 	if (status < B_OK) {
1823 		cache->ReleaseRefAndUnlock();
1824 		return status;
1825 	}
1826 
1827 	cache->Unlock();
1828 
1829 	area->cache_type = CACHE_TYPE_NULL;
1830 	return area->id;
1831 }
1832 
1833 
1834 /*!	Creates the vnode cache for the specified \a vnode.
1835 	The vnode has to be marked busy when calling this function.
1836 */
1837 status_t
1838 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1839 {
1840 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1841 }
1842 
1843 
1844 /*!	\a cache must be locked. The area's address space must be read-locked.
1845 */
1846 static void
1847 pre_map_area_pages(VMArea* area, VMCache* cache,
1848 	vm_page_reservation* reservation)
1849 {
1850 	addr_t baseAddress = area->Base();
1851 	addr_t cacheOffset = area->cache_offset;
1852 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1853 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1854 
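	// map all pages the cache already holds that fall within the area's range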
1855 	for (VMCachePagesTree::Iterator it
1856 				= cache->pages.GetIterator(firstPage, true, true);
1857 			vm_page* page = it.Next();) {
1858 		if (page->cache_offset >= endPage)
1859 			break;
1860 
1861 		// skip busy and inactive pages
1862 		if (page->busy || page->usage_count == 0)
1863 			continue;
1864 
1865 		DEBUG_PAGE_ACCESS_START(page);
1866 		map_page(area, page,
1867 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1868 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1869 		DEBUG_PAGE_ACCESS_END(page);
1870 	}
1871 }
1872 
1873 
1874 /*!	Will map the file specified by \a fd to an area in memory.
1875 	The file will be mirrored beginning at the specified \a offset. The
1876 	\a offset and \a size arguments have to be page aligned.
1877 */
1878 static area_id
1879 _vm_map_file(team_id team, const char* name, void** _address,
1880 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1881 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1882 {
1883 	// TODO: for binary files, we want to make sure that they get a fixed
1884 	//	copy of the file at mapping time, i.e. later changes should not
1885 	//	make it into the mapped copy -- this will need quite some changes
1886 	//	to be done in a nice way
1887 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1888 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1889 
1890 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1891 	size = PAGE_ALIGN(size);
1892 
1893 	if (mapping == REGION_NO_PRIVATE_MAP)
1894 		protection |= B_SHARED_AREA;
1895 	if (addressSpec != B_EXACT_ADDRESS)
1896 		unmapAddressRange = false;
1897 
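	// without a file descriptor this is just an anonymous mapping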
1898 	if (fd < 0) {
1899 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1900 		virtual_address_restrictions virtualRestrictions = {};
1901 		virtualRestrictions.address = *_address;
1902 		virtualRestrictions.address_specification = addressSpec;
1903 		physical_address_restrictions physicalRestrictions = {};
1904 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1905 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1906 			_address);
1907 	}
1908 
1909 	// get the open flags of the FD
1910 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1911 	if (descriptor == NULL)
1912 		return EBADF;
1913 	int32 openMode = descriptor->open_mode;
1914 	put_fd(descriptor);
1915 
1916 	// The FD must be open for reading in any case. For a shared mapping with
1917 	// write access, the FD must additionally be open for writing.
1918 	if ((openMode & O_ACCMODE) == O_WRONLY
1919 		|| (mapping == REGION_NO_PRIVATE_MAP
1920 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1921 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1922 		return EACCES;
1923 	}
1924 
1925 	// get the vnode for the object, this also grabs a ref to it
1926 	struct vnode* vnode = NULL;
1927 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1928 	if (status < B_OK)
1929 		return status;
1930 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1931 
1932 	// If we're going to pre-map pages, we need to reserve the pages needed by
1933 	// the mapping backend upfront.
1934 	page_num_t reservedPreMapPages = 0;
1935 	vm_page_reservation reservation;
1936 	if ((protection & B_READ_AREA) != 0) {
1937 		AddressSpaceWriteLocker locker;
1938 		status = locker.SetTo(team);
1939 		if (status != B_OK)
1940 			return status;
1941 
1942 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
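		// worst-case number of pages the translation map needs to map the range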
1943 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1944 
1945 		locker.Unlock();
1946 
1947 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1948 			team == VMAddressSpace::KernelID()
1949 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1950 	}
1951 
1952 	struct PageUnreserver {
1953 		PageUnreserver(vm_page_reservation* reservation)
1954 			:
1955 			fReservation(reservation)
1956 		{
1957 		}
1958 
1959 		~PageUnreserver()
1960 		{
1961 			if (fReservation != NULL)
1962 				vm_page_unreserve_pages(fReservation);
1963 		}
1964 
1965 		vm_page_reservation* fReservation;
1966 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1967 
1968 	// Lock the address space and, if the specified address range shall be
1969 	// unmapped, ensure it is not wired.
1970 	AddressSpaceWriteLocker locker;
1971 	do {
1972 		if (locker.SetTo(team) != B_OK)
1973 			return B_BAD_TEAM_ID;
1974 	} while (unmapAddressRange
1975 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1976 			(addr_t)*_address, size, &locker));
1977 
1978 	// TODO: this only works for file systems that use the file cache
1979 	VMCache* cache;
1980 	status = vfs_get_vnode_cache(vnode, &cache, false);
1981 	if (status < B_OK)
1982 		return status;
1983 
1984 	cache->Lock();
1985 
1986 	VMArea* area;
1987 	virtual_address_restrictions addressRestrictions = {};
1988 	addressRestrictions.address = *_address;
1989 	addressRestrictions.address_specification = addressSpec;
1990 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1991 		0, protection, mapping,
1992 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1993 		&addressRestrictions, kernel, &area, _address);
1994 
1995 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1996 		// map_backing_store() cannot know we no longer need the ref
1997 		cache->ReleaseRefLocked();
1998 	}
1999 
2000 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2001 		pre_map_area_pages(area, cache, &reservation);
2002 
2003 	cache->Unlock();
2004 
2005 	if (status == B_OK) {
2006 		// TODO: this probably deserves a smarter solution, i.e. don't always
2007 		// prefetch stuff, and also, probably don't trigger it at this place.
2008 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2009 			// prefetches at most 10 MB starting from "offset"
2010 	}
2011 
2012 	if (status != B_OK)
2013 		return status;
2014 
2015 	area->cache_type = CACHE_TYPE_VNODE;
2016 	return area->id;
2017 }
2018 
2019 
2020 area_id
2021 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2022 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2023 	int fd, off_t offset)
2024 {
2025 	if (!arch_vm_supports_protection(protection))
2026 		return B_NOT_SUPPORTED;
2027 
2028 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2029 		mapping, unmapAddressRange, fd, offset, true);
2030 }
2031 
2032 
2033 VMCache*
2034 vm_area_get_locked_cache(VMArea* area)
2035 {
2036 	rw_lock_read_lock(&sAreaCacheLock);
2037 
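	// Loop until we have locked the cache that is (still) the area's current
	// cache.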
2038 	while (true) {
2039 		VMCache* cache = area->cache;
2040 
2041 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2042 			// cache has been deleted
2043 			rw_lock_read_lock(&sAreaCacheLock);
2044 			continue;
2045 		}
2046 
2047 		rw_lock_read_lock(&sAreaCacheLock);
2048 
2049 		if (cache == area->cache) {
2050 			cache->AcquireRefLocked();
2051 			rw_lock_read_unlock(&sAreaCacheLock);
2052 			return cache;
2053 		}
2054 
2055 		// the cache changed in the meantime
2056 		cache->Unlock();
2057 	}
2058 }
2059 
2060 
2061 void
2062 vm_area_put_locked_cache(VMCache* cache)
2063 {
2064 	cache->ReleaseRefAndUnlock();
2065 }
2066 
2067 
2068 area_id
2069 vm_clone_area(team_id team, const char* name, void** address,
2070 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2071 	bool kernel)
2072 {
2073 	VMArea* newArea = NULL;
2074 	VMArea* sourceArea;
2075 
2076 	// Check whether the source area exists and is cloneable. If so, mark it
2077 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2078 	{
2079 		AddressSpaceWriteLocker locker;
2080 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2081 		if (status != B_OK)
2082 			return status;
2083 
2084 		sourceArea->protection |= B_SHARED_AREA;
2085 		protection |= B_SHARED_AREA;
2086 	}
2087 
2088 	// Now lock both address spaces and actually do the cloning.
2089 
2090 	MultiAddressSpaceLocker locker;
2091 	VMAddressSpace* sourceAddressSpace;
2092 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2093 	if (status != B_OK)
2094 		return status;
2095 
2096 	VMAddressSpace* targetAddressSpace;
2097 	status = locker.AddTeam(team, true, &targetAddressSpace);
2098 	if (status != B_OK)
2099 		return status;
2100 
2101 	status = locker.Lock();
2102 	if (status != B_OK)
2103 		return status;
2104 
2105 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2106 	if (sourceArea == NULL)
2107 		return B_BAD_VALUE;
2108 
2109 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2110 
2111 	if (!kernel && sourceAddressSpace != targetAddressSpace
2112 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2113 #if KDEBUG
2114 		Team* team = thread_get_current_thread()->team;
2115 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2116 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2117 #endif
2118 		status = B_NOT_ALLOWED;
2119 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2120 		status = B_NOT_ALLOWED;
2121 	} else {
2122 		virtual_address_restrictions addressRestrictions = {};
2123 		addressRestrictions.address = *address;
2124 		addressRestrictions.address_specification = addressSpec;
2125 		status = map_backing_store(targetAddressSpace, cache,
2126 			sourceArea->cache_offset, name, sourceArea->Size(),
2127 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2128 			kernel, &newArea, address);
2129 	}
2130 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2131 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2132 		// to create a new cache, and has therefore already acquired a reference
2133 		// to the source cache - but otherwise it has no idea that we need
2134 		// one.
2135 		cache->AcquireRefLocked();
2136 	}
2137 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2138 		// we need to map in everything at this point
2139 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2140 			// we don't have actual pages to map but a physical area
2141 			VMTranslationMap* map
2142 				= sourceArea->address_space->TranslationMap();
2143 			map->Lock();
2144 
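			// look up the physical address backing the source area's base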
2145 			phys_addr_t physicalAddress;
2146 			uint32 oldProtection;
2147 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2148 
2149 			map->Unlock();
2150 
2151 			map = targetAddressSpace->TranslationMap();
2152 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2153 				newArea->Base() + (newArea->Size() - 1));
2154 
2155 			vm_page_reservation reservation;
2156 			vm_page_reserve_pages(&reservation, reservePages,
2157 				targetAddressSpace == VMAddressSpace::Kernel()
2158 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2159 			map->Lock();
2160 
2161 			for (addr_t offset = 0; offset < newArea->Size();
2162 					offset += B_PAGE_SIZE) {
2163 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2164 					protection, newArea->MemoryType(), &reservation);
2165 			}
2166 
2167 			map->Unlock();
2168 			vm_page_unreserve_pages(&reservation);
2169 		} else {
2170 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2171 			size_t reservePages = map->MaxPagesNeededToMap(
2172 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2173 			vm_page_reservation reservation;
2174 			vm_page_reserve_pages(&reservation, reservePages,
2175 				targetAddressSpace == VMAddressSpace::Kernel()
2176 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2177 
2178 			// map in all pages from source
2179 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2180 					vm_page* page = it.Next();) {
2181 				if (!page->busy) {
2182 					DEBUG_PAGE_ACCESS_START(page);
2183 					map_page(newArea, page,
2184 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2185 							- newArea->cache_offset),
2186 						protection, &reservation);
2187 					DEBUG_PAGE_ACCESS_END(page);
2188 				}
2189 			}
2190 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2191 			// ensuring that!
2192 
2193 			vm_page_unreserve_pages(&reservation);
2194 		}
2195 	}
2196 	if (status == B_OK)
2197 		newArea->cache_type = sourceArea->cache_type;
2198 
2199 	vm_area_put_locked_cache(cache);
2200 
2201 	if (status < B_OK)
2202 		return status;
2203 
2204 	return newArea->id;
2205 }
2206 
2207 
2208 /*!	Deletes the specified area of the given address space.
2209 
2210 	The address space must be write-locked.
2211 	The caller must ensure that the area does not have any wired ranges.
2212 
2213 	\param addressSpace The address space containing the area.
2214 	\param area The area to be deleted.
2215 	\param deletingAddressSpace \c true, if the address space is in the process
2216 		of being deleted.
2217 */
2218 static void
2219 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2220 	bool deletingAddressSpace)
2221 {
2222 	ASSERT(!area->IsWired());
2223 
2224 	VMAreaHash::Remove(area);
2225 
2226 	// At this point the area is removed from the global hash table, but
2227 	// still exists in the area list.
2228 
2229 	// Unmap the virtual address space the area occupied.
2230 	{
2231 		// We need to lock the complete cache chain.
2232 		VMCache* topCache = vm_area_get_locked_cache(area);
2233 		VMCacheChainLocker cacheChainLocker(topCache);
2234 		cacheChainLocker.LockAllSourceCaches();
2235 
2236 		// If the area's top cache is a temporary cache and the area is the only
2237 		// one referencing it (besides us currently holding a second reference),
2238 		// the unmapping code doesn't need to care about preserving the accessed
2239 		// and dirty flags of the top cache page mappings.
2240 		bool ignoreTopCachePageFlags
2241 			= topCache->temporary && topCache->RefCount() == 2;
2242 
2243 		area->address_space->TranslationMap()->UnmapArea(area,
2244 			deletingAddressSpace, ignoreTopCachePageFlags);
2245 	}
2246 
2247 	if (!area->cache->temporary)
2248 		area->cache->WriteModified();
2249 
2250 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2251 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2252 
2253 	arch_vm_unset_memory_type(area);
2254 	addressSpace->RemoveArea(area, allocationFlags);
2255 	addressSpace->Put();
2256 
2257 	area->cache->RemoveArea(area);
2258 	area->cache->ReleaseRef();
2259 
2260 	addressSpace->DeleteArea(area, allocationFlags);
2261 }
2262 
2263 
2264 status_t
2265 vm_delete_area(team_id team, area_id id, bool kernel)
2266 {
2267 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2268 		team, id));
2269 
2270 	// lock the address space and make sure the area isn't wired
2271 	AddressSpaceWriteLocker locker;
2272 	VMArea* area;
2273 	AreaCacheLocker cacheLocker;
2274 
2275 	do {
2276 		status_t status = locker.SetFromArea(team, id, area);
2277 		if (status != B_OK)
2278 			return status;
2279 
2280 		cacheLocker.SetTo(area);
2281 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2282 
2283 	cacheLocker.Unlock();
2284 
2285 	// SetFromArea will have returned an error if the area's owning team is not
2286 	// the same as the passed team, so we don't need to do those checks here.
2287 
2288 	delete_area(locker.AddressSpace(), area, false);
2289 	return B_OK;
2290 }
2291 
2292 
2293 /*!	Creates a new cache on top of the given cache, moves all areas from
2294 	the old cache to the new one, and changes the protection of all affected
2295 	areas' pages to read-only. If requested, wired pages are moved up to the
2296 	new cache and copies are added to the old cache in their place.
2297 	Preconditions:
2298 	- The given cache must be locked.
2299 	- All of the cache's areas' address spaces must be read locked.
2300 	- Either the cache must not have any wired ranges or a page reservation for
2301 	  all wired pages must be provided, so they can be copied.
2302 
2303 	\param lowerCache The cache on top of which a new cache shall be created.
2304 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2305 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2306 		has wired pages. The wired pages are copied in this case.
2307 */
2308 static status_t
2309 vm_copy_on_write_area(VMCache* lowerCache,
2310 	vm_page_reservation* wiredPagesReservation)
2311 {
2312 	VMCache* upperCache;
2313 
2314 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2315 
2316 	// We need to separate the cache from its areas. The cache goes one level
2317 	// deeper and we create a new cache in between.
2318 
2319 	// create an anonymous cache
2320 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2321 		lowerCache->GuardSize() / B_PAGE_SIZE,
2322 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2323 		VM_PRIORITY_USER);
2324 	if (status != B_OK)
2325 		return status;
2326 
2327 	upperCache->Lock();
2328 
2329 	upperCache->temporary = 1;
2330 	upperCache->virtual_base = lowerCache->virtual_base;
2331 	upperCache->virtual_end = lowerCache->virtual_end;
2332 
2333 	// transfer the lower cache areas to the upper cache
2334 	rw_lock_write_lock(&sAreaCacheLock);
2335 	upperCache->TransferAreas(lowerCache);
2336 	rw_lock_write_unlock(&sAreaCacheLock);
2337 
2338 	lowerCache->AddConsumer(upperCache);
2339 
2340 	// We now need to remap all pages from all of the cache's areas read-only,
2341 	// so that a copy will be created on next write access. If there are wired
2342 	// pages, we keep their protection, move them to the upper cache and create
2343 	// copies for the lower cache.
2344 	if (wiredPagesReservation != NULL) {
2345 		// We need to handle wired pages -- iterate through the cache's pages.
2346 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2347 				vm_page* page = it.Next();) {
2348 			if (page->WiredCount() > 0) {
2349 				// allocate a new page and copy the wired one
2350 				vm_page* copiedPage = vm_page_allocate_page(
2351 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2352 
2353 				vm_memcpy_physical_page(
2354 					copiedPage->physical_page_number * B_PAGE_SIZE,
2355 					page->physical_page_number * B_PAGE_SIZE);
2356 
2357 				// move the wired page to the upper cache (note: removing is OK
2358 				// with the SplayTree iterator) and insert the copy
2359 				upperCache->MovePage(page);
2360 				lowerCache->InsertPage(copiedPage,
2361 					page->cache_offset * B_PAGE_SIZE);
2362 
2363 				DEBUG_PAGE_ACCESS_END(copiedPage);
2364 			} else {
2365 				// Change the protection of this page in all areas.
2366 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2367 						tempArea = tempArea->cache_next) {
2368 					// The area must be readable in the same way it was
2369 					// previously writable.
2370 					uint32 protection = B_KERNEL_READ_AREA;
2371 					if ((tempArea->protection & B_READ_AREA) != 0)
2372 						protection |= B_READ_AREA;
2373 
2374 					VMTranslationMap* map
2375 						= tempArea->address_space->TranslationMap();
2376 					map->Lock();
2377 					map->ProtectPage(tempArea,
2378 						virtual_page_address(tempArea, page), protection);
2379 					map->Unlock();
2380 				}
2381 			}
2382 		}
2383 	} else {
2384 		ASSERT(lowerCache->WiredPagesCount() == 0);
2385 
2386 		// just change the protection of all areas
2387 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2388 				tempArea = tempArea->cache_next) {
2389 			// The area must be readable in the same way it was previously
2390 			// writable.
2391 			uint32 protection = B_KERNEL_READ_AREA;
2392 			if ((tempArea->protection & B_READ_AREA) != 0)
2393 				protection |= B_READ_AREA;
2394 
2395 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2396 			map->Lock();
2397 			map->ProtectArea(tempArea, protection);
2398 			map->Unlock();
2399 		}
2400 	}
2401 
2402 	vm_area_put_locked_cache(upperCache);
2403 
2404 	return B_OK;
2405 }
2406 
2407 
2408 area_id
2409 vm_copy_area(team_id team, const char* name, void** _address,
2410 	uint32 addressSpec, uint32 protection, area_id sourceID)
2411 {
2412 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2413 
2414 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2415 		// set the same protection for the kernel as for userland
2416 		protection |= B_KERNEL_READ_AREA;
2417 		if (writableCopy)
2418 			protection |= B_KERNEL_WRITE_AREA;
2419 	}
2420 
2421 	// Do the locking: target address space, all address spaces associated with
2422 	// the source cache, and the cache itself.
2423 	MultiAddressSpaceLocker locker;
2424 	VMAddressSpace* targetAddressSpace;
2425 	VMCache* cache;
2426 	VMArea* source;
2427 	AreaCacheLocker cacheLocker;
2428 	status_t status;
2429 	bool sharedArea;
2430 
2431 	page_num_t wiredPages = 0;
2432 	vm_page_reservation wiredPagesReservation;
2433 
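	// Retry the locking whenever more wired pages have to be reserved than are
	// currently reserved -- reserving requires dropping the locks.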
2434 	bool restart;
2435 	do {
2436 		restart = false;
2437 
2438 		locker.Unset();
2439 		status = locker.AddTeam(team, true, &targetAddressSpace);
2440 		if (status == B_OK) {
2441 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2442 				&cache);
2443 		}
2444 		if (status != B_OK)
2445 			return status;
2446 
2447 		cacheLocker.SetTo(cache, true);	// already locked
2448 
2449 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2450 
2451 		page_num_t oldWiredPages = wiredPages;
2452 		wiredPages = 0;
2453 
2454 		// If the source area isn't shared, count the number of wired pages in
2455 		// the cache and reserve as many pages.
2456 		if (!sharedArea) {
2457 			wiredPages = cache->WiredPagesCount();
2458 
2459 			if (wiredPages > oldWiredPages) {
2460 				cacheLocker.Unlock();
2461 				locker.Unlock();
2462 
2463 				if (oldWiredPages > 0)
2464 					vm_page_unreserve_pages(&wiredPagesReservation);
2465 
2466 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2467 					VM_PRIORITY_USER);
2468 
2469 				restart = true;
2470 			}
2471 		} else if (oldWiredPages > 0)
2472 			vm_page_unreserve_pages(&wiredPagesReservation);
2473 	} while (restart);
2474 
2475 	// unreserve pages later
2476 	struct PagesUnreserver {
2477 		PagesUnreserver(vm_page_reservation* reservation)
2478 			:
2479 			fReservation(reservation)
2480 		{
2481 		}
2482 
2483 		~PagesUnreserver()
2484 		{
2485 			if (fReservation != NULL)
2486 				vm_page_unreserve_pages(fReservation);
2487 		}
2488 
2489 	private:
2490 		vm_page_reservation*	fReservation;
2491 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2492 
2493 	if (addressSpec == B_CLONE_ADDRESS) {
2494 		addressSpec = B_EXACT_ADDRESS;
2495 		*_address = (void*)source->Base();
2496 	}
2497 
2498 	// First, create a cache on top of the source area, or use the existing
2499 	// one, if this is a shared area.
2500 
2501 	VMArea* target;
2502 	virtual_address_restrictions addressRestrictions = {};
2503 	addressRestrictions.address = *_address;
2504 	addressRestrictions.address_specification = addressSpec;
2505 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2506 		name, source->Size(), source->wiring, protection,
2507 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2508 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2509 		&addressRestrictions, true, &target, _address);
2510 	if (status < B_OK)
2511 		return status;
2512 
2513 	if (sharedArea) {
2514 		// The new area uses the old area's cache, but map_backing_store()
2515 		// hasn't acquired a ref. So we have to do that now.
2516 		cache->AcquireRefLocked();
2517 	}
2518 
2519 	// If the source area is writable, we need to move it one layer up as well
2520 
2521 	if (!sharedArea) {
2522 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2523 			// TODO: do something more useful if this fails!
2524 			if (vm_copy_on_write_area(cache,
2525 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2526 				panic("vm_copy_on_write_area() failed!\n");
2527 			}
2528 		}
2529 	}
2530 
2531 	// we return the ID of the newly created area
2532 	return target->id;
2533 }
2534 
2535 
2536 status_t
2537 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2538 	bool kernel)
2539 {
2540 	fix_protection(&newProtection);
2541 
2542 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2543 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2544 
2545 	if (!arch_vm_supports_protection(newProtection))
2546 		return B_NOT_SUPPORTED;
2547 
2548 	bool becomesWritable
2549 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2550 
2551 	// lock address spaces and cache
2552 	MultiAddressSpaceLocker locker;
2553 	VMCache* cache;
2554 	VMArea* area;
2555 	status_t status;
2556 	AreaCacheLocker cacheLocker;
2557 	bool isWritable;
2558 
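	// Retry the locking whenever we had to wait for a wired range to be
	// unwired.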
2559 	bool restart;
2560 	do {
2561 		restart = false;
2562 
2563 		locker.Unset();
2564 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2565 		if (status != B_OK)
2566 			return status;
2567 
2568 		cacheLocker.SetTo(cache, true);	// already locked
2569 
2570 		if (!kernel && area->address_space == VMAddressSpace::Kernel()) {
2571 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2572 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2573 				" (%s)\n", team, newProtection, areaID, area->name);
2574 			return B_NOT_ALLOWED;
2575 		}
2576 
2577 		if (area->protection == newProtection)
2578 			return B_OK;
2579 
2580 		if (team != VMAddressSpace::KernelID()
2581 			&& area->address_space->ID() != team) {
2582 			// unless you're the kernel, you are only allowed to set
2583 			// the protection of your own areas
2584 			return B_NOT_ALLOWED;
2585 		}
2586 
2587 		isWritable
2588 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2589 
2590 		// Make sure the area (respectively, if we're going to call
2591 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2592 		// wired ranges.
2593 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2594 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2595 					otherArea = otherArea->cache_next) {
2596 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2597 					restart = true;
2598 					break;
2599 				}
2600 			}
2601 		} else {
2602 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2603 				restart = true;
2604 		}
2605 	} while (restart);
2606 
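	// changePageProtection: whether existing page mappings have to be adjusted
	// at all; changeTopCachePagesOnly: whether adjusting only the top cache's
	// pages suffices.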
2607 	bool changePageProtection = true;
2608 	bool changeTopCachePagesOnly = false;
2609 
2610 	if (isWritable && !becomesWritable) {
2611 		// writable -> !writable
2612 
2613 		if (cache->source != NULL && cache->temporary) {
2614 			if (cache->CountWritableAreas(area) == 0) {
2615 				// Since this cache is now backed by the pages of its source
2616 				// cache, we can reduce its commitment to account only for the
2617 				// pages that are actually in this cache.
2618 
2619 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2620 					team == VMAddressSpace::KernelID()
2621 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2622 
2623 				// TODO: we may be able to join with our source cache, if
2624 				// count == 0
2625 			}
2626 		}
2627 
2628 		// If only the writability changes, we can just remap the pages of the
2629 		// top cache, since the pages of lower caches are mapped read-only
2630 		// anyway. That's only advantageous if the number of pages in the cache
2631 		// is significantly smaller than the number of pages in the area,
2632 		// though.
2633 		if (newProtection
2634 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2635 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2636 			changeTopCachePagesOnly = true;
2637 		}
2638 	} else if (!isWritable && becomesWritable) {
2639 		// !writable -> writable
2640 
2641 		if (!cache->consumers.IsEmpty()) {
2642 			// There are consumers -- we have to insert a new cache. Fortunately
2643 			// vm_copy_on_write_area() does everything that's needed.
2644 			changePageProtection = false;
2645 			status = vm_copy_on_write_area(cache, NULL);
2646 		} else {
2647 			// No consumers, so we don't need to insert a new one.
2648 			if (cache->source != NULL && cache->temporary) {
2649 				// the cache's commitment must contain all possible pages
2650 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2651 					team == VMAddressSpace::KernelID()
2652 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2653 			}
2654 
2655 			if (status == B_OK && cache->source != NULL) {
2656 				// There's a source cache, hence we can't just change all pages'
2657 				// protection or we might allow writing into pages belonging to
2658 				// a lower cache.
2659 				changeTopCachePagesOnly = true;
2660 			}
2661 		}
2662 	} else {
2663 		// we don't have anything special to do in all other cases
2664 	}
2665 
2666 	if (status == B_OK) {
2667 		// remap existing pages in this cache
2668 		if (changePageProtection) {
2669 			VMTranslationMap* map = area->address_space->TranslationMap();
2670 			map->Lock();
2671 
2672 			if (changeTopCachePagesOnly) {
2673 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2674 				page_num_t lastPageOffset
2675 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2676 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2677 						vm_page* page = it.Next();) {
2678 					if (page->cache_offset >= firstPageOffset
2679 						&& page->cache_offset <= lastPageOffset) {
2680 						addr_t address = virtual_page_address(area, page);
2681 						map->ProtectPage(area, address, newProtection);
2682 					}
2683 				}
2684 			} else
2685 				map->ProtectArea(area, newProtection);
2686 
2687 			map->Unlock();
2688 		}
2689 
2690 		area->protection = newProtection;
2691 	}
2692 
2693 	return status;
2694 }
2695 
2696 
2697 status_t
2698 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2699 {
2700 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2701 	if (addressSpace == NULL)
2702 		return B_BAD_TEAM_ID;
2703 
2704 	VMTranslationMap* map = addressSpace->TranslationMap();
2705 
2706 	map->Lock();
2707 	uint32 dummyFlags;
2708 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2709 	map->Unlock();
2710 
2711 	addressSpace->Put();
2712 	return status;
2713 }
2714 
2715 
2716 /*!	The page's cache must be locked.
2717 */
2718 bool
2719 vm_test_map_modification(vm_page* page)
2720 {
2721 	if (page->modified)
2722 		return true;
2723 
2724 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2725 	vm_page_mapping* mapping;
2726 	while ((mapping = iterator.Next()) != NULL) {
2727 		VMArea* area = mapping->area;
2728 		VMTranslationMap* map = area->address_space->TranslationMap();
2729 
2730 		phys_addr_t physicalAddress;
2731 		uint32 flags;
2732 		map->Lock();
2733 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2734 		map->Unlock();
2735 
2736 		if ((flags & PAGE_MODIFIED) != 0)
2737 			return true;
2738 	}
2739 
2740 	return false;
2741 }
2742 
2743 
2744 /*!	The page's cache must be locked.
2745 */
2746 void
2747 vm_clear_map_flags(vm_page* page, uint32 flags)
2748 {
2749 	if ((flags & PAGE_ACCESSED) != 0)
2750 		page->accessed = false;
2751 	if ((flags & PAGE_MODIFIED) != 0)
2752 		page->modified = false;
2753 
2754 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2755 	vm_page_mapping* mapping;
2756 	while ((mapping = iterator.Next()) != NULL) {
2757 		VMArea* area = mapping->area;
2758 		VMTranslationMap* map = area->address_space->TranslationMap();
2759 
2760 		map->Lock();
2761 		map->ClearFlags(virtual_page_address(area, page), flags);
2762 		map->Unlock();
2763 	}
2764 }
2765 
2766 
2767 /*!	Removes all mappings from a page.
2768 	After you've called this function, the page is unmapped from memory and
2769 	the page's \c accessed and \c modified flags have been updated according
2770 	to the state of the mappings.
2771 	The page's cache must be locked.
2772 */
2773 void
2774 vm_remove_all_page_mappings(vm_page* page)
2775 {
2776 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2777 		VMArea* area = mapping->area;
2778 		VMTranslationMap* map = area->address_space->TranslationMap();
2779 		addr_t address = virtual_page_address(area, page);
2780 		map->UnmapPage(area, address, false);
2781 	}
2782 }
2783 
2784 
2785 int32
2786 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2787 {
2788 	int32 count = 0;
2789 
2790 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2791 	vm_page_mapping* mapping;
2792 	while ((mapping = iterator.Next()) != NULL) {
2793 		VMArea* area = mapping->area;
2794 		VMTranslationMap* map = area->address_space->TranslationMap();
2795 
2796 		bool modified;
2797 		if (map->ClearAccessedAndModified(area,
2798 				virtual_page_address(area, page), false, modified)) {
2799 			count++;
2800 		}
2801 
2802 		page->modified |= modified;
2803 	}
2804 
2805 
2806 	if (page->accessed) {
2807 		count++;
2808 		page->accessed = false;
2809 	}
2810 
2811 	return count;
2812 }
2813 
2814 
2815 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2816 	mappings.
2817 	The function iterates through the page mappings and removes them until
2818 	encountering one that has been accessed. From then on it will continue to
2819 	iterate, but only clear the accessed flag of the mapping. The page's
2820 	\c modified bit will be updated accordingly, the \c accessed bit will be
2821 	cleared.
2822 	\return The number of mapping accessed bits encountered, including the
2823 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2824 		of the page have been removed.
2825 */
2826 int32
2827 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2828 {
2829 	ASSERT(page->WiredCount() == 0);
2830 
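	// if the page itself was accessed, only clear the mappings' accessed flags
	// instead of removing the mappings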
2831 	if (page->accessed)
2832 		return vm_clear_page_mapping_accessed_flags(page);
2833 
2834 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2835 		VMArea* area = mapping->area;
2836 		VMTranslationMap* map = area->address_space->TranslationMap();
2837 		addr_t address = virtual_page_address(area, page);
2838 		bool modified = false;
2839 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2840 			page->accessed = true;
2841 			page->modified |= modified;
2842 			return vm_clear_page_mapping_accessed_flags(page);
2843 		}
2844 		page->modified |= modified;
2845 	}
2846 
2847 	return 0;
2848 }
2849 
2850 
2851 static int
2852 display_mem(int argc, char** argv)
2853 {
2854 	bool physical = false;
2855 	addr_t copyAddress;
2856 	int32 displayWidth;
2857 	int32 itemSize;
2858 	int32 num = -1;
2859 	addr_t address;
2860 	int i = 1, j;
2861 
2862 	if (argc > 1 && argv[1][0] == '-') {
2863 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2864 			physical = true;
2865 			i++;
2866 		} else
2867 			i = 99;
2868 	}
2869 
2870 	if (argc < i + 1 || argc > i + 2) {
2871 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2872 			"\tdl - 8 bytes\n"
2873 			"\tdw - 4 bytes\n"
2874 			"\tds - 2 bytes\n"
2875 			"\tdb - 1 byte\n"
2876 			"\tstring - a whole string\n"
2877 			"  -p or --physical only allows memory from a single page to be "
2878 			"displayed.\n");
2879 		return 0;
2880 	}
2881 
2882 	address = parse_expression(argv[i]);
2883 
2884 	if (argc > i + 1)
2885 		num = parse_expression(argv[i + 1]);
2886 
2887 	// build the format string
2888 	if (strcmp(argv[0], "db") == 0) {
2889 		itemSize = 1;
2890 		displayWidth = 16;
2891 	} else if (strcmp(argv[0], "ds") == 0) {
2892 		itemSize = 2;
2893 		displayWidth = 8;
2894 	} else if (strcmp(argv[0], "dw") == 0) {
2895 		itemSize = 4;
2896 		displayWidth = 4;
2897 	} else if (strcmp(argv[0], "dl") == 0) {
2898 		itemSize = 8;
2899 		displayWidth = 2;
2900 	} else if (strcmp(argv[0], "string") == 0) {
2901 		itemSize = 1;
2902 		displayWidth = -1;
2903 	} else {
2904 		kprintf("display_mem called in an invalid way!\n");
2905 		return 0;
2906 	}
2907 
2908 	if (num <= 0)
2909 		num = displayWidth;
2910 
2911 	void* physicalPageHandle = NULL;
2912 
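	// in physical mode, map the page via the debug API and restrict the dump
	// to a single page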
2913 	if (physical) {
2914 		int32 offset = address & (B_PAGE_SIZE - 1);
2915 		if (num * itemSize + offset > B_PAGE_SIZE) {
2916 			num = (B_PAGE_SIZE - offset) / itemSize;
2917 			kprintf("NOTE: number of bytes has been cut to page size\n");
2918 		}
2919 
2920 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2921 
2922 		if (vm_get_physical_page_debug(address, &copyAddress,
2923 				&physicalPageHandle) != B_OK) {
2924 			kprintf("getting the hardware page failed.");
2925 			return 0;
2926 		}
2927 
2928 		address += offset;
2929 		copyAddress += offset;
2930 	} else
2931 		copyAddress = address;
2932 
2933 	if (!strcmp(argv[0], "string")) {
2934 		kprintf("%p \"", (char*)copyAddress);
2935 
2936 		// string mode
2937 		for (i = 0; true; i++) {
2938 			char c;
2939 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2940 					!= B_OK
2941 				|| c == '\0') {
2942 				break;
2943 			}
2944 
2945 			if (c == '\n')
2946 				kprintf("\\n");
2947 			else if (c == '\t')
2948 				kprintf("\\t");
2949 			else {
2950 				if (!isprint(c))
2951 					c = '.';
2952 
2953 				kprintf("%c", c);
2954 			}
2955 		}
2956 
2957 		kprintf("\"\n");
2958 	} else {
2959 		// number mode
2960 		for (i = 0; i < num; i++) {
2961 			uint64 value;
2962 
2963 			if ((i % displayWidth) == 0) {
2964 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2965 				if (i != 0)
2966 					kprintf("\n");
2967 
2968 				kprintf("[0x%lx]  ", address + i * itemSize);
2969 
2970 				for (j = 0; j < displayed; j++) {
2971 					char c;
2972 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2973 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2974 						displayed = j;
2975 						break;
2976 					}
2977 					if (!isprint(c))
2978 						c = '.';
2979 
2980 					kprintf("%c", c);
2981 				}
2982 				if (num > displayWidth) {
2983 					// make sure the spacing in the last line is correct
2984 					for (j = displayed; j < displayWidth * itemSize; j++)
2985 						kprintf(" ");
2986 				}
2987 				kprintf("  ");
2988 			}
2989 
2990 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2991 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2992 				kprintf("read fault");
2993 				break;
2994 			}
2995 
2996 			switch (itemSize) {
2997 				case 1:
2998 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
2999 					break;
3000 				case 2:
3001 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3002 					break;
3003 				case 4:
3004 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3005 					break;
3006 				case 8:
3007 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3008 					break;
3009 			}
3010 		}
3011 
3012 		kprintf("\n");
3013 	}
3014 
3015 	if (physical) {
3016 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3017 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3018 	}
3019 	return 0;
3020 }
3021 
3022 
3023 static void
3024 dump_cache_tree_recursively(VMCache* cache, int level,
3025 	VMCache* highlightCache)
3026 {
3027 	// print this cache
3028 	for (int i = 0; i < level; i++)
3029 		kprintf("  ");
3030 	if (cache == highlightCache)
3031 		kprintf("%p <--\n", cache);
3032 	else
3033 		kprintf("%p\n", cache);
3034 
3035 	// recursively print its consumers
3036 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3037 			VMCache* consumer = it.Next();) {
3038 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3039 	}
3040 }
3041 
3042 
3043 static int
3044 dump_cache_tree(int argc, char** argv)
3045 {
3046 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3047 		kprintf("usage: %s <address>\n", argv[0]);
3048 		return 0;
3049 	}
3050 
3051 	addr_t address = parse_expression(argv[1]);
3052 	if (address == 0)
3053 		return 0;
3054 
3055 	VMCache* cache = (VMCache*)address;
3056 	VMCache* root = cache;
3057 
3058 	// find the root cache (the transitive source)
3059 	while (root->source != NULL)
3060 		root = root->source;
3061 
3062 	dump_cache_tree_recursively(root, 0, cache);
3063 
3064 	return 0;
3065 }
3066 
3067 
3068 const char*
3069 vm_cache_type_to_string(int32 type)
3070 {
3071 	switch (type) {
3072 		case CACHE_TYPE_RAM:
3073 			return "RAM";
3074 		case CACHE_TYPE_DEVICE:
3075 			return "device";
3076 		case CACHE_TYPE_VNODE:
3077 			return "vnode";
3078 		case CACHE_TYPE_NULL:
3079 			return "null";
3080 
3081 		default:
3082 			return "unknown";
3083 	}
3084 }
3085 
3086 
3087 #if DEBUG_CACHE_LIST
3088 
3089 static void
3090 update_cache_info_recursively(VMCache* cache, cache_info& info)
3091 {
3092 	info.page_count += cache->page_count;
3093 	if (cache->type == CACHE_TYPE_RAM)
3094 		info.committed += cache->committed_size;
3095 
3096 	// recurse
3097 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3098 			VMCache* consumer = it.Next();) {
3099 		update_cache_info_recursively(consumer, info);
3100 	}
3101 }
3102 
3103 
3104 static int
3105 cache_info_compare_page_count(const void* _a, const void* _b)
3106 {
3107 	const cache_info* a = (const cache_info*)_a;
3108 	const cache_info* b = (const cache_info*)_b;
3109 	if (a->page_count == b->page_count)
3110 		return 0;
3111 	return a->page_count < b->page_count ? 1 : -1;
3112 }
3113 
3114 
3115 static int
3116 cache_info_compare_committed(const void* _a, const void* _b)
3117 {
3118 	const cache_info* a = (const cache_info*)_a;
3119 	const cache_info* b = (const cache_info*)_b;
3120 	if (a->committed == b->committed)
3121 		return 0;
3122 	return a->committed < b->committed ? 1 : -1;
3123 }
3124 
3125 
3126 static void
3127 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3128 {
3129 	for (int i = 0; i < level; i++)
3130 		kprintf("  ");
3131 
3132 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3133 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3134 		cache->virtual_base, cache->virtual_end, cache->page_count);
3135 
3136 	if (level == 0)
3137 		kprintf("/%lu", info.page_count);
3138 
3139 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3140 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3141 
3142 		if (level == 0)
3143 			kprintf("/%lu", info.committed);
3144 	}
3145 
3146 	// areas
3147 	if (cache->areas != NULL) {
3148 		VMArea* area = cache->areas;
3149 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3150 			area->name, area->address_space->ID());
3151 
3152 		while (area->cache_next != NULL) {
3153 			area = area->cache_next;
3154 			kprintf(", %" B_PRId32, area->id);
3155 		}
3156 	}
3157 
3158 	kputs("\n");
3159 
3160 	// recurse
3161 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3162 			VMCache* consumer = it.Next();) {
3163 		dump_caches_recursively(consumer, info, level + 1);
3164 	}
3165 }
3166 
3167 
3168 static int
3169 dump_caches(int argc, char** argv)
3170 {
3171 	if (sCacheInfoTable == NULL) {
3172 		kprintf("No cache info table!\n");
3173 		return 0;
3174 	}
3175 
3176 	bool sortByPageCount = true;
3177 
3178 	for (int32 i = 1; i < argc; i++) {
3179 		if (strcmp(argv[i], "-c") == 0) {
3180 			sortByPageCount = false;
3181 		} else {
3182 			print_debugger_command_usage(argv[0]);
3183 			return 0;
3184 		}
3185 	}
3186 
3187 	uint32 totalCount = 0;
3188 	uint32 rootCount = 0;
3189 	off_t totalCommitted = 0;
3190 	page_num_t totalPages = 0;
3191 
3192 	VMCache* cache = gDebugCacheList;
3193 	while (cache) {
3194 		totalCount++;
3195 		if (cache->source == NULL) {
3196 			cache_info stackInfo;
3197 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3198 				? sCacheInfoTable[rootCount] : stackInfo;
3199 			rootCount++;
3200 			info.cache = cache;
3201 			info.page_count = 0;
3202 			info.committed = 0;
3203 			update_cache_info_recursively(cache, info);
3204 			totalCommitted += info.committed;
3205 			totalPages += info.page_count;
3206 		}
3207 
3208 		cache = cache->debug_next;
3209 	}
3210 
3211 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3212 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3213 			sortByPageCount
3214 				? &cache_info_compare_page_count
3215 				: &cache_info_compare_committed);
3216 	}
3217 
3218 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3219 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3220 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3221 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3222 			"page count" : "committed size");
3223 
3224 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3225 		for (uint32 i = 0; i < rootCount; i++) {
3226 			cache_info& info = sCacheInfoTable[i];
3227 			dump_caches_recursively(info.cache, info, 0);
3228 		}
3229 	} else
3230 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3231 
3232 	return 0;
3233 }
3234 
3235 #endif	// DEBUG_CACHE_LIST
3236 
3237 
3238 static int
3239 dump_cache(int argc, char** argv)
3240 {
3241 	VMCache* cache;
3242 	bool showPages = false;
3243 	int i = 1;
3244 
3245 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3246 		kprintf("usage: %s [-ps] <address>\n"
3247 			"  if -p is specified, all pages are shown, if -s is used\n"
3248 			"  only the cache info is shown respectively.\n", argv[0]);
3249 		return 0;
3250 	}
3251 	while (argv[i][0] == '-') {
3252 		char* arg = argv[i] + 1;
3253 		while (arg[0]) {
3254 			if (arg[0] == 'p')
3255 				showPages = true;
3256 			arg++;
3257 		}
3258 		i++;
3259 	}
3260 	if (argv[i] == NULL) {
3261 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3262 		return 0;
3263 	}
3264 
3265 	addr_t address = parse_expression(argv[i]);
3266 	if (address == 0)
3267 		return 0;
3268 
3269 	cache = (VMCache*)address;
3270 
3271 	cache->Dump(showPages);
3272 
3273 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3274 
3275 	return 0;
3276 }
3277 
3278 
3279 static void
3280 dump_area_struct(VMArea* area, bool mappings)
3281 {
3282 	kprintf("AREA: %p\n", area);
3283 	kprintf("name:\t\t'%s'\n", area->name);
3284 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3285 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3286 	kprintf("base:\t\t0x%lx\n", area->Base());
3287 	kprintf("size:\t\t0x%lx\n", area->Size());
3288 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3289 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3290 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3291 	kprintf("cache:\t\t%p\n", area->cache);
3292 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3293 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3294 	kprintf("cache_next:\t%p\n", area->cache_next);
3295 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3296 
3297 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3298 	if (mappings) {
3299 		kprintf("page mappings:\n");
3300 		while (iterator.HasNext()) {
3301 			vm_page_mapping* mapping = iterator.Next();
3302 			kprintf("  %p", mapping->page);
3303 		}
3304 		kprintf("\n");
3305 	} else {
3306 		uint32 count = 0;
3307 		while (iterator.Next() != NULL) {
3308 			count++;
3309 		}
3310 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3311 	}
3312 }
3313 
3314 
3315 static int
3316 dump_area(int argc, char** argv)
3317 {
3318 	bool mappings = false;
3319 	bool found = false;
3320 	int32 index = 1;
3321 	VMArea* area;
3322 	addr_t num;
3323 
3324 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3325 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3326 			"All areas matching either id/address/name are listed. You can\n"
3327 			"restrict the lookup to a specific attribute by prefixing the\n"
3328 			"specifier with one of the id/contains/address/name keywords.\n"
3329 			"-m shows the area's mappings as well.\n");
3330 		return 0;
3331 	}
3332 
3333 	if (!strcmp(argv[1], "-m")) {
3334 		mappings = true;
3335 		index++;
3336 	}
3337 
3338 	int32 mode = 0xf;
3339 	if (!strcmp(argv[index], "id"))
3340 		mode = 1;
3341 	else if (!strcmp(argv[index], "contains"))
3342 		mode = 2;
3343 	else if (!strcmp(argv[index], "name"))
3344 		mode = 4;
3345 	else if (!strcmp(argv[index], "address"))
3346 		mode = 0;
3347 	if (mode != 0xf)
3348 		index++;
3349 
3350 	if (index >= argc) {
3351 		kprintf("No area specifier given.\n");
3352 		return 0;
3353 	}
3354 
3355 	num = parse_expression(argv[index]);
3356 
3357 	if (mode == 0) {
3358 		dump_area_struct((struct VMArea*)num, mappings);
3359 	} else {
3360 		// walk through the area list, looking for the arguments as a name
3361 
3362 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3363 		while ((area = it.Next()) != NULL) {
3364 			if (((mode & 4) != 0
3365 					&& !strcmp(argv[index], area->name))
3366 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3367 					|| (((mode & 2) != 0 && area->Base() <= num
3368 						&& area->Base() + area->Size() > num))))) {
3369 				dump_area_struct(area, mappings);
3370 				found = true;
3371 			}
3372 		}
3373 
3374 		if (!found)
3375 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3376 	}
3377 
3378 	return 0;
3379 }
3380 
3381 
3382 static int
3383 dump_area_list(int argc, char** argv)
3384 {
3385 	VMArea* area;
3386 	const char* name = NULL;
3387 	int32 id = 0;
3388 
3389 	if (argc > 1) {
3390 		id = parse_expression(argv[1]);
3391 		if (id == 0)
3392 			name = argv[1];
3393 	}
3394 
3395 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3396 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3397 		B_PRINTF_POINTER_WIDTH, "size");
3398 
3399 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3400 	while ((area = it.Next()) != NULL) {
3401 		if ((id != 0 && area->address_space->ID() != id)
3402 			|| (name != NULL && strstr(area->name, name) == NULL))
3403 			continue;
3404 
3405 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3406 			area->id, (void*)area->Base(), (void*)area->Size(),
3407 			area->protection, area->wiring, area->name);
3408 	}
3409 	return 0;
3410 }
3411 
3412 
3413 static int
3414 dump_available_memory(int argc, char** argv)
3415 {
3416 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3417 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3418 	return 0;
3419 }
3420 
3421 
3422 static int
3423 dump_mapping_info(int argc, char** argv)
3424 {
3425 	bool reverseLookup = false;
3426 	bool pageLookup = false;
3427 
3428 	int argi = 1;
3429 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3430 		const char* arg = argv[argi];
3431 		if (strcmp(arg, "-r") == 0) {
3432 			reverseLookup = true;
3433 		} else if (strcmp(arg, "-p") == 0) {
3434 			reverseLookup = true;
3435 			pageLookup = true;
3436 		} else {
3437 			print_debugger_command_usage(argv[0]);
3438 			return 0;
3439 		}
3440 	}
3441 
3442 	// We need at least one argument, the address. Optionally a thread ID can be
3443 	// specified.
3444 	if (argi >= argc || argi + 2 < argc) {
3445 		print_debugger_command_usage(argv[0]);
3446 		return 0;
3447 	}
3448 
3449 	uint64 addressValue;
3450 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3451 		return 0;
3452 
3453 	Team* team = NULL;
3454 	if (argi < argc) {
3455 		uint64 threadID;
3456 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3457 			return 0;
3458 
3459 		Thread* thread = Thread::GetDebug(threadID);
3460 		if (thread == NULL) {
3461 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3462 			return 0;
3463 		}
3464 
3465 		team = thread->team;
3466 	}
3467 
3468 	if (reverseLookup) {
3469 		phys_addr_t physicalAddress;
3470 		if (pageLookup) {
3471 			vm_page* page = (vm_page*)(addr_t)addressValue;
3472 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3473 		} else {
3474 			physicalAddress = (phys_addr_t)addressValue;
3475 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3476 		}
3477 
3478 		kprintf("    Team     Virtual Address      Area\n");
3479 		kprintf("--------------------------------------\n");
3480 
3481 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3482 			Callback()
3483 				:
3484 				fAddressSpace(NULL)
3485 			{
3486 			}
3487 
3488 			void SetAddressSpace(VMAddressSpace* addressSpace)
3489 			{
3490 				fAddressSpace = addressSpace;
3491 			}
3492 
3493 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3494 			{
3495 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3496 					virtualAddress);
3497 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3498 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3499 				else
3500 					kprintf("\n");
3501 				return false;
3502 			}
3503 
3504 		private:
3505 			VMAddressSpace*	fAddressSpace;
3506 		} callback;
3507 
3508 		if (team != NULL) {
3509 			// team specified -- get its address space
3510 			VMAddressSpace* addressSpace = team->address_space;
3511 			if (addressSpace == NULL) {
3512 				kprintf("Failed to get address space!\n");
3513 				return 0;
3514 			}
3515 
3516 			callback.SetAddressSpace(addressSpace);
3517 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3518 				physicalAddress, callback);
3519 		} else {
3520 			// no team specified -- iterate through all address spaces
3521 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3522 				addressSpace != NULL;
3523 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3524 				callback.SetAddressSpace(addressSpace);
3525 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3526 					physicalAddress, callback);
3527 			}
3528 		}
3529 	} else {
3530 		// get the address space
3531 		addr_t virtualAddress = (addr_t)addressValue;
3532 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3533 		VMAddressSpace* addressSpace;
3534 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3535 			addressSpace = VMAddressSpace::Kernel();
3536 		} else if (team != NULL) {
3537 			addressSpace = team->address_space;
3538 		} else {
3539 			Thread* thread = debug_get_debugged_thread();
3540 			if (thread == NULL || thread->team == NULL) {
3541 				kprintf("Failed to get team!\n");
3542 				return 0;
3543 			}
3544 
3545 			addressSpace = thread->team->address_space;
3546 		}
3547 
3548 		if (addressSpace == NULL) {
3549 			kprintf("Failed to get address space!\n");
3550 			return 0;
3551 		}
3552 
3553 		// let the translation map implementation do the job
3554 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3555 	}
3556 
3557 	return 0;
3558 }
3559 
3560 
3561 /*!	Deletes all areas and reserved regions in the given address space.
3562 
3563 	The caller must ensure that none of the areas has any wired ranges.
3564 
3565 	\param addressSpace The address space.
3566 	\param deletingAddressSpace \c true, if the address space is in the process
3567 		of being deleted.
3568 */
3569 void
3570 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3571 {
3572 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3573 		addressSpace->ID()));
3574 
3575 	addressSpace->WriteLock();
3576 
3577 	// remove all reserved areas in this address space
3578 	addressSpace->UnreserveAllAddressRanges(0);
3579 
3580 	// delete all the areas in this address space
3581 	while (VMArea* area = addressSpace->FirstArea()) {
3582 		ASSERT(!area->IsWired());
3583 		delete_area(addressSpace, area, deletingAddressSpace);
3584 	}
3585 
3586 	addressSpace->WriteUnlock();
3587 }
3588 
3589 
3590 static area_id
3591 vm_area_for(addr_t address, bool kernel)
3592 {
3593 	team_id team;
3594 	if (IS_USER_ADDRESS(address)) {
3595 		// we try the user team address space, if any
3596 		team = VMAddressSpace::CurrentID();
3597 		if (team < 0)
3598 			return team;
3599 	} else
3600 		team = VMAddressSpace::KernelID();
3601 
3602 	AddressSpaceReadLocker locker(team);
3603 	if (!locker.IsLocked())
3604 		return B_BAD_TEAM_ID;
3605 
3606 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3607 	if (area != NULL) {
3608 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3609 			return B_ERROR;
3610 
3611 		return area->id;
3612 	}
3613 
3614 	return B_ERROR;
3615 }
3616 
3617 
3618 /*!	Frees physical pages that were used during the boot process.
3619 	\a end is inclusive.
3620 */
3621 static void
3622 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3623 {
3624 	// free all physical pages in the specified range
3625 
3626 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3627 		phys_addr_t physicalAddress;
3628 		uint32 flags;
3629 
3630 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3631 			&& (flags & PAGE_PRESENT) != 0) {
3632 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3633 			if (page != NULL && page->State() != PAGE_STATE_FREE
3634 					 && page->State() != PAGE_STATE_CLEAR
3635 					 && page->State() != PAGE_STATE_UNUSED) {
3636 				DEBUG_PAGE_ACCESS_START(page);
3637 				vm_page_set_state(page, PAGE_STATE_FREE);
3638 			}
3639 		}
3640 	}
3641 
3642 	// unmap the memory
3643 	map->Unmap(start, end);
3644 }
3645 
3646 
3647 void
3648 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3649 {
3650 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3651 	addr_t end = start + (size - 1);
3652 	addr_t lastEnd = start;
3653 
3654 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3655 		(void*)start, (void*)end));
3656 
3657 	// The areas are sorted in virtual address space order, so
3658 	// we just have to find the holes between them that fall
3659 	// into the range we should dispose of
3660 
3661 	map->Lock();
3662 
3663 	for (VMAddressSpace::AreaIterator it
3664 				= VMAddressSpace::Kernel()->GetAreaIterator();
3665 			VMArea* area = it.Next();) {
3666 		addr_t areaStart = area->Base();
3667 		addr_t areaEnd = areaStart + (area->Size() - 1);
3668 
3669 		if (areaEnd < start)
3670 			continue;
3671 
3672 		if (areaStart > end) {
3673 			// we are done, the area is already beyond what we have to free
3674 			break;
3675 		}
3676 
3677 		if (areaStart > lastEnd) {
3678 			// this is something we can free
3679 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3680 				(void*)areaStart));
3681 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3682 		}
3683 
3684 		if (areaEnd >= end) {
3685 			lastEnd = areaEnd;
3686 				// no +1 to prevent potential overflow
3687 			break;
3688 		}
3689 
3690 		lastEnd = areaEnd + 1;
3691 	}
3692 
3693 	if (lastEnd < end) {
3694 		// we can also get rid of some space at the end of the area
3695 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3696 			(void*)end));
3697 		unmap_and_free_physical_pages(map, lastEnd, end);
3698 	}
3699 
3700 	map->Unlock();
3701 }
3702 
3703 
3704 static void
3705 create_preloaded_image_areas(struct preloaded_image* _image)
3706 {
3707 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3708 	char name[B_OS_NAME_LENGTH];
3709 	void* address;
3710 	int32 length;
3711 
3712 	// use file name to create a good area name
3713 	char* fileName = strrchr(image->name, '/');
3714 	if (fileName == NULL)
3715 		fileName = image->name;
3716 	else
3717 		fileName++;
3718 
3719 	length = strlen(fileName);
3720 	// make sure there is enough space for the suffix
3721 	if (length > 25)
3722 		length = 25;
3723 
3724 	memcpy(name, fileName, length);
3725 	strcpy(name + length, "_text");
3726 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3727 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3728 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3729 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3730 		// this will later be remapped read-only/executable by the
3731 		// ELF initialization code
3732 
3733 	strcpy(name + length, "_data");
3734 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3735 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3736 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3737 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3738 }
3739 
3740 
3741 /*!	Frees all kernel args areas that were previously allocated from the
3742 	kernel_args structure. Any boot loader resources contained in those
3743 	arguments must not be accessed anymore past this point.
3744 */
3745 void
3746 vm_free_kernel_args(kernel_args* args)
3747 {
3748 	uint32 i;
3749 
3750 	TRACE(("vm_free_kernel_args()\n"));
3751 
3752 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3753 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3754 		if (area >= B_OK)
3755 			delete_area(area);
3756 	}
3757 }
3758 
3759 
3760 static void
3761 allocate_kernel_args(kernel_args* args)
3762 {
3763 	TRACE(("allocate_kernel_args()\n"));
3764 
3765 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3766 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3767 
3768 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3769 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3770 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3771 	}
3772 }
3773 
3774 
3775 static void
3776 unreserve_boot_loader_ranges(kernel_args* args)
3777 {
3778 	TRACE(("unreserve_boot_loader_ranges()\n"));
3779 
3780 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3781 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3782 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3783 			args->virtual_allocated_range[i].size);
3784 	}
3785 }
3786 
3787 
3788 static void
3789 reserve_boot_loader_ranges(kernel_args* args)
3790 {
3791 	TRACE(("reserve_boot_loader_ranges()\n"));
3792 
3793 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3794 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3795 
3796 		// If the address is not a kernel address, we just skip it. The
3797 		// architecture-specific code has to deal with it.
3798 		if (!IS_KERNEL_ADDRESS(address)) {
3799 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3800 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3801 			continue;
3802 		}
3803 
3804 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3805 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3806 		if (status < B_OK)
3807 			panic("could not reserve boot loader ranges\n");
3808 	}
3809 }
3810 
3811 
3812 static addr_t
3813 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3814 {
3815 	size = PAGE_ALIGN(size);
3816 
3817 	// find a slot in the virtual allocation addr range
3818 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3819 		// check to see if the space between this one and the last is big enough
3820 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3821 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3822 			+ args->virtual_allocated_range[i - 1].size;
3823 
3824 		addr_t base = alignment > 0
3825 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3826 
3827 		if (base >= KERNEL_BASE && base < rangeStart
3828 				&& rangeStart - base >= size) {
3829 			args->virtual_allocated_range[i - 1].size
3830 				+= base + size - previousRangeEnd;
3831 			return base;
3832 		}
3833 	}
3834 
3835 	// we didn't find a gap between the allocation ranges; that's OK.
3836 	// see if there's a gap after the last one
3837 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3838 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3839 		+ args->virtual_allocated_range[lastEntryIndex].size;
3840 	addr_t base = alignment > 0
3841 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3842 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3843 		args->virtual_allocated_range[lastEntryIndex].size
3844 			+= base + size - lastRangeEnd;
3845 		return base;
3846 	}
3847 
3848 	// see if there's a gap before the first one
3849 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3850 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3851 		base = rangeStart - size;
3852 		if (alignment > 0)
3853 			base = ROUNDDOWN(base, alignment);
3854 
3855 		if (base >= KERNEL_BASE) {
3856 			args->virtual_allocated_range[0].start = base;
3857 			args->virtual_allocated_range[0].size += rangeStart - base;
3858 			return base;
3859 		}
3860 	}
3861 
3862 	return 0;
3863 }
3864 
3865 
3866 static bool
3867 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3868 {
3869 	// TODO: horrible brute-force method of determining if the page can be
3870 	// allocated
3871 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3872 		if (address >= args->physical_memory_range[i].start
3873 			&& address < args->physical_memory_range[i].start
3874 				+ args->physical_memory_range[i].size)
3875 			return true;
3876 	}
3877 	return false;
3878 }
3879 
3880 
3881 page_num_t
3882 vm_allocate_early_physical_page(kernel_args* args)
3883 {
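	// Try to extend one of the already allocated physical ranges by a single
	// page: first upwards (claim the page right after a range), then, if that
	// fails everywhere, downwards (claim the page right before a range).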
3884 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3885 		phys_addr_t nextPage;
3886 
3887 		nextPage = args->physical_allocated_range[i].start
3888 			+ args->physical_allocated_range[i].size;
3889 		// see if the page after this allocated paddr run can be allocated
3890 		if (i + 1 < args->num_physical_allocated_ranges
3891 			&& args->physical_allocated_range[i + 1].size != 0) {
3892 			// see if the next page will collide with the next allocated range
3893 			if (nextPage >= args->physical_allocated_range[i+1].start)
3894 				continue;
3895 		}
3896 		// see if the next physical page fits in the memory block
3897 		if (is_page_in_physical_memory_range(args, nextPage)) {
3898 			// we got one!
3899 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3900 			return nextPage / B_PAGE_SIZE;
3901 		}
3902 	}
3903 
3904 	// Expanding upwards didn't work, try going downwards.
3905 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3906 		phys_addr_t nextPage;
3907 
3908 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
3909 		// see if the page before this allocated paddr run can be allocated
3910 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
3911 			// see if the page will collide with the previous allocated range
3912 			if (nextPage < args->physical_allocated_range[i-1].start
3913 				+ args->physical_allocated_range[i-1].size)
3914 				continue;
3915 		}
3916 		// see if the next physical page fits in the memory block
3917 		if (is_page_in_physical_memory_range(args, nextPage)) {
3918 			// we got one!
3919 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
3920 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3921 			return nextPage / B_PAGE_SIZE;
3922 		}
3923 	}
3924 
3925 	return 0;
3926 		// could not allocate a block
3927 }
3928 
3929 
3930 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3931 	allocate some pages before the VM is completely up.
3932 */
3933 addr_t
3934 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3935 	uint32 attributes, addr_t alignment)
3936 {
3937 	if (physicalSize > virtualSize)
3938 		physicalSize = virtualSize;
3939 
3940 	// find the vaddr to allocate at
3941 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3942 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3943 	if (virtualBase == 0) {
3944 		panic("vm_allocate_early: could not allocate virtual address\n");
3945 		return 0;
3946 	}
3947 
3948 	// map the pages
3949 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3950 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3951 		if (physicalAddress == 0)
3952 			panic("error allocating early page!\n");
3953 
3954 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3955 
3956 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3957 			physicalAddress * B_PAGE_SIZE, attributes,
3958 			&vm_allocate_early_physical_page);
3959 	}
3960 
3961 	return virtualBase;
3962 }
3963 
3964 
3965 /*!	The main entry point to initialize the VM. */
3966 status_t
3967 vm_init(kernel_args* args)
3968 {
3969 	struct preloaded_image* image;
3970 	void* address;
3971 	status_t err = 0;
3972 	uint32 i;
3973 
3974 	TRACE(("vm_init: entry\n"));
3975 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3976 	err = arch_vm_init(args);
3977 
3978 	// initialize some globals
3979 	vm_page_init_num_pages(args);
3980 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3981 
3982 	slab_init(args);
3983 
3984 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3985 	off_t heapSize = INITIAL_HEAP_SIZE;
3986 	// try to accommodate low-memory systems
3987 	while (heapSize > sAvailableMemory / 8)
3988 		heapSize /= 2;
3989 	if (heapSize < 1024 * 1024)
3990 		panic("vm_init: go buy some RAM please.");
3991 
3992 	// map in the new heap and initialize it
3993 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3994 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3995 	TRACE(("heap at 0x%lx\n", heapBase));
3996 	heap_init(heapBase, heapSize);
3997 #endif
3998 
3999 	// initialize the free page list and physical page mapper
4000 	vm_page_init(args);
4001 
4002 	// initialize the cache allocators
4003 	vm_cache_init(args);
4004 
4005 	{
4006 		status_t error = VMAreaHash::Init();
4007 		if (error != B_OK)
4008 			panic("vm_init: error initializing area hash table\n");
4009 	}
4010 
4011 	VMAddressSpace::Init();
4012 	reserve_boot_loader_ranges(args);
4013 
4014 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4015 	heap_init_post_area();
4016 #endif
4017 
4018 	// Do any further initialization that the architecture-dependent layers may
4019 	// need now
4020 	arch_vm_translation_map_init_post_area(args);
4021 	arch_vm_init_post_area(args);
4022 	vm_page_init_post_area(args);
4023 	slab_init_post_area();
4024 
4025 	// allocate areas to represent stuff that already exists
4026 
4027 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4028 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4029 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4030 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4031 #endif
4032 
4033 	allocate_kernel_args(args);
4034 
4035 	create_preloaded_image_areas(args->kernel_image);
4036 
4037 	// allocate areas for preloaded images
4038 	for (image = args->preloaded_images; image != NULL; image = image->next)
4039 		create_preloaded_image_areas(image);
4040 
4041 	// allocate kernel stacks
4042 	for (i = 0; i < args->num_cpus; i++) {
4043 		char name[64];
4044 
4045 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4046 		address = (void*)args->cpu_kstack[i].start;
4047 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4048 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4049 	}
4050 
4051 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
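	// Block the very last page of the address space, so that nothing can be
	// mapped there and address computations that would wrap around past the
	// end are caught early ("overflow protection").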
4052 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4053 
4054 #if PARANOID_KERNEL_MALLOC
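	// With PARANOID_KERNEL_MALLOC/FREE the debug heap fills uninitialized
	// respectively freed memory with 0xcccccccc and 0xdeadbeef patterns;
	// blocking the pages around those addresses turns a stray dereference of
	// such a pattern into an immediate fault.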
4055 	vm_block_address_range("uninitialized heap memory",
4056 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4057 #endif
4058 #if PARANOID_KERNEL_FREE
4059 	vm_block_address_range("freed heap memory",
4060 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4061 #endif
4062 
4063 	// create the object cache for the page mappings
4064 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4065 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4066 		NULL, NULL);
4067 	if (gPageMappingsObjectCache == NULL)
4068 		panic("failed to create page mappings object cache");
4069 
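	// Keep a reserve of mapping structures around, so that page faults can
	// still be resolved when memory is tight (see the object_cache_reserve()
	// fallback in vm_soft_fault()).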
4070 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4071 
4072 #if DEBUG_CACHE_LIST
4073 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4074 		virtual_address_restrictions virtualRestrictions = {};
4075 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4076 		physical_address_restrictions physicalRestrictions = {};
4077 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4078 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4079 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4080 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4081 			&physicalRestrictions, (void**)&sCacheInfoTable);
4082 	}
4083 #endif	// DEBUG_CACHE_LIST
4084 
4085 	// add some debugger commands
4086 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4087 	add_debugger_command("area", &dump_area,
4088 		"Dump info about a particular area");
4089 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4090 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4091 #if DEBUG_CACHE_LIST
4092 	if (sCacheInfoTable != NULL) {
4093 		add_debugger_command_etc("caches", &dump_caches,
4094 			"List all VMCache trees",
4095 			"[ \"-c\" ]\n"
4096 			"All cache trees are listed sorted in decreasing order by number "
4097 				"of\n"
4098 			"used pages or, if \"-c\" is specified, by size of committed "
4099 				"memory.\n",
4100 			0);
4101 	}
4102 #endif
4103 	add_debugger_command("avail", &dump_available_memory,
4104 		"Dump available memory");
4105 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4106 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4107 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4108 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4109 	add_debugger_command("string", &display_mem, "dump strings");
4110 
4111 	add_debugger_command_etc("mapping", &dump_mapping_info,
4112 		"Print address mapping information",
4113 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4114 		"Prints low-level page mapping information for a given address. If\n"
4115 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4116 		"address that is looked up in the translation map of the current\n"
4117 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4118 		"\"-r\" is specified, <address> is a physical address that is\n"
4119 		"searched in the translation map of all teams, respectively the team\n"
4120 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4121 		"<address> is the address of a vm_page structure. The behavior is\n"
4122 		"equivalent to specifying \"-r\" with the physical address of that\n"
4123 		"page.\n",
4124 		0);
4125 
4126 	TRACE(("vm_init: exit\n"));
4127 
4128 	vm_cache_init_post_heap();
4129 
4130 	return err;
4131 }
4132 
4133 
4134 status_t
4135 vm_init_post_sem(kernel_args* args)
4136 {
4137 	// This frees all unused boot loader resources and makes their space
4138 	// available again
4139 	arch_vm_init_end(args);
4140 	unreserve_boot_loader_ranges(args);
4141 
4142 	// Fill in all of the semaphores that were not allocated before.
4143 	// Since we're still single-threaded and only the kernel address space
4144 	// exists, it isn't that hard to find all of the ones we need to create.
4145 
4146 	arch_vm_translation_map_init_post_sem(args);
4147 
4148 	slab_init_post_sem();
4149 
4150 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4151 	heap_init_post_sem();
4152 #endif
4153 
4154 	return B_OK;
4155 }
4156 
4157 
4158 status_t
4159 vm_init_post_thread(kernel_args* args)
4160 {
4161 	vm_page_init_post_thread(args);
4162 	slab_init_post_thread();
4163 	return heap_init_post_thread();
4164 }
4165 
4166 
4167 status_t
4168 vm_init_post_modules(kernel_args* args)
4169 {
4170 	return arch_vm_init_post_modules(args);
4171 }
4172 
4173 
4174 void
4175 permit_page_faults(void)
4176 {
4177 	Thread* thread = thread_get_current_thread();
4178 	if (thread != NULL)
4179 		atomic_add(&thread->page_faults_allowed, 1);
4180 }
4181 
4182 
4183 void
4184 forbid_page_faults(void)
4185 {
4186 	Thread* thread = thread_get_current_thread();
4187 	if (thread != NULL)
4188 		atomic_add(&thread->page_faults_allowed, -1);
4189 }
4190 
4191 
4192 status_t
4193 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4194 	bool isUser, addr_t* newIP)
4195 {
4196 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4197 		faultAddress));
4198 
4199 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4200 
4201 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4202 	VMAddressSpace* addressSpace = NULL;
4203 
4204 	status_t status = B_OK;
4205 	*newIP = 0;
4206 	atomic_add((int32*)&sPageFaults, 1);
4207 
4208 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4209 		addressSpace = VMAddressSpace::GetKernel();
4210 	} else if (IS_USER_ADDRESS(pageAddress)) {
4211 		addressSpace = VMAddressSpace::GetCurrent();
4212 		if (addressSpace == NULL) {
4213 			if (!isUser) {
4214 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4215 					"memory!\n");
4216 				status = B_BAD_ADDRESS;
4217 				TPF(PageFaultError(-1,
4218 					VMPageFaultTracing
4219 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4220 			} else {
4221 				// XXX weird state.
4222 				panic("vm_page_fault: non kernel thread accessing user memory "
4223 					"that doesn't exist!\n");
4224 				status = B_BAD_ADDRESS;
4225 			}
4226 		}
4227 	} else {
4228 		// The hit was probably in the 64k DMZ between kernel and user space.
4229 		// This keeps a user space thread from passing a buffer that crosses
4230 		// into kernel space.
4231 		status = B_BAD_ADDRESS;
4232 		TPF(PageFaultError(-1,
4233 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4234 	}
4235 
4236 	if (status == B_OK) {
4237 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4238 			isUser, NULL);
4239 	}
4240 
4241 	if (status < B_OK) {
4242 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4243 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4244 			strerror(status), address, faultAddress, isWrite, isUser,
4245 			thread_get_current_thread_id());
4246 		if (!isUser) {
4247 			Thread* thread = thread_get_current_thread();
4248 			if (thread != NULL && thread->fault_handler != 0) {
4249 				// this will cause the arch-dependent page fault handler to
4250 				// modify the IP on the interrupt frame or whatever to return
4251 				// to this address
4252 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4253 			} else {
4254 				// unhandled page fault in the kernel
4255 				panic("vm_page_fault: unhandled page fault in kernel space at "
4256 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4257 			}
4258 		} else {
4259 			Thread* thread = thread_get_current_thread();
4260 
4261 #ifdef TRACE_FAULTS
4262 			VMArea* area = NULL;
4263 			if (addressSpace != NULL) {
4264 				addressSpace->ReadLock();
4265 				area = addressSpace->LookupArea(faultAddress);
4266 			}
4267 
4268 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4269 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4270 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4271 				thread->team->Name(), thread->team->id,
4272 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4273 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4274 					area->Base() : 0x0));
4275 
4276 			if (addressSpace != NULL)
4277 				addressSpace->ReadUnlock();
4278 #endif
4279 
4280 			// If the thread has a signal handler for SIGSEGV, we simply
4281 			// send it the signal. Otherwise we notify the user debugger
4282 			// first.
4283 			struct sigaction action;
4284 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4285 					&& action.sa_handler != SIG_DFL
4286 					&& action.sa_handler != SIG_IGN)
4287 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4288 					SIGSEGV)) {
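				// B_PERMISSION_DENIED means a mapping exists but the access
				// was not permitted (SEGV_ACCERR); any other error means there
				// was no valid mapping at the address (SEGV_MAPERR).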
4289 				Signal signal(SIGSEGV,
4290 					status == B_PERMISSION_DENIED
4291 						? SEGV_ACCERR : SEGV_MAPERR,
4292 					EFAULT, thread->team->id);
4293 				signal.SetAddress((void*)address);
4294 				send_signal_to_thread(thread, signal, 0);
4295 			}
4296 		}
4297 	}
4298 
4299 	if (addressSpace != NULL)
4300 		addressSpace->Put();
4301 
4302 	return B_HANDLED_INTERRUPT;
4303 }
4304 
4305 
4306 struct PageFaultContext {
4307 	AddressSpaceReadLocker	addressSpaceLocker;
4308 	VMCacheChainLocker		cacheChainLocker;
4309 
4310 	VMTranslationMap*		map;
4311 	VMCache*				topCache;
4312 	off_t					cacheOffset;
4313 	vm_page_reservation		reservation;
4314 	bool					isWrite;
4315 
4316 	// return values
4317 	vm_page*				page;
4318 	bool					restart;
4319 	bool					pageAllocated;
4320 
4321 
4322 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4323 		:
4324 		addressSpaceLocker(addressSpace, true),
4325 		map(addressSpace->TranslationMap()),
4326 		isWrite(isWrite)
4327 	{
4328 	}
4329 
4330 	~PageFaultContext()
4331 	{
4332 		UnlockAll();
4333 		vm_page_unreserve_pages(&reservation);
4334 	}
4335 
4336 	void Prepare(VMCache* topCache, off_t cacheOffset)
4337 	{
4338 		this->topCache = topCache;
4339 		this->cacheOffset = cacheOffset;
4340 		page = NULL;
4341 		restart = false;
4342 		pageAllocated = false;
4343 
4344 		cacheChainLocker.SetTo(topCache);
4345 	}
4346 
4347 	void UnlockAll(VMCache* exceptCache = NULL)
4348 	{
4349 		topCache = NULL;
4350 		addressSpaceLocker.Unlock();
4351 		cacheChainLocker.Unlock(exceptCache);
4352 	}
4353 };
4354 
4355 
4356 /*!	Gets the page that should be mapped into the area.
4357 	Returns an error code other than \c B_OK, if the page couldn't be found or
4358 	paged in. The locking state of the address space and the caches is undefined
4359 	in that case.
4360 	Returns \c B_OK with \c context.restart set to \c true, if the function
4361 	had to unlock the address space and all caches and is supposed to be called
4362 	again.
4363 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4364 	found. It is returned in \c context.page. The address space will still be
4365 	locked as well as all caches starting from the top cache to at least the
4366 	cache the page lives in.
4367 */
4368 static status_t
4369 fault_get_page(PageFaultContext& context)
4370 {
4371 	VMCache* cache = context.topCache;
4372 	VMCache* lastCache = NULL;
4373 	vm_page* page = NULL;
4374 
4375 	while (cache != NULL) {
4376 		// We already hold the lock of the cache at this point.
4377 
4378 		lastCache = cache;
4379 
4380 		page = cache->LookupPage(context.cacheOffset);
4381 		if (page != NULL && page->busy) {
4382 			// the page is busy -- wait for it to become unbusy
4383 			context.UnlockAll(cache);
4384 			cache->ReleaseRefLocked();
4385 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4386 
4387 			// restart the whole process
4388 			context.restart = true;
4389 			return B_OK;
4390 		}
4391 
4392 		if (page != NULL)
4393 			break;
4394 
4395 		// The current cache does not contain the page we're looking for.
4396 
4397 		// see if the backing store has it
4398 		if (cache->HasPage(context.cacheOffset)) {
4399 			// insert a fresh page and mark it busy -- we're going to read it in
4400 			page = vm_page_allocate_page(&context.reservation,
4401 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4402 			cache->InsertPage(page, context.cacheOffset);
4403 
4404 			// We need to unlock all caches and the address space while reading
4405 			// the page in. Keep a reference to the cache around.
4406 			cache->AcquireRefLocked();
4407 			context.UnlockAll();
4408 
4409 			// read the page in
4410 			generic_io_vec vec;
4411 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4412 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4413 
4414 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4415 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4416 
4417 			cache->Lock();
4418 
4419 			if (status < B_OK) {
4420 				// on error remove and free the page
4421 				dprintf("reading page from cache %p returned: %s!\n",
4422 					cache, strerror(status));
4423 
4424 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4425 				cache->RemovePage(page);
4426 				vm_page_set_state(page, PAGE_STATE_FREE);
4427 
4428 				cache->ReleaseRefAndUnlock();
4429 				return status;
4430 			}
4431 
4432 			// mark the page unbusy again
4433 			cache->MarkPageUnbusy(page);
4434 
4435 			DEBUG_PAGE_ACCESS_END(page);
4436 
4437 			// Since we needed to unlock everything temporarily, the area
4438 			// situation might have changed. So we need to restart the whole
4439 			// process.
4440 			cache->ReleaseRefAndUnlock();
4441 			context.restart = true;
4442 			return B_OK;
4443 		}
4444 
4445 		cache = context.cacheChainLocker.LockSourceCache();
4446 	}
4447 
4448 	if (page == NULL) {
4449 		// There was no adequate page; determine the cache for a clean one.
4450 		// Read-only pages go into the deepest cache; only the topmost cache
4451 		// may have direct write access.
4452 		cache = context.isWrite ? context.topCache : lastCache;
4453 
4454 		// allocate a clean page
4455 		page = vm_page_allocate_page(&context.reservation,
4456 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4457 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4458 			page->physical_page_number));
4459 
4460 		// insert the new page into our cache
4461 		cache->InsertPage(page, context.cacheOffset);
4462 		context.pageAllocated = true;
4463 	} else if (page->Cache() != context.topCache && context.isWrite) {
4464 		// We have a page that has the data we want, but in the wrong cache
4465 		// object so we need to copy it and stick it into the top cache.
4466 		vm_page* sourcePage = page;
4467 
4468 		// TODO: If memory is low, it might be a good idea to steal the page
4469 		// from our source cache -- if possible, that is.
4470 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4471 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4472 
4473 		// To not needlessly kill concurrency we unlock all caches but the top
4474 		// one while copying the page. Lacking another mechanism to ensure that
4475 		// the source page doesn't disappear, we mark it busy.
4476 		sourcePage->busy = true;
4477 		context.cacheChainLocker.UnlockKeepRefs(true);
4478 
4479 		// copy the page
4480 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4481 			sourcePage->physical_page_number * B_PAGE_SIZE);
4482 
4483 		context.cacheChainLocker.RelockCaches(true);
4484 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4485 
4486 		// insert the new page into our cache
4487 		context.topCache->InsertPage(page, context.cacheOffset);
4488 		context.pageAllocated = true;
4489 	} else
4490 		DEBUG_PAGE_ACCESS_START(page);
4491 
4492 	context.page = page;
4493 	return B_OK;
4494 }
4495 
4496 
4497 /*!	Makes sure the address in the given address space is mapped.
4498 
4499 	\param addressSpace The address space.
4500 	\param originalAddress The address. Doesn't need to be page aligned.
4501 	\param isWrite If \c true the address shall be write-accessible.
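	\param isExecute If \c true the address shall be executable.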
4502 	\param isUser If \c true the access is requested by a userland team.
4503 	\param wirePage On success, if non \c NULL, the wired count of the page
4504 		mapped at the given address is incremented and the page is returned
4505 		via this parameter.
4506 	\return \c B_OK on success, another error code otherwise.
4507 */
4508 static status_t
4509 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4510 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4511 {
4512 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4513 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4514 		originalAddress, isWrite, isUser));
4515 
4516 	PageFaultContext context(addressSpace, isWrite);
4517 
4518 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4519 	status_t status = B_OK;
4520 
4521 	addressSpace->IncrementFaultCount();
4522 
4523 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4524 	// the pages upfront makes sure we don't have any cache locked, so that the
4525 	// page daemon/thief can do their job without problems.
4526 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4527 		originalAddress);
4528 	context.addressSpaceLocker.Unlock();
4529 	vm_page_reserve_pages(&context.reservation, reservePages,
4530 		addressSpace == VMAddressSpace::Kernel()
4531 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4532 
4533 	while (true) {
4534 		context.addressSpaceLocker.Lock();
4535 
4536 		// get the area the fault was in
4537 		VMArea* area = addressSpace->LookupArea(address);
4538 		if (area == NULL) {
4539 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4540 				"space\n", originalAddress);
4541 			TPF(PageFaultError(-1,
4542 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4543 			status = B_BAD_ADDRESS;
4544 			break;
4545 		}
4546 
4547 		// check permissions
4548 		uint32 protection = get_area_page_protection(area, address);
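		// (If the area has per-page protections set, they take precedence
		// over the area-wide protection for this page.)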
4549 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4550 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4551 				area->id, (void*)originalAddress);
4552 			TPF(PageFaultError(area->id,
4553 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4554 			status = B_PERMISSION_DENIED;
4555 			break;
4556 		}
4557 		if (isWrite && (protection
4558 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4559 			dprintf("write access attempted on write-protected area 0x%"
4560 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4561 			TPF(PageFaultError(area->id,
4562 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4563 			status = B_PERMISSION_DENIED;
4564 			break;
4565 		} else if (isExecute && (protection
4566 				& (B_EXECUTE_AREA
4567 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4568 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4569 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4570 			TPF(PageFaultError(area->id,
4571 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4572 			status = B_PERMISSION_DENIED;
4573 			break;
4574 		} else if (!isWrite && !isExecute && (protection
4575 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4576 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4577 				" at %p\n", area->id, (void*)originalAddress);
4578 			TPF(PageFaultError(area->id,
4579 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4580 			status = B_PERMISSION_DENIED;
4581 			break;
4582 		}
4583 
4584 		// We have the area, it was a valid access, so let's try to resolve the
4585 		// page fault now.
4586 		// At first, the top most cache from the area is investigated.
4587 
4588 		context.Prepare(vm_area_get_locked_cache(area),
4589 			address - area->Base() + area->cache_offset);
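		// (The faulting address is translated into an offset within the
		// area's top cache.)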
4590 
4591 		// See if this cache has a fault handler -- this will do all the work
4592 		// for us.
4593 		{
4594 			// Note, since the page fault is resolved with interrupts enabled,
4595 			// the fault handler could be called more than once for the same
4596 			// reason -- the store must take this into account.
4597 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4598 			if (status != B_BAD_HANDLER)
4599 				break;
4600 		}
4601 
4602 		// The top most cache has no fault handler, so let's see if the cache or
4603 		// its sources already have the page we're searching for (we're going
4604 		// from top to bottom).
4605 		status = fault_get_page(context);
4606 		if (status != B_OK) {
4607 			TPF(PageFaultError(area->id, status));
4608 			break;
4609 		}
4610 
4611 		if (context.restart)
4612 			continue;
4613 
4614 		// All went fine, all there is left to do is to map the page into the
4615 		// address space.
4616 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4617 			context.page));
4618 
4619 		// If the page doesn't reside in the area's cache, we need to make sure
4620 		// it's mapped in read-only, so that we cannot overwrite someone else's
4621 		// data (copy-on-write)
4622 		uint32 newProtection = protection;
4623 		if (context.page->Cache() != context.topCache && !isWrite)
4624 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4625 
4626 		bool unmapPage = false;
4627 		bool mapPage = true;
4628 
4629 		// check whether there's already a page mapped at the address
4630 		context.map->Lock();
4631 
4632 		phys_addr_t physicalAddress;
4633 		uint32 flags;
4634 		vm_page* mappedPage = NULL;
4635 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4636 			&& (flags & PAGE_PRESENT) != 0
4637 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4638 				!= NULL) {
4639 			// Yep there's already a page. If it's ours, we can simply adjust
4640 			// its protection. Otherwise we have to unmap it.
4641 			if (mappedPage == context.page) {
4642 				context.map->ProtectPage(area, address, newProtection);
4643 					// Note: We assume that ProtectPage() is atomic (i.e.
4644 					// the page isn't temporarily unmapped), otherwise we'd have
4645 					// to make sure it isn't wired.
4646 				mapPage = false;
4647 			} else
4648 				unmapPage = true;
4649 		}
4650 
4651 		context.map->Unlock();
4652 
4653 		if (unmapPage) {
4654 			// If the page is wired, we can't unmap it. Wait until it is unwired
4655 			// again and restart. Note that the page cannot be wired for
4656 			// writing, since it isn't in the topmost cache. So we can safely
4657 			// ignore ranges wired for writing (our own and other concurrent
4658 			// wiring attempts in progress) and in fact have to do that to avoid
4659 			// a deadlock.
4660 			VMAreaUnwiredWaiter waiter;
4661 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4662 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4663 				// unlock everything and wait
4664 				if (context.pageAllocated) {
4665 					// ... but since we allocated a page and inserted it into
4666 					// the top cache, remove and free it first. Otherwise we'd
4667 					// have a page from a lower cache mapped while an upper
4668 					// cache has a page that would shadow it.
4669 					context.topCache->RemovePage(context.page);
4670 					vm_page_free_etc(context.topCache, context.page,
4671 						&context.reservation);
4672 				} else
4673 					DEBUG_PAGE_ACCESS_END(context.page);
4674 
4675 				context.UnlockAll();
4676 				waiter.waitEntry.Wait();
4677 				continue;
4678 			}
4679 
4680 			// Note: The mapped page is a page of a lower cache. We are
4681 			// guaranteed to have that cache locked, our new page is a copy of
4682 			// that page, and the page is not busy. The logic for that guarantee
4683 			// is as follows: Since the page is mapped, it must live in the top
4684 			// cache (ruled out above) or any of its lower caches, and there is
4685 			// (was before the new page was inserted) no other page in any
4686 			// cache between the top cache and the page's cache (otherwise that
4687 			// would be mapped instead). That in turn means that our algorithm
4688 			// must have found it and therefore it cannot be busy either.
4689 			DEBUG_PAGE_ACCESS_START(mappedPage);
4690 			unmap_page(area, address);
4691 			DEBUG_PAGE_ACCESS_END(mappedPage);
4692 		}
4693 
4694 		if (mapPage) {
4695 			if (map_page(area, context.page, address, newProtection,
4696 					&context.reservation) != B_OK) {
4697 				// Mapping can only fail, when the page mapping object couldn't
4698 				// be allocated. Save for the missing mapping everything is
4699 				// fine, though. If this was a regular page fault, we'll simply
4700 				// leave and probably fault again. To make sure we'll have more
4701 				// luck then, we ensure that the minimum object reserve is
4702 				// available.
4703 				DEBUG_PAGE_ACCESS_END(context.page);
4704 
4705 				context.UnlockAll();
4706 
4707 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4708 						!= B_OK) {
4709 					// Apparently the situation is serious. Let's get ourselves
4710 					// killed.
4711 					status = B_NO_MEMORY;
4712 				} else if (wirePage != NULL) {
4713 					// The caller expects us to wire the page. Since
4714 					// object_cache_reserve() succeeded, we should now be able
4715 					// to allocate a mapping structure. Restart.
4716 					continue;
4717 				}
4718 
4719 				break;
4720 			}
4721 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4722 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4723 
4724 		// also wire the page, if requested
4725 		if (wirePage != NULL && status == B_OK) {
4726 			increment_page_wired_count(context.page);
4727 			*wirePage = context.page;
4728 		}
4729 
4730 		DEBUG_PAGE_ACCESS_END(context.page);
4731 
4732 		break;
4733 	}
4734 
4735 	return status;
4736 }
4737 
4738 
4739 status_t
4740 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4741 {
4742 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4743 }
4744 
4745 status_t
4746 vm_put_physical_page(addr_t vaddr, void* handle)
4747 {
4748 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4749 }
4750 
4751 
4752 status_t
4753 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4754 	void** _handle)
4755 {
4756 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4757 }
4758 
4759 status_t
4760 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4761 {
4762 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4763 }
4764 
4765 
4766 status_t
4767 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4768 {
4769 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4770 }
4771 
4772 status_t
4773 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4774 {
4775 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4776 }
4777 
4778 
4779 void
4780 vm_get_info(system_info* info)
4781 {
4782 	swap_get_info(info);
4783 
4784 	MutexLocker locker(sAvailableMemoryLock);
4785 	info->needed_memory = sNeededMemory;
4786 	info->free_memory = sAvailableMemory;
4787 }
4788 
4789 
4790 uint32
4791 vm_num_page_faults(void)
4792 {
4793 	return sPageFaults;
4794 }
4795 
4796 
4797 off_t
4798 vm_available_memory(void)
4799 {
4800 	MutexLocker locker(sAvailableMemoryLock);
4801 	return sAvailableMemory;
4802 }
4803 
4804 
4805 off_t
4806 vm_available_not_needed_memory(void)
4807 {
4808 	MutexLocker locker(sAvailableMemoryLock);
4809 	return sAvailableMemory - sNeededMemory;
4810 }
4811 
4812 
4813 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4814 	debugger.
4815 */
4816 off_t
4817 vm_available_not_needed_memory_debug(void)
4818 {
4819 	return sAvailableMemory - sNeededMemory;
4820 }
4821 
4822 
4823 size_t
4824 vm_kernel_address_space_left(void)
4825 {
4826 	return VMAddressSpace::Kernel()->FreeSpace();
4827 }
4828 
4829 
4830 void
4831 vm_unreserve_memory(size_t amount)
4832 {
4833 	mutex_lock(&sAvailableMemoryLock);
4834 
4835 	sAvailableMemory += amount;
4836 
4837 	mutex_unlock(&sAvailableMemoryLock);
4838 }
4839 
4840 
4841 status_t
4842 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4843 {
4844 	size_t reserve = kMemoryReserveForPriority[priority];
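	// A request only succeeds while at least this much memory would remain
	// available afterwards; higher priorities use a smaller reserve and may
	// therefore dig deeper into the remaining memory.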
4845 
4846 	MutexLocker locker(sAvailableMemoryLock);
4847 
4848 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4849 
4850 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4851 		sAvailableMemory -= amount;
4852 		return B_OK;
4853 	}
4854 
4855 	if (timeout <= 0)
4856 		return B_NO_MEMORY;
4857 
4858 	// turn timeout into an absolute timeout
4859 	timeout += system_time();
4860 
4861 	// loop until we've got the memory or the timeout occurs
4862 	do {
4863 		sNeededMemory += amount;
4864 
4865 		// call the low resource manager
4866 		locker.Unlock();
4867 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4868 			B_ABSOLUTE_TIMEOUT, timeout);
4869 		locker.Lock();
4870 
4871 		sNeededMemory -= amount;
4872 
4873 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4874 			sAvailableMemory -= amount;
4875 			return B_OK;
4876 		}
4877 	} while (timeout > system_time());
4878 
4879 	return B_NO_MEMORY;
4880 }
4881 
4882 
4883 status_t
4884 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4885 {
4886 	// NOTE: The caller is responsible for synchronizing calls to this function!
4887 
4888 	AddressSpaceReadLocker locker;
4889 	VMArea* area;
4890 	status_t status = locker.SetFromArea(id, area);
4891 	if (status != B_OK)
4892 		return status;
4893 
4894 	// nothing to do, if the type doesn't change
4895 	uint32 oldType = area->MemoryType();
4896 	if (type == oldType)
4897 		return B_OK;
4898 
4899 	// set the memory type of the area and the mapped pages
4900 	VMTranslationMap* map = area->address_space->TranslationMap();
4901 	map->Lock();
4902 	area->SetMemoryType(type);
4903 	map->ProtectArea(area, area->protection);
4904 	map->Unlock();
4905 
4906 	// set the physical memory type
4907 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4908 	if (error != B_OK) {
4909 		// reset the memory type of the area and the mapped pages
4910 		map->Lock();
4911 		area->SetMemoryType(oldType);
4912 		map->ProtectArea(area, area->protection);
4913 		map->Unlock();
4914 		return error;
4915 	}
4916 
4917 	return B_OK;
4919 }
4920 
4921 
4922 /*!	This function enforces some protection properties:
4923 	 - kernel areas must be W^X (after kernel startup)
4924 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4925 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4926 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4927 	   and B_KERNEL_WRITE_AREA.
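
	For example, a request for just \c B_READ_AREA | \c B_WRITE_AREA comes back
	with \c B_KERNEL_READ_AREA and \c B_KERNEL_WRITE_AREA added as well.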
4928 */
4929 static void
4930 fix_protection(uint32* protection)
4931 {
4932 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
4933 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
4934 			|| (*protection & B_WRITE_AREA) != 0)
4935 		&& !gKernelStartup)
4936 		panic("kernel areas cannot be both writable and executable!");
4937 
4938 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4939 		if ((*protection & B_USER_PROTECTION) == 0
4940 			|| (*protection & B_WRITE_AREA) != 0)
4941 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4942 		else
4943 			*protection |= B_KERNEL_READ_AREA;
4944 	}
4945 }
4946 
4947 
4948 static void
4949 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4950 {
4951 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4952 	info->area = area->id;
4953 	info->address = (void*)area->Base();
4954 	info->size = area->Size();
4955 	info->protection = area->protection;
4956 	info->lock = B_FULL_LOCK;
4957 	info->team = area->address_space->ID();
4958 	info->copy_count = 0;
4959 	info->in_count = 0;
4960 	info->out_count = 0;
4961 		// TODO: retrieve real values here!
4962 
4963 	VMCache* cache = vm_area_get_locked_cache(area);
4964 
4965 	// Note, this is a simplification; the cache could be larger than this area
4966 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4967 
4968 	vm_area_put_locked_cache(cache);
4969 }
4970 
4971 
4972 static status_t
4973 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4974 {
4975 	// is newSize a multiple of B_PAGE_SIZE?
4976 	if (newSize & (B_PAGE_SIZE - 1))
4977 		return B_BAD_VALUE;
4978 
4979 	// lock all affected address spaces and the cache
4980 	VMArea* area;
4981 	VMCache* cache;
4982 
4983 	MultiAddressSpaceLocker locker;
4984 	AreaCacheLocker cacheLocker;
4985 
4986 	status_t status;
4987 	size_t oldSize;
4988 	bool anyKernelArea;
4989 	bool restart;
4990 
4991 	do {
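		// Waiting for a wired range below temporarily unlocks the address
		// spaces and the cache, so the whole locking procedure has to be
		// restarted in that case.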
4992 		anyKernelArea = false;
4993 		restart = false;
4994 
4995 		locker.Unset();
4996 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4997 		if (status != B_OK)
4998 			return status;
4999 		cacheLocker.SetTo(cache, true);	// already locked
5000 
5001 		// enforce restrictions
5002 		if (!kernel && area->address_space == VMAddressSpace::Kernel()) {
5003 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5004 				"resize kernel area %" B_PRId32 " (%s)\n",
5005 				team_get_current_team_id(), areaID, area->name);
5006 			return B_NOT_ALLOWED;
5007 		}
5008 		// TODO: Enforce all restrictions (team, etc.)!
5009 
5010 		oldSize = area->Size();
5011 		if (newSize == oldSize)
5012 			return B_OK;
5013 
5014 		if (cache->type != CACHE_TYPE_RAM)
5015 			return B_NOT_ALLOWED;
5016 
5017 		if (oldSize < newSize) {
5018 			// We need to check if all areas of this cache can be resized.
5019 			for (VMArea* current = cache->areas; current != NULL;
5020 					current = current->cache_next) {
5021 				if (!current->address_space->CanResizeArea(current, newSize))
5022 					return B_ERROR;
5023 				anyKernelArea
5024 					|= current->address_space == VMAddressSpace::Kernel();
5025 			}
5026 		} else {
5027 			// We're shrinking the areas, so we must make sure the affected
5028 			// ranges are not wired.
5029 			for (VMArea* current = cache->areas; current != NULL;
5030 					current = current->cache_next) {
5031 				anyKernelArea
5032 					|= current->address_space == VMAddressSpace::Kernel();
5033 
5034 				if (wait_if_area_range_is_wired(current,
5035 						current->Base() + newSize, oldSize - newSize, &locker,
5036 						&cacheLocker)) {
5037 					restart = true;
5038 					break;
5039 				}
5040 			}
5041 		}
5042 	} while (restart);
5043 
5044 	// Okay, looks good so far, so let's do it
5045 
5046 	int priority = kernel && anyKernelArea
5047 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5048 	uint32 allocationFlags = kernel && anyKernelArea
5049 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5050 
5051 	if (oldSize < newSize) {
5052 		// Growing the cache can fail, so we do it first.
5053 		status = cache->Resize(cache->virtual_base + newSize, priority);
5054 		if (status != B_OK)
5055 			return status;
5056 	}
5057 
5058 	for (VMArea* current = cache->areas; current != NULL;
5059 			current = current->cache_next) {
5060 		status = current->address_space->ResizeArea(current, newSize,
5061 			allocationFlags);
5062 		if (status != B_OK)
5063 			break;
5064 
5065 		// We also need to unmap all pages beyond the new size, if the area has
5066 		// shrunk
5067 		if (newSize < oldSize) {
5068 			VMCacheChainLocker cacheChainLocker(cache);
5069 			cacheChainLocker.LockAllSourceCaches();
5070 
5071 			unmap_pages(current, current->Base() + newSize,
5072 				oldSize - newSize);
5073 
5074 			cacheChainLocker.Unlock(cache);
5075 		}
5076 	}
5077 
5078 	if (status == B_OK) {
5079 		// Shrink or grow individual page protections if in use.
5080 		if (area->page_protections != NULL) {
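			// Each page uses four bits in the protections array, i.e. two
			// pages share one byte, hence the (pageCount + 1) / 2 sizing.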
5081 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5082 			uint8* newProtections
5083 				= (uint8*)realloc(area->page_protections, bytes);
5084 			if (newProtections == NULL)
5085 				status = B_NO_MEMORY;
5086 			else {
5087 				area->page_protections = newProtections;
5088 
5089 				if (oldSize < newSize) {
5090 					// init the additional page protections to that of the area
5091 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5092 					uint32 areaProtection = area->protection
5093 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5094 					memset(area->page_protections + offset,
5095 						areaProtection | (areaProtection << 4), bytes - offset);
5096 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5097 						uint8& entry = area->page_protections[offset - 1];
5098 						entry = (entry & 0x0f) | (areaProtection << 4);
5099 					}
5100 				}
5101 			}
5102 		}
5103 	}
5104 
5105 	// shrinking the cache can't fail, so we do it now
5106 	if (status == B_OK && newSize < oldSize)
5107 		status = cache->Resize(cache->virtual_base + newSize, priority);
5108 
5109 	if (status != B_OK) {
5110 		// Something failed -- resize the areas back to their original size.
5111 		// This can fail, too, in which case we're seriously screwed.
5112 		for (VMArea* current = cache->areas; current != NULL;
5113 				current = current->cache_next) {
5114 			if (current->address_space->ResizeArea(current, oldSize,
5115 					allocationFlags) != B_OK) {
5116 				panic("vm_resize_area(): Failed and unable to restore "
5117 					"original state.");
5118 			}
5119 		}
5120 
5121 		cache->Resize(cache->virtual_base + oldSize, priority);
5122 	}
5123 
5124 	// TODO: we must honour the lock restrictions of this area
5125 	return status;
5126 }
5127 
5128 
5129 status_t
5130 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5131 {
5132 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5133 }
5134 
5135 
5136 status_t
5137 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5138 {
5139 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5140 }
5141 
5142 
5143 status_t
5144 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5145 	bool user)
5146 {
5147 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5148 }
5149 
5150 
5151 void
5152 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5153 {
5154 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5155 }
5156 
5157 
5158 /*!	Copies a range of memory directly from/to a page that might not be mapped
5159 	at the moment.
5160 
5161 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5162 	walks through the respective area's cache chain to find the physical page
5163 	and copies from/to it directly.
5164 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5165 	must not cross a page boundary.
5166 
5167 	\param teamID The team ID identifying the address space \a unsafeMemory is
5168 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5169 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5170 		is passed, the address space of the thread returned by
5171 		debug_get_debugged_thread() is used.
5172 	\param unsafeMemory The start of the unsafe memory range to be copied
5173 		from/to.
5174 	\param buffer A safely accessible kernel buffer to be copied from/to.
5175 	\param size The number of bytes to be copied.
5176 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5177 		\a unsafeMemory, the other way around otherwise.
5178 */
5179 status_t
5180 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5181 	size_t size, bool copyToUnsafe)
5182 {
5183 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5184 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5185 		return B_BAD_VALUE;
5186 	}
5187 
5188 	// get the address space for the debugged thread
5189 	VMAddressSpace* addressSpace;
5190 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5191 		addressSpace = VMAddressSpace::Kernel();
5192 	} else if (teamID == B_CURRENT_TEAM) {
5193 		Thread* thread = debug_get_debugged_thread();
5194 		if (thread == NULL || thread->team == NULL)
5195 			return B_BAD_ADDRESS;
5196 
5197 		addressSpace = thread->team->address_space;
5198 	} else
5199 		addressSpace = VMAddressSpace::DebugGet(teamID);
5200 
5201 	if (addressSpace == NULL)
5202 		return B_BAD_ADDRESS;
5203 
5204 	// get the area
5205 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5206 	if (area == NULL)
5207 		return B_BAD_ADDRESS;
5208 
5209 	// search the page
5210 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5211 		+ area->cache_offset;
5212 	VMCache* cache = area->cache;
5213 	vm_page* page = NULL;
5214 	while (cache != NULL) {
5215 		page = cache->DebugLookupPage(cacheOffset);
5216 		if (page != NULL)
5217 			break;
5218 
5219 		// Page not found in this cache -- if it is paged out, we must not try
5220 		// to get it from lower caches.
5221 		if (cache->DebugHasPage(cacheOffset))
5222 			break;
5223 
5224 		cache = cache->source;
5225 	}
5226 
5227 	if (page == NULL)
5228 		return B_UNSUPPORTED;
5229 
5230 	// copy from/to physical memory
5231 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5232 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5233 
5234 	if (copyToUnsafe) {
5235 		if (page->Cache() != area->cache)
5236 			return B_UNSUPPORTED;
5237 
5238 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5239 	}
5240 
5241 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5242 }
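
/*	Illustrative usage sketch: the kernel debugger can use the function above
	to read a word from the currently debugged thread, e.g. (with a
	hypothetical "unsafeAddress" pointing into the debugged team's space):

		uint32 value;
		if (vm_debug_copy_page_memory(B_CURRENT_TEAM, unsafeAddress, &value,
				sizeof(value), false) == B_OK)
			kprintf("value: %#" B_PRIx32 "\n", value);

	The requested range must not cross a page boundary (see above).
*/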
5243 
5244 
5245 static inline bool
5246 validate_user_range(const void* addr, size_t size)
5247 {
5248 	addr_t address = (addr_t)addr;
5249 
5250 	// Check for overflows on all addresses.
5251 	if ((address + size) < address)
5252 		return false;
5253 
5254 	// Validate that the address does not cross the kernel/user boundary.
5255 	if (IS_USER_ADDRESS(address))
5256 		return IS_USER_ADDRESS(address + size);
5257 	else
5258 		return !IS_USER_ADDRESS(address + size);
5259 }
5260 
5261 
5262 //	#pragma mark - kernel public API
5263 
5264 
5265 status_t
5266 user_memcpy(void* to, const void* from, size_t size)
5267 {
5268 	if (!validate_user_range(to, size) || !validate_user_range(from, size))
5269 		return B_BAD_ADDRESS;
5270 
5271 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5272 		return B_BAD_ADDRESS;
5273 
5274 	return B_OK;
5275 }
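
/*	Illustrative usage sketch: syscall handlers in this file pair an explicit
	IS_USER_ADDRESS() check with user_memcpy() when pulling a structure out of
	userland (hypothetical "userInfo" pointer and "info" structure):

		if (!IS_USER_ADDRESS(userInfo)
			|| user_memcpy(&info, userInfo, sizeof(info)) != B_OK)
			return B_BAD_ADDRESS;

	The IS_USER_ADDRESS() check rejects kernel pointers up front; the copy
	itself is fault protected and returns B_BAD_ADDRESS for unmapped memory.
*/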
5276 
5277 
5278 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5279 	the string in \a to, NULL-terminating the result.
5280 
5281 	\param to Pointer to the destination C-string.
5282 	\param from Pointer to the source C-string.
5283 	\param size Size in bytes of the string buffer pointed to by \a to.
5284 
5285 	\return strlen(\a from), or a negative error code on failure.
5286 */
5287 ssize_t
5288 user_strlcpy(char* to, const char* from, size_t size)
5289 {
5290 	if (to == NULL && size != 0)
5291 		return B_BAD_VALUE;
5292 	if (from == NULL)
5293 		return B_BAD_ADDRESS;
5294 	if (!validate_user_range(to, size) || !validate_user_range(from, size))
5295 		return B_BAD_ADDRESS;
5296 
5297 	return arch_cpu_user_strlcpy(to, from, size);
5298 }
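
/*	Illustrative usage sketch: callers treat a negative return value as a bad
	address and a value >= the buffer size as truncation (hypothetical
	"userName" pointer):

		char name[B_OS_NAME_LENGTH];
		ssize_t length = user_strlcpy(name, userName, sizeof(name));
		if (length < B_OK)
			return B_BAD_ADDRESS;
		if ((size_t)length >= sizeof(name))
			;	// the source string was truncated
*/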
5299 
5300 
5301 status_t
5302 user_memset(void* s, char c, size_t count)
5303 {
5304 	if (!validate_user_range(s, count))
5305 		return B_BAD_ADDRESS;
5306 
5307 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5308 		return B_BAD_ADDRESS;
5309 
5310 	return B_OK;
5311 }
5312 
5313 
5314 /*!	Wires a single page at the given address.
5315 
5316 	\param team The team whose address space the address belongs to. Also
5317 		supports \c B_CURRENT_TEAM. If the given address is a kernel address,
5318 		the parameter is ignored.
5319 	\param address The virtual address to wire down. Does not need to be
5320 		page aligned.
5321 	\param writable If \c true the page shall be writable.
5322 	\param info On success the info is filled in, among other things
5323 		containing the physical address the given virtual one translates to.
5324 	\return \c B_OK, when the page could be wired, another error code otherwise.
5325 */
5326 status_t
5327 vm_wire_page(team_id team, addr_t address, bool writable,
5328 	VMPageWiringInfo* info)
5329 {
5330 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5331 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5332 
5333 	// compute the page protection that is required
5334 	bool isUser = IS_USER_ADDRESS(address);
5335 	uint32 requiredProtection = PAGE_PRESENT
5336 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5337 	if (writable)
5338 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5339 
5340 	// get and read lock the address space
5341 	VMAddressSpace* addressSpace = NULL;
5342 	if (isUser) {
5343 		if (team == B_CURRENT_TEAM)
5344 			addressSpace = VMAddressSpace::GetCurrent();
5345 		else
5346 			addressSpace = VMAddressSpace::Get(team);
5347 	} else
5348 		addressSpace = VMAddressSpace::GetKernel();
5349 	if (addressSpace == NULL)
5350 		return B_ERROR;
5351 
5352 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5353 
5354 	VMTranslationMap* map = addressSpace->TranslationMap();
5355 	status_t error = B_OK;
5356 
5357 	// get the area
5358 	VMArea* area = addressSpace->LookupArea(pageAddress);
5359 	if (area == NULL) {
5360 		addressSpace->Put();
5361 		return B_BAD_ADDRESS;
5362 	}
5363 
5364 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5365 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5366 
5367 	// mark the area range wired
5368 	area->Wire(&info->range);
5369 
5370 	// Lock the area's cache chain and the translation map. Needed to look
5371 	// up the page and play with its wired count.
5372 	cacheChainLocker.LockAllSourceCaches();
5373 	map->Lock();
5374 
5375 	phys_addr_t physicalAddress;
5376 	uint32 flags;
5377 	vm_page* page;
5378 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5379 		&& (flags & requiredProtection) == requiredProtection
5380 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5381 			!= NULL) {
5382 		// Already mapped with the correct permissions -- just increment
5383 		// the page's wired count.
5384 		increment_page_wired_count(page);
5385 
5386 		map->Unlock();
5387 		cacheChainLocker.Unlock();
5388 		addressSpaceLocker.Unlock();
5389 	} else {
5390 		// Let vm_soft_fault() map the page for us, if possible. We need
5391 		// to fully unlock to avoid deadlocks. Since we have already
5392 		// wired the area itself, nothing disturbing will happen with it
5393 		// in the meantime.
5394 		map->Unlock();
5395 		cacheChainLocker.Unlock();
5396 		addressSpaceLocker.Unlock();
5397 
5398 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5399 			isUser, &page);
5400 
5401 		if (error != B_OK) {
5402 			// The page could not be mapped -- clean up.
5403 			VMCache* cache = vm_area_get_locked_cache(area);
5404 			area->Unwire(&info->range);
5405 			cache->ReleaseRefAndUnlock();
5406 			addressSpace->Put();
5407 			return error;
5408 		}
5409 	}
5410 
5411 	info->physicalAddress
5412 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5413 			+ address % B_PAGE_SIZE;
5414 	info->page = page;
5415 
5416 	return B_OK;
5417 }
5418 
5419 
5420 /*!	Unwires a single page previously wired via vm_wire_page().
5421 
5422 	\param info The same object passed to vm_wire_page() before.
5423 */
5424 void
5425 vm_unwire_page(VMPageWiringInfo* info)
5426 {
5427 	// lock the address space
5428 	VMArea* area = info->range.area;
5429 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5430 		// takes over our reference
5431 
5432 	// lock the top cache
5433 	VMCache* cache = vm_area_get_locked_cache(area);
5434 	VMCacheChainLocker cacheChainLocker(cache);
5435 
5436 	if (info->page->Cache() != cache) {
5437 		// The page is not in the top cache, so we lock the whole cache chain
5438 		// before touching the page's wired count.
5439 		cacheChainLocker.LockAllSourceCaches();
5440 	}
5441 
5442 	decrement_page_wired_count(info->page);
5443 
5444 	// remove the wired range from the area
5445 	area->Unwire(&info->range);
5446 
5447 	cacheChainLocker.Unlock();
5448 }
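
/*	Illustrative usage sketch: vm_wire_page() and vm_unwire_page() are used as
	a pair to pin a single page for a short physical access (hypothetical
	"userAddress"):

		VMPageWiringInfo info;
		if (vm_wire_page(B_CURRENT_TEAM, userAddress, false, &info) == B_OK) {
			// info.physicalAddress and info.page are valid; the raised wired
			// count keeps the page mapped until ...
			vm_unwire_page(&info);
		}
*/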
5449 
5450 
5451 /*!	Wires down the given address range in the specified team's address space.
5452 
5453 	If successful the function
5454 	- acquires a reference to the specified team's address space,
5455 	- adds respective wired ranges to all areas that intersect with the given
5456 	  address range,
5457 	- makes sure all pages in the given address range are mapped with the
5458 	  requested access permissions and increments their wired count.
5459 
5460 	It fails when \a team doesn't specify a valid address space, when any part
5461 	of the specified address range is not covered by areas, when the concerned
5462 	areas don't allow mapping with the requested permissions, or when mapping
5463 	failed for another reason.
5464 
5465 	When successful, the call must be balanced by an unlock_memory_etc() call
5466 	with the exact same parameters.
5467 
5468 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
5469 		is supported.
5470 	\param address The start of the address range to be wired.
5471 	\param numBytes The size of the address range to be wired.
5472 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5473 		requests that the range must be wired writable ("read from device
5474 		into memory").
5475 	\return \c B_OK on success, another error code otherwise.
5476 */
5477 status_t
5478 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5479 {
5480 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5481 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5482 
5483 	// compute the page protection that is required
5484 	bool isUser = IS_USER_ADDRESS(address);
5485 	bool writable = (flags & B_READ_DEVICE) == 0;
5486 	uint32 requiredProtection = PAGE_PRESENT
5487 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5488 	if (writable)
5489 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5490 
5491 	uint32 mallocFlags = isUser
5492 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5493 
5494 	// get and read lock the address space
5495 	VMAddressSpace* addressSpace = NULL;
5496 	if (isUser) {
5497 		if (team == B_CURRENT_TEAM)
5498 			addressSpace = VMAddressSpace::GetCurrent();
5499 		else
5500 			addressSpace = VMAddressSpace::Get(team);
5501 	} else
5502 		addressSpace = VMAddressSpace::GetKernel();
5503 	if (addressSpace == NULL)
5504 		return B_ERROR;
5505 
5506 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5507 		// We get a new address space reference here. The one we got above will
5508 		// be freed by unlock_memory_etc().
5509 
5510 	VMTranslationMap* map = addressSpace->TranslationMap();
5511 	status_t error = B_OK;
5512 
5513 	// iterate through all concerned areas
5514 	addr_t nextAddress = lockBaseAddress;
5515 	while (nextAddress != lockEndAddress) {
5516 		// get the next area
5517 		VMArea* area = addressSpace->LookupArea(nextAddress);
5518 		if (area == NULL) {
5519 			error = B_BAD_ADDRESS;
5520 			break;
5521 		}
5522 
5523 		addr_t areaStart = nextAddress;
5524 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5525 
5526 		// allocate the wired range (do that before locking the cache to avoid
5527 		// deadlocks)
5528 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5529 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5530 		if (range == NULL) {
5531 			error = B_NO_MEMORY;
5532 			break;
5533 		}
5534 
5535 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5536 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5537 
5538 		// mark the area range wired
5539 		area->Wire(range);
5540 
5541 		// Depending on the area cache type and the wiring, we may not need to
5542 		// look at the individual pages.
5543 		if (area->cache_type == CACHE_TYPE_NULL
5544 			|| area->cache_type == CACHE_TYPE_DEVICE
5545 			|| area->wiring == B_FULL_LOCK
5546 			|| area->wiring == B_CONTIGUOUS) {
5547 			nextAddress = areaEnd;
5548 			continue;
5549 		}
5550 
5551 		// Lock the area's cache chain and the translation map. Needed to look
5552 		// up pages and play with their wired count.
5553 		cacheChainLocker.LockAllSourceCaches();
5554 		map->Lock();
5555 
5556 		// iterate through the pages and wire them
5557 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5558 			phys_addr_t physicalAddress;
5559 			uint32 flags;
5560 
5561 			vm_page* page;
5562 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5563 				&& (flags & requiredProtection) == requiredProtection
5564 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5565 					!= NULL) {
5566 				// Already mapped with the correct permissions -- just increment
5567 				// the page's wired count.
5568 				increment_page_wired_count(page);
5569 			} else {
5570 				// Let vm_soft_fault() map the page for us, if possible. We need
5571 				// to fully unlock to avoid deadlocks. Since we have already
5572 				// wired the area itself, nothing disturbing will happen with it
5573 				// in the meantime.
5574 				map->Unlock();
5575 				cacheChainLocker.Unlock();
5576 				addressSpaceLocker.Unlock();
5577 
5578 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5579 					false, isUser, &page);
5580 
5581 				addressSpaceLocker.Lock();
5582 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5583 				cacheChainLocker.LockAllSourceCaches();
5584 				map->Lock();
5585 			}
5586 
5587 			if (error != B_OK)
5588 				break;
5589 		}
5590 
5591 		map->Unlock();
5592 
5593 		if (error == B_OK) {
5594 			cacheChainLocker.Unlock();
5595 		} else {
5596 			// An error occurred, so abort right here. If the current address
5597 			// is the first in this area, unwire the area, since we won't get
5598 			// to it when reverting what we've done so far.
5599 			if (nextAddress == areaStart) {
5600 				area->Unwire(range);
5601 				cacheChainLocker.Unlock();
5602 				range->~VMAreaWiredRange();
5603 				free_etc(range, mallocFlags);
5604 			} else
5605 				cacheChainLocker.Unlock();
5606 
5607 			break;
5608 		}
5609 	}
5610 
5611 	if (error != B_OK) {
5612 		// An error occurred, so unwire all that we've already wired. Note that
5613 		// even if not a single page was wired, unlock_memory_etc() is called
5614 		// to put the address space reference.
5615 		addressSpaceLocker.Unlock();
5616 		unlock_memory_etc(team, (void*)lockBaseAddress,
5617 			nextAddress - lockBaseAddress, flags);
5618 	}
5619 
5620 	return error;
5621 }
5622 
5623 
5624 status_t
5625 lock_memory(void* address, size_t numBytes, uint32 flags)
5626 {
5627 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5628 }
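
/*	Illustrative usage sketch: a driver wiring a userland buffer before a
	device transfer locks it and later releases it with the exact same
	parameters (hypothetical "buffer" and "length"):

		if (lock_memory(buffer, length, B_READ_DEVICE) == B_OK) {
			// ... let the device read into the buffer ...
			unlock_memory(buffer, length, B_READ_DEVICE);
		}

	As documented above, the unlock_memory()/unlock_memory_etc() call must use
	the exact same parameters as the locking call.
*/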
5629 
5630 
5631 /*!	Unwires an address range previously wired with lock_memory_etc().
5632 
5633 	Note that a call to this function must balance a previous lock_memory_etc()
5634 	call with exactly the same parameters.
5635 */
5636 status_t
5637 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5638 {
5639 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5640 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5641 
5642 	// compute the page protection that is required
5643 	bool isUser = IS_USER_ADDRESS(address);
5644 	bool writable = (flags & B_READ_DEVICE) == 0;
5645 	uint32 requiredProtection = PAGE_PRESENT
5646 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5647 	if (writable)
5648 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5649 
5650 	uint32 mallocFlags = isUser
5651 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5652 
5653 	// get and read lock the address space
5654 	VMAddressSpace* addressSpace = NULL;
5655 	if (isUser) {
5656 		if (team == B_CURRENT_TEAM)
5657 			addressSpace = VMAddressSpace::GetCurrent();
5658 		else
5659 			addressSpace = VMAddressSpace::Get(team);
5660 	} else
5661 		addressSpace = VMAddressSpace::GetKernel();
5662 	if (addressSpace == NULL)
5663 		return B_ERROR;
5664 
5665 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5666 		// Take over the address space reference. We don't unlock until we're
5667 		// done.
5668 
5669 	VMTranslationMap* map = addressSpace->TranslationMap();
5670 	status_t error = B_OK;
5671 
5672 	// iterate through all concerned areas
5673 	addr_t nextAddress = lockBaseAddress;
5674 	while (nextAddress != lockEndAddress) {
5675 		// get the next area
5676 		VMArea* area = addressSpace->LookupArea(nextAddress);
5677 		if (area == NULL) {
5678 			error = B_BAD_ADDRESS;
5679 			break;
5680 		}
5681 
5682 		addr_t areaStart = nextAddress;
5683 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5684 
5685 		// Lock the area's top cache. This is a requirement for
5686 		// VMArea::Unwire().
5687 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5688 
5689 		// Depending on the area cache type and the wiring, we may not need to
5690 		// look at the individual pages.
5691 		if (area->cache_type == CACHE_TYPE_NULL
5692 			|| area->cache_type == CACHE_TYPE_DEVICE
5693 			|| area->wiring == B_FULL_LOCK
5694 			|| area->wiring == B_CONTIGUOUS) {
5695 			// unwire the range (to avoid deadlocks we delete the range after
5696 			// unlocking the cache)
5697 			nextAddress = areaEnd;
5698 			VMAreaWiredRange* range = area->Unwire(areaStart,
5699 				areaEnd - areaStart, writable);
5700 			cacheChainLocker.Unlock();
5701 			if (range != NULL) {
5702 				range->~VMAreaWiredRange();
5703 				free_etc(range, mallocFlags);
5704 			}
5705 			continue;
5706 		}
5707 
5708 		// Lock the area's cache chain and the translation map. Needed to look
5709 		// up pages and play with their wired count.
5710 		cacheChainLocker.LockAllSourceCaches();
5711 		map->Lock();
5712 
5713 		// iterate through the pages and unwire them
5714 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5715 			phys_addr_t physicalAddress;
5716 			uint32 flags;
5717 
5718 			vm_page* page;
5719 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5720 				&& (flags & PAGE_PRESENT) != 0
5721 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5722 					!= NULL) {
5723 				// The page is still mapped -- just decrement its wired
5724 				// count.
5725 				decrement_page_wired_count(page);
5726 			} else {
5727 				panic("unlock_memory_etc(): Failed to unwire page: address "
5728 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5729 					nextAddress);
5730 				error = B_BAD_VALUE;
5731 				break;
5732 			}
5733 		}
5734 
5735 		map->Unlock();
5736 
5737 		// All pages are unwired. Remove the area's wired range as well (to
5738 		// avoid deadlocks we delete the range after unlocking the cache).
5739 		VMAreaWiredRange* range = area->Unwire(areaStart,
5740 			areaEnd - areaStart, writable);
5741 
5742 		cacheChainLocker.Unlock();
5743 
5744 		if (range != NULL) {
5745 			range->~VMAreaWiredRange();
5746 			free_etc(range, mallocFlags);
5747 		}
5748 
5749 		if (error != B_OK)
5750 			break;
5751 	}
5752 
5753 	// get rid of the address space reference lock_memory_etc() acquired
5754 	addressSpace->Put();
5755 
5756 	return error;
5757 }
5758 
5759 
5760 status_t
5761 unlock_memory(void* address, size_t numBytes, uint32 flags)
5762 {
5763 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5764 }
5765 
5766 
5767 /*!	Similar to get_memory_map(), but also allows specifying the address space
5768 	for the memory in question and has saner semantics.
5769 	Returns \c B_OK when the complete range could be translated or
5770 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5771 	case the actual number of entries is written to \c *_numEntries. Any other
5772 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5773 	in this case.
5774 */
5775 status_t
5776 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5777 	physical_entry* table, uint32* _numEntries)
5778 {
5779 	uint32 numEntries = *_numEntries;
5780 	*_numEntries = 0;
5781 
5782 	VMAddressSpace* addressSpace;
5783 	addr_t virtualAddress = (addr_t)address;
5784 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5785 	phys_addr_t physicalAddress;
5786 	status_t status = B_OK;
5787 	int32 index = -1;
5788 	addr_t offset = 0;
5789 	bool interrupts = are_interrupts_enabled();
5790 
5791 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5792 		"entries)\n", team, address, numBytes, numEntries));
5793 
5794 	if (numEntries == 0 || numBytes == 0)
5795 		return B_BAD_VALUE;
5796 
5797 	// in which address space is the address to be found?
5798 	if (IS_USER_ADDRESS(virtualAddress)) {
5799 		if (team == B_CURRENT_TEAM)
5800 			addressSpace = VMAddressSpace::GetCurrent();
5801 		else
5802 			addressSpace = VMAddressSpace::Get(team);
5803 	} else
5804 		addressSpace = VMAddressSpace::GetKernel();
5805 
5806 	if (addressSpace == NULL)
5807 		return B_ERROR;
5808 
5809 	VMTranslationMap* map = addressSpace->TranslationMap();
5810 
5811 	if (interrupts)
5812 		map->Lock();
5813 
5814 	while (offset < numBytes) {
5815 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5816 		uint32 flags;
5817 
5818 		if (interrupts) {
5819 			status = map->Query((addr_t)address + offset, &physicalAddress,
5820 				&flags);
5821 		} else {
5822 			status = map->QueryInterrupt((addr_t)address + offset,
5823 				&physicalAddress, &flags);
5824 		}
5825 		if (status < B_OK)
5826 			break;
5827 		if ((flags & PAGE_PRESENT) == 0) {
5828 			panic("get_memory_map() called on unmapped memory!");
5829 			return B_BAD_ADDRESS;
5830 		}
5831 
5832 		if (index < 0 && pageOffset > 0) {
5833 			physicalAddress += pageOffset;
5834 			if (bytes > B_PAGE_SIZE - pageOffset)
5835 				bytes = B_PAGE_SIZE - pageOffset;
5836 		}
5837 
5838 		// need to switch to the next physical_entry?
5839 		if (index < 0 || table[index].address
5840 				!= physicalAddress - table[index].size) {
5841 			if ((uint32)++index + 1 > numEntries) {
5842 				// table too small
5843 				break;
5844 			}
5845 			table[index].address = physicalAddress;
5846 			table[index].size = bytes;
5847 		} else {
5848 			// the page fits into the current entry
5849 			table[index].size += bytes;
5850 		}
5851 
5852 		offset += bytes;
5853 	}
5854 
5855 	if (interrupts)
5856 		map->Unlock();
5857 
5858 	if (status != B_OK)
5859 		return status;
5860 
5861 	if ((uint32)index + 1 > numEntries) {
5862 		*_numEntries = index;
5863 		return B_BUFFER_OVERFLOW;
5864 	}
5865 
5866 	*_numEntries = index + 1;
5867 	return B_OK;
5868 }
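
/*	Illustrative usage sketch: translating a (previously wired) buffer into a
	scatter/gather table (hypothetical "buffer" and "length"):

		physical_entry table[8];
		uint32 entries = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
			table, &entries);
		if (status == B_OK || status == B_BUFFER_OVERFLOW) {
			// "entries" holds the number of filled table slots; with
			// B_BUFFER_OVERFLOW only a prefix of the range was translated
		}
*/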
5869 
5870 
5871 /*!	According to the BeBook, this function should always succeed.
5872 	This is no longer the case.
5873 */
5874 extern "C" int32
5875 __get_memory_map_haiku(const void* address, size_t numBytes,
5876 	physical_entry* table, int32 numEntries)
5877 {
5878 	uint32 entriesRead = numEntries;
5879 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5880 		table, &entriesRead);
5881 	if (error != B_OK)
5882 		return error;
5883 
5884 	// close the entry list
5885 
5886 	// with only one entry, we silently accept the missing terminating entry
5887 	if (numEntries == 1)
5888 		return B_OK;
5889 
5890 	if (entriesRead + 1 > (uint32)numEntries)
5891 		return B_BUFFER_OVERFLOW;
5892 
5893 	table[entriesRead].address = 0;
5894 	table[entriesRead].size = 0;
5895 
5896 	return B_OK;
5897 }
5898 
5899 
5900 area_id
5901 area_for(void* address)
5902 {
5903 	return vm_area_for((addr_t)address, true);
5904 }
5905 
5906 
5907 area_id
5908 find_area(const char* name)
5909 {
5910 	return VMAreaHash::Find(name);
5911 }
5912 
5913 
5914 status_t
5915 _get_area_info(area_id id, area_info* info, size_t size)
5916 {
5917 	if (size != sizeof(area_info) || info == NULL)
5918 		return B_BAD_VALUE;
5919 
5920 	AddressSpaceReadLocker locker;
5921 	VMArea* area;
5922 	status_t status = locker.SetFromArea(id, area);
5923 	if (status != B_OK)
5924 		return status;
5925 
5926 	fill_area_info(area, info, size);
5927 	return B_OK;
5928 }
5929 
5930 
5931 status_t
5932 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5933 {
5934 	addr_t nextBase = *(addr_t*)cookie;
5935 
5936 	// we're already through the list
5937 	if (nextBase == (addr_t)-1)
5938 		return B_ENTRY_NOT_FOUND;
5939 
5940 	if (team == B_CURRENT_TEAM)
5941 		team = team_get_current_team_id();
5942 
5943 	AddressSpaceReadLocker locker(team);
5944 	if (!locker.IsLocked())
5945 		return B_BAD_TEAM_ID;
5946 
5947 	VMArea* area;
5948 	for (VMAddressSpace::AreaIterator it
5949 				= locker.AddressSpace()->GetAreaIterator();
5950 			(area = it.Next()) != NULL;) {
5951 		if (area->Base() > nextBase)
5952 			break;
5953 	}
5954 
5955 	if (area == NULL) {
5956 		nextBase = (addr_t)-1;
5957 		return B_ENTRY_NOT_FOUND;
5958 	}
5959 
5960 	fill_area_info(area, info, size);
5961 	*cookie = (ssize_t)(area->Base());
5962 
5963 	return B_OK;
5964 }
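
/*	Illustrative usage sketch: the public, cookie-based iteration over a
	team's areas maps onto the function above (via the get_next_area_info()
	macro from OS.h):

		ssize_t cookie = 0;
		area_info info;
		while (get_next_area_info(B_CURRENT_TEAM, &cookie, &info) == B_OK)
			dprintf("area %" B_PRId32 ": %s\n", info.area, info.name);
*/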
5965 
5966 
5967 status_t
5968 set_area_protection(area_id area, uint32 newProtection)
5969 {
5970 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5971 		newProtection, true);
5972 }
5973 
5974 
5975 status_t
5976 resize_area(area_id areaID, size_t newSize)
5977 {
5978 	return vm_resize_area(areaID, newSize, true);
5979 }
5980 
5981 
5982 /*!	Transfers the specified area to a new team. The caller must be the owner
5983 	of the area.
5984 */
5985 area_id
5986 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5987 	bool kernel)
5988 {
5989 	area_info info;
5990 	status_t status = get_area_info(id, &info);
5991 	if (status != B_OK)
5992 		return status;
5993 
5994 	if (info.team != thread_get_current_thread()->team->id)
5995 		return B_PERMISSION_DENIED;
5996 
5997 	// We need to mark the area cloneable so the following operations work.
5998 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
5999 	if (status != B_OK)
6000 		return status;
6001 
6002 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6003 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6004 	if (clonedArea < 0)
6005 		return clonedArea;
6006 
6007 	status = vm_delete_area(info.team, id, kernel);
6008 	if (status != B_OK) {
6009 		vm_delete_area(target, clonedArea, kernel);
6010 		return status;
6011 	}
6012 
6013 	// Now we can reset the protection to whatever it was before.
6014 	set_area_protection(clonedArea, info.protection);
6015 
6016 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6017 
6018 	return clonedArea;
6019 }
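
/*	Illustrative usage sketch: handing an area over to another team
	(hypothetical "myArea" and "targetTeam"):

		void* address = NULL;
		area_id newArea = transfer_area(myArea, &address, B_ANY_ADDRESS,
			targetTeam, true);

	On success the original area is deleted and "newArea" lives in the target
	team's address space at "address".
*/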
6020 
6021 
6022 extern "C" area_id
6023 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6024 	size_t numBytes, uint32 addressSpec, uint32 protection,
6025 	void** _virtualAddress)
6026 {
6027 	if (!arch_vm_supports_protection(protection))
6028 		return B_NOT_SUPPORTED;
6029 
6030 	fix_protection(&protection);
6031 
6032 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6033 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6034 		false);
6035 }
6036 
6037 
6038 area_id
6039 clone_area(const char* name, void** _address, uint32 addressSpec,
6040 	uint32 protection, area_id source)
6041 {
6042 	if ((protection & B_KERNEL_PROTECTION) == 0)
6043 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6044 
6045 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6046 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6047 }
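
/*	Illustrative usage sketch: cloning an existing area into the kernel
	address space (hypothetical "sourceArea"):

		void* address;
		area_id clone = clone_area("some clone", &address,
			B_ANY_KERNEL_ADDRESS, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
			sourceArea);
		if (clone >= 0) {
			// "address" now points at the cloned mapping;
			// delete_area(clone) releases it again
		}
*/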
6048 
6049 
6050 area_id
6051 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6052 	uint32 protection, uint32 flags, uint32 guardSize,
6053 	const virtual_address_restrictions* virtualAddressRestrictions,
6054 	const physical_address_restrictions* physicalAddressRestrictions,
6055 	void** _address)
6056 {
6057 	fix_protection(&protection);
6058 
6059 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6060 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6061 		true, _address);
6062 }
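
/*	Illustrative usage sketch: creating a physically contiguous kernel buffer
	with an upper physical address bound (assuming the high_address field of
	physical_address_restrictions; values are examples only):

		virtual_address_restrictions virtualRestrictions = {};
		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
		physical_address_restrictions physicalRestrictions = {};
		physicalRestrictions.high_address = (phys_addr_t)1 << 32;
		void* address;
		area_id area = create_area_etc(B_SYSTEM_TEAM, "dma buffer",
			4 * B_PAGE_SIZE, B_CONTIGUOUS,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
			&virtualRestrictions, &physicalRestrictions, &address);
*/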
6063 
6064 
6065 extern "C" area_id
6066 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6067 	size_t size, uint32 lock, uint32 protection)
6068 {
6069 	fix_protection(&protection);
6070 
6071 	virtual_address_restrictions virtualRestrictions = {};
6072 	virtualRestrictions.address = *_address;
6073 	virtualRestrictions.address_specification = addressSpec;
6074 	physical_address_restrictions physicalRestrictions = {};
6075 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6076 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6077 		true, _address);
6078 }
6079 
6080 
6081 status_t
6082 delete_area(area_id area)
6083 {
6084 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6085 }
6086 
6087 
6088 //	#pragma mark - Userland syscalls
6089 
6090 
6091 status_t
6092 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6093 	addr_t size)
6094 {
6095 	// filter out some unavailable values (for userland)
6096 	switch (addressSpec) {
6097 		case B_ANY_KERNEL_ADDRESS:
6098 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6099 			return B_BAD_VALUE;
6100 	}
6101 
6102 	addr_t address;
6103 
6104 	if (!IS_USER_ADDRESS(userAddress)
6105 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6106 		return B_BAD_ADDRESS;
6107 
6108 	status_t status = vm_reserve_address_range(
6109 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6110 		RESERVED_AVOID_BASE);
6111 	if (status != B_OK)
6112 		return status;
6113 
6114 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6115 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6116 			(void*)address, size);
6117 		return B_BAD_ADDRESS;
6118 	}
6119 
6120 	return B_OK;
6121 }
6122 
6123 
6124 status_t
6125 _user_unreserve_address_range(addr_t address, addr_t size)
6126 {
6127 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6128 		(void*)address, size);
6129 }
6130 
6131 
6132 area_id
6133 _user_area_for(void* address)
6134 {
6135 	return vm_area_for((addr_t)address, false);
6136 }
6137 
6138 
6139 area_id
6140 _user_find_area(const char* userName)
6141 {
6142 	char name[B_OS_NAME_LENGTH];
6143 
6144 	if (!IS_USER_ADDRESS(userName)
6145 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6146 		return B_BAD_ADDRESS;
6147 
6148 	return find_area(name);
6149 }
6150 
6151 
6152 status_t
6153 _user_get_area_info(area_id area, area_info* userInfo)
6154 {
6155 	if (!IS_USER_ADDRESS(userInfo))
6156 		return B_BAD_ADDRESS;
6157 
6158 	area_info info;
6159 	status_t status = get_area_info(area, &info);
6160 	if (status < B_OK)
6161 		return status;
6162 
6163 	// TODO: do we want to prevent userland from seeing kernel protections?
6164 	//info.protection &= B_USER_PROTECTION;
6165 
6166 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6167 		return B_BAD_ADDRESS;
6168 
6169 	return status;
6170 }
6171 
6172 
6173 status_t
6174 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6175 {
6176 	ssize_t cookie;
6177 
6178 	if (!IS_USER_ADDRESS(userCookie)
6179 		|| !IS_USER_ADDRESS(userInfo)
6180 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6181 		return B_BAD_ADDRESS;
6182 
6183 	area_info info;
6184 	status_t status = _get_next_area_info(team, &cookie, &info,
6185 		sizeof(area_info));
6186 	if (status != B_OK)
6187 		return status;
6188 
6189 	//info.protection &= B_USER_PROTECTION;
6190 
6191 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6192 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6193 		return B_BAD_ADDRESS;
6194 
6195 	return status;
6196 }
6197 
6198 
6199 status_t
6200 _user_set_area_protection(area_id area, uint32 newProtection)
6201 {
6202 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6203 		return B_BAD_VALUE;
6204 
6205 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6206 		newProtection, false);
6207 }
6208 
6209 
6210 status_t
6211 _user_resize_area(area_id area, size_t newSize)
6212 {
6213 	// TODO: Since we restrict deleting areas to those owned by the team,
6214 	// we should also do that for resizing (check other functions, too).
6215 	return vm_resize_area(area, newSize, false);
6216 }
6217 
6218 
6219 area_id
6220 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6221 	team_id target)
6222 {
6223 	// filter out some unavailable values (for userland)
6224 	switch (addressSpec) {
6225 		case B_ANY_KERNEL_ADDRESS:
6226 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6227 			return B_BAD_VALUE;
6228 	}
6229 
6230 	void* address;
6231 	if (!IS_USER_ADDRESS(userAddress)
6232 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6233 		return B_BAD_ADDRESS;
6234 
6235 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6236 	if (newArea < B_OK)
6237 		return newArea;
6238 
6239 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6240 		return B_BAD_ADDRESS;
6241 
6242 	return newArea;
6243 }
6244 
6245 
6246 area_id
6247 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6248 	uint32 protection, area_id sourceArea)
6249 {
6250 	char name[B_OS_NAME_LENGTH];
6251 	void* address;
6252 
6253 	// filter out some unavailable values (for userland)
6254 	switch (addressSpec) {
6255 		case B_ANY_KERNEL_ADDRESS:
6256 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6257 			return B_BAD_VALUE;
6258 	}
6259 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6260 		return B_BAD_VALUE;
6261 
6262 	if (!IS_USER_ADDRESS(userName)
6263 		|| !IS_USER_ADDRESS(userAddress)
6264 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6265 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6266 		return B_BAD_ADDRESS;
6267 
6268 	fix_protection(&protection);
6269 
6270 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6271 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6272 		false);
6273 	if (clonedArea < B_OK)
6274 		return clonedArea;
6275 
6276 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6277 		delete_area(clonedArea);
6278 		return B_BAD_ADDRESS;
6279 	}
6280 
6281 	return clonedArea;
6282 }
6283 
6284 
6285 area_id
6286 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6287 	size_t size, uint32 lock, uint32 protection)
6288 {
6289 	char name[B_OS_NAME_LENGTH];
6290 	void* address;
6291 
6292 	// filter out some unavailable values (for userland)
6293 	switch (addressSpec) {
6294 		case B_ANY_KERNEL_ADDRESS:
6295 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6296 			return B_BAD_VALUE;
6297 	}
6298 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6299 		return B_BAD_VALUE;
6300 
6301 	if (!IS_USER_ADDRESS(userName)
6302 		|| !IS_USER_ADDRESS(userAddress)
6303 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6304 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6305 		return B_BAD_ADDRESS;
6306 
6307 	if (addressSpec == B_EXACT_ADDRESS
6308 		&& IS_KERNEL_ADDRESS(address))
6309 		return B_BAD_VALUE;
6310 
6311 	if (addressSpec == B_ANY_ADDRESS)
6312 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6313 	if (addressSpec == B_BASE_ADDRESS)
6314 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6315 
6316 	fix_protection(&protection);
6317 
6318 	virtual_address_restrictions virtualRestrictions = {};
6319 	virtualRestrictions.address = address;
6320 	virtualRestrictions.address_specification = addressSpec;
6321 	physical_address_restrictions physicalRestrictions = {};
6322 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6323 		size, lock, protection, 0, 0, &virtualRestrictions,
6324 		&physicalRestrictions, false, &address);
6325 
6326 	if (area >= B_OK
6327 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6328 		delete_area(area);
6329 		return B_BAD_ADDRESS;
6330 	}
6331 
6332 	return area;
6333 }
6334 
6335 
6336 status_t
6337 _user_delete_area(area_id area)
6338 {
6339 	// Unlike the BeOS implementation, you can now only delete areas
6340 	// that you have created yourself from userland.
6341 	// The documentation to delete_area() explicitly states that this
6342 	// will be restricted in the future, and so it will.
6343 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6344 }
6345 
6346 
6347 // TODO: create a BeOS style call for this!
6348 
6349 area_id
6350 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6351 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6352 	int fd, off_t offset)
6353 {
6354 	char name[B_OS_NAME_LENGTH];
6355 	void* address;
6356 	area_id area;
6357 
6358 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6359 		return B_BAD_VALUE;
6360 
6361 	fix_protection(&protection);
6362 
6363 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6364 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6365 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6366 		return B_BAD_ADDRESS;
6367 
6368 	if (addressSpec == B_EXACT_ADDRESS) {
6369 		if ((addr_t)address + size < (addr_t)address
6370 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6371 			return B_BAD_VALUE;
6372 		}
6373 		if (!IS_USER_ADDRESS(address)
6374 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6375 			return B_BAD_ADDRESS;
6376 		}
6377 	}
6378 
6379 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6380 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6381 		false);
6382 	if (area < B_OK)
6383 		return area;
6384 
6385 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6386 		return B_BAD_ADDRESS;
6387 
6388 	return area;
6389 }
6390 
6391 
6392 status_t
6393 _user_unmap_memory(void* _address, size_t size)
6394 {
6395 	addr_t address = (addr_t)_address;
6396 
6397 	// check params
6398 	if (size == 0 || (addr_t)address + size < (addr_t)address
6399 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6400 		return B_BAD_VALUE;
6401 	}
6402 
6403 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6404 		return B_BAD_ADDRESS;
6405 
6406 	// Write lock the address space and ensure the address range is not wired.
6407 	AddressSpaceWriteLocker locker;
6408 	do {
6409 		status_t status = locker.SetTo(team_get_current_team_id());
6410 		if (status != B_OK)
6411 			return status;
6412 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6413 			size, &locker));
6414 
6415 	// unmap
6416 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6417 }
6418 
6419 
6420 status_t
6421 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6422 {
6423 	// check address range
6424 	addr_t address = (addr_t)_address;
6425 	size = PAGE_ALIGN(size);
6426 
6427 	if ((address % B_PAGE_SIZE) != 0)
6428 		return B_BAD_VALUE;
6429 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6430 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6431 		// weird error code required by POSIX
6432 		return ENOMEM;
6433 	}
6434 
6435 	// extend and check protection
6436 	if ((protection & ~B_USER_PROTECTION) != 0)
6437 		return B_BAD_VALUE;
6438 
6439 	fix_protection(&protection);
6440 
6441 	// We need to write lock the address space, since we're going to play with
6442 	// the areas. Also make sure that none of the areas is wired and that we're
6443 	// actually allowed to change the protection.
6444 	AddressSpaceWriteLocker locker;
6445 
6446 	bool restart;
6447 	do {
6448 		restart = false;
6449 
6450 		status_t status = locker.SetTo(team_get_current_team_id());
6451 		if (status != B_OK)
6452 			return status;
6453 
6454 		// First round: Check whether the whole range is covered by areas and
6455 		// whether we are allowed to modify them.
6456 		addr_t currentAddress = address;
6457 		size_t sizeLeft = size;
6458 		while (sizeLeft > 0) {
6459 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6460 			if (area == NULL)
6461 				return B_NO_MEMORY;
6462 
6463 			if (area->address_space == VMAddressSpace::Kernel())
6464 				return B_NOT_ALLOWED;
6465 
6466 			// TODO: For (shared) mapped files we should check whether the new
6467 			// protections are compatible with the file permissions. We don't
6468 			// have a way to do that yet, though.
6469 
6470 			addr_t offset = currentAddress - area->Base();
6471 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6472 
6473 			AreaCacheLocker cacheLocker(area);
6474 
6475 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6476 					&locker, &cacheLocker)) {
6477 				restart = true;
6478 				break;
6479 			}
6480 
6481 			cacheLocker.Unlock();
6482 
6483 			currentAddress += rangeSize;
6484 			sizeLeft -= rangeSize;
6485 		}
6486 	} while (restart);
6487 
6488 	// Second round: If the protections differ from that of the area, create a
6489 	// page protection array and re-map mapped pages.
6490 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6491 	addr_t currentAddress = address;
6492 	size_t sizeLeft = size;
6493 	while (sizeLeft > 0) {
6494 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6495 		if (area == NULL)
6496 			return B_NO_MEMORY;
6497 
6498 		addr_t offset = currentAddress - area->Base();
6499 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6500 
6501 		currentAddress += rangeSize;
6502 		sizeLeft -= rangeSize;
6503 
6504 		if (area->page_protections == NULL) {
6505 			if (area->protection == protection)
6506 				continue;
6507 
6508 			status_t status = allocate_area_page_protections(area);
6509 			if (status != B_OK)
6510 				return status;
6511 		}
6512 
6513 		// We need to lock the complete cache chain, since we potentially unmap
6514 		// pages of lower caches.
6515 		VMCache* topCache = vm_area_get_locked_cache(area);
6516 		VMCacheChainLocker cacheChainLocker(topCache);
6517 		cacheChainLocker.LockAllSourceCaches();
6518 
6519 		for (addr_t pageAddress = area->Base() + offset;
6520 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6521 			map->Lock();
6522 
6523 			set_area_page_protection(area, pageAddress, protection);
6524 
6525 			phys_addr_t physicalAddress;
6526 			uint32 flags;
6527 
6528 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6529 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6530 				map->Unlock();
6531 				continue;
6532 			}
6533 
6534 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6535 			if (page == NULL) {
6536 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6537 					"\n", area, physicalAddress);
6538 				map->Unlock();
6539 				return B_ERROR;
6540 			}
6541 
6542 			// If the page is not in the topmost cache and write access is
6543 			// requested, we have to unmap it. Otherwise we can re-map it with
6544 			// the new protection.
6545 			bool unmapPage = page->Cache() != topCache
6546 				&& (protection & B_WRITE_AREA) != 0;
6547 
6548 			if (!unmapPage)
6549 				map->ProtectPage(area, pageAddress, protection);
6550 
6551 			map->Unlock();
6552 
6553 			if (unmapPage) {
6554 				DEBUG_PAGE_ACCESS_START(page);
6555 				unmap_page(area, pageAddress);
6556 				DEBUG_PAGE_ACCESS_END(page);
6557 			}
6558 		}
6559 	}
6560 
6561 	return B_OK;
6562 }
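
/*	Note (illustrative): this syscall backs the POSIX mprotect() call; libroot
	translates the PROT_* constants into the corresponding B_*_AREA flags, so
	e.g. mprotect(address, B_PAGE_SIZE, PROT_READ) arrives here with
	protection == B_READ_AREA (before fix_protection() adds the kernel bits).
*/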
6563 
6564 
6565 status_t
6566 _user_sync_memory(void* _address, size_t size, uint32 flags)
6567 {
6568 	addr_t address = (addr_t)_address;
6569 	size = PAGE_ALIGN(size);
6570 
6571 	// check params
6572 	if ((address % B_PAGE_SIZE) != 0)
6573 		return B_BAD_VALUE;
6574 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6575 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6576 		// weird error code required by POSIX
6577 		return ENOMEM;
6578 	}
6579 
6580 	bool writeSync = (flags & MS_SYNC) != 0;
6581 	bool writeAsync = (flags & MS_ASYNC) != 0;
6582 	if (writeSync && writeAsync)
6583 		return B_BAD_VALUE;
6584 
6585 	if (size == 0 || (!writeSync && !writeAsync))
6586 		return B_OK;
6587 
6588 	// iterate through the range and sync all concerned areas
6589 	while (size > 0) {
6590 		// read lock the address space
6591 		AddressSpaceReadLocker locker;
6592 		status_t error = locker.SetTo(team_get_current_team_id());
6593 		if (error != B_OK)
6594 			return error;
6595 
6596 		// get the first area
6597 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6598 		if (area == NULL)
6599 			return B_NO_MEMORY;
6600 
6601 		uint32 offset = address - area->Base();
6602 		size_t rangeSize = min_c(area->Size() - offset, size);
6603 		offset += area->cache_offset;
6604 
6605 		// lock the cache
6606 		AreaCacheLocker cacheLocker(area);
6607 		if (!cacheLocker)
6608 			return B_BAD_VALUE;
6609 		VMCache* cache = area->cache;
6610 
6611 		locker.Unlock();
6612 
6613 		uint32 firstPage = offset >> PAGE_SHIFT;
6614 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6615 
6616 		// write the pages
6617 		if (cache->type == CACHE_TYPE_VNODE) {
6618 			if (writeSync) {
6619 				// synchronous
6620 				error = vm_page_write_modified_page_range(cache, firstPage,
6621 					endPage);
6622 				if (error != B_OK)
6623 					return error;
6624 			} else {
6625 				// asynchronous
6626 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6627 				// TODO: This is probably not quite what is supposed to happen.
6628 				// Especially when a lot has to be written, it might take ages
6629 				// until it really hits the disk.
6630 			}
6631 		}
6632 
6633 		address += rangeSize;
6634 		size -= rangeSize;
6635 	}
6636 
6637 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6638 	// synchronize multiple mappings of the same file. In our VM they never get
6639 	// out of sync, though, so we don't have to do anything.
6640 
6641 	return B_OK;
6642 }
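
/*	Note (illustrative): this syscall backs the POSIX msync() call; MS_SYNC
	writes modified pages of a file mapping back synchronously, while MS_ASYNC
	only schedules them for writing, matching the two branches above.
*/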
6643 
6644 
6645 status_t
6646 _user_memory_advice(void* address, size_t size, uint32 advice)
6647 {
6648 	// TODO: Implement!
6649 	return B_OK;
6650 }
6651 
6652 
6653 status_t
6654 _user_get_memory_properties(team_id teamID, const void* address,
6655 	uint32* _protected, uint32* _lock)
6656 {
6657 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6658 		return B_BAD_ADDRESS;
6659 
6660 	AddressSpaceReadLocker locker;
6661 	status_t error = locker.SetTo(teamID);
6662 	if (error != B_OK)
6663 		return error;
6664 
6665 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6666 	if (area == NULL)
6667 		return B_NO_MEMORY;
6668 
6669 
6670 	uint32 protection = area->protection;
6671 	if (area->page_protections != NULL)
6672 		protection = get_area_page_protection(area, (addr_t)address);
6673 
6674 	uint32 wiring = area->wiring;
6675 
6676 	locker.Unlock();
6677 
6678 	error = user_memcpy(_protected, &protection, sizeof(protection));
6679 	if (error != B_OK)
6680 		return error;
6681 
6682 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6683 
6684 	return error;
6685 }
6686 
6687 
6688 // #pragma mark -- compatibility
6689 
6690 
6691 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6692 
6693 
6694 struct physical_entry_beos {
6695 	uint32	address;
6696 	uint32	size;
6697 };
6698 
6699 
6700 /*!	The physical_entry structure has changed. We need to translate it to the
6701 	old one.
6702 */
6703 extern "C" int32
6704 __get_memory_map_beos(const void* _address, size_t numBytes,
6705 	physical_entry_beos* table, int32 numEntries)
6706 {
6707 	if (numEntries <= 0)
6708 		return B_BAD_VALUE;
6709 
6710 	const uint8* address = (const uint8*)_address;
6711 
6712 	int32 count = 0;
6713 	while (numBytes > 0 && count < numEntries) {
6714 		physical_entry entry;
6715 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6716 		if (result < 0) {
6717 			if (result != B_BUFFER_OVERFLOW)
6718 				return result;
6719 		}
6720 
6721 		if (entry.address >= (phys_addr_t)1 << 32) {
6722 			panic("get_memory_map(): Address is greater than 4 GB!");
6723 			return B_ERROR;
6724 		}
6725 
6726 		table[count].address = entry.address;
6727 		table[count++].size = entry.size;
6728 
6729 		address += entry.size;
6730 		numBytes -= entry.size;
6731 	}
6732 
6733 	// null-terminate the table, if possible
6734 	if (count < numEntries) {
6735 		table[count].address = 0;
6736 		table[count].size = 0;
6737 	}
6738 
6739 	return B_OK;
6740 }
6741 
6742 
6743 /*!	The type of the \a physicalAddress parameter has changed from void* to
6744 	phys_addr_t.
6745 */
6746 extern "C" area_id
6747 __map_physical_memory_beos(const char* name, void* physicalAddress,
6748 	size_t numBytes, uint32 addressSpec, uint32 protection,
6749 	void** _virtualAddress)
6750 {
6751 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6752 		addressSpec, protection, _virtualAddress);
6753 }
6754 
6755 
6756 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6757 	we meddle with the \a lock parameter to force 32 bit.
6758 */
6759 extern "C" area_id
6760 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6761 	size_t size, uint32 lock, uint32 protection)
6762 {
6763 	switch (lock) {
6764 		case B_NO_LOCK:
6765 			break;
6766 		case B_FULL_LOCK:
6767 		case B_LAZY_LOCK:
6768 			lock = B_32_BIT_FULL_LOCK;
6769 			break;
6770 		case B_CONTIGUOUS:
6771 			lock = B_32_BIT_CONTIGUOUS;
6772 			break;
6773 	}
6774 
6775 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6776 		protection);
6777 }
6778 
6779 
6780 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6781 	"BASE");
6782 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6783 	"map_physical_memory@", "BASE");
6784 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6785 	"BASE");
6786 
6787 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6788 	"get_memory_map@@", "1_ALPHA3");
6789 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6790 	"map_physical_memory@@", "1_ALPHA3");
6791 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6792 	"1_ALPHA3");
6793 
6794 
6795 #else
6796 
6797 
6798 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6799 	"get_memory_map@@", "BASE");
6800 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6801 	"map_physical_memory@@", "BASE");
6802 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6803 	"BASE");
6804 
6805 
6806 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6807