xref: /haiku/src/system/kernel/vm/vm.cpp (revision 4abe18063a98eec087e9e3a88276453ca0979dc4)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
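
// Example (illustrative, not part of the original source): the double
// parentheses let a full printf-style argument list pass through the
// single-argument TRACE()/FTRACE() macros unchanged, e.g.:
//
//   TRACE(("map_backing_store: aspace %p, cache %p\n", addressSpace, cache));
//
// With TRACE_VM undefined the call compiles to a bare ';'.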
74 
75 
76 namespace {
77 
78 class AreaCacheLocking {
79 public:
80 	inline bool Lock(VMCache* lockable)
81 	{
82 		return false;
83 	}
84 
85 	inline void Unlock(VMCache* lockable)
86 	{
87 		vm_area_put_locked_cache(lockable);
88 	}
89 };
90 
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 	inline AreaCacheLocker(VMCache* cache = NULL)
94 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95 	{
96 	}
97 
98 	inline AreaCacheLocker(VMArea* area)
99 		: AutoLocker<VMCache, AreaCacheLocking>()
100 	{
101 		SetTo(area);
102 	}
103 
104 	inline void SetTo(VMCache* cache, bool alreadyLocked)
105 	{
106 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107 	}
108 
109 	inline void SetTo(VMArea* area)
110 	{
111 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113 	}
114 };
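
// Usage sketch (illustrative, not part of the original source): constructing
// an AreaCacheLocker from a VMArea locks the area's cache via
// vm_area_get_locked_cache() and drops that reference/lock again when the
// locker goes out of scope:
//
//   {
//       AreaCacheLocker cacheLocker(area);
//       VMCache* cache = area->cache;
//       // ... inspect or modify the locked cache ...
//   }   // vm_area_put_locked_cache() is called here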
115 
116 
117 class VMCacheChainLocker {
118 public:
119 	VMCacheChainLocker()
120 		:
121 		fTopCache(NULL),
122 		fBottomCache(NULL)
123 	{
124 	}
125 
126 	VMCacheChainLocker(VMCache* topCache)
127 		:
128 		fTopCache(topCache),
129 		fBottomCache(topCache)
130 	{
131 	}
132 
133 	~VMCacheChainLocker()
134 	{
135 		Unlock();
136 	}
137 
138 	void SetTo(VMCache* topCache)
139 	{
140 		fTopCache = topCache;
141 		fBottomCache = topCache;
142 
143 		if (topCache != NULL)
144 			topCache->SetUserData(NULL);
145 	}
146 
147 	VMCache* LockSourceCache()
148 	{
149 		if (fBottomCache == NULL || fBottomCache->source == NULL)
150 			return NULL;
151 
152 		VMCache* previousCache = fBottomCache;
153 
154 		fBottomCache = fBottomCache->source;
155 		fBottomCache->Lock();
156 		fBottomCache->AcquireRefLocked();
157 		fBottomCache->SetUserData(previousCache);
158 
159 		return fBottomCache;
160 	}
161 
162 	void LockAllSourceCaches()
163 	{
164 		while (LockSourceCache() != NULL) {
165 		}
166 	}
167 
168 	void Unlock(VMCache* exceptCache = NULL)
169 	{
170 		if (fTopCache == NULL)
171 			return;
172 
173 		// Unlock caches in source -> consumer direction. This is important to
174 		// avoid double-locking and a reversal of locking order in case a cache
175 		// is eligible for merging.
176 		VMCache* cache = fBottomCache;
177 		while (cache != NULL) {
178 			VMCache* nextCache = (VMCache*)cache->UserData();
179 			if (cache != exceptCache)
180 				cache->ReleaseRefAndUnlock(cache != fTopCache);
181 
182 			if (cache == fTopCache)
183 				break;
184 
185 			cache = nextCache;
186 		}
187 
188 		fTopCache = NULL;
189 		fBottomCache = NULL;
190 	}
191 
192 	void UnlockKeepRefs(bool keepTopCacheLocked)
193 	{
194 		if (fTopCache == NULL)
195 			return;
196 
197 		VMCache* nextCache = fBottomCache;
198 		VMCache* cache = NULL;
199 
200 		while (keepTopCacheLocked
201 				? nextCache != fTopCache : cache != fTopCache) {
202 			cache = nextCache;
203 			nextCache = (VMCache*)cache->UserData();
204 			cache->Unlock(cache != fTopCache);
205 		}
206 	}
207 
208 	void RelockCaches(bool topCacheLocked)
209 	{
210 		if (fTopCache == NULL)
211 			return;
212 
213 		VMCache* nextCache = fTopCache;
214 		VMCache* cache = NULL;
215 		if (topCacheLocked) {
216 			cache = nextCache;
217 			nextCache = cache->source;
218 		}
219 
220 		while (cache != fBottomCache && nextCache != NULL) {
221 			VMCache* consumer = cache;
222 			cache = nextCache;
223 			nextCache = cache->source;
224 			cache->Lock();
225 			cache->SetUserData(consumer);
226 		}
227 	}
228 
229 private:
230 	VMCache*	fTopCache;
231 	VMCache*	fBottomCache;
232 };
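
// Usage sketch (illustrative, not part of the original source): the chain
// locker takes over an already locked top cache and can then lock the whole
// source chain; its destructor releases the chain in source -> consumer
// order:
//
//   VMCache* cache = vm_area_get_locked_cache(area);
//   VMCacheChainLocker cacheChainLocker(cache);
//   cacheChainLocker.LockAllSourceCaches();
//   // ... work on the fully locked cache chain ...
//   // ~VMCacheChainLocker() unlocks and releases all caches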
233 
234 } // namespace
235 
236 
237 // The memory reserve an allocation of a certain priority must not touch.
238 static const size_t kMemoryReserveForPriority[] = {
239 	VM_MEMORY_RESERVE_USER,		// user
240 	VM_MEMORY_RESERVE_SYSTEM,	// system
241 	0							// VIP
242 };
243 
244 
245 ObjectCache* gPageMappingsObjectCache;
246 
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248 
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
253 
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
255 
256 #if DEBUG_CACHE_LIST
257 
258 struct cache_info {
259 	VMCache*	cache;
260 	addr_t		page_count;
261 	addr_t		committed;
262 };
263 
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
266 
267 #endif	// DEBUG_CACHE_LIST
268 
269 
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 	bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 	int protection, int mapping, uint32 flags,
278 	const virtual_address_restrictions* addressRestrictions, bool kernel,
279 	VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
281 
282 
283 //	#pragma mark -
284 
285 
286 #if VM_PAGE_FAULT_TRACING
287 
288 namespace VMPageFaultTracing {
289 
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293 		:
294 		fAddress(address),
295 		fPC(pc),
296 		fWrite(write),
297 		fUser(user)
298 	{
299 		Initialized();
300 	}
301 
302 	virtual void AddDump(TraceOutput& out)
303 	{
304 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306 	}
307 
308 private:
309 	addr_t	fAddress;
310 	addr_t	fPC;
311 	bool	fWrite;
312 	bool	fUser;
313 };
314 
315 
316 // page fault errors
317 enum {
318 	PAGE_FAULT_ERROR_NO_AREA		= 0,
319 	PAGE_FAULT_ERROR_KERNEL_ONLY,
320 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 	PAGE_FAULT_ERROR_READ_PROTECTED,
322 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325 };
326 
327 
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 	PageFaultError(area_id area, status_t error)
331 		:
332 		fArea(area),
333 		fError(error)
334 	{
335 		Initialized();
336 	}
337 
338 	virtual void AddDump(TraceOutput& out)
339 	{
340 		switch (fError) {
341 			case PAGE_FAULT_ERROR_NO_AREA:
342 				out.Print("page fault error: no area");
343 				break;
344 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 				out.Print("page fault error: area: %ld, kernel only", fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 				out.Print("page fault error: area: %ld, write protected",
349 					fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_READ_PROTECTED:
352 				out.Print("page fault error: area: %ld, read protected", fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 				out.Print("page fault error: area: %ld, execute protected",
356 					fArea);
357 				break;
358 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 				out.Print("page fault error: kernel touching bad user memory");
360 				break;
361 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 				out.Print("page fault error: no address space");
363 				break;
364 			default:
365 				out.Print("page fault error: area: %ld, error: %s", fArea,
366 					strerror(fError));
367 				break;
368 		}
369 	}
370 
371 private:
372 	area_id		fArea;
373 	status_t	fError;
374 };
375 
376 
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 			vm_page* page)
381 		:
382 		fArea(area),
383 		fTopCache(topCache),
384 		fCache(cache),
385 		fPage(page)
386 	{
387 		Initialized();
388 	}
389 
390 	virtual void AddDump(TraceOutput& out)
391 	{
392 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 			"page: %p", fArea, fTopCache, fCache, fPage);
394 	}
395 
396 private:
397 	area_id		fArea;
398 	VMCache*	fTopCache;
399 	VMCache*	fCache;
400 	vm_page*	fPage;
401 };
402 
403 }	// namespace VMPageFaultTracing
404 
405 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 #	define TPF(x) ;
408 #endif	// VM_PAGE_FAULT_TRACING
409 
410 
411 //	#pragma mark -
412 
413 
414 /*!	The page's cache must be locked.
415 */
416 static inline void
417 increment_page_wired_count(vm_page* page)
418 {
419 	if (!page->IsMapped())
420 		atomic_add(&gMappedPagesCount, 1);
421 	page->IncrementWiredCount();
422 }
423 
424 
425 /*!	The page's cache must be locked.
426 */
427 static inline void
428 decrement_page_wired_count(vm_page* page)
429 {
430 	page->DecrementWiredCount();
431 	if (!page->IsMapped())
432 		atomic_add(&gMappedPagesCount, -1);
433 }
434 
435 
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
438 {
439 	return area->Base()
440 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441 }
442 
443 
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
447 {
448 	VMAreaHash::ReadLock();
449 
450 	VMArea* area = VMAreaHash::LookupLocked(id);
451 	if (area != NULL && area->address_space != addressSpace)
452 		area = NULL;
453 
454 	VMAreaHash::ReadUnlock();
455 
456 	return area;
457 }
458 
459 
460 static status_t
461 allocate_area_page_protections(VMArea* area)
462 {
463 	// In the page protections we store only the three user protections,
464 	// so we use 4 bits per page.
465 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 	area->page_protections = (uint8*)malloc_etc(bytes,
467 		HEAP_DONT_LOCK_KERNEL_SPACE);
468 	if (area->page_protections == NULL)
469 		return B_NO_MEMORY;
470 
471 	// init the page protections for all pages to that of the area
472 	uint32 areaProtection = area->protection
473 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 	memset(area->page_protections, areaProtection | (areaProtection << 4),
475 		bytes);
476 	return B_OK;
477 }
478 
479 
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482 {
483 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 	uint8& entry = area->page_protections[pageIndex / 2];
486 	if (pageIndex % 2 == 0)
487 		entry = (entry & 0xf0) | protection;
488 	else
489 		entry = (entry & 0x0f) | (protection << 4);
490 }
491 
492 
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
495 {
496 	if (area->page_protections == NULL)
497 		return area->protection;
498 
499 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 	uint32 protection = area->page_protections[pageIndex / 2];
501 	if (pageIndex % 2 == 0)
502 		protection &= 0x0f;
503 	else
504 		protection >>= 4;
505 
506 	// If this is a kernel area we translate the user flags to kernel flags.
507 	if (area->address_space == VMAddressSpace::Kernel()) {
508 		uint32 kernelProtection = 0;
509 		if ((protection & B_READ_AREA) != 0)
510 			kernelProtection |= B_KERNEL_READ_AREA;
511 		if ((protection & B_WRITE_AREA) != 0)
512 			kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 		return kernelProtection;
515 	}
516 
517 	return protection | B_KERNEL_READ_AREA
518 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519 }
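
// Worked example (illustrative, not part of the original source): with four
// bits per page, page_protections[i] stores the protections of page 2*i in
// the low nibble and page 2*i + 1 in the high nibble. Looking up page index 5
// therefore reads element 2 and shifts out the high nibble:
//
//   uint8 entry = area->page_protections[5 / 2];   // element 2
//   uint32 protection = entry >> 4;                // odd index -> high nibble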
520 
521 
522 /*!	The caller must have reserved enough pages that the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so we must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active, since
575 		// otherwise the page daemon wouldn't come to keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
586 
587 
588 /*!	The caller must hold the lock of the page's cache.
589 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	The caller must hold the locks of all mapped pages' caches.
600 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 /*!	Cuts a piece out of an area. If the given cut range covers the complete
610 	area, it is deleted. If it covers the beginning or the end, the area is
611 	resized accordingly. If the range covers some part in the middle of the
612 	area, it is split in two; in this case the second area is returned via
613 	\a _secondArea (the variable is left untouched in the other cases).
614 	The address space must be write locked.
615 	The caller must ensure that no part of the given range is wired.
616 */
617 static status_t
618 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
619 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
620 {
621 	// Does the cut range intersect with the area at all?
622 	addr_t areaLast = area->Base() + (area->Size() - 1);
623 	if (area->Base() > lastAddress || areaLast < address)
624 		return B_OK;
625 
626 	// Is the area fully covered?
627 	if (area->Base() >= address && areaLast <= lastAddress) {
628 		delete_area(addressSpace, area, false);
629 		return B_OK;
630 	}
631 
632 	int priority;
633 	uint32 allocationFlags;
634 	if (addressSpace == VMAddressSpace::Kernel()) {
635 		priority = VM_PRIORITY_SYSTEM;
636 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
637 			| HEAP_DONT_LOCK_KERNEL_SPACE;
638 	} else {
639 		priority = VM_PRIORITY_USER;
640 		allocationFlags = 0;
641 	}
642 
643 	VMCache* cache = vm_area_get_locked_cache(area);
644 	VMCacheChainLocker cacheChainLocker(cache);
645 	cacheChainLocker.LockAllSourceCaches();
646 
647 	// Cut the end only?
648 	if (areaLast <= lastAddress) {
649 		size_t oldSize = area->Size();
650 		size_t newSize = address - area->Base();
651 
652 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
653 			allocationFlags);
654 		if (error != B_OK)
655 			return error;
656 
657 		// unmap pages
658 		unmap_pages(area, address, oldSize - newSize);
659 
660 		// If no one else uses the area's cache, we can resize it, too.
661 		if (cache->areas == area && area->cache_next == NULL
662 			&& cache->consumers.IsEmpty()
663 			&& cache->type == CACHE_TYPE_RAM) {
664 			// Since VMCache::Resize() can temporarily drop the lock, we must
665 			// unlock all lower caches to prevent locking order inversion.
666 			cacheChainLocker.Unlock(cache);
667 			cache->Resize(cache->virtual_base + newSize, priority);
668 			cache->ReleaseRefAndUnlock();
669 		}
670 
671 		return B_OK;
672 	}
673 
674 	// Cut the beginning only?
675 	if (area->Base() >= address) {
676 		addr_t oldBase = area->Base();
677 		addr_t newBase = lastAddress + 1;
678 		size_t newSize = areaLast - lastAddress;
679 
680 		// unmap pages
681 		unmap_pages(area, oldBase, newBase - oldBase);
682 
683 		// resize the area
684 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
685 			allocationFlags);
686 		if (error != B_OK)
687 			return error;
688 
689 		// TODO: If no one else uses the area's cache, we should resize it, too!
690 
691 		area->cache_offset += newBase - oldBase;
692 
693 		return B_OK;
694 	}
695 
696 	// The tough part -- cut a piece out of the middle of the area.
697 	// We do that by shrinking the area to the beginning section and
698 	// creating a new area for the end section.
699 
700 	addr_t firstNewSize = address - area->Base();
701 	addr_t secondBase = lastAddress + 1;
702 	addr_t secondSize = areaLast - lastAddress;
703 
704 	// unmap pages
705 	unmap_pages(area, address, area->Size() - firstNewSize);
706 
707 	// resize the area
708 	addr_t oldSize = area->Size();
709 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
710 		allocationFlags);
711 	if (error != B_OK)
712 		return error;
713 
714 	// TODO: If no one else uses the area's cache, we might want to create a
715 	// new cache for the second area, transfer the concerned pages from the
716 	// first cache to it and resize the first cache.
717 
718 	// map the second area
719 	virtual_address_restrictions addressRestrictions = {};
720 	addressRestrictions.address = (void*)secondBase;
721 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
722 	VMArea* secondArea;
723 	error = map_backing_store(addressSpace, cache,
724 		area->cache_offset + (secondBase - area->Base()), area->name,
725 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
726 		&addressRestrictions, kernel, &secondArea, NULL);
727 	if (error != B_OK) {
728 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
729 		return error;
730 	}
731 
732 	// We need a cache reference for the new area.
733 	cache->AcquireRefLocked();
734 
735 	if (_secondArea != NULL)
736 		*_secondArea = secondArea;
737 
738 	return B_OK;
739 }
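
// Example (illustrative, not part of the original source; assumes 4 KiB
// pages): cutting four pages out of the middle of a 16 page area based at
// 0x10000000 shrinks the area to [0x10000000, 0x10004000), unmaps the cut
// range and returns the remainder [0x10008000, 0x10010000) via secondArea:
//
//   VMArea* secondArea;
//   status_t error = cut_area(addressSpace, area, 0x10004000, 0x10007fff,
//       &secondArea, true);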
740 
741 
742 /*!	Deletes all areas in the given address range.
743 	The address space must be write-locked.
744 	The caller must ensure that no part of the given range is wired.
745 */
746 static status_t
747 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
748 	bool kernel)
749 {
750 	size = PAGE_ALIGN(size);
751 	addr_t lastAddress = address + (size - 1);
752 
753 	// Check whether the caller is allowed to modify the concerned areas.
754 	if (!kernel) {
755 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
756 				VMArea* area = it.Next();) {
757 			addr_t areaLast = area->Base() + (area->Size() - 1);
758 			if (area->Base() < lastAddress && address < areaLast) {
759 				if ((area->protection & B_KERNEL_AREA) != 0)
760 					return B_NOT_ALLOWED;
761 			}
762 		}
763 	}
764 
765 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
766 			VMArea* area = it.Next();) {
767 		addr_t areaLast = area->Base() + (area->Size() - 1);
768 		if (area->Base() < lastAddress && address < areaLast) {
769 			status_t error = cut_area(addressSpace, area, address,
770 				lastAddress, NULL, kernel);
771 			if (error != B_OK)
772 				return error;
773 				// Failing after already messing with areas is ugly, but we
774 				// can't do anything about it.
775 		}
776 	}
777 
778 	return B_OK;
779 }
780 
781 
782 /*! You need to hold the lock of the cache and the write lock of the address
783 	space when calling this function.
784 	Note that in case of error your cache will be temporarily unlocked.
785 	If \a addressSpec is \c B_EXACT_ADDRESS and the
786 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
787 	that no part of the specified address range (base \c *_virtualAddress, size
788 	\a size) is wired.
789 */
790 static status_t
791 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
792 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
793 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
794 	bool kernel, VMArea** _area, void** _virtualAddress)
795 {
796 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
797 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
798 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
799 		addressRestrictions->address, offset, size,
800 		addressRestrictions->address_specification, wiring, protection,
801 		_area, areaName));
802 	cache->AssertLocked();
803 
804 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
805 		| HEAP_DONT_LOCK_KERNEL_SPACE;
806 	int priority;
807 	if (addressSpace != VMAddressSpace::Kernel()) {
808 		priority = VM_PRIORITY_USER;
809 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
810 		priority = VM_PRIORITY_VIP;
811 		allocationFlags |= HEAP_PRIORITY_VIP;
812 	} else
813 		priority = VM_PRIORITY_SYSTEM;
814 
815 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
816 		allocationFlags);
817 	if (area == NULL)
818 		return B_NO_MEMORY;
819 
820 	status_t status;
821 
822 	// if this is a private map, we need to create a new cache
823 	// to handle the private copies of pages as they are written to
824 	VMCache* sourceCache = cache;
825 	if (mapping == REGION_PRIVATE_MAP) {
826 		VMCache* newCache;
827 
828 		// create an anonymous cache
829 		status = VMCacheFactory::CreateAnonymousCache(newCache,
830 			(protection & B_STACK_AREA) != 0
831 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
832 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
833 		if (status != B_OK)
834 			goto err1;
835 
836 		newCache->Lock();
837 		newCache->temporary = 1;
838 		newCache->virtual_base = offset;
839 		newCache->virtual_end = offset + size;
840 
841 		cache->AddConsumer(newCache);
842 
843 		cache = newCache;
844 	}
845 
846 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
847 		status = cache->SetMinimalCommitment(size, priority);
848 		if (status != B_OK)
849 			goto err2;
850 	}
851 
852 	// check to see if this address space has entered DELETE state
853 	if (addressSpace->IsBeingDeleted()) {
854 		// okay, someone is trying to delete this address space now, so we
855 		// can't insert the area -- back out
856 		status = B_BAD_TEAM_ID;
857 		goto err2;
858 	}
859 
860 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
861 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
862 		status = unmap_address_range(addressSpace,
863 			(addr_t)addressRestrictions->address, size, kernel);
864 		if (status != B_OK)
865 			goto err2;
866 	}
867 
868 	status = addressSpace->InsertArea(area, size, addressRestrictions,
869 		allocationFlags, _virtualAddress);
870 	if (status != B_OK) {
871 		// TODO: wait and try again once this is working in the backend
872 #if 0
873 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
874 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
875 				0, 0);
876 		}
877 #endif
878 		goto err2;
879 	}
880 
881 	// attach the cache to the area
882 	area->cache = cache;
883 	area->cache_offset = offset;
884 
885 	// point the cache back to the area
886 	cache->InsertAreaLocked(area);
887 	if (mapping == REGION_PRIVATE_MAP)
888 		cache->Unlock();
889 
890 	// insert the area in the global area hash table
891 	VMAreaHash::Insert(area);
892 
893 	// grab a ref to the address space (the area holds this)
894 	addressSpace->Get();
895 
896 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
897 //		cache, sourceCache, areaName, area);
898 
899 	*_area = area;
900 	return B_OK;
901 
902 err2:
903 	if (mapping == REGION_PRIVATE_MAP) {
904 		// We created this cache, so we must delete it again. Note that we
905 		// need to temporarily unlock the source cache or we'll otherwise
906 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
907 		sourceCache->Unlock();
908 		cache->ReleaseRefAndUnlock();
909 		sourceCache->Lock();
910 	}
911 err1:
912 	addressSpace->DeleteArea(area, allocationFlags);
913 	return status;
914 }
915 
916 
917 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
918 	  locker1, locker2).
919 */
920 template<typename LockerType1, typename LockerType2>
921 static inline bool
922 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
923 {
924 	area->cache->AssertLocked();
925 
926 	VMAreaUnwiredWaiter waiter;
927 	if (!area->AddWaiterIfWired(&waiter))
928 		return false;
929 
930 	// unlock everything and wait
931 	if (locker1 != NULL)
932 		locker1->Unlock();
933 	if (locker2 != NULL)
934 		locker2->Unlock();
935 
936 	waiter.waitEntry.Wait();
937 
938 	return true;
939 }
940 
941 
942 /*!	Checks whether the given area has any wired ranges intersecting with the
943 	specified range and waits, if so.
944 
945 	When it has to wait, the function calls \c Unlock() on both \a locker1
946 	and \a locker2, if given.
947 	The area's top cache must be locked and must be unlocked as a side effect
948 	of calling \c Unlock() on either \a locker1 or \a locker2.
949 
950 	If the function does not have to wait it does not modify or unlock any
951 	object.
952 
953 	\param area The area to be checked.
954 	\param base The base address of the range to check.
955 	\param size The size of the address range to check.
956 	\param locker1 An object to be unlocked before starting to wait (may
957 		be \c NULL).
958 	\param locker2 An object to be unlocked before starting to wait (may
959 		be \c NULL).
960 	\return \c true, if the function had to wait, \c false otherwise.
961 */
962 template<typename LockerType1, typename LockerType2>
963 static inline bool
964 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
965 	LockerType1* locker1, LockerType2* locker2)
966 {
967 	area->cache->AssertLocked();
968 
969 	VMAreaUnwiredWaiter waiter;
970 	if (!area->AddWaiterIfWired(&waiter, base, size))
971 		return false;
972 
973 	// unlock everything and wait
974 	if (locker1 != NULL)
975 		locker1->Unlock();
976 	if (locker2 != NULL)
977 		locker2->Unlock();
978 
979 	waiter.waitEntry.Wait();
980 
981 	return true;
982 }
983 
984 
985 /*!	Checks whether the given address space has any wired ranges intersecting
986 	with the specified range and waits, if so.
987 
988 	Similar to wait_if_area_range_is_wired(), with the following differences:
989 	- All areas intersecting with the range are checked (respectively all until
990 	  one is found that contains a wired range intersecting with the given
991 	  range).
992 	- The given address space must at least be read-locked and must be unlocked
993 	  when \c Unlock() is called on \a locker.
994 	- None of the areas' caches are allowed to be locked.
995 */
996 template<typename LockerType>
997 static inline bool
998 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
999 	size_t size, LockerType* locker)
1000 {
1001 	addr_t end = base + size - 1;
1002 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
1003 			VMArea* area = it.Next();) {
1004 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
1005 		if (area->Base() > end)
1006 			return false;
1007 
1008 		if (base >= area->Base() + area->Size() - 1)
1009 			continue;
1010 
1011 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1012 
1013 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1014 			return true;
1015 	}
1016 
1017 	return false;
1018 }
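
// Usage sketch (illustrative, not part of the original source): callers
// re-acquire the address space lock and retry whenever this function reports
// that it had to wait, as vm_create_null_area() does below:
//
//   AddressSpaceWriteLocker locker;
//   do {
//       if (locker.SetTo(team) != B_OK)
//           return B_BAD_TEAM_ID;
//   } while (wait_if_address_range_is_wired(locker.AddressSpace(),
//       (addr_t)address, size, &locker));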
1019 
1020 
1021 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1022 	It must be called in a context where it is safe to lock the kernel
1023 	address space.
1024 */
1025 status_t
1026 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1027 {
1028 	AddressSpaceReadLocker locker;
1029 	VMArea* area;
1030 	status_t status = locker.SetFromArea(id, area);
1031 	if (status != B_OK)
1032 		return status;
1033 
1034 	if (area->page_protections == NULL) {
1035 		status = allocate_area_page_protections(area);
1036 		if (status != B_OK)
1037 			return status;
1038 	}
1039 
1040 	*cookie = (void*)area;
1041 	return B_OK;
1042 }
1043 
1044 
1045 /*!	This is a debug helper function that can only be used with very specific
1046 	use cases.
1047 	Sets protection for the given address range to the protection specified.
1048 	If \a protection is 0 then the involved pages will be marked non-present
1049 	in the translation map to cause a fault on access. The pages aren't
1050 	actually unmapped however so that they can be marked present again with
1051 	additional calls to this function. For this to work the area must be
1052 	fully locked in memory so that the pages aren't otherwise touched.
1053 	This function does not lock the kernel address space and needs to be
1054 	supplied with a \a cookie retrieved from a successful call to
1055 	vm_prepare_kernel_area_debug_protection().
1056 */
1057 status_t
1058 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1059 	uint32 protection)
1060 {
1061 	// check address range
1062 	addr_t address = (addr_t)_address;
1063 	size = PAGE_ALIGN(size);
1064 
1065 	if ((address % B_PAGE_SIZE) != 0
1066 		|| (addr_t)address + size < (addr_t)address
1067 		|| !IS_KERNEL_ADDRESS(address)
1068 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1069 		return B_BAD_VALUE;
1070 	}
1071 
1072 	// Translate the kernel protection to user protection as we only store that.
1073 	if ((protection & B_KERNEL_READ_AREA) != 0)
1074 		protection |= B_READ_AREA;
1075 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1076 		protection |= B_WRITE_AREA;
1077 
1078 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1079 	VMTranslationMap* map = addressSpace->TranslationMap();
1080 	VMArea* area = (VMArea*)cookie;
1081 
1082 	addr_t offset = address - area->Base();
1083 	if (area->Size() - offset < size) {
1084 		panic("protect range not fully within supplied area");
1085 		return B_BAD_VALUE;
1086 	}
1087 
1088 	if (area->page_protections == NULL) {
1089 		panic("area has no page protections");
1090 		return B_BAD_VALUE;
1091 	}
1092 
1093 	// Invalidate the mapping entries so any access to them will fault or
1094 	// restore the mapping entries unchanged so that lookup will succeed again.
1095 	map->Lock();
1096 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1097 	map->Unlock();
1098 
1099 	// And set the proper page protections so that the fault case will actually
1100 	// fail and not simply try to map a new page.
1101 	for (addr_t pageAddress = address; pageAddress < address + size;
1102 			pageAddress += B_PAGE_SIZE) {
1103 		set_area_page_protection(area, pageAddress, protection);
1104 	}
1105 
1106 	return B_OK;
1107 }
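
// Usage sketch (illustrative, not part of the original source): a debug
// facility first fetches a cookie while locking the kernel address space is
// still allowed, then toggles the protection later without that lock:
//
//   void* cookie;
//   if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
//       // make the page at "address" fault on any access ...
//       vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE, 0);
//       // ... and later make it accessible again
//       vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE,
//           B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//   }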
1108 
1109 
1110 status_t
1111 vm_block_address_range(const char* name, void* address, addr_t size)
1112 {
1113 	if (!arch_vm_supports_protection(0))
1114 		return B_NOT_SUPPORTED;
1115 
1116 	AddressSpaceWriteLocker locker;
1117 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1118 	if (status != B_OK)
1119 		return status;
1120 
1121 	VMAddressSpace* addressSpace = locker.AddressSpace();
1122 
1123 	// create an anonymous cache
1124 	VMCache* cache;
1125 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1126 		VM_PRIORITY_SYSTEM);
1127 	if (status != B_OK)
1128 		return status;
1129 
1130 	cache->temporary = 1;
1131 	cache->virtual_end = size;
1132 	cache->Lock();
1133 
1134 	VMArea* area;
1135 	virtual_address_restrictions addressRestrictions = {};
1136 	addressRestrictions.address = address;
1137 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1138 	status = map_backing_store(addressSpace, cache, 0, name, size,
1139 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1140 		true, &area, NULL);
1141 	if (status != B_OK) {
1142 		cache->ReleaseRefAndUnlock();
1143 		return status;
1144 	}
1145 
1146 	cache->Unlock();
1147 	area->cache_type = CACHE_TYPE_RAM;
1148 	return area->id;
1149 }
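
// Usage sketch (illustrative, not part of the original source; "baseAddress"
// is an assumed, page-aligned kernel virtual address): blocking a page so
// that nothing can ever be mapped there and any access faults:
//
//   status_t status = vm_block_address_range("blocked range",
//       (void*)baseAddress, B_PAGE_SIZE);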
1150 
1151 
1152 status_t
1153 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1154 {
1155 	AddressSpaceWriteLocker locker(team);
1156 	if (!locker.IsLocked())
1157 		return B_BAD_TEAM_ID;
1158 
1159 	VMAddressSpace* addressSpace = locker.AddressSpace();
1160 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1161 		addressSpace == VMAddressSpace::Kernel()
1162 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1163 }
1164 
1165 
1166 status_t
1167 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1168 	addr_t size, uint32 flags)
1169 {
1170 	if (size == 0)
1171 		return B_BAD_VALUE;
1172 
1173 	AddressSpaceWriteLocker locker(team);
1174 	if (!locker.IsLocked())
1175 		return B_BAD_TEAM_ID;
1176 
1177 	virtual_address_restrictions addressRestrictions = {};
1178 	addressRestrictions.address = *_address;
1179 	addressRestrictions.address_specification = addressSpec;
1180 	VMAddressSpace* addressSpace = locker.AddressSpace();
1181 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1182 		addressSpace == VMAddressSpace::Kernel()
1183 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1184 		_address);
1185 }
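
// Usage sketch (illustrative, not part of the original source): reserving 16
// pages of kernel address space and releasing the reservation again:
//
//   void* base = NULL;
//   status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
//       &base, B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE, 0);
//   if (status == B_OK) {
//       vm_unreserve_address_range(VMAddressSpace::KernelID(), base,
//           16 * B_PAGE_SIZE);
//   }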
1186 
1187 
1188 area_id
1189 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1190 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1191 	const virtual_address_restrictions* virtualAddressRestrictions,
1192 	const physical_address_restrictions* physicalAddressRestrictions,
1193 	bool kernel, void** _address)
1194 {
1195 	VMArea* area;
1196 	VMCache* cache;
1197 	vm_page* page = NULL;
1198 	bool isStack = (protection & B_STACK_AREA) != 0;
1199 	page_num_t guardPages;
1200 	bool canOvercommit = false;
1201 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1202 		? VM_PAGE_ALLOC_CLEAR : 0;
1203 
1204 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1205 		team, name, size));
1206 
1207 	size = PAGE_ALIGN(size);
1208 	guardSize = PAGE_ALIGN(guardSize);
1209 	guardPages = guardSize / B_PAGE_SIZE;
1210 
1211 	if (size == 0 || size < guardSize)
1212 		return B_BAD_VALUE;
1213 	if (!arch_vm_supports_protection(protection))
1214 		return B_NOT_SUPPORTED;
1215 
1216 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1217 		canOvercommit = true;
1218 
1219 #ifdef DEBUG_KERNEL_STACKS
1220 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1221 		isStack = true;
1222 #endif
1223 
1224 	// check parameters
1225 	switch (virtualAddressRestrictions->address_specification) {
1226 		case B_ANY_ADDRESS:
1227 		case B_EXACT_ADDRESS:
1228 		case B_BASE_ADDRESS:
1229 		case B_ANY_KERNEL_ADDRESS:
1230 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1231 		case B_RANDOMIZED_ANY_ADDRESS:
1232 		case B_RANDOMIZED_BASE_ADDRESS:
1233 			break;
1234 
1235 		default:
1236 			return B_BAD_VALUE;
1237 	}
1238 
1239 	// If low or high physical address restrictions are given, we force
1240 	// B_CONTIGUOUS wiring, since only then we'll use
1241 	// vm_page_allocate_page_run() which deals with those restrictions.
1242 	if (physicalAddressRestrictions->low_address != 0
1243 		|| physicalAddressRestrictions->high_address != 0) {
1244 		wiring = B_CONTIGUOUS;
1245 	}
1246 
1247 	physical_address_restrictions stackPhysicalRestrictions;
1248 	bool doReserveMemory = false;
1249 	switch (wiring) {
1250 		case B_NO_LOCK:
1251 			break;
1252 		case B_FULL_LOCK:
1253 		case B_LAZY_LOCK:
1254 		case B_CONTIGUOUS:
1255 			doReserveMemory = true;
1256 			break;
1257 		case B_ALREADY_WIRED:
1258 			break;
1259 		case B_LOMEM:
1260 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1261 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1262 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1263 			wiring = B_CONTIGUOUS;
1264 			doReserveMemory = true;
1265 			break;
1266 		case B_32_BIT_FULL_LOCK:
1267 			if (B_HAIKU_PHYSICAL_BITS <= 32
1268 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1269 				wiring = B_FULL_LOCK;
1270 				doReserveMemory = true;
1271 				break;
1272 			}
1273 			// TODO: We don't really support this mode efficiently. Just fall
1274 			// through for now ...
1275 		case B_32_BIT_CONTIGUOUS:
1276 			#if B_HAIKU_PHYSICAL_BITS > 32
1277 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1278 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1279 					stackPhysicalRestrictions.high_address
1280 						= (phys_addr_t)1 << 32;
1281 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1282 				}
1283 			#endif
1284 			wiring = B_CONTIGUOUS;
1285 			doReserveMemory = true;
1286 			break;
1287 		default:
1288 			return B_BAD_VALUE;
1289 	}
1290 
1291 	// Optimization: For a single-page contiguous allocation without low/high
1292 	// memory restriction B_FULL_LOCK wiring suffices.
1293 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1294 		&& physicalAddressRestrictions->low_address == 0
1295 		&& physicalAddressRestrictions->high_address == 0) {
1296 		wiring = B_FULL_LOCK;
1297 	}
1298 
1299 	// For full lock or contiguous areas we're also going to map the pages and
1300 	// thus need to reserve pages for the mapping backend upfront.
1301 	addr_t reservedMapPages = 0;
1302 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1303 		AddressSpaceWriteLocker locker;
1304 		status_t status = locker.SetTo(team);
1305 		if (status != B_OK)
1306 			return status;
1307 
1308 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1309 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1310 	}
1311 
1312 	int priority;
1313 	if (team != VMAddressSpace::KernelID())
1314 		priority = VM_PRIORITY_USER;
1315 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1316 		priority = VM_PRIORITY_VIP;
1317 	else
1318 		priority = VM_PRIORITY_SYSTEM;
1319 
1320 	// Reserve memory before acquiring the address space lock. This reduces the
1321 	// chances of failure, since while holding the write lock to the address
1322 	// space (if it is the kernel address space that is), the low memory handler
1323 	// won't be able to free anything for us.
1324 	addr_t reservedMemory = 0;
1325 	if (doReserveMemory) {
1326 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1327 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1328 			return B_NO_MEMORY;
1329 		reservedMemory = size;
1330 		// TODO: We don't reserve the memory for the pages for the page
1331 		// directories/tables. We actually need to, since we currently don't
1332 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1333 		// there are actually fewer physical pages than there should be, which
1334 		// can get the VM into trouble in low memory situations.
1335 	}
1336 
1337 	AddressSpaceWriteLocker locker;
1338 	VMAddressSpace* addressSpace;
1339 	status_t status;
1340 
1341 	// For full lock areas reserve the pages before locking the address
1342 	// space. E.g. block caches can't release their memory while we hold the
1343 	// address space lock.
1344 	page_num_t reservedPages = reservedMapPages;
1345 	if (wiring == B_FULL_LOCK)
1346 		reservedPages += size / B_PAGE_SIZE;
1347 
1348 	vm_page_reservation reservation;
1349 	if (reservedPages > 0) {
1350 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1351 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1352 					priority)) {
1353 				reservedPages = 0;
1354 				status = B_WOULD_BLOCK;
1355 				goto err0;
1356 			}
1357 		} else
1358 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1359 	}
1360 
1361 	if (wiring == B_CONTIGUOUS) {
1362 		// we try to allocate the page run here upfront as this may easily
1363 		// fail for obvious reasons
1364 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1365 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1366 		if (page == NULL) {
1367 			status = B_NO_MEMORY;
1368 			goto err0;
1369 		}
1370 	}
1371 
1372 	// Lock the address space and, if B_EXACT_ADDRESS and
1373 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1374 	// is not wired.
1375 	do {
1376 		status = locker.SetTo(team);
1377 		if (status != B_OK)
1378 			goto err1;
1379 
1380 		addressSpace = locker.AddressSpace();
1381 	} while (virtualAddressRestrictions->address_specification
1382 			== B_EXACT_ADDRESS
1383 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1384 		&& wait_if_address_range_is_wired(addressSpace,
1385 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1386 
1387 	// create an anonymous cache
1388 	// if it's a stack, make sure that at least two pages are available
1389 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1390 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1391 		wiring == B_NO_LOCK, priority);
1392 	if (status != B_OK)
1393 		goto err1;
1394 
1395 	cache->temporary = 1;
1396 	cache->virtual_end = size;
1397 	cache->committed_size = reservedMemory;
1398 		// TODO: This should be done via a method.
1399 	reservedMemory = 0;
1400 
1401 	cache->Lock();
1402 
1403 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1404 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1405 		kernel, &area, _address);
1406 
1407 	if (status != B_OK) {
1408 		cache->ReleaseRefAndUnlock();
1409 		goto err1;
1410 	}
1411 
1412 	locker.DegradeToReadLock();
1413 
1414 	switch (wiring) {
1415 		case B_NO_LOCK:
1416 		case B_LAZY_LOCK:
1417 			// do nothing - the pages are mapped in as needed
1418 			break;
1419 
1420 		case B_FULL_LOCK:
1421 		{
1422 			// Allocate and map all pages for this area
1423 
1424 			off_t offset = 0;
1425 			for (addr_t address = area->Base();
1426 					address < area->Base() + (area->Size() - 1);
1427 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1428 #ifdef DEBUG_KERNEL_STACKS
1429 #	ifdef STACK_GROWS_DOWNWARDS
1430 				if (isStack && address < area->Base()
1431 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1432 #	else
1433 				if (isStack && address >= area->Base() + area->Size()
1434 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1435 #	endif
1436 					continue;
1437 #endif
1438 				vm_page* page = vm_page_allocate_page(&reservation,
1439 					PAGE_STATE_WIRED | pageAllocFlags);
1440 				cache->InsertPage(page, offset);
1441 				map_page(area, page, address, protection, &reservation);
1442 
1443 				DEBUG_PAGE_ACCESS_END(page);
1444 			}
1445 
1446 			break;
1447 		}
1448 
1449 		case B_ALREADY_WIRED:
1450 		{
1451 			// The pages should already be mapped. This is only really useful
1452 			// during boot time. Find the appropriate vm_page objects and stick
1453 			// them in the cache object.
1454 			VMTranslationMap* map = addressSpace->TranslationMap();
1455 			off_t offset = 0;
1456 
1457 			if (!gKernelStartup)
1458 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1459 
1460 			map->Lock();
1461 
1462 			for (addr_t virtualAddress = area->Base();
1463 					virtualAddress < area->Base() + (area->Size() - 1);
1464 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1465 				phys_addr_t physicalAddress;
1466 				uint32 flags;
1467 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1468 				if (status < B_OK) {
1469 					panic("looking up mapping failed for va 0x%lx\n",
1470 						virtualAddress);
1471 				}
1472 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1473 				if (page == NULL) {
1474 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1475 						"\n", physicalAddress);
1476 				}
1477 
1478 				DEBUG_PAGE_ACCESS_START(page);
1479 
1480 				cache->InsertPage(page, offset);
1481 				increment_page_wired_count(page);
1482 				vm_page_set_state(page, PAGE_STATE_WIRED);
1483 				page->busy = false;
1484 
1485 				DEBUG_PAGE_ACCESS_END(page);
1486 			}
1487 
1488 			map->Unlock();
1489 			break;
1490 		}
1491 
1492 		case B_CONTIGUOUS:
1493 		{
1494 			// We have already allocated our contiguous page run, so we can now
1495 			// just map them in the address space
1496 			VMTranslationMap* map = addressSpace->TranslationMap();
1497 			phys_addr_t physicalAddress
1498 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1499 			addr_t virtualAddress = area->Base();
1500 			off_t offset = 0;
1501 
1502 			map->Lock();
1503 
1504 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1505 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1506 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1507 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1508 				if (page == NULL)
1509 					panic("couldn't lookup physical page just allocated\n");
1510 
1511 				status = map->Map(virtualAddress, physicalAddress, protection,
1512 					area->MemoryType(), &reservation);
1513 				if (status < B_OK)
1514 					panic("couldn't map physical page in page run\n");
1515 
1516 				cache->InsertPage(page, offset);
1517 				increment_page_wired_count(page);
1518 
1519 				DEBUG_PAGE_ACCESS_END(page);
1520 			}
1521 
1522 			map->Unlock();
1523 			break;
1524 		}
1525 
1526 		default:
1527 			break;
1528 	}
1529 
1530 	cache->Unlock();
1531 
1532 	if (reservedPages > 0)
1533 		vm_page_unreserve_pages(&reservation);
1534 
1535 	TRACE(("vm_create_anonymous_area: done\n"));
1536 
1537 	area->cache_type = CACHE_TYPE_RAM;
1538 	return area->id;
1539 
1540 err1:
1541 	if (wiring == B_CONTIGUOUS) {
1542 		// we had reserved the area space upfront...
1543 		phys_addr_t pageNumber = page->physical_page_number;
1544 		int32 i;
1545 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1546 			page = vm_lookup_page(pageNumber);
1547 			if (page == NULL)
1548 				panic("couldn't lookup physical page just allocated\n");
1549 
1550 			vm_page_set_state(page, PAGE_STATE_FREE);
1551 		}
1552 	}
1553 
1554 err0:
1555 	if (reservedPages > 0)
1556 		vm_page_unreserve_pages(&reservation);
1557 	if (reservedMemory > 0)
1558 		vm_unreserve_memory(reservedMemory);
1559 
1560 	return status;
1561 }
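
// Usage sketch (illustrative, not part of the original source): allocating a
// fully locked, readable/writable kernel buffer with default placement. The
// area name is made up for the example:
//
//   virtual_address_restrictions virtualRestrictions = {};
//   virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
//   physical_address_restrictions physicalRestrictions = {};
//   void* address;
//   area_id area = vm_create_anonymous_area(VMAddressSpace::KernelID(),
//       "example buffer", 4 * B_PAGE_SIZE, B_FULL_LOCK,
//       B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
//       &physicalRestrictions, true, &address);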
1562 
1563 
1564 area_id
1565 vm_map_physical_memory(team_id team, const char* name, void** _address,
1566 	uint32 addressSpec, addr_t size, uint32 protection,
1567 	phys_addr_t physicalAddress, bool alreadyWired)
1568 {
1569 	VMArea* area;
1570 	VMCache* cache;
1571 	addr_t mapOffset;
1572 
1573 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1574 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1575 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1576 		addressSpec, size, protection, physicalAddress));
1577 
1578 	if (!arch_vm_supports_protection(protection))
1579 		return B_NOT_SUPPORTED;
1580 
1581 	AddressSpaceWriteLocker locker(team);
1582 	if (!locker.IsLocked())
1583 		return B_BAD_TEAM_ID;
1584 
1585 	// if the physical address is not page aligned,
1586 	// move the actual area down to align on a page boundary
1587 	mapOffset = physicalAddress % B_PAGE_SIZE;
1588 	size += mapOffset;
1589 	physicalAddress -= mapOffset;
1590 
1591 	size = PAGE_ALIGN(size);
1592 
1593 	// create a device cache
1594 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1595 	if (status != B_OK)
1596 		return status;
1597 
1598 	cache->virtual_end = size;
1599 
1600 	cache->Lock();
1601 
1602 	virtual_address_restrictions addressRestrictions = {};
1603 	addressRestrictions.address = *_address;
1604 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1605 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1606 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1607 		true, &area, _address);
1608 
1609 	if (status < B_OK)
1610 		cache->ReleaseRefLocked();
1611 
1612 	cache->Unlock();
1613 
1614 	if (status == B_OK) {
1615 		// set requested memory type -- use uncached, if not given
1616 		uint32 memoryType = addressSpec & B_MTR_MASK;
1617 		if (memoryType == 0)
1618 			memoryType = B_MTR_UC;
1619 
1620 		area->SetMemoryType(memoryType);
1621 
1622 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1623 		if (status != B_OK)
1624 			delete_area(locker.AddressSpace(), area, false);
1625 	}
1626 
1627 	if (status != B_OK)
1628 		return status;
1629 
1630 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1631 
1632 	if (alreadyWired) {
1633 		// The area is already mapped, but possibly not with the right
1634 		// memory type.
1635 		map->Lock();
1636 		map->ProtectArea(area, area->protection);
1637 		map->Unlock();
1638 	} else {
1639 		// Map the area completely.
1640 
1641 		// reserve pages needed for the mapping
1642 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1643 			area->Base() + (size - 1));
1644 		vm_page_reservation reservation;
1645 		vm_page_reserve_pages(&reservation, reservePages,
1646 			team == VMAddressSpace::KernelID()
1647 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1648 
1649 		map->Lock();
1650 
1651 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1652 			map->Map(area->Base() + offset, physicalAddress + offset,
1653 				protection, area->MemoryType(), &reservation);
1654 		}
1655 
1656 		map->Unlock();
1657 
1658 		vm_page_unreserve_pages(&reservation);
1659 	}
1660 
1661 	// modify the pointer returned to be offset back into the new area
1662 	// the same way the passed-in physical address was offset
1663 	*_address = (void*)((addr_t)*_address + mapOffset);
1664 
1665 	area->cache_type = CACHE_TYPE_DEVICE;
1666 	return area->id;
1667 }
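
// Usage sketch (illustrative, not part of the original source;
// "physicalBase" stands for an assumed device register address): mapping one
// page of MMIO space uncached into the kernel address space, with the memory
// type encoded in the address specification:
//
//   void* virtualBase;
//   area_id area = vm_map_physical_memory(VMAddressSpace::KernelID(),
//       "example regs", &virtualBase, B_ANY_KERNEL_ADDRESS | B_MTR_UC,
//       B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, physicalBase,
//       false);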
1668 
1669 
1670 /*!	Don't use!
1671 	TODO: This function was introduced to map physical page vecs to
1672 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1673 	use a device cache and does not track vm_page::wired_count!
1674 */
1675 area_id
1676 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1677 	uint32 addressSpec, addr_t* _size, uint32 protection,
1678 	struct generic_io_vec* vecs, uint32 vecCount)
1679 {
1680 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1681 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1682 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1683 		addressSpec, _size, protection, vecs, vecCount));
1684 
1685 	if (!arch_vm_supports_protection(protection)
1686 		|| (addressSpec & B_MTR_MASK) != 0) {
1687 		return B_NOT_SUPPORTED;
1688 	}
1689 
1690 	AddressSpaceWriteLocker locker(team);
1691 	if (!locker.IsLocked())
1692 		return B_BAD_TEAM_ID;
1693 
1694 	if (vecCount == 0)
1695 		return B_BAD_VALUE;
1696 
1697 	addr_t size = 0;
1698 	for (uint32 i = 0; i < vecCount; i++) {
1699 		if (vecs[i].base % B_PAGE_SIZE != 0
1700 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1701 			return B_BAD_VALUE;
1702 		}
1703 
1704 		size += vecs[i].length;
1705 	}
1706 
1707 	// create a device cache
1708 	VMCache* cache;
1709 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1710 	if (result != B_OK)
1711 		return result;
1712 
1713 	cache->virtual_end = size;
1714 
1715 	cache->Lock();
1716 
1717 	VMArea* area;
1718 	virtual_address_restrictions addressRestrictions = {};
1719 	addressRestrictions.address = *_address;
1720 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1721 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1722 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1723 		&addressRestrictions, true, &area, _address);
1724 
1725 	if (result != B_OK)
1726 		cache->ReleaseRefLocked();
1727 
1728 	cache->Unlock();
1729 
1730 	if (result != B_OK)
1731 		return result;
1732 
1733 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1734 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1735 		area->Base() + (size - 1));
1736 
1737 	vm_page_reservation reservation;
1738 	vm_page_reserve_pages(&reservation, reservePages,
1739 			team == VMAddressSpace::KernelID()
1740 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1741 	map->Lock();
1742 
1743 	uint32 vecIndex = 0;
1744 	size_t vecOffset = 0;
1745 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1746 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1747 			vecOffset = 0;
1748 			vecIndex++;
1749 		}
1750 
1751 		if (vecIndex >= vecCount)
1752 			break;
1753 
1754 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1755 			protection, area->MemoryType(), &reservation);
1756 
1757 		vecOffset += B_PAGE_SIZE;
1758 	}
1759 
1760 	map->Unlock();
1761 	vm_page_unreserve_pages(&reservation);
1762 
1763 	if (_size != NULL)
1764 		*_size = size;
1765 
1766 	area->cache_type = CACHE_TYPE_DEVICE;
1767 	return area->id;
1768 }
1769 
1770 
1771 area_id
1772 vm_create_null_area(team_id team, const char* name, void** address,
1773 	uint32 addressSpec, addr_t size, uint32 flags)
1774 {
1775 	size = PAGE_ALIGN(size);
1776 
1777 	// Lock the address space and, if B_EXACT_ADDRESS and
1778 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1779 	// is not wired.
1780 	AddressSpaceWriteLocker locker;
1781 	do {
1782 		if (locker.SetTo(team) != B_OK)
1783 			return B_BAD_TEAM_ID;
1784 	} while (addressSpec == B_EXACT_ADDRESS
1785 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1786 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1787 			(addr_t)*address, size, &locker));
1788 
1789 	// create a null cache
1790 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1791 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1792 	VMCache* cache;
1793 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1794 	if (status != B_OK)
1795 		return status;
1796 
1797 	cache->temporary = 1;
1798 	cache->virtual_end = size;
1799 
1800 	cache->Lock();
1801 
1802 	VMArea* area;
1803 	virtual_address_restrictions addressRestrictions = {};
1804 	addressRestrictions.address = *address;
1805 	addressRestrictions.address_specification = addressSpec;
1806 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1807 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1808 		&addressRestrictions, true, &area, address);
1809 
1810 	if (status < B_OK) {
1811 		cache->ReleaseRefAndUnlock();
1812 		return status;
1813 	}
1814 
1815 	cache->Unlock();
1816 
1817 	area->cache_type = CACHE_TYPE_NULL;
1818 	return area->id;
1819 }
1820 
1821 
1822 /*!	Creates the vnode cache for the specified \a vnode.
1823 	The vnode has to be marked busy when calling this function.
1824 */
1825 status_t
1826 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1827 {
1828 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1829 }
1830 
1831 
1832 /*!	\a cache must be locked. The area's address space must be read-locked.
1833 */
1834 static void
1835 pre_map_area_pages(VMArea* area, VMCache* cache,
1836 	vm_page_reservation* reservation)
1837 {
1838 	addr_t baseAddress = area->Base();
1839 	addr_t cacheOffset = area->cache_offset;
1840 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1841 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1842 
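	// Iterate over the cache's page tree, starting at the area's first page,
	// and map every resident page that is neither busy nor inactive.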
1843 	for (VMCachePagesTree::Iterator it
1844 				= cache->pages.GetIterator(firstPage, true, true);
1845 			vm_page* page = it.Next();) {
1846 		if (page->cache_offset >= endPage)
1847 			break;
1848 
1849 		// skip busy and inactive pages
1850 		if (page->busy || page->usage_count == 0)
1851 			continue;
1852 
1853 		DEBUG_PAGE_ACCESS_START(page);
1854 		map_page(area, page,
1855 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1856 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1857 		DEBUG_PAGE_ACCESS_END(page);
1858 	}
1859 }
1860 
1861 
1862 /*!	Will map the file specified by \a fd to an area in memory.
1863 	The file will be mirrored beginning at the specified \a offset. The
1864 	\a offset and \a size arguments have to be page aligned.
1865 */
1866 static area_id
1867 _vm_map_file(team_id team, const char* name, void** _address,
1868 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1869 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1870 {
1871 	// TODO: for binary files, we want to make sure that they get a snapshot
1872 	//	of the file at mapping time, i.e. later changes should not make it
1873 	//	into the mapped copy -- this will need quite some changes to be done
1874 	//	in a nice way
1875 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1876 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1877 
1878 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1879 	size = PAGE_ALIGN(size);
1880 
1881 	if (mapping == REGION_NO_PRIVATE_MAP)
1882 		protection |= B_SHARED_AREA;
1883 	if (addressSpec != B_EXACT_ADDRESS)
1884 		unmapAddressRange = false;
1885 
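	// A negative FD means no file is to be mapped; create an anonymous area
	// instead.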
1886 	if (fd < 0) {
1887 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1888 		virtual_address_restrictions virtualRestrictions = {};
1889 		virtualRestrictions.address = *_address;
1890 		virtualRestrictions.address_specification = addressSpec;
1891 		physical_address_restrictions physicalRestrictions = {};
1892 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1893 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1894 			_address);
1895 	}
1896 
1897 	// get the open flags of the FD
1898 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1899 	if (descriptor == NULL)
1900 		return EBADF;
1901 	int32 openMode = descriptor->open_mode;
1902 	put_fd(descriptor);
1903 
1904 	// The FD must be open for reading at any rate. For shared mappings with
1905 	// write access, the FD must additionally be open for writing.
1906 	if ((openMode & O_ACCMODE) == O_WRONLY
1907 		|| (mapping == REGION_NO_PRIVATE_MAP
1908 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1909 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1910 		return EACCES;
1911 	}
1912 
1913 	// get the vnode for the object, this also grabs a ref to it
1914 	struct vnode* vnode = NULL;
1915 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1916 	if (status < B_OK)
1917 		return status;
1918 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1919 
1920 	// If we're going to pre-map pages, we need to reserve the pages needed by
1921 	// the mapping backend upfront.
1922 	page_num_t reservedPreMapPages = 0;
1923 	vm_page_reservation reservation;
1924 	if ((protection & B_READ_AREA) != 0) {
1925 		AddressSpaceWriteLocker locker;
1926 		status = locker.SetTo(team);
1927 		if (status != B_OK)
1928 			return status;
1929 
1930 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1931 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1932 
1933 		locker.Unlock();
1934 
1935 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1936 			team == VMAddressSpace::KernelID()
1937 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1938 	}
1939 
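	// Small RAII helper: unreserves the pre-map page reservation (if any) when
	// it goes out of scope, i.e. on every return path of this function.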
1940 	struct PageUnreserver {
1941 		PageUnreserver(vm_page_reservation* reservation)
1942 			:
1943 			fReservation(reservation)
1944 		{
1945 		}
1946 
1947 		~PageUnreserver()
1948 		{
1949 			if (fReservation != NULL)
1950 				vm_page_unreserve_pages(fReservation);
1951 		}
1952 
1953 		vm_page_reservation* fReservation;
1954 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1955 
1956 	// Lock the address space and, if the specified address range shall be
1957 	// unmapped, ensure it is not wired.
1958 	AddressSpaceWriteLocker locker;
1959 	do {
1960 		if (locker.SetTo(team) != B_OK)
1961 			return B_BAD_TEAM_ID;
1962 	} while (unmapAddressRange
1963 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1964 			(addr_t)*_address, size, &locker));
1965 
1966 	// TODO: this only works for file systems that use the file cache
1967 	VMCache* cache;
1968 	status = vfs_get_vnode_cache(vnode, &cache, false);
1969 	if (status < B_OK)
1970 		return status;
1971 
1972 	cache->Lock();
1973 
1974 	VMArea* area;
1975 	virtual_address_restrictions addressRestrictions = {};
1976 	addressRestrictions.address = *_address;
1977 	addressRestrictions.address_specification = addressSpec;
1978 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1979 		0, protection, mapping,
1980 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1981 		&addressRestrictions, kernel, &area, _address);
1982 
1983 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1984 		// map_backing_store() cannot know we no longer need the ref
1985 		cache->ReleaseRefLocked();
1986 	}
1987 
1988 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1989 		pre_map_area_pages(area, cache, &reservation);
1990 
1991 	cache->Unlock();
1992 
1993 	if (status == B_OK) {
1994 		// TODO: this probably deserves a smarter solution, ie. don't always
1995 		// prefetch stuff, and also, probably don't trigger it at this place.
1996 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1997 			// prefetches at max 10 MB starting from "offset"
1998 	}
1999 
2000 	if (status != B_OK)
2001 		return status;
2002 
2003 	area->cache_type = CACHE_TYPE_VNODE;
2004 	return area->id;
2005 }
2006 
2007 
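/*!	Kernel entry point for mapping a file; simply forwards to _vm_map_file()
	with \c kernel set to \c true.
	Illustrative sketch only (not taken from any caller in this file): mapping
	the first 16 pages of an already opened, readable file descriptor \c fd
	read-only into the kernel team might look roughly like this:
		void* address = NULL;
		area_id area = vm_map_file(VMAddressSpace::KernelID(), "mapped file",
			&address, B_ANY_KERNEL_ADDRESS, 16 * B_PAGE_SIZE,
			B_READ_AREA | B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, false,
			fd, 0);
*/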
2008 area_id
2009 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2010 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2011 	int fd, off_t offset)
2012 {
2013 	if (!arch_vm_supports_protection(protection))
2014 		return B_NOT_SUPPORTED;
2015 
2016 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2017 		mapping, unmapAddressRange, fd, offset, true);
2018 }
2019 
2020 
2021 VMCache*
2022 vm_area_get_locked_cache(VMArea* area)
2023 {
2024 	rw_lock_read_lock(&sAreaCacheLock);
2025 
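	// Locking the cache requires dropping sAreaCacheLock, so the area's cache
	// may change (or be deleted) in the meantime; retry until we have locked
	// the cache that is still the area's current one.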
2026 	while (true) {
2027 		VMCache* cache = area->cache;
2028 
2029 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2030 			// cache has been deleted
2031 			rw_lock_read_lock(&sAreaCacheLock);
2032 			continue;
2033 		}
2034 
2035 		rw_lock_read_lock(&sAreaCacheLock);
2036 
2037 		if (cache == area->cache) {
2038 			cache->AcquireRefLocked();
2039 			rw_lock_read_unlock(&sAreaCacheLock);
2040 			return cache;
2041 		}
2042 
2043 		// the cache changed in the meantime
2044 		cache->Unlock();
2045 	}
2046 }
2047 
2048 
2049 void
2050 vm_area_put_locked_cache(VMCache* cache)
2051 {
2052 	cache->ReleaseRefAndUnlock();
2053 }
2054 
2055 
2056 area_id
2057 vm_clone_area(team_id team, const char* name, void** address,
2058 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2059 	bool kernel)
2060 {
2061 	VMArea* newArea = NULL;
2062 	VMArea* sourceArea;
2063 
2064 	// Check whether the source area exists and is cloneable. If so, mark it
2065 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2066 	{
2067 		AddressSpaceWriteLocker locker;
2068 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2069 		if (status != B_OK)
2070 			return status;
2071 
2072 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2073 			return B_NOT_ALLOWED;
2074 
2075 		sourceArea->protection |= B_SHARED_AREA;
2076 		protection |= B_SHARED_AREA;
2077 	}
2078 
2079 	// Now lock both address spaces and actually do the cloning.
2080 
2081 	MultiAddressSpaceLocker locker;
2082 	VMAddressSpace* sourceAddressSpace;
2083 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2084 	if (status != B_OK)
2085 		return status;
2086 
2087 	VMAddressSpace* targetAddressSpace;
2088 	status = locker.AddTeam(team, true, &targetAddressSpace);
2089 	if (status != B_OK)
2090 		return status;
2091 
2092 	status = locker.Lock();
2093 	if (status != B_OK)
2094 		return status;
2095 
2096 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2097 	if (sourceArea == NULL)
2098 		return B_BAD_VALUE;
2099 
2100 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2101 		return B_NOT_ALLOWED;
2102 
2103 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2104 
2105 	if (!kernel && sourceAddressSpace == VMAddressSpace::Kernel()
2106 		&& targetAddressSpace != VMAddressSpace::Kernel()
2107 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2108 		// kernel areas must not be cloned in userland, unless explicitly
2109 		// declared user-cloneable upon construction
2110 #if KDEBUG
2111 		panic("attempting to clone kernel area \"%s\" (%" B_PRId32 ")!",
2112 			sourceArea->name, sourceID);
2113 #endif
2114 		status = B_NOT_ALLOWED;
2115 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2116 		status = B_NOT_ALLOWED;
2117 	} else {
2118 		virtual_address_restrictions addressRestrictions = {};
2119 		addressRestrictions.address = *address;
2120 		addressRestrictions.address_specification = addressSpec;
2121 		status = map_backing_store(targetAddressSpace, cache,
2122 			sourceArea->cache_offset, name, sourceArea->Size(),
2123 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2124 			kernel, &newArea, address);
2125 	}
2126 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2127 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2128 		// to create a new cache, and has therefore already acquired a reference
2129 		// to the source cache - but otherwise it has no idea that we need
2130 		// one.
2131 		cache->AcquireRefLocked();
2132 	}
2133 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2134 		// we need to map in everything at this point
2135 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2136 			// we don't have actual pages to map but a physical area
2137 			VMTranslationMap* map
2138 				= sourceArea->address_space->TranslationMap();
2139 			map->Lock();
2140 
2141 			phys_addr_t physicalAddress;
2142 			uint32 oldProtection;
2143 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2144 
2145 			map->Unlock();
2146 
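			// Note: this assumes the device area maps one physically
			// contiguous range -- only the physical address of the area's
			// base is queried, and page offsets are added to it below.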
2147 			map = targetAddressSpace->TranslationMap();
2148 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2149 				newArea->Base() + (newArea->Size() - 1));
2150 
2151 			vm_page_reservation reservation;
2152 			vm_page_reserve_pages(&reservation, reservePages,
2153 				targetAddressSpace == VMAddressSpace::Kernel()
2154 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2155 			map->Lock();
2156 
2157 			for (addr_t offset = 0; offset < newArea->Size();
2158 					offset += B_PAGE_SIZE) {
2159 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2160 					protection, newArea->MemoryType(), &reservation);
2161 			}
2162 
2163 			map->Unlock();
2164 			vm_page_unreserve_pages(&reservation);
2165 		} else {
2166 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2167 			size_t reservePages = map->MaxPagesNeededToMap(
2168 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2169 			vm_page_reservation reservation;
2170 			vm_page_reserve_pages(&reservation, reservePages,
2171 				targetAddressSpace == VMAddressSpace::Kernel()
2172 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2173 
2174 			// map in all pages from source
2175 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2176 					vm_page* page  = it.Next();) {
2177 				if (!page->busy) {
2178 					DEBUG_PAGE_ACCESS_START(page);
2179 					map_page(newArea, page,
2180 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2181 							- newArea->cache_offset),
2182 						protection, &reservation);
2183 					DEBUG_PAGE_ACCESS_END(page);
2184 				}
2185 			}
2186 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2187 			// ensuring that!
2188 
2189 			vm_page_unreserve_pages(&reservation);
2190 		}
2191 	}
2192 	if (status == B_OK)
2193 		newArea->cache_type = sourceArea->cache_type;
2194 
2195 	vm_area_put_locked_cache(cache);
2196 
2197 	if (status < B_OK)
2198 		return status;
2199 
2200 	return newArea->id;
2201 }
2202 
2203 
2204 /*!	Deletes the specified area of the given address space.
2205 
2206 	The address space must be write-locked.
2207 	The caller must ensure that the area does not have any wired ranges.
2208 
2209 	\param addressSpace The address space containing the area.
2210 	\param area The area to be deleted.
2211 	\param deletingAddressSpace \c true, if the address space is in the process
2212 		of being deleted.
2213 */
2214 static void
2215 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2216 	bool deletingAddressSpace)
2217 {
2218 	ASSERT(!area->IsWired());
2219 
2220 	VMAreaHash::Remove(area);
2221 
2222 	// At this point the area is removed from the global hash table, but
2223 	// still exists in the area list.
2224 
2225 	// Unmap the virtual address space the area occupied.
2226 	{
2227 		// We need to lock the complete cache chain.
2228 		VMCache* topCache = vm_area_get_locked_cache(area);
2229 		VMCacheChainLocker cacheChainLocker(topCache);
2230 		cacheChainLocker.LockAllSourceCaches();
2231 
2232 		// If the area's top cache is a temporary cache and the area is the only
2233 		// one referencing it (besides us currently holding a second reference),
2234 		// the unmapping code doesn't need to care about preserving the accessed
2235 		// and dirty flags of the top cache page mappings.
2236 		bool ignoreTopCachePageFlags
2237 			= topCache->temporary && topCache->RefCount() == 2;
2238 
2239 		area->address_space->TranslationMap()->UnmapArea(area,
2240 			deletingAddressSpace, ignoreTopCachePageFlags);
2241 	}
2242 
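	// For non-temporary (e.g. file backed) caches, write back any modified
	// pages before the area is detached from the cache.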
2243 	if (!area->cache->temporary)
2244 		area->cache->WriteModified();
2245 
2246 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2247 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2248 
2249 	arch_vm_unset_memory_type(area);
2250 	addressSpace->RemoveArea(area, allocationFlags);
2251 	addressSpace->Put();
2252 
2253 	area->cache->RemoveArea(area);
2254 	area->cache->ReleaseRef();
2255 
2256 	addressSpace->DeleteArea(area, allocationFlags);
2257 }
2258 
2259 
2260 status_t
2261 vm_delete_area(team_id team, area_id id, bool kernel)
2262 {
2263 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2264 		team, id));
2265 
2266 	// lock the address space and make sure the area isn't wired
2267 	AddressSpaceWriteLocker locker;
2268 	VMArea* area;
2269 	AreaCacheLocker cacheLocker;
2270 
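	// wait_if_area_is_wired() temporarily releases the locks while waiting
	// (hence the retry loop), so we have to re-acquire them and look the area
	// up again until no wired ranges are left.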
2271 	do {
2272 		status_t status = locker.SetFromArea(team, id, area);
2273 		if (status != B_OK)
2274 			return status;
2275 
2276 		cacheLocker.SetTo(area);
2277 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2278 
2279 	cacheLocker.Unlock();
2280 
2281 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2282 		return B_NOT_ALLOWED;
2283 
2284 	delete_area(locker.AddressSpace(), area, false);
2285 	return B_OK;
2286 }
2287 
2288 
2289 /*!	Creates a new cache on top of the given cache, moves all areas from
2290 	the old cache to the new one, and changes the protection of all affected
2291 	areas' pages to read-only. If requested, wired pages are moved up to the
2292 	new cache and copies are added to the old cache in their place.
2293 	Preconditions:
2294 	- The given cache must be locked.
2295 	- All of the cache's areas' address spaces must be read locked.
2296 	- Either the cache must not have any wired ranges or a page reservation for
2297 	  all wired pages must be provided, so they can be copied.
2298 
2299 	\param lowerCache The cache on top of which a new cache shall be created.
2300 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2301 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2302 		has wired pages. The wired pages are copied in this case.
2303 */
2304 static status_t
2305 vm_copy_on_write_area(VMCache* lowerCache,
2306 	vm_page_reservation* wiredPagesReservation)
2307 {
2308 	VMCache* upperCache;
2309 
2310 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2311 
2312 	// We need to separate the cache from its areas. The cache goes one level
2313 	// deeper and we create a new cache in between.
2314 
2315 	// create an anonymous cache
2316 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2317 		lowerCache->GuardSize() / B_PAGE_SIZE,
2318 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2319 		VM_PRIORITY_USER);
2320 	if (status != B_OK)
2321 		return status;
2322 
2323 	upperCache->Lock();
2324 
2325 	upperCache->temporary = 1;
2326 	upperCache->virtual_base = lowerCache->virtual_base;
2327 	upperCache->virtual_end = lowerCache->virtual_end;
2328 
2329 	// transfer the lower cache areas to the upper cache
2330 	rw_lock_write_lock(&sAreaCacheLock);
2331 	upperCache->TransferAreas(lowerCache);
2332 	rw_lock_write_unlock(&sAreaCacheLock);
2333 
2334 	lowerCache->AddConsumer(upperCache);
2335 
2336 	// We now need to remap all pages from all of the cache's areas read-only,
2337 	// so that a copy will be created on next write access. If there are wired
2338 	// pages, we keep their protection, move them to the upper cache and create
2339 	// copies for the lower cache.
2340 	if (wiredPagesReservation != NULL) {
2341 		// We need to handle wired pages -- iterate through the cache's pages.
2342 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2343 				vm_page* page = it.Next();) {
2344 			if (page->WiredCount() > 0) {
2345 				// allocate a new page and copy the wired one
2346 				vm_page* copiedPage = vm_page_allocate_page(
2347 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2348 
2349 				vm_memcpy_physical_page(
2350 					copiedPage->physical_page_number * B_PAGE_SIZE,
2351 					page->physical_page_number * B_PAGE_SIZE);
2352 
2353 				// move the wired page to the upper cache (note: removing is OK
2354 				// with the SplayTree iterator) and insert the copy
2355 				upperCache->MovePage(page);
2356 				lowerCache->InsertPage(copiedPage,
2357 					page->cache_offset * B_PAGE_SIZE);
2358 
2359 				DEBUG_PAGE_ACCESS_END(copiedPage);
2360 			} else {
2361 				// Change the protection of this page in all areas.
2362 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2363 						tempArea = tempArea->cache_next) {
2364 					// The page must remain readable to the same extent the
2365 					// area was readable before; kernel read access is kept.
2366 					uint32 protection = B_KERNEL_READ_AREA;
2367 					if ((tempArea->protection & B_READ_AREA) != 0)
2368 						protection |= B_READ_AREA;
2369 
2370 					VMTranslationMap* map
2371 						= tempArea->address_space->TranslationMap();
2372 					map->Lock();
2373 					map->ProtectPage(tempArea,
2374 						virtual_page_address(tempArea, page), protection);
2375 					map->Unlock();
2376 				}
2377 			}
2378 		}
2379 	} else {
2380 		ASSERT(lowerCache->WiredPagesCount() == 0);
2381 
2382 		// just change the protection of all areas
2383 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2384 				tempArea = tempArea->cache_next) {
2385 			// The area must remain readable to the same extent it was
2386 			// readable before; kernel read access is always retained.
2387 			uint32 protection = B_KERNEL_READ_AREA;
2388 			if ((tempArea->protection & B_READ_AREA) != 0)
2389 				protection |= B_READ_AREA;
2390 
2391 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2392 			map->Lock();
2393 			map->ProtectArea(tempArea, protection);
2394 			map->Unlock();
2395 		}
2396 	}
2397 
2398 	vm_area_put_locked_cache(upperCache);
2399 
2400 	return B_OK;
2401 }
2402 
2403 
2404 area_id
2405 vm_copy_area(team_id team, const char* name, void** _address,
2406 	uint32 addressSpec, uint32 protection, area_id sourceID)
2407 {
2408 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2409 
2410 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2411 		// set the same protection for the kernel as for userland
2412 		protection |= B_KERNEL_READ_AREA;
2413 		if (writableCopy)
2414 			protection |= B_KERNEL_WRITE_AREA;
2415 	}
2416 
2417 	// Do the locking: target address space, all address spaces associated with
2418 	// the source cache, and the cache itself.
2419 	MultiAddressSpaceLocker locker;
2420 	VMAddressSpace* targetAddressSpace;
2421 	VMCache* cache;
2422 	VMArea* source;
2423 	AreaCacheLocker cacheLocker;
2424 	status_t status;
2425 	bool sharedArea;
2426 
2427 	page_num_t wiredPages = 0;
2428 	vm_page_reservation wiredPagesReservation;
2429 
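	// Reserving pages may block, which we must not do while holding the locks.
	// Therefore: count the cache's wired pages, drop the locks, reserve that
	// many pages, and retry until the count no longer grows.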
2430 	bool restart;
2431 	do {
2432 		restart = false;
2433 
2434 		locker.Unset();
2435 		status = locker.AddTeam(team, true, &targetAddressSpace);
2436 		if (status == B_OK) {
2437 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2438 				&cache);
2439 		}
2440 		if (status != B_OK)
2441 			return status;
2442 
2443 		cacheLocker.SetTo(cache, true);	// already locked
2444 
2445 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2446 
2447 		page_num_t oldWiredPages = wiredPages;
2448 		wiredPages = 0;
2449 
2450 		// If the source area isn't shared, count the number of wired pages in
2451 		// the cache and reserve as many pages.
2452 		if (!sharedArea) {
2453 			wiredPages = cache->WiredPagesCount();
2454 
2455 			if (wiredPages > oldWiredPages) {
2456 				cacheLocker.Unlock();
2457 				locker.Unlock();
2458 
2459 				if (oldWiredPages > 0)
2460 					vm_page_unreserve_pages(&wiredPagesReservation);
2461 
2462 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2463 					VM_PRIORITY_USER);
2464 
2465 				restart = true;
2466 			}
2467 		} else if (oldWiredPages > 0)
2468 			vm_page_unreserve_pages(&wiredPagesReservation);
2469 	} while (restart);
2470 
2471 	// unreserve pages later
2472 	struct PagesUnreserver {
2473 		PagesUnreserver(vm_page_reservation* reservation)
2474 			:
2475 			fReservation(reservation)
2476 		{
2477 		}
2478 
2479 		~PagesUnreserver()
2480 		{
2481 			if (fReservation != NULL)
2482 				vm_page_unreserve_pages(fReservation);
2483 		}
2484 
2485 	private:
2486 		vm_page_reservation*	fReservation;
2487 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2488 
2489 	if (addressSpec == B_CLONE_ADDRESS) {
2490 		addressSpec = B_EXACT_ADDRESS;
2491 		*_address = (void*)source->Base();
2492 	}
2493 
2494 	// First, create a cache on top of the source area, or use the existing
2495 	// one if this is a shared area.
2496 
2497 	VMArea* target;
2498 	virtual_address_restrictions addressRestrictions = {};
2499 	addressRestrictions.address = *_address;
2500 	addressRestrictions.address_specification = addressSpec;
2501 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2502 		name, source->Size(), source->wiring, protection,
2503 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2504 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2505 		&addressRestrictions, true, &target, _address);
2506 	if (status < B_OK)
2507 		return status;
2508 
2509 	if (sharedArea) {
2510 		// The new area uses the old area's cache, but map_backing_store()
2511 		// hasn't acquired a ref. So we have to do that now.
2512 		cache->AcquireRefLocked();
2513 	}
2514 
2515 	// If the source area is writable, we need to move it one layer up as well
2516 
2517 	if (!sharedArea) {
2518 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2519 			// TODO: do something more useful if this fails!
2520 			if (vm_copy_on_write_area(cache,
2521 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2522 				panic("vm_copy_on_write_area() failed!\n");
2523 			}
2524 		}
2525 	}
2526 
2527 	// we return the ID of the newly created area
2528 	return target->id;
2529 }
2530 
2531 
2532 status_t
2533 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2534 	bool kernel)
2535 {
2536 	fix_protection(&newProtection);
2537 
2538 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2539 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2540 
2541 	if (!arch_vm_supports_protection(newProtection))
2542 		return B_NOT_SUPPORTED;
2543 
2544 	bool becomesWritable
2545 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2546 
2547 	// lock address spaces and cache
2548 	MultiAddressSpaceLocker locker;
2549 	VMCache* cache;
2550 	VMArea* area;
2551 	status_t status;
2552 	AreaCacheLocker cacheLocker;
2553 	bool isWritable;
2554 
2555 	bool restart;
2556 	do {
2557 		restart = false;
2558 
2559 		locker.Unset();
2560 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2561 		if (status != B_OK)
2562 			return status;
2563 
2564 		cacheLocker.SetTo(cache, true);	// already locked
2565 
2566 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2567 			return B_NOT_ALLOWED;
2568 
2569 		if (area->protection == newProtection)
2570 			return B_OK;
2571 
2572 		if (team != VMAddressSpace::KernelID()
2573 			&& area->address_space->ID() != team) {
2574 			// unless you're the kernel, you are only allowed to set
2575 			// the protection of your own areas
2576 			return B_NOT_ALLOWED;
2577 		}
2578 
2579 		isWritable
2580 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2581 
2582 		// Make sure the area (or, if we're going to call
2583 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2584 		// wired ranges.
2585 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2586 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2587 					otherArea = otherArea->cache_next) {
2588 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2589 					restart = true;
2590 					break;
2591 				}
2592 			}
2593 		} else {
2594 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2595 				restart = true;
2596 		}
2597 	} while (restart);
2598 
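	// changePageProtection: whether existing page mappings have to be remapped
	// at all; changeTopCachePagesOnly: whether remapping only the pages of the
	// top cache (instead of the whole area) is sufficient.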
2599 	bool changePageProtection = true;
2600 	bool changeTopCachePagesOnly = false;
2601 
2602 	if (isWritable && !becomesWritable) {
2603 		// writable -> !writable
2604 
2605 		if (cache->source != NULL && cache->temporary) {
2606 			if (cache->CountWritableAreas(area) == 0) {
2607 				// Since this cache is now backed by the pages of its source
2608 				// cache, we can reduce its commitment to cover only those
2609 				// pages that actually are in this cache.
2610 
2611 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2612 					team == VMAddressSpace::KernelID()
2613 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2614 
2615 				// TODO: we may be able to join with our source cache, if
2616 				// count == 0
2617 			}
2618 		}
2619 
2620 		// If only the writability changes, we can just remap the pages of the
2621 		// top cache, since the pages of lower caches are mapped read-only
2622 		// anyway. That's only advantageous, though, if the number of pages in
2623 		// the cache is significantly smaller than the number of pages in the
2624 		// area.
2625 		if (newProtection
2626 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2627 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2628 			changeTopCachePagesOnly = true;
2629 		}
2630 	} else if (!isWritable && becomesWritable) {
2631 		// !writable -> writable
2632 
2633 		if (!cache->consumers.IsEmpty()) {
2634 			// There are consumers -- we have to insert a new cache. Fortunately
2635 			// vm_copy_on_write_area() does everything that's needed.
2636 			changePageProtection = false;
2637 			status = vm_copy_on_write_area(cache, NULL);
2638 		} else {
2639 			// No consumers, so we don't need to insert a new one.
2640 			if (cache->source != NULL && cache->temporary) {
2641 				// the cache's commitment must contain all possible pages
2642 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2643 					team == VMAddressSpace::KernelID()
2644 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2645 			}
2646 
2647 			if (status == B_OK && cache->source != NULL) {
2648 				// There's a source cache, hence we can't just change all pages'
2649 				// protection or we might allow writing into pages belonging to
2650 				// a lower cache.
2651 				changeTopCachePagesOnly = true;
2652 			}
2653 		}
2654 	} else {
2655 		// we don't have anything special to do in all other cases
2656 	}
2657 
2658 	if (status == B_OK) {
2659 		// remap existing pages in this cache
2660 		if (changePageProtection) {
2661 			VMTranslationMap* map = area->address_space->TranslationMap();
2662 			map->Lock();
2663 
2664 			if (changeTopCachePagesOnly) {
2665 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2666 				page_num_t lastPageOffset
2667 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2668 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2669 						vm_page* page = it.Next();) {
2670 					if (page->cache_offset >= firstPageOffset
2671 						&& page->cache_offset < lastPageOffset) {
2672 						addr_t address = virtual_page_address(area, page);
2673 						map->ProtectPage(area, address, newProtection);
2674 					}
2675 				}
2676 			} else
2677 				map->ProtectArea(area, newProtection);
2678 
2679 			map->Unlock();
2680 		}
2681 
2682 		area->protection = newProtection;
2683 	}
2684 
2685 	return status;
2686 }
2687 
2688 
2689 status_t
2690 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2691 {
2692 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2693 	if (addressSpace == NULL)
2694 		return B_BAD_TEAM_ID;
2695 
2696 	VMTranslationMap* map = addressSpace->TranslationMap();
2697 
2698 	map->Lock();
2699 	uint32 dummyFlags;
2700 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2701 	map->Unlock();
2702 
2703 	addressSpace->Put();
2704 	return status;
2705 }
2706 
2707 
2708 /*!	The page's cache must be locked.
2709 */
2710 bool
2711 vm_test_map_modification(vm_page* page)
2712 {
2713 	if (page->modified)
2714 		return true;
2715 
2716 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2717 	vm_page_mapping* mapping;
2718 	while ((mapping = iterator.Next()) != NULL) {
2719 		VMArea* area = mapping->area;
2720 		VMTranslationMap* map = area->address_space->TranslationMap();
2721 
2722 		phys_addr_t physicalAddress;
2723 		uint32 flags;
2724 		map->Lock();
2725 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2726 		map->Unlock();
2727 
2728 		if ((flags & PAGE_MODIFIED) != 0)
2729 			return true;
2730 	}
2731 
2732 	return false;
2733 }
2734 
2735 
2736 /*!	The page's cache must be locked.
2737 */
2738 void
2739 vm_clear_map_flags(vm_page* page, uint32 flags)
2740 {
2741 	if ((flags & PAGE_ACCESSED) != 0)
2742 		page->accessed = false;
2743 	if ((flags & PAGE_MODIFIED) != 0)
2744 		page->modified = false;
2745 
2746 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2747 	vm_page_mapping* mapping;
2748 	while ((mapping = iterator.Next()) != NULL) {
2749 		VMArea* area = mapping->area;
2750 		VMTranslationMap* map = area->address_space->TranslationMap();
2751 
2752 		map->Lock();
2753 		map->ClearFlags(virtual_page_address(area, page), flags);
2754 		map->Unlock();
2755 	}
2756 }
2757 
2758 
2759 /*!	Removes all mappings from a page.
2760 	After you've called this function, the page is unmapped from memory and
2761 	the page's \c accessed and \c modified flags have been updated according
2762 	to the state of the mappings.
2763 	The page's cache must be locked.
2764 */
2765 void
2766 vm_remove_all_page_mappings(vm_page* page)
2767 {
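	// UnmapPage() removes the mapping from the page's mapping list, so taking
	// the list head repeatedly terminates once all mappings are gone.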
2768 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2769 		VMArea* area = mapping->area;
2770 		VMTranslationMap* map = area->address_space->TranslationMap();
2771 		addr_t address = virtual_page_address(area, page);
2772 		map->UnmapPage(area, address, false);
2773 	}
2774 }
2775 
2776 
2777 int32
2778 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2779 {
2780 	int32 count = 0;
2781 
2782 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2783 	vm_page_mapping* mapping;
2784 	while ((mapping = iterator.Next()) != NULL) {
2785 		VMArea* area = mapping->area;
2786 		VMTranslationMap* map = area->address_space->TranslationMap();
2787 
2788 		bool modified;
2789 		if (map->ClearAccessedAndModified(area,
2790 				virtual_page_address(area, page), false, modified)) {
2791 			count++;
2792 		}
2793 
2794 		page->modified |= modified;
2795 	}
2796 
2797 
2798 	if (page->accessed) {
2799 		count++;
2800 		page->accessed = false;
2801 	}
2802 
2803 	return count;
2804 }
2805 
2806 
2807 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2808 	mappings.
2809 	The function iterates through the page mappings and removes them until
2810 	encountering one that has been accessed. From then on it will continue to
2811 	iterate, but only clear the accessed flag of the mapping. The page's
2812 	\c modified bit will be updated accordingly, the \c accessed bit will be
2813 	cleared.
2814 	\return The number of mapping accessed bits encountered, including the
2815 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2816 		of the page have been removed.
2817 */
2818 int32
2819 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2820 {
2821 	ASSERT(page->WiredCount() == 0);
2822 
2823 	if (page->accessed)
2824 		return vm_clear_page_mapping_accessed_flags(page);
2825 
2826 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2827 		VMArea* area = mapping->area;
2828 		VMTranslationMap* map = area->address_space->TranslationMap();
2829 		addr_t address = virtual_page_address(area, page);
2830 		bool modified = false;
2831 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2832 			page->accessed = true;
2833 			page->modified |= modified;
2834 			return vm_clear_page_mapping_accessed_flags(page);
2835 		}
2836 		page->modified |= modified;
2837 	}
2838 
2839 	return 0;
2840 }
2841 
2842 
2843 static int
2844 display_mem(int argc, char** argv)
2845 {
2846 	bool physical = false;
2847 	addr_t copyAddress;
2848 	int32 displayWidth;
2849 	int32 itemSize;
2850 	int32 num = -1;
2851 	addr_t address;
2852 	int i = 1, j;
2853 
2854 	if (argc > 1 && argv[1][0] == '-') {
2855 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2856 			physical = true;
2857 			i++;
2858 		} else
2859 			i = 99;
2860 	}
2861 
2862 	if (argc < i + 1 || argc > i + 2) {
2863 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2864 			"\tdl - 8 bytes\n"
2865 			"\tdw - 4 bytes\n"
2866 			"\tds - 2 bytes\n"
2867 			"\tdb - 1 byte\n"
2868 			"\tstring - a whole string\n"
2869 			"  -p or --physical only allows memory from a single page to be "
2870 			"displayed.\n");
2871 		return 0;
2872 	}
2873 
2874 	address = parse_expression(argv[i]);
2875 
2876 	if (argc > i + 1)
2877 		num = parse_expression(argv[i + 1]);
2878 
2879 	// build the format string
2880 	if (strcmp(argv[0], "db") == 0) {
2881 		itemSize = 1;
2882 		displayWidth = 16;
2883 	} else if (strcmp(argv[0], "ds") == 0) {
2884 		itemSize = 2;
2885 		displayWidth = 8;
2886 	} else if (strcmp(argv[0], "dw") == 0) {
2887 		itemSize = 4;
2888 		displayWidth = 4;
2889 	} else if (strcmp(argv[0], "dl") == 0) {
2890 		itemSize = 8;
2891 		displayWidth = 2;
2892 	} else if (strcmp(argv[0], "string") == 0) {
2893 		itemSize = 1;
2894 		displayWidth = -1;
2895 	} else {
2896 		kprintf("display_mem called in an invalid way!\n");
2897 		return 0;
2898 	}
2899 
2900 	if (num <= 0)
2901 		num = displayWidth;
2902 
2903 	void* physicalPageHandle = NULL;
2904 
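	// For physical addresses, map the page into the kernel via the debug
	// physical page mapper and read from the resulting copy address; the
	// display is limited to a single page in that case.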
2905 	if (physical) {
2906 		int32 offset = address & (B_PAGE_SIZE - 1);
2907 		if (num * itemSize + offset > B_PAGE_SIZE) {
2908 			num = (B_PAGE_SIZE - offset) / itemSize;
2909 			kprintf("NOTE: number of bytes has been cut to page size\n");
2910 		}
2911 
2912 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2913 
2914 		if (vm_get_physical_page_debug(address, &copyAddress,
2915 				&physicalPageHandle) != B_OK) {
2916 			kprintf("getting the hardware page failed.\n");
2917 			return 0;
2918 		}
2919 
2920 		address += offset;
2921 		copyAddress += offset;
2922 	} else
2923 		copyAddress = address;
2924 
2925 	if (!strcmp(argv[0], "string")) {
2926 		kprintf("%p \"", (char*)copyAddress);
2927 
2928 		// string mode
2929 		for (i = 0; true; i++) {
2930 			char c;
2931 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2932 					!= B_OK
2933 				|| c == '\0') {
2934 				break;
2935 			}
2936 
2937 			if (c == '\n')
2938 				kprintf("\\n");
2939 			else if (c == '\t')
2940 				kprintf("\\t");
2941 			else {
2942 				if (!isprint(c))
2943 					c = '.';
2944 
2945 				kprintf("%c", c);
2946 			}
2947 		}
2948 
2949 		kprintf("\"\n");
2950 	} else {
2951 		// number mode
2952 		for (i = 0; i < num; i++) {
2953 			uint32 value;
2954 
2955 			if ((i % displayWidth) == 0) {
2956 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2957 				if (i != 0)
2958 					kprintf("\n");
2959 
2960 				kprintf("[0x%lx]  ", address + i * itemSize);
2961 
2962 				for (j = 0; j < displayed; j++) {
2963 					char c;
2964 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2965 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2966 						displayed = j;
2967 						break;
2968 					}
2969 					if (!isprint(c))
2970 						c = '.';
2971 
2972 					kprintf("%c", c);
2973 				}
2974 				if (num > displayWidth) {
2975 					// make sure the spacing in the last line is correct
2976 					for (j = displayed; j < displayWidth * itemSize; j++)
2977 						kprintf(" ");
2978 				}
2979 				kprintf("  ");
2980 			}
2981 
2982 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2983 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2984 				kprintf("read fault");
2985 				break;
2986 			}
2987 
2988 			switch (itemSize) {
2989 				case 1:
2990 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
2991 					break;
2992 				case 2:
2993 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
2994 					break;
2995 				case 4:
2996 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
2997 					break;
2998 				case 8:
2999 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3000 					break;
3001 			}
3002 		}
3003 
3004 		kprintf("\n");
3005 	}
3006 
3007 	if (physical) {
3008 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3009 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3010 	}
3011 	return 0;
3012 }
3013 
3014 
3015 static void
3016 dump_cache_tree_recursively(VMCache* cache, int level,
3017 	VMCache* highlightCache)
3018 {
3019 	// print this cache
3020 	for (int i = 0; i < level; i++)
3021 		kprintf("  ");
3022 	if (cache == highlightCache)
3023 		kprintf("%p <--\n", cache);
3024 	else
3025 		kprintf("%p\n", cache);
3026 
3027 	// recursively print its consumers
3028 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3029 			VMCache* consumer = it.Next();) {
3030 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3031 	}
3032 }
3033 
3034 
3035 static int
3036 dump_cache_tree(int argc, char** argv)
3037 {
3038 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3039 		kprintf("usage: %s <address>\n", argv[0]);
3040 		return 0;
3041 	}
3042 
3043 	addr_t address = parse_expression(argv[1]);
3044 	if (address == 0)
3045 		return 0;
3046 
3047 	VMCache* cache = (VMCache*)address;
3048 	VMCache* root = cache;
3049 
3050 	// find the root cache (the transitive source)
3051 	while (root->source != NULL)
3052 		root = root->source;
3053 
3054 	dump_cache_tree_recursively(root, 0, cache);
3055 
3056 	return 0;
3057 }
3058 
3059 
3060 const char*
3061 vm_cache_type_to_string(int32 type)
3062 {
3063 	switch (type) {
3064 		case CACHE_TYPE_RAM:
3065 			return "RAM";
3066 		case CACHE_TYPE_DEVICE:
3067 			return "device";
3068 		case CACHE_TYPE_VNODE:
3069 			return "vnode";
3070 		case CACHE_TYPE_NULL:
3071 			return "null";
3072 
3073 		default:
3074 			return "unknown";
3075 	}
3076 }
3077 
3078 
3079 #if DEBUG_CACHE_LIST
3080 
3081 static void
3082 update_cache_info_recursively(VMCache* cache, cache_info& info)
3083 {
3084 	info.page_count += cache->page_count;
3085 	if (cache->type == CACHE_TYPE_RAM)
3086 		info.committed += cache->committed_size;
3087 
3088 	// recurse
3089 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3090 			VMCache* consumer = it.Next();) {
3091 		update_cache_info_recursively(consumer, info);
3092 	}
3093 }
3094 
3095 
3096 static int
3097 cache_info_compare_page_count(const void* _a, const void* _b)
3098 {
3099 	const cache_info* a = (const cache_info*)_a;
3100 	const cache_info* b = (const cache_info*)_b;
3101 	if (a->page_count == b->page_count)
3102 		return 0;
3103 	return a->page_count < b->page_count ? 1 : -1;
3104 }
3105 
3106 
3107 static int
3108 cache_info_compare_committed(const void* _a, const void* _b)
3109 {
3110 	const cache_info* a = (const cache_info*)_a;
3111 	const cache_info* b = (const cache_info*)_b;
3112 	if (a->committed == b->committed)
3113 		return 0;
3114 	return a->committed < b->committed ? 1 : -1;
3115 }
3116 
3117 
3118 static void
3119 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3120 {
3121 	for (int i = 0; i < level; i++)
3122 		kprintf("  ");
3123 
3124 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3125 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3126 		cache->virtual_base, cache->virtual_end, cache->page_count);
3127 
3128 	if (level == 0)
3129 		kprintf("/%lu", info.page_count);
3130 
3131 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3132 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3133 
3134 		if (level == 0)
3135 			kprintf("/%lu", info.committed);
3136 	}
3137 
3138 	// areas
3139 	if (cache->areas != NULL) {
3140 		VMArea* area = cache->areas;
3141 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3142 			area->name, area->address_space->ID());
3143 
3144 		while (area->cache_next != NULL) {
3145 			area = area->cache_next;
3146 			kprintf(", %" B_PRId32, area->id);
3147 		}
3148 	}
3149 
3150 	kputs("\n");
3151 
3152 	// recurse
3153 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3154 			VMCache* consumer = it.Next();) {
3155 		dump_caches_recursively(consumer, info, level + 1);
3156 	}
3157 }
3158 
3159 
3160 static int
3161 dump_caches(int argc, char** argv)
3162 {
3163 	if (sCacheInfoTable == NULL) {
3164 		kprintf("No cache info table!\n");
3165 		return 0;
3166 	}
3167 
3168 	bool sortByPageCount = true;
3169 
3170 	for (int32 i = 1; i < argc; i++) {
3171 		if (strcmp(argv[i], "-c") == 0) {
3172 			sortByPageCount = false;
3173 		} else {
3174 			print_debugger_command_usage(argv[0]);
3175 			return 0;
3176 		}
3177 	}
3178 
3179 	uint32 totalCount = 0;
3180 	uint32 rootCount = 0;
3181 	off_t totalCommitted = 0;
3182 	page_num_t totalPages = 0;
3183 
3184 	VMCache* cache = gDebugCacheList;
3185 	while (cache) {
3186 		totalCount++;
3187 		if (cache->source == NULL) {
3188 			cache_info stackInfo;
3189 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3190 				? sCacheInfoTable[rootCount] : stackInfo;
3191 			rootCount++;
3192 			info.cache = cache;
3193 			info.page_count = 0;
3194 			info.committed = 0;
3195 			update_cache_info_recursively(cache, info);
3196 			totalCommitted += info.committed;
3197 			totalPages += info.page_count;
3198 		}
3199 
3200 		cache = cache->debug_next;
3201 	}
3202 
3203 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3204 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3205 			sortByPageCount
3206 				? &cache_info_compare_page_count
3207 				: &cache_info_compare_committed);
3208 	}
3209 
3210 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3211 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3212 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3213 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3214 			"page count" : "committed size");
3215 
3216 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3217 		for (uint32 i = 0; i < rootCount; i++) {
3218 			cache_info& info = sCacheInfoTable[i];
3219 			dump_caches_recursively(info.cache, info, 0);
3220 		}
3221 	} else
3222 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3223 
3224 	return 0;
3225 }
3226 
3227 #endif	// DEBUG_CACHE_LIST
3228 
3229 
3230 static int
3231 dump_cache(int argc, char** argv)
3232 {
3233 	VMCache* cache;
3234 	bool showPages = false;
3235 	int i = 1;
3236 
3237 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3238 		kprintf("usage: %s [-ps] <address>\n"
3239 			"  if -p is specified, all pages are shown; if -s is used,\n"
3240 			"  only the cache info is shown.\n", argv[0]);
3241 		return 0;
3242 	}
3243 	while (argv[i][0] == '-') {
3244 		char* arg = argv[i] + 1;
3245 		while (arg[0]) {
3246 			if (arg[0] == 'p')
3247 				showPages = true;
3248 			arg++;
3249 		}
3250 		i++;
3251 	}
3252 	if (argv[i] == NULL) {
3253 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3254 		return 0;
3255 	}
3256 
3257 	addr_t address = parse_expression(argv[i]);
3258 	if (address == 0)
3259 		return 0;
3260 
3261 	cache = (VMCache*)address;
3262 
3263 	cache->Dump(showPages);
3264 
3265 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3266 
3267 	return 0;
3268 }
3269 
3270 
3271 static void
3272 dump_area_struct(VMArea* area, bool mappings)
3273 {
3274 	kprintf("AREA: %p\n", area);
3275 	kprintf("name:\t\t'%s'\n", area->name);
3276 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3277 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3278 	kprintf("base:\t\t0x%lx\n", area->Base());
3279 	kprintf("size:\t\t0x%lx\n", area->Size());
3280 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3281 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3282 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3283 	kprintf("cache:\t\t%p\n", area->cache);
3284 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3285 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3286 	kprintf("cache_next:\t%p\n", area->cache_next);
3287 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3288 
3289 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3290 	if (mappings) {
3291 		kprintf("page mappings:\n");
3292 		while (iterator.HasNext()) {
3293 			vm_page_mapping* mapping = iterator.Next();
3294 			kprintf("  %p", mapping->page);
3295 		}
3296 		kprintf("\n");
3297 	} else {
3298 		uint32 count = 0;
3299 		while (iterator.Next() != NULL) {
3300 			count++;
3301 		}
3302 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3303 	}
3304 }
3305 
3306 
3307 static int
3308 dump_area(int argc, char** argv)
3309 {
3310 	bool mappings = false;
3311 	bool found = false;
3312 	int32 index = 1;
3313 	VMArea* area;
3314 	addr_t num;
3315 
3316 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3317 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3318 			"All areas matching the given id/address/name are listed. You can\n"
3319 			"restrict the match to a single attribute by prefixing the\n"
3320 			"specifier with one of the id/contains/address/name keywords.\n"
3321 			"-m shows the area's mappings as well.\n");
3322 		return 0;
3323 	}
3324 
3325 	if (!strcmp(argv[1], "-m")) {
3326 		mappings = true;
3327 		index++;
3328 	}
3329 
3330 	int32 mode = 0xf;
3331 	if (!strcmp(argv[index], "id"))
3332 		mode = 1;
3333 	else if (!strcmp(argv[index], "contains"))
3334 		mode = 2;
3335 	else if (!strcmp(argv[index], "name"))
3336 		mode = 4;
3337 	else if (!strcmp(argv[index], "address"))
3338 		mode = 0;
3339 	if (mode != 0xf)
3340 		index++;
3341 
3342 	if (index >= argc) {
3343 		kprintf("No area specifier given.\n");
3344 		return 0;
3345 	}
3346 
3347 	num = parse_expression(argv[index]);
3348 
3349 	if (mode == 0) {
3350 		dump_area_struct((struct VMArea*)num, mappings);
3351 	} else {
3352 		// walk through the area list, looking for the arguments as a name
3353 
3354 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3355 		while ((area = it.Next()) != NULL) {
3356 			if (((mode & 4) != 0 && area->name != NULL
3357 					&& !strcmp(argv[index], area->name))
3358 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3359 					|| (((mode & 2) != 0 && area->Base() <= num
3360 						&& area->Base() + area->Size() > num))))) {
3361 				dump_area_struct(area, mappings);
3362 				found = true;
3363 			}
3364 		}
3365 
3366 		if (!found)
3367 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3368 	}
3369 
3370 	return 0;
3371 }
3372 
3373 
3374 static int
3375 dump_area_list(int argc, char** argv)
3376 {
3377 	VMArea* area;
3378 	const char* name = NULL;
3379 	int32 id = 0;
3380 
3381 	if (argc > 1) {
3382 		id = parse_expression(argv[1]);
3383 		if (id == 0)
3384 			name = argv[1];
3385 	}
3386 
3387 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3388 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3389 		B_PRINTF_POINTER_WIDTH, "size");
3390 
3391 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3392 	while ((area = it.Next()) != NULL) {
3393 		if ((id != 0 && area->address_space->ID() != id)
3394 			|| (name != NULL && strstr(area->name, name) == NULL))
3395 			continue;
3396 
3397 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3398 			area->id, (void*)area->Base(), (void*)area->Size(),
3399 			area->protection, area->wiring, area->name);
3400 	}
3401 	return 0;
3402 }
3403 
3404 
3405 static int
3406 dump_available_memory(int argc, char** argv)
3407 {
3408 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3409 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3410 	return 0;
3411 }
3412 
3413 
3414 static int
3415 dump_mapping_info(int argc, char** argv)
3416 {
3417 	bool reverseLookup = false;
3418 	bool pageLookup = false;
3419 
3420 	int argi = 1;
3421 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3422 		const char* arg = argv[argi];
3423 		if (strcmp(arg, "-r") == 0) {
3424 			reverseLookup = true;
3425 		} else if (strcmp(arg, "-p") == 0) {
3426 			reverseLookup = true;
3427 			pageLookup = true;
3428 		} else {
3429 			print_debugger_command_usage(argv[0]);
3430 			return 0;
3431 		}
3432 	}
3433 
3434 	// We need at least one argument, the address. Optionally a thread ID can be
3435 	// specified.
3436 	if (argi >= argc || argi + 2 < argc) {
3437 		print_debugger_command_usage(argv[0]);
3438 		return 0;
3439 	}
3440 
3441 	uint64 addressValue;
3442 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3443 		return 0;
3444 
3445 	Team* team = NULL;
3446 	if (argi < argc) {
3447 		uint64 threadID;
3448 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3449 			return 0;
3450 
3451 		Thread* thread = Thread::GetDebug(threadID);
3452 		if (thread == NULL) {
3453 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3454 			return 0;
3455 		}
3456 
3457 		team = thread->team;
3458 	}
3459 
3460 	if (reverseLookup) {
3461 		phys_addr_t physicalAddress;
3462 		if (pageLookup) {
3463 			vm_page* page = (vm_page*)(addr_t)addressValue;
3464 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3465 		} else {
3466 			physicalAddress = (phys_addr_t)addressValue;
3467 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3468 		}
3469 
3470 		kprintf("    Team     Virtual Address      Area\n");
3471 		kprintf("--------------------------------------\n");
3472 
3473 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3474 			Callback()
3475 				:
3476 				fAddressSpace(NULL)
3477 			{
3478 			}
3479 
3480 			void SetAddressSpace(VMAddressSpace* addressSpace)
3481 			{
3482 				fAddressSpace = addressSpace;
3483 			}
3484 
3485 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3486 			{
3487 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3488 					virtualAddress);
3489 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3490 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3491 				else
3492 					kprintf("\n");
3493 				return false;
3494 			}
3495 
3496 		private:
3497 			VMAddressSpace*	fAddressSpace;
3498 		} callback;
3499 
3500 		if (team != NULL) {
3501 			// team specified -- get its address space
3502 			VMAddressSpace* addressSpace = team->address_space;
3503 			if (addressSpace == NULL) {
3504 				kprintf("Failed to get address space!\n");
3505 				return 0;
3506 			}
3507 
3508 			callback.SetAddressSpace(addressSpace);
3509 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3510 				physicalAddress, callback);
3511 		} else {
3512 			// no team specified -- iterate through all address spaces
3513 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3514 				addressSpace != NULL;
3515 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3516 				callback.SetAddressSpace(addressSpace);
3517 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3518 					physicalAddress, callback);
3519 			}
3520 		}
3521 	} else {
3522 		// get the address space
3523 		addr_t virtualAddress = (addr_t)addressValue;
3524 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3525 		VMAddressSpace* addressSpace;
3526 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3527 			addressSpace = VMAddressSpace::Kernel();
3528 		} else if (team != NULL) {
3529 			addressSpace = team->address_space;
3530 		} else {
3531 			Thread* thread = debug_get_debugged_thread();
3532 			if (thread == NULL || thread->team == NULL) {
3533 				kprintf("Failed to get team!\n");
3534 				return 0;
3535 			}
3536 
3537 			addressSpace = thread->team->address_space;
3538 		}
3539 
3540 		if (addressSpace == NULL) {
3541 			kprintf("Failed to get address space!\n");
3542 			return 0;
3543 		}
3544 
3545 		// let the translation map implementation do the job
3546 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3547 	}
3548 
3549 	return 0;
3550 }
3551 
3552 
3553 /*!	Deletes all areas and reserved regions in the given address space.
3554 
3555 	The caller must ensure that none of the areas has any wired ranges.
3556 
3557 	\param addressSpace The address space.
3558 	\param deletingAddressSpace \c true, if the address space is in the process
3559 		of being deleted.
3560 */
3561 void
3562 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3563 {
3564 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3565 		addressSpace->ID()));
3566 
3567 	addressSpace->WriteLock();
3568 
3569 	// remove all reserved areas in this address space
3570 	addressSpace->UnreserveAllAddressRanges(0);
3571 
3572 	// delete all the areas in this address space
3573 	while (VMArea* area = addressSpace->FirstArea()) {
3574 		ASSERT(!area->IsWired());
3575 		delete_area(addressSpace, area, deletingAddressSpace);
3576 	}
3577 
3578 	addressSpace->WriteUnlock();
3579 }
3580 
3581 
3582 static area_id
3583 vm_area_for(addr_t address, bool kernel)
3584 {
3585 	team_id team;
3586 	if (IS_USER_ADDRESS(address)) {
3587 		// we try the user team address space, if any
3588 		team = VMAddressSpace::CurrentID();
3589 		if (team < 0)
3590 			return team;
3591 	} else
3592 		team = VMAddressSpace::KernelID();
3593 
3594 	AddressSpaceReadLocker locker(team);
3595 	if (!locker.IsLocked())
3596 		return B_BAD_TEAM_ID;
3597 
3598 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3599 	if (area != NULL) {
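		// Unprivileged callers only learn about areas they could actually
		// access (i.e. areas with at least one user protection bit set).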
3600 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3601 			return B_ERROR;
3602 
3603 		return area->id;
3604 	}
3605 
3606 	return B_ERROR;
3607 }
3608 
3609 
3610 /*!	Frees physical pages that were used during the boot process.
3611 	\a end is inclusive.
3612 */
3613 static void
3614 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3615 {
3616 	// free all physical pages in the specified range
3617 
3618 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3619 		phys_addr_t physicalAddress;
3620 		uint32 flags;
3621 
3622 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3623 			&& (flags & PAGE_PRESENT) != 0) {
3624 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
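			// Only pages still in an allocated state are released here; pages
			// that are already free, clear, or unused need no action.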
3625 			if (page != NULL && page->State() != PAGE_STATE_FREE
3626 					 && page->State() != PAGE_STATE_CLEAR
3627 					 && page->State() != PAGE_STATE_UNUSED) {
3628 				DEBUG_PAGE_ACCESS_START(page);
3629 				vm_page_set_state(page, PAGE_STATE_FREE);
3630 			}
3631 		}
3632 	}
3633 
3634 	// unmap the memory
3635 	map->Unmap(start, end);
3636 }
3637 
3638 
3639 void
3640 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3641 {
3642 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3643 	addr_t end = start + (size - 1);
3644 	addr_t lastEnd = start;
3645 
3646 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3647 		(void*)start, (void*)end));
3648 
3649 	// The areas are sorted in virtual address space order, so
3650 	// we just have to find the holes between them that fall
3651 	// into the area we should dispose
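	// (Hypothetical example: with areas covering [A, B) and [C, D) inside the
	// range, the holes freed are [start, A), [B, C) and [D, end].)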
3652 
3653 	map->Lock();
3654 
3655 	for (VMAddressSpace::AreaIterator it
3656 				= VMAddressSpace::Kernel()->GetAreaIterator();
3657 			VMArea* area = it.Next();) {
3658 		addr_t areaStart = area->Base();
3659 		addr_t areaEnd = areaStart + (area->Size() - 1);
3660 
3661 		if (areaEnd < start)
3662 			continue;
3663 
3664 		if (areaStart > end) {
3665 			// we are done, the area is already beyond what we have to free
3666 			break;
3667 		}
3668 
3669 		if (areaStart > lastEnd) {
3670 			// this is something we can free
3671 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3672 				(void*)areaStart));
3673 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3674 		}
3675 
3676 		if (areaEnd >= end) {
3677 			lastEnd = areaEnd;
3678 				// no +1 to prevent potential overflow
3679 			break;
3680 		}
3681 
3682 		lastEnd = areaEnd + 1;
3683 	}
3684 
3685 	if (lastEnd < end) {
3686 		// we can also get rid of some space at the end of the area
3687 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3688 			(void*)end));
3689 		unmap_and_free_physical_pages(map, lastEnd, end);
3690 	}
3691 
3692 	map->Unlock();
3693 }
3694 
3695 
3696 static void
3697 create_preloaded_image_areas(struct preloaded_image* _image)
3698 {
3699 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3700 	char name[B_OS_NAME_LENGTH];
3701 	void* address;
3702 	int32 length;
3703 
3704 	// use file name to create a good area name
3705 	char* fileName = strrchr(image->name, '/');
3706 	if (fileName == NULL)
3707 		fileName = image->name;
3708 	else
3709 		fileName++;
3710 
3711 	length = strlen(fileName);
3712 	// make sure there is enough space for the "_text"/"_data" suffix
3713 	if (length > 25)
3714 		length = 25;
3715 
3716 	memcpy(name, fileName, length);
3717 	strcpy(name + length, "_text");
3718 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3719 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3720 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3721 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3722 		// this will later be remapped read-only/executable by the
3723 		// ELF initialization code
3724 
3725 	strcpy(name + length, "_data");
3726 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3727 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3728 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3729 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3730 }
3731 
3732 
3733 /*!	Frees all previously allocated kernel arguments areas from the
3734 	kernel_args structure. Any boot loader resources contained in those
3735 	arguments must not be accessed anymore past this point.
3736 */
3737 void
3738 vm_free_kernel_args(kernel_args* args)
3739 {
3740 	uint32 i;
3741 
3742 	TRACE(("vm_free_kernel_args()\n"));
3743 
3744 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3745 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3746 		if (area >= B_OK)
3747 			delete_area(area);
3748 	}
3749 }
3750 
3751 
3752 static void
3753 allocate_kernel_args(kernel_args* args)
3754 {
3755 	TRACE(("allocate_kernel_args()\n"));
3756 
3757 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3758 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3759 
3760 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3761 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3762 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3763 	}
3764 }
3765 
3766 
3767 static void
3768 unreserve_boot_loader_ranges(kernel_args* args)
3769 {
3770 	TRACE(("unreserve_boot_loader_ranges()\n"));
3771 
3772 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3773 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3774 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3775 			args->virtual_allocated_range[i].size);
3776 	}
3777 }
3778 
3779 
3780 static void
3781 reserve_boot_loader_ranges(kernel_args* args)
3782 {
3783 	TRACE(("reserve_boot_loader_ranges()\n"));
3784 
3785 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3786 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3787 
3788 		// If the address is not a kernel address, we just skip it. The
3789 		// architecture-specific code has to deal with it.
3790 		if (!IS_KERNEL_ADDRESS(address)) {
3791 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3792 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3793 			continue;
3794 		}
3795 
3796 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3797 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3798 		if (status < B_OK)
3799 			panic("could not reserve boot loader ranges\n");
3800 	}
3801 }
3802 
3803 
3804 static addr_t
3805 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3806 {
3807 	size = PAGE_ALIGN(size);
3808 
3809 	// find a slot in the virtual allocation addr range
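	// Strategy: first look for a gap between two existing ranges, then for
	// room after the last range, and finally for room before the first one.
	// The chosen gap is folded into the neighboring range's bookkeeping.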
3810 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3811 		// check to see if the space between this one and the last is big enough
3812 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3813 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3814 			+ args->virtual_allocated_range[i - 1].size;
3815 
3816 		addr_t base = alignment > 0
3817 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3818 
3819 		if (base >= KERNEL_BASE && base < rangeStart
3820 				&& rangeStart - base >= size) {
3821 			args->virtual_allocated_range[i - 1].size
3822 				+= base + size - previousRangeEnd;
3823 			return base;
3824 		}
3825 	}
3826 
3827 	// We didn't find a gap between the allocation ranges. That's OK --
3828 	// see if there's a gap after the last one.
3829 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3830 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3831 		+ args->virtual_allocated_range[lastEntryIndex].size;
3832 	addr_t base = alignment > 0
3833 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3834 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3835 		args->virtual_allocated_range[lastEntryIndex].size
3836 			+= base + size - lastRangeEnd;
3837 		return base;
3838 	}
3839 
3840 	// see if there's a gap before the first one
3841 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3842 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3843 		base = rangeStart - size;
3844 		if (alignment > 0)
3845 			base = ROUNDDOWN(base, alignment);
3846 
3847 		if (base >= KERNEL_BASE) {
3848 			args->virtual_allocated_range[0].start = base;
3849 			args->virtual_allocated_range[0].size += rangeStart - base;
3850 			return base;
3851 		}
3852 	}
3853 
3854 	return 0;
3855 }
3856 
3857 
3858 static bool
3859 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3860 {
3861 	// TODO: horrible brute-force method of determining if the page can be
3862 	// allocated
3863 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3864 		if (address >= args->physical_memory_range[i].start
3865 			&& address < args->physical_memory_range[i].start
3866 				+ args->physical_memory_range[i].size)
3867 			return true;
3868 	}
3869 	return false;
3870 }
3871 
3872 
3873 page_num_t
3874 vm_allocate_early_physical_page(kernel_args* args)
3875 {
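	// First try to extend one of the already allocated ranges upwards by one
	// page; if that fails everywhere, try extending a range downwards instead.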
3876 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3877 		phys_addr_t nextPage;
3878 
3879 		nextPage = args->physical_allocated_range[i].start
3880 			+ args->physical_allocated_range[i].size;
3881 		// see if the page right after this allocated paddr run can be allocated
3882 		if (i + 1 < args->num_physical_allocated_ranges
3883 			&& args->physical_allocated_range[i + 1].size != 0) {
3884 			// see if the next page will collide with the next allocated range
3885 			if (nextPage >= args->physical_allocated_range[i+1].start)
3886 				continue;
3887 		}
3888 		// see if the next physical page fits in the memory block
3889 		if (is_page_in_physical_memory_range(args, nextPage)) {
3890 			// we got one!
3891 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3892 			return nextPage / B_PAGE_SIZE;
3893 		}
3894 	}
3895 
3896 	// Expanding upwards didn't work, try going downwards.
3897 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3898 		phys_addr_t nextPage;
3899 
3900 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
3901 		// see if the page right before this allocated paddr run can be allocated
3902 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
3903 			// see if this page would collide with the previous allocated range
3904 			if (nextPage < args->physical_allocated_range[i-1].start
3905 				+ args->physical_allocated_range[i-1].size)
3906 				continue;
3907 		}
3908 		// see if the next physical page fits in the memory block
3909 		if (is_page_in_physical_memory_range(args, nextPage)) {
3910 			// we got one!
3911 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
3912 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3913 			return nextPage / B_PAGE_SIZE;
3914 		}
3915 	}
3916 
3917 	return 0;
3918 		// could not allocate a block
3919 }
3920 
3921 
3922 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3923 	allocate some pages before the VM is completely up.
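	\a physicalSize is clamped to \a virtualSize. On failure to find a
	suitable virtual range or physical page, the function panics.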
3924 */
3925 addr_t
3926 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3927 	uint32 attributes, addr_t alignment)
3928 {
3929 	if (physicalSize > virtualSize)
3930 		physicalSize = virtualSize;
3931 
3932 	// find the vaddr to allocate at
3933 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3934 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3935 	if (virtualBase == 0) {
3936 		panic("vm_allocate_early: could not allocate virtual address\n");
3937 		return 0;
3938 	}
3939 
3940 	// map the pages
3941 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3942 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3943 		if (physicalAddress == 0)
3944 			panic("error allocating early page!\n");
3945 
3946 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3947 
3948 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3949 			physicalAddress * B_PAGE_SIZE, attributes,
3950 			&vm_allocate_early_physical_page);
3951 	}
3952 
3953 	return virtualBase;
3954 }
3955 
3956 
3957 /*!	The main entrance point to initialize the VM. */
3958 status_t
3959 vm_init(kernel_args* args)
3960 {
3961 	struct preloaded_image* image;
3962 	void* address;
3963 	status_t err = 0;
3964 	uint32 i;
3965 
3966 	TRACE(("vm_init: entry\n"));
3967 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3968 	err = arch_vm_init(args);
3969 
3970 	// initialize some globals
3971 	vm_page_init_num_pages(args);
3972 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3973 
3974 	slab_init(args);
3975 
3976 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3977 	off_t heapSize = INITIAL_HEAP_SIZE;
3978 	// try to accommodate low memory systems
3979 	while (heapSize > sAvailableMemory / 8)
3980 		heapSize /= 2;
3981 	if (heapSize < 1024 * 1024)
3982 		panic("vm_init: go buy some RAM please.");
3983 
3984 	// map in the new heap and initialize it
3985 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3986 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3987 	TRACE(("heap at 0x%lx\n", heapBase));
3988 	heap_init(heapBase, heapSize);
3989 #endif
3990 
3991 	// initialize the free page list and physical page mapper
3992 	vm_page_init(args);
3993 
3994 	// initialize the cache allocators
3995 	vm_cache_init(args);
3996 
3997 	{
3998 		status_t error = VMAreaHash::Init();
3999 		if (error != B_OK)
4000 			panic("vm_init: error initializing area hash table\n");
4001 	}
4002 
4003 	VMAddressSpace::Init();
4004 	reserve_boot_loader_ranges(args);
4005 
4006 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4007 	heap_init_post_area();
4008 #endif
4009 
4010 	// Do any further initialization that the architecture-dependent layers
4011 	// may need now
4012 	arch_vm_translation_map_init_post_area(args);
4013 	arch_vm_init_post_area(args);
4014 	vm_page_init_post_area(args);
4015 	slab_init_post_area();
4016 
4017 	// allocate areas to represent stuff that already exists
4018 
4019 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4020 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4021 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4022 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4023 #endif
4024 
4025 	allocate_kernel_args(args);
4026 
4027 	create_preloaded_image_areas(args->kernel_image);
4028 
4029 	// allocate areas for preloaded images
4030 	for (image = args->preloaded_images; image != NULL; image = image->next)
4031 		create_preloaded_image_areas(image);
4032 
4033 	// allocate kernel stacks
4034 	for (i = 0; i < args->num_cpus; i++) {
4035 		char name[64];
4036 
4037 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4038 		address = (void*)args->cpu_kstack[i].start;
4039 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4040 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4041 	}
4042 
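	// Block the last virtual page so that computations wrapping around the
	// top of the address space fault instead of silently succeeding.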
4043 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4044 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4045 
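	// Block the fill-pattern addresses so that dereferencing uninitialized or
	// freed heap memory faults right away.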
4046 #if PARANOID_KERNEL_MALLOC
4047 	vm_block_address_range("uninitialized heap memory",
4048 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4049 #endif
4050 #if PARANOID_KERNEL_FREE
4051 	vm_block_address_range("freed heap memory",
4052 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4053 #endif
4054 
4055 	// create the object cache for the page mappings
4056 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4057 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4058 		NULL, NULL);
4059 	if (gPageMappingsObjectCache == NULL)
4060 		panic("failed to create page mappings object cache");
4061 
4062 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4063 
4064 #if DEBUG_CACHE_LIST
4065 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4066 		virtual_address_restrictions virtualRestrictions = {};
4067 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4068 		physical_address_restrictions physicalRestrictions = {};
4069 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4070 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4071 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4072 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4073 			&physicalRestrictions, (void**)&sCacheInfoTable);
4074 	}
4075 #endif	// DEBUG_CACHE_LIST
4076 
4077 	// add some debugger commands
4078 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4079 	add_debugger_command("area", &dump_area,
4080 		"Dump info about a particular area");
4081 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4082 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4083 #if DEBUG_CACHE_LIST
4084 	if (sCacheInfoTable != NULL) {
4085 		add_debugger_command_etc("caches", &dump_caches,
4086 			"List all VMCache trees",
4087 			"[ \"-c\" ]\n"
4088 			"All cache trees are listed sorted in decreasing order by number "
4089 				"of\n"
4090 			"used pages or, if \"-c\" is specified, by size of committed "
4091 				"memory.\n",
4092 			0);
4093 	}
4094 #endif
4095 	add_debugger_command("avail", &dump_available_memory,
4096 		"Dump available memory");
4097 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4098 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4099 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4100 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4101 	add_debugger_command("string", &display_mem, "dump strings");
4102 
4103 	add_debugger_command_etc("mapping", &dump_mapping_info,
4104 		"Print address mapping information",
4105 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4106 		"Prints low-level page mapping information for a given address. If\n"
4107 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4108 		"address that is looked up in the translation map of the current\n"
4109 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4110 		"\"-r\" is specified, <address> is a physical address that is\n"
4111 		"searched in the translation map of all teams, respectively the team\n"
4112 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4113 		"<address> is the address of a vm_page structure. The behavior is\n"
4114 		"equivalent to specifying \"-r\" with the physical address of that\n"
4115 		"page.\n",
4116 		0);
4117 
4118 	TRACE(("vm_init: exit\n"));
4119 
4120 	vm_cache_init_post_heap();
4121 
4122 	return err;
4123 }
4124 
4125 
4126 status_t
4127 vm_init_post_sem(kernel_args* args)
4128 {
4129 	// This frees all unused boot loader resources and makes their space
4130 	// available again
4131 	arch_vm_init_end(args);
4132 	unreserve_boot_loader_ranges(args);
4133 
4134 	// Fill in all of the semaphores that were not allocated before.
4135 	// Since we're still single threaded and only the kernel address space
4136 	// exists, it isn't that hard to find all of the ones we need to create.
4137 
4138 	arch_vm_translation_map_init_post_sem(args);
4139 
4140 	slab_init_post_sem();
4141 
4142 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4143 	heap_init_post_sem();
4144 #endif
4145 
4146 	return B_OK;
4147 }
4148 
4149 
4150 status_t
4151 vm_init_post_thread(kernel_args* args)
4152 {
4153 	vm_page_init_post_thread(args);
4154 	slab_init_post_thread();
4155 	return heap_init_post_thread();
4156 }
4157 
4158 
4159 status_t
4160 vm_init_post_modules(kernel_args* args)
4161 {
4162 	return arch_vm_init_post_modules(args);
4163 }
4164 
4165 
4166 void
4167 permit_page_faults(void)
4168 {
4169 	Thread* thread = thread_get_current_thread();
4170 	if (thread != NULL)
4171 		atomic_add(&thread->page_faults_allowed, 1);
4172 }
4173 
4174 
4175 void
4176 forbid_page_faults(void)
4177 {
4178 	Thread* thread = thread_get_current_thread();
4179 	if (thread != NULL)
4180 		atomic_add(&thread->page_faults_allowed, -1);
4181 }
4182 
4183 
4184 status_t
4185 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4186 	bool isUser, addr_t* newIP)
4187 {
4188 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4189 		faultAddress));
4190 
4191 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4192 
4193 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4194 	VMAddressSpace* addressSpace = NULL;
4195 
4196 	status_t status = B_OK;
4197 	*newIP = 0;
4198 	atomic_add((int32*)&sPageFaults, 1);
4199 
4200 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4201 		addressSpace = VMAddressSpace::GetKernel();
4202 	} else if (IS_USER_ADDRESS(pageAddress)) {
4203 		addressSpace = VMAddressSpace::GetCurrent();
4204 		if (addressSpace == NULL) {
4205 			if (!isUser) {
4206 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4207 					"memory!\n");
4208 				status = B_BAD_ADDRESS;
4209 				TPF(PageFaultError(-1,
4210 					VMPageFaultTracing
4211 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4212 			} else {
4213 				// XXX weird state.
4214 				panic("vm_page_fault: non kernel thread accessing user memory "
4215 					"that doesn't exist!\n");
4216 				status = B_BAD_ADDRESS;
4217 			}
4218 		}
4219 	} else {
4220 		// The hit was probably in the 64k DMZ between kernel and user space;
4221 		// this keeps a user space thread from passing a buffer that crosses
4222 		// into kernel space.
4223 		status = B_BAD_ADDRESS;
4224 		TPF(PageFaultError(-1,
4225 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4226 	}
4227 
4228 	if (status == B_OK) {
4229 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4230 			isUser, NULL);
4231 	}
4232 
4233 	if (status < B_OK) {
4234 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4235 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4236 			strerror(status), address, faultAddress, isWrite, isUser,
4237 			thread_get_current_thread_id());
4238 		if (!isUser) {
4239 			Thread* thread = thread_get_current_thread();
4240 			if (thread != NULL && thread->fault_handler != 0) {
4241 				// this will cause the arch-dependent page fault handler to
4242 				// modify the IP on the interrupt frame or whatever to return
4243 				// to this address
4244 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4245 			} else {
4246 				// unhandled page fault in the kernel
4247 				panic("vm_page_fault: unhandled page fault in kernel space at "
4248 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4249 			}
4250 		} else {
4251 #if 1
4252 			// TODO: remove me once we have proper userland debugging support
4253 			// (and tools)
4254 			VMArea* area = NULL;
4255 			if (addressSpace != NULL) {
4256 				addressSpace->ReadLock();
4257 				area = addressSpace->LookupArea(faultAddress);
4258 			}
4259 
4260 			Thread* thread = thread_get_current_thread();
4261 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4262 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4263 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4264 				thread->team->Name(), thread->team->id,
4265 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4266 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4267 					area->Base() : 0x0));
4268 
4269 			// We can print a stack trace of the userland thread here.
4270 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4271 // fault and someone is already waiting for a write lock on the same address
4272 // space. This thread will then try to acquire the lock again and will
4273 // be queued after the writer.
4274 #	if 0
4275 			if (area) {
4276 				struct stack_frame {
4277 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4278 						struct stack_frame*	previous;
4279 						void*				return_address;
4280 					#else
4281 						// ...
4282 					#warning writeme
4283 					#endif
4284 				} frame;
4285 #		ifdef __INTEL__
4286 				struct iframe* iframe = x86_get_user_iframe();
4287 				if (iframe == NULL)
4288 					panic("iframe is NULL!");
4289 
4290 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4291 					sizeof(struct stack_frame));
4292 #		elif defined(__POWERPC__)
4293 				struct iframe* iframe = ppc_get_user_iframe();
4294 				if (iframe == NULL)
4295 					panic("iframe is NULL!");
4296 
4297 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4298 					sizeof(struct stack_frame));
4299 #		else
4300 #			warning "vm_page_fault() stack trace won't work"
4301 				status = B_ERROR;
4302 #		endif
4303 
4304 				dprintf("stack trace:\n");
4305 				int32 maxFrames = 50;
4306 				while (status == B_OK && --maxFrames >= 0
4307 						&& frame.return_address != NULL) {
4308 					dprintf("  %p", frame.return_address);
4309 					area = addressSpace->LookupArea(
4310 						(addr_t)frame.return_address);
4311 					if (area) {
4312 						dprintf(" (%s + %#lx)", area->name,
4313 							(addr_t)frame.return_address - area->Base());
4314 					}
4315 					dprintf("\n");
4316 
4317 					status = user_memcpy(&frame, frame.previous,
4318 						sizeof(struct stack_frame));
4319 				}
4320 			}
4321 #	endif	// 0 (stack trace)
4322 
4323 			if (addressSpace != NULL)
4324 				addressSpace->ReadUnlock();
4325 #endif
4326 
4327 			// If the thread has a signal handler for SIGSEGV, we simply
4328 			// send it the signal. Otherwise we notify the user debugger
4329 			// first.
4330 			struct sigaction action;
4331 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4332 					&& action.sa_handler != SIG_DFL
4333 					&& action.sa_handler != SIG_IGN)
4334 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4335 					SIGSEGV)) {
4336 				Signal signal(SIGSEGV,
4337 					status == B_PERMISSION_DENIED
4338 						? SEGV_ACCERR : SEGV_MAPERR,
4339 					EFAULT, thread->team->id);
4340 				signal.SetAddress((void*)address);
4341 				send_signal_to_thread(thread, signal, 0);
4342 			}
4343 		}
4344 	}
4345 
4346 	if (addressSpace != NULL)
4347 		addressSpace->Put();
4348 
4349 	return B_HANDLED_INTERRUPT;
4350 }
4351 
4352 
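// Bundles the state needed while resolving a single soft fault: the locks
// held, the reserved pages, and the page that was eventually found.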
4353 struct PageFaultContext {
4354 	AddressSpaceReadLocker	addressSpaceLocker;
4355 	VMCacheChainLocker		cacheChainLocker;
4356 
4357 	VMTranslationMap*		map;
4358 	VMCache*				topCache;
4359 	off_t					cacheOffset;
4360 	vm_page_reservation		reservation;
4361 	bool					isWrite;
4362 
4363 	// return values
4364 	vm_page*				page;
4365 	bool					restart;
4366 	bool					pageAllocated;
4367 
4368 
4369 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4370 		:
4371 		addressSpaceLocker(addressSpace, true),
4372 		map(addressSpace->TranslationMap()),
4373 		isWrite(isWrite)
4374 	{
4375 	}
4376 
4377 	~PageFaultContext()
4378 	{
4379 		UnlockAll();
4380 		vm_page_unreserve_pages(&reservation);
4381 	}
4382 
4383 	void Prepare(VMCache* topCache, off_t cacheOffset)
4384 	{
4385 		this->topCache = topCache;
4386 		this->cacheOffset = cacheOffset;
4387 		page = NULL;
4388 		restart = false;
4389 		pageAllocated = false;
4390 
4391 		cacheChainLocker.SetTo(topCache);
4392 	}
4393 
4394 	void UnlockAll(VMCache* exceptCache = NULL)
4395 	{
4396 		topCache = NULL;
4397 		addressSpaceLocker.Unlock();
4398 		cacheChainLocker.Unlock(exceptCache);
4399 	}
4400 };
4401 
4402 
4403 /*!	Gets the page that should be mapped into the area.
4404 	Returns an error code other than \c B_OK, if the page couldn't be found or
4405 	paged in. The locking state of the address space and the caches is undefined
4406 	in that case.
4407 	Returns \c B_OK with \c context.restart set to \c true, if the function
4408 	had to unlock the address space and all caches and is supposed to be
4409 	called again.
4410 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4411 	found. It is returned in \c context.page. The address space will still be
4412 	locked as well as all caches starting from the top cache to at least the
4413 	cache the page lives in.
4414 */
4415 static status_t
4416 fault_get_page(PageFaultContext& context)
4417 {
4418 	VMCache* cache = context.topCache;
4419 	VMCache* lastCache = NULL;
4420 	vm_page* page = NULL;
4421 
4422 	while (cache != NULL) {
4423 		// We already hold the lock of the cache at this point.
4424 
4425 		lastCache = cache;
4426 
4427 		page = cache->LookupPage(context.cacheOffset);
4428 		if (page != NULL && page->busy) {
4429 			// the page is busy -- wait for it to become unbusy
4430 			context.UnlockAll(cache);
4431 			cache->ReleaseRefLocked();
4432 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4433 
4434 			// restart the whole process
4435 			context.restart = true;
4436 			return B_OK;
4437 		}
4438 
4439 		if (page != NULL)
4440 			break;
4441 
4442 		// The current cache does not contain the page we're looking for.
4443 
4444 		// see if the backing store has it
4445 		if (cache->HasPage(context.cacheOffset)) {
4446 			// insert a fresh page and mark it busy -- we're going to read it in
4447 			page = vm_page_allocate_page(&context.reservation,
4448 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4449 			cache->InsertPage(page, context.cacheOffset);
4450 
4451 			// We need to unlock all caches and the address space while reading
4452 			// the page in. Keep a reference to the cache around.
4453 			cache->AcquireRefLocked();
4454 			context.UnlockAll();
4455 
4456 			// read the page in
4457 			generic_io_vec vec;
4458 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4459 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4460 
4461 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4462 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4463 
4464 			cache->Lock();
4465 
4466 			if (status < B_OK) {
4467 				// on error remove and free the page
4468 				dprintf("reading page from cache %p returned: %s!\n",
4469 					cache, strerror(status));
4470 
4471 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4472 				cache->RemovePage(page);
4473 				vm_page_set_state(page, PAGE_STATE_FREE);
4474 
4475 				cache->ReleaseRefAndUnlock();
4476 				return status;
4477 			}
4478 
4479 			// mark the page unbusy again
4480 			cache->MarkPageUnbusy(page);
4481 
4482 			DEBUG_PAGE_ACCESS_END(page);
4483 
4484 			// Since we needed to unlock everything temporarily, the area
4485 			// situation might have changed. So we need to restart the whole
4486 			// process.
4487 			cache->ReleaseRefAndUnlock();
4488 			context.restart = true;
4489 			return B_OK;
4490 		}
4491 
4492 		cache = context.cacheChainLocker.LockSourceCache();
4493 	}
4494 
4495 	if (page == NULL) {
4496 		// There was no adequate page, determine the cache for a clean one.
4497 		// Read-only pages come in the deepest cache; only the topmost cache
4498 		// may have direct write access.
4499 		cache = context.isWrite ? context.topCache : lastCache;
4500 
4501 		// allocate a clean page
4502 		page = vm_page_allocate_page(&context.reservation,
4503 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4504 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4505 			page->physical_page_number));
4506 
4507 		// insert the new page into our cache
4508 		cache->InsertPage(page, context.cacheOffset);
4509 		context.pageAllocated = true;
4510 	} else if (page->Cache() != context.topCache && context.isWrite) {
4511 		// We have a page that has the data we want, but in the wrong cache
4512 		// object so we need to copy it and stick it into the top cache.
4513 		vm_page* sourcePage = page;
4514 
4515 		// TODO: If memory is low, it might be a good idea to steal the page
4516 		// from our source cache -- if possible, that is.
4517 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4518 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4519 
4520 		// To not needlessly kill concurrency we unlock all caches but the top
4521 		// one while copying the page. Lacking another mechanism to ensure that
4522 		// the source page doesn't disappear, we mark it busy.
4523 		sourcePage->busy = true;
4524 		context.cacheChainLocker.UnlockKeepRefs(true);
4525 
4526 		// copy the page
4527 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4528 			sourcePage->physical_page_number * B_PAGE_SIZE);
4529 
4530 		context.cacheChainLocker.RelockCaches(true);
4531 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4532 
4533 		// insert the new page into our cache
4534 		context.topCache->InsertPage(page, context.cacheOffset);
4535 		context.pageAllocated = true;
4536 	} else
4537 		DEBUG_PAGE_ACCESS_START(page);
4538 
4539 	context.page = page;
4540 	return B_OK;
4541 }
4542 
4543 
4544 /*!	Makes sure the address in the given address space is mapped.
4545 
4546 	\param addressSpace The address space.
4547 	\param originalAddress The address. Doesn't need to be page aligned.
4548 	\param isWrite If \c true the address shall be write-accessible.
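	\param isExecute If \c true the address shall be executable.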
4549 	\param isUser If \c true the access is requested by a userland team.
4550 	\param wirePage On success, if non \c NULL, the wired count of the page
4551 		mapped at the given address is incremented and the page is returned
4552 		via this parameter.
4553 	\return \c B_OK on success, another error code otherwise.
4554 */
4555 static status_t
4556 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4557 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4558 {
4559 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4560 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4561 		originalAddress, isWrite, isUser));
4562 
4563 	PageFaultContext context(addressSpace, isWrite);
4564 
4565 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4566 	status_t status = B_OK;
4567 
4568 	addressSpace->IncrementFaultCount();
4569 
4570 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4571 	// the pages upfront makes sure we don't have any cache locked, so that the
4572 	// page daemon/thief can do their job without problems.
4573 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4574 		originalAddress);
4575 	context.addressSpaceLocker.Unlock();
4576 	vm_page_reserve_pages(&context.reservation, reservePages,
4577 		addressSpace == VMAddressSpace::Kernel()
4578 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4579 
4580 	while (true) {
4581 		context.addressSpaceLocker.Lock();
4582 
4583 		// get the area the fault was in
4584 		VMArea* area = addressSpace->LookupArea(address);
4585 		if (area == NULL) {
4586 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4587 				"space\n", originalAddress);
4588 			TPF(PageFaultError(-1,
4589 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4590 			status = B_BAD_ADDRESS;
4591 			break;
4592 		}
4593 
4594 		// check permissions
4595 		uint32 protection = get_area_page_protection(area, address);
4596 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4597 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4598 				area->id, (void*)originalAddress);
4599 			TPF(PageFaultError(area->id,
4600 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4601 			status = B_PERMISSION_DENIED;
4602 			break;
4603 		}
4604 		if (isWrite && (protection
4605 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4606 			dprintf("write access attempted on write-protected area 0x%"
4607 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4608 			TPF(PageFaultError(area->id,
4609 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4610 			status = B_PERMISSION_DENIED;
4611 			break;
4612 		} else if (isExecute && (protection
4613 				& (B_EXECUTE_AREA
4614 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4615 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4616 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4617 			TPF(PageFaultError(area->id,
4618 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4619 			status = B_PERMISSION_DENIED;
4620 			break;
4621 		} else if (!isWrite && !isExecute && (protection
4622 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4623 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4624 				" at %p\n", area->id, (void*)originalAddress);
4625 			TPF(PageFaultError(area->id,
4626 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4627 			status = B_PERMISSION_DENIED;
4628 			break;
4629 		}
4630 
4631 		// We have the area, it was a valid access, so let's try to resolve the
4632 		// page fault now.
4633 		// At first, the top most cache from the area is investigated.
4634 
4635 		context.Prepare(vm_area_get_locked_cache(area),
4636 			address - area->Base() + area->cache_offset);
4637 
4638 		// See if this cache has a fault handler -- this will do all the work
4639 		// for us.
4640 		{
4641 			// Note, since the page fault is resolved with interrupts enabled,
4642 			// the fault handler could be called more than once for the same
4643 			// reason -- the store must take this into account.
4644 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4645 			if (status != B_BAD_HANDLER)
4646 				break;
4647 		}
4648 
4649 		// The top most cache has no fault handler, so let's see if the cache or
4650 		// its sources already have the page we're searching for (we're going
4651 		// from top to bottom).
4652 		status = fault_get_page(context);
4653 		if (status != B_OK) {
4654 			TPF(PageFaultError(area->id, status));
4655 			break;
4656 		}
4657 
4658 		if (context.restart)
4659 			continue;
4660 
4661 		// All went fine, all there is left to do is to map the page into the
4662 		// address space.
4663 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4664 			context.page));
4665 
4666 		// If the page doesn't reside in the area's cache, we need to make sure
4667 		// it's mapped in read-only, so that we cannot overwrite someone else's
4668 		// data (copy-on-write)
4669 		uint32 newProtection = protection;
4670 		if (context.page->Cache() != context.topCache && !isWrite)
4671 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4672 
4673 		bool unmapPage = false;
4674 		bool mapPage = true;
4675 
4676 		// check whether there's already a page mapped at the address
4677 		context.map->Lock();
4678 
4679 		phys_addr_t physicalAddress;
4680 		uint32 flags;
4681 		vm_page* mappedPage = NULL;
4682 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4683 			&& (flags & PAGE_PRESENT) != 0
4684 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4685 				!= NULL) {
4686 			// Yep there's already a page. If it's ours, we can simply adjust
4687 			// its protection. Otherwise we have to unmap it.
4688 			if (mappedPage == context.page) {
4689 				context.map->ProtectPage(area, address, newProtection);
4690 					// Note: We assume that ProtectPage() is atomic (i.e.
4691 					// the page isn't temporarily unmapped), otherwise we'd have
4692 					// to make sure it isn't wired.
4693 				mapPage = false;
4694 			} else
4695 				unmapPage = true;
4696 		}
4697 
4698 		context.map->Unlock();
4699 
4700 		if (unmapPage) {
4701 			// If the page is wired, we can't unmap it. Wait until it is unwired
4702 			// again and restart. Note that the page cannot be wired for
4703 			// writing, since it isn't in the topmost cache. So we can safely
4704 			// ignore ranges wired for writing (our own and other concurrent
4705 			// wiring attempts in progress) and in fact have to do that to avoid
4706 			// a deadlock.
4707 			VMAreaUnwiredWaiter waiter;
4708 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4709 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4710 				// unlock everything and wait
4711 				if (context.pageAllocated) {
4712 					// ... but since we allocated a page and inserted it into
4713 					// the top cache, remove and free it first. Otherwise we'd
4714 					// have a page from a lower cache mapped while an upper
4715 					// cache has a page that would shadow it.
4716 					context.topCache->RemovePage(context.page);
4717 					vm_page_free_etc(context.topCache, context.page,
4718 						&context.reservation);
4719 				} else
4720 					DEBUG_PAGE_ACCESS_END(context.page);
4721 
4722 				context.UnlockAll();
4723 				waiter.waitEntry.Wait();
4724 				continue;
4725 			}
4726 
4727 			// Note: The mapped page is a page of a lower cache. We are
4728 			// guaranteed to have that cache locked, our new page is a copy of
4729 			// that page, and the page is not busy. The logic for that guarantee
4730 			// is as follows: Since the page is mapped, it must live in the top
4731 			// cache (ruled out above) or any of its lower caches, and there is
4732 			// (was before the new page was inserted) no other page in any
4733 			// cache between the top cache and the page's cache (otherwise that
4734 			// would be mapped instead). That in turn means that our algorithm
4735 			// must have found it and therefore it cannot be busy either.
4736 			DEBUG_PAGE_ACCESS_START(mappedPage);
4737 			unmap_page(area, address);
4738 			DEBUG_PAGE_ACCESS_END(mappedPage);
4739 		}
4740 
4741 		if (mapPage) {
4742 			if (map_page(area, context.page, address, newProtection,
4743 					&context.reservation) != B_OK) {
4744 				// Mapping can only fail when the page mapping object couldn't
4745 				// be allocated. Save for the missing mapping everything is
4746 				// fine, though. If this was a regular page fault, we'll simply
4747 				// leave and probably fault again. To make sure we'll have more
4748 				// luck then, we ensure that the minimum object reserve is
4749 				// available.
4750 				DEBUG_PAGE_ACCESS_END(context.page);
4751 
4752 				context.UnlockAll();
4753 
4754 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4755 						!= B_OK) {
4756 					// Apparently the situation is serious. Let's get ourselves
4757 					// killed.
4758 					status = B_NO_MEMORY;
4759 				} else if (wirePage != NULL) {
4760 					// The caller expects us to wire the page. Since
4761 					// object_cache_reserve() succeeded, we should now be able
4762 					// to allocate a mapping structure. Restart.
4763 					continue;
4764 				}
4765 
4766 				break;
4767 			}
4768 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4769 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4770 
4771 		// also wire the page, if requested
4772 		if (wirePage != NULL && status == B_OK) {
4773 			increment_page_wired_count(context.page);
4774 			*wirePage = context.page;
4775 		}
4776 
4777 		DEBUG_PAGE_ACCESS_END(context.page);
4778 
4779 		break;
4780 	}
4781 
4782 	return status;
4783 }
4784 
4785 
4786 status_t
4787 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4788 {
4789 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4790 }
4791 
4792 status_t
4793 vm_put_physical_page(addr_t vaddr, void* handle)
4794 {
4795 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4796 }
4797 
4798 
4799 status_t
4800 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4801 	void** _handle)
4802 {
4803 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4804 }
4805 
4806 status_t
4807 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4808 {
4809 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4810 }
4811 
4812 
4813 status_t
4814 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4815 {
4816 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4817 }
4818 
4819 status_t
4820 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4821 {
4822 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4823 }
4824 
4825 
4826 void
4827 vm_get_info(system_info* info)
4828 {
4829 	swap_get_info(info);
4830 
4831 	MutexLocker locker(sAvailableMemoryLock);
4832 	info->needed_memory = sNeededMemory;
4833 	info->free_memory = sAvailableMemory;
4834 }
4835 
4836 
4837 uint32
4838 vm_num_page_faults(void)
4839 {
4840 	return sPageFaults;
4841 }
4842 
4843 
4844 off_t
4845 vm_available_memory(void)
4846 {
4847 	MutexLocker locker(sAvailableMemoryLock);
4848 	return sAvailableMemory;
4849 }
4850 
4851 
4852 off_t
4853 vm_available_not_needed_memory(void)
4854 {
4855 	MutexLocker locker(sAvailableMemoryLock);
4856 	return sAvailableMemory - sNeededMemory;
4857 }
4858 
4859 
4860 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4861 	debugger.
4862 */
4863 off_t
4864 vm_available_not_needed_memory_debug(void)
4865 {
4866 	return sAvailableMemory - sNeededMemory;
4867 }
4868 
4869 
4870 size_t
4871 vm_kernel_address_space_left(void)
4872 {
4873 	return VMAddressSpace::Kernel()->FreeSpace();
4874 }
4875 
4876 
4877 void
4878 vm_unreserve_memory(size_t amount)
4879 {
4880 	mutex_lock(&sAvailableMemoryLock);
4881 
4882 	sAvailableMemory += amount;
4883 
4884 	mutex_unlock(&sAvailableMemoryLock);
4885 }
4886 
4887 
4888 status_t
4889 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4890 {
4891 	size_t reserve = kMemoryReserveForPriority[priority];
4892 
4893 	MutexLocker locker(sAvailableMemoryLock);
4894 
4895 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4896 
4897 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4898 		sAvailableMemory -= amount;
4899 		return B_OK;
4900 	}
4901 
4902 	if (timeout <= 0)
4903 		return B_NO_MEMORY;
4904 
4905 	// turn timeout into an absolute timeout
4906 	timeout += system_time();
4907 
4908 	// loop until we've got the memory or the timeout occurs
4909 	do {
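		// Advertise how much memory we are waiting for, so that the low
		// resource manager knows how urgently memory needs to be freed.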
4910 		sNeededMemory += amount;
4911 
4912 		// call the low resource manager
4913 		locker.Unlock();
4914 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4915 			B_ABSOLUTE_TIMEOUT, timeout);
4916 		locker.Lock();
4917 
4918 		sNeededMemory -= amount;
4919 
4920 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4921 			sAvailableMemory -= amount;
4922 			return B_OK;
4923 		}
4924 	} while (timeout > system_time());
4925 
4926 	return B_NO_MEMORY;
4927 }
4928 
4929 
4930 status_t
4931 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4932 {
4933 	// NOTE: The caller is responsible for synchronizing calls to this function!
4934 
4935 	AddressSpaceReadLocker locker;
4936 	VMArea* area;
4937 	status_t status = locker.SetFromArea(id, area);
4938 	if (status != B_OK)
4939 		return status;
4940 
4941 	// nothing to do, if the type doesn't change
4942 	uint32 oldType = area->MemoryType();
4943 	if (type == oldType)
4944 		return B_OK;
4945 
4946 	// set the memory type of the area and the mapped pages
4947 	VMTranslationMap* map = area->address_space->TranslationMap();
4948 	map->Lock();
4949 	area->SetMemoryType(type);
4950 	map->ProtectArea(area, area->protection);
4951 	map->Unlock();
4952 
4953 	// set the physical memory type
4954 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4955 	if (error != B_OK) {
4956 		// reset the memory type of the area and the mapped pages
4957 		map->Lock();
4958 		area->SetMemoryType(oldType);
4959 		map->ProtectArea(area, area->protection);
4960 		map->Unlock();
4961 		return error;
4962 	}
4963 
4964 	return B_OK;
4966 }
4967 
4968 
4969 /*!	This function enforces some protection properties:
4970 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4971 	 - if B_EXECUTE_AREA is set, B_KERNEL_EXECUTE_AREA is set as well
4972 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4973 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4974 	   and B_KERNEL_WRITE_AREA.
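	 For example, a request for only B_READ_AREA | B_WRITE_AREA additionally
	 gets B_KERNEL_READ_AREA and B_KERNEL_WRITE_AREA set.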
4975 */
4976 static void
4977 fix_protection(uint32* protection)
4978 {
4979 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4980 		if ((*protection & B_USER_PROTECTION) == 0
4981 			|| (*protection & B_WRITE_AREA) != 0)
4982 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4983 		else
4984 			*protection |= B_KERNEL_READ_AREA;
4985 		if ((*protection & B_EXECUTE_AREA) != 0)
4986 			*protection |= B_KERNEL_EXECUTE_AREA;
4987 	}
4988 }
4989 
4990 
4991 static void
4992 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4993 {
4994 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4995 	info->area = area->id;
4996 	info->address = (void*)area->Base();
4997 	info->size = area->Size();
4998 	info->protection = area->protection;
4999 	info->lock = B_FULL_LOCK;
5000 	info->team = area->address_space->ID();
5001 	info->copy_count = 0;
5002 	info->in_count = 0;
5003 	info->out_count = 0;
5004 		// TODO: retrieve real values here!
5005 
5006 	VMCache* cache = vm_area_get_locked_cache(area);
5007 
5008 	// Note, this is a simplification; the cache could be larger than this area
5009 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5010 
5011 	vm_area_put_locked_cache(cache);
5012 }
5013 
5014 
5015 static status_t
5016 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5017 {
5018 	// is newSize a multiple of B_PAGE_SIZE?
5019 	if (newSize & (B_PAGE_SIZE - 1))
5020 		return B_BAD_VALUE;
5021 
5022 	// lock all affected address spaces and the cache
5023 	VMArea* area;
5024 	VMCache* cache;
5025 
5026 	MultiAddressSpaceLocker locker;
5027 	AreaCacheLocker cacheLocker;
5028 
5029 	status_t status;
5030 	size_t oldSize;
5031 	bool anyKernelArea;
5032 	bool restart;
5033 
5034 	do {
5035 		anyKernelArea = false;
5036 		restart = false;
5037 
5038 		locker.Unset();
5039 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5040 		if (status != B_OK)
5041 			return status;
5042 		cacheLocker.SetTo(cache, true);	// already locked
5043 
5044 		// enforce restrictions
5045 		if (!kernel) {
5046 			if ((area->protection & B_KERNEL_AREA) != 0)
5047 				return B_NOT_ALLOWED;
5048 			// TODO: Enforce all restrictions (team, etc.)!
5049 		}
5050 
5051 		oldSize = area->Size();
5052 		if (newSize == oldSize)
5053 			return B_OK;
5054 
5055 		if (cache->type != CACHE_TYPE_RAM)
5056 			return B_NOT_ALLOWED;
5057 
5058 		if (oldSize < newSize) {
5059 			// We need to check if all areas of this cache can be resized.
5060 			for (VMArea* current = cache->areas; current != NULL;
5061 					current = current->cache_next) {
5062 				if (!current->address_space->CanResizeArea(current, newSize))
5063 					return B_ERROR;
5064 				anyKernelArea
5065 					|= current->address_space == VMAddressSpace::Kernel();
5066 			}
5067 		} else {
5068 			// We're shrinking the areas, so we must make sure the affected
5069 			// ranges are not wired.
5070 			for (VMArea* current = cache->areas; current != NULL;
5071 					current = current->cache_next) {
5072 				anyKernelArea
5073 					|= current->address_space == VMAddressSpace::Kernel();
5074 
5075 				if (wait_if_area_range_is_wired(current,
5076 						current->Base() + newSize, oldSize - newSize, &locker,
5077 						&cacheLocker)) {
5078 					restart = true;
5079 					break;
5080 				}
5081 			}
5082 		}
5083 	} while (restart);
5084 
5085 	// Okay, looks good so far, so let's do it
5086 
5087 	int priority = kernel && anyKernelArea
5088 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5089 	uint32 allocationFlags = kernel && anyKernelArea
5090 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5091 
5092 	if (oldSize < newSize) {
5093 		// Growing the cache can fail, so we do it first.
5094 		status = cache->Resize(cache->virtual_base + newSize, priority);
5095 		if (status != B_OK)
5096 			return status;
5097 	}
5098 
5099 	for (VMArea* current = cache->areas; current != NULL;
5100 			current = current->cache_next) {
5101 		status = current->address_space->ResizeArea(current, newSize,
5102 			allocationFlags);
5103 		if (status != B_OK)
5104 			break;
5105 
5106 		// We also need to unmap all pages beyond the new size, if the area has
5107 		// shrunk
5108 		if (newSize < oldSize) {
5109 			VMCacheChainLocker cacheChainLocker(cache);
5110 			cacheChainLocker.LockAllSourceCaches();
5111 
5112 			unmap_pages(current, current->Base() + newSize,
5113 				oldSize - newSize);
5114 
5115 			cacheChainLocker.Unlock(cache);
5116 		}
5117 	}
5118 
5119 	if (status == B_OK) {
5120 		// Shrink or grow individual page protections if in use.
5121 		if (area->page_protections != NULL) {
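			// Protections are stored as two 4-bit entries per byte, hence
			// the (newSize / B_PAGE_SIZE + 1) / 2 sizing below.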
5122 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5123 			uint8* newProtections
5124 				= (uint8*)realloc(area->page_protections, bytes);
5125 			if (newProtections == NULL)
5126 				status = B_NO_MEMORY;
5127 			else {
5128 				area->page_protections = newProtections;
5129 
5130 				if (oldSize < newSize) {
5131 					// init the additional page protections to that of the area
5132 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5133 					uint32 areaProtection = area->protection
5134 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5135 					memset(area->page_protections + offset,
5136 						areaProtection | (areaProtection << 4), bytes - offset);
5137 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5138 						uint8& entry = area->page_protections[offset - 1];
5139 						entry = (entry & 0x0f) | (areaProtection << 4);
5140 					}
5141 				}
5142 			}
5143 		}
5144 	}
5145 
5146 	// shrinking the cache can't fail, so we do it now
5147 	if (status == B_OK && newSize < oldSize)
5148 		status = cache->Resize(cache->virtual_base + newSize, priority);
5149 
5150 	if (status != B_OK) {
5151 		// Something failed -- resize the areas back to their original size.
5152 		// This can fail, too, in which case we're seriously screwed.
5153 		for (VMArea* current = cache->areas; current != NULL;
5154 				current = current->cache_next) {
5155 			if (current->address_space->ResizeArea(current, oldSize,
5156 					allocationFlags) != B_OK) {
5157 				panic("vm_resize_area(): Failed and unable to restore "
5158 					"original state.");
5159 			}
5160 		}
5161 
5162 		cache->Resize(cache->virtual_base + oldSize, priority);
5163 	}
5164 
5165 	// TODO: we must honour the lock restrictions of this area
5166 	return status;
5167 }
5168 
5169 
5170 status_t
5171 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5172 {
5173 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5174 }
5175 
5176 
5177 status_t
5178 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5179 {
5180 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5181 }
5182 
5183 
5184 status_t
5185 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5186 	bool user)
5187 {
5188 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5189 }
5190 
5191 
5192 void
5193 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5194 {
5195 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5196 }
5197 
5198 
5199 /*!	Copies a range of memory directly from/to a page that might not be mapped
5200 	at the moment.
5201 
5202 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5203 	walks through the respective area's cache chain to find the physical page
5204 	and copies from/to it directly.
5205 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5206 	must not cross a page boundary.
5207 
5208 	\param teamID The team ID identifying the address space \a unsafeMemory is
5209 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5210 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5211 		is passed, the address space of the thread returned by
5212 		debug_get_debugged_thread() is used.
5213 	\param unsafeMemory The start of the unsafe memory range to be copied
5214 		from/to.
5215 	\param buffer A safely accessible kernel buffer to be copied from/to.
5216 	\param size The number of bytes to be copied.
5217 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5218 		\a unsafeMemory, the other way around otherwise.
5219 */
5220 status_t
5221 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5222 	size_t size, bool copyToUnsafe)
5223 {
5224 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5225 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5226 		return B_BAD_VALUE;
5227 	}
5228 
5229 	// get the address space for the debugged thread
5230 	VMAddressSpace* addressSpace;
5231 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5232 		addressSpace = VMAddressSpace::Kernel();
5233 	} else if (teamID == B_CURRENT_TEAM) {
5234 		Thread* thread = debug_get_debugged_thread();
5235 		if (thread == NULL || thread->team == NULL)
5236 			return B_BAD_ADDRESS;
5237 
5238 		addressSpace = thread->team->address_space;
5239 	} else
5240 		addressSpace = VMAddressSpace::DebugGet(teamID);
5241 
5242 	if (addressSpace == NULL)
5243 		return B_BAD_ADDRESS;
5244 
5245 	// get the area
5246 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5247 	if (area == NULL)
5248 		return B_BAD_ADDRESS;
5249 
5250 	// search the page
5251 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5252 		+ area->cache_offset;
5253 	VMCache* cache = area->cache;
5254 	vm_page* page = NULL;
5255 	while (cache != NULL) {
5256 		page = cache->DebugLookupPage(cacheOffset);
5257 		if (page != NULL)
5258 			break;
5259 
5260 		// Page not found in this cache -- if it is paged out, we must not try
5261 		// to get it from lower caches.
5262 		if (cache->DebugHasPage(cacheOffset))
5263 			break;
5264 
5265 		cache = cache->source;
5266 	}
5267 
5268 	if (page == NULL)
5269 		return B_UNSUPPORTED;
5270 
5271 	// copy from/to physical memory
5272 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5273 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5274 
5275 	if (copyToUnsafe) {
5276 		if (page->Cache() != area->cache)
5277 			return B_UNSUPPORTED;
5278 
5279 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5280 	}
5281 
5282 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5283 }
5284 
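
// Editorial usage sketch (not part of this file's original revision): how a
// kernel debugger command might read a 32 bit value from the debugged
// thread's team via vm_debug_copy_page_memory(). The value must not cross a
// page boundary; the helper name is hypothetical.
static status_t
debug_read_user_uint32_example(void* unsafeAddress, uint32* _value)
{
	// copyToUnsafe == false: copy from the unsafe memory into our buffer
	return vm_debug_copy_page_memory(B_CURRENT_TEAM, unsafeAddress, _value,
		sizeof(*_value), false);
}
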
5285 
5286 //	#pragma mark - kernel public API
5287 
5288 
5289 status_t
5290 user_memcpy(void* to, const void* from, size_t size)
5291 {
5292 	// don't allow address overflows
5293 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5294 		return B_BAD_ADDRESS;
5295 
5296 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5297 		return B_BAD_ADDRESS;
5298 
5299 	return B_OK;
5300 }
5301 
5302 
5303 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5304 	the string in \a to, NULL-terminating the result.
5305 
5306 	\param to Pointer to the destination C-string.
5307 	\param from Pointer to the source C-string.
5308 	\param size Size in bytes of the string buffer pointed to by \a to.
5309 
5310 	\return strlen(\a from) on success, or an error code otherwise.
5311 */
5312 ssize_t
5313 user_strlcpy(char* to, const char* from, size_t size)
5314 {
5315 	if (to == NULL && size != 0)
5316 		return B_BAD_VALUE;
5317 	if (from == NULL)
5318 		return B_BAD_ADDRESS;
5319 
5320 	// limit size to avoid address overflows
5321 	size_t maxSize = std::min((addr_t)size,
5322 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5323 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5324 		// the source address might still overflow.
5325 
5326 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5327 
5328 	// If we hit the address overflow boundary, fail.
5329 	if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5330 			&& maxSize < size)) {
5331 		return B_BAD_ADDRESS;
5332 	}
5333 
5334 	return result;
5335 }
5336 
5337 
5338 status_t
5339 user_memset(void* s, char c, size_t count)
5340 {
5341 	// don't allow address overflows
5342 	if ((addr_t)s + count < (addr_t)s)
5343 		return B_BAD_ADDRESS;
5344 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5345 		return B_BAD_ADDRESS;
5346 
5347 	return B_OK;
5348 }
5349 
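
// Editorial usage sketch (not part of this file's original revision): the
// usual syscall pattern for moving data across the user/kernel boundary with
// the helpers above -- validate the pointer, then copy, and treat any failure
// as B_BAD_ADDRESS. Both helper names are hypothetical.
static status_t
copy_name_from_userland_example(const char* userName, char* name)
{
	if (!IS_USER_ADDRESS(userName)
		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	return B_OK;
}


static status_t
copy_result_to_userland_example(void* userBuffer, const void* result,
	size_t size)
{
	if (!IS_USER_ADDRESS(userBuffer)
		|| user_memcpy(userBuffer, result, size) != B_OK)
		return B_BAD_ADDRESS;

	return B_OK;
}
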
5350 
5351 /*!	Wires a single page at the given address.
5352 
5353 	\param team The team whose address space the address belongs to. Also
5354 		supports \c B_CURRENT_TEAM. If the given address is a kernel address,
5355 		the parameter is ignored.
5356 	\param address The virtual address to wire down. Does not need to
5357 		be page aligned.
5358 	\param writable If \c true the page shall be writable.
5359 	\param info On success the info is filled in, among other things
5360 		containing the physical address the given virtual one translates to.
5361 	\return \c B_OK if the page could be wired, another error code otherwise.
5362 */
5363 status_t
5364 vm_wire_page(team_id team, addr_t address, bool writable,
5365 	VMPageWiringInfo* info)
5366 {
5367 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5368 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5369 
5370 	// compute the page protection that is required
5371 	bool isUser = IS_USER_ADDRESS(address);
5372 	uint32 requiredProtection = PAGE_PRESENT
5373 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5374 	if (writable)
5375 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5376 
5377 	// get and read lock the address space
5378 	VMAddressSpace* addressSpace = NULL;
5379 	if (isUser) {
5380 		if (team == B_CURRENT_TEAM)
5381 			addressSpace = VMAddressSpace::GetCurrent();
5382 		else
5383 			addressSpace = VMAddressSpace::Get(team);
5384 	} else
5385 		addressSpace = VMAddressSpace::GetKernel();
5386 	if (addressSpace == NULL)
5387 		return B_ERROR;
5388 
5389 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5390 
5391 	VMTranslationMap* map = addressSpace->TranslationMap();
5392 	status_t error = B_OK;
5393 
5394 	// get the area
5395 	VMArea* area = addressSpace->LookupArea(pageAddress);
5396 	if (area == NULL) {
5397 		addressSpace->Put();
5398 		return B_BAD_ADDRESS;
5399 	}
5400 
5401 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5402 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5403 
5404 	// mark the area range wired
5405 	area->Wire(&info->range);
5406 
5407 	// Lock the area's cache chain and the translation map. Needed to look
5408 	// up the page and play with its wired count.
5409 	cacheChainLocker.LockAllSourceCaches();
5410 	map->Lock();
5411 
5412 	phys_addr_t physicalAddress;
5413 	uint32 flags;
5414 	vm_page* page;
5415 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5416 		&& (flags & requiredProtection) == requiredProtection
5417 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5418 			!= NULL) {
5419 		// Already mapped with the correct permissions -- just increment
5420 		// the page's wired count.
5421 		increment_page_wired_count(page);
5422 
5423 		map->Unlock();
5424 		cacheChainLocker.Unlock();
5425 		addressSpaceLocker.Unlock();
5426 	} else {
5427 		// Let vm_soft_fault() map the page for us, if possible. We need
5428 		// to fully unlock to avoid deadlocks. Since we have already
5429 		// wired the area itself, nothing disturbing will happen with it
5430 		// in the meantime.
5431 		map->Unlock();
5432 		cacheChainLocker.Unlock();
5433 		addressSpaceLocker.Unlock();
5434 
5435 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5436 			isUser, &page);
5437 
5438 		if (error != B_OK) {
5439 			// The page could not be mapped -- clean up.
5440 			VMCache* cache = vm_area_get_locked_cache(area);
5441 			area->Unwire(&info->range);
5442 			cache->ReleaseRefAndUnlock();
5443 			addressSpace->Put();
5444 			return error;
5445 		}
5446 	}
5447 
5448 	info->physicalAddress
5449 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5450 			+ address % B_PAGE_SIZE;
5451 	info->page = page;
5452 
5453 	return B_OK;
5454 }
5455 
5456 
5457 /*!	Unwires a single page previously wired via vm_wire_page().
5458 
5459 	\param info The same object passed to vm_wire_page() before.
5460 */
5461 void
5462 vm_unwire_page(VMPageWiringInfo* info)
5463 {
5464 	// lock the address space
5465 	VMArea* area = info->range.area;
5466 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5467 		// takes over our reference
5468 
5469 	// lock the top cache
5470 	VMCache* cache = vm_area_get_locked_cache(area);
5471 	VMCacheChainLocker cacheChainLocker(cache);
5472 
5473 	if (info->page->Cache() != cache) {
5474 		// The page is not in the top cache, so we lock the whole cache chain
5475 		// before touching the page's wired count.
5476 		cacheChainLocker.LockAllSourceCaches();
5477 	}
5478 
5479 	decrement_page_wired_count(info->page);
5480 
5481 	// remove the wired range from the area
5482 	area->Unwire(&info->range);
5483 
5484 	cacheChainLocker.Unlock();
5485 }
5486 
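
// Editorial usage sketch (not part of this file's original revision): wiring
// a single user page to obtain a stable physical address and releasing it
// again with vm_unwire_page(). The function name and the dprintf() output are
// hypothetical.
static status_t
print_physical_address_example(team_id team, void* userAddress)
{
	VMPageWiringInfo info;
	status_t error = vm_wire_page(team, (addr_t)userAddress, false, &info);
	if (error != B_OK)
		return error;

	// the page (and its translation) stays put until vm_unwire_page()
	dprintf("%p -> physical %#" B_PRIxPHYSADDR "\n", userAddress,
		info.physicalAddress);

	vm_unwire_page(&info);
	return B_OK;
}
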
5487 
5488 /*!	Wires down the given address range in the specified team's address space.
5489 
5490 	If successful the function
5491 	- acquires a reference to the specified team's address space,
5492 	- adds respective wired ranges to all areas that intersect with the given
5493 	  address range,
5494 	- makes sure all pages in the given address range are mapped with the
5495 	  requested access permissions and increments their wired count.
5496 
5497 	It fails when \a team doesn't specify a valid address space, when any part
5498 	of the specified address range is not covered by areas, when the concerned
5499 	areas don't allow mapping with the requested permissions, or when mapping
5500 	failed for another reason.
5501 
5502 	When successful the call must be balanced by an unlock_memory_etc() call with
5503 	the exact same parameters.
5504 
5505 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5506 		supported.
5507 	\param address The start of the address range to be wired.
5508 	\param numBytes The size of the address range to be wired.
5509 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5510 		requests that the range must be wired writable ("read from device
5511 		into memory").
5512 	\return \c B_OK on success, another error code otherwise.
5513 */
5514 status_t
5515 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5516 {
5517 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5518 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5519 
5520 	// compute the page protection that is required
5521 	bool isUser = IS_USER_ADDRESS(address);
5522 	bool writable = (flags & B_READ_DEVICE) == 0;
5523 	uint32 requiredProtection = PAGE_PRESENT
5524 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5525 	if (writable)
5526 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5527 
5528 	uint32 mallocFlags = isUser
5529 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5530 
5531 	// get and read lock the address space
5532 	VMAddressSpace* addressSpace = NULL;
5533 	if (isUser) {
5534 		if (team == B_CURRENT_TEAM)
5535 			addressSpace = VMAddressSpace::GetCurrent();
5536 		else
5537 			addressSpace = VMAddressSpace::Get(team);
5538 	} else
5539 		addressSpace = VMAddressSpace::GetKernel();
5540 	if (addressSpace == NULL)
5541 		return B_ERROR;
5542 
5543 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5544 		// We get a new address space reference here. The one we got above will
5545 		// be freed by unlock_memory_etc().
5546 
5547 	VMTranslationMap* map = addressSpace->TranslationMap();
5548 	status_t error = B_OK;
5549 
5550 	// iterate through all concerned areas
5551 	addr_t nextAddress = lockBaseAddress;
5552 	while (nextAddress != lockEndAddress) {
5553 		// get the next area
5554 		VMArea* area = addressSpace->LookupArea(nextAddress);
5555 		if (area == NULL) {
5556 			error = B_BAD_ADDRESS;
5557 			break;
5558 		}
5559 
5560 		addr_t areaStart = nextAddress;
5561 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5562 
5563 		// allocate the wired range (do that before locking the cache to avoid
5564 		// deadlocks)
5565 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5566 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5567 		if (range == NULL) {
5568 			error = B_NO_MEMORY;
5569 			break;
5570 		}
5571 
5572 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5573 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5574 
5575 		// mark the area range wired
5576 		area->Wire(range);
5577 
5578 		// Depending on the area cache type and the wiring, we may not need to
5579 		// look at the individual pages.
5580 		if (area->cache_type == CACHE_TYPE_NULL
5581 			|| area->cache_type == CACHE_TYPE_DEVICE
5582 			|| area->wiring == B_FULL_LOCK
5583 			|| area->wiring == B_CONTIGUOUS) {
5584 			nextAddress = areaEnd;
5585 			continue;
5586 		}
5587 
5588 		// Lock the area's cache chain and the translation map. Needed to look
5589 		// up pages and play with their wired count.
5590 		cacheChainLocker.LockAllSourceCaches();
5591 		map->Lock();
5592 
5593 		// iterate through the pages and wire them
5594 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5595 			phys_addr_t physicalAddress;
5596 			uint32 flags;
5597 
5598 			vm_page* page;
5599 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5600 				&& (flags & requiredProtection) == requiredProtection
5601 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5602 					!= NULL) {
5603 				// Already mapped with the correct permissions -- just increment
5604 				// the page's wired count.
5605 				increment_page_wired_count(page);
5606 			} else {
5607 				// Let vm_soft_fault() map the page for us, if possible. We need
5608 				// to fully unlock to avoid deadlocks. Since we have already
5609 				// wired the area itself, nothing disturbing will happen with it
5610 				// in the meantime.
5611 				map->Unlock();
5612 				cacheChainLocker.Unlock();
5613 				addressSpaceLocker.Unlock();
5614 
5615 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5616 					false, isUser, &page);
5617 
5618 				addressSpaceLocker.Lock();
5619 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5620 				cacheChainLocker.LockAllSourceCaches();
5621 				map->Lock();
5622 			}
5623 
5624 			if (error != B_OK)
5625 				break;
5626 		}
5627 
5628 		map->Unlock();
5629 
5630 		if (error == B_OK) {
5631 			cacheChainLocker.Unlock();
5632 		} else {
5633 			// An error occurred, so abort right here. If the current address
5634 			// is the first in this area, unwire the area, since we won't get
5635 			// to it when reverting what we've done so far.
5636 			if (nextAddress == areaStart) {
5637 				area->Unwire(range);
5638 				cacheChainLocker.Unlock();
5639 				range->~VMAreaWiredRange();
5640 				free_etc(range, mallocFlags);
5641 			} else
5642 				cacheChainLocker.Unlock();
5643 
5644 			break;
5645 		}
5646 	}
5647 
5648 	if (error != B_OK) {
5649 		// An error occurred, so unwire all that we've already wired. Note that
5650 		// even if not a single page was wired, unlock_memory_etc() is called
5651 		// to put the address space reference.
5652 		addressSpaceLocker.Unlock();
5653 		unlock_memory_etc(team, (void*)lockBaseAddress,
5654 			nextAddress - lockBaseAddress, flags);
5655 	}
5656 
5657 	return error;
5658 }
5659 
5660 
5661 status_t
5662 lock_memory(void* address, size_t numBytes, uint32 flags)
5663 {
5664 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5665 }
5666 
5667 
5668 /*!	Unwires an address range previously wired with lock_memory_etc().
5669 
5670 	Note that a call to this function must balance a previous lock_memory_etc()
5671 	call with exactly the same parameters.
5672 */
5673 status_t
5674 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5675 {
5676 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5677 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5678 
5679 	// compute the page protection that is required
5680 	bool isUser = IS_USER_ADDRESS(address);
5681 	bool writable = (flags & B_READ_DEVICE) == 0;
5682 	uint32 requiredProtection = PAGE_PRESENT
5683 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5684 	if (writable)
5685 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5686 
5687 	uint32 mallocFlags = isUser
5688 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5689 
5690 	// get and read lock the address space
5691 	VMAddressSpace* addressSpace = NULL;
5692 	if (isUser) {
5693 		if (team == B_CURRENT_TEAM)
5694 			addressSpace = VMAddressSpace::GetCurrent();
5695 		else
5696 			addressSpace = VMAddressSpace::Get(team);
5697 	} else
5698 		addressSpace = VMAddressSpace::GetKernel();
5699 	if (addressSpace == NULL)
5700 		return B_ERROR;
5701 
5702 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5703 		// Take over the address space reference. We don't unlock until we're
5704 		// done.
5705 
5706 	VMTranslationMap* map = addressSpace->TranslationMap();
5707 	status_t error = B_OK;
5708 
5709 	// iterate through all concerned areas
5710 	addr_t nextAddress = lockBaseAddress;
5711 	while (nextAddress != lockEndAddress) {
5712 		// get the next area
5713 		VMArea* area = addressSpace->LookupArea(nextAddress);
5714 		if (area == NULL) {
5715 			error = B_BAD_ADDRESS;
5716 			break;
5717 		}
5718 
5719 		addr_t areaStart = nextAddress;
5720 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5721 
5722 		// Lock the area's top cache. This is a requirement for
5723 		// VMArea::Unwire().
5724 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5725 
5726 		// Depending on the area cache type and the wiring, we may not need to
5727 		// look at the individual pages.
5728 		if (area->cache_type == CACHE_TYPE_NULL
5729 			|| area->cache_type == CACHE_TYPE_DEVICE
5730 			|| area->wiring == B_FULL_LOCK
5731 			|| area->wiring == B_CONTIGUOUS) {
5732 			// unwire the range (to avoid deadlocks we delete the range after
5733 			// unlocking the cache)
5734 			nextAddress = areaEnd;
5735 			VMAreaWiredRange* range = area->Unwire(areaStart,
5736 				areaEnd - areaStart, writable);
5737 			cacheChainLocker.Unlock();
5738 			if (range != NULL) {
5739 				range->~VMAreaWiredRange();
5740 				free_etc(range, mallocFlags);
5741 			}
5742 			continue;
5743 		}
5744 
5745 		// Lock the area's cache chain and the translation map. Needed to look
5746 		// up pages and play with their wired count.
5747 		cacheChainLocker.LockAllSourceCaches();
5748 		map->Lock();
5749 
5750 		// iterate through the pages and unwire them
5751 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5752 			phys_addr_t physicalAddress;
5753 			uint32 flags;
5754 
5755 			vm_page* page;
5756 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5757 				&& (flags & PAGE_PRESENT) != 0
5758 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5759 					!= NULL) {
5760 				// The page is still mapped -- just decrement its wired
5761 				// count.
5762 				decrement_page_wired_count(page);
5763 			} else {
5764 				panic("unlock_memory_etc(): Failed to unwire page: address "
5765 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5766 					nextAddress);
5767 				error = B_BAD_VALUE;
5768 				break;
5769 			}
5770 		}
5771 
5772 		map->Unlock();
5773 
5774 		// All pages are unwired. Remove the area's wired range as well (to
5775 		// avoid deadlocks we delete the range after unlocking the cache).
5776 		VMAreaWiredRange* range = area->Unwire(areaStart,
5777 			areaEnd - areaStart, writable);
5778 
5779 		cacheChainLocker.Unlock();
5780 
5781 		if (range != NULL) {
5782 			range->~VMAreaWiredRange();
5783 			free_etc(range, mallocFlags);
5784 		}
5785 
5786 		if (error != B_OK)
5787 			break;
5788 	}
5789 
5790 	// get rid of the address space reference lock_memory_etc() acquired
5791 	addressSpace->Put();
5792 
5793 	return error;
5794 }
5795 
5796 
5797 status_t
5798 unlock_memory(void* address, size_t numBytes, uint32 flags)
5799 {
5800 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5801 }
5802 
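
// Editorial usage sketch (not part of this file's original revision): wiring
// a user buffer for the duration of an I/O operation. The helper name and the
// I/O placeholder are hypothetical; note that the unlock call uses exactly
// the same parameters as the lock call, as required by the documentation
// above.
static status_t
with_wired_buffer_example(team_id team, void* buffer, size_t size)
{
	status_t error = lock_memory_etc(team, buffer, size, 0);
	if (error != B_OK)
		return error;

	// ... start the I/O on the now wired buffer and wait for it to finish ...

	return unlock_memory_etc(team, buffer, size, 0);
}
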
5803 
5804 /*!	Similar to get_memory_map(), but also allows specifying the address space
5805 	for the memory in question and has saner semantics.
5806 	Returns \c B_OK when the complete range could be translated or
5807 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5808 	case the actual number of entries is written to \c *_numEntries. Any other
5809 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5810 	in this case.
5811 */
5812 status_t
5813 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5814 	physical_entry* table, uint32* _numEntries)
5815 {
5816 	uint32 numEntries = *_numEntries;
5817 	*_numEntries = 0;
5818 
5819 	VMAddressSpace* addressSpace;
5820 	addr_t virtualAddress = (addr_t)address;
5821 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5822 	phys_addr_t physicalAddress;
5823 	status_t status = B_OK;
5824 	int32 index = -1;
5825 	addr_t offset = 0;
5826 	bool interrupts = are_interrupts_enabled();
5827 
5828 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5829 		"entries)\n", team, address, numBytes, numEntries));
5830 
5831 	if (numEntries == 0 || numBytes == 0)
5832 		return B_BAD_VALUE;
5833 
5834 	// in which address space is the address to be found?
5835 	if (IS_USER_ADDRESS(virtualAddress)) {
5836 		if (team == B_CURRENT_TEAM)
5837 			addressSpace = VMAddressSpace::GetCurrent();
5838 		else
5839 			addressSpace = VMAddressSpace::Get(team);
5840 	} else
5841 		addressSpace = VMAddressSpace::GetKernel();
5842 
5843 	if (addressSpace == NULL)
5844 		return B_ERROR;
5845 
5846 	VMTranslationMap* map = addressSpace->TranslationMap();
5847 
5848 	if (interrupts)
5849 		map->Lock();
5850 
5851 	while (offset < numBytes) {
5852 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5853 		uint32 flags;
5854 
5855 		if (interrupts) {
5856 			status = map->Query((addr_t)address + offset, &physicalAddress,
5857 				&flags);
5858 		} else {
5859 			status = map->QueryInterrupt((addr_t)address + offset,
5860 				&physicalAddress, &flags);
5861 		}
5862 		if (status < B_OK)
5863 			break;
5864 		if ((flags & PAGE_PRESENT) == 0) {
5865 			panic("get_memory_map() called on unmapped memory!");
5866 			return B_BAD_ADDRESS;
5867 		}
5868 
5869 		if (index < 0 && pageOffset > 0) {
5870 			physicalAddress += pageOffset;
5871 			if (bytes > B_PAGE_SIZE - pageOffset)
5872 				bytes = B_PAGE_SIZE - pageOffset;
5873 		}
5874 
5875 		// need to switch to the next physical_entry?
5876 		if (index < 0 || table[index].address
5877 				!= physicalAddress - table[index].size) {
5878 			if ((uint32)++index + 1 > numEntries) {
5879 				// table too small
5880 				break;
5881 			}
5882 			table[index].address = physicalAddress;
5883 			table[index].size = bytes;
5884 		} else {
5885 			// page does fit in current entry
5886 			table[index].size += bytes;
5887 		}
5888 
5889 		offset += bytes;
5890 	}
5891 
5892 	if (interrupts)
5893 		map->Unlock();
5894 
5895 	if (status != B_OK)
5896 		return status;
5897 
5898 	if ((uint32)index + 1 > numEntries) {
5899 		*_numEntries = index;
5900 		return B_BUFFER_OVERFLOW;
5901 	}
5902 
5903 	*_numEntries = index + 1;
5904 	return B_OK;
5905 }
5906 
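
// Editorial usage sketch (not part of this file's original revision):
// translating a (previously wired) buffer into a list of physical runs. The
// fixed table size of 8 entries is an arbitrary choice; B_BUFFER_OVERFLOW
// merely means the buffer spans more runs than the table can hold.
static status_t
dump_physical_runs_example(team_id team, const void* buffer, size_t size)
{
	physical_entry table[8];
	uint32 entries = 8;
	status_t error = get_memory_map_etc(team, buffer, size, table, &entries);
	if (error != B_OK && error != B_BUFFER_OVERFLOW)
		return error;

	for (uint32 i = 0; i < entries; i++) {
		dprintf("  run %" B_PRIu32 ": %#" B_PRIxPHYSADDR ", %#" B_PRIxPHYSADDR
			" bytes\n", i, table[i].address, (phys_addr_t)table[i].size);
	}

	return error;
}
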
5907 
5908 /*!	According to the BeBook, this function should always succeed.
5909 	This is no longer the case.
5910 */
5911 extern "C" int32
5912 __get_memory_map_haiku(const void* address, size_t numBytes,
5913 	physical_entry* table, int32 numEntries)
5914 {
5915 	uint32 entriesRead = numEntries;
5916 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5917 		table, &entriesRead);
5918 	if (error != B_OK)
5919 		return error;
5920 
5921 	// close the entry list
5922 
5923 	// if it's only one entry, we will silently accept the missing ending
5924 	if (numEntries == 1)
5925 		return B_OK;
5926 
5927 	if (entriesRead + 1 > (uint32)numEntries)
5928 		return B_BUFFER_OVERFLOW;
5929 
5930 	table[entriesRead].address = 0;
5931 	table[entriesRead].size = 0;
5932 
5933 	return B_OK;
5934 }
5935 
5936 
5937 area_id
5938 area_for(void* address)
5939 {
5940 	return vm_area_for((addr_t)address, true);
5941 }
5942 
5943 
5944 area_id
5945 find_area(const char* name)
5946 {
5947 	return VMAreaHash::Find(name);
5948 }
5949 
5950 
5951 status_t
5952 _get_area_info(area_id id, area_info* info, size_t size)
5953 {
5954 	if (size != sizeof(area_info) || info == NULL)
5955 		return B_BAD_VALUE;
5956 
5957 	AddressSpaceReadLocker locker;
5958 	VMArea* area;
5959 	status_t status = locker.SetFromArea(id, area);
5960 	if (status != B_OK)
5961 		return status;
5962 
5963 	fill_area_info(area, info, size);
5964 	return B_OK;
5965 }
5966 
5967 
5968 status_t
5969 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5970 {
5971 	addr_t nextBase = *(addr_t*)cookie;
5972 
5973 	// we're already through the list
5974 	if (nextBase == (addr_t)-1)
5975 		return B_ENTRY_NOT_FOUND;
5976 
5977 	if (team == B_CURRENT_TEAM)
5978 		team = team_get_current_team_id();
5979 
5980 	AddressSpaceReadLocker locker(team);
5981 	if (!locker.IsLocked())
5982 		return B_BAD_TEAM_ID;
5983 
5984 	VMArea* area;
5985 	for (VMAddressSpace::AreaIterator it
5986 				= locker.AddressSpace()->GetAreaIterator();
5987 			(area = it.Next()) != NULL;) {
5988 		if (area->Base() > nextBase)
5989 			break;
5990 	}
5991 
5992 	if (area == NULL) {
5993 		nextBase = (addr_t)-1;
5994 		return B_ENTRY_NOT_FOUND;
5995 	}
5996 
5997 	fill_area_info(area, info, size);
5998 	*cookie = (ssize_t)(area->Base());
5999 
6000 	return B_OK;
6001 }
6002 
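
// Editorial usage sketch (not part of this file's original revision):
// enumerating a team's areas with the cookie based iteration above. The
// function name is hypothetical; the cookie starts out as 0.
static void
dump_team_areas_example(team_id team)
{
	ssize_t cookie = 0;
	area_info info;
	while (_get_next_area_info(team, &cookie, &info, sizeof(area_info))
			== B_OK) {
		dprintf("area %p, %#lx bytes: %s\n", info.address,
			(unsigned long)info.size, info.name);
	}
}
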
6003 
6004 status_t
6005 set_area_protection(area_id area, uint32 newProtection)
6006 {
6007 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6008 		newProtection, true);
6009 }
6010 
6011 
6012 status_t
6013 resize_area(area_id areaID, size_t newSize)
6014 {
6015 	return vm_resize_area(areaID, newSize, true);
6016 }
6017 
6018 
6019 /*!	Transfers the specified area to a new team. The caller must be the owner
6020 	of the area.
6021 */
6022 area_id
6023 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6024 	bool kernel)
6025 {
6026 	area_info info;
6027 	status_t status = get_area_info(id, &info);
6028 	if (status != B_OK)
6029 		return status;
6030 
6031 	if (info.team != thread_get_current_thread()->team->id)
6032 		return B_PERMISSION_DENIED;
6033 
6034 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6035 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6036 	if (clonedArea < 0)
6037 		return clonedArea;
6038 
6039 	status = vm_delete_area(info.team, id, kernel);
6040 	if (status != B_OK) {
6041 		vm_delete_area(target, clonedArea, kernel);
6042 		return status;
6043 	}
6044 
6045 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6046 
6047 	return clonedArea;
6048 }
6049 
6050 
6051 extern "C" area_id
6052 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6053 	size_t numBytes, uint32 addressSpec, uint32 protection,
6054 	void** _virtualAddress)
6055 {
6056 	if (!arch_vm_supports_protection(protection))
6057 		return B_NOT_SUPPORTED;
6058 
6059 	fix_protection(&protection);
6060 
6061 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6062 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6063 		false);
6064 }
6065 
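
// Editorial usage sketch (not part of this file's original revision): mapping
// a hypothetical frame buffer at a known physical address into the kernel
// address space. Cache attributes are left at their defaults for brevity.
static area_id
map_frame_buffer_example(phys_addr_t physicalBase, size_t size,
	void** _virtualBase)
{
	return __map_physical_memory_haiku("frame buffer", physicalBase, size,
		B_ANY_KERNEL_ADDRESS, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
		_virtualBase);
}
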
6066 
6067 area_id
6068 clone_area(const char* name, void** _address, uint32 addressSpec,
6069 	uint32 protection, area_id source)
6070 {
6071 	if ((protection & B_KERNEL_PROTECTION) == 0)
6072 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6073 
6074 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6075 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6076 }
6077 
6078 
6079 area_id
6080 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6081 	uint32 protection, uint32 flags, uint32 guardSize,
6082 	const virtual_address_restrictions* virtualAddressRestrictions,
6083 	const physical_address_restrictions* physicalAddressRestrictions,
6084 	void** _address)
6085 {
6086 	fix_protection(&protection);
6087 
6088 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6089 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6090 		true, _address);
6091 }
6092 
6093 
6094 extern "C" area_id
6095 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6096 	size_t size, uint32 lock, uint32 protection)
6097 {
6098 	fix_protection(&protection);
6099 
6100 	virtual_address_restrictions virtualRestrictions = {};
6101 	virtualRestrictions.address = *_address;
6102 	virtualRestrictions.address_specification = addressSpec;
6103 	physical_address_restrictions physicalRestrictions = {};
6104 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6105 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6106 		true, _address);
6107 }
6108 
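
// Editorial usage sketch (not part of this file's original revision):
// creating a fully locked kernel scratch buffer via create_area_etc() with
// default address restrictions, mirroring the pattern of
// __create_area_haiku() above. The area name and size are arbitrary; a later
// delete_area() call releases the buffer again.
static area_id
create_scratch_buffer_example(void** _buffer)
{
	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
	physical_address_restrictions physicalRestrictions = {};

	return create_area_etc(VMAddressSpace::KernelID(), "scratch buffer",
		16 * B_PAGE_SIZE, B_FULL_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
		&physicalRestrictions, _buffer);
}
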
6109 
6110 status_t
6111 delete_area(area_id area)
6112 {
6113 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6114 }
6115 
6116 
6117 //	#pragma mark - Userland syscalls
6118 
6119 
6120 status_t
6121 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6122 	addr_t size)
6123 {
6124 	// filter out some unavailable values (for userland)
6125 	switch (addressSpec) {
6126 		case B_ANY_KERNEL_ADDRESS:
6127 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6128 			return B_BAD_VALUE;
6129 	}
6130 
6131 	addr_t address;
6132 
6133 	if (!IS_USER_ADDRESS(userAddress)
6134 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6135 		return B_BAD_ADDRESS;
6136 
6137 	status_t status = vm_reserve_address_range(
6138 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6139 		RESERVED_AVOID_BASE);
6140 	if (status != B_OK)
6141 		return status;
6142 
6143 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6144 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6145 			(void*)address, size);
6146 		return B_BAD_ADDRESS;
6147 	}
6148 
6149 	return B_OK;
6150 }
6151 
6152 
6153 status_t
6154 _user_unreserve_address_range(addr_t address, addr_t size)
6155 {
6156 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6157 		(void*)address, size);
6158 }
6159 
6160 
6161 area_id
6162 _user_area_for(void* address)
6163 {
6164 	return vm_area_for((addr_t)address, false);
6165 }
6166 
6167 
6168 area_id
6169 _user_find_area(const char* userName)
6170 {
6171 	char name[B_OS_NAME_LENGTH];
6172 
6173 	if (!IS_USER_ADDRESS(userName)
6174 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6175 		return B_BAD_ADDRESS;
6176 
6177 	return find_area(name);
6178 }
6179 
6180 
6181 status_t
6182 _user_get_area_info(area_id area, area_info* userInfo)
6183 {
6184 	if (!IS_USER_ADDRESS(userInfo))
6185 		return B_BAD_ADDRESS;
6186 
6187 	area_info info;
6188 	status_t status = get_area_info(area, &info);
6189 	if (status < B_OK)
6190 		return status;
6191 
6192 	// TODO: do we want to prevent userland from seeing kernel protections?
6193 	//info.protection &= B_USER_PROTECTION;
6194 
6195 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6196 		return B_BAD_ADDRESS;
6197 
6198 	return status;
6199 }
6200 
6201 
6202 status_t
6203 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6204 {
6205 	ssize_t cookie;
6206 
6207 	if (!IS_USER_ADDRESS(userCookie)
6208 		|| !IS_USER_ADDRESS(userInfo)
6209 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6210 		return B_BAD_ADDRESS;
6211 
6212 	area_info info;
6213 	status_t status = _get_next_area_info(team, &cookie, &info,
6214 		sizeof(area_info));
6215 	if (status != B_OK)
6216 		return status;
6217 
6218 	//info.protection &= B_USER_PROTECTION;
6219 
6220 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6221 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6222 		return B_BAD_ADDRESS;
6223 
6224 	return status;
6225 }
6226 
6227 
6228 status_t
6229 _user_set_area_protection(area_id area, uint32 newProtection)
6230 {
6231 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6232 		return B_BAD_VALUE;
6233 
6234 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6235 		newProtection, false);
6236 }
6237 
6238 
6239 status_t
6240 _user_resize_area(area_id area, size_t newSize)
6241 {
6242 	// TODO: Since we restrict deleting of areas to those owned by the team,
6243 	// we should also do that for resizing (check other functions, too).
6244 	return vm_resize_area(area, newSize, false);
6245 }
6246 
6247 
6248 area_id
6249 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6250 	team_id target)
6251 {
6252 	// filter out some unavailable values (for userland)
6253 	switch (addressSpec) {
6254 		case B_ANY_KERNEL_ADDRESS:
6255 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6256 			return B_BAD_VALUE;
6257 	}
6258 
6259 	void* address;
6260 	if (!IS_USER_ADDRESS(userAddress)
6261 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6262 		return B_BAD_ADDRESS;
6263 
6264 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6265 	if (newArea < B_OK)
6266 		return newArea;
6267 
6268 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6269 		return B_BAD_ADDRESS;
6270 
6271 	return newArea;
6272 }
6273 
6274 
6275 area_id
6276 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6277 	uint32 protection, area_id sourceArea)
6278 {
6279 	char name[B_OS_NAME_LENGTH];
6280 	void* address;
6281 
6282 	// filter out some unavailable values (for userland)
6283 	switch (addressSpec) {
6284 		case B_ANY_KERNEL_ADDRESS:
6285 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6286 			return B_BAD_VALUE;
6287 	}
6288 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6289 		return B_BAD_VALUE;
6290 
6291 	if (!IS_USER_ADDRESS(userName)
6292 		|| !IS_USER_ADDRESS(userAddress)
6293 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6294 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6295 		return B_BAD_ADDRESS;
6296 
6297 	fix_protection(&protection);
6298 
6299 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6300 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6301 		false);
6302 	if (clonedArea < B_OK)
6303 		return clonedArea;
6304 
6305 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6306 		delete_area(clonedArea);
6307 		return B_BAD_ADDRESS;
6308 	}
6309 
6310 	return clonedArea;
6311 }
6312 
6313 
6314 area_id
6315 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6316 	size_t size, uint32 lock, uint32 protection)
6317 {
6318 	char name[B_OS_NAME_LENGTH];
6319 	void* address;
6320 
6321 	// filter out some unavailable values (for userland)
6322 	switch (addressSpec) {
6323 		case B_ANY_KERNEL_ADDRESS:
6324 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6325 			return B_BAD_VALUE;
6326 	}
6327 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6328 		return B_BAD_VALUE;
6329 
6330 	if (!IS_USER_ADDRESS(userName)
6331 		|| !IS_USER_ADDRESS(userAddress)
6332 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6333 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6334 		return B_BAD_ADDRESS;
6335 
6336 	if (addressSpec == B_EXACT_ADDRESS
6337 		&& IS_KERNEL_ADDRESS(address))
6338 		return B_BAD_VALUE;
6339 
6340 	if (addressSpec == B_ANY_ADDRESS)
6341 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6342 	if (addressSpec == B_BASE_ADDRESS)
6343 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6344 
6345 	fix_protection(&protection);
6346 
6347 	virtual_address_restrictions virtualRestrictions = {};
6348 	virtualRestrictions.address = address;
6349 	virtualRestrictions.address_specification = addressSpec;
6350 	physical_address_restrictions physicalRestrictions = {};
6351 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6352 		size, lock, protection, 0, 0, &virtualRestrictions,
6353 		&physicalRestrictions, false, &address);
6354 
6355 	if (area >= B_OK
6356 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6357 		delete_area(area);
6358 		return B_BAD_ADDRESS;
6359 	}
6360 
6361 	return area;
6362 }
6363 
6364 
6365 status_t
6366 _user_delete_area(area_id area)
6367 {
6368 	// Unlike the BeOS implementation, you can now only delete areas
6369 	// that you have created yourself from userland.
6370 	// The documentation to delete_area() explicitly states that this
6371 	// will be restricted in the future, and so it will.
6372 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6373 }
6374 
6375 
6376 // TODO: create a BeOS style call for this!
6377 
6378 area_id
6379 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6380 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6381 	int fd, off_t offset)
6382 {
6383 	char name[B_OS_NAME_LENGTH];
6384 	void* address;
6385 	area_id area;
6386 
6387 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6388 		return B_BAD_VALUE;
6389 
6390 	fix_protection(&protection);
6391 
6392 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6393 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6394 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6395 		return B_BAD_ADDRESS;
6396 
6397 	if (addressSpec == B_EXACT_ADDRESS) {
6398 		if ((addr_t)address + size < (addr_t)address
6399 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6400 			return B_BAD_VALUE;
6401 		}
6402 		if (!IS_USER_ADDRESS(address)
6403 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6404 			return B_BAD_ADDRESS;
6405 		}
6406 	}
6407 
6408 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6409 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6410 		false);
6411 	if (area < B_OK)
6412 		return area;
6413 
6414 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6415 		return B_BAD_ADDRESS;
6416 
6417 	return area;
6418 }
6419 
6420 
6421 status_t
6422 _user_unmap_memory(void* _address, size_t size)
6423 {
6424 	addr_t address = (addr_t)_address;
6425 
6426 	// check params
6427 	if (size == 0 || (addr_t)address + size < (addr_t)address
6428 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6429 		return B_BAD_VALUE;
6430 	}
6431 
6432 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6433 		return B_BAD_ADDRESS;
6434 
6435 	// Write lock the address space and ensure the address range is not wired.
6436 	AddressSpaceWriteLocker locker;
6437 	do {
6438 		status_t status = locker.SetTo(team_get_current_team_id());
6439 		if (status != B_OK)
6440 			return status;
6441 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6442 			size, &locker));
6443 
6444 	// unmap
6445 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6446 }
6447 
6448 
6449 status_t
6450 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6451 {
6452 	// check address range
6453 	addr_t address = (addr_t)_address;
6454 	size = PAGE_ALIGN(size);
6455 
6456 	if ((address % B_PAGE_SIZE) != 0)
6457 		return B_BAD_VALUE;
6458 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6459 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6460 		// weird error code required by POSIX
6461 		return ENOMEM;
6462 	}
6463 
6464 	// extend and check protection
6465 	if ((protection & ~B_USER_PROTECTION) != 0)
6466 		return B_BAD_VALUE;
6467 
6468 	fix_protection(&protection);
6469 
6470 	// We need to write lock the address space, since we're going to play with
6471 	// the areas. Also make sure that none of the areas is wired and that we're
6472 	// actually allowed to change the protection.
6473 	AddressSpaceWriteLocker locker;
6474 
6475 	bool restart;
6476 	do {
6477 		restart = false;
6478 
6479 		status_t status = locker.SetTo(team_get_current_team_id());
6480 		if (status != B_OK)
6481 			return status;
6482 
6483 		// First round: Check whether the whole range is covered by areas and we
6484 		// are allowed to modify them.
6485 		addr_t currentAddress = address;
6486 		size_t sizeLeft = size;
6487 		while (sizeLeft > 0) {
6488 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6489 			if (area == NULL)
6490 				return B_NO_MEMORY;
6491 
6492 			if ((area->protection & B_KERNEL_AREA) != 0)
6493 				return B_NOT_ALLOWED;
6494 
6495 			// TODO: For (shared) mapped files we should check whether the new
6496 			// protections are compatible with the file permissions. We don't
6497 			// have a way to do that yet, though.
6498 
6499 			addr_t offset = currentAddress - area->Base();
6500 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6501 
6502 			AreaCacheLocker cacheLocker(area);
6503 
6504 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6505 					&locker, &cacheLocker)) {
6506 				restart = true;
6507 				break;
6508 			}
6509 
6510 			cacheLocker.Unlock();
6511 
6512 			currentAddress += rangeSize;
6513 			sizeLeft -= rangeSize;
6514 		}
6515 	} while (restart);
6516 
6517 	// Second round: If the protections differ from that of the area, create a
6518 	// page protection array and re-map mapped pages.
6519 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6520 	addr_t currentAddress = address;
6521 	size_t sizeLeft = size;
6522 	while (sizeLeft > 0) {
6523 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6524 		if (area == NULL)
6525 			return B_NO_MEMORY;
6526 
6527 		addr_t offset = currentAddress - area->Base();
6528 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6529 
6530 		currentAddress += rangeSize;
6531 		sizeLeft -= rangeSize;
6532 
6533 		if (area->page_protections == NULL) {
6534 			if (area->protection == protection)
6535 				continue;
6536 
6537 			status_t status = allocate_area_page_protections(area);
6538 			if (status != B_OK)
6539 				return status;
6540 		}
6541 
6542 		// We need to lock the complete cache chain, since we potentially unmap
6543 		// pages of lower caches.
6544 		VMCache* topCache = vm_area_get_locked_cache(area);
6545 		VMCacheChainLocker cacheChainLocker(topCache);
6546 		cacheChainLocker.LockAllSourceCaches();
6547 
6548 		for (addr_t pageAddress = area->Base() + offset;
6549 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6550 			map->Lock();
6551 
6552 			set_area_page_protection(area, pageAddress, protection);
6553 
6554 			phys_addr_t physicalAddress;
6555 			uint32 flags;
6556 
6557 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6558 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6559 				map->Unlock();
6560 				continue;
6561 			}
6562 
6563 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6564 			if (page == NULL) {
6565 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6566 					"\n", area, physicalAddress);
6567 				map->Unlock();
6568 				return B_ERROR;
6569 			}
6570 
6571 			// If the page is not in the topmost cache and write access is
6572 			// requested, we have to unmap it. Otherwise we can re-map it with
6573 			// the new protection.
6574 			bool unmapPage = page->Cache() != topCache
6575 				&& (protection & B_WRITE_AREA) != 0;
6576 
6577 			if (!unmapPage)
6578 				map->ProtectPage(area, pageAddress, protection);
6579 
6580 			map->Unlock();
6581 
6582 			if (unmapPage) {
6583 				DEBUG_PAGE_ACCESS_START(page);
6584 				unmap_page(area, pageAddress);
6585 				DEBUG_PAGE_ACCESS_END(page);
6586 			}
6587 		}
6588 	}
6589 
6590 	return B_OK;
6591 }
6592 
6593 
6594 status_t
6595 _user_sync_memory(void* _address, size_t size, uint32 flags)
6596 {
6597 	addr_t address = (addr_t)_address;
6598 	size = PAGE_ALIGN(size);
6599 
6600 	// check params
6601 	if ((address % B_PAGE_SIZE) != 0)
6602 		return B_BAD_VALUE;
6603 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6604 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6605 		// weird error code required by POSIX
6606 		return ENOMEM;
6607 	}
6608 
6609 	bool writeSync = (flags & MS_SYNC) != 0;
6610 	bool writeAsync = (flags & MS_ASYNC) != 0;
6611 	if (writeSync && writeAsync)
6612 		return B_BAD_VALUE;
6613 
6614 	if (size == 0 || (!writeSync && !writeAsync))
6615 		return B_OK;
6616 
6617 	// iterate through the range and sync all concerned areas
6618 	while (size > 0) {
6619 		// read lock the address space
6620 		AddressSpaceReadLocker locker;
6621 		status_t error = locker.SetTo(team_get_current_team_id());
6622 		if (error != B_OK)
6623 			return error;
6624 
6625 		// get the first area
6626 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6627 		if (area == NULL)
6628 			return B_NO_MEMORY;
6629 
6630 		uint32 offset = address - area->Base();
6631 		size_t rangeSize = min_c(area->Size() - offset, size);
6632 		offset += area->cache_offset;
6633 
6634 		// lock the cache
6635 		AreaCacheLocker cacheLocker(area);
6636 		if (!cacheLocker)
6637 			return B_BAD_VALUE;
6638 		VMCache* cache = area->cache;
6639 
6640 		locker.Unlock();
6641 
6642 		uint32 firstPage = offset >> PAGE_SHIFT;
6643 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6644 
6645 		// write the pages
6646 		if (cache->type == CACHE_TYPE_VNODE) {
6647 			if (writeSync) {
6648 				// synchronous
6649 				error = vm_page_write_modified_page_range(cache, firstPage,
6650 					endPage);
6651 				if (error != B_OK)
6652 					return error;
6653 			} else {
6654 				// asynchronous
6655 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6656 				// TODO: This is probably not quite what is supposed to happen.
6657 				// Especially when a lot has to be written, it might take ages
6658 				// until it really hits the disk.
6659 			}
6660 		}
6661 
6662 		address += rangeSize;
6663 		size -= rangeSize;
6664 	}
6665 
6666 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6667 	// synchronize multiple mappings of the same file. In our VM they never get
6668 	// out of sync, though, so we don't have to do anything.
6669 
6670 	return B_OK;
6671 }
6672 
6673 
6674 status_t
6675 _user_memory_advice(void* address, size_t size, uint32 advice)
6676 {
6677 	// TODO: Implement!
6678 	return B_OK;
6679 }
6680 
6681 
6682 status_t
6683 _user_get_memory_properties(team_id teamID, const void* address,
6684 	uint32* _protected, uint32* _lock)
6685 {
6686 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6687 		return B_BAD_ADDRESS;
6688 
6689 	AddressSpaceReadLocker locker;
6690 	status_t error = locker.SetTo(teamID);
6691 	if (error != B_OK)
6692 		return error;
6693 
6694 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6695 	if (area == NULL)
6696 		return B_NO_MEMORY;
6697 
6698 
6699 	uint32 protection = area->protection;
6700 	if (area->page_protections != NULL)
6701 		protection = get_area_page_protection(area, (addr_t)address);
6702 
6703 	uint32 wiring = area->wiring;
6704 
6705 	locker.Unlock();
6706 
6707 	error = user_memcpy(_protected, &protection, sizeof(protection));
6708 	if (error != B_OK)
6709 		return error;
6710 
6711 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6712 
6713 	return error;
6714 }
6715 
6716 
6717 //	#pragma mark - compatibility
6718 
6719 
6720 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6721 
6722 
6723 struct physical_entry_beos {
6724 	uint32	address;
6725 	uint32	size;
6726 };
6727 
6728 
6729 /*!	The physical_entry structure has changed. We need to translate it to the
6730 	old one.
6731 */
6732 extern "C" int32
6733 __get_memory_map_beos(const void* _address, size_t numBytes,
6734 	physical_entry_beos* table, int32 numEntries)
6735 {
6736 	if (numEntries <= 0)
6737 		return B_BAD_VALUE;
6738 
6739 	const uint8* address = (const uint8*)_address;
6740 
6741 	int32 count = 0;
6742 	while (numBytes > 0 && count < numEntries) {
6743 		physical_entry entry;
6744 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6745 		if (result < 0) {
6746 			if (result != B_BUFFER_OVERFLOW)
6747 				return result;
6748 		}
6749 
6750 		if (entry.address >= (phys_addr_t)1 << 32) {
6751 			panic("get_memory_map(): Address is greater than 4 GB!");
6752 			return B_ERROR;
6753 		}
6754 
6755 		table[count].address = entry.address;
6756 		table[count++].size = entry.size;
6757 
6758 		address += entry.size;
6759 		numBytes -= entry.size;
6760 	}
6761 
6762 	// null-terminate the table, if possible
6763 	if (count < numEntries) {
6764 		table[count].address = 0;
6765 		table[count].size = 0;
6766 	}
6767 
6768 	return B_OK;
6769 }
6770 
6771 
6772 /*!	The type of the \a physicalAddress parameter has changed from void* to
6773 	phys_addr_t.
6774 */
6775 extern "C" area_id
6776 __map_physical_memory_beos(const char* name, void* physicalAddress,
6777 	size_t numBytes, uint32 addressSpec, uint32 protection,
6778 	void** _virtualAddress)
6779 {
6780 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6781 		addressSpec, protection, _virtualAddress);
6782 }
6783 
6784 
6785 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6786 	we meddle with the \a lock parameter to force 32 bit.
6787 */
6788 extern "C" area_id
6789 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6790 	size_t size, uint32 lock, uint32 protection)
6791 {
6792 	switch (lock) {
6793 		case B_NO_LOCK:
6794 			break;
6795 		case B_FULL_LOCK:
6796 		case B_LAZY_LOCK:
6797 			lock = B_32_BIT_FULL_LOCK;
6798 			break;
6799 		case B_CONTIGUOUS:
6800 			lock = B_32_BIT_CONTIGUOUS;
6801 			break;
6802 	}
6803 
6804 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6805 		protection);
6806 }
6807 
6808 
6809 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6810 	"BASE");
6811 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6812 	"map_physical_memory@", "BASE");
6813 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6814 	"BASE");
6815 
6816 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6817 	"get_memory_map@@", "1_ALPHA3");
6818 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6819 	"map_physical_memory@@", "1_ALPHA3");
6820 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6821 	"1_ALPHA3");
6822 
6823 
6824 #else
6825 
6826 
6827 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6828 	"get_memory_map@@", "BASE");
6829 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6830 	"map_physical_memory@@", "BASE");
6831 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6832 	"BASE");
6833 
6834 
6835 #endif	// defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6836