xref: /haiku/src/system/kernel/vm/vm.cpp (revision 4bd6250035acae76540b58c380555236a8dfd4e0)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 namespace {
77 
78 class AreaCacheLocking {
79 public:
80 	inline bool Lock(VMCache* lockable)
81 	{
82 		return false;
83 	}
84 
85 	inline void Unlock(VMCache* lockable)
86 	{
87 		vm_area_put_locked_cache(lockable);
88 	}
89 };
90 
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 	inline AreaCacheLocker(VMCache* cache = NULL)
94 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95 	{
96 	}
97 
98 	inline AreaCacheLocker(VMArea* area)
99 		: AutoLocker<VMCache, AreaCacheLocking>()
100 	{
101 		SetTo(area);
102 	}
103 
104 	inline void SetTo(VMCache* cache, bool alreadyLocked)
105 	{
106 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107 	}
108 
109 	inline void SetTo(VMArea* area)
110 	{
111 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113 	}
114 };
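
// Illustrative note: AreaCacheLocker is the RAII guard used throughout this
// file for an area's cache. A typical pattern (cf.
// wait_if_address_range_is_wired() below) is:
//
//	AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
//	// the cache is now locked and referenced; both are released when the
//	// locker goes out of scope (via vm_area_put_locked_cache())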
115 
116 
117 class VMCacheChainLocker {
118 public:
119 	VMCacheChainLocker()
120 		:
121 		fTopCache(NULL),
122 		fBottomCache(NULL)
123 	{
124 	}
125 
126 	VMCacheChainLocker(VMCache* topCache)
127 		:
128 		fTopCache(topCache),
129 		fBottomCache(topCache)
130 	{
131 	}
132 
133 	~VMCacheChainLocker()
134 	{
135 		Unlock();
136 	}
137 
138 	void SetTo(VMCache* topCache)
139 	{
140 		fTopCache = topCache;
141 		fBottomCache = topCache;
142 
143 		if (topCache != NULL)
144 			topCache->SetUserData(NULL);
145 	}
146 
147 	VMCache* LockSourceCache()
148 	{
149 		if (fBottomCache == NULL || fBottomCache->source == NULL)
150 			return NULL;
151 
152 		VMCache* previousCache = fBottomCache;
153 
154 		fBottomCache = fBottomCache->source;
155 		fBottomCache->Lock();
156 		fBottomCache->AcquireRefLocked();
157 		fBottomCache->SetUserData(previousCache);
158 
159 		return fBottomCache;
160 	}
161 
162 	void LockAllSourceCaches()
163 	{
164 		while (LockSourceCache() != NULL) {
165 		}
166 	}
167 
168 	void Unlock(VMCache* exceptCache = NULL)
169 	{
170 		if (fTopCache == NULL)
171 			return;
172 
173 		// Unlock caches in source -> consumer direction. This is important to
174 		// avoid double-locking and a reversal of locking order in case a cache
175 		// is eligible for merging.
176 		VMCache* cache = fBottomCache;
177 		while (cache != NULL) {
178 			VMCache* nextCache = (VMCache*)cache->UserData();
179 			if (cache != exceptCache)
180 				cache->ReleaseRefAndUnlock(cache != fTopCache);
181 
182 			if (cache == fTopCache)
183 				break;
184 
185 			cache = nextCache;
186 		}
187 
188 		fTopCache = NULL;
189 		fBottomCache = NULL;
190 	}
191 
192 	void UnlockKeepRefs(bool keepTopCacheLocked)
193 	{
194 		if (fTopCache == NULL)
195 			return;
196 
197 		VMCache* nextCache = fBottomCache;
198 		VMCache* cache = NULL;
199 
200 		while (keepTopCacheLocked
201 				? nextCache != fTopCache : cache != fTopCache) {
202 			cache = nextCache;
203 			nextCache = (VMCache*)cache->UserData();
204 			cache->Unlock(cache != fTopCache);
205 		}
206 	}
207 
208 	void RelockCaches(bool topCacheLocked)
209 	{
210 		if (fTopCache == NULL)
211 			return;
212 
213 		VMCache* nextCache = fTopCache;
214 		VMCache* cache = NULL;
215 		if (topCacheLocked) {
216 			cache = nextCache;
217 			nextCache = cache->source;
218 		}
219 
220 		while (cache != fBottomCache && nextCache != NULL) {
221 			VMCache* consumer = cache;
222 			cache = nextCache;
223 			nextCache = cache->source;
224 			cache->Lock();
225 			cache->SetUserData(consumer);
226 		}
227 	}
228 
229 private:
230 	VMCache*	fTopCache;
231 	VMCache*	fBottomCache;
232 };
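
// Sketch of the typical VMCacheChainLocker pattern (as used in cut_area()
// below): the caller locks the area's top cache, hands it to the chain
// locker and then locks the rest of the chain towards the bottom.
//
//	VMCache* cache = vm_area_get_locked_cache(area);
//	VMCacheChainLocker cacheChainLocker(cache);
//	cacheChainLocker.LockAllSourceCaches();
//	// ... work on the whole chain; Unlock() (or the destructor) releases the
//	// caches in source -> consumer order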
233 
234 } // namespace
235 
236 
237 // The memory reserve that an allocation of the given priority must not touch.
238 static const size_t kMemoryReserveForPriority[] = {
239 	VM_MEMORY_RESERVE_USER,		// user
240 	VM_MEMORY_RESERVE_SYSTEM,	// system
241 	0							// VIP
242 };
243 
244 
245 ObjectCache* gPageMappingsObjectCache;
246 
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248 
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
253 
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
255 
256 #if DEBUG_CACHE_LIST
257 
258 struct cache_info {
259 	VMCache*	cache;
260 	addr_t		page_count;
261 	addr_t		committed;
262 };
263 
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
266 
267 #endif	// DEBUG_CACHE_LIST
268 
269 
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 	bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 	int protection, int mapping, uint32 flags,
278 	const virtual_address_restrictions* addressRestrictions, bool kernel,
279 	VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
281 
282 
283 //	#pragma mark -
284 
285 
286 #if VM_PAGE_FAULT_TRACING
287 
288 namespace VMPageFaultTracing {
289 
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293 		:
294 		fAddress(address),
295 		fPC(pc),
296 		fWrite(write),
297 		fUser(user)
298 	{
299 		Initialized();
300 	}
301 
302 	virtual void AddDump(TraceOutput& out)
303 	{
304 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306 	}
307 
308 private:
309 	addr_t	fAddress;
310 	addr_t	fPC;
311 	bool	fWrite;
312 	bool	fUser;
313 };
314 
315 
316 // page fault errors
317 enum {
318 	PAGE_FAULT_ERROR_NO_AREA		= 0,
319 	PAGE_FAULT_ERROR_KERNEL_ONLY,
320 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 	PAGE_FAULT_ERROR_READ_PROTECTED,
322 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325 };
326 
327 
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 	PageFaultError(area_id area, status_t error)
331 		:
332 		fArea(area),
333 		fError(error)
334 	{
335 		Initialized();
336 	}
337 
338 	virtual void AddDump(TraceOutput& out)
339 	{
340 		switch (fError) {
341 			case PAGE_FAULT_ERROR_NO_AREA:
342 				out.Print("page fault error: no area");
343 				break;
344 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 				out.Print("page fault error: area: %ld, kernel only", fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 				out.Print("page fault error: area: %ld, write protected",
349 					fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_READ_PROTECTED:
352 				out.Print("page fault error: area: %ld, read protected", fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 				out.Print("page fault error: area: %ld, execute protected",
356 					fArea);
357 				break;
358 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 				out.Print("page fault error: kernel touching bad user memory");
360 				break;
361 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 				out.Print("page fault error: no address space");
363 				break;
364 			default:
365 				out.Print("page fault error: area: %ld, error: %s", fArea,
366 					strerror(fError));
367 				break;
368 		}
369 	}
370 
371 private:
372 	area_id		fArea;
373 	status_t	fError;
374 };
375 
376 
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 			vm_page* page)
381 		:
382 		fArea(area),
383 		fTopCache(topCache),
384 		fCache(cache),
385 		fPage(page)
386 	{
387 		Initialized();
388 	}
389 
390 	virtual void AddDump(TraceOutput& out)
391 	{
392 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 			"page: %p", fArea, fTopCache, fCache, fPage);
394 	}
395 
396 private:
397 	area_id		fArea;
398 	VMCache*	fTopCache;
399 	VMCache*	fCache;
400 	vm_page*	fPage;
401 };
402 
403 }	// namespace VMPageFaultTracing
404 
405 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 #	define TPF(x) ;
408 #endif	// VM_PAGE_FAULT_TRACING
409 
410 
411 //	#pragma mark -
412 
413 
414 /*!	The page's cache must be locked.
415 */
416 static inline void
417 increment_page_wired_count(vm_page* page)
418 {
419 	if (!page->IsMapped())
420 		atomic_add(&gMappedPagesCount, 1);
421 	page->IncrementWiredCount();
422 }
423 
424 
425 /*!	The page's cache must be locked.
426 */
427 static inline void
428 decrement_page_wired_count(vm_page* page)
429 {
430 	page->DecrementWiredCount();
431 	if (!page->IsMapped())
432 		atomic_add(&gMappedPagesCount, -1);
433 }
434 
435 
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
438 {
439 	return area->Base()
440 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441 }
442 
443 
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
447 {
448 	VMAreaHash::ReadLock();
449 
450 	VMArea* area = VMAreaHash::LookupLocked(id);
451 	if (area != NULL && area->address_space != addressSpace)
452 		area = NULL;
453 
454 	VMAreaHash::ReadUnlock();
455 
456 	return area;
457 }
458 
459 
460 static status_t
461 allocate_area_page_protections(VMArea* area)
462 {
463 	// In the page protections we store only the three user protections,
464 	// so we use 4 bits per page.
465 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 	area->page_protections = (uint8*)malloc_etc(bytes,
467 		HEAP_DONT_LOCK_KERNEL_SPACE);
468 	if (area->page_protections == NULL)
469 		return B_NO_MEMORY;
470 
471 	// init the page protections for all pages to that of the area
472 	uint32 areaProtection = area->protection
473 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 	memset(area->page_protections, areaProtection | (areaProtection << 4),
475 		bytes);
476 	return B_OK;
477 }
478 
479 
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482 {
483 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 	uint8& entry = area->page_protections[pageIndex / 2];
486 	if (pageIndex % 2 == 0)
487 		entry = (entry & 0xf0) | protection;
488 	else
489 		entry = (entry & 0x0f) | (protection << 4);
490 }
491 
492 
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
495 {
496 	if (area->page_protections == NULL)
497 		return area->protection;
498 
499 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 	uint32 protection = area->page_protections[pageIndex / 2];
501 	if (pageIndex % 2 == 0)
502 		protection &= 0x0f;
503 	else
504 		protection >>= 4;
505 
506 	// If this is a kernel area we translate the user flags to kernel flags.
507 	if (area->address_space == VMAddressSpace::Kernel()) {
508 		uint32 kernelProtection = 0;
509 		if ((protection & B_READ_AREA) != 0)
510 			kernelProtection |= B_KERNEL_READ_AREA;
511 		if ((protection & B_WRITE_AREA) != 0)
512 			kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 		return kernelProtection;
515 	}
516 
517 	return protection | B_KERNEL_READ_AREA
518 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519 }
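
// Worked example of the packing used above: page protections are stored as
// one nibble per page, two pages per byte. For page index 5 (an odd index)
// the protection sits in the high nibble of page_protections[2]:
//
//	uint32 pageIndex = 5;
//	uint8 entry = area->page_protections[pageIndex / 2];	// byte 2
//	uint32 protection = pageIndex % 2 == 0 ? (entry & 0x0f) : (entry >> 4);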
520 
521 
522 /*!	The caller must have reserved enough pages the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue,
575 		// since otherwise the page daemon wouldn't keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
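
// Minimal sketch of the map_page() contract described above, mirroring the
// B_FULL_LOCK path in vm_create_anonymous_area() below (the cache must be
// locked and enough pages must have been reserved beforehand):
//
//	vm_page_reservation reservation;
//	vm_page_reserve_pages(&reservation, reservedPages, priority);
//	vm_page* page = vm_page_allocate_page(&reservation, PAGE_STATE_WIRED);
//	cache->InsertPage(page, offset);
//	map_page(area, page, address, protection, &reservation);
//	DEBUG_PAGE_ACCESS_END(page);
//	vm_page_unreserve_pages(&reservation);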
586 
587 
588 /*!	The caller must hold the lock of the page's cache when calling this
589 	function.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	The caller must hold the locks of all of the area's mapped pages'
600 	caches.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 /*!	Cuts a piece out of an area. If the given cut range covers the complete
610 	area, it is deleted. If it covers the beginning or the end, the area is
611 	resized accordingly. If the range covers some part in the middle of the
612 	area, it is split in two; in this case the second area is returned via
613 	\a _secondArea (the variable is left untouched in the other cases).
614 	The address space must be write locked.
615 	The caller must ensure that no part of the given range is wired.
616 */
617 static status_t
618 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
619 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
620 {
621 	// Does the cut range intersect with the area at all?
622 	addr_t areaLast = area->Base() + (area->Size() - 1);
623 	if (area->Base() > lastAddress || areaLast < address)
624 		return B_OK;
625 
626 	// Is the area fully covered?
627 	if (area->Base() >= address && areaLast <= lastAddress) {
628 		delete_area(addressSpace, area, false);
629 		return B_OK;
630 	}
631 
632 	int priority;
633 	uint32 allocationFlags;
634 	if (addressSpace == VMAddressSpace::Kernel()) {
635 		priority = VM_PRIORITY_SYSTEM;
636 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
637 			| HEAP_DONT_LOCK_KERNEL_SPACE;
638 	} else {
639 		priority = VM_PRIORITY_USER;
640 		allocationFlags = 0;
641 	}
642 
643 	VMCache* cache = vm_area_get_locked_cache(area);
644 	VMCacheChainLocker cacheChainLocker(cache);
645 	cacheChainLocker.LockAllSourceCaches();
646 
647 	// If no one else uses the area's cache and it's an anonymous cache, we can
648 	// resize or split it, too.
649 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
650 		&& cache->consumers.IsEmpty() && cache->type == CACHE_TYPE_RAM;
651 
652 	// Cut the end only?
653 	if (areaLast <= lastAddress) {
654 		size_t oldSize = area->Size();
655 		size_t newSize = address - area->Base();
656 
657 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
658 			allocationFlags);
659 		if (error != B_OK)
660 			return error;
661 
662 		// unmap pages
663 		unmap_pages(area, address, oldSize - newSize);
664 
665 		if (onlyCacheUser) {
666 			// Since VMCache::Resize() can temporarily drop the lock, we must
667 			// unlock all lower caches to prevent locking order inversion.
668 			cacheChainLocker.Unlock(cache);
669 			cache->Resize(cache->virtual_base + newSize, priority);
670 			cache->ReleaseRefAndUnlock();
671 		}
672 
673 		return B_OK;
674 	}
675 
676 	// Cut the beginning only?
677 	if (area->Base() >= address) {
678 		addr_t oldBase = area->Base();
679 		addr_t newBase = lastAddress + 1;
680 		size_t newSize = areaLast - lastAddress;
681 		size_t newOffset = newBase - oldBase;
682 
683 		// unmap pages
684 		unmap_pages(area, oldBase, newOffset);
685 
686 		// resize the area
687 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
688 			allocationFlags);
689 		if (error != B_OK)
690 			return error;
691 
692 		if (onlyCacheUser) {
693 			// Since VMCache::Rebase() can temporarily drop the lock, we must
694 			// unlock all lower caches to prevent locking order inversion.
695 			cacheChainLocker.Unlock(cache);
696 			cache->Rebase(cache->virtual_base + newOffset, priority);
697 			cache->ReleaseRefAndUnlock();
698 		}
699 		area->cache_offset += newOffset;
700 
701 		return B_OK;
702 	}
703 
704 	// The tough part -- cut a piece out of the middle of the area.
705 	// We do that by shrinking the area to the beginning section and creating a
706 	// new area for the end section.
707 	addr_t firstNewSize = address - area->Base();
708 	addr_t secondBase = lastAddress + 1;
709 	addr_t secondSize = areaLast - lastAddress;
710 
711 	// unmap pages
712 	unmap_pages(area, address, area->Size() - firstNewSize);
713 
714 	// resize the area
715 	addr_t oldSize = area->Size();
716 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
717 		allocationFlags);
718 	if (error != B_OK)
719 		return error;
720 
721 	virtual_address_restrictions addressRestrictions = {};
722 	addressRestrictions.address = (void*)secondBase;
723 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
724 	VMArea* secondArea;
725 
726 	if (onlyCacheUser) {
727 		// Create a new cache for the second area.
728 		VMCache* secondCache;
729 		error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0,
730 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
731 		if (error != B_OK) {
732 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
733 			return error;
734 		}
735 
736 		secondCache->Lock();
737 		secondCache->temporary = cache->temporary;
738 		secondCache->virtual_base = area->cache_offset;
739 		secondCache->virtual_end = area->cache_offset + secondSize;
740 
741 		// Transfer the concerned pages from the first cache.
742 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
743 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
744 			area->cache_offset);
745 
746 		if (error == B_OK) {
747 			// Since VMCache::Resize() can temporarily drop the lock, we must
748 			// unlock all lower caches to prevent locking order inversion.
749 			cacheChainLocker.Unlock(cache);
750 			cache->Resize(cache->virtual_base + firstNewSize, priority);
751 			// Don't unlock the cache yet because we might have to resize it
752 			// back.
753 
754 			// Map the second area.
755 			error = map_backing_store(addressSpace, secondCache,
756 				area->cache_offset, area->name, secondSize, area->wiring,
757 				area->protection, REGION_NO_PRIVATE_MAP, 0,
758 				&addressRestrictions, kernel, &secondArea, NULL);
759 		}
760 
761 		if (error != B_OK) {
762 			// Restore the original cache.
763 			cache->Resize(cache->virtual_base + oldSize, priority);
764 
765 			// Move the pages back.
766 			status_t readoptStatus = cache->Adopt(secondCache,
767 				area->cache_offset, secondSize, adoptOffset);
768 			if (readoptStatus != B_OK) {
769 				// Some (swap) pages have not been moved back and will be lost
770 				// once the second cache is deleted.
771 				panic("failed to restore cache range: %s",
772 					strerror(readoptStatus));
773 
774 				// TODO: Handle out of memory cases by freeing memory and
775 				// retrying.
776 			}
777 
778 			cache->ReleaseRefAndUnlock();
779 			secondCache->ReleaseRefAndUnlock();
780 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
781 			return error;
782 		}
783 
784 		// Now we can unlock it.
785 		cache->ReleaseRefAndUnlock();
786 		secondCache->Unlock();
787 	} else {
788 		error = map_backing_store(addressSpace, cache, area->cache_offset
789 			+ (secondBase - area->Base()),
790 			area->name, secondSize, area->wiring, area->protection,
791 			REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, kernel, &secondArea,
792 			NULL);
793 		if (error != B_OK) {
794 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
795 			return error;
796 		}
797 		// We need a cache reference for the new area.
798 		cache->AcquireRefLocked();
799 	}
800 
801 	if (_secondArea != NULL)
802 		*_secondArea = secondArea;
803 
804 	return B_OK;
805 }
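
// Worked example for cut_area() (addresses are illustrative): for an area
// covering [0x1000, 0x4fff], cutting [0x4000, 0x4fff] only shrinks the tail,
// cutting [0x1000, 0x1fff] only moves the base (head cut), and cutting
// [0x2000, 0x2fff] hits the middle case -- the area is shrunk to
// [0x1000, 0x1fff] and a second area is created for [0x3000, 0x4fff].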
806 
807 
808 /*!	Deletes all areas in the given address range.
809 	The address space must be write-locked.
810 	The caller must ensure that no part of the given range is wired.
811 */
812 static status_t
813 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
814 	bool kernel)
815 {
816 	size = PAGE_ALIGN(size);
817 	addr_t lastAddress = address + (size - 1);
818 
819 	// Check whether the caller is allowed to modify the concerned areas.
820 	if (!kernel) {
821 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
822 				VMArea* area = it.Next();) {
823 			addr_t areaLast = area->Base() + (area->Size() - 1);
824 			if (area->Base() < lastAddress && address < areaLast) {
825 				if (area->address_space == VMAddressSpace::Kernel()) {
826 					dprintf("unmap_address_range: team %" B_PRId32 " tried to "
827 						"unmap range of kernel area %" B_PRId32 " (%s)\n",
828 						team_get_current_team_id(), area->id, area->name);
829 					return B_NOT_ALLOWED;
830 				}
831 			}
832 		}
833 	}
834 
835 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
836 			VMArea* area = it.Next();) {
837 		addr_t areaLast = area->Base() + (area->Size() - 1);
838 		if (area->Base() < lastAddress && address < areaLast) {
839 			status_t error = cut_area(addressSpace, area, address,
840 				lastAddress, NULL, kernel);
841 			if (error != B_OK)
842 				return error;
843 				// Failing after already messing with areas is ugly, but we
844 				// can't do anything about it.
845 		}
846 	}
847 
848 	return B_OK;
849 }
850 
851 
852 /*! You need to hold the lock of the cache and the write lock of the address
853 	space when calling this function.
854 	Note that in case of error your cache will be temporarily unlocked.
855 	If \a addressSpec is \c B_EXACT_ADDRESS and the
856 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
857 	that no part of the specified address range (base \c *_virtualAddress, size
858 	\a size) is wired.
859 */
860 static status_t
861 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
862 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
863 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
864 	bool kernel, VMArea** _area, void** _virtualAddress)
865 {
866 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
867 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
868 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
869 		addressRestrictions->address, offset, size,
870 		addressRestrictions->address_specification, wiring, protection,
871 		_area, areaName));
872 	cache->AssertLocked();
873 
874 	if (size == 0) {
875 #if KDEBUG
876 		panic("map_backing_store(): called with size=0 for area '%s'!",
877 			areaName);
878 #endif
879 		return B_BAD_VALUE;
880 	}
881 
882 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
883 		| HEAP_DONT_LOCK_KERNEL_SPACE;
884 	int priority;
885 	if (addressSpace != VMAddressSpace::Kernel()) {
886 		priority = VM_PRIORITY_USER;
887 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
888 		priority = VM_PRIORITY_VIP;
889 		allocationFlags |= HEAP_PRIORITY_VIP;
890 	} else
891 		priority = VM_PRIORITY_SYSTEM;
892 
893 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
894 		allocationFlags);
895 	if (area == NULL)
896 		return B_NO_MEMORY;
897 
898 	status_t status;
899 
900 	// if this is a private map, we need to create a new cache
901 	// to handle the private copies of pages as they are written to
902 	VMCache* sourceCache = cache;
903 	if (mapping == REGION_PRIVATE_MAP) {
904 		VMCache* newCache;
905 
906 		// create an anonymous cache
907 		status = VMCacheFactory::CreateAnonymousCache(newCache,
908 			(protection & B_STACK_AREA) != 0
909 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
910 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
911 		if (status != B_OK)
912 			goto err1;
913 
914 		newCache->Lock();
915 		newCache->temporary = 1;
916 		newCache->virtual_base = offset;
917 		newCache->virtual_end = offset + size;
918 
919 		cache->AddConsumer(newCache);
920 
921 		cache = newCache;
922 	}
923 
924 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
925 		status = cache->SetMinimalCommitment(size, priority);
926 		if (status != B_OK)
927 			goto err2;
928 	}
929 
930 	// check to see if this address space has entered DELETE state
931 	if (addressSpace->IsBeingDeleted()) {
932 		// okay, someone is trying to delete this address space now, so we can't
933 		// insert the area and have to back out
934 		status = B_BAD_TEAM_ID;
935 		goto err2;
936 	}
937 
938 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
939 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
940 		status = unmap_address_range(addressSpace,
941 			(addr_t)addressRestrictions->address, size, kernel);
942 		if (status != B_OK)
943 			goto err2;
944 	}
945 
946 	status = addressSpace->InsertArea(area, size, addressRestrictions,
947 		allocationFlags, _virtualAddress);
948 	if (status == B_NO_MEMORY
949 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
950 		// TODO: At present, there is no way to notify the low_resource monitor
951 		// that kernel address space is fragmented, nor does it check for this
952 		// automatically. Due to how many locks are held, we cannot wait here
953 		// for space to be freed up, but it would be good to at least notify
954 		// that we tried and failed to allocate some amount.
955 	}
956 	if (status != B_OK)
957 		goto err2;
958 
959 	// attach the cache to the area
960 	area->cache = cache;
961 	area->cache_offset = offset;
962 
963 	// point the cache back to the area
964 	cache->InsertAreaLocked(area);
965 	if (mapping == REGION_PRIVATE_MAP)
966 		cache->Unlock();
967 
968 	// insert the area in the global area hash table
969 	VMAreaHash::Insert(area);
970 
971 	// grab a ref to the address space (the area holds this)
972 	addressSpace->Get();
973 
974 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
975 //		cache, sourceCache, areaName, area);
976 
977 	*_area = area;
978 	return B_OK;
979 
980 err2:
981 	if (mapping == REGION_PRIVATE_MAP) {
982 		// We created this cache, so we must delete it again. Note that we
983 		// need to temporarily unlock the source cache or we'll otherwise
984 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
985 		sourceCache->Unlock();
986 		cache->ReleaseRefAndUnlock();
987 		sourceCache->Lock();
988 	}
989 err1:
990 	addressSpace->DeleteArea(area, allocationFlags);
991 	return status;
992 }
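
// Sketch of the usual map_backing_store() calling sequence (cf.
// vm_block_address_range() below): the cache is created and locked first, and
// its reference is dropped again if the mapping fails.
//
//	VMCache* cache;
//	status_t status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0,
//		false, VM_PRIORITY_SYSTEM);
//	if (status != B_OK)
//		return status;
//	cache->temporary = 1;
//	cache->virtual_end = size;
//	cache->Lock();
//
//	virtual_address_restrictions addressRestrictions = {};
//	addressRestrictions.address = address;
//	addressRestrictions.address_specification = B_EXACT_ADDRESS;
//	VMArea* area;
//	status = map_backing_store(addressSpace, cache, 0, name, size,
//		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
//		true, &area, NULL);
//	if (status != B_OK)
//		cache->ReleaseRefAndUnlock();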
993 
994 
995 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
996 	  locker1, locker2).
997 */
998 template<typename LockerType1, typename LockerType2>
999 static inline bool
1000 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1001 {
1002 	area->cache->AssertLocked();
1003 
1004 	VMAreaUnwiredWaiter waiter;
1005 	if (!area->AddWaiterIfWired(&waiter))
1006 		return false;
1007 
1008 	// unlock everything and wait
1009 	if (locker1 != NULL)
1010 		locker1->Unlock();
1011 	if (locker2 != NULL)
1012 		locker2->Unlock();
1013 
1014 	waiter.waitEntry.Wait();
1015 
1016 	return true;
1017 }
1018 
1019 
1020 /*!	Checks whether the given area has any wired ranges intersecting with the
1021 	specified range and waits, if so.
1022 
1023 	When it has to wait, the function calls \c Unlock() on both \a locker1
1024 	and \a locker2, if given.
1025 	The area's top cache must be locked and must be unlocked as a side effect
1026 	of calling \c Unlock() on either \a locker1 or \a locker2.
1027 
1028 	If the function does not have to wait it does not modify or unlock any
1029 	object.
1030 
1031 	\param area The area to be checked.
1032 	\param base The base address of the range to check.
1033 	\param size The size of the address range to check.
1034 	\param locker1 An object to be unlocked before starting to wait (may
1035 		be \c NULL).
1036 	\param locker2 An object to be unlocked before starting to wait (may
1037 		be \c NULL).
1038 	\return \c true, if the function had to wait, \c false otherwise.
1039 */
1040 template<typename LockerType1, typename LockerType2>
1041 static inline bool
1042 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1043 	LockerType1* locker1, LockerType2* locker2)
1044 {
1045 	area->cache->AssertLocked();
1046 
1047 	VMAreaUnwiredWaiter waiter;
1048 	if (!area->AddWaiterIfWired(&waiter, base, size))
1049 		return false;
1050 
1051 	// unlock everything and wait
1052 	if (locker1 != NULL)
1053 		locker1->Unlock();
1054 	if (locker2 != NULL)
1055 		locker2->Unlock();
1056 
1057 	waiter.waitEntry.Wait();
1058 
1059 	return true;
1060 }
1061 
1062 
1063 /*!	Checks whether the given address space has any wired ranges intersecting
1064 	with the specified range and waits, if so.
1065 
1066 	Similar to wait_if_area_range_is_wired(), with the following differences:
1067 	- All areas intersecting with the range are checked (respectively all until
1068 	  one is found that contains a wired range intersecting with the given
1069 	  range).
1070 	- The given address space must at least be read-locked and must be unlocked
1071 	  when \c Unlock() is called on \a locker.
1072 	- None of the areas' caches are allowed to be locked.
1073 */
1074 template<typename LockerType>
1075 static inline bool
1076 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1077 	size_t size, LockerType* locker)
1078 {
1079 	addr_t end = base + size - 1;
1080 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
1081 			VMArea* area = it.Next();) {
1082 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
1083 		if (area->Base() > end)
1084 			return false;
1085 
1086 		if (base >= area->Base() + area->Size() - 1)
1087 			continue;
1088 
1089 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1090 
1091 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1092 			return true;
1093 	}
1094 
1095 	return false;
1096 }
1097 
1098 
1099 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1100 	It must be called in a situation where the kernel address space may be
1101 	locked.
1102 */
1103 status_t
1104 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1105 {
1106 	AddressSpaceReadLocker locker;
1107 	VMArea* area;
1108 	status_t status = locker.SetFromArea(id, area);
1109 	if (status != B_OK)
1110 		return status;
1111 
1112 	if (area->page_protections == NULL) {
1113 		status = allocate_area_page_protections(area);
1114 		if (status != B_OK)
1115 			return status;
1116 	}
1117 
1118 	*cookie = (void*)area;
1119 	return B_OK;
1120 }
1121 
1122 
1123 /*!	This is a debug helper function that can only be used with very specific
1124 	use cases.
1125 	Sets protection for the given address range to the protection specified.
1126 	If \a protection is 0 then the involved pages will be marked non-present
1127 	in the translation map to cause a fault on access. The pages aren't
1128 	actually unmapped however so that they can be marked present again with
1129 	additional calls to this function. For this to work the area must be
1130 	fully locked in memory so that the pages aren't otherwise touched.
1131 	This function does not lock the kernel address space and needs to be
1132 	supplied with a \a cookie retrieved from a successful call to
1133 	vm_prepare_kernel_area_debug_protection().
1134 */
1135 status_t
1136 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1137 	uint32 protection)
1138 {
1139 	// check address range
1140 	addr_t address = (addr_t)_address;
1141 	size = PAGE_ALIGN(size);
1142 
1143 	if ((address % B_PAGE_SIZE) != 0
1144 		|| (addr_t)address + size < (addr_t)address
1145 		|| !IS_KERNEL_ADDRESS(address)
1146 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1147 		return B_BAD_VALUE;
1148 	}
1149 
1150 	// Translate the kernel protection to user protection as we only store that.
1151 	if ((protection & B_KERNEL_READ_AREA) != 0)
1152 		protection |= B_READ_AREA;
1153 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1154 		protection |= B_WRITE_AREA;
1155 
1156 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1157 	VMTranslationMap* map = addressSpace->TranslationMap();
1158 	VMArea* area = (VMArea*)cookie;
1159 
1160 	addr_t offset = address - area->Base();
1161 	if (area->Size() - offset < size) {
1162 		panic("protect range not fully within supplied area");
1163 		return B_BAD_VALUE;
1164 	}
1165 
1166 	if (area->page_protections == NULL) {
1167 		panic("area has no page protections");
1168 		return B_BAD_VALUE;
1169 	}
1170 
1171 	// Invalidate the mapping entries so any access to them will fault or
1172 	// restore the mapping entries unchanged so that lookup will succeed again.
1173 	map->Lock();
1174 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1175 	map->Unlock();
1176 
1177 	// And set the proper page protections so that the fault case will actually
1178 	// fail and not simply try to map a new page.
1179 	for (addr_t pageAddress = address; pageAddress < address + size;
1180 			pageAddress += B_PAGE_SIZE) {
1181 		set_area_page_protection(area, pageAddress, protection);
1182 	}
1183 
1184 	return B_OK;
1185 }
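
// Hypothetical usage of the two debug helpers above (the area and addresses
// are made-up examples):
//
//	void* cookie;
//	if (vm_prepare_kernel_area_debug_protection(stackArea, &cookie) == B_OK) {
//		// make the guard page fault on any access
//		vm_set_kernel_area_debug_protection(cookie, guardPage, B_PAGE_SIZE, 0);
//		// ... and later make it accessible again
//		vm_set_kernel_area_debug_protection(cookie, guardPage, B_PAGE_SIZE,
//			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	}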
1186 
1187 
1188 status_t
1189 vm_block_address_range(const char* name, void* address, addr_t size)
1190 {
1191 	if (!arch_vm_supports_protection(0))
1192 		return B_NOT_SUPPORTED;
1193 
1194 	AddressSpaceWriteLocker locker;
1195 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1196 	if (status != B_OK)
1197 		return status;
1198 
1199 	VMAddressSpace* addressSpace = locker.AddressSpace();
1200 
1201 	// create an anonymous cache
1202 	VMCache* cache;
1203 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1204 		VM_PRIORITY_SYSTEM);
1205 	if (status != B_OK)
1206 		return status;
1207 
1208 	cache->temporary = 1;
1209 	cache->virtual_end = size;
1210 	cache->Lock();
1211 
1212 	VMArea* area;
1213 	virtual_address_restrictions addressRestrictions = {};
1214 	addressRestrictions.address = address;
1215 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1216 	status = map_backing_store(addressSpace, cache, 0, name, size,
1217 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1218 		true, &area, NULL);
1219 	if (status != B_OK) {
1220 		cache->ReleaseRefAndUnlock();
1221 		return status;
1222 	}
1223 
1224 	cache->Unlock();
1225 	area->cache_type = CACHE_TYPE_RAM;
1226 	return area->id;
1227 }
1228 
1229 
1230 status_t
1231 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1232 {
1233 	AddressSpaceWriteLocker locker(team);
1234 	if (!locker.IsLocked())
1235 		return B_BAD_TEAM_ID;
1236 
1237 	VMAddressSpace* addressSpace = locker.AddressSpace();
1238 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1239 		addressSpace == VMAddressSpace::Kernel()
1240 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1241 }
1242 
1243 
1244 status_t
1245 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1246 	addr_t size, uint32 flags)
1247 {
1248 	if (size == 0)
1249 		return B_BAD_VALUE;
1250 
1251 	AddressSpaceWriteLocker locker(team);
1252 	if (!locker.IsLocked())
1253 		return B_BAD_TEAM_ID;
1254 
1255 	virtual_address_restrictions addressRestrictions = {};
1256 	addressRestrictions.address = *_address;
1257 	addressRestrictions.address_specification = addressSpec;
1258 	VMAddressSpace* addressSpace = locker.AddressSpace();
1259 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1260 		addressSpace == VMAddressSpace::Kernel()
1261 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1262 		_address);
1263 }
1264 
1265 
1266 area_id
1267 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1268 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1269 	const virtual_address_restrictions* virtualAddressRestrictions,
1270 	const physical_address_restrictions* physicalAddressRestrictions,
1271 	bool kernel, void** _address)
1272 {
1273 	VMArea* area;
1274 	VMCache* cache;
1275 	vm_page* page = NULL;
1276 	bool isStack = (protection & B_STACK_AREA) != 0;
1277 	page_num_t guardPages;
1278 	bool canOvercommit = false;
1279 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1280 		? VM_PAGE_ALLOC_CLEAR : 0;
1281 
1282 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1283 		team, name, size));
1284 
1285 	size = PAGE_ALIGN(size);
1286 	guardSize = PAGE_ALIGN(guardSize);
1287 	guardPages = guardSize / B_PAGE_SIZE;
1288 
1289 	if (size == 0 || size < guardSize)
1290 		return B_BAD_VALUE;
1291 	if (!arch_vm_supports_protection(protection))
1292 		return B_NOT_SUPPORTED;
1293 
1294 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1295 		canOvercommit = true;
1296 
1297 #ifdef DEBUG_KERNEL_STACKS
1298 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1299 		isStack = true;
1300 #endif
1301 
1302 	// check parameters
1303 	switch (virtualAddressRestrictions->address_specification) {
1304 		case B_ANY_ADDRESS:
1305 		case B_EXACT_ADDRESS:
1306 		case B_BASE_ADDRESS:
1307 		case B_ANY_KERNEL_ADDRESS:
1308 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1309 		case B_RANDOMIZED_ANY_ADDRESS:
1310 		case B_RANDOMIZED_BASE_ADDRESS:
1311 			break;
1312 
1313 		default:
1314 			return B_BAD_VALUE;
1315 	}
1316 
1317 	// If low or high physical address restrictions are given, we force
1318 	// B_CONTIGUOUS wiring, since only then we'll use
1319 	// vm_page_allocate_page_run() which deals with those restrictions.
1320 	if (physicalAddressRestrictions->low_address != 0
1321 		|| physicalAddressRestrictions->high_address != 0) {
1322 		wiring = B_CONTIGUOUS;
1323 	}
1324 
1325 	physical_address_restrictions stackPhysicalRestrictions;
1326 	bool doReserveMemory = false;
1327 	switch (wiring) {
1328 		case B_NO_LOCK:
1329 			break;
1330 		case B_FULL_LOCK:
1331 		case B_LAZY_LOCK:
1332 		case B_CONTIGUOUS:
1333 			doReserveMemory = true;
1334 			break;
1335 		case B_ALREADY_WIRED:
1336 			break;
1337 		case B_LOMEM:
1338 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1339 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1340 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1341 			wiring = B_CONTIGUOUS;
1342 			doReserveMemory = true;
1343 			break;
1344 		case B_32_BIT_FULL_LOCK:
1345 			if (B_HAIKU_PHYSICAL_BITS <= 32
1346 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1347 				wiring = B_FULL_LOCK;
1348 				doReserveMemory = true;
1349 				break;
1350 			}
1351 			// TODO: We don't really support this mode efficiently. Just fall
1352 			// through for now ...
1353 		case B_32_BIT_CONTIGUOUS:
1354 			#if B_HAIKU_PHYSICAL_BITS > 32
1355 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1356 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1357 					stackPhysicalRestrictions.high_address
1358 						= (phys_addr_t)1 << 32;
1359 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1360 				}
1361 			#endif
1362 			wiring = B_CONTIGUOUS;
1363 			doReserveMemory = true;
1364 			break;
1365 		default:
1366 			return B_BAD_VALUE;
1367 	}
1368 
1369 	// Optimization: For a single-page contiguous allocation without low/high
1370 	// memory restriction B_FULL_LOCK wiring suffices.
1371 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1372 		&& physicalAddressRestrictions->low_address == 0
1373 		&& physicalAddressRestrictions->high_address == 0) {
1374 		wiring = B_FULL_LOCK;
1375 	}
1376 
1377 	// For full lock or contiguous areas we're also going to map the pages and
1378 	// thus need to reserve pages for the mapping backend upfront.
1379 	addr_t reservedMapPages = 0;
1380 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1381 		AddressSpaceWriteLocker locker;
1382 		status_t status = locker.SetTo(team);
1383 		if (status != B_OK)
1384 			return status;
1385 
1386 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1387 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1388 	}
1389 
1390 	int priority;
1391 	if (team != VMAddressSpace::KernelID())
1392 		priority = VM_PRIORITY_USER;
1393 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1394 		priority = VM_PRIORITY_VIP;
1395 	else
1396 		priority = VM_PRIORITY_SYSTEM;
1397 
1398 	// Reserve memory before acquiring the address space lock. This reduces the
1399 	// chances of failure, since while holding the write lock to the address
1400 	// space (if it is the kernel address space that is), the low memory handler
1401 	// won't be able to free anything for us.
1402 	addr_t reservedMemory = 0;
1403 	if (doReserveMemory) {
1404 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1405 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1406 			return B_NO_MEMORY;
1407 		reservedMemory = size;
1408 		// TODO: We don't reserve the memory for the pages for the page
1409 		// directories/tables. We actually need to do so, since we currently
1410 		// don't reclaim them (and probably can't reclaim all of them anyway).
1411 		// Thus there are actually fewer physical pages than there should be, which
1412 		// can get the VM into trouble in low memory situations.
1413 	}
1414 
1415 	AddressSpaceWriteLocker locker;
1416 	VMAddressSpace* addressSpace;
1417 	status_t status;
1418 
1419 	// For full lock areas reserve the pages before locking the address
1420 	// space. E.g. block caches can't release their memory while we hold the
1421 	// address space lock.
1422 	page_num_t reservedPages = reservedMapPages;
1423 	if (wiring == B_FULL_LOCK)
1424 		reservedPages += size / B_PAGE_SIZE;
1425 
1426 	vm_page_reservation reservation;
1427 	if (reservedPages > 0) {
1428 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1429 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1430 					priority)) {
1431 				reservedPages = 0;
1432 				status = B_WOULD_BLOCK;
1433 				goto err0;
1434 			}
1435 		} else
1436 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1437 	}
1438 
1439 	if (wiring == B_CONTIGUOUS) {
1440 		// we try to allocate the page run here upfront as this may easily
1441 		// fail for obvious reasons
1442 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1443 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1444 		if (page == NULL) {
1445 			status = B_NO_MEMORY;
1446 			goto err0;
1447 		}
1448 	}
1449 
1450 	// Lock the address space and, if B_EXACT_ADDRESS and
1451 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1452 	// is not wired.
1453 	do {
1454 		status = locker.SetTo(team);
1455 		if (status != B_OK)
1456 			goto err1;
1457 
1458 		addressSpace = locker.AddressSpace();
1459 	} while (virtualAddressRestrictions->address_specification
1460 			== B_EXACT_ADDRESS
1461 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1462 		&& wait_if_address_range_is_wired(addressSpace,
1463 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1464 
1465 	// create an anonymous cache
1466 	// if it's a stack, make sure that at least two pages are available
1467 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1468 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1469 		wiring == B_NO_LOCK, priority);
1470 	if (status != B_OK)
1471 		goto err1;
1472 
1473 	cache->temporary = 1;
1474 	cache->virtual_end = size;
1475 	cache->committed_size = reservedMemory;
1476 		// TODO: This should be done via a method.
1477 	reservedMemory = 0;
1478 
1479 	cache->Lock();
1480 
1481 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1482 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1483 		kernel, &area, _address);
1484 
1485 	if (status != B_OK) {
1486 		cache->ReleaseRefAndUnlock();
1487 		goto err1;
1488 	}
1489 
1490 	locker.DegradeToReadLock();
1491 
1492 	switch (wiring) {
1493 		case B_NO_LOCK:
1494 		case B_LAZY_LOCK:
1495 			// do nothing - the pages are mapped in as needed
1496 			break;
1497 
1498 		case B_FULL_LOCK:
1499 		{
1500 			// Allocate and map all pages for this area
1501 
1502 			off_t offset = 0;
1503 			for (addr_t address = area->Base();
1504 					address < area->Base() + (area->Size() - 1);
1505 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1506 #ifdef DEBUG_KERNEL_STACKS
1507 #	ifdef STACK_GROWS_DOWNWARDS
1508 				if (isStack && address < area->Base()
1509 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1510 #	else
1511 				if (isStack && address >= area->Base() + area->Size()
1512 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1513 #	endif
1514 					continue;
1515 #endif
1516 				vm_page* page = vm_page_allocate_page(&reservation,
1517 					PAGE_STATE_WIRED | pageAllocFlags);
1518 				cache->InsertPage(page, offset);
1519 				map_page(area, page, address, protection, &reservation);
1520 
1521 				DEBUG_PAGE_ACCESS_END(page);
1522 			}
1523 
1524 			break;
1525 		}
1526 
1527 		case B_ALREADY_WIRED:
1528 		{
1529 			// The pages should already be mapped. This is only really useful
1530 			// during boot time. Find the appropriate vm_page objects and stick
1531 			// them in the cache object.
1532 			VMTranslationMap* map = addressSpace->TranslationMap();
1533 			off_t offset = 0;
1534 
1535 			if (!gKernelStartup)
1536 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1537 
1538 			map->Lock();
1539 
1540 			for (addr_t virtualAddress = area->Base();
1541 					virtualAddress < area->Base() + (area->Size() - 1);
1542 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1543 				phys_addr_t physicalAddress;
1544 				uint32 flags;
1545 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1546 				if (status < B_OK) {
1547 					panic("looking up mapping failed for va 0x%lx\n",
1548 						virtualAddress);
1549 				}
1550 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1551 				if (page == NULL) {
1552 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1553 						"\n", physicalAddress);
1554 				}
1555 
1556 				DEBUG_PAGE_ACCESS_START(page);
1557 
1558 				cache->InsertPage(page, offset);
1559 				increment_page_wired_count(page);
1560 				vm_page_set_state(page, PAGE_STATE_WIRED);
1561 				page->busy = false;
1562 
1563 				DEBUG_PAGE_ACCESS_END(page);
1564 			}
1565 
1566 			map->Unlock();
1567 			break;
1568 		}
1569 
1570 		case B_CONTIGUOUS:
1571 		{
1572 			// We have already allocated our contiguous page run, so we can now
1573 			// just map them in the address space
1574 			VMTranslationMap* map = addressSpace->TranslationMap();
1575 			phys_addr_t physicalAddress
1576 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1577 			addr_t virtualAddress = area->Base();
1578 			off_t offset = 0;
1579 
1580 			map->Lock();
1581 
1582 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1583 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1584 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1585 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1586 				if (page == NULL)
1587 					panic("couldn't lookup physical page just allocated\n");
1588 
1589 				status = map->Map(virtualAddress, physicalAddress, protection,
1590 					area->MemoryType(), &reservation);
1591 				if (status < B_OK)
1592 					panic("couldn't map physical page in page run\n");
1593 
1594 				cache->InsertPage(page, offset);
1595 				increment_page_wired_count(page);
1596 
1597 				DEBUG_PAGE_ACCESS_END(page);
1598 			}
1599 
1600 			map->Unlock();
1601 			break;
1602 		}
1603 
1604 		default:
1605 			break;
1606 	}
1607 
1608 	cache->Unlock();
1609 
1610 	if (reservedPages > 0)
1611 		vm_page_unreserve_pages(&reservation);
1612 
1613 	TRACE(("vm_create_anonymous_area: done\n"));
1614 
1615 	area->cache_type = CACHE_TYPE_RAM;
1616 	return area->id;
1617 
1618 err1:
1619 	if (wiring == B_CONTIGUOUS) {
1620 		// we had reserved the area space upfront...
1621 		// we had allocated the contiguous page run upfront, free it again...
1622 		int32 i;
1623 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1624 			page = vm_lookup_page(pageNumber);
1625 			if (page == NULL)
1626 				panic("couldn't lookup physical page just allocated\n");
1627 
1628 			vm_page_set_state(page, PAGE_STATE_FREE);
1629 		}
1630 	}
1631 
1632 err0:
1633 	if (reservedPages > 0)
1634 		vm_page_unreserve_pages(&reservation);
1635 	if (reservedMemory > 0)
1636 		vm_unreserve_memory(reservedMemory);
1637 
1638 	return status;
1639 }
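
// Minimal sketch of a kernel-side call to vm_create_anonymous_area() (the
// name and size are illustrative):
//
//	virtual_address_restrictions virtualRestrictions = {};
//	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
//	physical_address_restrictions physicalRestrictions = {};
//	void* address;
//	area_id id = vm_create_anonymous_area(VMAddressSpace::KernelID(),
//		"some buffer", 4 * B_PAGE_SIZE, B_FULL_LOCK,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
//		&physicalRestrictions, true, &address);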
1640 
1641 
1642 area_id
1643 vm_map_physical_memory(team_id team, const char* name, void** _address,
1644 	uint32 addressSpec, addr_t size, uint32 protection,
1645 	phys_addr_t physicalAddress, bool alreadyWired)
1646 {
1647 	VMArea* area;
1648 	VMCache* cache;
1649 	addr_t mapOffset;
1650 
1651 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1652 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1653 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1654 		addressSpec, size, protection, physicalAddress));
1655 
1656 	if (!arch_vm_supports_protection(protection))
1657 		return B_NOT_SUPPORTED;
1658 
1659 	AddressSpaceWriteLocker locker(team);
1660 	if (!locker.IsLocked())
1661 		return B_BAD_TEAM_ID;
1662 
1663 	// if the physical address is not page aligned,
1664 	// move the actual area down to align on a page boundary
1665 	mapOffset = physicalAddress % B_PAGE_SIZE;
1666 	size += mapOffset;
1667 	physicalAddress -= mapOffset;
1668 
1669 	size = PAGE_ALIGN(size);
1670 
1671 	// create a device cache
1672 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1673 	if (status != B_OK)
1674 		return status;
1675 
1676 	cache->virtual_end = size;
1677 
1678 	cache->Lock();
1679 
1680 	virtual_address_restrictions addressRestrictions = {};
1681 	addressRestrictions.address = *_address;
1682 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1683 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1684 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1685 		true, &area, _address);
1686 
1687 	if (status < B_OK)
1688 		cache->ReleaseRefLocked();
1689 
1690 	cache->Unlock();
1691 
1692 	if (status == B_OK) {
1693 		// set requested memory type -- use uncached, if not given
1694 		uint32 memoryType = addressSpec & B_MTR_MASK;
1695 		if (memoryType == 0)
1696 			memoryType = B_MTR_UC;
1697 
1698 		area->SetMemoryType(memoryType);
1699 
1700 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1701 		if (status != B_OK)
1702 			delete_area(locker.AddressSpace(), area, false);
1703 	}
1704 
1705 	if (status != B_OK)
1706 		return status;
1707 
1708 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1709 
1710 	if (alreadyWired) {
1711 		// The area is already mapped, but possibly not with the right
1712 		// memory type.
1713 		map->Lock();
1714 		map->ProtectArea(area, area->protection);
1715 		map->Unlock();
1716 	} else {
1717 		// Map the area completely.
1718 
1719 		// reserve pages needed for the mapping
1720 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1721 			area->Base() + (size - 1));
1722 		vm_page_reservation reservation;
1723 		vm_page_reserve_pages(&reservation, reservePages,
1724 			team == VMAddressSpace::KernelID()
1725 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1726 
1727 		map->Lock();
1728 
1729 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1730 			map->Map(area->Base() + offset, physicalAddress + offset,
1731 				protection, area->MemoryType(), &reservation);
1732 		}
1733 
1734 		map->Unlock();
1735 
1736 		vm_page_unreserve_pages(&reservation);
1737 	}
1738 
1739 	// modify the pointer returned to be offset back into the new area
1740 	// the same way the physical address passed in was offset
1741 	*_address = (void*)((addr_t)*_address + mapOffset);
1742 
1743 	area->cache_type = CACHE_TYPE_DEVICE;
1744 	return area->id;
1745 }
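
// Hypothetical driver-style use of vm_map_physical_memory() (the physical
// address, size and memory type are made up):
//
//	void* virtualAddress = NULL;
//	area_id id = vm_map_physical_memory(VMAddressSpace::KernelID(),
//		"frame buffer", &virtualAddress, B_ANY_KERNEL_ADDRESS | B_MTR_WC,
//		frameBufferSize, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
//		frameBufferPhysicalAddress, false);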
1746 
1747 
1748 /*!	Don't use!
1749 	TODO: This function was introduced to map physical page vecs to
1750 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1751 	use a device cache and does not track vm_page::wired_count!
1752 */
1753 area_id
1754 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1755 	uint32 addressSpec, addr_t* _size, uint32 protection,
1756 	struct generic_io_vec* vecs, uint32 vecCount)
1757 {
1758 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1759 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1760 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1761 		addressSpec, _size, protection, vecs, vecCount));
1762 
1763 	if (!arch_vm_supports_protection(protection)
1764 		|| (addressSpec & B_MTR_MASK) != 0) {
1765 		return B_NOT_SUPPORTED;
1766 	}
1767 
1768 	AddressSpaceWriteLocker locker(team);
1769 	if (!locker.IsLocked())
1770 		return B_BAD_TEAM_ID;
1771 
1772 	if (vecCount == 0)
1773 		return B_BAD_VALUE;
1774 
1775 	addr_t size = 0;
1776 	for (uint32 i = 0; i < vecCount; i++) {
1777 		if (vecs[i].base % B_PAGE_SIZE != 0
1778 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1779 			return B_BAD_VALUE;
1780 		}
1781 
1782 		size += vecs[i].length;
1783 	}
1784 
1785 	// create a device cache
1786 	VMCache* cache;
1787 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1788 	if (result != B_OK)
1789 		return result;
1790 
1791 	cache->virtual_end = size;
1792 
1793 	cache->Lock();
1794 
1795 	VMArea* area;
1796 	virtual_address_restrictions addressRestrictions = {};
1797 	addressRestrictions.address = *_address;
1798 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1799 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1800 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1801 		&addressRestrictions, true, &area, _address);
1802 
1803 	if (result != B_OK)
1804 		cache->ReleaseRefLocked();
1805 
1806 	cache->Unlock();
1807 
1808 	if (result != B_OK)
1809 		return result;
1810 
1811 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1812 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1813 		area->Base() + (size - 1));
1814 
1815 	vm_page_reservation reservation;
1816 	vm_page_reserve_pages(&reservation, reservePages,
1817 			team == VMAddressSpace::KernelID()
1818 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1819 	map->Lock();
1820 
1821 	uint32 vecIndex = 0;
1822 	size_t vecOffset = 0;
1823 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1824 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1825 			vecOffset = 0;
1826 			vecIndex++;
1827 		}
1828 
1829 		if (vecIndex >= vecCount)
1830 			break;
1831 
1832 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1833 			protection, area->MemoryType(), &reservation);
1834 
1835 		vecOffset += B_PAGE_SIZE;
1836 	}
1837 
1838 	map->Unlock();
1839 	vm_page_unreserve_pages(&reservation);
1840 
1841 	if (_size != NULL)
1842 		*_size = size;
1843 
1844 	area->cache_type = CACHE_TYPE_DEVICE;
1845 	return area->id;
1846 }
1847 
1848 
1849 area_id
1850 vm_create_null_area(team_id team, const char* name, void** address,
1851 	uint32 addressSpec, addr_t size, uint32 flags)
1852 {
1853 	size = PAGE_ALIGN(size);
1854 
1855 	// Lock the address space and, if B_EXACT_ADDRESS and
1856 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1857 	// is not wired.
1858 	AddressSpaceWriteLocker locker;
1859 	do {
1860 		if (locker.SetTo(team) != B_OK)
1861 			return B_BAD_TEAM_ID;
1862 	} while (addressSpec == B_EXACT_ADDRESS
1863 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1864 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1865 			(addr_t)*address, size, &locker));
1866 
1867 	// create a null cache
1868 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1869 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1870 	VMCache* cache;
1871 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1872 	if (status != B_OK)
1873 		return status;
1874 
1875 	cache->temporary = 1;
1876 	cache->virtual_end = size;
1877 
1878 	cache->Lock();
1879 
1880 	VMArea* area;
1881 	virtual_address_restrictions addressRestrictions = {};
1882 	addressRestrictions.address = *address;
1883 	addressRestrictions.address_specification = addressSpec;
1884 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1885 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1886 		&addressRestrictions, true, &area, address);
1887 
1888 	if (status < B_OK) {
1889 		cache->ReleaseRefAndUnlock();
1890 		return status;
1891 	}
1892 
1893 	cache->Unlock();
1894 
1895 	area->cache_type = CACHE_TYPE_NULL;
1896 	return area->id;
1897 }
1898 
1899 
1900 /*!	Creates the vnode cache for the specified \a vnode.
1901 	The vnode has to be marked busy when calling this function.
1902 */
1903 status_t
1904 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1905 {
1906 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1907 }
1908 
1909 
1910 /*!	\a cache must be locked. The area's address space must be read-locked.
1911 */
1912 static void
1913 pre_map_area_pages(VMArea* area, VMCache* cache,
1914 	vm_page_reservation* reservation)
1915 {
1916 	addr_t baseAddress = area->Base();
1917 	addr_t cacheOffset = area->cache_offset;
1918 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1919 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1920 
1921 	for (VMCachePagesTree::Iterator it
1922 				= cache->pages.GetIterator(firstPage, true, true);
1923 			vm_page* page = it.Next();) {
1924 		if (page->cache_offset >= endPage)
1925 			break;
1926 
1927 		// skip busy and inactive pages
1928 		if (page->busy || page->usage_count == 0)
1929 			continue;
1930 
1931 		DEBUG_PAGE_ACCESS_START(page);
1932 		map_page(area, page,
1933 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1934 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1935 		DEBUG_PAGE_ACCESS_END(page);
1936 	}
1937 }
1938 
1939 
1940 /*!	Will map the file specified by \a fd to an area in memory.
1941 	The file will be mirrored beginning at the specified \a offset. The
1942 	\a offset and \a size arguments have to be page aligned.
1943 */
1944 static area_id
1945 _vm_map_file(team_id team, const char* name, void** _address,
1946 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1947 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1948 {
1949 	// TODO: for binary files, we want to make sure that they get a snapshot
1950 	//	of the file at a given point in time, i.e. later changes should not
1951 	//	make it into the mapped copy -- this will need quite some changes
1952 	//	to be done in a nice way
1953 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1954 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1955 
1956 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1957 	size = PAGE_ALIGN(size);
1958 
1959 	if (mapping == REGION_NO_PRIVATE_MAP)
1960 		protection |= B_SHARED_AREA;
1961 	if (addressSpec != B_EXACT_ADDRESS)
1962 		unmapAddressRange = false;
1963 
1964 	if (fd < 0) {
1965 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1966 		virtual_address_restrictions virtualRestrictions = {};
1967 		virtualRestrictions.address = *_address;
1968 		virtualRestrictions.address_specification = addressSpec;
1969 		physical_address_restrictions physicalRestrictions = {};
1970 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1971 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1972 			_address);
1973 	}
1974 
1975 	// get the open flags of the FD
1976 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1977 	if (descriptor == NULL)
1978 		return EBADF;
1979 	int32 openMode = descriptor->open_mode;
1980 	put_fd(descriptor);
1981 
1982 	// The FD must be open for reading in any case. For a shared mapping with
1983 	// write access, the FD additionally has to be open for writing.
1984 	if ((openMode & O_ACCMODE) == O_WRONLY
1985 		|| (mapping == REGION_NO_PRIVATE_MAP
1986 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1987 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1988 		return EACCES;
1989 	}
1990 
1991 	// get the vnode for the object, this also grabs a ref to it
1992 	struct vnode* vnode = NULL;
1993 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1994 	if (status < B_OK)
1995 		return status;
1996 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1997 
1998 	// If we're going to pre-map pages, we need to reserve the pages needed by
1999 	// the mapping backend upfront.
2000 	page_num_t reservedPreMapPages = 0;
2001 	vm_page_reservation reservation;
2002 	if ((protection & B_READ_AREA) != 0) {
2003 		AddressSpaceWriteLocker locker;
2004 		status = locker.SetTo(team);
2005 		if (status != B_OK)
2006 			return status;
2007 
2008 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2009 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2010 
2011 		locker.Unlock();
2012 
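		// Note: reserving pages may have to wait until enough pages are free,
		// which is why the address space lock is dropped above before the
		// reservation is made.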
2013 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2014 			team == VMAddressSpace::KernelID()
2015 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2016 	}
2017 
2018 	struct PageUnreserver {
2019 		PageUnreserver(vm_page_reservation* reservation)
2020 			:
2021 			fReservation(reservation)
2022 		{
2023 		}
2024 
2025 		~PageUnreserver()
2026 		{
2027 			if (fReservation != NULL)
2028 				vm_page_unreserve_pages(fReservation);
2029 		}
2030 
2031 		vm_page_reservation* fReservation;
2032 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2033 
2034 	// Lock the address space and, if the specified address range shall be
2035 	// unmapped, ensure it is not wired.
2036 	AddressSpaceWriteLocker locker;
2037 	do {
2038 		if (locker.SetTo(team) != B_OK)
2039 			return B_BAD_TEAM_ID;
2040 	} while (unmapAddressRange
2041 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2042 			(addr_t)*_address, size, &locker));
2043 
2044 	// TODO: this only works for file systems that use the file cache
2045 	VMCache* cache;
2046 	status = vfs_get_vnode_cache(vnode, &cache, false);
2047 	if (status < B_OK)
2048 		return status;
2049 
2050 	cache->Lock();
2051 
2052 	VMArea* area;
2053 	virtual_address_restrictions addressRestrictions = {};
2054 	addressRestrictions.address = *_address;
2055 	addressRestrictions.address_specification = addressSpec;
2056 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2057 		0, protection, mapping,
2058 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
2059 		&addressRestrictions, kernel, &area, _address);
2060 
2061 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2062 		// map_backing_store() cannot know we no longer need the ref
2063 		cache->ReleaseRefLocked();
2064 	}
2065 
2066 	if (status == B_OK && (protection & B_READ_AREA) != 0)
2067 		pre_map_area_pages(area, cache, &reservation);
2068 
2069 	cache->Unlock();
2070 
2071 	if (status == B_OK) {
2072 		// TODO: this probably deserves a smarter solution, ie. don't always
2073 		// prefetch stuff, and also, probably don't trigger it at this place.
2074 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2075 			// prefetches at max 10 MB starting from "offset"
2076 	}
2077 
2078 	if (status != B_OK)
2079 		return status;
2080 
2081 	area->cache_type = CACHE_TYPE_VNODE;
2082 	return area->id;
2083 }
2084 
2085 
2086 area_id
2087 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2088 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2089 	int fd, off_t offset)
2090 {
2091 	if (!arch_vm_supports_protection(protection))
2092 		return B_NOT_SUPPORTED;
2093 
2094 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2095 		mapping, unmapAddressRange, fd, offset, true);
2096 }
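// A minimal usage sketch (assumed kernel-side caller; "fd" is an already open
// file descriptor): mapping the first 64 KB of a file read-only somewhere in
// the kernel address space:
//
//	void* address = NULL;
//	area_id area = vm_map_file(VMAddressSpace::KernelID(), "mapped file",
//		&address, B_ANY_KERNEL_ADDRESS, 64 * 1024, B_KERNEL_READ_AREA,
//		REGION_NO_PRIVATE_MAP, false, fd, 0);
//
// Both the offset (here 0) and the size are rounded to page boundaries by
// _vm_map_file().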
2097 
2098 
2099 VMCache*
2100 vm_area_get_locked_cache(VMArea* area)
2101 {
2102 	rw_lock_read_lock(&sAreaCacheLock);
2103 
2104 	while (true) {
2105 		VMCache* cache = area->cache;
2106 
2107 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2108 			// cache has been deleted
2109 			rw_lock_read_lock(&sAreaCacheLock);
2110 			continue;
2111 		}
2112 
2113 		rw_lock_read_lock(&sAreaCacheLock);
2114 
2115 		if (cache == area->cache) {
2116 			cache->AcquireRefLocked();
2117 			rw_lock_read_unlock(&sAreaCacheLock);
2118 			return cache;
2119 		}
2120 
2121 		// the cache changed in the meantime
2122 		cache->Unlock();
2123 	}
2124 }
2125 
2126 
2127 void
2128 vm_area_put_locked_cache(VMCache* cache)
2129 {
2130 	cache->ReleaseRefAndUnlock();
2131 }
2132 
2133 
2134 area_id
2135 vm_clone_area(team_id team, const char* name, void** address,
2136 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2137 	bool kernel)
2138 {
2139 	VMArea* newArea = NULL;
2140 	VMArea* sourceArea;
2141 
2142 	// Check whether the source area exists and is cloneable. If so, mark it
2143 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2144 	{
2145 		AddressSpaceWriteLocker locker;
2146 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2147 		if (status != B_OK)
2148 			return status;
2149 
2150 		sourceArea->protection |= B_SHARED_AREA;
2151 		protection |= B_SHARED_AREA;
2152 	}
2153 
2154 	// Now lock both address spaces and actually do the cloning.
2155 
2156 	MultiAddressSpaceLocker locker;
2157 	VMAddressSpace* sourceAddressSpace;
2158 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2159 	if (status != B_OK)
2160 		return status;
2161 
2162 	VMAddressSpace* targetAddressSpace;
2163 	status = locker.AddTeam(team, true, &targetAddressSpace);
2164 	if (status != B_OK)
2165 		return status;
2166 
2167 	status = locker.Lock();
2168 	if (status != B_OK)
2169 		return status;
2170 
2171 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2172 	if (sourceArea == NULL)
2173 		return B_BAD_VALUE;
2174 
2175 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2176 
2177 	if (!kernel && sourceAddressSpace != targetAddressSpace
2178 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2179 #if KDEBUG
2180 		Team* team = thread_get_current_thread()->team;
2181 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2182 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2183 #endif
2184 		status = B_NOT_ALLOWED;
2185 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2186 		status = B_NOT_ALLOWED;
2187 	} else {
2188 		virtual_address_restrictions addressRestrictions = {};
2189 		addressRestrictions.address = *address;
2190 		addressRestrictions.address_specification = addressSpec;
2191 		status = map_backing_store(targetAddressSpace, cache,
2192 			sourceArea->cache_offset, name, sourceArea->Size(),
2193 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2194 			kernel, &newArea, address);
2195 	}
2196 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2197 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2198 		// to create a new cache, and has therefore already acquired a reference
2199 		// to the source cache - but otherwise it has no idea that we need
2200 		// one.
2201 		cache->AcquireRefLocked();
2202 	}
2203 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2204 		// we need to map in everything at this point
2205 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2206 			// we don't have actual pages to map but a physical area
2207 			VMTranslationMap* map
2208 				= sourceArea->address_space->TranslationMap();
2209 			map->Lock();
2210 
2211 			phys_addr_t physicalAddress;
2212 			uint32 oldProtection;
2213 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2214 
2215 			map->Unlock();
2216 
2217 			map = targetAddressSpace->TranslationMap();
2218 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2219 				newArea->Base() + (newArea->Size() - 1));
2220 
2221 			vm_page_reservation reservation;
2222 			vm_page_reserve_pages(&reservation, reservePages,
2223 				targetAddressSpace == VMAddressSpace::Kernel()
2224 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2225 			map->Lock();
2226 
2227 			for (addr_t offset = 0; offset < newArea->Size();
2228 					offset += B_PAGE_SIZE) {
2229 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2230 					protection, newArea->MemoryType(), &reservation);
2231 			}
2232 
2233 			map->Unlock();
2234 			vm_page_unreserve_pages(&reservation);
2235 		} else {
2236 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2237 			size_t reservePages = map->MaxPagesNeededToMap(
2238 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2239 			vm_page_reservation reservation;
2240 			vm_page_reserve_pages(&reservation, reservePages,
2241 				targetAddressSpace == VMAddressSpace::Kernel()
2242 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2243 
2244 			// map in all pages from source
2245 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2246 					vm_page* page  = it.Next();) {
2247 				if (!page->busy) {
2248 					DEBUG_PAGE_ACCESS_START(page);
2249 					map_page(newArea, page,
2250 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2251 							- newArea->cache_offset),
2252 						protection, &reservation);
2253 					DEBUG_PAGE_ACCESS_END(page);
2254 				}
2255 			}
2256 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2257 			// ensuring that!
2258 
2259 			vm_page_unreserve_pages(&reservation);
2260 		}
2261 	}
2262 	if (status == B_OK)
2263 		newArea->cache_type = sourceArea->cache_type;
2264 
2265 	vm_area_put_locked_cache(cache);
2266 
2267 	if (status < B_OK)
2268 		return status;
2269 
2270 	return newArea->id;
2271 }
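// A hedged example (assumed userland caller, not from this file): the classic
// use of area cloning is mapping a driver's frame buffer area into another
// team via the public clone_area() API, roughly
//
//	void* baseAddress;
//	area_id clone = clone_area("fb clone", &baseAddress, B_ANY_ADDRESS,
//		B_READ_AREA | B_WRITE_AREA, frameBufferArea);
//
// which ends up in vm_clone_area() with the calling team as the target. Note
// that cross-team clones from userland require the source area to be marked
// B_CLONEABLE_AREA, as checked above.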
2272 
2273 
2274 /*!	Deletes the specified area of the given address space.
2275 
2276 	The address space must be write-locked.
2277 	The caller must ensure that the area does not have any wired ranges.
2278 
2279 	\param addressSpace The address space containing the area.
2280 	\param area The area to be deleted.
2281 	\param deletingAddressSpace \c true, if the address space is in the process
2282 		of being deleted.
2283 */
2284 static void
2285 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2286 	bool deletingAddressSpace)
2287 {
2288 	ASSERT(!area->IsWired());
2289 
2290 	VMAreaHash::Remove(area);
2291 
2292 	// At this point the area is removed from the global hash table, but
2293 	// still exists in the area list.
2294 
2295 	// Unmap the virtual address space the area occupied.
2296 	{
2297 		// We need to lock the complete cache chain.
2298 		VMCache* topCache = vm_area_get_locked_cache(area);
2299 		VMCacheChainLocker cacheChainLocker(topCache);
2300 		cacheChainLocker.LockAllSourceCaches();
2301 
2302 		// If the area's top cache is a temporary cache and the area is the only
2303 		// one referencing it (besides us currently holding a second reference),
2304 		// the unmapping code doesn't need to care about preserving the accessed
2305 		// and dirty flags of the top cache page mappings.
2306 		bool ignoreTopCachePageFlags
2307 			= topCache->temporary && topCache->RefCount() == 2;
2308 
2309 		area->address_space->TranslationMap()->UnmapArea(area,
2310 			deletingAddressSpace, ignoreTopCachePageFlags);
2311 	}
2312 
2313 	if (!area->cache->temporary)
2314 		area->cache->WriteModified();
2315 
2316 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2317 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2318 
2319 	arch_vm_unset_memory_type(area);
2320 	addressSpace->RemoveArea(area, allocationFlags);
2321 	addressSpace->Put();
2322 
2323 	area->cache->RemoveArea(area);
2324 	area->cache->ReleaseRef();
2325 
2326 	addressSpace->DeleteArea(area, allocationFlags);
2327 }
2328 
2329 
2330 status_t
2331 vm_delete_area(team_id team, area_id id, bool kernel)
2332 {
2333 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2334 		team, id));
2335 
2336 	// lock the address space and make sure the area isn't wired
2337 	AddressSpaceWriteLocker locker;
2338 	VMArea* area;
2339 	AreaCacheLocker cacheLocker;
2340 
2341 	do {
2342 		status_t status = locker.SetFromArea(team, id, area);
2343 		if (status != B_OK)
2344 			return status;
2345 
2346 		cacheLocker.SetTo(area);
2347 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2348 
2349 	cacheLocker.Unlock();
2350 
2351 	// SetFromArea will have returned an error if the area's owning team is not
2352 	// the same as the passed team, so we don't need to do those checks here.
2353 
2354 	delete_area(locker.AddressSpace(), area, false);
2355 	return B_OK;
2356 }
2357 
2358 
2359 /*!	Creates a new cache on top of the given cache, moves all areas from
2360 	the old cache to the new one, and changes the protection of all affected
2361 	areas' pages to read-only. If requested, wired pages are moved up to the
2362 	new cache and copies are added to the old cache in their place.
2363 	Preconditions:
2364 	- The given cache must be locked.
2365 	- All of the cache's areas' address spaces must be read locked.
2366 	- Either the cache must not have any wired ranges or a page reservation for
2367 	  all wired pages must be provided, so they can be copied.
2368 
2369 	\param lowerCache The cache on top of which a new cache shall be created.
2370 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2371 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2372 		has wired pages. The wired pages are copied in this case.
2373 */
2374 static status_t
2375 vm_copy_on_write_area(VMCache* lowerCache,
2376 	vm_page_reservation* wiredPagesReservation)
2377 {
2378 	VMCache* upperCache;
2379 
2380 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2381 
2382 	// We need to separate the cache from its areas. The cache goes one level
2383 	// deeper and we create a new cache in between.
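	// Illustrative sketch of the resulting cache chain (the new cache becomes
	// a consumer of the old one):
	//
	//	before:  area(s) -> lowerCache (writable pages)
	//	after:   area(s) -> upperCache (temporary, empty) -> lowerCache
	//	                                                     (read-only pages)
	//
	// The next write fault on one of the areas copies the touched page from
	// lowerCache into upperCache.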
2384 
2385 	// create an anonymous cache
2386 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2387 		lowerCache->GuardSize() / B_PAGE_SIZE,
2388 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2389 		VM_PRIORITY_USER);
2390 	if (status != B_OK)
2391 		return status;
2392 
2393 	upperCache->Lock();
2394 
2395 	upperCache->temporary = 1;
2396 	upperCache->virtual_base = lowerCache->virtual_base;
2397 	upperCache->virtual_end = lowerCache->virtual_end;
2398 
2399 	// transfer the lower cache areas to the upper cache
2400 	rw_lock_write_lock(&sAreaCacheLock);
2401 	upperCache->TransferAreas(lowerCache);
2402 	rw_lock_write_unlock(&sAreaCacheLock);
2403 
2404 	lowerCache->AddConsumer(upperCache);
2405 
2406 	// We now need to remap all pages from all of the cache's areas read-only,
2407 	// so that a copy will be created on next write access. If there are wired
2408 	// pages, we keep their protection, move them to the upper cache and create
2409 	// copies for the lower cache.
2410 	if (wiredPagesReservation != NULL) {
2411 		// We need to handle wired pages -- iterate through the cache's pages.
2412 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2413 				vm_page* page = it.Next();) {
2414 			if (page->WiredCount() > 0) {
2415 				// allocate a new page and copy the wired one
2416 				vm_page* copiedPage = vm_page_allocate_page(
2417 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2418 
2419 				vm_memcpy_physical_page(
2420 					copiedPage->physical_page_number * B_PAGE_SIZE,
2421 					page->physical_page_number * B_PAGE_SIZE);
2422 
2423 				// move the wired page to the upper cache (note: removing is OK
2424 				// with the SplayTree iterator) and insert the copy
2425 				upperCache->MovePage(page);
2426 				lowerCache->InsertPage(copiedPage,
2427 					page->cache_offset * B_PAGE_SIZE);
2428 
2429 				DEBUG_PAGE_ACCESS_END(copiedPage);
2430 			} else {
2431 				// Change the protection of this page in all areas.
2432 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2433 						tempArea = tempArea->cache_next) {
2434 					// The page must stay readable with the same user/kernel
2435 					// split as before; only write access is removed.
2436 					uint32 protection = B_KERNEL_READ_AREA;
2437 					if ((tempArea->protection & B_READ_AREA) != 0)
2438 						protection |= B_READ_AREA;
2439 
2440 					VMTranslationMap* map
2441 						= tempArea->address_space->TranslationMap();
2442 					map->Lock();
2443 					map->ProtectPage(tempArea,
2444 						virtual_page_address(tempArea, page), protection);
2445 					map->Unlock();
2446 				}
2447 			}
2448 		}
2449 	} else {
2450 		ASSERT(lowerCache->WiredPagesCount() == 0);
2451 
2452 		// just change the protection of all areas
2453 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2454 				tempArea = tempArea->cache_next) {
2455 			// The page must stay readable with the same user/kernel split as
2456 			// before; only write access is removed.
2457 			uint32 protection = B_KERNEL_READ_AREA;
2458 			if ((tempArea->protection & B_READ_AREA) != 0)
2459 				protection |= B_READ_AREA;
2460 
2461 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2462 			map->Lock();
2463 			map->ProtectArea(tempArea, protection);
2464 			map->Unlock();
2465 		}
2466 	}
2467 
2468 	vm_area_put_locked_cache(upperCache);
2469 
2470 	return B_OK;
2471 }
2472 
2473 
2474 area_id
2475 vm_copy_area(team_id team, const char* name, void** _address,
2476 	uint32 addressSpec, uint32 protection, area_id sourceID)
2477 {
2478 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2479 
2480 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2481 		// set the same protection for the kernel as for userland
2482 		protection |= B_KERNEL_READ_AREA;
2483 		if (writableCopy)
2484 			protection |= B_KERNEL_WRITE_AREA;
2485 	}
2486 
2487 	// Do the locking: target address space, all address spaces associated with
2488 	// the source cache, and the cache itself.
2489 	MultiAddressSpaceLocker locker;
2490 	VMAddressSpace* targetAddressSpace;
2491 	VMCache* cache;
2492 	VMArea* source;
2493 	AreaCacheLocker cacheLocker;
2494 	status_t status;
2495 	bool sharedArea;
2496 
2497 	page_num_t wiredPages = 0;
2498 	vm_page_reservation wiredPagesReservation;
2499 
2500 	bool restart;
2501 	do {
2502 		restart = false;
2503 
2504 		locker.Unset();
2505 		status = locker.AddTeam(team, true, &targetAddressSpace);
2506 		if (status == B_OK) {
2507 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2508 				&cache);
2509 		}
2510 		if (status != B_OK)
2511 			return status;
2512 
2513 		cacheLocker.SetTo(cache, true);	// already locked
2514 
2515 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2516 
2517 		page_num_t oldWiredPages = wiredPages;
2518 		wiredPages = 0;
2519 
2520 		// If the source area isn't shared, count the number of wired pages in
2521 		// the cache and reserve as many pages.
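		// (Reserving may have to wait for free pages, so both locks are
		// dropped first; the locking sequence is then restarted, because the
		// wired page count may have changed in the meantime.)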
2522 		if (!sharedArea) {
2523 			wiredPages = cache->WiredPagesCount();
2524 
2525 			if (wiredPages > oldWiredPages) {
2526 				cacheLocker.Unlock();
2527 				locker.Unlock();
2528 
2529 				if (oldWiredPages > 0)
2530 					vm_page_unreserve_pages(&wiredPagesReservation);
2531 
2532 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2533 					VM_PRIORITY_USER);
2534 
2535 				restart = true;
2536 			}
2537 		} else if (oldWiredPages > 0)
2538 			vm_page_unreserve_pages(&wiredPagesReservation);
2539 	} while (restart);
2540 
2541 	// unreserve pages later
2542 	struct PagesUnreserver {
2543 		PagesUnreserver(vm_page_reservation* reservation)
2544 			:
2545 			fReservation(reservation)
2546 		{
2547 		}
2548 
2549 		~PagesUnreserver()
2550 		{
2551 			if (fReservation != NULL)
2552 				vm_page_unreserve_pages(fReservation);
2553 		}
2554 
2555 	private:
2556 		vm_page_reservation*	fReservation;
2557 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2558 
2559 	if (addressSpec == B_CLONE_ADDRESS) {
2560 		addressSpec = B_EXACT_ADDRESS;
2561 		*_address = (void*)source->Base();
2562 	}
2563 
2564 	// First, create a cache on top of the source area, or use the existing
2565 	// one if this is a shared area.
2566 
2567 	VMArea* target;
2568 	virtual_address_restrictions addressRestrictions = {};
2569 	addressRestrictions.address = *_address;
2570 	addressRestrictions.address_specification = addressSpec;
2571 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2572 		name, source->Size(), source->wiring, protection,
2573 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2574 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2575 		&addressRestrictions, true, &target, _address);
2576 	if (status < B_OK)
2577 		return status;
2578 
2579 	if (sharedArea) {
2580 		// The new area uses the old area's cache, but map_backing_store()
2581 		// hasn't acquired a ref. So we have to do that now.
2582 		cache->AcquireRefLocked();
2583 	}
2584 
2585 	// If the source area is writable, we need to move it one layer up as well
2586 
2587 	if (!sharedArea) {
2588 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2589 			// TODO: do something more useful if this fails!
2590 			if (vm_copy_on_write_area(cache,
2591 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2592 				panic("vm_copy_on_write_area() failed!\n");
2593 			}
2594 		}
2595 	}
2596 
2597 	// we return the ID of the newly created area
2598 	return target->id;
2599 }
2600 
2601 
2602 status_t
2603 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2604 	bool kernel)
2605 {
2606 	fix_protection(&newProtection);
2607 
2608 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2609 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2610 
2611 	if (!arch_vm_supports_protection(newProtection))
2612 		return B_NOT_SUPPORTED;
2613 
2614 	bool becomesWritable
2615 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2616 
2617 	// lock address spaces and cache
2618 	MultiAddressSpaceLocker locker;
2619 	VMCache* cache;
2620 	VMArea* area;
2621 	status_t status;
2622 	AreaCacheLocker cacheLocker;
2623 	bool isWritable;
2624 
2625 	bool restart;
2626 	do {
2627 		restart = false;
2628 
2629 		locker.Unset();
2630 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2631 		if (status != B_OK)
2632 			return status;
2633 
2634 		cacheLocker.SetTo(cache, true);	// already locked
2635 
2636 		if (!kernel && area->address_space == VMAddressSpace::Kernel()) {
2637 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2638 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2639 				" (%s)\n", team, newProtection, areaID, area->name);
2640 			return B_NOT_ALLOWED;
2641 		}
2642 
2643 		if (area->protection == newProtection)
2644 			return B_OK;
2645 
2646 		if (team != VMAddressSpace::KernelID()
2647 			&& area->address_space->ID() != team) {
2648 			// unless you're the kernel, you are only allowed to set
2649 			// the protection of your own areas
2650 			return B_NOT_ALLOWED;
2651 		}
2652 
2653 		isWritable
2654 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2655 
2656 		// Make sure the area (or, if we're going to call
2657 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2658 		// wired ranges.
2659 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2660 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2661 					otherArea = otherArea->cache_next) {
2662 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2663 					restart = true;
2664 					break;
2665 				}
2666 			}
2667 		} else {
2668 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2669 				restart = true;
2670 		}
2671 	} while (restart);
2672 
2673 	bool changePageProtection = true;
2674 	bool changeTopCachePagesOnly = false;
2675 
2676 	if (isWritable && !becomesWritable) {
2677 		// writable -> !writable
2678 
2679 		if (cache->source != NULL && cache->temporary) {
2680 			if (cache->CountWritableAreas(area) == 0) {
2681 				// Since this cache is now backed by the pages of its source
2682 				// cache, we can reduce its commitment to cover only the pages
2683 				// that are actually in this cache.
2684 
2685 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2686 					team == VMAddressSpace::KernelID()
2687 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2688 
2689 				// TODO: we may be able to join with our source cache, if
2690 				// count == 0
2691 			}
2692 		}
2693 
2694 		// If only the writability changes, we can just remap the pages of the
2695 		// top cache, since the pages of lower caches are mapped read-only
2696 		// anyway. That is only advantageous if the number of pages in the
2697 		// cache is significantly smaller than the number of pages in the
2698 		// area, though.
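		// (Illustrative numbers: for a 4 MB area, i.e. 1024 pages, with only
		// 100 pages resident in the top cache, 2 * 100 < 1024 holds, so
		// remapping those 100 pages individually beats re-protecting the
		// whole area.)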
2699 		if (newProtection
2700 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2701 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2702 			changeTopCachePagesOnly = true;
2703 		}
2704 	} else if (!isWritable && becomesWritable) {
2705 		// !writable -> writable
2706 
2707 		if (!cache->consumers.IsEmpty()) {
2708 			// There are consumers -- we have to insert a new cache. Fortunately
2709 			// vm_copy_on_write_area() does everything that's needed.
2710 			changePageProtection = false;
2711 			status = vm_copy_on_write_area(cache, NULL);
2712 		} else {
2713 			// No consumers, so we don't need to insert a new one.
2714 			if (cache->source != NULL && cache->temporary) {
2715 				// the cache's commitment must contain all possible pages
2716 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2717 					team == VMAddressSpace::KernelID()
2718 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2719 			}
2720 
2721 			if (status == B_OK && cache->source != NULL) {
2722 				// There's a source cache, hence we can't just change all pages'
2723 				// protection or we might allow writing into pages belonging to
2724 				// a lower cache.
2725 				changeTopCachePagesOnly = true;
2726 			}
2727 		}
2728 	} else {
2729 		// we don't have anything special to do in all other cases
2730 	}
2731 
2732 	if (status == B_OK) {
2733 		// remap existing pages in this cache
2734 		if (changePageProtection) {
2735 			VMTranslationMap* map = area->address_space->TranslationMap();
2736 			map->Lock();
2737 
2738 			if (changeTopCachePagesOnly) {
2739 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2740 				page_num_t lastPageOffset
2741 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2742 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2743 						vm_page* page = it.Next();) {
2744 					if (page->cache_offset >= firstPageOffset
2745 						&& page->cache_offset < lastPageOffset) {
2746 						addr_t address = virtual_page_address(area, page);
2747 						map->ProtectPage(area, address, newProtection);
2748 					}
2749 				}
2750 			} else
2751 				map->ProtectArea(area, newProtection);
2752 
2753 			map->Unlock();
2754 		}
2755 
2756 		area->protection = newProtection;
2757 	}
2758 
2759 	return status;
2760 }
2761 
2762 
2763 status_t
2764 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2765 {
2766 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2767 	if (addressSpace == NULL)
2768 		return B_BAD_TEAM_ID;
2769 
2770 	VMTranslationMap* map = addressSpace->TranslationMap();
2771 
2772 	map->Lock();
2773 	uint32 dummyFlags;
2774 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2775 	map->Unlock();
2776 
2777 	addressSpace->Put();
2778 	return status;
2779 }
2780 
2781 
2782 /*!	The page's cache must be locked.
2783 */
2784 bool
2785 vm_test_map_modification(vm_page* page)
2786 {
2787 	if (page->modified)
2788 		return true;
2789 
2790 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2791 	vm_page_mapping* mapping;
2792 	while ((mapping = iterator.Next()) != NULL) {
2793 		VMArea* area = mapping->area;
2794 		VMTranslationMap* map = area->address_space->TranslationMap();
2795 
2796 		phys_addr_t physicalAddress;
2797 		uint32 flags;
2798 		map->Lock();
2799 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2800 		map->Unlock();
2801 
2802 		if ((flags & PAGE_MODIFIED) != 0)
2803 			return true;
2804 	}
2805 
2806 	return false;
2807 }
2808 
2809 
2810 /*!	The page's cache must be locked.
2811 */
2812 void
2813 vm_clear_map_flags(vm_page* page, uint32 flags)
2814 {
2815 	if ((flags & PAGE_ACCESSED) != 0)
2816 		page->accessed = false;
2817 	if ((flags & PAGE_MODIFIED) != 0)
2818 		page->modified = false;
2819 
2820 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2821 	vm_page_mapping* mapping;
2822 	while ((mapping = iterator.Next()) != NULL) {
2823 		VMArea* area = mapping->area;
2824 		VMTranslationMap* map = area->address_space->TranslationMap();
2825 
2826 		map->Lock();
2827 		map->ClearFlags(virtual_page_address(area, page), flags);
2828 		map->Unlock();
2829 	}
2830 }
2831 
2832 
2833 /*!	Removes all mappings from a page.
2834 	After you've called this function, the page is unmapped from memory and
2835 	the page's \c accessed and \c modified flags have been updated according
2836 	to the state of the mappings.
2837 	The page's cache must be locked.
2838 */
2839 void
2840 vm_remove_all_page_mappings(vm_page* page)
2841 {
2842 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2843 		VMArea* area = mapping->area;
2844 		VMTranslationMap* map = area->address_space->TranslationMap();
2845 		addr_t address = virtual_page_address(area, page);
2846 		map->UnmapPage(area, address, false);
2847 	}
2848 }
2849 
2850 
2851 int32
2852 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2853 {
2854 	int32 count = 0;
2855 
2856 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2857 	vm_page_mapping* mapping;
2858 	while ((mapping = iterator.Next()) != NULL) {
2859 		VMArea* area = mapping->area;
2860 		VMTranslationMap* map = area->address_space->TranslationMap();
2861 
2862 		bool modified;
2863 		if (map->ClearAccessedAndModified(area,
2864 				virtual_page_address(area, page), false, modified)) {
2865 			count++;
2866 		}
2867 
2868 		page->modified |= modified;
2869 	}
2870 
2872 	if (page->accessed) {
2873 		count++;
2874 		page->accessed = false;
2875 	}
2876 
2877 	return count;
2878 }
2879 
2880 
2881 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2882 	mappings.
2883 	The function iterates through the page mappings and removes them until
2884 	encountering one that has been accessed. From then on it will continue to
2885 	iterate, but only clear the accessed flag of the mapping. The page's
2886 	\c modified bit will be updated accordingly, the \c accessed bit will be
2887 	cleared.
2888 	\return The number of mapping accessed bits encountered, including the
2889 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2890 		of the page have been removed.
2891 */
2892 int32
2893 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2894 {
2895 	ASSERT(page->WiredCount() == 0);
2896 
2897 	if (page->accessed)
2898 		return vm_clear_page_mapping_accessed_flags(page);
2899 
2900 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2901 		VMArea* area = mapping->area;
2902 		VMTranslationMap* map = area->address_space->TranslationMap();
2903 		addr_t address = virtual_page_address(area, page);
2904 		bool modified = false;
2905 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2906 			page->accessed = true;
2907 			page->modified |= modified;
2908 			return vm_clear_page_mapping_accessed_flags(page);
2909 		}
2910 		page->modified |= modified;
2911 	}
2912 
2913 	return 0;
2914 }
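// A hedged sketch of the intended use (assumed caller, not from this file):
// a page-aging pass can combine the two functions above along the lines of
//
//	if (vm_remove_all_page_mappings_if_unaccessed(page) == 0) {
//		// No mapping was accessed -- the page is now fully unmapped and
//		// can be aged or recycled.
//	}
//
// whereas pages with accessed mappings merely get those flags cleared for the
// next pass.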
2915 
2916 
2917 static int
2918 display_mem(int argc, char** argv)
2919 {
2920 	bool physical = false;
2921 	addr_t copyAddress;
2922 	int32 displayWidth;
2923 	int32 itemSize;
2924 	int32 num = -1;
2925 	addr_t address;
2926 	int i = 1, j;
2927 
2928 	if (argc > 1 && argv[1][0] == '-') {
2929 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2930 			physical = true;
2931 			i++;
2932 		} else
2933 			i = 99;
2934 	}
2935 
2936 	if (argc < i + 1 || argc > i + 2) {
2937 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2938 			"\tdl - 8 bytes\n"
2939 			"\tdw - 4 bytes\n"
2940 			"\tds - 2 bytes\n"
2941 			"\tdb - 1 byte\n"
2942 			"\tstring - a whole string\n"
2943 			"  -p or --physical only allows memory from a single page to be "
2944 			"displayed.\n");
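		// Example (illustrative addresses): "dw 0x80001000 8" prints eight
		// 32-bit values, four per line; "db -p 0x9f000" dumps 16 bytes from
		// the physical page containing 0x9f000.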
2945 		return 0;
2946 	}
2947 
2948 	address = parse_expression(argv[i]);
2949 
2950 	if (argc > i + 1)
2951 		num = parse_expression(argv[i + 1]);
2952 
2953 	// build the format string
2954 	if (strcmp(argv[0], "db") == 0) {
2955 		itemSize = 1;
2956 		displayWidth = 16;
2957 	} else if (strcmp(argv[0], "ds") == 0) {
2958 		itemSize = 2;
2959 		displayWidth = 8;
2960 	} else if (strcmp(argv[0], "dw") == 0) {
2961 		itemSize = 4;
2962 		displayWidth = 4;
2963 	} else if (strcmp(argv[0], "dl") == 0) {
2964 		itemSize = 8;
2965 		displayWidth = 2;
2966 	} else if (strcmp(argv[0], "string") == 0) {
2967 		itemSize = 1;
2968 		displayWidth = -1;
2969 	} else {
2970 		kprintf("display_mem called in an invalid way!\n");
2971 		return 0;
2972 	}
2973 
2974 	if (num <= 0)
2975 		num = displayWidth;
2976 
2977 	void* physicalPageHandle = NULL;
2978 
2979 	if (physical) {
2980 		int32 offset = address & (B_PAGE_SIZE - 1);
2981 		if (num * itemSize + offset > B_PAGE_SIZE) {
2982 			num = (B_PAGE_SIZE - offset) / itemSize;
2983 			kprintf("NOTE: number of bytes has been cut to page size\n");
2984 		}
2985 
2986 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2987 
2988 		if (vm_get_physical_page_debug(address, &copyAddress,
2989 				&physicalPageHandle) != B_OK) {
2990 			kprintf("getting the hardware page failed.\n");
2991 			return 0;
2992 		}
2993 
2994 		address += offset;
2995 		copyAddress += offset;
2996 	} else
2997 		copyAddress = address;
2998 
2999 	if (!strcmp(argv[0], "string")) {
3000 		kprintf("%p \"", (char*)copyAddress);
3001 
3002 		// string mode
3003 		for (i = 0; true; i++) {
3004 			char c;
3005 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3006 					!= B_OK
3007 				|| c == '\0') {
3008 				break;
3009 			}
3010 
3011 			if (c == '\n')
3012 				kprintf("\\n");
3013 			else if (c == '\t')
3014 				kprintf("\\t");
3015 			else {
3016 				if (!isprint(c))
3017 					c = '.';
3018 
3019 				kprintf("%c", c);
3020 			}
3021 		}
3022 
3023 		kprintf("\"\n");
3024 	} else {
3025 		// number mode
3026 		for (i = 0; i < num; i++) {
3027 			uint64 value;
3028 
3029 			if ((i % displayWidth) == 0) {
3030 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3031 				if (i != 0)
3032 					kprintf("\n");
3033 
3034 				kprintf("[0x%lx]  ", address + i * itemSize);
3035 
3036 				for (j = 0; j < displayed; j++) {
3037 					char c;
3038 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3039 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3040 						displayed = j;
3041 						break;
3042 					}
3043 					if (!isprint(c))
3044 						c = '.';
3045 
3046 					kprintf("%c", c);
3047 				}
3048 				if (num > displayWidth) {
3049 					// make sure the spacing in the last line is correct
3050 					for (j = displayed; j < displayWidth * itemSize; j++)
3051 						kprintf(" ");
3052 				}
3053 				kprintf("  ");
3054 			}
3055 
3056 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3057 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3058 				kprintf("read fault");
3059 				break;
3060 			}
3061 
3062 			switch (itemSize) {
3063 				case 1:
3064 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3065 					break;
3066 				case 2:
3067 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3068 					break;
3069 				case 4:
3070 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3071 					break;
3072 				case 8:
3073 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3074 					break;
3075 			}
3076 		}
3077 
3078 		kprintf("\n");
3079 	}
3080 
3081 	if (physical) {
3082 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3083 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3084 	}
3085 	return 0;
3086 }
3087 
3088 
3089 static void
3090 dump_cache_tree_recursively(VMCache* cache, int level,
3091 	VMCache* highlightCache)
3092 {
3093 	// print this cache
3094 	for (int i = 0; i < level; i++)
3095 		kprintf("  ");
3096 	if (cache == highlightCache)
3097 		kprintf("%p <--\n", cache);
3098 	else
3099 		kprintf("%p\n", cache);
3100 
3101 	// recursively print its consumers
3102 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3103 			VMCache* consumer = it.Next();) {
3104 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3105 	}
3106 }
3107 
3108 
3109 static int
3110 dump_cache_tree(int argc, char** argv)
3111 {
3112 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3113 		kprintf("usage: %s <address>\n", argv[0]);
3114 		return 0;
3115 	}
3116 
3117 	addr_t address = parse_expression(argv[1]);
3118 	if (address == 0)
3119 		return 0;
3120 
3121 	VMCache* cache = (VMCache*)address;
3122 	VMCache* root = cache;
3123 
3124 	// find the root cache (the transitive source)
3125 	while (root->source != NULL)
3126 		root = root->source;
3127 
3128 	dump_cache_tree_recursively(root, 0, cache);
3129 
3130 	return 0;
3131 }
3132 
3133 
3134 const char*
3135 vm_cache_type_to_string(int32 type)
3136 {
3137 	switch (type) {
3138 		case CACHE_TYPE_RAM:
3139 			return "RAM";
3140 		case CACHE_TYPE_DEVICE:
3141 			return "device";
3142 		case CACHE_TYPE_VNODE:
3143 			return "vnode";
3144 		case CACHE_TYPE_NULL:
3145 			return "null";
3146 
3147 		default:
3148 			return "unknown";
3149 	}
3150 }
3151 
3152 
3153 #if DEBUG_CACHE_LIST
3154 
3155 static void
3156 update_cache_info_recursively(VMCache* cache, cache_info& info)
3157 {
3158 	info.page_count += cache->page_count;
3159 	if (cache->type == CACHE_TYPE_RAM)
3160 		info.committed += cache->committed_size;
3161 
3162 	// recurse
3163 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3164 			VMCache* consumer = it.Next();) {
3165 		update_cache_info_recursively(consumer, info);
3166 	}
3167 }
3168 
3169 
3170 static int
3171 cache_info_compare_page_count(const void* _a, const void* _b)
3172 {
3173 	const cache_info* a = (const cache_info*)_a;
3174 	const cache_info* b = (const cache_info*)_b;
3175 	if (a->page_count == b->page_count)
3176 		return 0;
3177 	return a->page_count < b->page_count ? 1 : -1;
3178 }
3179 
3180 
3181 static int
3182 cache_info_compare_committed(const void* _a, const void* _b)
3183 {
3184 	const cache_info* a = (const cache_info*)_a;
3185 	const cache_info* b = (const cache_info*)_b;
3186 	if (a->committed == b->committed)
3187 		return 0;
3188 	return a->committed < b->committed ? 1 : -1;
3189 }
3190 
3191 
3192 static void
3193 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3194 {
3195 	for (int i = 0; i < level; i++)
3196 		kprintf("  ");
3197 
3198 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3199 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3200 		cache->virtual_base, cache->virtual_end, cache->page_count);
3201 
3202 	if (level == 0)
3203 		kprintf("/%lu", info.page_count);
3204 
3205 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3206 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3207 
3208 		if (level == 0)
3209 			kprintf("/%lu", info.committed);
3210 	}
3211 
3212 	// areas
3213 	if (cache->areas != NULL) {
3214 		VMArea* area = cache->areas;
3215 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3216 			area->name, area->address_space->ID());
3217 
3218 		while (area->cache_next != NULL) {
3219 			area = area->cache_next;
3220 			kprintf(", %" B_PRId32, area->id);
3221 		}
3222 	}
3223 
3224 	kputs("\n");
3225 
3226 	// recurse
3227 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3228 			VMCache* consumer = it.Next();) {
3229 		dump_caches_recursively(consumer, info, level + 1);
3230 	}
3231 }
3232 
3233 
3234 static int
3235 dump_caches(int argc, char** argv)
3236 {
3237 	if (sCacheInfoTable == NULL) {
3238 		kprintf("No cache info table!\n");
3239 		return 0;
3240 	}
3241 
3242 	bool sortByPageCount = true;
3243 
3244 	for (int32 i = 1; i < argc; i++) {
3245 		if (strcmp(argv[i], "-c") == 0) {
3246 			sortByPageCount = false;
3247 		} else {
3248 			print_debugger_command_usage(argv[0]);
3249 			return 0;
3250 		}
3251 	}
3252 
3253 	uint32 totalCount = 0;
3254 	uint32 rootCount = 0;
3255 	off_t totalCommitted = 0;
3256 	page_num_t totalPages = 0;
3257 
3258 	VMCache* cache = gDebugCacheList;
3259 	while (cache) {
3260 		totalCount++;
3261 		if (cache->source == NULL) {
3262 			cache_info stackInfo;
3263 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3264 				? sCacheInfoTable[rootCount] : stackInfo;
3265 			rootCount++;
3266 			info.cache = cache;
3267 			info.page_count = 0;
3268 			info.committed = 0;
3269 			update_cache_info_recursively(cache, info);
3270 			totalCommitted += info.committed;
3271 			totalPages += info.page_count;
3272 		}
3273 
3274 		cache = cache->debug_next;
3275 	}
3276 
3277 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3278 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3279 			sortByPageCount
3280 				? &cache_info_compare_page_count
3281 				: &cache_info_compare_committed);
3282 	}
3283 
3284 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3285 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3286 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3287 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3288 			"page count" : "committed size");
3289 
3290 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3291 		for (uint32 i = 0; i < rootCount; i++) {
3292 			cache_info& info = sCacheInfoTable[i];
3293 			dump_caches_recursively(info.cache, info, 0);
3294 		}
3295 	} else
3296 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3297 
3298 	return 0;
3299 }
3300 
3301 #endif	// DEBUG_CACHE_LIST
3302 
3303 
3304 static int
3305 dump_cache(int argc, char** argv)
3306 {
3307 	VMCache* cache;
3308 	bool showPages = false;
3309 	int i = 1;
3310 
3311 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3312 		kprintf("usage: %s [-ps] <address>\n"
3313 			"  if -p is specified, all pages are shown; if -s is used,\n"
3314 			"  only the cache info is shown.\n", argv[0]);
3315 		return 0;
3316 	}
3317 	while (argv[i][0] == '-') {
3318 		char* arg = argv[i] + 1;
3319 		while (arg[0]) {
3320 			if (arg[0] == 'p')
3321 				showPages = true;
3322 			arg++;
3323 		}
3324 		i++;
3325 	}
3326 	if (argv[i] == NULL) {
3327 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3328 		return 0;
3329 	}
3330 
3331 	addr_t address = parse_expression(argv[i]);
3332 	if (address == 0)
3333 		return 0;
3334 
3335 	cache = (VMCache*)address;
3336 
3337 	cache->Dump(showPages);
3338 
3339 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3340 
3341 	return 0;
3342 }
3343 
3344 
3345 static void
3346 dump_area_struct(VMArea* area, bool mappings)
3347 {
3348 	kprintf("AREA: %p\n", area);
3349 	kprintf("name:\t\t'%s'\n", area->name);
3350 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3351 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3352 	kprintf("base:\t\t0x%lx\n", area->Base());
3353 	kprintf("size:\t\t0x%lx\n", area->Size());
3354 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3355 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3356 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3357 	kprintf("cache:\t\t%p\n", area->cache);
3358 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3359 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3360 	kprintf("cache_next:\t%p\n", area->cache_next);
3361 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3362 
3363 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3364 	if (mappings) {
3365 		kprintf("page mappings:\n");
3366 		while (iterator.HasNext()) {
3367 			vm_page_mapping* mapping = iterator.Next();
3368 			kprintf("  %p", mapping->page);
3369 		}
3370 		kprintf("\n");
3371 	} else {
3372 		uint32 count = 0;
3373 		while (iterator.Next() != NULL) {
3374 			count++;
3375 		}
3376 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3377 	}
3378 }
3379 
3380 
3381 static int
3382 dump_area(int argc, char** argv)
3383 {
3384 	bool mappings = false;
3385 	bool found = false;
3386 	int32 index = 1;
3387 	VMArea* area;
3388 	addr_t num;
3389 
3390 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3391 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3392 			"All areas matching either id/address/name are listed. You can\n"
3393 			"restrict the check to a single attribute by prefixing the specifier\n"
3394 			"with one of the id/contains/address/name keywords.\n"
3395 			"-m shows the area's mappings as well.\n");
3396 		return 0;
3397 	}
3398 
3399 	if (!strcmp(argv[1], "-m")) {
3400 		mappings = true;
3401 		index++;
3402 	}
3403 
3404 	int32 mode = 0xf;
3405 	if (!strcmp(argv[index], "id"))
3406 		mode = 1;
3407 	else if (!strcmp(argv[index], "contains"))
3408 		mode = 2;
3409 	else if (!strcmp(argv[index], "name"))
3410 		mode = 4;
3411 	else if (!strcmp(argv[index], "address"))
3412 		mode = 0;
3413 	if (mode != 0xf)
3414 		index++;
3415 
3416 	if (index >= argc) {
3417 		kprintf("No area specifier given.\n");
3418 		return 0;
3419 	}
3420 
3421 	num = parse_expression(argv[index]);
3422 
3423 	if (mode == 0) {
3424 		dump_area_struct((struct VMArea*)num, mappings);
3425 	} else {
3426 		// walk through the area list, looking for the arguments as a name
3427 
3428 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3429 		while ((area = it.Next()) != NULL) {
3430 			if (((mode & 4) != 0
3431 					&& !strcmp(argv[index], area->name))
3432 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3433 					|| (((mode & 2) != 0 && area->Base() <= num
3434 						&& area->Base() + area->Size() > num))))) {
3435 				dump_area_struct(area, mappings);
3436 				found = true;
3437 			}
3438 		}
3439 
3440 		if (!found)
3441 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3442 	}
3443 
3444 	return 0;
3445 }
3446 
3447 
3448 static int
3449 dump_area_list(int argc, char** argv)
3450 {
3451 	VMArea* area;
3452 	const char* name = NULL;
3453 	int32 id = 0;
3454 
3455 	if (argc > 1) {
3456 		id = parse_expression(argv[1]);
3457 		if (id == 0)
3458 			name = argv[1];
3459 	}
3460 
3461 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3462 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3463 		B_PRINTF_POINTER_WIDTH, "size");
3464 
3465 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3466 	while ((area = it.Next()) != NULL) {
3467 		if ((id != 0 && area->address_space->ID() != id)
3468 			|| (name != NULL && strstr(area->name, name) == NULL))
3469 			continue;
3470 
3471 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3472 			area->id, (void*)area->Base(), (void*)area->Size(),
3473 			area->protection, area->wiring, area->name);
3474 	}
3475 	return 0;
3476 }
3477 
3478 
3479 static int
3480 dump_available_memory(int argc, char** argv)
3481 {
3482 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3483 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3484 	return 0;
3485 }
3486 
3487 
3488 static int
3489 dump_mapping_info(int argc, char** argv)
3490 {
3491 	bool reverseLookup = false;
3492 	bool pageLookup = false;
3493 
3494 	int argi = 1;
3495 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3496 		const char* arg = argv[argi];
3497 		if (strcmp(arg, "-r") == 0) {
3498 			reverseLookup = true;
3499 		} else if (strcmp(arg, "-p") == 0) {
3500 			reverseLookup = true;
3501 			pageLookup = true;
3502 		} else {
3503 			print_debugger_command_usage(argv[0]);
3504 			return 0;
3505 		}
3506 	}
3507 
3508 	// We need at least one argument, the address. Optionally a thread ID can be
3509 	// specified.
3510 	if (argi >= argc || argi + 2 < argc) {
3511 		print_debugger_command_usage(argv[0]);
3512 		return 0;
3513 	}
3514 
3515 	uint64 addressValue;
3516 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3517 		return 0;
3518 
3519 	Team* team = NULL;
3520 	if (argi < argc) {
3521 		uint64 threadID;
3522 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3523 			return 0;
3524 
3525 		Thread* thread = Thread::GetDebug(threadID);
3526 		if (thread == NULL) {
3527 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3528 			return 0;
3529 		}
3530 
3531 		team = thread->team;
3532 	}
3533 
3534 	if (reverseLookup) {
3535 		phys_addr_t physicalAddress;
3536 		if (pageLookup) {
3537 			vm_page* page = (vm_page*)(addr_t)addressValue;
3538 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3539 		} else {
3540 			physicalAddress = (phys_addr_t)addressValue;
3541 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3542 		}
3543 
3544 		kprintf("    Team     Virtual Address      Area\n");
3545 		kprintf("--------------------------------------\n");
3546 
3547 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3548 			Callback()
3549 				:
3550 				fAddressSpace(NULL)
3551 			{
3552 			}
3553 
3554 			void SetAddressSpace(VMAddressSpace* addressSpace)
3555 			{
3556 				fAddressSpace = addressSpace;
3557 			}
3558 
3559 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3560 			{
3561 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3562 					virtualAddress);
3563 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3564 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3565 				else
3566 					kprintf("\n");
3567 				return false;
3568 			}
3569 
3570 		private:
3571 			VMAddressSpace*	fAddressSpace;
3572 		} callback;
3573 
3574 		if (team != NULL) {
3575 			// team specified -- get its address space
3576 			VMAddressSpace* addressSpace = team->address_space;
3577 			if (addressSpace == NULL) {
3578 				kprintf("Failed to get address space!\n");
3579 				return 0;
3580 			}
3581 
3582 			callback.SetAddressSpace(addressSpace);
3583 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3584 				physicalAddress, callback);
3585 		} else {
3586 			// no team specified -- iterate through all address spaces
3587 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3588 				addressSpace != NULL;
3589 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3590 				callback.SetAddressSpace(addressSpace);
3591 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3592 					physicalAddress, callback);
3593 			}
3594 		}
3595 	} else {
3596 		// get the address space
3597 		addr_t virtualAddress = (addr_t)addressValue;
3598 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3599 		VMAddressSpace* addressSpace;
3600 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3601 			addressSpace = VMAddressSpace::Kernel();
3602 		} else if (team != NULL) {
3603 			addressSpace = team->address_space;
3604 		} else {
3605 			Thread* thread = debug_get_debugged_thread();
3606 			if (thread == NULL || thread->team == NULL) {
3607 				kprintf("Failed to get team!\n");
3608 				return 0;
3609 			}
3610 
3611 			addressSpace = thread->team->address_space;
3612 		}
3613 
3614 		if (addressSpace == NULL) {
3615 			kprintf("Failed to get address space!\n");
3616 			return 0;
3617 		}
3618 
3619 		// let the translation map implementation do the job
3620 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3621 	}
3622 
3623 	return 0;
3624 }
3625 
3626 
3627 /*!	Deletes all areas and reserved regions in the given address space.
3628 
3629 	The caller must ensure that none of the areas has any wired ranges.
3630 
3631 	\param addressSpace The address space.
3632 	\param deletingAddressSpace \c true, if the address space is in the process
3633 		of being deleted.
3634 */
3635 void
3636 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3637 {
3638 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3639 		addressSpace->ID()));
3640 
3641 	addressSpace->WriteLock();
3642 
3643 	// remove all reserved areas in this address space
3644 	addressSpace->UnreserveAllAddressRanges(0);
3645 
3646 	// delete all the areas in this address space
3647 	while (VMArea* area = addressSpace->FirstArea()) {
3648 		ASSERT(!area->IsWired());
3649 		delete_area(addressSpace, area, deletingAddressSpace);
3650 	}
3651 
3652 	addressSpace->WriteUnlock();
3653 }
3654 
3655 
3656 static area_id
3657 vm_area_for(addr_t address, bool kernel)
3658 {
3659 	team_id team;
3660 	if (IS_USER_ADDRESS(address)) {
3661 		// we try the user team address space, if any
3662 		team = VMAddressSpace::CurrentID();
3663 		if (team < 0)
3664 			return team;
3665 	} else
3666 		team = VMAddressSpace::KernelID();
3667 
3668 	AddressSpaceReadLocker locker(team);
3669 	if (!locker.IsLocked())
3670 		return B_BAD_TEAM_ID;
3671 
3672 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3673 	if (area != NULL) {
3674 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3675 			return B_ERROR;
3676 
3677 		return area->id;
3678 	}
3679 
3680 	return B_ERROR;
3681 }
3682 
3683 
3684 /*!	Frees physical pages that were used during the boot process.
3685 	\a end is inclusive.
3686 */
3687 static void
3688 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3689 {
3690 	// free all physical pages in the specified range
3691 
3692 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3693 		phys_addr_t physicalAddress;
3694 		uint32 flags;
3695 
3696 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3697 			&& (flags & PAGE_PRESENT) != 0) {
3698 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3699 			if (page != NULL && page->State() != PAGE_STATE_FREE
3700 					 && page->State() != PAGE_STATE_CLEAR
3701 					 && page->State() != PAGE_STATE_UNUSED) {
3702 				DEBUG_PAGE_ACCESS_START(page);
3703 				vm_page_set_state(page, PAGE_STATE_FREE);
3704 			}
3705 		}
3706 	}
3707 
3708 	// unmap the memory
3709 	map->Unmap(start, end);
3710 }
3711 
3712 
3713 void
3714 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3715 {
3716 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3717 	addr_t end = start + (size - 1);
3718 	addr_t lastEnd = start;
3719 
3720 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3721 		(void*)start, (void*)end));
3722 
	// The areas are sorted in virtual address space order, so
	// we just have to find the holes between them that fall
	// into the range we should dispose of
3726 
3727 	map->Lock();
3728 
3729 	for (VMAddressSpace::AreaIterator it
3730 				= VMAddressSpace::Kernel()->GetAreaIterator();
3731 			VMArea* area = it.Next();) {
3732 		addr_t areaStart = area->Base();
3733 		addr_t areaEnd = areaStart + (area->Size() - 1);
3734 
3735 		if (areaEnd < start)
3736 			continue;
3737 
3738 		if (areaStart > end) {
			// we are done, the area is already beyond what we have to free
3740 			break;
3741 		}
3742 
3743 		if (areaStart > lastEnd) {
3744 			// this is something we can free
3745 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3746 				(void*)areaStart));
3747 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3748 		}
3749 
3750 		if (areaEnd >= end) {
3751 			lastEnd = areaEnd;
3752 				// no +1 to prevent potential overflow
3753 			break;
3754 		}
3755 
3756 		lastEnd = areaEnd + 1;
3757 	}
3758 
3759 	if (lastEnd < end) {
3760 		// we can also get rid of some space at the end of the area
3761 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3762 			(void*)end));
3763 		unmap_and_free_physical_pages(map, lastEnd, end);
3764 	}
3765 
3766 	map->Unlock();
3767 }
3768 
3769 
3770 static void
3771 create_preloaded_image_areas(struct preloaded_image* _image)
3772 {
3773 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3774 	char name[B_OS_NAME_LENGTH];
3775 	void* address;
3776 	int32 length;
3777 
3778 	// use file name to create a good area name
3779 	char* fileName = strrchr(image->name, '/');
3780 	if (fileName == NULL)
3781 		fileName = image->name;
3782 	else
3783 		fileName++;
3784 
3785 	length = strlen(fileName);
3786 	// make sure there is enough space for the suffix
3787 	if (length > 25)
3788 		length = 25;
3789 
3790 	memcpy(name, fileName, length);
3791 	strcpy(name + length, "_text");
3792 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3793 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3794 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3795 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3796 		// this will later be remapped read-only/executable by the
3797 		// ELF initialization code
3798 
3799 	strcpy(name + length, "_data");
3800 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3801 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3802 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3803 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3804 }
3805 
3806 
/*!	Frees all kernel arguments areas that were previously allocated from the
	kernel_args structure's ranges. Any boot loader resources contained in
	those arguments must not be accessed anymore past this point.
3810 */
3811 void
3812 vm_free_kernel_args(kernel_args* args)
3813 {
3814 	uint32 i;
3815 
3816 	TRACE(("vm_free_kernel_args()\n"));
3817 
3818 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3819 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3820 		if (area >= B_OK)
3821 			delete_area(area);
3822 	}
3823 }
3824 
3825 
3826 static void
3827 allocate_kernel_args(kernel_args* args)
3828 {
3829 	TRACE(("allocate_kernel_args()\n"));
3830 
3831 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3832 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3833 
3834 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3835 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3836 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3837 	}
3838 }
3839 
3840 
3841 static void
3842 unreserve_boot_loader_ranges(kernel_args* args)
3843 {
3844 	TRACE(("unreserve_boot_loader_ranges()\n"));
3845 
3846 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3847 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3848 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3849 			args->virtual_allocated_range[i].size);
3850 	}
3851 }
3852 
3853 
3854 static void
3855 reserve_boot_loader_ranges(kernel_args* args)
3856 {
3857 	TRACE(("reserve_boot_loader_ranges()\n"));
3858 
3859 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3860 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3861 
		// If the address is not a kernel address, we just skip it. The
		// architecture-specific code has to deal with it.
3864 		if (!IS_KERNEL_ADDRESS(address)) {
3865 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3866 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3867 			continue;
3868 		}
3869 
3870 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3871 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3872 		if (status < B_OK)
3873 			panic("could not reserve boot loader ranges\n");
3874 	}
3875 }
3876 
3877 
3878 static addr_t
3879 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3880 {
3881 	size = PAGE_ALIGN(size);
3882 
3883 	// find a slot in the virtual allocation addr range
3884 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3885 		// check to see if the space between this one and the last is big enough
3886 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3887 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3888 			+ args->virtual_allocated_range[i - 1].size;
3889 
3890 		addr_t base = alignment > 0
3891 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3892 
3893 		if (base >= KERNEL_BASE && base < rangeStart
3894 				&& rangeStart - base >= size) {
3895 			args->virtual_allocated_range[i - 1].size
3896 				+= base + size - previousRangeEnd;
3897 			return base;
3898 		}
3899 	}
3900 
	// we didn't find a hole between the allocation ranges. This is OK;
	// see if there's a gap after the last one
3903 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3904 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3905 		+ args->virtual_allocated_range[lastEntryIndex].size;
3906 	addr_t base = alignment > 0
3907 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3908 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3909 		args->virtual_allocated_range[lastEntryIndex].size
3910 			+= base + size - lastRangeEnd;
3911 		return base;
3912 	}
3913 
3914 	// see if there's a gap before the first one
3915 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3916 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3917 		base = rangeStart - size;
3918 		if (alignment > 0)
3919 			base = ROUNDDOWN(base, alignment);
3920 
3921 		if (base >= KERNEL_BASE) {
3922 			args->virtual_allocated_range[0].start = base;
3923 			args->virtual_allocated_range[0].size += rangeStart - base;
3924 			return base;
3925 		}
3926 	}
3927 
3928 	return 0;
3929 }
3930 
3931 
3932 static bool
3933 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3934 {
3935 	// TODO: horrible brute-force method of determining if the page can be
3936 	// allocated
3937 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3938 		if (address >= args->physical_memory_range[i].start
3939 			&& address < args->physical_memory_range[i].start
3940 				+ args->physical_memory_range[i].size)
3941 			return true;
3942 	}
3943 	return false;
3944 }
3945 
3946 
3947 page_num_t
3948 vm_allocate_early_physical_page(kernel_args* args)
3949 {
3950 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3951 		phys_addr_t nextPage;
3952 
3953 		nextPage = args->physical_allocated_range[i].start
3954 			+ args->physical_allocated_range[i].size;
		// see if the page right after this allocated paddr run can be allocated
3956 		if (i + 1 < args->num_physical_allocated_ranges
3957 			&& args->physical_allocated_range[i + 1].size != 0) {
3958 			// see if the next page will collide with the next allocated range
3959 			if (nextPage >= args->physical_allocated_range[i+1].start)
3960 				continue;
3961 		}
3962 		// see if the next physical page fits in the memory block
3963 		if (is_page_in_physical_memory_range(args, nextPage)) {
3964 			// we got one!
3965 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3966 			return nextPage / B_PAGE_SIZE;
3967 		}
3968 	}
3969 
3970 	// Expanding upwards didn't work, try going downwards.
3971 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3972 		phys_addr_t nextPage;
3973 
3974 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
		// see if the page right before this allocated paddr run can be allocated
3976 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
			// see if that page will collide with the previous allocated range
3978 			if (nextPage < args->physical_allocated_range[i-1].start
3979 				+ args->physical_allocated_range[i-1].size)
3980 				continue;
3981 		}
3982 		// see if the next physical page fits in the memory block
3983 		if (is_page_in_physical_memory_range(args, nextPage)) {
3984 			// we got one!
3985 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
3986 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3987 			return nextPage / B_PAGE_SIZE;
3988 		}
3989 	}
3990 
3991 	return 0;
3992 		// could not allocate a block
3993 }
3994 
3995 
3996 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3997 	allocate some pages before the VM is completely up.
3998 */
3999 addr_t
4000 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4001 	uint32 attributes, addr_t alignment)
4002 {
4003 	if (physicalSize > virtualSize)
4004 		physicalSize = virtualSize;
4005 
4006 	// find the vaddr to allocate at
4007 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4008 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4009 	if (virtualBase == 0) {
4010 		panic("vm_allocate_early: could not allocate virtual address\n");
4011 		return 0;
4012 	}
4013 
4014 	// map the pages
4015 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4016 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4017 		if (physicalAddress == 0)
4018 			panic("error allocating early page!\n");
4019 
4020 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4021 
4022 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
4023 			physicalAddress * B_PAGE_SIZE, attributes,
4024 			&vm_allocate_early_physical_page);
4025 	}
4026 
4027 	return virtualBase;
4028 }
4029 
4030 
4031 /*!	The main entrance point to initialize the VM. */
4032 status_t
4033 vm_init(kernel_args* args)
4034 {
4035 	struct preloaded_image* image;
4036 	void* address;
4037 	status_t err = 0;
4038 	uint32 i;
4039 
4040 	TRACE(("vm_init: entry\n"));
4041 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4042 	err = arch_vm_init(args);
4043 
4044 	// initialize some globals
4045 	vm_page_init_num_pages(args);
4046 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4047 
4048 	slab_init(args);
4049 
4050 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4051 	off_t heapSize = INITIAL_HEAP_SIZE;
	// try to accommodate low-memory systems
4053 	while (heapSize > sAvailableMemory / 8)
4054 		heapSize /= 2;
4055 	if (heapSize < 1024 * 1024)
4056 		panic("vm_init: go buy some RAM please.");
4057 
4058 	// map in the new heap and initialize it
4059 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4060 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4061 	TRACE(("heap at 0x%lx\n", heapBase));
4062 	heap_init(heapBase, heapSize);
4063 #endif
4064 
4065 	// initialize the free page list and physical page mapper
4066 	vm_page_init(args);
4067 
4068 	// initialize the cache allocators
4069 	vm_cache_init(args);
4070 
4071 	{
4072 		status_t error = VMAreaHash::Init();
4073 		if (error != B_OK)
4074 			panic("vm_init: error initializing area hash table\n");
4075 	}
4076 
4077 	VMAddressSpace::Init();
4078 	reserve_boot_loader_ranges(args);
4079 
4080 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4081 	heap_init_post_area();
4082 #endif
4083 
	// Do any further initialization that the architecture-dependent layers may
	// need now
4086 	arch_vm_translation_map_init_post_area(args);
4087 	arch_vm_init_post_area(args);
4088 	vm_page_init_post_area(args);
4089 	slab_init_post_area();
4090 
4091 	// allocate areas to represent stuff that already exists
4092 
4093 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4094 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4095 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4096 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4097 #endif
4098 
4099 	allocate_kernel_args(args);
4100 
4101 	create_preloaded_image_areas(args->kernel_image);
4102 
4103 	// allocate areas for preloaded images
4104 	for (image = args->preloaded_images; image != NULL; image = image->next)
4105 		create_preloaded_image_areas(image);
4106 
4107 	// allocate kernel stacks
4108 	for (i = 0; i < args->num_cpus; i++) {
4109 		char name[64];
4110 
4111 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4112 		address = (void*)args->cpu_kstack[i].start;
4113 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4114 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4115 	}
4116 
4117 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4118 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4119 
4120 #if PARANOID_KERNEL_MALLOC
4121 	vm_block_address_range("uninitialized heap memory",
4122 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4123 #endif
4124 #if PARANOID_KERNEL_FREE
4125 	vm_block_address_range("freed heap memory",
4126 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4127 #endif
4128 
4129 	// create the object cache for the page mappings
4130 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4131 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4132 		NULL, NULL);
4133 	if (gPageMappingsObjectCache == NULL)
4134 		panic("failed to create page mappings object cache");
4135 
4136 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4137 
4138 #if DEBUG_CACHE_LIST
4139 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4140 		virtual_address_restrictions virtualRestrictions = {};
4141 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4142 		physical_address_restrictions physicalRestrictions = {};
4143 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4144 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4145 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4146 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4147 			&physicalRestrictions, (void**)&sCacheInfoTable);
4148 	}
4149 #endif	// DEBUG_CACHE_LIST
4150 
4151 	// add some debugger commands
4152 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4153 	add_debugger_command("area", &dump_area,
4154 		"Dump info about a particular area");
4155 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4156 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4157 #if DEBUG_CACHE_LIST
4158 	if (sCacheInfoTable != NULL) {
4159 		add_debugger_command_etc("caches", &dump_caches,
4160 			"List all VMCache trees",
4161 			"[ \"-c\" ]\n"
4162 			"All cache trees are listed sorted in decreasing order by number "
4163 				"of\n"
4164 			"used pages or, if \"-c\" is specified, by size of committed "
4165 				"memory.\n",
4166 			0);
4167 	}
4168 #endif
4169 	add_debugger_command("avail", &dump_available_memory,
4170 		"Dump available memory");
4171 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4172 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4173 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4174 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4175 	add_debugger_command("string", &display_mem, "dump strings");
4176 
4177 	add_debugger_command_etc("mapping", &dump_mapping_info,
4178 		"Print address mapping information",
4179 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4180 		"Prints low-level page mapping information for a given address. If\n"
4181 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4182 		"address that is looked up in the translation map of the current\n"
4183 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4184 		"\"-r\" is specified, <address> is a physical address that is\n"
4185 		"searched in the translation map of all teams, respectively the team\n"
4186 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4187 		"<address> is the address of a vm_page structure. The behavior is\n"
4188 		"equivalent to specifying \"-r\" with the physical address of that\n"
4189 		"page.\n",
4190 		0);
4191 
4192 	TRACE(("vm_init: exit\n"));
4193 
4194 	vm_cache_init_post_heap();
4195 
4196 	return err;
4197 }
4198 
4199 
4200 status_t
4201 vm_init_post_sem(kernel_args* args)
4202 {
	// This frees all unused boot loader resources and makes their space
	// available again
4205 	arch_vm_init_end(args);
4206 	unreserve_boot_loader_ranges(args);
4207 
4208 	// fill in all of the semaphores that were not allocated before
	// since we're still single-threaded and only the kernel address space
4210 	// exists, it isn't that hard to find all of the ones we need to create
4211 
4212 	arch_vm_translation_map_init_post_sem(args);
4213 
4214 	slab_init_post_sem();
4215 
4216 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4217 	heap_init_post_sem();
4218 #endif
4219 
4220 	return B_OK;
4221 }
4222 
4223 
4224 status_t
4225 vm_init_post_thread(kernel_args* args)
4226 {
4227 	vm_page_init_post_thread(args);
4228 	slab_init_post_thread();
4229 	return heap_init_post_thread();
4230 }
4231 
4232 
4233 status_t
4234 vm_init_post_modules(kernel_args* args)
4235 {
4236 	return arch_vm_init_post_modules(args);
4237 }
4238 
4239 
4240 void
4241 permit_page_faults(void)
4242 {
4243 	Thread* thread = thread_get_current_thread();
4244 	if (thread != NULL)
4245 		atomic_add(&thread->page_faults_allowed, 1);
4246 }
4247 
4248 
4249 void
4250 forbid_page_faults(void)
4251 {
4252 	Thread* thread = thread_get_current_thread();
4253 	if (thread != NULL)
4254 		atomic_add(&thread->page_faults_allowed, -1);
4255 }
4256 
4257 
4258 status_t
4259 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4260 	bool isUser, addr_t* newIP)
4261 {
4262 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4263 		faultAddress));
4264 
4265 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4266 
4267 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4268 	VMAddressSpace* addressSpace = NULL;
4269 
4270 	status_t status = B_OK;
4271 	*newIP = 0;
4272 	atomic_add((int32*)&sPageFaults, 1);
4273 
4274 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4275 		addressSpace = VMAddressSpace::GetKernel();
4276 	} else if (IS_USER_ADDRESS(pageAddress)) {
4277 		addressSpace = VMAddressSpace::GetCurrent();
4278 		if (addressSpace == NULL) {
4279 			if (!isUser) {
4280 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4281 					"memory!\n");
4282 				status = B_BAD_ADDRESS;
4283 				TPF(PageFaultError(-1,
4284 					VMPageFaultTracing
4285 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4286 			} else {
4287 				// XXX weird state.
4288 				panic("vm_page_fault: non kernel thread accessing user memory "
4289 					"that doesn't exist!\n");
4290 				status = B_BAD_ADDRESS;
4291 			}
4292 		}
4293 	} else {
4294 		// the hit was probably in the 64k DMZ between kernel and user space
4295 		// this keeps a user space thread from passing a buffer that crosses
4296 		// into kernel space
4297 		status = B_BAD_ADDRESS;
4298 		TPF(PageFaultError(-1,
4299 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4300 	}
4301 
4302 	if (status == B_OK) {
4303 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4304 			isUser, NULL);
4305 	}
4306 
4307 	if (status < B_OK) {
4308 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4309 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4310 			strerror(status), address, faultAddress, isWrite, isUser,
4311 			thread_get_current_thread_id());
4312 		if (!isUser) {
4313 			Thread* thread = thread_get_current_thread();
4314 			if (thread != NULL && thread->fault_handler != 0) {
				// this will cause the arch-dependent page fault handler to
4316 				// modify the IP on the interrupt frame or whatever to return
4317 				// to this address
4318 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4319 			} else {
4320 				// unhandled page fault in the kernel
4321 				panic("vm_page_fault: unhandled page fault in kernel space at "
4322 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4323 			}
4324 		} else {
4325 			Thread* thread = thread_get_current_thread();
4326 
4327 #ifdef TRACE_FAULTS
4328 			VMArea* area = NULL;
4329 			if (addressSpace != NULL) {
4330 				addressSpace->ReadLock();
4331 				area = addressSpace->LookupArea(faultAddress);
4332 			}
4333 
4334 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4335 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4336 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4337 				thread->team->Name(), thread->team->id,
4338 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4339 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4340 					area->Base() : 0x0));
4341 
4342 			if (addressSpace != NULL)
4343 				addressSpace->ReadUnlock();
4344 #endif
4345 
4346 			// If the thread has a signal handler for SIGSEGV, we simply
4347 			// send it the signal. Otherwise we notify the user debugger
4348 			// first.
4349 			struct sigaction action;
4350 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4351 					&& action.sa_handler != SIG_DFL
4352 					&& action.sa_handler != SIG_IGN)
4353 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4354 					SIGSEGV)) {
4355 				Signal signal(SIGSEGV,
4356 					status == B_PERMISSION_DENIED
4357 						? SEGV_ACCERR : SEGV_MAPERR,
4358 					EFAULT, thread->team->id);
4359 				signal.SetAddress((void*)address);
4360 				send_signal_to_thread(thread, signal, 0);
4361 			}
4362 		}
4363 	}
4364 
4365 	if (addressSpace != NULL)
4366 		addressSpace->Put();
4367 
4368 	return B_HANDLED_INTERRUPT;
4369 }
4370 
4371 
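/*!	Bundles the state needed while resolving a soft fault: the address space
	read locker, the cache chain locker rooted at the faulted area's top
	cache, and an upfront page reservation. vm_soft_fault() reuses one context
	across restarts: Prepare() re-arms it for the next attempt, while the
	destructor unlocks everything and releases unused reserved pages.
*/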
4372 struct PageFaultContext {
4373 	AddressSpaceReadLocker	addressSpaceLocker;
4374 	VMCacheChainLocker		cacheChainLocker;
4375 
4376 	VMTranslationMap*		map;
4377 	VMCache*				topCache;
4378 	off_t					cacheOffset;
4379 	vm_page_reservation		reservation;
4380 	bool					isWrite;
4381 
4382 	// return values
4383 	vm_page*				page;
4384 	bool					restart;
4385 	bool					pageAllocated;
4386 
4387 
4388 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4389 		:
4390 		addressSpaceLocker(addressSpace, true),
4391 		map(addressSpace->TranslationMap()),
4392 		isWrite(isWrite)
4393 	{
4394 	}
4395 
4396 	~PageFaultContext()
4397 	{
4398 		UnlockAll();
4399 		vm_page_unreserve_pages(&reservation);
4400 	}
4401 
4402 	void Prepare(VMCache* topCache, off_t cacheOffset)
4403 	{
4404 		this->topCache = topCache;
4405 		this->cacheOffset = cacheOffset;
4406 		page = NULL;
4407 		restart = false;
4408 		pageAllocated = false;
4409 
4410 		cacheChainLocker.SetTo(topCache);
4411 	}
4412 
4413 	void UnlockAll(VMCache* exceptCache = NULL)
4414 	{
4415 		topCache = NULL;
4416 		addressSpaceLocker.Unlock();
4417 		cacheChainLocker.Unlock(exceptCache);
4418 	}
4419 };
4420 
4421 
4422 /*!	Gets the page that should be mapped into the area.
4423 	Returns an error code other than \c B_OK, if the page couldn't be found or
4424 	paged in. The locking state of the address space and the caches is undefined
4425 	in that case.
	Returns \c B_OK with \c context.restart set to \c true, if the function
4427 	had to unlock the address space and all caches and is supposed to be called
4428 	again.
4429 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4430 	found. It is returned in \c context.page. The address space will still be
4431 	locked as well as all caches starting from the top cache to at least the
4432 	cache the page lives in.
4433 */
4434 static status_t
4435 fault_get_page(PageFaultContext& context)
4436 {
4437 	VMCache* cache = context.topCache;
4438 	VMCache* lastCache = NULL;
4439 	vm_page* page = NULL;
4440 
4441 	while (cache != NULL) {
4442 		// We already hold the lock of the cache at this point.
4443 
4444 		lastCache = cache;
4445 
4446 		page = cache->LookupPage(context.cacheOffset);
4447 		if (page != NULL && page->busy) {
			// the page is busy -- wait for it to become unbusy
4449 			context.UnlockAll(cache);
4450 			cache->ReleaseRefLocked();
4451 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4452 
4453 			// restart the whole process
4454 			context.restart = true;
4455 			return B_OK;
4456 		}
4457 
4458 		if (page != NULL)
4459 			break;
4460 
4461 		// The current cache does not contain the page we're looking for.
4462 
4463 		// see if the backing store has it
4464 		if (cache->HasPage(context.cacheOffset)) {
4465 			// insert a fresh page and mark it busy -- we're going to read it in
4466 			page = vm_page_allocate_page(&context.reservation,
4467 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4468 			cache->InsertPage(page, context.cacheOffset);
4469 
4470 			// We need to unlock all caches and the address space while reading
4471 			// the page in. Keep a reference to the cache around.
4472 			cache->AcquireRefLocked();
4473 			context.UnlockAll();
4474 
4475 			// read the page in
4476 			generic_io_vec vec;
4477 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4478 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4479 
4480 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4481 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4482 
4483 			cache->Lock();
4484 
4485 			if (status < B_OK) {
4486 				// on error remove and free the page
4487 				dprintf("reading page from cache %p returned: %s!\n",
4488 					cache, strerror(status));
4489 
4490 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4491 				cache->RemovePage(page);
4492 				vm_page_set_state(page, PAGE_STATE_FREE);
4493 
4494 				cache->ReleaseRefAndUnlock();
4495 				return status;
4496 			}
4497 
4498 			// mark the page unbusy again
4499 			cache->MarkPageUnbusy(page);
4500 
4501 			DEBUG_PAGE_ACCESS_END(page);
4502 
4503 			// Since we needed to unlock everything temporarily, the area
4504 			// situation might have changed. So we need to restart the whole
4505 			// process.
4506 			cache->ReleaseRefAndUnlock();
4507 			context.restart = true;
4508 			return B_OK;
4509 		}
4510 
4511 		cache = context.cacheChainLocker.LockSourceCache();
4512 	}
4513 
4514 	if (page == NULL) {
4515 		// There was no adequate page, determine the cache for a clean one.
		// Read-only pages come in the deepest cache, only the topmost cache
4517 		// may have direct write access.
4518 		cache = context.isWrite ? context.topCache : lastCache;
4519 
4520 		// allocate a clean page
4521 		page = vm_page_allocate_page(&context.reservation,
4522 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4523 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4524 			page->physical_page_number));
4525 
4526 		// insert the new page into our cache
4527 		cache->InsertPage(page, context.cacheOffset);
4528 		context.pageAllocated = true;
4529 	} else if (page->Cache() != context.topCache && context.isWrite) {
4530 		// We have a page that has the data we want, but in the wrong cache
4531 		// object so we need to copy it and stick it into the top cache.
4532 		vm_page* sourcePage = page;
4533 
4534 		// TODO: If memory is low, it might be a good idea to steal the page
4535 		// from our source cache -- if possible, that is.
4536 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4537 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4538 
4539 		// To not needlessly kill concurrency we unlock all caches but the top
4540 		// one while copying the page. Lacking another mechanism to ensure that
4541 		// the source page doesn't disappear, we mark it busy.
4542 		sourcePage->busy = true;
4543 		context.cacheChainLocker.UnlockKeepRefs(true);
4544 
4545 		// copy the page
4546 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4547 			sourcePage->physical_page_number * B_PAGE_SIZE);
4548 
4549 		context.cacheChainLocker.RelockCaches(true);
4550 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4551 
4552 		// insert the new page into our cache
4553 		context.topCache->InsertPage(page, context.cacheOffset);
4554 		context.pageAllocated = true;
4555 	} else
4556 		DEBUG_PAGE_ACCESS_START(page);
4557 
4558 	context.page = page;
4559 	return B_OK;
4560 }
4561 
4562 
4563 /*!	Makes sure the address in the given address space is mapped.
4564 
4565 	\param addressSpace The address space.
4566 	\param originalAddress The address. Doesn't need to be page aligned.
	\param isWrite If \c true the address shall be write-accessible.
	\param isExecute If \c true the address shall be executable.
	\param isUser If \c true the access is requested by a userland team.
4569 	\param wirePage On success, if non \c NULL, the wired count of the page
4570 		mapped at the given address is incremented and the page is returned
4571 		via this parameter.
4572 	\return \c B_OK on success, another error code otherwise.
4573 */
4574 static status_t
4575 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4576 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4577 {
4578 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4579 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4580 		originalAddress, isWrite, isUser));
4581 
4582 	PageFaultContext context(addressSpace, isWrite);
4583 
4584 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4585 	status_t status = B_OK;
4586 
4587 	addressSpace->IncrementFaultCount();
4588 
4589 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4590 	// the pages upfront makes sure we don't have any cache locked, so that the
4591 	// page daemon/thief can do their job without problems.
4592 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4593 		originalAddress);
4594 	context.addressSpaceLocker.Unlock();
4595 	vm_page_reserve_pages(&context.reservation, reservePages,
4596 		addressSpace == VMAddressSpace::Kernel()
4597 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4598 
4599 	while (true) {
4600 		context.addressSpaceLocker.Lock();
4601 
4602 		// get the area the fault was in
4603 		VMArea* area = addressSpace->LookupArea(address);
4604 		if (area == NULL) {
4605 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4606 				"space\n", originalAddress);
4607 			TPF(PageFaultError(-1,
4608 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4609 			status = B_BAD_ADDRESS;
4610 			break;
4611 		}
4612 
4613 		// check permissions
4614 		uint32 protection = get_area_page_protection(area, address);
4615 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4616 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4617 				area->id, (void*)originalAddress);
4618 			TPF(PageFaultError(area->id,
4619 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4620 			status = B_PERMISSION_DENIED;
4621 			break;
4622 		}
4623 		if (isWrite && (protection
4624 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4625 			dprintf("write access attempted on write-protected area 0x%"
4626 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4627 			TPF(PageFaultError(area->id,
4628 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4629 			status = B_PERMISSION_DENIED;
4630 			break;
4631 		} else if (isExecute && (protection
4632 				& (B_EXECUTE_AREA
4633 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4634 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4635 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4636 			TPF(PageFaultError(area->id,
4637 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4638 			status = B_PERMISSION_DENIED;
4639 			break;
4640 		} else if (!isWrite && !isExecute && (protection
4641 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4642 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4643 				" at %p\n", area->id, (void*)originalAddress);
4644 			TPF(PageFaultError(area->id,
4645 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4646 			status = B_PERMISSION_DENIED;
4647 			break;
4648 		}
4649 
4650 		// We have the area, it was a valid access, so let's try to resolve the
4651 		// page fault now.
		// At first, the topmost cache of the area is investigated.
4653 
4654 		context.Prepare(vm_area_get_locked_cache(area),
4655 			address - area->Base() + area->cache_offset);
4656 
4657 		// See if this cache has a fault handler -- this will do all the work
4658 		// for us.
4659 		{
4660 			// Note, since the page fault is resolved with interrupts enabled,
4661 			// the fault handler could be called more than once for the same
4662 			// reason -- the store must take this into account.
4663 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4664 			if (status != B_BAD_HANDLER)
4665 				break;
4666 		}
4667 
		// The topmost cache has no fault handler, so let's see if the cache or
4669 		// its sources already have the page we're searching for (we're going
4670 		// from top to bottom).
4671 		status = fault_get_page(context);
4672 		if (status != B_OK) {
4673 			TPF(PageFaultError(area->id, status));
4674 			break;
4675 		}
4676 
4677 		if (context.restart)
4678 			continue;
4679 
4680 		// All went fine, all there is left to do is to map the page into the
4681 		// address space.
4682 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4683 			context.page));
4684 
4685 		// If the page doesn't reside in the area's cache, we need to make sure
4686 		// it's mapped in read-only, so that we cannot overwrite someone else's
4687 		// data (copy-on-write)
4688 		uint32 newProtection = protection;
4689 		if (context.page->Cache() != context.topCache && !isWrite)
4690 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4691 
4692 		bool unmapPage = false;
4693 		bool mapPage = true;
4694 
4695 		// check whether there's already a page mapped at the address
4696 		context.map->Lock();
4697 
4698 		phys_addr_t physicalAddress;
4699 		uint32 flags;
4700 		vm_page* mappedPage = NULL;
4701 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4702 			&& (flags & PAGE_PRESENT) != 0
4703 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4704 				!= NULL) {
4705 			// Yep there's already a page. If it's ours, we can simply adjust
4706 			// its protection. Otherwise we have to unmap it.
4707 			if (mappedPage == context.page) {
4708 				context.map->ProtectPage(area, address, newProtection);
4709 					// Note: We assume that ProtectPage() is atomic (i.e.
4710 					// the page isn't temporarily unmapped), otherwise we'd have
4711 					// to make sure it isn't wired.
4712 				mapPage = false;
4713 			} else
4714 				unmapPage = true;
4715 		}
4716 
4717 		context.map->Unlock();
4718 
4719 		if (unmapPage) {
4720 			// If the page is wired, we can't unmap it. Wait until it is unwired
4721 			// again and restart. Note that the page cannot be wired for
			// writing, since it isn't in the topmost cache. So we can safely
4723 			// ignore ranges wired for writing (our own and other concurrent
4724 			// wiring attempts in progress) and in fact have to do that to avoid
4725 			// a deadlock.
4726 			VMAreaUnwiredWaiter waiter;
4727 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4728 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4729 				// unlock everything and wait
4730 				if (context.pageAllocated) {
4731 					// ... but since we allocated a page and inserted it into
4732 					// the top cache, remove and free it first. Otherwise we'd
4733 					// have a page from a lower cache mapped while an upper
4734 					// cache has a page that would shadow it.
4735 					context.topCache->RemovePage(context.page);
4736 					vm_page_free_etc(context.topCache, context.page,
4737 						&context.reservation);
4738 				} else
4739 					DEBUG_PAGE_ACCESS_END(context.page);
4740 
4741 				context.UnlockAll();
4742 				waiter.waitEntry.Wait();
4743 				continue;
4744 			}
4745 
4746 			// Note: The mapped page is a page of a lower cache. We are
			// guaranteed to have that cache locked, our new page is a copy of
4748 			// that page, and the page is not busy. The logic for that guarantee
4749 			// is as follows: Since the page is mapped, it must live in the top
4750 			// cache (ruled out above) or any of its lower caches, and there is
4751 			// (was before the new page was inserted) no other page in any
4752 			// cache between the top cache and the page's cache (otherwise that
4753 			// would be mapped instead). That in turn means that our algorithm
4754 			// must have found it and therefore it cannot be busy either.
4755 			DEBUG_PAGE_ACCESS_START(mappedPage);
4756 			unmap_page(area, address);
4757 			DEBUG_PAGE_ACCESS_END(mappedPage);
4758 		}
4759 
4760 		if (mapPage) {
4761 			if (map_page(area, context.page, address, newProtection,
4762 					&context.reservation) != B_OK) {
				// Mapping can only fail when the page mapping object couldn't
4764 				// be allocated. Save for the missing mapping everything is
4765 				// fine, though. If this was a regular page fault, we'll simply
4766 				// leave and probably fault again. To make sure we'll have more
4767 				// luck then, we ensure that the minimum object reserve is
4768 				// available.
4769 				DEBUG_PAGE_ACCESS_END(context.page);
4770 
4771 				context.UnlockAll();
4772 
4773 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4774 						!= B_OK) {
4775 					// Apparently the situation is serious. Let's get ourselves
4776 					// killed.
4777 					status = B_NO_MEMORY;
4778 				} else if (wirePage != NULL) {
4779 					// The caller expects us to wire the page. Since
4780 					// object_cache_reserve() succeeded, we should now be able
4781 					// to allocate a mapping structure. Restart.
4782 					continue;
4783 				}
4784 
4785 				break;
4786 			}
4787 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4788 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4789 
4790 		// also wire the page, if requested
4791 		if (wirePage != NULL && status == B_OK) {
4792 			increment_page_wired_count(context.page);
4793 			*wirePage = context.page;
4794 		}
4795 
4796 		DEBUG_PAGE_ACCESS_END(context.page);
4797 
4798 		break;
4799 	}
4800 
4801 	return status;
4802 }
4803 
4804 
4805 status_t
4806 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4807 {
4808 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4809 }
4810 
4811 status_t
4812 vm_put_physical_page(addr_t vaddr, void* handle)
4813 {
4814 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4815 }
4816 
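// A hedged usage sketch for the physical page mapper wrappers above (and
// their _current_cpu/_debug variants below): temporarily map a physical page,
// access it through the returned virtual address, then put it back. The
// zeroing is only an example -- vm_memset_physical() below would be the
// shorter way to do that.
//
//	addr_t virtualAddress;
//	void* handle;
//	if (vm_get_physical_page(physicalAddress, &virtualAddress, &handle) == B_OK) {
//		memset((void*)virtualAddress, 0, B_PAGE_SIZE);
//		vm_put_physical_page(virtualAddress, handle);
//	}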
4817 
4818 status_t
4819 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4820 	void** _handle)
4821 {
4822 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4823 }
4824 
4825 status_t
4826 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4827 {
4828 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4829 }
4830 
4831 
4832 status_t
4833 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4834 {
4835 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4836 }
4837 
4838 status_t
4839 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4840 {
4841 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4842 }
4843 
4844 
4845 void
4846 vm_get_info(system_info* info)
4847 {
4848 	swap_get_info(info);
4849 
4850 	MutexLocker locker(sAvailableMemoryLock);
4851 	info->needed_memory = sNeededMemory;
4852 	info->free_memory = sAvailableMemory;
4853 }
4854 
4855 
4856 uint32
4857 vm_num_page_faults(void)
4858 {
4859 	return sPageFaults;
4860 }
4861 
4862 
4863 off_t
4864 vm_available_memory(void)
4865 {
4866 	MutexLocker locker(sAvailableMemoryLock);
4867 	return sAvailableMemory;
4868 }
4869 
4870 
4871 off_t
4872 vm_available_not_needed_memory(void)
4873 {
4874 	MutexLocker locker(sAvailableMemoryLock);
4875 	return sAvailableMemory - sNeededMemory;
4876 }
4877 
4878 
4879 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4880 	debugger.
4881 */
4882 off_t
4883 vm_available_not_needed_memory_debug(void)
4884 {
4885 	return sAvailableMemory - sNeededMemory;
4886 }
4887 
4888 
4889 size_t
4890 vm_kernel_address_space_left(void)
4891 {
4892 	return VMAddressSpace::Kernel()->FreeSpace();
4893 }
4894 
4895 
4896 void
4897 vm_unreserve_memory(size_t amount)
4898 {
4899 	mutex_lock(&sAvailableMemoryLock);
4900 
4901 	sAvailableMemory += amount;
4902 
4903 	mutex_unlock(&sAvailableMemoryLock);
4904 }
4905 
4906 
4907 status_t
4908 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4909 {
4910 	size_t reserve = kMemoryReserveForPriority[priority];
4911 
4912 	MutexLocker locker(sAvailableMemoryLock);
4913 
4914 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4915 
4916 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4917 		sAvailableMemory -= amount;
4918 		return B_OK;
4919 	}
4920 
4921 	if (timeout <= 0)
4922 		return B_NO_MEMORY;
4923 
4924 	// turn timeout into an absolute timeout
4925 	timeout += system_time();
4926 
4927 	// loop until we've got the memory or the timeout occurs
4928 	do {
4929 		sNeededMemory += amount;
4930 
4931 		// call the low resource manager
4932 		locker.Unlock();
4933 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4934 			B_ABSOLUTE_TIMEOUT, timeout);
4935 		locker.Lock();
4936 
4937 		sNeededMemory -= amount;
4938 
4939 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4940 			sAvailableMemory -= amount;
4941 			return B_OK;
4942 		}
4943 	} while (timeout > system_time());
4944 
4945 	return B_NO_MEMORY;
4946 }
4947 
4948 
4949 status_t
4950 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4951 {
4952 	// NOTE: The caller is responsible for synchronizing calls to this function!
4953 
4954 	AddressSpaceReadLocker locker;
4955 	VMArea* area;
4956 	status_t status = locker.SetFromArea(id, area);
4957 	if (status != B_OK)
4958 		return status;
4959 
4960 	// nothing to do, if the type doesn't change
4961 	uint32 oldType = area->MemoryType();
4962 	if (type == oldType)
4963 		return B_OK;
4964 
4965 	// set the memory type of the area and the mapped pages
4966 	VMTranslationMap* map = area->address_space->TranslationMap();
4967 	map->Lock();
4968 	area->SetMemoryType(type);
4969 	map->ProtectArea(area, area->protection);
4970 	map->Unlock();
4971 
4972 	// set the physical memory type
4973 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4974 	if (error != B_OK) {
4975 		// reset the memory type of the area and the mapped pages
4976 		map->Lock();
4977 		area->SetMemoryType(oldType);
4978 		map->ProtectArea(area, area->protection);
4979 		map->Unlock();
4980 		return error;
4981 	}
4982 
	return B_OK;
}
4986 
4987 
4988 /*!	This function enforces some protection properties:
4989 	 - kernel areas must be W^X (after kernel startup)
4990 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4991 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4992 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4993 	   and B_KERNEL_WRITE_AREA.
4994 */
4995 static void
4996 fix_protection(uint32* protection)
4997 {
4998 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
4999 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5000 			|| (*protection & B_WRITE_AREA) != 0)
5001 		&& !gKernelStartup)
5002 		panic("kernel areas cannot be both writable and executable!");
5003 
5004 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5005 		if ((*protection & B_USER_PROTECTION) == 0
5006 			|| (*protection & B_WRITE_AREA) != 0)
5007 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5008 		else
5009 			*protection |= B_KERNEL_READ_AREA;
5010 	}
5011 }
5012 
5013 
5014 static void
5015 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5016 {
5017 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5018 	info->area = area->id;
5019 	info->address = (void*)area->Base();
5020 	info->size = area->Size();
5021 	info->protection = area->protection;
5022 	info->lock = B_FULL_LOCK;
5023 	info->team = area->address_space->ID();
5024 	info->copy_count = 0;
5025 	info->in_count = 0;
5026 	info->out_count = 0;
5027 		// TODO: retrieve real values here!
5028 
5029 	VMCache* cache = vm_area_get_locked_cache(area);
5030 
5031 	// Note, this is a simplification; the cache could be larger than this area
5032 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5033 
5034 	vm_area_put_locked_cache(cache);
5035 }
5036 
5037 
5038 static status_t
5039 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5040 {
5041 	// is newSize a multiple of B_PAGE_SIZE?
5042 	if (newSize & (B_PAGE_SIZE - 1))
5043 		return B_BAD_VALUE;
5044 
5045 	// lock all affected address spaces and the cache
5046 	VMArea* area;
5047 	VMCache* cache;
5048 
5049 	MultiAddressSpaceLocker locker;
5050 	AreaCacheLocker cacheLocker;
5051 
5052 	status_t status;
5053 	size_t oldSize;
5054 	bool anyKernelArea;
5055 	bool restart;
5056 
5057 	do {
5058 		anyKernelArea = false;
5059 		restart = false;
5060 
5061 		locker.Unset();
5062 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5063 		if (status != B_OK)
5064 			return status;
5065 		cacheLocker.SetTo(cache, true);	// already locked
5066 
5067 		// enforce restrictions
5068 		if (!kernel && area->address_space == VMAddressSpace::Kernel()) {
5069 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5070 				"resize kernel area %" B_PRId32 " (%s)\n",
5071 				team_get_current_team_id(), areaID, area->name);
5072 			return B_NOT_ALLOWED;
5073 		}
5074 		// TODO: Enforce all restrictions (team, etc.)!
5075 
5076 		oldSize = area->Size();
5077 		if (newSize == oldSize)
5078 			return B_OK;
5079 
5080 		if (cache->type != CACHE_TYPE_RAM)
5081 			return B_NOT_ALLOWED;
5082 
5083 		if (oldSize < newSize) {
5084 			// We need to check if all areas of this cache can be resized.
5085 			for (VMArea* current = cache->areas; current != NULL;
5086 					current = current->cache_next) {
5087 				if (!current->address_space->CanResizeArea(current, newSize))
5088 					return B_ERROR;
5089 				anyKernelArea
5090 					|= current->address_space == VMAddressSpace::Kernel();
5091 			}
5092 		} else {
5093 			// We're shrinking the areas, so we must make sure the affected
5094 			// ranges are not wired.
5095 			for (VMArea* current = cache->areas; current != NULL;
5096 					current = current->cache_next) {
5097 				anyKernelArea
5098 					|= current->address_space == VMAddressSpace::Kernel();
5099 
5100 				if (wait_if_area_range_is_wired(current,
5101 						current->Base() + newSize, oldSize - newSize, &locker,
5102 						&cacheLocker)) {
5103 					restart = true;
5104 					break;
5105 				}
5106 			}
5107 		}
5108 	} while (restart);
5109 
5110 	// Okay, looks good so far, so let's do it
5111 
5112 	int priority = kernel && anyKernelArea
5113 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5114 	uint32 allocationFlags = kernel && anyKernelArea
5115 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5116 
5117 	if (oldSize < newSize) {
5118 		// Growing the cache can fail, so we do it first.
5119 		status = cache->Resize(cache->virtual_base + newSize, priority);
5120 		if (status != B_OK)
5121 			return status;
5122 	}
5123 
5124 	for (VMArea* current = cache->areas; current != NULL;
5125 			current = current->cache_next) {
5126 		status = current->address_space->ResizeArea(current, newSize,
5127 			allocationFlags);
5128 		if (status != B_OK)
5129 			break;
5130 
5131 		// We also need to unmap all pages beyond the new size, if the area has
5132 		// shrunk
5133 		if (newSize < oldSize) {
5134 			VMCacheChainLocker cacheChainLocker(cache);
5135 			cacheChainLocker.LockAllSourceCaches();
5136 
5137 			unmap_pages(current, current->Base() + newSize,
5138 				oldSize - newSize);
5139 
5140 			cacheChainLocker.Unlock(cache);
5141 		}
5142 	}
5143 
5144 	if (status == B_OK) {
5145 		// Shrink or grow individual page protections if in use.
5146 		if (area->page_protections != NULL) {
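			// Page protections are stored as two 4-bit nibbles per byte (one
			// nibble per page), hence the (pageCount + 1) / 2 sizing below and
			// the high-nibble fix-up when the old size covers an odd number
			// of pages.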
5147 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5148 			uint8* newProtections
5149 				= (uint8*)realloc(area->page_protections, bytes);
5150 			if (newProtections == NULL)
5151 				status = B_NO_MEMORY;
5152 			else {
5153 				area->page_protections = newProtections;
5154 
5155 				if (oldSize < newSize) {
5156 					// init the additional page protections to that of the area
5157 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5158 					uint32 areaProtection = area->protection
5159 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5160 					memset(area->page_protections + offset,
5161 						areaProtection | (areaProtection << 4), bytes - offset);
5162 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5163 						uint8& entry = area->page_protections[offset - 1];
5164 						entry = (entry & 0x0f) | (areaProtection << 4);
5165 					}
5166 				}
5167 			}
5168 		}
5169 	}
5170 
5171 	// shrinking the cache can't fail, so we do it now
5172 	if (status == B_OK && newSize < oldSize)
5173 		status = cache->Resize(cache->virtual_base + newSize, priority);
5174 
5175 	if (status != B_OK) {
5176 		// Something failed -- resize the areas back to their original size.
5177 		// This can fail, too, in which case we're seriously screwed.
5178 		for (VMArea* current = cache->areas; current != NULL;
5179 				current = current->cache_next) {
5180 			if (current->address_space->ResizeArea(current, oldSize,
5181 					allocationFlags) != B_OK) {
				panic("vm_resize_area(): Failed and unable to restore original "
					"state.");
5184 			}
5185 		}
5186 
5187 		cache->Resize(cache->virtual_base + oldSize, priority);
5188 	}
5189 
5190 	// TODO: we must honour the lock restrictions of this area
5191 	return status;
5192 }
5193 
5194 
5195 status_t
5196 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5197 {
5198 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5199 }
5200 
5201 
5202 status_t
5203 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5204 {
5205 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5206 }
5207 
5208 
5209 status_t
5210 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5211 	bool user)
5212 {
5213 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5214 }
5215 
5216 
5217 void
5218 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5219 {
5220 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5221 }
5222 
5223 
5224 /*!	Copies a range of memory directly from/to a page that might not be mapped
5225 	at the moment.
5226 
5227 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5228 	walks through the respective area's cache chain to find the physical page
5229 	and copies from/to it directly.
5230 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5231 	must not cross a page boundary.
5232 
5233 	\param teamID The team ID identifying the address space \a unsafeMemory is
5234 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5235 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5236 		is passed, the address space of the thread returned by
5237 		debug_get_debugged_thread() is used.
5238 	\param unsafeMemory The start of the unsafe memory range to be copied
5239 		from/to.
5240 	\param buffer A safely accessible kernel buffer to be copied from/to.
5241 	\param size The number of bytes to be copied.
5242 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5243 		\a unsafeMemory, the other way around otherwise.
5244 */
5245 status_t
5246 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5247 	size_t size, bool copyToUnsafe)
5248 {
5249 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5250 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5251 		return B_BAD_VALUE;
5252 	}
5253 
5254 	// get the address space for the debugged thread
5255 	VMAddressSpace* addressSpace;
5256 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5257 		addressSpace = VMAddressSpace::Kernel();
5258 	} else if (teamID == B_CURRENT_TEAM) {
5259 		Thread* thread = debug_get_debugged_thread();
5260 		if (thread == NULL || thread->team == NULL)
5261 			return B_BAD_ADDRESS;
5262 
5263 		addressSpace = thread->team->address_space;
5264 	} else
5265 		addressSpace = VMAddressSpace::DebugGet(teamID);
5266 
5267 	if (addressSpace == NULL)
5268 		return B_BAD_ADDRESS;
5269 
5270 	// get the area
5271 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5272 	if (area == NULL)
5273 		return B_BAD_ADDRESS;
5274 
5275 	// search the page
5276 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5277 		+ area->cache_offset;
5278 	VMCache* cache = area->cache;
5279 	vm_page* page = NULL;
5280 	while (cache != NULL) {
5281 		page = cache->DebugLookupPage(cacheOffset);
5282 		if (page != NULL)
5283 			break;
5284 
5285 		// Page not found in this cache -- if it is paged out, we must not try
5286 		// to get it from lower caches.
5287 		if (cache->DebugHasPage(cacheOffset))
5288 			break;
5289 
5290 		cache = cache->source;
5291 	}
5292 
5293 	if (page == NULL)
5294 		return B_UNSUPPORTED;
5295 
5296 	// copy from/to physical memory
5297 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5298 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5299 
5300 	if (copyToUnsafe) {
5301 		if (page->Cache() != area->cache)
5302 			return B_UNSUPPORTED;
5303 
5304 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5305 	}
5306 
5307 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5308 }
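

/*	Usage sketch for vm_debug_copy_page_memory() (illustrative only; the
	surrounding kernel debugger command and faultAddress are hypothetical):

		uint8 buffer[16];
		if (vm_debug_copy_page_memory(B_CURRENT_TEAM, (void*)faultAddress,
				buffer, sizeof(buffer), false) == B_OK) {
			// buffer now holds 16 bytes read straight from the physical page
			// backing faultAddress; the requested range must not cross a
			// page boundary.
		}
*/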
5309 
5310 
5311 static inline bool
5312 validate_user_range(const void* addr, size_t size)
5313 {
5314 	addr_t address = (addr_t)addr;
5315 
5316 	// Check for overflows on all addresses.
5317 	if ((address + size) < address)
5318 		return false;
5319 
5320 	// Validate that the address does not cross the kernel/user boundary.
5321 	if (IS_USER_ADDRESS(address))
5322 		return IS_USER_ADDRESS(address + size);
5323 	else
5324 		return !IS_USER_ADDRESS(address + size);
5325 }
5326 
5327 
5328 //	#pragma mark - kernel public API
5329 
5330 
5331 status_t
5332 user_memcpy(void* to, const void* from, size_t size)
5333 {
5334 	if (!validate_user_range(to, size) || !validate_user_range(from, size))
5335 		return B_BAD_ADDRESS;
5336 
5337 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5338 		return B_BAD_ADDRESS;
5339 
5340 	return B_OK;
5341 }
5342 
5343 
5344 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5345 	the string in \a to, NULL-terminating the result.
5346 
5347 	\param to Pointer to the destination C-string.
5348 	\param from Pointer to the source C-string.
5349 	\param size Size in bytes of the string buffer pointed to by \a to.
5350 
5351 	\return strlen(\a from).
5352 */
5353 ssize_t
5354 user_strlcpy(char* to, const char* from, size_t size)
5355 {
5356 	if (to == NULL && size != 0)
5357 		return B_BAD_VALUE;
5358 	if (from == NULL)
5359 		return B_BAD_ADDRESS;
5360 
5361 	// Protect the source address from overflows.
5362 	size_t maxSize = size;
5363 	if ((addr_t)from + maxSize < (addr_t)from)
5364 		maxSize -= (addr_t)from + maxSize;
5365 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5366 		maxSize = USER_TOP - (addr_t)from;
5367 
5368 	if (!validate_user_range(to, maxSize))
5369 		return B_BAD_ADDRESS;
5370 
5371 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5372 	if (result < 0)
5373 		return result;
5374 
5375 	// If we hit the address overflow boundary, fail.
5376 	if ((size_t)result >= maxSize && maxSize < size)
5377 		return B_BAD_ADDRESS;
5378 
5379 	return result;
5380 }
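

/*	Usage sketch for user_strlcpy() (illustrative only; userPath stands for a
	hypothetical string pointer passed in from userland):

		char path[B_PATH_NAME_LENGTH];
		ssize_t length = user_strlcpy(path, userPath, sizeof(path));
		if (length < 0)
			return B_BAD_ADDRESS;
		if ((size_t)length >= sizeof(path))
			return B_NAME_TOO_LONG;
				// the source string did not fit and was truncated

	Since the return value is the length of the source string, a result that
	is >= the buffer size indicates truncation.
*/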
5381 
5382 
5383 status_t
5384 user_memset(void* s, char c, size_t count)
5385 {
5386 	if (!validate_user_range(s, count))
5387 		return B_BAD_ADDRESS;
5388 
5389 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5390 		return B_BAD_ADDRESS;
5391 
5392 	return B_OK;
5393 }
5394 
5395 
5396 /*!	Wires a single page at the given address.
5397 
5398 	\param team The team whose address space the address belongs to. Supports
5399 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5400 		parameter is ignored.
5401 	\param address The virtual address to wire down. Does not need to
5402 		be page aligned.
5403 	\param writable If \c true the page shall be writable.
5404 	\param info On success the info is filled in, among other things
5405 		containing the physical address the given virtual one translates to.
5406 	\return \c B_OK, when the page could be wired, another error code otherwise.
5407 */
5408 status_t
5409 vm_wire_page(team_id team, addr_t address, bool writable,
5410 	VMPageWiringInfo* info)
5411 {
5412 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5413 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5414 
5415 	// compute the page protection that is required
5416 	bool isUser = IS_USER_ADDRESS(address);
5417 	uint32 requiredProtection = PAGE_PRESENT
5418 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5419 	if (writable)
5420 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5421 
5422 	// get and read lock the address space
5423 	VMAddressSpace* addressSpace = NULL;
5424 	if (isUser) {
5425 		if (team == B_CURRENT_TEAM)
5426 			addressSpace = VMAddressSpace::GetCurrent();
5427 		else
5428 			addressSpace = VMAddressSpace::Get(team);
5429 	} else
5430 		addressSpace = VMAddressSpace::GetKernel();
5431 	if (addressSpace == NULL)
5432 		return B_ERROR;
5433 
5434 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5435 
5436 	VMTranslationMap* map = addressSpace->TranslationMap();
5437 	status_t error = B_OK;
5438 
5439 	// get the area
5440 	VMArea* area = addressSpace->LookupArea(pageAddress);
5441 	if (area == NULL) {
5442 		addressSpace->Put();
5443 		return B_BAD_ADDRESS;
5444 	}
5445 
5446 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5447 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5448 
5449 	// mark the area range wired
5450 	area->Wire(&info->range);
5451 
5452 	// Lock the area's cache chain and the translation map. Needed to look
5453 	// up the page and play with its wired count.
5454 	cacheChainLocker.LockAllSourceCaches();
5455 	map->Lock();
5456 
5457 	phys_addr_t physicalAddress;
5458 	uint32 flags;
5459 	vm_page* page;
5460 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5461 		&& (flags & requiredProtection) == requiredProtection
5462 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5463 			!= NULL) {
5464 		// Already mapped with the correct permissions -- just increment
5465 		// the page's wired count.
5466 		increment_page_wired_count(page);
5467 
5468 		map->Unlock();
5469 		cacheChainLocker.Unlock();
5470 		addressSpaceLocker.Unlock();
5471 	} else {
5472 		// Let vm_soft_fault() map the page for us, if possible. We need
5473 		// to fully unlock to avoid deadlocks. Since we have already
5474 		// wired the area itself, nothing disturbing will happen with it
5475 		// in the meantime.
5476 		map->Unlock();
5477 		cacheChainLocker.Unlock();
5478 		addressSpaceLocker.Unlock();
5479 
5480 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5481 			isUser, &page);
5482 
5483 		if (error != B_OK) {
5484 			// The page could not be mapped -- clean up.
5485 			VMCache* cache = vm_area_get_locked_cache(area);
5486 			area->Unwire(&info->range);
5487 			cache->ReleaseRefAndUnlock();
5488 			addressSpace->Put();
5489 			return error;
5490 		}
5491 	}
5492 
5493 	info->physicalAddress
5494 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5495 			+ address % B_PAGE_SIZE;
5496 	info->page = page;
5497 
5498 	return B_OK;
5499 }
5500 
5501 
5502 /*!	Unwires a single page previously wired via vm_wire_page().
5503 
5504 	\param info The same object passed to vm_wire_page() before.
5505 */
5506 void
5507 vm_unwire_page(VMPageWiringInfo* info)
5508 {
5509 	// lock the address space
5510 	VMArea* area = info->range.area;
5511 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5512 		// takes over our reference
5513 
5514 	// lock the top cache
5515 	VMCache* cache = vm_area_get_locked_cache(area);
5516 	VMCacheChainLocker cacheChainLocker(cache);
5517 
5518 	if (info->page->Cache() != cache) {
5519 		// The page is not in the top cache, so we lock the whole cache chain
5520 		// before touching the page's wired count.
5521 		cacheChainLocker.LockAllSourceCaches();
5522 	}
5523 
5524 	decrement_page_wired_count(info->page);
5525 
5526 	// remove the wired range from the area
5527 	area->Unwire(&info->range);
5528 
5529 	cacheChainLocker.Unlock();
5530 }
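

/*	Usage sketch for vm_wire_page()/vm_unwire_page() (illustrative only;
	userAddress and start_device_transfer() are hypothetical):

		VMPageWiringInfo info;
		status_t error = vm_wire_page(B_CURRENT_TEAM, userAddress, true, &info);
		if (error == B_OK) {
			// info.physicalAddress is the physical address corresponding to
			// userAddress and remains valid until the page is unwired.
			start_device_transfer(info.physicalAddress,
				B_PAGE_SIZE - userAddress % B_PAGE_SIZE);
			vm_unwire_page(&info);
		}
*/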
5531 
5532 
5533 /*!	Wires down the given address range in the specified team's address space.
5534 
5535 	If successful the function
5536 	- acquires a reference to the specified team's address space,
5537 	- adds respective wired ranges to all areas that intersect with the given
5538 	  address range,
5539 	- makes sure all pages in the given address range are mapped with the
5540 	  requested access permissions and increments their wired count.
5541 
5542 	It fails when \a team doesn't specify a valid address space, when any part
5543 	of the specified address range is not covered by areas, when the concerned
5544 	areas don't allow mapping with the requested permissions, or when mapping
5545 	failed for another reason.
5546 
5547 	When successful the call must be balanced by an unlock_memory_etc() call with
5548 	the exact same parameters.
5549 
5550 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5551 		supported.
5552 	\param address The start of the address range to be wired.
5553 	\param numBytes The size of the address range to be wired.
5554 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5555 		requests that the range must be wired writable ("read from device
5556 		into memory").
5557 	\return \c B_OK on success, another error code otherwise.
5558 */
5559 status_t
5560 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5561 {
5562 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5563 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5564 
5565 	// compute the page protection that is required
5566 	bool isUser = IS_USER_ADDRESS(address);
5567 	bool writable = (flags & B_READ_DEVICE) == 0;
5568 	uint32 requiredProtection = PAGE_PRESENT
5569 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5570 	if (writable)
5571 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5572 
5573 	uint32 mallocFlags = isUser
5574 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5575 
5576 	// get and read lock the address space
5577 	VMAddressSpace* addressSpace = NULL;
5578 	if (isUser) {
5579 		if (team == B_CURRENT_TEAM)
5580 			addressSpace = VMAddressSpace::GetCurrent();
5581 		else
5582 			addressSpace = VMAddressSpace::Get(team);
5583 	} else
5584 		addressSpace = VMAddressSpace::GetKernel();
5585 	if (addressSpace == NULL)
5586 		return B_ERROR;
5587 
5588 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5589 		// We get a new address space reference here. The one we got above will
5590 		// be freed by unlock_memory_etc().
5591 
5592 	VMTranslationMap* map = addressSpace->TranslationMap();
5593 	status_t error = B_OK;
5594 
5595 	// iterate through all concerned areas
5596 	addr_t nextAddress = lockBaseAddress;
5597 	while (nextAddress != lockEndAddress) {
5598 		// get the next area
5599 		VMArea* area = addressSpace->LookupArea(nextAddress);
5600 		if (area == NULL) {
5601 			error = B_BAD_ADDRESS;
5602 			break;
5603 		}
5604 
5605 		addr_t areaStart = nextAddress;
5606 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5607 
5608 		// allocate the wired range (do that before locking the cache to avoid
5609 		// deadlocks)
5610 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5611 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5612 		if (range == NULL) {
5613 			error = B_NO_MEMORY;
5614 			break;
5615 		}
5616 
5617 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5618 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5619 
5620 		// mark the area range wired
5621 		area->Wire(range);
5622 
5623 		// Depending on the area cache type and the wiring, we may not need to
5624 		// look at the individual pages.
5625 		if (area->cache_type == CACHE_TYPE_NULL
5626 			|| area->cache_type == CACHE_TYPE_DEVICE
5627 			|| area->wiring == B_FULL_LOCK
5628 			|| area->wiring == B_CONTIGUOUS) {
5629 			nextAddress = areaEnd;
5630 			continue;
5631 		}
5632 
5633 		// Lock the area's cache chain and the translation map. Needed to look
5634 		// up pages and play with their wired count.
5635 		cacheChainLocker.LockAllSourceCaches();
5636 		map->Lock();
5637 
5638 		// iterate through the pages and wire them
5639 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5640 			phys_addr_t physicalAddress;
5641 			uint32 flags;
5642 
5643 			vm_page* page;
5644 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5645 				&& (flags & requiredProtection) == requiredProtection
5646 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5647 					!= NULL) {
5648 				// Already mapped with the correct permissions -- just increment
5649 				// the page's wired count.
5650 				increment_page_wired_count(page);
5651 			} else {
5652 				// Let vm_soft_fault() map the page for us, if possible. We need
5653 				// to fully unlock to avoid deadlocks. Since we have already
5654 				// wired the area itself, nothing disturbing will happen with it
5655 				// in the meantime.
5656 				map->Unlock();
5657 				cacheChainLocker.Unlock();
5658 				addressSpaceLocker.Unlock();
5659 
5660 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5661 					false, isUser, &page);
5662 
5663 				addressSpaceLocker.Lock();
5664 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5665 				cacheChainLocker.LockAllSourceCaches();
5666 				map->Lock();
5667 			}
5668 
5669 			if (error != B_OK)
5670 				break;
5671 		}
5672 
5673 		map->Unlock();
5674 
5675 		if (error == B_OK) {
5676 			cacheChainLocker.Unlock();
5677 		} else {
5678 			// An error occurred, so abort right here. If the current address
5679 			// is the first in this area, unwire the area, since we won't get
5680 			// to it when reverting what we've done so far.
5681 			if (nextAddress == areaStart) {
5682 				area->Unwire(range);
5683 				cacheChainLocker.Unlock();
5684 				range->~VMAreaWiredRange();
5685 				free_etc(range, mallocFlags);
5686 			} else
5687 				cacheChainLocker.Unlock();
5688 
5689 			break;
5690 		}
5691 	}
5692 
5693 	if (error != B_OK) {
5694 		// An error occurred, so unwire all that we've already wired. Note that
5695 		// even if not a single page was wired, unlock_memory_etc() is called
5696 		// to put the address space reference.
5697 		addressSpaceLocker.Unlock();
5698 		unlock_memory_etc(team, (void*)lockBaseAddress,
5699 			nextAddress - lockBaseAddress, flags);
5700 	}
5701 
5702 	return error;
5703 }
5704 
5705 
5706 status_t
5707 lock_memory(void* address, size_t numBytes, uint32 flags)
5708 {
5709 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5710 }
5711 
5712 
5713 /*!	Unwires an address range previously wired with lock_memory_etc().
5714 
5715 	Note that a call to this function must balance a previous lock_memory_etc()
5716 	call with exactly the same parameters.
5717 */
5718 status_t
5719 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5720 {
5721 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5722 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5723 
5724 	// compute the page protection that is required
5725 	bool isUser = IS_USER_ADDRESS(address);
5726 	bool writable = (flags & B_READ_DEVICE) == 0;
5727 	uint32 requiredProtection = PAGE_PRESENT
5728 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5729 	if (writable)
5730 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5731 
5732 	uint32 mallocFlags = isUser
5733 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5734 
5735 	// get and read lock the address space
5736 	VMAddressSpace* addressSpace = NULL;
5737 	if (isUser) {
5738 		if (team == B_CURRENT_TEAM)
5739 			addressSpace = VMAddressSpace::GetCurrent();
5740 		else
5741 			addressSpace = VMAddressSpace::Get(team);
5742 	} else
5743 		addressSpace = VMAddressSpace::GetKernel();
5744 	if (addressSpace == NULL)
5745 		return B_ERROR;
5746 
5747 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5748 		// Take over the address space reference. We don't unlock until we're
5749 		// done.
5750 
5751 	VMTranslationMap* map = addressSpace->TranslationMap();
5752 	status_t error = B_OK;
5753 
5754 	// iterate through all concerned areas
5755 	addr_t nextAddress = lockBaseAddress;
5756 	while (nextAddress != lockEndAddress) {
5757 		// get the next area
5758 		VMArea* area = addressSpace->LookupArea(nextAddress);
5759 		if (area == NULL) {
5760 			error = B_BAD_ADDRESS;
5761 			break;
5762 		}
5763 
5764 		addr_t areaStart = nextAddress;
5765 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5766 
5767 		// Lock the area's top cache. This is a requirement for
5768 		// VMArea::Unwire().
5769 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5770 
5771 		// Depending on the area cache type and the wiring, we may not need to
5772 		// look at the individual pages.
5773 		if (area->cache_type == CACHE_TYPE_NULL
5774 			|| area->cache_type == CACHE_TYPE_DEVICE
5775 			|| area->wiring == B_FULL_LOCK
5776 			|| area->wiring == B_CONTIGUOUS) {
5777 			// unwire the range (to avoid deadlocks we delete the range after
5778 			// unlocking the cache)
5779 			nextAddress = areaEnd;
5780 			VMAreaWiredRange* range = area->Unwire(areaStart,
5781 				areaEnd - areaStart, writable);
5782 			cacheChainLocker.Unlock();
5783 			if (range != NULL) {
5784 				range->~VMAreaWiredRange();
5785 				free_etc(range, mallocFlags);
5786 			}
5787 			continue;
5788 		}
5789 
5790 		// Lock the area's cache chain and the translation map. Needed to look
5791 		// up pages and play with their wired count.
5792 		cacheChainLocker.LockAllSourceCaches();
5793 		map->Lock();
5794 
5795 		// iterate through the pages and unwire them
5796 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5797 			phys_addr_t physicalAddress;
5798 			uint32 flags;
5799 
5800 			vm_page* page;
5801 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5802 				&& (flags & PAGE_PRESENT) != 0
5803 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5804 					!= NULL) {
5805 				// The page is still mapped and present -- just decrement
5806 				// its wired count.
5807 				decrement_page_wired_count(page);
5808 			} else {
5809 				panic("unlock_memory_etc(): Failed to unwire page: address "
5810 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5811 					nextAddress);
5812 				error = B_BAD_VALUE;
5813 				break;
5814 			}
5815 		}
5816 
5817 		map->Unlock();
5818 
5819 		// All pages are unwired. Remove the area's wired range as well (to
5820 		// avoid deadlocks we delete the range after unlocking the cache).
5821 		VMAreaWiredRange* range = area->Unwire(areaStart,
5822 			areaEnd - areaStart, writable);
5823 
5824 		cacheChainLocker.Unlock();
5825 
5826 		if (range != NULL) {
5827 			range->~VMAreaWiredRange();
5828 			free_etc(range, mallocFlags);
5829 		}
5830 
5831 		if (error != B_OK)
5832 			break;
5833 	}
5834 
5835 	// get rid of the address space reference lock_memory_etc() acquired
5836 	addressSpace->Put();
5837 
5838 	return error;
5839 }
5840 
5841 
5842 status_t
5843 unlock_memory(void* address, size_t numBytes, uint32 flags)
5844 {
5845 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5846 }
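

/*	Usage sketch for lock_memory()/unlock_memory() (illustrative only; buffer
	and length describe a hypothetical I/O buffer):

		status_t error = lock_memory(buffer, length, 0);
		if (error != B_OK)
			return error;

		// All pages in [buffer, buffer + length) are now wired and mapped;
		// translate them (e.g. via get_memory_map()) and start the transfer.

		unlock_memory(buffer, length, 0);

	The flags passed to unlock_memory() must match those of the corresponding
	lock_memory() call; pass B_READ_DEVICE as appropriate for the transfer
	direction.
*/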
5847 
5848 
5849 /*!	Similar to get_memory_map(), but also allows specifying the address space
5850 	for the memory in question and has saner semantics.
5851 	Returns \c B_OK when the complete range could be translated or
5852 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5853 	case the actual number of entries is written to \c *_numEntries. Any other
5854 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5855 	in this case.
5856 */
5857 status_t
5858 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5859 	physical_entry* table, uint32* _numEntries)
5860 {
5861 	uint32 numEntries = *_numEntries;
5862 	*_numEntries = 0;
5863 
5864 	VMAddressSpace* addressSpace;
5865 	addr_t virtualAddress = (addr_t)address;
5866 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5867 	phys_addr_t physicalAddress;
5868 	status_t status = B_OK;
5869 	int32 index = -1;
5870 	addr_t offset = 0;
5871 	bool interrupts = are_interrupts_enabled();
5872 
5873 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5874 		"entries)\n", team, address, numBytes, numEntries));
5875 
5876 	if (numEntries == 0 || numBytes == 0)
5877 		return B_BAD_VALUE;
5878 
5879 	// in which address space is the address to be found?
5880 	if (IS_USER_ADDRESS(virtualAddress)) {
5881 		if (team == B_CURRENT_TEAM)
5882 			addressSpace = VMAddressSpace::GetCurrent();
5883 		else
5884 			addressSpace = VMAddressSpace::Get(team);
5885 	} else
5886 		addressSpace = VMAddressSpace::GetKernel();
5887 
5888 	if (addressSpace == NULL)
5889 		return B_ERROR;
5890 
5891 	VMTranslationMap* map = addressSpace->TranslationMap();
5892 
5893 	if (interrupts)
5894 		map->Lock();
5895 
5896 	while (offset < numBytes) {
5897 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5898 		uint32 flags;
5899 
5900 		if (interrupts) {
5901 			status = map->Query((addr_t)address + offset, &physicalAddress,
5902 				&flags);
5903 		} else {
5904 			status = map->QueryInterrupt((addr_t)address + offset,
5905 				&physicalAddress, &flags);
5906 		}
5907 		if (status < B_OK)
5908 			break;
5909 		if ((flags & PAGE_PRESENT) == 0) {
5910 			panic("get_memory_map() called on unmapped memory!");
5911 			return B_BAD_ADDRESS;
5912 		}
5913 
5914 		if (index < 0 && pageOffset > 0) {
5915 			physicalAddress += pageOffset;
5916 			if (bytes > B_PAGE_SIZE - pageOffset)
5917 				bytes = B_PAGE_SIZE - pageOffset;
5918 		}
5919 
5920 		// need to switch to the next physical_entry?
5921 		if (index < 0 || table[index].address
5922 				!= physicalAddress - table[index].size) {
5923 			if ((uint32)++index + 1 > numEntries) {
5924 				// table too small
5925 				break;
5926 			}
5927 			table[index].address = physicalAddress;
5928 			table[index].size = bytes;
5929 		} else {
5930 			// the page fits into the current entry
5931 			table[index].size += bytes;
5932 		}
5933 
5934 		offset += bytes;
5935 	}
5936 
5937 	if (interrupts)
5938 		map->Unlock();
5939 
5940 	if (status != B_OK)
5941 		return status;
5942 
5943 	if ((uint32)index + 1 > numEntries) {
5944 		*_numEntries = index;
5945 		return B_BUFFER_OVERFLOW;
5946 	}
5947 
5948 	*_numEntries = index + 1;
5949 	return B_OK;
5950 }
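

/*	Usage sketch for get_memory_map_etc() (illustrative only; buffer and
	length describe a hypothetical locked memory range):

		physical_entry entries[8];
		uint32 count = 8;
		status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
			entries, &count);
		if (error == B_OK || error == B_BUFFER_OVERFLOW) {
			// count holds the number of valid entries; B_BUFFER_OVERFLOW only
			// means the range continues beyond the last entry
			for (uint32 i = 0; i < count; i++) {
				dprintf("run %" B_PRIu32 ": %#" B_PRIxPHYSADDR ", %" B_PRIu64
					" bytes\n", i, entries[i].address,
					(uint64)entries[i].size);
			}
		}
*/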
5951 
5952 
5953 /*!	According to the BeBook, this function should always succeed.
5954 	This is no longer the case.
5955 */
5956 extern "C" int32
5957 __get_memory_map_haiku(const void* address, size_t numBytes,
5958 	physical_entry* table, int32 numEntries)
5959 {
5960 	uint32 entriesRead = numEntries;
5961 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5962 		table, &entriesRead);
5963 	if (error != B_OK)
5964 		return error;
5965 
5966 	// close the entry list
5967 
5968 	// if it's only one entry, we will silently accept the missing ending
5969 	if (numEntries == 1)
5970 		return B_OK;
5971 
5972 	if (entriesRead + 1 > (uint32)numEntries)
5973 		return B_BUFFER_OVERFLOW;
5974 
5975 	table[entriesRead].address = 0;
5976 	table[entriesRead].size = 0;
5977 
5978 	return B_OK;
5979 }
5980 
5981 
5982 area_id
5983 area_for(void* address)
5984 {
5985 	return vm_area_for((addr_t)address, true);
5986 }
5987 
5988 
5989 area_id
5990 find_area(const char* name)
5991 {
5992 	return VMAreaHash::Find(name);
5993 }
5994 
5995 
5996 status_t
5997 _get_area_info(area_id id, area_info* info, size_t size)
5998 {
5999 	if (size != sizeof(area_info) || info == NULL)
6000 		return B_BAD_VALUE;
6001 
6002 	AddressSpaceReadLocker locker;
6003 	VMArea* area;
6004 	status_t status = locker.SetFromArea(id, area);
6005 	if (status != B_OK)
6006 		return status;
6007 
6008 	fill_area_info(area, info, size);
6009 	return B_OK;
6010 }
6011 
6012 
6013 status_t
6014 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6015 {
6016 	addr_t nextBase = *(addr_t*)cookie;
6017 
6018 	// we're already through the list
6019 	if (nextBase == (addr_t)-1)
6020 		return B_ENTRY_NOT_FOUND;
6021 
6022 	if (team == B_CURRENT_TEAM)
6023 		team = team_get_current_team_id();
6024 
6025 	AddressSpaceReadLocker locker(team);
6026 	if (!locker.IsLocked())
6027 		return B_BAD_TEAM_ID;
6028 
6029 	VMArea* area;
6030 	for (VMAddressSpace::AreaIterator it
6031 				= locker.AddressSpace()->GetAreaIterator();
6032 			(area = it.Next()) != NULL;) {
6033 		if (area->Base() > nextBase)
6034 			break;
6035 	}
6036 
6037 	if (area == NULL) {
6038 		nextBase = (addr_t)-1;
6039 		return B_ENTRY_NOT_FOUND;
6040 	}
6041 
6042 	fill_area_info(area, info, size);
6043 	*cookie = (ssize_t)(area->Base());
6044 
6045 	return B_OK;
6046 }
6047 
6048 
6049 status_t
6050 set_area_protection(area_id area, uint32 newProtection)
6051 {
6052 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6053 		newProtection, true);
6054 }
6055 
6056 
6057 status_t
6058 resize_area(area_id areaID, size_t newSize)
6059 {
6060 	return vm_resize_area(areaID, newSize, true);
6061 }
6062 
6063 
6064 /*!	Transfers the specified area to a new team. The caller must be the owner
6065 	of the area.
6066 */
6067 area_id
6068 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6069 	bool kernel)
6070 {
6071 	area_info info;
6072 	status_t status = get_area_info(id, &info);
6073 	if (status != B_OK)
6074 		return status;
6075 
6076 	if (info.team != thread_get_current_thread()->team->id)
6077 		return B_PERMISSION_DENIED;
6078 
6079 	// We need to mark the area cloneable so the following operations work.
6080 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6081 	if (status != B_OK)
6082 		return status;
6083 
6084 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6085 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6086 	if (clonedArea < 0)
6087 		return clonedArea;
6088 
6089 	status = vm_delete_area(info.team, id, kernel);
6090 	if (status != B_OK) {
6091 		vm_delete_area(target, clonedArea, kernel);
6092 		return status;
6093 	}
6094 
6095 	// Now we can reset the protection to whatever it was before.
6096 	set_area_protection(clonedArea, info.protection);
6097 
6098 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6099 
6100 	return clonedArea;
6101 }
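

/*	Usage sketch for transfer_area() (illustrative only; sharedArea and
	targetTeam are hypothetical):

		void* address = NULL;
		area_id newArea = transfer_area(sharedArea, &address, B_ANY_ADDRESS,
			targetTeam, true);
		if (newArea < 0)
			return newArea;
		// sharedArea has been deleted; newArea now lives in targetTeam's
		// address space at the location stored in address.
*/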
6102 
6103 
6104 extern "C" area_id
6105 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6106 	size_t numBytes, uint32 addressSpec, uint32 protection,
6107 	void** _virtualAddress)
6108 {
6109 	if (!arch_vm_supports_protection(protection))
6110 		return B_NOT_SUPPORTED;
6111 
6112 	fix_protection(&protection);
6113 
6114 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6115 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6116 		false);
6117 }
6118 
6119 
6120 area_id
6121 clone_area(const char* name, void** _address, uint32 addressSpec,
6122 	uint32 protection, area_id source)
6123 {
6124 	if ((protection & B_KERNEL_PROTECTION) == 0)
6125 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6126 
6127 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6128 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6129 }
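

/*	Usage sketch for clone_area() (illustrative only; frameBufferArea is a
	hypothetical cloneable source area):

		void* baseAddress;
		area_id clone = clone_area("frame buffer clone", &baseAddress,
			B_ANY_KERNEL_ADDRESS, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
			frameBufferArea);
		if (clone < 0)
			return clone;
		// baseAddress now points to a kernel mapping of the same memory
		// backing frameBufferArea.
*/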
6130 
6131 
6132 area_id
6133 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6134 	uint32 protection, uint32 flags, uint32 guardSize,
6135 	const virtual_address_restrictions* virtualAddressRestrictions,
6136 	const physical_address_restrictions* physicalAddressRestrictions,
6137 	void** _address)
6138 {
6139 	fix_protection(&protection);
6140 
6141 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6142 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6143 		true, _address);
6144 }
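

/*	Usage sketch for create_area_etc() (illustrative only; the 4 GB limit
	models a hypothetical DMA engine restricted to 32-bit addresses and
	assumes a kernel built with B_HAIKU_PHYSICAL_BITS > 32):

		virtual_address_restrictions virtualRestrictions = {};
		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;

		physical_address_restrictions physicalRestrictions = {};
		physicalRestrictions.high_address = (phys_addr_t)1 << 32;

		void* address;
		area_id area = create_area_etc(B_SYSTEM_TEAM, "dma buffer",
			4 * B_PAGE_SIZE, B_CONTIGUOUS,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
			&virtualRestrictions, &physicalRestrictions, &address);
*/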
6145 
6146 
6147 extern "C" area_id
6148 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6149 	size_t size, uint32 lock, uint32 protection)
6150 {
6151 	fix_protection(&protection);
6152 
6153 	virtual_address_restrictions virtualRestrictions = {};
6154 	virtualRestrictions.address = *_address;
6155 	virtualRestrictions.address_specification = addressSpec;
6156 	physical_address_restrictions physicalRestrictions = {};
6157 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6158 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6159 		true, _address);
6160 }
6161 
6162 
6163 status_t
6164 delete_area(area_id area)
6165 {
6166 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6167 }
6168 
6169 
6170 //	#pragma mark - Userland syscalls
6171 
6172 
6173 status_t
6174 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6175 	addr_t size)
6176 {
6177 	// filter out some unavailable values (for userland)
6178 	switch (addressSpec) {
6179 		case B_ANY_KERNEL_ADDRESS:
6180 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6181 			return B_BAD_VALUE;
6182 	}
6183 
6184 	addr_t address;
6185 
6186 	if (!IS_USER_ADDRESS(userAddress)
6187 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6188 		return B_BAD_ADDRESS;
6189 
6190 	status_t status = vm_reserve_address_range(
6191 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6192 		RESERVED_AVOID_BASE);
6193 	if (status != B_OK)
6194 		return status;
6195 
6196 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6197 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6198 			(void*)address, size);
6199 		return B_BAD_ADDRESS;
6200 	}
6201 
6202 	return B_OK;
6203 }
6204 
6205 
6206 status_t
6207 _user_unreserve_address_range(addr_t address, addr_t size)
6208 {
6209 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6210 		(void*)address, size);
6211 }
6212 
6213 
6214 area_id
6215 _user_area_for(void* address)
6216 {
6217 	return vm_area_for((addr_t)address, false);
6218 }
6219 
6220 
6221 area_id
6222 _user_find_area(const char* userName)
6223 {
6224 	char name[B_OS_NAME_LENGTH];
6225 
6226 	if (!IS_USER_ADDRESS(userName)
6227 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6228 		return B_BAD_ADDRESS;
6229 
6230 	return find_area(name);
6231 }
6232 
6233 
6234 status_t
6235 _user_get_area_info(area_id area, area_info* userInfo)
6236 {
6237 	if (!IS_USER_ADDRESS(userInfo))
6238 		return B_BAD_ADDRESS;
6239 
6240 	area_info info;
6241 	status_t status = get_area_info(area, &info);
6242 	if (status < B_OK)
6243 		return status;
6244 
6245 	// TODO: do we want to prevent userland from seeing kernel protections?
6246 	//info.protection &= B_USER_PROTECTION;
6247 
6248 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6249 		return B_BAD_ADDRESS;
6250 
6251 	return status;
6252 }
6253 
6254 
6255 status_t
6256 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6257 {
6258 	ssize_t cookie;
6259 
6260 	if (!IS_USER_ADDRESS(userCookie)
6261 		|| !IS_USER_ADDRESS(userInfo)
6262 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6263 		return B_BAD_ADDRESS;
6264 
6265 	area_info info;
6266 	status_t status = _get_next_area_info(team, &cookie, &info,
6267 		sizeof(area_info));
6268 	if (status != B_OK)
6269 		return status;
6270 
6271 	//info.protection &= B_USER_PROTECTION;
6272 
6273 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6274 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6275 		return B_BAD_ADDRESS;
6276 
6277 	return status;
6278 }
6279 
6280 
6281 status_t
6282 _user_set_area_protection(area_id area, uint32 newProtection)
6283 {
6284 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6285 		return B_BAD_VALUE;
6286 
6287 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6288 		newProtection, false);
6289 }
6290 
6291 
6292 status_t
6293 _user_resize_area(area_id area, size_t newSize)
6294 {
6295 	// TODO: Since we restrict deleting of areas to those owned by the team,
6296 	// we should also do that for resizing (check other functions, too).
6297 	return vm_resize_area(area, newSize, false);
6298 }
6299 
6300 
6301 area_id
6302 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6303 	team_id target)
6304 {
6305 	// filter out some unavailable values (for userland)
6306 	switch (addressSpec) {
6307 		case B_ANY_KERNEL_ADDRESS:
6308 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6309 			return B_BAD_VALUE;
6310 	}
6311 
6312 	void* address;
6313 	if (!IS_USER_ADDRESS(userAddress)
6314 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6315 		return B_BAD_ADDRESS;
6316 
6317 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6318 	if (newArea < B_OK)
6319 		return newArea;
6320 
6321 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6322 		return B_BAD_ADDRESS;
6323 
6324 	return newArea;
6325 }
6326 
6327 
6328 area_id
6329 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6330 	uint32 protection, area_id sourceArea)
6331 {
6332 	char name[B_OS_NAME_LENGTH];
6333 	void* address;
6334 
6335 	// filter out some unavailable values (for userland)
6336 	switch (addressSpec) {
6337 		case B_ANY_KERNEL_ADDRESS:
6338 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6339 			return B_BAD_VALUE;
6340 	}
6341 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6342 		return B_BAD_VALUE;
6343 
6344 	if (!IS_USER_ADDRESS(userName)
6345 		|| !IS_USER_ADDRESS(userAddress)
6346 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6347 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6348 		return B_BAD_ADDRESS;
6349 
6350 	fix_protection(&protection);
6351 
6352 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6353 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6354 		false);
6355 	if (clonedArea < B_OK)
6356 		return clonedArea;
6357 
6358 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6359 		delete_area(clonedArea);
6360 		return B_BAD_ADDRESS;
6361 	}
6362 
6363 	return clonedArea;
6364 }
6365 
6366 
6367 area_id
6368 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6369 	size_t size, uint32 lock, uint32 protection)
6370 {
6371 	char name[B_OS_NAME_LENGTH];
6372 	void* address;
6373 
6374 	// filter out some unavailable values (for userland)
6375 	switch (addressSpec) {
6376 		case B_ANY_KERNEL_ADDRESS:
6377 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6378 			return B_BAD_VALUE;
6379 	}
6380 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6381 		return B_BAD_VALUE;
6382 
6383 	if (!IS_USER_ADDRESS(userName)
6384 		|| !IS_USER_ADDRESS(userAddress)
6385 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6386 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6387 		return B_BAD_ADDRESS;
6388 
6389 	if (addressSpec == B_EXACT_ADDRESS
6390 		&& IS_KERNEL_ADDRESS(address))
6391 		return B_BAD_VALUE;
6392 
6393 	if (addressSpec == B_ANY_ADDRESS)
6394 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6395 	if (addressSpec == B_BASE_ADDRESS)
6396 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6397 
6398 	fix_protection(&protection);
6399 
6400 	virtual_address_restrictions virtualRestrictions = {};
6401 	virtualRestrictions.address = address;
6402 	virtualRestrictions.address_specification = addressSpec;
6403 	physical_address_restrictions physicalRestrictions = {};
6404 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6405 		size, lock, protection, 0, 0, &virtualRestrictions,
6406 		&physicalRestrictions, false, &address);
6407 
6408 	if (area >= B_OK
6409 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6410 		delete_area(area);
6411 		return B_BAD_ADDRESS;
6412 	}
6413 
6414 	return area;
6415 }
6416 
6417 
6418 status_t
6419 _user_delete_area(area_id area)
6420 {
6421 	// Unlike the BeOS implementation, you can now only delete areas
6422 	// that you have created yourself from userland.
6423 	// The documentation to delete_area() explicitly states that this
6424 	// will be restricted in the future, and so it will.
6425 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6426 }
6427 
6428 
6429 // TODO: create a BeOS style call for this!
6430 
6431 area_id
6432 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6433 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6434 	int fd, off_t offset)
6435 {
6436 	char name[B_OS_NAME_LENGTH];
6437 	void* address;
6438 	area_id area;
6439 
6440 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6441 		return B_BAD_VALUE;
6442 
6443 	fix_protection(&protection);
6444 
6445 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6446 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6447 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6448 		return B_BAD_ADDRESS;
6449 
6450 	if (addressSpec == B_EXACT_ADDRESS) {
6451 		if ((addr_t)address + size < (addr_t)address
6452 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6453 			return B_BAD_VALUE;
6454 		}
6455 		if (!IS_USER_ADDRESS(address)
6456 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6457 			return B_BAD_ADDRESS;
6458 		}
6459 	}
6460 
6461 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6462 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6463 		false);
6464 	if (area < B_OK)
6465 		return area;
6466 
6467 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6468 		return B_BAD_ADDRESS;
6469 
6470 	return area;
6471 }
6472 
6473 
6474 status_t
6475 _user_unmap_memory(void* _address, size_t size)
6476 {
6477 	addr_t address = (addr_t)_address;
6478 
6479 	// check params
6480 	if (size == 0 || (addr_t)address + size < (addr_t)address
6481 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6482 		return B_BAD_VALUE;
6483 	}
6484 
6485 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6486 		return B_BAD_ADDRESS;
6487 
6488 	// Write lock the address space and ensure the address range is not wired.
6489 	AddressSpaceWriteLocker locker;
6490 	do {
6491 		status_t status = locker.SetTo(team_get_current_team_id());
6492 		if (status != B_OK)
6493 			return status;
6494 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6495 			size, &locker));
6496 
6497 	// unmap
6498 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6499 }
6500 
6501 
6502 status_t
6503 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6504 {
6505 	// check address range
6506 	addr_t address = (addr_t)_address;
6507 	size = PAGE_ALIGN(size);
6508 
6509 	if ((address % B_PAGE_SIZE) != 0)
6510 		return B_BAD_VALUE;
6511 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6512 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6513 		// weird error code required by POSIX
6514 		return ENOMEM;
6515 	}
6516 
6517 	// extend and check protection
6518 	if ((protection & ~B_USER_PROTECTION) != 0)
6519 		return B_BAD_VALUE;
6520 
6521 	fix_protection(&protection);
6522 
6523 	// We need to write lock the address space, since we're going to play with
6524 	// the areas. Also make sure that none of the areas is wired and that we're
6525 	// actually allowed to change the protection.
6526 	AddressSpaceWriteLocker locker;
6527 
6528 	bool restart;
6529 	do {
6530 		restart = false;
6531 
6532 		status_t status = locker.SetTo(team_get_current_team_id());
6533 		if (status != B_OK)
6534 			return status;
6535 
6536 		// First round: Check whether the whole range is covered by areas and
6537 		// whether we're allowed to modify them.
6538 		addr_t currentAddress = address;
6539 		size_t sizeLeft = size;
6540 		while (sizeLeft > 0) {
6541 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6542 			if (area == NULL)
6543 				return B_NO_MEMORY;
6544 
6545 			if (area->address_space == VMAddressSpace::Kernel())
6546 				return B_NOT_ALLOWED;
6547 
6548 			// TODO: For (shared) mapped files we should check whether the new
6549 			// protections are compatible with the file permissions. We don't
6550 			// have a way to do that yet, though.
6551 
6552 			addr_t offset = currentAddress - area->Base();
6553 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6554 
6555 			AreaCacheLocker cacheLocker(area);
6556 
6557 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6558 					&locker, &cacheLocker)) {
6559 				restart = true;
6560 				break;
6561 			}
6562 
6563 			cacheLocker.Unlock();
6564 
6565 			currentAddress += rangeSize;
6566 			sizeLeft -= rangeSize;
6567 		}
6568 	} while (restart);
6569 
6570 	// Second round: If the protections differ from that of the area, create a
6571 	// page protection array and re-map mapped pages.
6572 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6573 	addr_t currentAddress = address;
6574 	size_t sizeLeft = size;
6575 	while (sizeLeft > 0) {
6576 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6577 		if (area == NULL)
6578 			return B_NO_MEMORY;
6579 
6580 		addr_t offset = currentAddress - area->Base();
6581 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6582 
6583 		currentAddress += rangeSize;
6584 		sizeLeft -= rangeSize;
6585 
6586 		if (area->page_protections == NULL) {
6587 			if (area->protection == protection)
6588 				continue;
6589 
6590 			status_t status = allocate_area_page_protections(area);
6591 			if (status != B_OK)
6592 				return status;
6593 		}
6594 
6595 		// We need to lock the complete cache chain, since we potentially unmap
6596 		// pages of lower caches.
6597 		VMCache* topCache = vm_area_get_locked_cache(area);
6598 		VMCacheChainLocker cacheChainLocker(topCache);
6599 		cacheChainLocker.LockAllSourceCaches();
6600 
6601 		for (addr_t pageAddress = area->Base() + offset;
6602 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6603 			map->Lock();
6604 
6605 			set_area_page_protection(area, pageAddress, protection);
6606 
6607 			phys_addr_t physicalAddress;
6608 			uint32 flags;
6609 
6610 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6611 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6612 				map->Unlock();
6613 				continue;
6614 			}
6615 
6616 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6617 			if (page == NULL) {
6618 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6619 					"\n", area, physicalAddress);
6620 				map->Unlock();
6621 				return B_ERROR;
6622 			}
6623 
6624 			// If the page is not in the topmost cache and write access is
6625 			// requested, we have to unmap it. Otherwise we can re-map it with
6626 			// the new protection.
6627 			bool unmapPage = page->Cache() != topCache
6628 				&& (protection & B_WRITE_AREA) != 0;
6629 
6630 			if (!unmapPage)
6631 				map->ProtectPage(area, pageAddress, protection);
6632 
6633 			map->Unlock();
6634 
6635 			if (unmapPage) {
6636 				DEBUG_PAGE_ACCESS_START(page);
6637 				unmap_page(area, pageAddress);
6638 				DEBUG_PAGE_ACCESS_END(page);
6639 			}
6640 		}
6641 	}
6642 
6643 	return B_OK;
6644 }
6645 
6646 
6647 status_t
6648 _user_sync_memory(void* _address, size_t size, uint32 flags)
6649 {
6650 	addr_t address = (addr_t)_address;
6651 	size = PAGE_ALIGN(size);
6652 
6653 	// check params
6654 	if ((address % B_PAGE_SIZE) != 0)
6655 		return B_BAD_VALUE;
6656 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6657 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6658 		// weird error code required by POSIX
6659 		return ENOMEM;
6660 	}
6661 
6662 	bool writeSync = (flags & MS_SYNC) != 0;
6663 	bool writeAsync = (flags & MS_ASYNC) != 0;
6664 	if (writeSync && writeAsync)
6665 		return B_BAD_VALUE;
6666 
6667 	if (size == 0 || (!writeSync && !writeAsync))
6668 		return B_OK;
6669 
6670 	// iterate through the range and sync all concerned areas
6671 	while (size > 0) {
6672 		// read lock the address space
6673 		AddressSpaceReadLocker locker;
6674 		status_t error = locker.SetTo(team_get_current_team_id());
6675 		if (error != B_OK)
6676 			return error;
6677 
6678 		// get the first area
6679 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6680 		if (area == NULL)
6681 			return B_NO_MEMORY;
6682 
6683 		uint32 offset = address - area->Base();
6684 		size_t rangeSize = min_c(area->Size() - offset, size);
6685 		offset += area->cache_offset;
6686 
6687 		// lock the cache
6688 		AreaCacheLocker cacheLocker(area);
6689 		if (!cacheLocker)
6690 			return B_BAD_VALUE;
6691 		VMCache* cache = area->cache;
6692 
6693 		locker.Unlock();
6694 
6695 		uint32 firstPage = offset >> PAGE_SHIFT;
6696 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6697 
6698 		// write the pages
6699 		if (cache->type == CACHE_TYPE_VNODE) {
6700 			if (writeSync) {
6701 				// synchronous
6702 				error = vm_page_write_modified_page_range(cache, firstPage,
6703 					endPage);
6704 				if (error != B_OK)
6705 					return error;
6706 			} else {
6707 				// asynchronous
6708 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6709 				// TODO: This is probably not quite what is supposed to happen.
6710 				// Especially when a lot has to be written, it might take ages
6711 				// until it really hits the disk.
6712 			}
6713 		}
6714 
6715 		address += rangeSize;
6716 		size -= rangeSize;
6717 	}
6718 
6719 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6720 	// synchronize multiple mappings of the same file. In our VM they never get
6721 	// out of sync, though, so we don't have to do anything.
6722 
6723 	return B_OK;
6724 }
6725 
6726 
6727 status_t
6728 _user_memory_advice(void* address, size_t size, uint32 advice)
6729 {
6730 	// TODO: Implement!
6731 	return B_OK;
6732 }
6733 
6734 
6735 status_t
6736 _user_get_memory_properties(team_id teamID, const void* address,
6737 	uint32* _protected, uint32* _lock)
6738 {
6739 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6740 		return B_BAD_ADDRESS;
6741 
6742 	AddressSpaceReadLocker locker;
6743 	status_t error = locker.SetTo(teamID);
6744 	if (error != B_OK)
6745 		return error;
6746 
6747 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6748 	if (area == NULL)
6749 		return B_NO_MEMORY;
6750 
6751 
6752 	uint32 protection = area->protection;
6753 	if (area->page_protections != NULL)
6754 		protection = get_area_page_protection(area, (addr_t)address);
6755 
6756 	uint32 wiring = area->wiring;
6757 
6758 	locker.Unlock();
6759 
6760 	error = user_memcpy(_protected, &protection, sizeof(protection));
6761 	if (error != B_OK)
6762 		return error;
6763 
6764 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6765 
6766 	return error;
6767 }
6768 
6769 
6770 // #pragma mark -- compatibility
6771 
6772 
6773 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6774 
6775 
6776 struct physical_entry_beos {
6777 	uint32	address;
6778 	uint32	size;
6779 };
6780 
6781 
6782 /*!	The physical_entry structure has changed. We need to translate it to the
6783 	old one.
6784 */
6785 extern "C" int32
6786 __get_memory_map_beos(const void* _address, size_t numBytes,
6787 	physical_entry_beos* table, int32 numEntries)
6788 {
6789 	if (numEntries <= 0)
6790 		return B_BAD_VALUE;
6791 
6792 	const uint8* address = (const uint8*)_address;
6793 
6794 	int32 count = 0;
6795 	while (numBytes > 0 && count < numEntries) {
6796 		physical_entry entry;
6797 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6798 		if (result < 0) {
6799 			if (result != B_BUFFER_OVERFLOW)
6800 				return result;
6801 		}
6802 
6803 		if (entry.address >= (phys_addr_t)1 << 32) {
6804 			panic("get_memory_map(): Address is greater than 4 GB!");
6805 			return B_ERROR;
6806 		}
6807 
6808 		table[count].address = entry.address;
6809 		table[count++].size = entry.size;
6810 
6811 		address += entry.size;
6812 		numBytes -= entry.size;
6813 	}
6814 
6815 	// null-terminate the table, if possible
6816 	if (count < numEntries) {
6817 		table[count].address = 0;
6818 		table[count].size = 0;
6819 	}
6820 
6821 	return B_OK;
6822 }
6823 
6824 
6825 /*!	The type of the \a physicalAddress parameter has changed from void* to
6826 	phys_addr_t.
6827 */
6828 extern "C" area_id
6829 __map_physical_memory_beos(const char* name, void* physicalAddress,
6830 	size_t numBytes, uint32 addressSpec, uint32 protection,
6831 	void** _virtualAddress)
6832 {
6833 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6834 		addressSpec, protection, _virtualAddress);
6835 }
6836 
6837 
6838 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6839 	we meddle with the \a lock parameter to force 32 bit.
6840 */
6841 extern "C" area_id
6842 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6843 	size_t size, uint32 lock, uint32 protection)
6844 {
6845 	switch (lock) {
6846 		case B_NO_LOCK:
6847 			break;
6848 		case B_FULL_LOCK:
6849 		case B_LAZY_LOCK:
6850 			lock = B_32_BIT_FULL_LOCK;
6851 			break;
6852 		case B_CONTIGUOUS:
6853 			lock = B_32_BIT_CONTIGUOUS;
6854 			break;
6855 	}
6856 
6857 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6858 		protection);
6859 }
6860 
6861 
6862 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6863 	"BASE");
6864 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6865 	"map_physical_memory@", "BASE");
6866 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6867 	"BASE");
6868 
6869 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6870 	"get_memory_map@@", "1_ALPHA3");
6871 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6872 	"map_physical_memory@@", "1_ALPHA3");
6873 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6874 	"1_ALPHA3");
6875 
6876 
6877 #else
6878 
6879 
6880 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6881 	"get_memory_map@@", "BASE");
6882 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6883 	"map_physical_memory@@", "BASE");
6884 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6885 	"BASE");
6886 
6887 
6888 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6889