xref: /haiku/src/system/kernel/vm/vm.cpp (revision 9a6a20d4689307142a7ed26a1437ba47e244e73f)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/BitUtils.h>
51 #include <util/ThreadAutoLock.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_priv.h>
54 #include <vm/VMAddressSpace.h>
55 #include <vm/VMArea.h>
56 #include <vm/VMCache.h>
57 
58 #include "VMAddressSpaceLocking.h"
59 #include "VMAnonymousCache.h"
60 #include "VMAnonymousNoSwapCache.h"
61 #include "IORequest.h"
62 
63 
64 //#define TRACE_VM
65 //#define TRACE_FAULTS
66 #ifdef TRACE_VM
67 #	define TRACE(x) dprintf x
68 #else
69 #	define TRACE(x) ;
70 #endif
71 #ifdef TRACE_FAULTS
72 #	define FTRACE(x) dprintf x
73 #else
74 #	define FTRACE(x) ;
75 #endif
76 
77 
78 namespace {
79 
80 class AreaCacheLocking {
81 public:
82 	inline bool Lock(VMCache* lockable)
83 	{
84 		return false;
85 	}
86 
87 	inline void Unlock(VMCache* lockable)
88 	{
89 		vm_area_put_locked_cache(lockable);
90 	}
91 };
92 
93 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
94 public:
95 	inline AreaCacheLocker(VMCache* cache = NULL)
96 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
97 	{
98 	}
99 
100 	inline AreaCacheLocker(VMArea* area)
101 		: AutoLocker<VMCache, AreaCacheLocking>()
102 	{
103 		SetTo(area);
104 	}
105 
106 	inline void SetTo(VMCache* cache, bool alreadyLocked)
107 	{
108 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
109 	}
110 
111 	inline void SetTo(VMArea* area)
112 	{
113 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
114 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
115 	}
116 };
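
// Illustrative sketch (not part of the original source): how AreaCacheLocker
// is typically used further down in this file. Locking goes through
// vm_area_get_locked_cache(), and the locker's destructor releases the cache
// again via vm_area_put_locked_cache():
//
//   // address space is at least read-locked, "area" is valid
//   AreaCacheLocker cacheLocker(area);
//   // area->cache is now locked and referenced; work with it here...
//   // unlock + unreference happen automatically when cacheLocker goes
//   // out of scope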
117 
118 
119 class VMCacheChainLocker {
120 public:
121 	VMCacheChainLocker()
122 		:
123 		fTopCache(NULL),
124 		fBottomCache(NULL)
125 	{
126 	}
127 
128 	VMCacheChainLocker(VMCache* topCache)
129 		:
130 		fTopCache(topCache),
131 		fBottomCache(topCache)
132 	{
133 	}
134 
135 	~VMCacheChainLocker()
136 	{
137 		Unlock();
138 	}
139 
140 	void SetTo(VMCache* topCache)
141 	{
142 		fTopCache = topCache;
143 		fBottomCache = topCache;
144 
145 		if (topCache != NULL)
146 			topCache->SetUserData(NULL);
147 	}
148 
149 	VMCache* LockSourceCache()
150 	{
151 		if (fBottomCache == NULL || fBottomCache->source == NULL)
152 			return NULL;
153 
154 		VMCache* previousCache = fBottomCache;
155 
156 		fBottomCache = fBottomCache->source;
157 		fBottomCache->Lock();
158 		fBottomCache->AcquireRefLocked();
159 		fBottomCache->SetUserData(previousCache);
160 
161 		return fBottomCache;
162 	}
163 
164 	void LockAllSourceCaches()
165 	{
166 		while (LockSourceCache() != NULL) {
167 		}
168 	}
169 
170 	void Unlock(VMCache* exceptCache = NULL)
171 	{
172 		if (fTopCache == NULL)
173 			return;
174 
175 		// Unlock caches in source -> consumer direction. This is important to
176 		// avoid double-locking and a reversal of locking order in case a cache
177 		// is eligible for merging.
178 		VMCache* cache = fBottomCache;
179 		while (cache != NULL) {
180 			VMCache* nextCache = (VMCache*)cache->UserData();
181 			if (cache != exceptCache)
182 				cache->ReleaseRefAndUnlock(cache != fTopCache);
183 
184 			if (cache == fTopCache)
185 				break;
186 
187 			cache = nextCache;
188 		}
189 
190 		fTopCache = NULL;
191 		fBottomCache = NULL;
192 	}
193 
194 	void UnlockKeepRefs(bool keepTopCacheLocked)
195 	{
196 		if (fTopCache == NULL)
197 			return;
198 
199 		VMCache* nextCache = fBottomCache;
200 		VMCache* cache = NULL;
201 
202 		while (keepTopCacheLocked
203 				? nextCache != fTopCache : cache != fTopCache) {
204 			cache = nextCache;
205 			nextCache = (VMCache*)cache->UserData();
206 			cache->Unlock(cache != fTopCache);
207 		}
208 	}
209 
210 	void RelockCaches(bool topCacheLocked)
211 	{
212 		if (fTopCache == NULL)
213 			return;
214 
215 		VMCache* nextCache = fTopCache;
216 		VMCache* cache = NULL;
217 		if (topCacheLocked) {
218 			cache = nextCache;
219 			nextCache = cache->source;
220 		}
221 
222 		while (cache != fBottomCache && nextCache != NULL) {
223 			VMCache* consumer = cache;
224 			cache = nextCache;
225 			nextCache = cache->source;
226 			cache->Lock();
227 			cache->SetUserData(consumer);
228 		}
229 	}
230 
231 private:
232 	VMCache*	fTopCache;
233 	VMCache*	fBottomCache;
234 };
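
// Illustrative sketch (not part of the original source): the usage pattern
// for VMCacheChainLocker as it appears in cut_area() and discard_area_range()
// below -- lock an area's top cache, then lock the entire source chain:
//
//   VMCache* cache = vm_area_get_locked_cache(area);
//   VMCacheChainLocker cacheChainLocker(cache);
//   cacheChainLocker.LockAllSourceCaches();
//   // ...work on any cache in the chain...
//   cacheChainLocker.Unlock();
//       // or simply let the destructor unlock the whole chain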
235 
236 } // namespace
237 
238 
239 // The memory reserve that an allocation of a certain priority must not touch.
240 static const size_t kMemoryReserveForPriority[] = {
241 	VM_MEMORY_RESERVE_USER,		// user
242 	VM_MEMORY_RESERVE_SYSTEM,	// system
243 	0							// VIP
244 };
245 
246 
247 static ObjectCache** sPageMappingsObjectCaches;
248 static uint32 sPageMappingsMask;
249 
250 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
251 
252 static off_t sAvailableMemory;
253 static off_t sNeededMemory;
254 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
255 static uint32 sPageFaults;
256 
257 static VMPhysicalPageMapper* sPhysicalPageMapper;
258 
259 #if DEBUG_CACHE_LIST
260 
261 struct cache_info {
262 	VMCache*	cache;
263 	addr_t		page_count;
264 	addr_t		committed;
265 };
266 
267 static const int kCacheInfoTableCount = 100 * 1024;
268 static cache_info* sCacheInfoTable;
269 
270 #endif	// DEBUG_CACHE_LIST
271 
272 
273 // function declarations
274 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
275 	bool deletingAddressSpace, bool alreadyRemoved = false);
276 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
277 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
278 static status_t map_backing_store(VMAddressSpace* addressSpace,
279 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
280 	int protection, int protectionMax, int mapping, uint32 flags,
281 	const virtual_address_restrictions* addressRestrictions, bool kernel,
282 	VMArea** _area, void** _virtualAddress);
283 static void fix_protection(uint32* protection);
284 
285 
286 //	#pragma mark -
287 
288 
289 #if VM_PAGE_FAULT_TRACING
290 
291 namespace VMPageFaultTracing {
292 
293 class PageFaultStart : public AbstractTraceEntry {
294 public:
295 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
296 		:
297 		fAddress(address),
298 		fPC(pc),
299 		fWrite(write),
300 		fUser(user)
301 	{
302 		Initialized();
303 	}
304 
305 	virtual void AddDump(TraceOutput& out)
306 	{
307 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
308 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
309 	}
310 
311 private:
312 	addr_t	fAddress;
313 	addr_t	fPC;
314 	bool	fWrite;
315 	bool	fUser;
316 };
317 
318 
319 // page fault errors
320 enum {
321 	PAGE_FAULT_ERROR_NO_AREA		= 0,
322 	PAGE_FAULT_ERROR_KERNEL_ONLY,
323 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
324 	PAGE_FAULT_ERROR_READ_PROTECTED,
325 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
326 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
327 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
328 };
329 
330 
331 class PageFaultError : public AbstractTraceEntry {
332 public:
333 	PageFaultError(area_id area, status_t error)
334 		:
335 		fArea(area),
336 		fError(error)
337 	{
338 		Initialized();
339 	}
340 
341 	virtual void AddDump(TraceOutput& out)
342 	{
343 		switch (fError) {
344 			case PAGE_FAULT_ERROR_NO_AREA:
345 				out.Print("page fault error: no area");
346 				break;
347 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
348 				out.Print("page fault error: area: %ld, kernel only", fArea);
349 				break;
350 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
351 				out.Print("page fault error: area: %ld, write protected",
352 					fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_READ_PROTECTED:
355 				out.Print("page fault error: area: %ld, read protected", fArea);
356 				break;
357 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
358 				out.Print("page fault error: area: %ld, execute protected",
359 					fArea);
360 				break;
361 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
362 				out.Print("page fault error: kernel touching bad user memory");
363 				break;
364 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
365 				out.Print("page fault error: no address space");
366 				break;
367 			default:
368 				out.Print("page fault error: area: %ld, error: %s", fArea,
369 					strerror(fError));
370 				break;
371 		}
372 	}
373 
374 private:
375 	area_id		fArea;
376 	status_t	fError;
377 };
378 
379 
380 class PageFaultDone : public AbstractTraceEntry {
381 public:
382 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
383 			vm_page* page)
384 		:
385 		fArea(area),
386 		fTopCache(topCache),
387 		fCache(cache),
388 		fPage(page)
389 	{
390 		Initialized();
391 	}
392 
393 	virtual void AddDump(TraceOutput& out)
394 	{
395 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
396 			"page: %p", fArea, fTopCache, fCache, fPage);
397 	}
398 
399 private:
400 	area_id		fArea;
401 	VMCache*	fTopCache;
402 	VMCache*	fCache;
403 	vm_page*	fPage;
404 };
405 
406 }	// namespace VMPageFaultTracing
407 
408 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
409 #else
410 #	define TPF(x) ;
411 #endif	// VM_PAGE_FAULT_TRACING
412 
413 
414 //	#pragma mark - page mappings allocation
415 
416 
417 static void
418 create_page_mappings_object_caches()
419 {
420 	// We want the largest power of 2 that does not exceed the number of CPUs.
421 	const int32 numCPUs = smp_get_num_cpus();
422 	int32 count = next_power_of_2(numCPUs);
423 	if (count > numCPUs)
424 		count >>= 1;
425 	sPageMappingsMask = count - 1;
426 
427 	sPageMappingsObjectCaches = new object_cache*[count];
428 	if (sPageMappingsObjectCaches == NULL)
429 		panic("failed to allocate page mappings object_cache array");
430 
431 	for (int32 i = 0; i < count; i++) {
432 		char name[32];
433 		snprintf(name, sizeof(name), "page mappings %" B_PRId32, i);
434 
435 		object_cache* cache = create_object_cache_etc(name,
436 			sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
437 			NULL, NULL);
438 		if (cache == NULL)
439 			panic("failed to create page mappings object_cache");
440 
441 		object_cache_set_minimum_reserve(cache, 1024);
442 		sPageMappingsObjectCaches[i] = cache;
443 	}
444 }
445 
446 
447 static object_cache*
448 page_mapping_object_cache_for(page_num_t page)
449 {
450 	return sPageMappingsObjectCaches[page & sPageMappingsMask];
451 }
452 
453 
454 static vm_page_mapping*
455 allocate_page_mapping(page_num_t page, uint32 flags = 0)
456 {
457 	return (vm_page_mapping*)object_cache_alloc(page_mapping_object_cache_for(page),
458 		flags);
459 }
460 
461 
462 void
463 vm_free_page_mapping(page_num_t page, vm_page_mapping* mapping, uint32 flags)
464 {
465 	object_cache_free(page_mapping_object_cache_for(page), mapping, flags);
466 }
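
// Illustrative example (not part of the original source): with, say, 6 CPUs,
// create_page_mappings_object_caches() rounds down to 4 caches and sets
// sPageMappingsMask to 3, so physical page N always uses cache N & 3. Because
// the cache is selected from the page number alone, allocating and freeing a
// mapping for the same page always hit the same object cache:
//
//   vm_page_mapping* mapping
//       = allocate_page_mapping(page->physical_page_number);
//   // ...
//   vm_free_page_mapping(page->physical_page_number, mapping, 0);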
467 
468 
469 //	#pragma mark -
470 
471 
472 /*!	The page's cache must be locked.
473 */
474 static inline void
475 increment_page_wired_count(vm_page* page)
476 {
477 	if (!page->IsMapped())
478 		atomic_add(&gMappedPagesCount, 1);
479 	page->IncrementWiredCount();
480 }
481 
482 
483 /*!	The page's cache must be locked.
484 */
485 static inline void
486 decrement_page_wired_count(vm_page* page)
487 {
488 	page->DecrementWiredCount();
489 	if (!page->IsMapped())
490 		atomic_add(&gMappedPagesCount, -1);
491 }
492 
493 
494 static inline addr_t
495 virtual_page_address(VMArea* area, vm_page* page)
496 {
497 	return area->Base()
498 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
499 }
500 
501 
502 static inline bool
503 is_page_in_area(VMArea* area, vm_page* page)
504 {
505 	off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT);
506 	return pageCacheOffsetBytes >= area->cache_offset
507 		&& pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size();
508 }
509 
510 
511 //! You need to have the address space locked when calling this function
512 static VMArea*
513 lookup_area(VMAddressSpace* addressSpace, area_id id)
514 {
515 	VMAreas::ReadLock();
516 
517 	VMArea* area = VMAreas::LookupLocked(id);
518 	if (area != NULL && area->address_space != addressSpace)
519 		area = NULL;
520 
521 	VMAreas::ReadUnlock();
522 
523 	return area;
524 }
525 
526 
527 static inline size_t
528 area_page_protections_size(size_t areaSize)
529 {
530 	// In the page protections we store only the three user protections,
531 	// so we use 4 bits per page.
532 	return (areaSize / B_PAGE_SIZE + 1) / 2;
533 }
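
// Illustrative example (not part of the original source): one 4-bit nibble
// per page means two pages share a byte. A 16-page area needs
// (16 + 1) / 2 = 8 bytes of page_protections; the "+ 1" rounds up for an odd
// page count, e.g. 5 pages -> (5 + 1) / 2 = 3 bytes.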
534 
535 
536 static status_t
537 allocate_area_page_protections(VMArea* area)
538 {
539 	size_t bytes = area_page_protections_size(area->Size());
540 	area->page_protections = (uint8*)malloc_etc(bytes,
541 		area->address_space == VMAddressSpace::Kernel()
542 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
543 	if (area->page_protections == NULL)
544 		return B_NO_MEMORY;
545 
546 	// init the page protections for all pages to that of the area
547 	uint32 areaProtection = area->protection
548 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
549 	memset(area->page_protections, areaProtection | (areaProtection << 4),
550 		bytes);
551 	return B_OK;
552 }
553 
554 
555 static inline void
556 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
557 {
558 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
559 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
560 	uint8& entry = area->page_protections[pageIndex / 2];
561 	if (pageIndex % 2 == 0)
562 		entry = (entry & 0xf0) | protection;
563 	else
564 		entry = (entry & 0x0f) | (protection << 4);
565 }
566 
567 
568 static inline uint32
569 get_area_page_protection(VMArea* area, addr_t pageAddress)
570 {
571 	if (area->page_protections == NULL)
572 		return area->protection;
573 
574 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
575 	uint32 protection = area->page_protections[pageIndex / 2];
576 	if (pageIndex % 2 == 0)
577 		protection &= 0x0f;
578 	else
579 		protection >>= 4;
580 
581 	uint32 kernelProtection = 0;
582 	if ((protection & B_READ_AREA) != 0)
583 		kernelProtection |= B_KERNEL_READ_AREA;
584 	if ((protection & B_WRITE_AREA) != 0)
585 		kernelProtection |= B_KERNEL_WRITE_AREA;
586 
587 	// If this is a kernel area we return only the kernel flags.
588 	if (area->address_space == VMAddressSpace::Kernel())
589 		return kernelProtection;
590 
591 	return protection | kernelProtection;
592 }
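
// Illustrative sketch (not part of the original source): the nibble layout
// used by set_area_page_protection() and get_area_page_protection(). Page
// index 2*i lives in the low nibble of byte i, page index 2*i + 1 in the
// high nibble:
//
//   byte i = [ protection(page 2*i + 1) << 4 | protection(page 2*i) ]
//
//   // e.g. page index 3 is odd, so this stores B_READ_AREA in the high
//   // nibble of page_protections[1]:
//   set_area_page_protection(area, area->Base() + 3 * B_PAGE_SIZE,
//       B_READ_AREA);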
593 
594 
595 static inline uint8*
596 realloc_page_protections(uint8* pageProtections, size_t areaSize,
597 	uint32 allocationFlags)
598 {
599 	size_t bytes = area_page_protections_size(areaSize);
600 	return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags);
601 }
602 
603 
604 /*!	The caller must have reserved as many pages as the translation map
605 	implementation might need to map this page.
606 	The page's cache must be locked.
607 */
608 static status_t
609 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
610 	vm_page_reservation* reservation)
611 {
612 	VMTranslationMap* map = area->address_space->TranslationMap();
613 
614 	bool wasMapped = page->IsMapped();
615 
616 	if (area->wiring == B_NO_LOCK) {
617 		DEBUG_PAGE_ACCESS_CHECK(page);
618 
619 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
620 		vm_page_mapping* mapping = allocate_page_mapping(page->physical_page_number,
621 			CACHE_DONT_WAIT_FOR_MEMORY
622 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
623 		if (mapping == NULL)
624 			return B_NO_MEMORY;
625 
626 		mapping->page = page;
627 		mapping->area = area;
628 
629 		map->Lock();
630 
631 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
632 			area->MemoryType(), reservation);
633 
634 		// insert mapping into lists
635 		if (!page->IsMapped())
636 			atomic_add(&gMappedPagesCount, 1);
637 
638 		page->mappings.Add(mapping);
639 		area->mappings.Add(mapping);
640 
641 		map->Unlock();
642 	} else {
643 		DEBUG_PAGE_ACCESS_CHECK(page);
644 
645 		map->Lock();
646 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
647 			area->MemoryType(), reservation);
648 		map->Unlock();
649 
650 		increment_page_wired_count(page);
651 	}
652 
653 	if (!wasMapped) {
654 		// The page is mapped now, so we must not remain in the cached queue.
655 		// It also makes sense to move it from the inactive to the active queue,
656 		// since
656 		// otherwise the page daemon wouldn't come to keep track of it (in idle
657 		// mode) -- if the page isn't touched, it will be deactivated after a
658 		// full iteration through the queue at the latest.
659 		if (page->State() == PAGE_STATE_CACHED
660 				|| page->State() == PAGE_STATE_INACTIVE) {
661 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
662 		}
663 	}
664 
665 	return B_OK;
666 }
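
// Illustrative sketch (not part of the original source): the calling
// convention map_page() expects, as followed e.g. by vm_create_anonymous_area()
// below for B_FULL_LOCK areas -- the caller reserves pages for the translation
// map and holds the page's cache lock:
//
//   vm_page_reservation reservation;
//   vm_page_reserve_pages(&reservation,
//       map->MaxPagesNeededToMap(address, address + B_PAGE_SIZE - 1),
//       VM_PRIORITY_USER);
//   cache->Lock();
//   vm_page* page = vm_page_allocate_page(&reservation, PAGE_STATE_ACTIVE);
//   cache->InsertPage(page, offset);
//   map_page(area, page, address, protection, &reservation);
//   DEBUG_PAGE_ACCESS_END(page);
//   cache->Unlock();
//   vm_page_unreserve_pages(&reservation);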
667 
668 
669 /*!	The caller must hold the lock of the
670 	page's cache.
671 */
672 static inline bool
673 unmap_page(VMArea* area, addr_t virtualAddress)
674 {
675 	return area->address_space->TranslationMap()->UnmapPage(area,
676 		virtualAddress, true);
677 }
678 
679 
680 /*!	The caller must hold the locks of all
681 	mapped pages' caches.
682 */
683 static inline void
684 unmap_pages(VMArea* area, addr_t base, size_t size)
685 {
686 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
687 }
688 
689 
690 static inline bool
691 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
692 {
693 	if (address < area->Base()) {
694 		offset = area->Base() - address;
695 		if (offset >= size)
696 			return false;
697 
698 		address = area->Base();
699 		size -= offset;
700 		offset = 0;
701 		if (size > area->Size())
702 			size = area->Size();
703 
704 		return true;
705 	}
706 
707 	offset = address - area->Base();
708 	if (offset >= area->Size())
709 		return false;
710 
711 	if (size >= area->Size() - offset)
712 		size = area->Size() - offset;
713 
714 	return true;
715 }
716 
717 
718 /*!	Cuts a piece out of an area. If the given cut range covers the complete
719 	area, it is deleted. If it covers the beginning or the end, the area is
720 	resized accordingly. If the range covers some part in the middle of the
721 	area, it is split in two; in this case the second area is returned via
722 	\a _secondArea (the variable is left untouched in the other cases).
723 	The address space must be write locked.
724 	The caller must ensure that no part of the given range is wired.
725 */
726 static status_t
727 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
728 	addr_t size, VMArea** _secondArea, bool kernel)
729 {
730 	addr_t offset;
731 	if (!intersect_area(area, address, size, offset))
732 		return B_OK;
733 
734 	// Is the area fully covered?
735 	if (address == area->Base() && size == area->Size()) {
736 		delete_area(addressSpace, area, false);
737 		return B_OK;
738 	}
739 
740 	int priority;
741 	uint32 allocationFlags;
742 	if (addressSpace == VMAddressSpace::Kernel()) {
743 		priority = VM_PRIORITY_SYSTEM;
744 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
745 			| HEAP_DONT_LOCK_KERNEL_SPACE;
746 	} else {
747 		priority = VM_PRIORITY_USER;
748 		allocationFlags = 0;
749 	}
750 
751 	VMCache* cache = vm_area_get_locked_cache(area);
752 	VMCacheChainLocker cacheChainLocker(cache);
753 	cacheChainLocker.LockAllSourceCaches();
754 
755 	// If no one else uses the area's cache and it's an anonymous cache, we can
756 	// resize or split it, too.
757 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
758 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
759 
760 	const addr_t oldSize = area->Size();
761 
762 	// Cut the end only?
763 	if (offset > 0 && size == area->Size() - offset) {
764 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
765 			allocationFlags);
766 		if (error != B_OK)
767 			return error;
768 
769 		if (area->page_protections != NULL) {
770 			uint8* newProtections = realloc_page_protections(
771 				area->page_protections, area->Size(), allocationFlags);
772 
773 			if (newProtections == NULL) {
774 				addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
775 				return B_NO_MEMORY;
776 			}
777 
778 			area->page_protections = newProtections;
779 		}
780 
781 		// unmap pages
782 		unmap_pages(area, address, size);
783 
784 		if (onlyCacheUser) {
785 			// Since VMCache::Resize() can temporarily drop the lock, we must
786 			// unlock all lower caches to prevent locking order inversion.
787 			cacheChainLocker.Unlock(cache);
788 			cache->Resize(cache->virtual_base + offset, priority);
789 			cache->ReleaseRefAndUnlock();
790 		}
791 
792 		return B_OK;
793 	}
794 
795 	// Cut the beginning only?
796 	if (area->Base() == address) {
797 		uint8* newProtections = NULL;
798 		if (area->page_protections != NULL) {
799 			// Allocate all memory before shifting as the shift might lose some
800 			// bits.
801 			newProtections = realloc_page_protections(NULL, area->Size(),
802 				allocationFlags);
803 
804 			if (newProtections == NULL)
805 				return B_NO_MEMORY;
806 		}
807 
808 		// resize the area
809 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
810 			allocationFlags);
811 		if (error != B_OK) {
812 			if (newProtections != NULL)
813 				free_etc(newProtections, allocationFlags);
814 			return error;
815 		}
816 
817 		if (area->page_protections != NULL) {
818 			size_t oldBytes = area_page_protections_size(oldSize);
819 			ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE;
820 			bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4));
821 
822 			size_t bytes = area_page_protections_size(area->Size());
823 			memcpy(newProtections, area->page_protections, bytes);
824 			free_etc(area->page_protections, allocationFlags);
825 			area->page_protections = newProtections;
826 		}
827 
828 		// unmap pages
829 		unmap_pages(area, address, size);
830 
831 		if (onlyCacheUser) {
832 			// Since VMCache::Rebase() can temporarily drop the lock, we must
833 			// unlock all lower caches to prevent locking order inversion.
834 			cacheChainLocker.Unlock(cache);
835 			cache->Rebase(cache->virtual_base + size, priority);
836 			cache->ReleaseRefAndUnlock();
837 		}
838 		area->cache_offset += size;
839 
840 		return B_OK;
841 	}
842 
843 	// The tough part -- cut a piece out of the middle of the area.
844 	// We do that by shrinking the area to the begin section and creating a
845 	// new area for the end section.
846 	addr_t firstNewSize = offset;
847 	addr_t secondBase = address + size;
848 	addr_t secondSize = area->Size() - offset - size;
849 
850 	// unmap pages
851 	unmap_pages(area, address, area->Size() - firstNewSize);
852 
853 	// resize the area
854 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
855 		allocationFlags);
856 	if (error != B_OK)
857 		return error;
858 
859 	uint8* areaNewProtections = NULL;
860 	uint8* secondAreaNewProtections = NULL;
861 
862 	// Try to allocate the new memory before making some hard to reverse
863 	// changes.
864 	if (area->page_protections != NULL) {
865 		areaNewProtections = realloc_page_protections(NULL, area->Size(),
866 			allocationFlags);
867 		secondAreaNewProtections = realloc_page_protections(NULL, secondSize,
868 			allocationFlags);
869 
870 		if (areaNewProtections == NULL || secondAreaNewProtections == NULL) {
871 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
872 			free_etc(areaNewProtections, allocationFlags);
873 			free_etc(secondAreaNewProtections, allocationFlags);
874 			return B_NO_MEMORY;
875 		}
876 	}
877 
878 	virtual_address_restrictions addressRestrictions = {};
879 	addressRestrictions.address = (void*)secondBase;
880 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
881 	VMArea* secondArea;
882 
883 	if (onlyCacheUser) {
884 		// Create a new cache for the second area.
885 		VMCache* secondCache;
886 		error = VMCacheFactory::CreateAnonymousCache(secondCache,
887 			area->protection & B_OVERCOMMITTING_AREA, 0, 0,
888 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
889 		if (error != B_OK) {
890 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
891 			free_etc(areaNewProtections, allocationFlags);
892 			free_etc(secondAreaNewProtections, allocationFlags);
893 			return error;
894 		}
895 
896 		secondCache->Lock();
897 		secondCache->temporary = cache->temporary;
898 		secondCache->virtual_base = area->cache_offset;
899 		secondCache->virtual_end = area->cache_offset + secondSize;
900 
901 		// Transfer the concerned pages from the first cache.
902 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
903 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
904 			area->cache_offset);
905 
906 		if (error == B_OK) {
907 			// Since VMCache::Resize() can temporarily drop the lock, we must
908 			// unlock all lower caches to prevent locking order inversion.
909 			cacheChainLocker.Unlock(cache);
910 			cache->Resize(cache->virtual_base + firstNewSize, priority);
911 			// Don't unlock the cache yet because we might have to resize it
912 			// back.
913 
914 			// Map the second area.
915 			error = map_backing_store(addressSpace, secondCache,
916 				area->cache_offset, area->name, secondSize, area->wiring,
917 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
918 				&addressRestrictions, kernel, &secondArea, NULL);
919 		}
920 
921 		if (error != B_OK) {
922 			// Restore the original cache.
923 			cache->Resize(cache->virtual_base + oldSize, priority);
924 
925 			// Move the pages back.
926 			status_t readoptStatus = cache->Adopt(secondCache,
927 				area->cache_offset, secondSize, adoptOffset);
928 			if (readoptStatus != B_OK) {
929 				// Some (swap) pages have not been moved back and will be lost
930 				// once the second cache is deleted.
931 				panic("failed to restore cache range: %s",
932 					strerror(readoptStatus));
933 
934 				// TODO: Handle out of memory cases by freeing memory and
935 				// retrying.
936 			}
937 
938 			cache->ReleaseRefAndUnlock();
939 			secondCache->ReleaseRefAndUnlock();
940 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
941 			free_etc(areaNewProtections, allocationFlags);
942 			free_etc(secondAreaNewProtections, allocationFlags);
943 			return error;
944 		}
945 
946 		// Now we can unlock it.
947 		cache->ReleaseRefAndUnlock();
948 		secondCache->Unlock();
949 	} else {
950 		error = map_backing_store(addressSpace, cache, area->cache_offset
951 			+ (secondBase - area->Base()),
952 			area->name, secondSize, area->wiring, area->protection,
953 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
954 			&addressRestrictions, kernel, &secondArea, NULL);
955 		if (error != B_OK) {
956 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
957 			free_etc(areaNewProtections, allocationFlags);
958 			free_etc(secondAreaNewProtections, allocationFlags);
959 			return error;
960 		}
961 		// We need a cache reference for the new area.
962 		cache->AcquireRefLocked();
963 	}
964 
965 	if (area->page_protections != NULL) {
966 		// Copy the protection bits of the first area.
967 		size_t areaBytes = area_page_protections_size(area->Size());
968 		memcpy(areaNewProtections, area->page_protections, areaBytes);
969 		uint8* areaOldProtections = area->page_protections;
970 		area->page_protections = areaNewProtections;
971 
972 		// Shift the protection bits of the second area to the start of
973 		// the old array.
974 		size_t oldBytes = area_page_protections_size(oldSize);
975 		addr_t secondAreaOffset = secondBase - area->Base();
976 		ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE;
977 		bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4));
978 
979 		// Copy the protection bits of the second area.
980 		size_t secondAreaBytes = area_page_protections_size(secondSize);
981 		memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes);
982 		secondArea->page_protections = secondAreaNewProtections;
983 
984 		// We don't need this anymore.
985 		free_etc(areaOldProtections, allocationFlags);
986 
987 		// Set the correct page protections for the second area.
988 		VMTranslationMap* map = addressSpace->TranslationMap();
989 		map->Lock();
990 		for (VMCachePagesTree::Iterator it
991 				= secondArea->cache->pages.GetIterator();
992 				vm_page* page = it.Next();) {
993 			if (is_page_in_area(secondArea, page)) {
994 				addr_t address = virtual_page_address(secondArea, page);
995 				uint32 pageProtection
996 					= get_area_page_protection(secondArea, address);
997 				map->ProtectPage(secondArea, address, pageProtection);
998 			}
999 		}
1000 		map->Unlock();
1001 	}
1002 
1003 	if (_secondArea != NULL)
1004 		*_secondArea = secondArea;
1005 
1006 	return B_OK;
1007 }
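
// Illustrative example (not part of the original source): cutting the middle
// out of an area. For an area spanning [0x10000, 0x20000) and a cut range of
// [0x14000, 0x18000), cut_area() shrinks the original area to
// [0x10000, 0x14000) and creates a second area for [0x18000, 0x20000), which
// is returned through _secondArea. A cut at the start or end only shrinks the
// area (via ShrinkAreaHead()/ShrinkAreaTail()), and a cut covering the whole
// area deletes it.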
1008 
1009 
1010 /*!	Deletes or cuts all areas in the given address range.
1011 	The address space must be write-locked.
1012 	The caller must ensure that no part of the given range is wired.
1013 */
1014 static status_t
1015 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1016 	bool kernel)
1017 {
1018 	size = PAGE_ALIGN(size);
1019 
1020 	// Check whether the caller is allowed to modify the concerned areas.
1021 	if (!kernel) {
1022 		for (VMAddressSpace::AreaRangeIterator it
1023 				= addressSpace->GetAreaRangeIterator(address, size);
1024 			VMArea* area = it.Next();) {
1025 
1026 			if ((area->protection & B_KERNEL_AREA) != 0) {
1027 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
1028 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
1029 					team_get_current_team_id(), area->id, area->name);
1030 				return B_NOT_ALLOWED;
1031 			}
1032 		}
1033 	}
1034 
1035 	for (VMAddressSpace::AreaRangeIterator it
1036 			= addressSpace->GetAreaRangeIterator(address, size);
1037 		VMArea* area = it.Next();) {
1038 
1039 		status_t error = cut_area(addressSpace, area, address, size, NULL,
1040 			kernel);
1041 		if (error != B_OK)
1042 			return error;
1043 			// Failing after already messing with areas is ugly, but we
1044 			// can't do anything about it.
1045 	}
1046 
1047 	return B_OK;
1048 }
1049 
1050 
1051 static status_t
1052 discard_area_range(VMArea* area, addr_t address, addr_t size)
1053 {
1054 	addr_t offset;
1055 	if (!intersect_area(area, address, size, offset))
1056 		return B_OK;
1057 
1058 	// If someone else uses the area's cache or it's not an anonymous cache, we
1059 	// can't discard.
1060 	VMCache* cache = vm_area_get_locked_cache(area);
1061 	if (cache->areas != area || area->cache_next != NULL
1062 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
1063 		return B_OK;
1064 	}
1065 
1066 	VMCacheChainLocker cacheChainLocker(cache);
1067 	cacheChainLocker.LockAllSourceCaches();
1068 
1069 	unmap_pages(area, address, size);
1070 
1071 	// Since VMCache::Discard() can temporarily drop the lock, we must
1072 	// unlock all lower caches to prevent locking order inversion.
1073 	cacheChainLocker.Unlock(cache);
1074 	cache->Discard(cache->virtual_base + offset, size);
1075 	cache->ReleaseRefAndUnlock();
1076 
1077 	return B_OK;
1078 }
1079 
1080 
1081 static status_t
1082 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1083 	bool kernel)
1084 {
1085 	for (VMAddressSpace::AreaRangeIterator it
1086 		= addressSpace->GetAreaRangeIterator(address, size);
1087 			VMArea* area = it.Next();) {
1088 		status_t error = discard_area_range(area, address, size);
1089 		if (error != B_OK)
1090 			return error;
1091 	}
1092 
1093 	return B_OK;
1094 }
1095 
1096 
1097 /*! You need to hold the lock of the cache and the write lock of the address
1098 	space when calling this function.
1099 	Note that in case of error your cache will be temporarily unlocked.
1100 	If \a addressSpec is \c B_EXACT_ADDRESS and the
1101 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
1102 	that no part of the specified address range (base \c *_virtualAddress, size
1103 	\a size) is wired. The cache will also be temporarily unlocked.
1104 */
1105 static status_t
1106 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
1107 	const char* areaName, addr_t size, int wiring, int protection,
1108 	int protectionMax, int mapping,
1109 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
1110 	bool kernel, VMArea** _area, void** _virtualAddress)
1111 {
1112 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
1113 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
1114 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
1115 		addressSpace, cache, addressRestrictions->address, offset, size,
1116 		addressRestrictions->address_specification, wiring, protection,
1117 		protectionMax, _area, areaName));
1118 	cache->AssertLocked();
1119 
1120 	if (size == 0) {
1121 #if KDEBUG
1122 		panic("map_backing_store(): called with size=0 for area '%s'!",
1123 			areaName);
1124 #endif
1125 		return B_BAD_VALUE;
1126 	}
1127 	if (offset < 0)
1128 		return B_BAD_VALUE;
1129 
1130 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
1131 		| HEAP_DONT_LOCK_KERNEL_SPACE;
1132 	int priority;
1133 	if (addressSpace != VMAddressSpace::Kernel()) {
1134 		priority = VM_PRIORITY_USER;
1135 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
1136 		priority = VM_PRIORITY_VIP;
1137 		allocationFlags |= HEAP_PRIORITY_VIP;
1138 	} else
1139 		priority = VM_PRIORITY_SYSTEM;
1140 
1141 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
1142 		allocationFlags);
1143 	if (area == NULL)
1144 		return B_NO_MEMORY;
1145 	if (mapping != REGION_PRIVATE_MAP)
1146 		area->protection_max = protectionMax & B_USER_PROTECTION;
1147 
1148 	status_t status;
1149 
1150 	// if this is a private map, we need to create a new cache
1151 	// to handle the private copies of pages as they are written to
1152 	VMCache* sourceCache = cache;
1153 	if (mapping == REGION_PRIVATE_MAP) {
1154 		VMCache* newCache;
1155 
1156 		// create an anonymous cache
1157 		status = VMCacheFactory::CreateAnonymousCache(newCache,
1158 			(protection & B_STACK_AREA) != 0
1159 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
1160 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
1161 		if (status != B_OK)
1162 			goto err1;
1163 
1164 		newCache->Lock();
1165 		newCache->temporary = 1;
1166 		newCache->virtual_base = offset;
1167 		newCache->virtual_end = offset + size;
1168 
1169 		cache->AddConsumer(newCache);
1170 
1171 		cache = newCache;
1172 	}
1173 
1174 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
1175 		status = cache->SetMinimalCommitment(size, priority);
1176 		if (status != B_OK)
1177 			goto err2;
1178 	}
1179 
1180 	// check to see if this address space has entered DELETE state
1181 	if (addressSpace->IsBeingDeleted()) {
1182 		// okay, someone is trying to delete this address space now, so we can't
1183 		// insert the area, so back out
1184 		status = B_BAD_TEAM_ID;
1185 		goto err2;
1186 	}
1187 
1188 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1189 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1190 		// temporarily unlock the current cache since it might be mapped to
1191 		// some existing area, and unmap_address_range also needs to lock that
1192 		// cache to delete the area.
1193 		cache->Unlock();
1194 		status = unmap_address_range(addressSpace,
1195 			(addr_t)addressRestrictions->address, size, kernel);
1196 		cache->Lock();
1197 		if (status != B_OK)
1198 			goto err2;
1199 	}
1200 
1201 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1202 		allocationFlags, _virtualAddress);
1203 	if (status == B_NO_MEMORY
1204 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1205 		// Due to how many locks are held, we cannot wait here for space to be
1206 		// freed up, but we can at least notify the low_resource handler.
1207 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1208 	}
1209 	if (status != B_OK)
1210 		goto err2;
1211 
1212 	// attach the cache to the area
1213 	area->cache = cache;
1214 	area->cache_offset = offset;
1215 
1216 	// point the cache back to the area
1217 	cache->InsertAreaLocked(area);
1218 	if (mapping == REGION_PRIVATE_MAP)
1219 		cache->Unlock();
1220 
1221 	// insert the area in the global areas map
1222 	status = VMAreas::Insert(area);
1223 	if (status != B_OK)
1224 		goto err3;
1225 
1226 	// grab a ref to the address space (the area holds this)
1227 	addressSpace->Get();
1228 
1229 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1230 //		cache, sourceCache, areaName, area);
1231 
1232 	*_area = area;
1233 	return B_OK;
1234 
1235 err3:
1236 	cache->Lock();
1237 	cache->RemoveArea(area);
1238 	area->cache = NULL;
1239 err2:
1240 	if (mapping == REGION_PRIVATE_MAP) {
1241 		// We created this cache, so we must delete it again. Note, that we
1242 		// need to temporarily unlock the source cache or we'll otherwise
1243 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1244 		sourceCache->Unlock();
1245 		cache->ReleaseRefAndUnlock();
1246 		sourceCache->Lock();
1247 	}
1248 err1:
1249 	addressSpace->DeleteArea(area, allocationFlags);
1250 	return status;
1251 }
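
// Illustrative sketch (not part of the original source): the caller pattern
// for map_backing_store(), as used by vm_block_address_range() and
// vm_create_anonymous_area() below -- create and lock a cache, map it, and
// release the reference on failure:
//
//   VMCache* cache;
//   status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, true,
//       VM_PRIORITY_SYSTEM);
//   if (status != B_OK)
//       return status;
//   cache->virtual_end = size;
//   cache->Lock();
//   status = map_backing_store(addressSpace, cache, 0, name, size, B_NO_LOCK,
//       protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, true,
//       &area, NULL);
//   if (status != B_OK) {
//       cache->ReleaseRefAndUnlock();
//       return status;
//   }
//   cache->Unlock();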
1252 
1253 
1254 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1255 	  locker1, locker2).
1256 */
1257 template<typename LockerType1, typename LockerType2>
1258 static inline bool
1259 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1260 {
1261 	area->cache->AssertLocked();
1262 
1263 	VMAreaUnwiredWaiter waiter;
1264 	if (!area->AddWaiterIfWired(&waiter))
1265 		return false;
1266 
1267 	// unlock everything and wait
1268 	if (locker1 != NULL)
1269 		locker1->Unlock();
1270 	if (locker2 != NULL)
1271 		locker2->Unlock();
1272 
1273 	waiter.waitEntry.Wait();
1274 
1275 	return true;
1276 }
1277 
1278 
1279 /*!	Checks whether the given area has any wired ranges intersecting with the
1280 	specified range and waits, if so.
1281 
1282 	When it has to wait, the function calls \c Unlock() on both \a locker1
1283 	and \a locker2, if given.
1284 	The area's top cache must be locked and must be unlocked as a side effect
1285 	of calling \c Unlock() on either \a locker1 or \a locker2.
1286 
1287 	If the function does not have to wait it does not modify or unlock any
1288 	object.
1289 
1290 	\param area The area to be checked.
1291 	\param base The base address of the range to check.
1292 	\param size The size of the address range to check.
1293 	\param locker1 An object to be unlocked before starting to wait (may
1294 		be \c NULL).
1295 	\param locker2 An object to be unlocked before starting to wait (may
1296 		be \c NULL).
1297 	\return \c true, if the function had to wait, \c false otherwise.
1298 */
1299 template<typename LockerType1, typename LockerType2>
1300 static inline bool
1301 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1302 	LockerType1* locker1, LockerType2* locker2)
1303 {
1304 	area->cache->AssertLocked();
1305 
1306 	VMAreaUnwiredWaiter waiter;
1307 	if (!area->AddWaiterIfWired(&waiter, base, size))
1308 		return false;
1309 
1310 	// unlock everything and wait
1311 	if (locker1 != NULL)
1312 		locker1->Unlock();
1313 	if (locker2 != NULL)
1314 		locker2->Unlock();
1315 
1316 	waiter.waitEntry.Wait();
1317 
1318 	return true;
1319 }
1320 
1321 
1322 /*!	Checks whether the given address space has any wired ranges intersecting
1323 	with the specified range and waits, if so.
1324 
1325 	Similar to wait_if_area_range_is_wired(), with the following differences:
1326 	- All areas intersecting with the range are checked (respectively all until
1327 	  one is found that contains a wired range intersecting with the given
1328 	  range).
1329 	- The given address space must at least be read-locked and must be unlocked
1330 	  when \c Unlock() is called on \a locker.
1331 	- None of the areas' caches are allowed to be locked.
1332 */
1333 template<typename LockerType>
1334 static inline bool
1335 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1336 	size_t size, LockerType* locker)
1337 {
1338 	for (VMAddressSpace::AreaRangeIterator it
1339 		= addressSpace->GetAreaRangeIterator(base, size);
1340 			VMArea* area = it.Next();) {
1341 
1342 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1343 
1344 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1345 			return true;
1346 	}
1347 
1348 	return false;
1349 }
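
// Illustrative sketch (not part of the original source): the retry pattern
// these wait helpers are meant for, as used by vm_create_anonymous_area()
// below -- re-acquire the address space lock until no wired range intersects
// the range we want to operate on:
//
//   AddressSpaceWriteLocker locker;
//   do {
//       status = locker.SetTo(team);
//       if (status != B_OK)
//           return status;
//       addressSpace = locker.AddressSpace();
//   } while (wait_if_address_range_is_wired(addressSpace, base, size,
//       &locker));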
1350 
1351 
1352 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1353 	It must be called in a situation where the kernel address space may be
1354 	locked.
1355 */
1356 status_t
1357 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1358 {
1359 	AddressSpaceReadLocker locker;
1360 	VMArea* area;
1361 	status_t status = locker.SetFromArea(id, area);
1362 	if (status != B_OK)
1363 		return status;
1364 
1365 	if (area->page_protections == NULL) {
1366 		status = allocate_area_page_protections(area);
1367 		if (status != B_OK)
1368 			return status;
1369 	}
1370 
1371 	*cookie = (void*)area;
1372 	return B_OK;
1373 }
1374 
1375 
1376 /*!	This is a debug helper function that can only be used with very specific
1377 	use cases.
1378 	Sets protection for the given address range to the protection specified.
1379 	If \a protection is 0 then the involved pages will be marked non-present
1380 	in the translation map to cause a fault on access. The pages aren't
1381 	actually unmapped however so that they can be marked present again with
1382 	additional calls to this function. For this to work the area must be
1383 	fully locked in memory so that the pages aren't otherwise touched.
1384 	This function does not lock the kernel address space and needs to be
1385 	supplied with a \a cookie retrieved from a successful call to
1386 	vm_prepare_kernel_area_debug_protection().
1387 */
1388 status_t
1389 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1390 	uint32 protection)
1391 {
1392 	// check address range
1393 	addr_t address = (addr_t)_address;
1394 	size = PAGE_ALIGN(size);
1395 
1396 	if ((address % B_PAGE_SIZE) != 0
1397 		|| (addr_t)address + size < (addr_t)address
1398 		|| !IS_KERNEL_ADDRESS(address)
1399 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1400 		return B_BAD_VALUE;
1401 	}
1402 
1403 	// Translate the kernel protection to user protection as we only store that.
1404 	if ((protection & B_KERNEL_READ_AREA) != 0)
1405 		protection |= B_READ_AREA;
1406 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1407 		protection |= B_WRITE_AREA;
1408 
1409 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1410 	VMTranslationMap* map = addressSpace->TranslationMap();
1411 	VMArea* area = (VMArea*)cookie;
1412 
1413 	addr_t offset = address - area->Base();
1414 	if (area->Size() - offset < size) {
1415 		panic("protect range not fully within supplied area");
1416 		return B_BAD_VALUE;
1417 	}
1418 
1419 	if (area->page_protections == NULL) {
1420 		panic("area has no page protections");
1421 		return B_BAD_VALUE;
1422 	}
1423 
1424 	// Invalidate the mapping entries so any access to them will fault or
1425 	// restore the mapping entries unchanged so that lookup will succeed again.
1426 	map->Lock();
1427 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1428 	map->Unlock();
1429 
1430 	// And set the proper page protections so that the fault case will actually
1431 	// fail and not simply try to map a new page.
1432 	for (addr_t pageAddress = address; pageAddress < address + size;
1433 			pageAddress += B_PAGE_SIZE) {
1434 		set_area_page_protection(area, pageAddress, protection);
1435 	}
1436 
1437 	return B_OK;
1438 }
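
// Illustrative sketch (not part of the original source): how the two debug
// protection helpers are meant to be paired -- prepare the cookie once (while
// the kernel address space may still be locked), then toggle ranges without
// taking the kernel address space lock:
//
//   void* cookie;
//   if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
//       // make the range fault on any access
//       vm_set_kernel_area_debug_protection(cookie, address, size, 0);
//       // ...
//       // make the range accessible again
//       vm_set_kernel_area_debug_protection(cookie, address, size,
//           B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//   }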
1439 
1440 
1441 status_t
1442 vm_block_address_range(const char* name, void* address, addr_t size)
1443 {
1444 	if (!arch_vm_supports_protection(0))
1445 		return B_NOT_SUPPORTED;
1446 
1447 	AddressSpaceWriteLocker locker;
1448 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1449 	if (status != B_OK)
1450 		return status;
1451 
1452 	VMAddressSpace* addressSpace = locker.AddressSpace();
1453 
1454 	// create an anonymous cache
1455 	VMCache* cache;
1456 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1457 		VM_PRIORITY_SYSTEM);
1458 	if (status != B_OK)
1459 		return status;
1460 
1461 	cache->temporary = 1;
1462 	cache->virtual_end = size;
1463 	cache->Lock();
1464 
1465 	VMArea* area;
1466 	virtual_address_restrictions addressRestrictions = {};
1467 	addressRestrictions.address = address;
1468 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1469 	status = map_backing_store(addressSpace, cache, 0, name, size,
1470 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
1471 		true, &area, NULL);
1472 	if (status != B_OK) {
1473 		cache->ReleaseRefAndUnlock();
1474 		return status;
1475 	}
1476 
1477 	cache->Unlock();
1478 	area->cache_type = CACHE_TYPE_RAM;
1479 	return area->id;
1480 }
1481 
1482 
1483 status_t
1484 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1485 {
1486 	AddressSpaceWriteLocker locker(team);
1487 	if (!locker.IsLocked())
1488 		return B_BAD_TEAM_ID;
1489 
1490 	VMAddressSpace* addressSpace = locker.AddressSpace();
1491 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1492 		addressSpace == VMAddressSpace::Kernel()
1493 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1494 }
1495 
1496 
1497 status_t
1498 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1499 	addr_t size, uint32 flags)
1500 {
1501 	if (size == 0)
1502 		return B_BAD_VALUE;
1503 
1504 	AddressSpaceWriteLocker locker(team);
1505 	if (!locker.IsLocked())
1506 		return B_BAD_TEAM_ID;
1507 
1508 	virtual_address_restrictions addressRestrictions = {};
1509 	addressRestrictions.address = *_address;
1510 	addressRestrictions.address_specification = addressSpec;
1511 	VMAddressSpace* addressSpace = locker.AddressSpace();
1512 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1513 		addressSpace == VMAddressSpace::Kernel()
1514 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1515 		_address);
1516 }
1517 
1518 
1519 area_id
1520 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1521 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1522 	const virtual_address_restrictions* virtualAddressRestrictions,
1523 	const physical_address_restrictions* physicalAddressRestrictions,
1524 	bool kernel, void** _address)
1525 {
1526 	VMArea* area;
1527 	VMCache* cache;
1528 	vm_page* page = NULL;
1529 	bool isStack = (protection & B_STACK_AREA) != 0;
1530 	page_num_t guardPages;
1531 	bool canOvercommit = false;
1532 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1533 		? VM_PAGE_ALLOC_CLEAR : 0;
1534 
1535 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1536 		team, name, size));
1537 
1538 	size = PAGE_ALIGN(size);
1539 	guardSize = PAGE_ALIGN(guardSize);
1540 	guardPages = guardSize / B_PAGE_SIZE;
1541 
1542 	if (size == 0 || size < guardSize)
1543 		return B_BAD_VALUE;
1544 	if (!arch_vm_supports_protection(protection))
1545 		return B_NOT_SUPPORTED;
1546 
1547 	if (team == B_CURRENT_TEAM)
1548 		team = VMAddressSpace::CurrentID();
1549 	if (team < 0)
1550 		return B_BAD_TEAM_ID;
1551 
1552 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1553 		canOvercommit = true;
1554 
1555 #ifdef DEBUG_KERNEL_STACKS
1556 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1557 		isStack = true;
1558 #endif
1559 
1560 	// check parameters
1561 	switch (virtualAddressRestrictions->address_specification) {
1562 		case B_ANY_ADDRESS:
1563 		case B_EXACT_ADDRESS:
1564 		case B_BASE_ADDRESS:
1565 		case B_ANY_KERNEL_ADDRESS:
1566 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1567 		case B_RANDOMIZED_ANY_ADDRESS:
1568 		case B_RANDOMIZED_BASE_ADDRESS:
1569 			break;
1570 
1571 		default:
1572 			return B_BAD_VALUE;
1573 	}
1574 
1575 	// If low or high physical address restrictions are given, we force
1576 	// B_CONTIGUOUS wiring, since only then we'll use
1577 	// vm_page_allocate_page_run() which deals with those restrictions.
1578 	if (physicalAddressRestrictions->low_address != 0
1579 		|| physicalAddressRestrictions->high_address != 0) {
1580 		wiring = B_CONTIGUOUS;
1581 	}
1582 
1583 	physical_address_restrictions stackPhysicalRestrictions;
1584 	bool doReserveMemory = false;
1585 	switch (wiring) {
1586 		case B_NO_LOCK:
1587 			break;
1588 		case B_FULL_LOCK:
1589 		case B_LAZY_LOCK:
1590 		case B_CONTIGUOUS:
1591 			doReserveMemory = true;
1592 			break;
1593 		case B_ALREADY_WIRED:
1594 			break;
1595 		case B_LOMEM:
1596 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1597 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1598 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1599 			wiring = B_CONTIGUOUS;
1600 			doReserveMemory = true;
1601 			break;
1602 		case B_32_BIT_FULL_LOCK:
1603 			if (B_HAIKU_PHYSICAL_BITS <= 32
1604 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1605 				wiring = B_FULL_LOCK;
1606 				doReserveMemory = true;
1607 				break;
1608 			}
1609 			// TODO: We don't really support this mode efficiently. Just fall
1610 			// through for now ...
1611 		case B_32_BIT_CONTIGUOUS:
1612 			#if B_HAIKU_PHYSICAL_BITS > 32
1613 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1614 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1615 					stackPhysicalRestrictions.high_address
1616 						= (phys_addr_t)1 << 32;
1617 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1618 				}
1619 			#endif
1620 			wiring = B_CONTIGUOUS;
1621 			doReserveMemory = true;
1622 			break;
1623 		default:
1624 			return B_BAD_VALUE;
1625 	}
1626 
1627 	// Optimization: For a single-page contiguous allocation without low/high
1628 	// memory restriction B_FULL_LOCK wiring suffices.
1629 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1630 		&& physicalAddressRestrictions->low_address == 0
1631 		&& physicalAddressRestrictions->high_address == 0) {
1632 		wiring = B_FULL_LOCK;
1633 	}
1634 
1635 	// For full lock or contiguous areas we're also going to map the pages and
1636 	// thus need to reserve pages for the mapping backend upfront.
1637 	addr_t reservedMapPages = 0;
1638 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1639 		AddressSpaceWriteLocker locker;
1640 		status_t status = locker.SetTo(team);
1641 		if (status != B_OK)
1642 			return status;
1643 
1644 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1645 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1646 	}
1647 
1648 	int priority;
1649 	if (team != VMAddressSpace::KernelID())
1650 		priority = VM_PRIORITY_USER;
1651 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1652 		priority = VM_PRIORITY_VIP;
1653 	else
1654 		priority = VM_PRIORITY_SYSTEM;
1655 
1656 	// Reserve memory before acquiring the address space lock. This reduces the
1657 	// chances of failure, since while holding the write lock to the address
1658 	// space (if it is the kernel address space that is), the low memory handler
1659 	// won't be able to free anything for us.
1660 	addr_t reservedMemory = 0;
1661 	if (doReserveMemory) {
1662 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1663 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1664 			return B_NO_MEMORY;
1665 		reservedMemory = size;
1666 		// TODO: We don't reserve the memory for the pages for the page
1667 		// directories/tables. We actually need to do since we currently don't
1668 		// directories/tables. We actually need to, since we currently don't
1669 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1670 		// there are actually fewer physical pages than there should be, which
1671 	}
1672 
1673 	AddressSpaceWriteLocker locker;
1674 	VMAddressSpace* addressSpace;
1675 	status_t status;
1676 
1677 	// For full lock areas reserve the pages before locking the address
1678 	// space. E.g. block caches can't release their memory while we hold the
1679 	// address space lock.
1680 	page_num_t reservedPages = reservedMapPages;
1681 	if (wiring == B_FULL_LOCK)
1682 		reservedPages += size / B_PAGE_SIZE;
1683 
1684 	vm_page_reservation reservation;
1685 	if (reservedPages > 0) {
1686 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1687 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1688 					priority)) {
1689 				reservedPages = 0;
1690 				status = B_WOULD_BLOCK;
1691 				goto err0;
1692 			}
1693 		} else
1694 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1695 	}
1696 
1697 	if (wiring == B_CONTIGUOUS) {
1698 		// we try to allocate the page run here upfront as this may easily
1699 		// fail for obvious reasons
1700 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1701 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1702 		if (page == NULL) {
1703 			status = B_NO_MEMORY;
1704 			goto err0;
1705 		}
1706 	}
1707 
1708 	// Lock the address space and, if B_EXACT_ADDRESS and
1709 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1710 	// is not wired.
1711 	do {
1712 		status = locker.SetTo(team);
1713 		if (status != B_OK)
1714 			goto err1;
1715 
1716 		addressSpace = locker.AddressSpace();
1717 	} while (virtualAddressRestrictions->address_specification
1718 			== B_EXACT_ADDRESS
1719 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1720 		&& wait_if_address_range_is_wired(addressSpace,
1721 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1722 
1723 	// create an anonymous cache
1724 	// if it's a stack, make sure that two pages are available at least
1725 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1726 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1727 		wiring == B_NO_LOCK, priority);
1728 	if (status != B_OK)
1729 		goto err1;
1730 
1731 	cache->temporary = 1;
1732 	cache->virtual_end = size;
1733 	cache->committed_size = reservedMemory;
1734 		// TODO: This should be done via a method.
1735 	reservedMemory = 0;
1736 
1737 	cache->Lock();
1738 
1739 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1740 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1741 		virtualAddressRestrictions, kernel, &area, _address);
1742 
1743 	if (status != B_OK) {
1744 		cache->ReleaseRefAndUnlock();
1745 		goto err1;
1746 	}
1747 
1748 	locker.DegradeToReadLock();
1749 
1750 	switch (wiring) {
1751 		case B_NO_LOCK:
1752 		case B_LAZY_LOCK:
1753 			// do nothing - the pages are mapped in as needed
1754 			break;
1755 
1756 		case B_FULL_LOCK:
1757 		{
1758 			// Allocate and map all pages for this area
1759 
1760 			off_t offset = 0;
1761 			for (addr_t address = area->Base();
1762 					address < area->Base() + (area->Size() - 1);
1763 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1764 #ifdef DEBUG_KERNEL_STACKS
1765 #	ifdef STACK_GROWS_DOWNWARDS
1766 				if (isStack && address < area->Base()
1767 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1768 #	else
1769 				if (isStack && address >= area->Base() + area->Size()
1770 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1771 #	endif
1772 					continue;
1773 #endif
1774 				vm_page* page = vm_page_allocate_page(&reservation,
1775 					PAGE_STATE_WIRED | pageAllocFlags);
1776 				cache->InsertPage(page, offset);
1777 				map_page(area, page, address, protection, &reservation);
1778 
1779 				DEBUG_PAGE_ACCESS_END(page);
1780 			}
1781 
1782 			break;
1783 		}
1784 
1785 		case B_ALREADY_WIRED:
1786 		{
1787 			// The pages should already be mapped. This is only really useful
1788 			// during boot time. Find the appropriate vm_page objects and stick
1789 			// them in the cache object.
1790 			VMTranslationMap* map = addressSpace->TranslationMap();
1791 			off_t offset = 0;
1792 
1793 			if (!gKernelStartup)
1794 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1795 
1796 			map->Lock();
1797 
1798 			for (addr_t virtualAddress = area->Base();
1799 					virtualAddress < area->Base() + (area->Size() - 1);
1800 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1801 				phys_addr_t physicalAddress;
1802 				uint32 flags;
1803 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1804 				if (status < B_OK) {
1805 					panic("looking up mapping failed for va 0x%lx\n",
1806 						virtualAddress);
1807 				}
1808 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1809 				if (page == NULL) {
1810 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1811 						"\n", physicalAddress);
1812 				}
1813 
1814 				DEBUG_PAGE_ACCESS_START(page);
1815 
1816 				cache->InsertPage(page, offset);
1817 				increment_page_wired_count(page);
1818 				vm_page_set_state(page, PAGE_STATE_WIRED);
1819 				page->busy = false;
1820 
1821 				DEBUG_PAGE_ACCESS_END(page);
1822 			}
1823 
1824 			map->Unlock();
1825 			break;
1826 		}
1827 
1828 		case B_CONTIGUOUS:
1829 		{
1830 			// We have already allocated our contiguous page run, so we can now
1831 			// just map them into the address space
1832 			VMTranslationMap* map = addressSpace->TranslationMap();
1833 			phys_addr_t physicalAddress
1834 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1835 			addr_t virtualAddress = area->Base();
1836 			off_t offset = 0;
1837 
1838 			map->Lock();
1839 
1840 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1841 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1842 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1843 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1844 				if (page == NULL)
1845 					panic("couldn't lookup physical page just allocated\n");
1846 
1847 				status = map->Map(virtualAddress, physicalAddress, protection,
1848 					area->MemoryType(), &reservation);
1849 				if (status < B_OK)
1850 					panic("couldn't map physical page in page run\n");
1851 
1852 				cache->InsertPage(page, offset);
1853 				increment_page_wired_count(page);
1854 
1855 				DEBUG_PAGE_ACCESS_END(page);
1856 			}
1857 
1858 			map->Unlock();
1859 			break;
1860 		}
1861 
1862 		default:
1863 			break;
1864 	}
1865 
1866 	cache->Unlock();
1867 
1868 	if (reservedPages > 0)
1869 		vm_page_unreserve_pages(&reservation);
1870 
1871 	TRACE(("vm_create_anonymous_area: done\n"));
1872 
1873 	area->cache_type = CACHE_TYPE_RAM;
1874 	return area->id;
1875 
1876 err1:
1877 	if (wiring == B_CONTIGUOUS) {
1878 		// we allocated the contiguous page run upfront, so free it again
1879 		phys_addr_t pageNumber = page->physical_page_number;
1880 		int32 i;
1881 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1882 			page = vm_lookup_page(pageNumber);
1883 			if (page == NULL)
1884 				panic("couldn't lookup physical page just allocated\n");
1885 
1886 			vm_page_set_state(page, PAGE_STATE_FREE);
1887 		}
1888 	}
1889 
1890 err0:
1891 	if (reservedPages > 0)
1892 		vm_page_unreserve_pages(&reservation);
1893 	if (reservedMemory > 0)
1894 		vm_unreserve_memory(reservedMemory);
1895 
1896 	return status;
1897 }
1898 
1899 
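/*!	Maps the given physical address range into the address space of \a team
	as a fully locked area backed by a device cache. A memory type may be
	OR'd into \a addressSpec (see B_MEMORY_TYPE_MASK); if none is given,
	B_UNCACHED_MEMORY is used unless an already existing range overrides it.
*/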
1900 area_id
1901 vm_map_physical_memory(team_id team, const char* name, void** _address,
1902 	uint32 addressSpec, addr_t size, uint32 protection,
1903 	phys_addr_t physicalAddress, bool alreadyWired)
1904 {
1905 	VMArea* area;
1906 	VMCache* cache;
1907 	addr_t mapOffset;
1908 
1909 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1910 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1911 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1912 		addressSpec, size, protection, physicalAddress));
1913 
1914 	if (!arch_vm_supports_protection(protection))
1915 		return B_NOT_SUPPORTED;
1916 
1917 	AddressSpaceWriteLocker locker(team);
1918 	if (!locker.IsLocked())
1919 		return B_BAD_TEAM_ID;
1920 
1921 	// if the physical address is not page aligned,
1922 	// move the actual area down to align it on a page boundary
1923 	mapOffset = physicalAddress % B_PAGE_SIZE;
1924 	size += mapOffset;
1925 	physicalAddress -= mapOffset;
1926 
1927 	size = PAGE_ALIGN(size);
1928 
1929 	// create a device cache
1930 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1931 	if (status != B_OK)
1932 		return status;
1933 
1934 	cache->virtual_end = size;
1935 
1936 	cache->Lock();
1937 
1938 	virtual_address_restrictions addressRestrictions = {};
1939 	addressRestrictions.address = *_address;
1940 	addressRestrictions.address_specification = addressSpec & ~B_MEMORY_TYPE_MASK;
1941 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1942 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1943 		true, &area, _address);
1944 
1945 	if (status < B_OK)
1946 		cache->ReleaseRefLocked();
1947 
1948 	cache->Unlock();
1949 
1950 	if (status == B_OK) {
1951 		// Set requested memory type -- default to uncached, but allow
1952 		// that to be overridden by ranges that may already exist.
1953 		uint32 memoryType = addressSpec & B_MEMORY_TYPE_MASK;
1954 		const bool weak = (memoryType == 0);
1955 		if (weak)
1956 			memoryType = B_UNCACHED_MEMORY;
1957 
1958 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType,
1959 			weak ? &memoryType : NULL);
1960 
1961 		area->SetMemoryType(memoryType);
1962 
1963 		if (status != B_OK)
1964 			delete_area(locker.AddressSpace(), area, false);
1965 	}
1966 
1967 	if (status != B_OK)
1968 		return status;
1969 
1970 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1971 
1972 	if (alreadyWired) {
1973 		// The area is already mapped, but possibly not with the right
1974 		// memory type.
1975 		map->Lock();
1976 		map->ProtectArea(area, area->protection);
1977 		map->Unlock();
1978 	} else {
1979 		// Map the area completely.
1980 
1981 		// reserve pages needed for the mapping
1982 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1983 			area->Base() + (size - 1));
1984 		vm_page_reservation reservation;
1985 		vm_page_reserve_pages(&reservation, reservePages,
1986 			team == VMAddressSpace::KernelID()
1987 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1988 
1989 		map->Lock();
1990 
1991 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1992 			map->Map(area->Base() + offset, physicalAddress + offset,
1993 				protection, area->MemoryType(), &reservation);
1994 		}
1995 
1996 		map->Unlock();
1997 
1998 		vm_page_unreserve_pages(&reservation);
1999 	}
2000 
2001 	// modify the returned pointer so that it points into the new area at the
2002 	// same offset the passed-in physical address had within its page
2003 	*_address = (void*)((addr_t)*_address + mapOffset);
2004 
2005 	area->cache_type = CACHE_TYPE_DEVICE;
2006 	return area->id;
2007 }
2008 
2009 
2010 /*!	Don't use!
2011 	TODO: This function was introduced to map physical page vecs to
2012 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
2013 	use a device cache and does not track vm_page::wired_count!
2014 */
2015 area_id
2016 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
2017 	uint32 addressSpec, addr_t* _size, uint32 protection,
2018 	struct generic_io_vec* vecs, uint32 vecCount)
2019 {
2020 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
2021 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
2022 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
2023 		addressSpec, _size, protection, vecs, vecCount));
2024 
2025 	if (!arch_vm_supports_protection(protection)
2026 		|| (addressSpec & B_MEMORY_TYPE_MASK) != 0) {
2027 		return B_NOT_SUPPORTED;
2028 	}
2029 
2030 	AddressSpaceWriteLocker locker(team);
2031 	if (!locker.IsLocked())
2032 		return B_BAD_TEAM_ID;
2033 
2034 	if (vecCount == 0)
2035 		return B_BAD_VALUE;
2036 
2037 	addr_t size = 0;
2038 	for (uint32 i = 0; i < vecCount; i++) {
2039 		if (vecs[i].base % B_PAGE_SIZE != 0
2040 			|| vecs[i].length % B_PAGE_SIZE != 0) {
2041 			return B_BAD_VALUE;
2042 		}
2043 
2044 		size += vecs[i].length;
2045 	}
2046 
2047 	// create a device cache
2048 	VMCache* cache;
2049 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
2050 	if (result != B_OK)
2051 		return result;
2052 
2053 	cache->virtual_end = size;
2054 
2055 	cache->Lock();
2056 
2057 	VMArea* area;
2058 	virtual_address_restrictions addressRestrictions = {};
2059 	addressRestrictions.address = *_address;
2060 	addressRestrictions.address_specification = addressSpec & ~B_MEMORY_TYPE_MASK;
2061 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
2062 		size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0,
2063 		&addressRestrictions, true, &area, _address);
2064 
2065 	if (result != B_OK)
2066 		cache->ReleaseRefLocked();
2067 
2068 	cache->Unlock();
2069 
2070 	if (result != B_OK)
2071 		return result;
2072 
2073 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2074 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
2075 		area->Base() + (size - 1));
2076 
2077 	vm_page_reservation reservation;
2078 	vm_page_reserve_pages(&reservation, reservePages,
2079 			team == VMAddressSpace::KernelID()
2080 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2081 	map->Lock();
2082 
2083 	uint32 vecIndex = 0;
2084 	size_t vecOffset = 0;
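	// Map the area page by page, advancing to the next vec whenever the
	// current one has been fully consumed.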
2085 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2086 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
2087 			vecOffset = 0;
2088 			vecIndex++;
2089 		}
2090 
2091 		if (vecIndex >= vecCount)
2092 			break;
2093 
2094 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
2095 			protection, area->MemoryType(), &reservation);
2096 
2097 		vecOffset += B_PAGE_SIZE;
2098 	}
2099 
2100 	map->Unlock();
2101 	vm_page_unreserve_pages(&reservation);
2102 
2103 	if (_size != NULL)
2104 		*_size = size;
2105 
2106 	area->cache_type = CACHE_TYPE_DEVICE;
2107 	return area->id;
2108 }
2109 
2110 
2111 area_id
2112 vm_create_null_area(team_id team, const char* name, void** address,
2113 	uint32 addressSpec, addr_t size, uint32 flags)
2114 {
2115 	size = PAGE_ALIGN(size);
2116 
2117 	// Lock the address space and, if B_EXACT_ADDRESS and
2118 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
2119 	// is not wired.
2120 	AddressSpaceWriteLocker locker;
2121 	do {
2122 		if (locker.SetTo(team) != B_OK)
2123 			return B_BAD_TEAM_ID;
2124 	} while (addressSpec == B_EXACT_ADDRESS
2125 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
2126 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2127 			(addr_t)*address, size, &locker));
2128 
2129 	// create a null cache
2130 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
2131 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
2132 	VMCache* cache;
2133 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
2134 	if (status != B_OK)
2135 		return status;
2136 
2137 	cache->temporary = 1;
2138 	cache->virtual_end = size;
2139 
2140 	cache->Lock();
2141 
2142 	VMArea* area;
2143 	virtual_address_restrictions addressRestrictions = {};
2144 	addressRestrictions.address = *address;
2145 	addressRestrictions.address_specification = addressSpec;
2146 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
2147 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
2148 		REGION_NO_PRIVATE_MAP, flags,
2149 		&addressRestrictions, true, &area, address);
2150 
2151 	if (status < B_OK) {
2152 		cache->ReleaseRefAndUnlock();
2153 		return status;
2154 	}
2155 
2156 	cache->Unlock();
2157 
2158 	area->cache_type = CACHE_TYPE_NULL;
2159 	return area->id;
2160 }
2161 
2162 
2163 /*!	Creates the vnode cache for the specified \a vnode.
2164 	The vnode has to be marked busy when calling this function.
2165 */
2166 status_t
2167 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
2168 {
2169 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2170 }
2171 
2172 
2173 /*!	\a cache must be locked. The area's address space must be read-locked.
2174 */
2175 static void
2176 pre_map_area_pages(VMArea* area, VMCache* cache,
2177 	vm_page_reservation* reservation, int32 maxCount)
2178 {
2179 	addr_t baseAddress = area->Base();
2180 	addr_t cacheOffset = area->cache_offset;
2181 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2182 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
2183 
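	// Iterate over the cache's resident pages, starting with the area's
	// first page.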
2184 	VMCachePagesTree::Iterator it = cache->pages.GetIterator(firstPage, true, true);
2185 	vm_page* page;
2186 	while ((page = it.Next()) != NULL && maxCount > 0) {
2187 		if (page->cache_offset >= endPage)
2188 			break;
2189 
2190 		// skip busy and inactive pages
2191 		if (page->busy || (page->usage_count == 0 && !page->accessed))
2192 			continue;
2193 
2194 		DEBUG_PAGE_ACCESS_START(page);
2195 		map_page(area, page,
2196 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2197 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2198 		maxCount--;
2199 		DEBUG_PAGE_ACCESS_END(page);
2200 	}
2201 }
2202 
2203 
2204 /*!	Will map the file specified by \a fd to an area in memory.
2205 	The file will be mirrored beginning at the specified \a offset. The
2206 	\a offset and \a size arguments have to be page aligned.
2207 */
2208 static area_id
2209 _vm_map_file(team_id team, const char* name, void** _address,
2210 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2211 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2212 {
2213 	// TODO: for binary files, we want to make sure that they get a
2214 	//	consistent snapshot of the file, i.e. later changes should not
2215 	//	make it into the mapped copy -- this will need quite some changes
2216 	//	to be done in a nice way
2217 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2218 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2219 
2220 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2221 	size = PAGE_ALIGN(size);
2222 
2223 	if (mapping == REGION_NO_PRIVATE_MAP)
2224 		protection |= B_SHARED_AREA;
2225 	if (addressSpec != B_EXACT_ADDRESS)
2226 		unmapAddressRange = false;
2227 
2228 	uint32 mappingFlags = 0;
2229 	if (unmapAddressRange)
2230 		mappingFlags |= CREATE_AREA_UNMAP_ADDRESS_RANGE;
2231 
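	// Without a file descriptor this degenerates into an anonymous mapping.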
2232 	if (fd < 0) {
2233 		virtual_address_restrictions virtualRestrictions = {};
2234 		virtualRestrictions.address = *_address;
2235 		virtualRestrictions.address_specification = addressSpec;
2236 		physical_address_restrictions physicalRestrictions = {};
2237 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2238 			mappingFlags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2239 			_address);
2240 	}
2241 
2242 	// get the open flags of the FD
2243 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2244 	if (descriptor == NULL)
2245 		return EBADF;
2246 	int32 openMode = descriptor->open_mode;
2247 	put_fd(descriptor);
2248 
2249 	// The FD must be open for reading in any case. For a shared mapping with
2250 	// write access, the FD must additionally be open for writing.
2251 	if ((openMode & O_ACCMODE) == O_WRONLY
2252 		|| (mapping == REGION_NO_PRIVATE_MAP
2253 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2254 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2255 		return EACCES;
2256 	}
2257 
2258 	uint32 protectionMax = 0;
2259 	if (mapping == REGION_NO_PRIVATE_MAP) {
2260 		if ((openMode & O_ACCMODE) == O_RDWR)
2261 			protectionMax = protection | B_USER_PROTECTION;
2262 		else
2263 			protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA);
2264 	} else if (mapping == REGION_PRIVATE_MAP) {
2265 		// For privately mapped read-only regions, skip committing memory.
2266 		// (If protections are changed later on, memory will be committed then.)
2267 		if ((protection & B_WRITE_AREA) == 0)
2268 			mappingFlags |= CREATE_AREA_DONT_COMMIT_MEMORY;
2269 	}
2270 
2271 	// get the vnode for the object, this also grabs a ref to it
2272 	struct vnode* vnode = NULL;
2273 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2274 	if (status < B_OK)
2275 		return status;
2276 	VnodePutter vnodePutter(vnode);
2277 
2278 	// If we're going to pre-map pages, we need to reserve the pages needed by
2279 	// the mapping backend upfront.
2280 	page_num_t reservedPreMapPages = 0;
2281 	vm_page_reservation reservation;
2282 	if ((protection & B_READ_AREA) != 0) {
2283 		AddressSpaceWriteLocker locker;
2284 		status = locker.SetTo(team);
2285 		if (status != B_OK)
2286 			return status;
2287 
2288 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2289 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2290 
2291 		locker.Unlock();
2292 
2293 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2294 			team == VMAddressSpace::KernelID()
2295 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2296 	}
2297 
2298 	struct PageUnreserver {
2299 		PageUnreserver(vm_page_reservation* reservation)
2300 			:
2301 			fReservation(reservation)
2302 		{
2303 		}
2304 
2305 		~PageUnreserver()
2306 		{
2307 			if (fReservation != NULL)
2308 				vm_page_unreserve_pages(fReservation);
2309 		}
2310 
2311 		vm_page_reservation* fReservation;
2312 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2313 
2314 	// Lock the address space and, if the specified address range shall be
2315 	// unmapped, ensure it is not wired.
2316 	AddressSpaceWriteLocker locker;
2317 	do {
2318 		if (locker.SetTo(team) != B_OK)
2319 			return B_BAD_TEAM_ID;
2320 	} while (unmapAddressRange
2321 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2322 			(addr_t)*_address, size, &locker));
2323 
2324 	// TODO: this only works for file systems that use the file cache
2325 	VMCache* cache;
2326 	status = vfs_get_vnode_cache(vnode, &cache, false);
2327 	if (status < B_OK)
2328 		return status;
2329 
2330 	cache->Lock();
2331 
2332 	VMArea* area;
2333 	virtual_address_restrictions addressRestrictions = {};
2334 	addressRestrictions.address = *_address;
2335 	addressRestrictions.address_specification = addressSpec;
2336 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2337 		0, protection, protectionMax, mapping, mappingFlags,
2338 		&addressRestrictions, kernel, &area, _address);
2339 
2340 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2341 		// map_backing_store() cannot know we no longer need the ref
2342 		cache->ReleaseRefLocked();
2343 	}
2344 
2345 	if (status == B_OK && (protection & B_READ_AREA) != 0) {
2346 		// Pre-map at most 10MB worth of pages.
2347 		pre_map_area_pages(area, cache, &reservation,
2348 			(10LL * 1024 * 1024) / B_PAGE_SIZE);
2349 	}
2350 
2351 	cache->Unlock();
2352 
2353 	if (status == B_OK) {
2354 		// TODO: this probably deserves a smarter solution, e.g. probably
2355 		// trigger prefetch somewhere else.
2356 
2357 		// Prefetch at most 10MB starting from "offset", but only if the cache
2358 		// doesn't already contain more pages than the prefetch size.
2359 		const size_t prefetch = min_c(size, 10LL * 1024 * 1024);
2360 		if (cache->page_count < (prefetch / B_PAGE_SIZE))
2361 			cache_prefetch_vnode(vnode, offset, prefetch);
2362 	}
2363 
2364 	if (status != B_OK)
2365 		return status;
2366 
2367 	area->cache_type = CACHE_TYPE_VNODE;
2368 	return area->id;
2369 }
2370 
2371 
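/*!	Maps the file \a fd into the address space of team \a aid on behalf of
	the kernel; simply forwards to _vm_map_file() with \c kernel == \c true.
*/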
2372 area_id
2373 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2374 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2375 	int fd, off_t offset)
2376 {
2377 	if (!arch_vm_supports_protection(protection))
2378 		return B_NOT_SUPPORTED;
2379 
2380 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2381 		mapping, unmapAddressRange, fd, offset, true);
2382 }
2383 
2384 
2385 VMCache*
2386 vm_area_get_locked_cache(VMArea* area)
2387 {
2388 	rw_lock_read_lock(&sAreaCacheLock);
2389 
2390 	while (true) {
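		// The area's cache may change while we switch from the list lock to
		// the cache lock, so re-check it after locking and retry if needed.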
2391 		VMCache* cache = area->cache;
2392 
2393 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2394 			// cache has been deleted
2395 			rw_lock_read_lock(&sAreaCacheLock);
2396 			continue;
2397 		}
2398 
2399 		rw_lock_read_lock(&sAreaCacheLock);
2400 
2401 		if (cache == area->cache) {
2402 			cache->AcquireRefLocked();
2403 			rw_lock_read_unlock(&sAreaCacheLock);
2404 			return cache;
2405 		}
2406 
2407 		// the cache changed in the meantime
2408 		cache->Unlock();
2409 	}
2410 }
2411 
2412 
2413 void
2414 vm_area_put_locked_cache(VMCache* cache)
2415 {
2416 	cache->ReleaseRefAndUnlock();
2417 }
2418 
2419 
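/*!	Clones the area \a sourceID into the address space of the given \a team,
	using the specified \a mapping mode. The source area is marked
	B_SHARED_AREA first, so that copy-on-write cannot cause problems.
*/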
2420 area_id
2421 vm_clone_area(team_id team, const char* name, void** address,
2422 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2423 	bool kernel)
2424 {
2425 	VMArea* newArea = NULL;
2426 	VMArea* sourceArea;
2427 
2428 	// Check whether the source area exists and is cloneable. If so, mark it
2429 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2430 	{
2431 		AddressSpaceWriteLocker locker;
2432 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2433 		if (status != B_OK)
2434 			return status;
2435 
2436 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2437 			return B_NOT_ALLOWED;
2438 
2439 		sourceArea->protection |= B_SHARED_AREA;
2440 		protection |= B_SHARED_AREA;
2441 	}
2442 
2443 	// Now lock both address spaces and actually do the cloning.
2444 
2445 	MultiAddressSpaceLocker locker;
2446 	VMAddressSpace* sourceAddressSpace;
2447 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2448 	if (status != B_OK)
2449 		return status;
2450 
2451 	VMAddressSpace* targetAddressSpace;
2452 	status = locker.AddTeam(team, true, &targetAddressSpace);
2453 	if (status != B_OK)
2454 		return status;
2455 
2456 	status = locker.Lock();
2457 	if (status != B_OK)
2458 		return status;
2459 
2460 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2461 	if (sourceArea == NULL)
2462 		return B_BAD_VALUE;
2463 
2464 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2465 		return B_NOT_ALLOWED;
2466 
2467 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2468 
2469 	if (!kernel && sourceAddressSpace != targetAddressSpace
2470 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2471 #if KDEBUG
2472 		Team* team = thread_get_current_thread()->team;
2473 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2474 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2475 #endif
2476 		status = B_NOT_ALLOWED;
2477 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2478 		status = B_NOT_ALLOWED;
2479 	} else {
2480 		virtual_address_restrictions addressRestrictions = {};
2481 		addressRestrictions.address = *address;
2482 		addressRestrictions.address_specification = addressSpec;
2483 		status = map_backing_store(targetAddressSpace, cache,
2484 			sourceArea->cache_offset, name, sourceArea->Size(),
2485 			sourceArea->wiring, protection, sourceArea->protection_max,
2486 			mapping, 0, &addressRestrictions,
2487 			kernel, &newArea, address);
2488 	}
2489 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2490 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2491 		// to create a new cache, and has therefore already acquired a reference
2492 		// to the source cache - but otherwise it has no idea that we need
2493 		// one.
2494 		cache->AcquireRefLocked();
2495 	}
2496 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2497 		// we need to map in everything at this point
2498 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2499 			// we don't have actual pages to map but a physical area
2500 			VMTranslationMap* map
2501 				= sourceArea->address_space->TranslationMap();
2502 			map->Lock();
2503 
2504 			phys_addr_t physicalAddress;
2505 			uint32 oldProtection;
2506 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2507 
2508 			map->Unlock();
2509 
2510 			map = targetAddressSpace->TranslationMap();
2511 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2512 				newArea->Base() + (newArea->Size() - 1));
2513 
2514 			vm_page_reservation reservation;
2515 			vm_page_reserve_pages(&reservation, reservePages,
2516 				targetAddressSpace == VMAddressSpace::Kernel()
2517 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2518 			map->Lock();
2519 
2520 			for (addr_t offset = 0; offset < newArea->Size();
2521 					offset += B_PAGE_SIZE) {
2522 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2523 					protection, newArea->MemoryType(), &reservation);
2524 			}
2525 
2526 			map->Unlock();
2527 			vm_page_unreserve_pages(&reservation);
2528 		} else {
2529 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2530 			size_t reservePages = map->MaxPagesNeededToMap(
2531 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2532 			vm_page_reservation reservation;
2533 			vm_page_reserve_pages(&reservation, reservePages,
2534 				targetAddressSpace == VMAddressSpace::Kernel()
2535 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2536 
2537 			// map in all pages from source
2538 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2539 					vm_page* page  = it.Next();) {
2540 				if (!page->busy) {
2541 					DEBUG_PAGE_ACCESS_START(page);
2542 					map_page(newArea, page,
2543 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2544 							- newArea->cache_offset),
2545 						protection, &reservation);
2546 					DEBUG_PAGE_ACCESS_END(page);
2547 				}
2548 			}
2549 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2550 			// ensuring that!
2551 
2552 			vm_page_unreserve_pages(&reservation);
2553 		}
2554 	}
2555 	if (status == B_OK)
2556 		newArea->cache_type = sourceArea->cache_type;
2557 
2558 	vm_area_put_locked_cache(cache);
2559 
2560 	if (status < B_OK)
2561 		return status;
2562 
2563 	return newArea->id;
2564 }
2565 
2566 
2567 /*!	Deletes the specified area of the given address space.
2568 
2569 	The address space must be write-locked.
2570 	The caller must ensure that the area does not have any wired ranges.
2571 
2572 	\param addressSpace The address space containing the area.
2573 	\param area The area to be deleted.
2574 	\param deletingAddressSpace \c true, if the address space is in the process
2575 		of being deleted.
2576 	\param alreadyRemoved \c true, if the area was already removed from the global
2577 		areas map (and thus had its ID deallocated.)
2578 */
2579 static void
2580 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2581 	bool deletingAddressSpace, bool alreadyRemoved)
2582 {
2583 	ASSERT(!area->IsWired());
2584 
2585 	if (area->id >= 0 && !alreadyRemoved)
2586 		VMAreas::Remove(area);
2587 
2588 	// At this point the area is removed from the global hash table, but
2589 	// still exists in the area list.
2590 
2591 	// Unmap the virtual address space the area occupied.
2592 	{
2593 		// We need to lock the complete cache chain.
2594 		VMCache* topCache = vm_area_get_locked_cache(area);
2595 		VMCacheChainLocker cacheChainLocker(topCache);
2596 		cacheChainLocker.LockAllSourceCaches();
2597 
2598 		// If the area's top cache is a temporary cache and the area is the only
2599 		// one referencing it (besides us currently holding a second reference),
2600 		// the unmapping code doesn't need to care about preserving the accessed
2601 		// and dirty flags of the top cache page mappings.
2602 		bool ignoreTopCachePageFlags
2603 			= topCache->temporary && topCache->RefCount() == 2;
2604 
2605 		area->address_space->TranslationMap()->UnmapArea(area,
2606 			deletingAddressSpace, ignoreTopCachePageFlags);
2607 	}
2608 
2609 	if (!area->cache->temporary)
2610 		area->cache->WriteModified();
2611 
2612 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2613 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2614 
2615 	arch_vm_unset_memory_type(area);
2616 	addressSpace->RemoveArea(area, allocationFlags);
2617 	addressSpace->Put();
2618 
2619 	area->cache->RemoveArea(area);
2620 	area->cache->ReleaseRef();
2621 
2622 	addressSpace->DeleteArea(area, allocationFlags);
2623 }
2624 
2625 
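/*!	Deletes the area \a id in the address space of the given \a team, after
	waiting for any wired ranges in it to disappear. Unless \a kernel is
	\c true, areas protected with B_KERNEL_AREA cannot be deleted.
*/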
2626 status_t
2627 vm_delete_area(team_id team, area_id id, bool kernel)
2628 {
2629 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2630 		team, id));
2631 
2632 	// lock the address space and make sure the area isn't wired
2633 	AddressSpaceWriteLocker locker;
2634 	VMArea* area;
2635 	AreaCacheLocker cacheLocker;
2636 
2637 	do {
2638 		status_t status = locker.SetFromArea(team, id, area);
2639 		if (status != B_OK)
2640 			return status;
2641 
2642 		cacheLocker.SetTo(area);
2643 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2644 
2645 	cacheLocker.Unlock();
2646 
2647 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2648 		return B_NOT_ALLOWED;
2649 
2650 	delete_area(locker.AddressSpace(), area, false);
2651 	return B_OK;
2652 }
2653 
2654 
2655 /*!	Creates a new cache on top of the given cache, moves all areas from
2656 	the old cache to the new one, and changes the protection of all affected
2657 	areas' pages to read-only. If requested, wired pages are moved up to the
2658 	new cache and copies are added to the old cache in their place.
2659 	Preconditions:
2660 	- The given cache must be locked.
2661 	- All of the cache's areas' address spaces must be read locked.
2662 	- Either the cache must not have any wired ranges or a page reservation for
2663 	  all wired pages must be provided, so they can be copied.
2664 
2665 	\param lowerCache The cache on top of which a new cache shall be created.
2666 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2667 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2668 		has wired pages. The wired pages are copied in this case.
2669 */
2670 static status_t
2671 vm_copy_on_write_area(VMCache* lowerCache,
2672 	vm_page_reservation* wiredPagesReservation)
2673 {
2674 	VMCache* upperCache;
2675 
2676 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2677 
2678 	// We need to separate the cache from its areas. The cache goes one level
2679 	// deeper and we create a new cache in between.
2680 
2681 	// create an anonymous cache
2682 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2683 		lowerCache->GuardSize() / B_PAGE_SIZE,
2684 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2685 		VM_PRIORITY_USER);
2686 	if (status != B_OK)
2687 		return status;
2688 
2689 	upperCache->Lock();
2690 
2691 	upperCache->temporary = 1;
2692 	upperCache->virtual_base = lowerCache->virtual_base;
2693 	upperCache->virtual_end = lowerCache->virtual_end;
2694 
2695 	// transfer the lower cache areas to the upper cache
2696 	rw_lock_write_lock(&sAreaCacheLock);
2697 	upperCache->TransferAreas(lowerCache);
2698 	rw_lock_write_unlock(&sAreaCacheLock);
2699 
2700 	lowerCache->AddConsumer(upperCache);
2701 
2702 	// We now need to remap all pages from all of the cache's areas read-only,
2703 	// so that a copy will be created on next write access. If there are wired
2704 	// pages, we keep their protection, move them to the upper cache and create
2705 	// copies for the lower cache.
2706 	if (wiredPagesReservation != NULL) {
2707 		// We need to handle wired pages -- iterate through the cache's pages.
2708 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2709 				vm_page* page = it.Next();) {
2710 			if (page->WiredCount() > 0) {
2711 				// allocate a new page and copy the wired one
2712 				vm_page* copiedPage = vm_page_allocate_page(
2713 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2714 
2715 				vm_memcpy_physical_page(
2716 					copiedPage->physical_page_number * B_PAGE_SIZE,
2717 					page->physical_page_number * B_PAGE_SIZE);
2718 
2719 				// move the wired page to the upper cache (note: removing is OK
2720 				// with the SplayTree iterator) and insert the copy
2721 				upperCache->MovePage(page);
2722 				lowerCache->InsertPage(copiedPage,
2723 					page->cache_offset * B_PAGE_SIZE);
2724 
2725 				DEBUG_PAGE_ACCESS_END(copiedPage);
2726 			} else {
2727 				// Change the protection of this page in all areas.
2728 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2729 						tempArea = tempArea->cache_next) {
2730 					if (!is_page_in_area(tempArea, page))
2731 						continue;
2732 
2733 					// The area must be readable in the same way it was
2734 					// previously writable.
2735 					addr_t address = virtual_page_address(tempArea, page);
2736 					uint32 protection = 0;
2737 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2738 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2739 						protection |= B_KERNEL_READ_AREA;
2740 					if ((pageProtection & B_READ_AREA) != 0)
2741 						protection |= B_READ_AREA;
2742 
2743 					VMTranslationMap* map
2744 						= tempArea->address_space->TranslationMap();
2745 					map->Lock();
2746 					map->ProtectPage(tempArea, address, protection);
2747 					map->Unlock();
2748 				}
2749 			}
2750 		}
2751 	} else {
2752 		ASSERT(lowerCache->WiredPagesCount() == 0);
2753 
2754 		// just change the protection of all areas
2755 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2756 				tempArea = tempArea->cache_next) {
2757 			if (tempArea->page_protections != NULL) {
2758 				// Change the protection of all pages in this area.
2759 				VMTranslationMap* map = tempArea->address_space->TranslationMap();
2760 				map->Lock();
2761 				for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2762 					vm_page* page = it.Next();) {
2763 					if (!is_page_in_area(tempArea, page))
2764 						continue;
2765 
2766 					// The area must be readable in the same way it was
2767 					// previously writable.
2768 					addr_t address = virtual_page_address(tempArea, page);
2769 					uint32 protection = 0;
2770 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2771 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2772 						protection |= B_KERNEL_READ_AREA;
2773 					if ((pageProtection & B_READ_AREA) != 0)
2774 						protection |= B_READ_AREA;
2775 
2776 					map->ProtectPage(tempArea, address, protection);
2777 				}
2778 				map->Unlock();
2779 				continue;
2780 			}
2781 			// The area must be readable in the same way it was previously
2782 			// writable.
2783 			uint32 protection = 0;
2784 			if ((tempArea->protection & B_KERNEL_READ_AREA) != 0)
2785 				protection |= B_KERNEL_READ_AREA;
2786 			if ((tempArea->protection & B_READ_AREA) != 0)
2787 				protection |= B_READ_AREA;
2788 
2789 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2790 			map->Lock();
2791 			map->ProtectArea(tempArea, protection);
2792 			map->Unlock();
2793 		}
2794 	}
2795 
2796 	vm_area_put_locked_cache(upperCache);
2797 
2798 	return B_OK;
2799 }
2800 
2801 
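/*!	Copies the area \a sourceID into the address space of the given \a team.
	A shared source area keeps using its cache; otherwise a writable copy is
	set up for copy-on-write via vm_copy_on_write_area().
*/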
2802 area_id
2803 vm_copy_area(team_id team, const char* name, void** _address,
2804 	uint32 addressSpec, area_id sourceID)
2805 {
2806 	// Do the locking: target address space, all address spaces associated with
2807 	// the source cache, and the cache itself.
2808 	MultiAddressSpaceLocker locker;
2809 	VMAddressSpace* targetAddressSpace;
2810 	VMCache* cache;
2811 	VMArea* source;
2812 	AreaCacheLocker cacheLocker;
2813 	status_t status;
2814 	bool sharedArea;
2815 
2816 	page_num_t wiredPages = 0;
2817 	vm_page_reservation wiredPagesReservation;
2818 
2819 	bool restart;
2820 	do {
2821 		restart = false;
2822 
2823 		locker.Unset();
2824 		status = locker.AddTeam(team, true, &targetAddressSpace);
2825 		if (status == B_OK) {
2826 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2827 				&cache);
2828 		}
2829 		if (status != B_OK)
2830 			return status;
2831 
2832 		cacheLocker.SetTo(cache, true);	// already locked
2833 
2834 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2835 
2836 		page_num_t oldWiredPages = wiredPages;
2837 		wiredPages = 0;
2838 
2839 		// If the source area isn't shared, count the number of wired pages in
2840 		// the cache and reserve as many pages.
2841 		if (!sharedArea) {
2842 			wiredPages = cache->WiredPagesCount();
2843 
2844 			if (wiredPages > oldWiredPages) {
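				// Reserving the wired pages may block, so it has to happen
				// with the locks dropped; we then restart and re-check whether
				// the wired page count changed in the meantime.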
2845 				cacheLocker.Unlock();
2846 				locker.Unlock();
2847 
2848 				if (oldWiredPages > 0)
2849 					vm_page_unreserve_pages(&wiredPagesReservation);
2850 
2851 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2852 					VM_PRIORITY_USER);
2853 
2854 				restart = true;
2855 			}
2856 		} else if (oldWiredPages > 0)
2857 			vm_page_unreserve_pages(&wiredPagesReservation);
2858 	} while (restart);
2859 
2860 	// unreserve pages later
2861 	struct PagesUnreserver {
2862 		PagesUnreserver(vm_page_reservation* reservation)
2863 			:
2864 			fReservation(reservation)
2865 		{
2866 		}
2867 
2868 		~PagesUnreserver()
2869 		{
2870 			if (fReservation != NULL)
2871 				vm_page_unreserve_pages(fReservation);
2872 		}
2873 
2874 	private:
2875 		vm_page_reservation*	fReservation;
2876 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2877 
2878 	bool writableCopy
2879 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2880 	uint8* targetPageProtections = NULL;
2881 
2882 	if (source->page_protections != NULL) {
2883 		size_t bytes = area_page_protections_size(source->Size());
2884 		targetPageProtections = (uint8*)malloc_etc(bytes,
2885 			(source->address_space == VMAddressSpace::Kernel()
2886 					|| targetAddressSpace == VMAddressSpace::Kernel())
2887 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2888 		if (targetPageProtections == NULL)
2889 			return B_NO_MEMORY;
2890 
2891 		memcpy(targetPageProtections, source->page_protections, bytes);
2892 
2893 		if (!writableCopy) {
2894 			for (size_t i = 0; i < bytes; i++) {
2895 				if ((targetPageProtections[i]
2896 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2897 					writableCopy = true;
2898 					break;
2899 				}
2900 			}
2901 		}
2902 	}
2903 
2904 	if (addressSpec == B_CLONE_ADDRESS) {
2905 		addressSpec = B_EXACT_ADDRESS;
2906 		*_address = (void*)source->Base();
2907 	}
2908 
2909 	// First, create a cache on top of the source area, or use the existing
2910 	// one if this is a shared area.
2911 
2912 	VMArea* target;
2913 	virtual_address_restrictions addressRestrictions = {};
2914 	addressRestrictions.address = *_address;
2915 	addressRestrictions.address_specification = addressSpec;
2916 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2917 		name, source->Size(), source->wiring, source->protection,
2918 		source->protection_max,
2919 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2920 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2921 		&addressRestrictions, true, &target, _address);
2922 	if (status < B_OK) {
2923 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2924 		return status;
2925 	}
2926 
2927 	if (targetPageProtections != NULL)
2928 		target->page_protections = targetPageProtections;
2929 
2930 	if (sharedArea) {
2931 		// The new area uses the old area's cache, but map_backing_store()
2932 		// hasn't acquired a ref. So we have to do that now.
2933 		cache->AcquireRefLocked();
2934 	}
2935 
2936 	// If the source area is writable, we need to move it one layer up as well
2937 
2938 	if (!sharedArea) {
2939 		if (writableCopy) {
2940 			// TODO: do something more useful if this fails!
2941 			if (vm_copy_on_write_area(cache,
2942 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2943 				panic("vm_copy_on_write_area() failed!\n");
2944 			}
2945 		}
2946 	}
2947 
2948 	// we return the ID of the newly created area
2949 	return target->id;
2950 }
2951 
2952 
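/*!	Changes the protection of the area \a areaID to \a newProtection. If an
	area that was not writable becomes writable and its cache has consumers,
	a new cache layer is inserted via vm_copy_on_write_area().
*/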
2953 status_t
2954 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2955 	bool kernel)
2956 {
2957 	fix_protection(&newProtection);
2958 
2959 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2960 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2961 
2962 	if (!arch_vm_supports_protection(newProtection))
2963 		return B_NOT_SUPPORTED;
2964 
2965 	bool becomesWritable
2966 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2967 
2968 	// lock address spaces and cache
2969 	MultiAddressSpaceLocker locker;
2970 	VMCache* cache;
2971 	VMArea* area;
2972 	status_t status;
2973 	AreaCacheLocker cacheLocker;
2974 	bool isWritable;
2975 
2976 	bool restart;
2977 	do {
2978 		restart = false;
2979 
2980 		locker.Unset();
2981 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2982 		if (status != B_OK)
2983 			return status;
2984 
2985 		cacheLocker.SetTo(cache, true);	// already locked
2986 
2987 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2988 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2989 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2990 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2991 				" (%s)\n", team, newProtection, areaID, area->name);
2992 			return B_NOT_ALLOWED;
2993 		}
2994 		if (!kernel && area->protection_max != 0
2995 			&& (newProtection & area->protection_max)
2996 				!= (newProtection & B_USER_PROTECTION)) {
2997 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2998 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
2999 				"area %" B_PRId32 " (%s)\n", team, newProtection,
3000 				area->protection_max, areaID, area->name);
3001 			return B_NOT_ALLOWED;
3002 		}
3003 
3004 		if (team != VMAddressSpace::KernelID()
3005 			&& area->address_space->ID() != team) {
3006 			// unless you're the kernel, you are only allowed to set
3007 			// the protection of your own areas
3008 			return B_NOT_ALLOWED;
3009 		}
3010 
3011 		if (area->protection == newProtection)
3012 			return B_OK;
3013 
3014 		isWritable
3015 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
3016 
3017 		// Make sure the area (or, if we're going to call
3018 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
3019 		// wired ranges.
3020 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
3021 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
3022 					otherArea = otherArea->cache_next) {
3023 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
3024 					restart = true;
3025 					break;
3026 				}
3027 			}
3028 		} else {
3029 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
3030 				restart = true;
3031 		}
3032 	} while (restart);
3033 
3034 	bool changePageProtection = true;
3035 	bool changeTopCachePagesOnly = false;
3036 
3037 	if (isWritable && !becomesWritable) {
3038 		// writable -> !writable
3039 
3040 		if (cache->source != NULL && cache->temporary) {
3041 			if (cache->CountWritableAreas(area) == 0) {
3042 				// Since this cache is now backed by the pages of its source
3043 				// cache, we can reduce its commitment to cover only the pages
3044 				// that are actually in this cache.
3045 
3046 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
3047 					team == VMAddressSpace::KernelID()
3048 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3049 
3050 				// TODO: we may be able to join with our source cache, if
3051 				// count == 0
3052 			}
3053 		}
3054 
3055 		// If only the writability changes, we can just remap the pages of the
3056 		// top cache, since the pages of lower caches are mapped read-only
3057 		// anyway. That's only advantageous if the number of pages in the cache
3058 		// is significantly smaller than the number of pages in the area,
3059 		// though.
3060 		if (newProtection
3061 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
3062 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
3063 			changeTopCachePagesOnly = true;
3064 		}
3065 	} else if (!isWritable && becomesWritable) {
3066 		// !writable -> writable
3067 
3068 		if (!cache->consumers.IsEmpty()) {
3069 			// There are consumers -- we have to insert a new cache. Fortunately
3070 			// vm_copy_on_write_area() does everything that's needed.
3071 			changePageProtection = false;
3072 			status = vm_copy_on_write_area(cache, NULL);
3073 		} else {
3074 			// No consumers, so we don't need to insert a new one.
3075 			if (cache->source != NULL && cache->temporary) {
3076 				// the cache's commitment must contain all possible pages
3077 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
3078 					team == VMAddressSpace::KernelID()
3079 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3080 			}
3081 
3082 			if (status == B_OK && cache->source != NULL) {
3083 				// There's a source cache, hence we can't just change all pages'
3084 				// protection or we might allow writing into pages belonging to
3085 				// a lower cache.
3086 				changeTopCachePagesOnly = true;
3087 			}
3088 		}
3089 	} else {
3090 		// we don't have anything special to do in all other cases
3091 	}
3092 
3093 	if (status == B_OK) {
3094 		// remap existing pages in this cache
3095 		if (changePageProtection) {
3096 			VMTranslationMap* map = area->address_space->TranslationMap();
3097 			map->Lock();
3098 
3099 			if (changeTopCachePagesOnly) {
3100 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
3101 				page_num_t lastPageOffset
3102 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
3103 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3104 						vm_page* page = it.Next();) {
3105 					if (page->cache_offset >= firstPageOffset
3106 						&& page->cache_offset <= lastPageOffset) {
3107 						addr_t address = virtual_page_address(area, page);
3108 						map->ProtectPage(area, address, newProtection);
3109 					}
3110 				}
3111 			} else
3112 				map->ProtectArea(area, newProtection);
3113 
3114 			map->Unlock();
3115 		}
3116 
3117 		area->protection = newProtection;
3118 	}
3119 
3120 	return status;
3121 }
3122 
3123 
3124 status_t
3125 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
3126 {
3127 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
3128 	if (addressSpace == NULL)
3129 		return B_BAD_TEAM_ID;
3130 
3131 	VMTranslationMap* map = addressSpace->TranslationMap();
3132 
3133 	map->Lock();
3134 	uint32 dummyFlags;
3135 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
3136 	map->Unlock();
3137 
3138 	addressSpace->Put();
3139 	return status;
3140 }
3141 
3142 
3143 /*!	The page's cache must be locked.
3144 */
3145 bool
3146 vm_test_map_modification(vm_page* page)
3147 {
3148 	if (page->modified)
3149 		return true;
3150 
3151 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3152 	vm_page_mapping* mapping;
3153 	while ((mapping = iterator.Next()) != NULL) {
3154 		VMArea* area = mapping->area;
3155 		VMTranslationMap* map = area->address_space->TranslationMap();
3156 
3157 		phys_addr_t physicalAddress;
3158 		uint32 flags;
3159 		map->Lock();
3160 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
3161 		map->Unlock();
3162 
3163 		if ((flags & PAGE_MODIFIED) != 0)
3164 			return true;
3165 	}
3166 
3167 	return false;
3168 }
3169 
3170 
3171 /*!	The page's cache must be locked.
3172 */
3173 void
3174 vm_clear_map_flags(vm_page* page, uint32 flags)
3175 {
3176 	if ((flags & PAGE_ACCESSED) != 0)
3177 		page->accessed = false;
3178 	if ((flags & PAGE_MODIFIED) != 0)
3179 		page->modified = false;
3180 
3181 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3182 	vm_page_mapping* mapping;
3183 	while ((mapping = iterator.Next()) != NULL) {
3184 		VMArea* area = mapping->area;
3185 		VMTranslationMap* map = area->address_space->TranslationMap();
3186 
3187 		map->Lock();
3188 		map->ClearFlags(virtual_page_address(area, page), flags);
3189 		map->Unlock();
3190 	}
3191 }
3192 
3193 
3194 /*!	Removes all mappings from a page.
3195 	After you've called this function, the page is unmapped from memory and
3196 	the page's \c accessed and \c modified flags have been updated according
3197 	to the state of the mappings.
3198 	The page's cache must be locked.
3199 */
3200 void
3201 vm_remove_all_page_mappings(vm_page* page)
3202 {
3203 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3204 		VMArea* area = mapping->area;
3205 		VMTranslationMap* map = area->address_space->TranslationMap();
3206 		addr_t address = virtual_page_address(area, page);
3207 		map->UnmapPage(area, address, false);
3208 	}
3209 }
3210 
3211 
3212 int32
3213 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
3214 {
3215 	int32 count = 0;
3216 
3217 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3218 	vm_page_mapping* mapping;
3219 	while ((mapping = iterator.Next()) != NULL) {
3220 		VMArea* area = mapping->area;
3221 		VMTranslationMap* map = area->address_space->TranslationMap();
3222 
3223 		bool modified;
3224 		if (map->ClearAccessedAndModified(area,
3225 				virtual_page_address(area, page), false, modified)) {
3226 			count++;
3227 		}
3228 
3229 		page->modified |= modified;
3230 	}
3231 
3232 
3234 		count++;
3235 		page->accessed = false;
3236 	}
3237 
3238 	return count;
3239 }
3240 
3241 
3242 /*!	Removes all mappings of a page and/or clears the accessed bits of the
3243 	mappings.
3244 	The function iterates through the page mappings and removes them until
3245 	encountering one that has been accessed. From then on it will continue to
3246 	iterate, but only clear the accessed flag of the mapping. The page's
3247 	\c modified bit will be updated accordingly, the \c accessed bit will be
3248 	cleared.
3249 	\return The number of mapping accessed bits encountered, including the
3250 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3251 		of the page have been removed.
3252 */
3253 int32
3254 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3255 {
3256 	ASSERT(page->WiredCount() == 0);
3257 
3258 	if (page->accessed)
3259 		return vm_clear_page_mapping_accessed_flags(page);
3260 
3261 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3262 		VMArea* area = mapping->area;
3263 		VMTranslationMap* map = area->address_space->TranslationMap();
3264 		addr_t address = virtual_page_address(area, page);
3265 		bool modified = false;
3266 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3267 			page->accessed = true;
3268 			page->modified |= modified;
3269 			return vm_clear_page_mapping_accessed_flags(page);
3270 		}
3271 		page->modified |= modified;
3272 	}
3273 
3274 	return 0;
3275 }
3276 
3277 
3278 static int
3279 display_mem(int argc, char** argv)
3280 {
3281 	bool physical = false;
3282 	addr_t copyAddress;
3283 	int32 displayWidth;
3284 	int32 itemSize;
3285 	int32 num = -1;
3286 	addr_t address;
3287 	int i = 1, j;
3288 
3289 	if (argc > 1 && argv[1][0] == '-') {
3290 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
3291 			physical = true;
3292 			i++;
3293 		} else
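			// unknown option: force the usage text below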
3294 			i = 99;
3295 	}
3296 
3297 	if (argc < i + 1 || argc > i + 2) {
3298 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
3299 			"\tdl - 8 bytes\n"
3300 			"\tdw - 4 bytes\n"
3301 			"\tds - 2 bytes\n"
3302 			"\tdb - 1 byte\n"
3303 			"\tstring - a whole string\n"
3304 			"  -p or --physical only allows memory from a single page to be "
3305 			"displayed.\n");
3306 		return 0;
3307 	}
3308 
3309 	address = parse_expression(argv[i]);
3310 
3311 	if (argc > i + 1)
3312 		num = parse_expression(argv[i + 1]);
3313 
3314 	// build the format string
3315 	if (strcmp(argv[0], "db") == 0) {
3316 		itemSize = 1;
3317 		displayWidth = 16;
3318 	} else if (strcmp(argv[0], "ds") == 0) {
3319 		itemSize = 2;
3320 		displayWidth = 8;
3321 	} else if (strcmp(argv[0], "dw") == 0) {
3322 		itemSize = 4;
3323 		displayWidth = 4;
3324 	} else if (strcmp(argv[0], "dl") == 0) {
3325 		itemSize = 8;
3326 		displayWidth = 2;
3327 	} else if (strcmp(argv[0], "string") == 0) {
3328 		itemSize = 1;
3329 		displayWidth = -1;
3330 	} else {
3331 		kprintf("display_mem called in an invalid way!\n");
3332 		return 0;
3333 	}
3334 
3335 	if (num <= 0)
3336 		num = displayWidth;
3337 
3338 	void* physicalPageHandle = NULL;
3339 
3340 	if (physical) {
3341 		int32 offset = address & (B_PAGE_SIZE - 1);
3342 		if (num * itemSize + offset > B_PAGE_SIZE) {
3343 			num = (B_PAGE_SIZE - offset) / itemSize;
3344 			kprintf("NOTE: number of bytes has been cut to page size\n");
3345 		}
3346 
3347 		address = ROUNDDOWN(address, B_PAGE_SIZE);
3348 
3349 		if (vm_get_physical_page_debug(address, &copyAddress,
3350 				&physicalPageHandle) != B_OK) {
3351 			kprintf("getting the hardware page failed.\n");
3352 			return 0;
3353 		}
3354 
3355 		address += offset;
3356 		copyAddress += offset;
3357 	} else
3358 		copyAddress = address;
3359 
3360 	if (!strcmp(argv[0], "string")) {
3361 		kprintf("%p \"", (char*)copyAddress);
3362 
3363 		// string mode
3364 		for (i = 0; true; i++) {
3365 			char c;
3366 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
3367 					!= B_OK
3368 				|| c == '\0') {
3369 				break;
3370 			}
3371 
3372 			if (c == '\n')
3373 				kprintf("\\n");
3374 			else if (c == '\t')
3375 				kprintf("\\t");
3376 			else {
3377 				if (!isprint(c))
3378 					c = '.';
3379 
3380 				kprintf("%c", c);
3381 			}
3382 		}
3383 
3384 		kprintf("\"\n");
3385 	} else {
3386 		// number mode
3387 		for (i = 0; i < num; i++) {
3388 			uint64 value;
3389 
3390 			if ((i % displayWidth) == 0) {
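			// at the start of each output line, print the address followed by
			// an ASCII preview of that line's bytes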
3391 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
3392 				if (i != 0)
3393 					kprintf("\n");
3394 
3395 				kprintf("[0x%lx]  ", address + i * itemSize);
3396 
3397 				for (j = 0; j < displayed; j++) {
3398 					char c;
3399 					if (debug_memcpy(B_CURRENT_TEAM, &c,
3400 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
3401 						displayed = j;
3402 						break;
3403 					}
3404 					if (!isprint(c))
3405 						c = '.';
3406 
3407 					kprintf("%c", c);
3408 				}
3409 				if (num > displayWidth) {
3410 					// make sure the spacing in the last line is correct
3411 					for (j = displayed; j < displayWidth * itemSize; j++)
3412 						kprintf(" ");
3413 				}
3414 				kprintf("  ");
3415 			}
3416 
3417 			if (debug_memcpy(B_CURRENT_TEAM, &value,
3418 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
3419 				kprintf("read fault");
3420 				break;
3421 			}
3422 
3423 			switch (itemSize) {
3424 				case 1:
3425 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
3426 					break;
3427 				case 2:
3428 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3429 					break;
3430 				case 4:
3431 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3432 					break;
3433 				case 8:
3434 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3435 					break;
3436 			}
3437 		}
3438 
3439 		kprintf("\n");
3440 	}
3441 
3442 	if (physical) {
3443 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3444 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3445 	}
3446 	return 0;
3447 }
3448 
3449 
3450 static void
3451 dump_cache_tree_recursively(VMCache* cache, int level,
3452 	VMCache* highlightCache)
3453 {
3454 	// print this cache
3455 	for (int i = 0; i < level; i++)
3456 		kprintf("  ");
3457 	if (cache == highlightCache)
3458 		kprintf("%p <--\n", cache);
3459 	else
3460 		kprintf("%p\n", cache);
3461 
3462 	// recursively print its consumers
3463 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3464 			VMCache* consumer = it.Next();) {
3465 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3466 	}
3467 }
3468 
3469 
3470 static int
3471 dump_cache_tree(int argc, char** argv)
3472 {
3473 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3474 		kprintf("usage: %s <address>\n", argv[0]);
3475 		return 0;
3476 	}
3477 
3478 	addr_t address = parse_expression(argv[1]);
3479 	if (address == 0)
3480 		return 0;
3481 
3482 	VMCache* cache = (VMCache*)address;
3483 	VMCache* root = cache;
3484 
3485 	// find the root cache (the transitive source)
3486 	while (root->source != NULL)
3487 		root = root->source;
3488 
3489 	dump_cache_tree_recursively(root, 0, cache);
3490 
3491 	return 0;
3492 }
3493 
3494 
3495 const char*
3496 vm_cache_type_to_string(int32 type)
3497 {
3498 	switch (type) {
3499 		case CACHE_TYPE_RAM:
3500 			return "RAM";
3501 		case CACHE_TYPE_DEVICE:
3502 			return "device";
3503 		case CACHE_TYPE_VNODE:
3504 			return "vnode";
3505 		case CACHE_TYPE_NULL:
3506 			return "null";
3507 
3508 		default:
3509 			return "unknown";
3510 	}
3511 }
3512 
3513 
3514 #if DEBUG_CACHE_LIST
3515 
3516 static void
3517 update_cache_info_recursively(VMCache* cache, cache_info& info)
3518 {
3519 	info.page_count += cache->page_count;
3520 	if (cache->type == CACHE_TYPE_RAM)
3521 		info.committed += cache->committed_size;
3522 
3523 	// recurse
3524 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3525 			VMCache* consumer = it.Next();) {
3526 		update_cache_info_recursively(consumer, info);
3527 	}
3528 }
3529 
3530 
3531 static int
3532 cache_info_compare_page_count(const void* _a, const void* _b)
3533 {
3534 	const cache_info* a = (const cache_info*)_a;
3535 	const cache_info* b = (const cache_info*)_b;
3536 	if (a->page_count == b->page_count)
3537 		return 0;
3538 	return a->page_count < b->page_count ? 1 : -1;
3539 }
3540 
3541 
3542 static int
3543 cache_info_compare_committed(const void* _a, const void* _b)
3544 {
3545 	const cache_info* a = (const cache_info*)_a;
3546 	const cache_info* b = (const cache_info*)_b;
3547 	if (a->committed == b->committed)
3548 		return 0;
3549 	return a->committed < b->committed ? 1 : -1;
3550 }
3551 
3552 
3553 static void
3554 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3555 {
3556 	for (int i = 0; i < level; i++)
3557 		kprintf("  ");
3558 
3559 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3560 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3561 		cache->virtual_base, cache->virtual_end, cache->page_count);
3562 
3563 	if (level == 0)
3564 		kprintf("/%lu", info.page_count);
3565 
3566 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3567 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3568 
3569 		if (level == 0)
3570 			kprintf("/%lu", info.committed);
3571 	}
3572 
3573 	// areas
3574 	if (cache->areas != NULL) {
3575 		VMArea* area = cache->areas;
3576 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3577 			area->name, area->address_space->ID());
3578 
3579 		while (area->cache_next != NULL) {
3580 			area = area->cache_next;
3581 			kprintf(", %" B_PRId32, area->id);
3582 		}
3583 	}
3584 
3585 	kputs("\n");
3586 
3587 	// recurse
3588 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3589 			VMCache* consumer = it.Next();) {
3590 		dump_caches_recursively(consumer, info, level + 1);
3591 	}
3592 }
3593 
3594 
3595 static int
3596 dump_caches(int argc, char** argv)
3597 {
3598 	if (sCacheInfoTable == NULL) {
3599 		kprintf("No cache info table!\n");
3600 		return 0;
3601 	}
3602 
3603 	bool sortByPageCount = true;
3604 
3605 	for (int32 i = 1; i < argc; i++) {
3606 		if (strcmp(argv[i], "-c") == 0) {
3607 			sortByPageCount = false;
3608 		} else {
3609 			print_debugger_command_usage(argv[0]);
3610 			return 0;
3611 		}
3612 	}
3613 
3614 	uint32 totalCount = 0;
3615 	uint32 rootCount = 0;
3616 	off_t totalCommitted = 0;
3617 	page_num_t totalPages = 0;
3618 
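	// Walk the global cache list and accumulate statistics for each root
	// cache (i.e. each cache without a source) over its whole tree.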
3619 	VMCache* cache = gDebugCacheList;
3620 	while (cache) {
3621 		totalCount++;
3622 		if (cache->source == NULL) {
3623 			cache_info stackInfo;
3624 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3625 				? sCacheInfoTable[rootCount] : stackInfo;
3626 			rootCount++;
3627 			info.cache = cache;
3628 			info.page_count = 0;
3629 			info.committed = 0;
3630 			update_cache_info_recursively(cache, info);
3631 			totalCommitted += info.committed;
3632 			totalPages += info.page_count;
3633 		}
3634 
3635 		cache = cache->debug_next;
3636 	}
3637 
3638 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3639 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3640 			sortByPageCount
3641 				? &cache_info_compare_page_count
3642 				: &cache_info_compare_committed);
3643 	}
3644 
3645 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3646 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3647 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3648 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3649 			"page count" : "committed size");
3650 
3651 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3652 		for (uint32 i = 0; i < rootCount; i++) {
3653 			cache_info& info = sCacheInfoTable[i];
3654 			dump_caches_recursively(info.cache, info, 0);
3655 		}
3656 	} else
3657 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3658 
3659 	return 0;
3660 }
3661 
3662 #endif	// DEBUG_CACHE_LIST
3663 
3664 
3665 static int
3666 dump_cache(int argc, char** argv)
3667 {
3668 	VMCache* cache;
3669 	bool showPages = false;
3670 	int i = 1;
3671 
3672 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3673 		kprintf("usage: %s [-ps] <address>\n"
3674 			"  if -p is specified, all pages are shown; if -s is used,\n"
3675 			"  only the cache info is shown.\n", argv[0]);
3676 		return 0;
3677 	}
3678 	while (argv[i][0] == '-') {
3679 		char* arg = argv[i] + 1;
3680 		while (arg[0]) {
3681 			if (arg[0] == 'p')
3682 				showPages = true;
3683 			arg++;
3684 		}
3685 		i++;
3686 	}
3687 	if (argv[i] == NULL) {
3688 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3689 		return 0;
3690 	}
3691 
3692 	addr_t address = parse_expression(argv[i]);
3693 	if (address == 0)
3694 		return 0;
3695 
3696 	cache = (VMCache*)address;
3697 
3698 	cache->Dump(showPages);
3699 
3700 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3701 
3702 	return 0;
3703 }
3704 
3705 
3706 static void
3707 dump_area_struct(VMArea* area, bool mappings)
3708 {
3709 	kprintf("AREA: %p\n", area);
3710 	kprintf("name:\t\t'%s'\n", area->name);
3711 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3712 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3713 	kprintf("base:\t\t0x%lx\n", area->Base());
3714 	kprintf("size:\t\t0x%lx\n", area->Size());
3715 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3716 	kprintf("page_protection:%p\n", area->page_protections);
3717 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3718 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3719 	kprintf("cache:\t\t%p\n", area->cache);
3720 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3721 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3722 	kprintf("cache_next:\t%p\n", area->cache_next);
3723 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3724 
3725 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3726 	if (mappings) {
3727 		kprintf("page mappings:\n");
3728 		while (iterator.HasNext()) {
3729 			vm_page_mapping* mapping = iterator.Next();
3730 			kprintf("  %p", mapping->page);
3731 		}
3732 		kprintf("\n");
3733 	} else {
3734 		uint32 count = 0;
3735 		while (iterator.Next() != NULL) {
3736 			count++;
3737 		}
3738 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3739 	}
3740 }
3741 
3742 
3743 static int
3744 dump_area(int argc, char** argv)
3745 {
3746 	bool mappings = false;
3747 	bool found = false;
3748 	int32 index = 1;
3749 	VMArea* area;
3750 	addr_t num;
3751 
3752 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3753 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3754 			"All areas matching the given id/address/name are listed. You can\n"
3755 			"restrict the search to a single criterion by prefixing the specifier\n"
3756 			"with one of the id/contains/address/name keywords.\n"
3757 			"-m shows the area's mappings as well.\n");
3758 		return 0;
3759 	}
3760 
3761 	if (!strcmp(argv[1], "-m")) {
3762 		mappings = true;
3763 		index++;
3764 	}
3765 
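	// match mode bit mask: 1 = by id, 2 = by containing address, 4 = by name;
	// 0xf matches any of these, 0 treats the argument as a VMArea pointer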
3766 	int32 mode = 0xf;
3767 	if (!strcmp(argv[index], "id"))
3768 		mode = 1;
3769 	else if (!strcmp(argv[index], "contains"))
3770 		mode = 2;
3771 	else if (!strcmp(argv[index], "name"))
3772 		mode = 4;
3773 	else if (!strcmp(argv[index], "address"))
3774 		mode = 0;
3775 	if (mode != 0xf)
3776 		index++;
3777 
3778 	if (index >= argc) {
3779 		kprintf("No area specifier given.\n");
3780 		return 0;
3781 	}
3782 
3783 	num = parse_expression(argv[index]);
3784 
3785 	if (mode == 0) {
3786 		dump_area_struct((struct VMArea*)num, mappings);
3787 	} else {
3788 		// walk through the area tree, looking for areas that match the argument
3789 
3790 		VMAreasTree::Iterator it = VMAreas::GetIterator();
3791 		while ((area = it.Next()) != NULL) {
3792 			if (((mode & 4) != 0
3793 					&& !strcmp(argv[index], area->name))
3794 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3795 					|| (((mode & 2) != 0 && area->Base() <= num
3796 						&& area->Base() + area->Size() > num))))) {
3797 				dump_area_struct(area, mappings);
3798 				found = true;
3799 			}
3800 		}
3801 
3802 		if (!found)
3803 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3804 	}
3805 
3806 	return 0;
3807 }
3808 
3809 
3810 static int
3811 dump_area_list(int argc, char** argv)
3812 {
3813 	VMArea* area;
3814 	const char* name = NULL;
3815 	int32 id = 0;
3816 
3817 	if (argc > 1) {
3818 		id = parse_expression(argv[1]);
3819 		if (id == 0)
3820 			name = argv[1];
3821 	}
3822 
3823 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3824 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3825 		B_PRINTF_POINTER_WIDTH, "size");
3826 
3827 	VMAreasTree::Iterator it = VMAreas::GetIterator();
3828 	while ((area = it.Next()) != NULL) {
3829 		if ((id != 0 && area->address_space->ID() != id)
3830 			|| (name != NULL && strstr(area->name, name) == NULL))
3831 			continue;
3832 
3833 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3834 			area->id, (void*)area->Base(), (void*)area->Size(),
3835 			area->protection, area->wiring, area->name);
3836 	}
3837 	return 0;
3838 }
3839 
3840 
3841 static int
3842 dump_available_memory(int argc, char** argv)
3843 {
3844 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3845 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3846 	return 0;
3847 }
3848 
3849 
3850 static int
3851 dump_mapping_info(int argc, char** argv)
3852 {
3853 	bool reverseLookup = false;
3854 	bool pageLookup = false;
3855 
3856 	int argi = 1;
3857 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3858 		const char* arg = argv[argi];
3859 		if (strcmp(arg, "-r") == 0) {
3860 			reverseLookup = true;
3861 		} else if (strcmp(arg, "-p") == 0) {
3862 			reverseLookup = true;
3863 			pageLookup = true;
3864 		} else {
3865 			print_debugger_command_usage(argv[0]);
3866 			return 0;
3867 		}
3868 	}
3869 
3870 	// We need at least one argument, the address. Optionally a thread ID can be
3871 	// specified.
3872 	if (argi >= argc || argi + 2 < argc) {
3873 		print_debugger_command_usage(argv[0]);
3874 		return 0;
3875 	}
3876 
3877 	uint64 addressValue;
3878 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3879 		return 0;
3880 
3881 	Team* team = NULL;
3882 	if (argi < argc) {
3883 		uint64 threadID;
3884 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3885 			return 0;
3886 
3887 		Thread* thread = Thread::GetDebug(threadID);
3888 		if (thread == NULL) {
3889 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3890 			return 0;
3891 		}
3892 
3893 		team = thread->team;
3894 	}
3895 
3896 	if (reverseLookup) {
3897 		phys_addr_t physicalAddress;
3898 		if (pageLookup) {
3899 			vm_page* page = (vm_page*)(addr_t)addressValue;
3900 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3901 		} else {
3902 			physicalAddress = (phys_addr_t)addressValue;
3903 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3904 		}
3905 
3906 		kprintf("    Team     Virtual Address      Area\n");
3907 		kprintf("--------------------------------------\n");
3908 
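		// The callback is invoked for every virtual address the physical
		// page is mapped at and prints the team, address and matching area.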
3909 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3910 			Callback()
3911 				:
3912 				fAddressSpace(NULL)
3913 			{
3914 			}
3915 
3916 			void SetAddressSpace(VMAddressSpace* addressSpace)
3917 			{
3918 				fAddressSpace = addressSpace;
3919 			}
3920 
3921 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3922 			{
3923 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3924 					virtualAddress);
3925 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3926 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3927 				else
3928 					kprintf("\n");
3929 				return false;
3930 			}
3931 
3932 		private:
3933 			VMAddressSpace*	fAddressSpace;
3934 		} callback;
3935 
3936 		if (team != NULL) {
3937 			// team specified -- get its address space
3938 			VMAddressSpace* addressSpace = team->address_space;
3939 			if (addressSpace == NULL) {
3940 				kprintf("Failed to get address space!\n");
3941 				return 0;
3942 			}
3943 
3944 			callback.SetAddressSpace(addressSpace);
3945 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3946 				physicalAddress, callback);
3947 		} else {
3948 			// no team specified -- iterate through all address spaces
3949 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3950 				addressSpace != NULL;
3951 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3952 				callback.SetAddressSpace(addressSpace);
3953 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3954 					physicalAddress, callback);
3955 			}
3956 		}
3957 	} else {
3958 		// get the address space
3959 		addr_t virtualAddress = (addr_t)addressValue;
3960 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3961 		VMAddressSpace* addressSpace;
3962 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3963 			addressSpace = VMAddressSpace::Kernel();
3964 		} else if (team != NULL) {
3965 			addressSpace = team->address_space;
3966 		} else {
3967 			Thread* thread = debug_get_debugged_thread();
3968 			if (thread == NULL || thread->team == NULL) {
3969 				kprintf("Failed to get team!\n");
3970 				return 0;
3971 			}
3972 
3973 			addressSpace = thread->team->address_space;
3974 		}
3975 
3976 		if (addressSpace == NULL) {
3977 			kprintf("Failed to get address space!\n");
3978 			return 0;
3979 		}
3980 
3981 		// let the translation map implementation do the job
3982 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3983 	}
3984 
3985 	return 0;
3986 }
3987 
3988 
3989 /*!	Deletes all areas and reserved regions in the given address space.
3990 
3991 	The caller must ensure that none of the areas has any wired ranges.
3992 
3993 	\param addressSpace The address space.
3994 	\param deletingAddressSpace \c true, if the address space is in the process
3995 		of being deleted.
3996 */
3997 void
3998 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3999 {
4000 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
4001 		addressSpace->ID()));
4002 
4003 	addressSpace->WriteLock();
4004 
4005 	// remove all reserved areas in this address space
4006 	addressSpace->UnreserveAllAddressRanges(0);
4007 
4008 	// remove all areas from the areas map at once (to avoid lock contention)
4009 	VMAreas::WriteLock();
4010 	{
4011 		VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
4012 		while (VMArea* area = it.Next())
4013 			VMAreas::Remove(area);
4014 	}
4015 	VMAreas::WriteUnlock();
4016 
4017 	// delete all the areas in this address space
4018 	while (VMArea* area = addressSpace->FirstArea()) {
4019 		ASSERT(!area->IsWired());
4020 		delete_area(addressSpace, area, deletingAddressSpace, true);
4021 	}
4022 
4023 	addressSpace->WriteUnlock();
4024 }
4025 
4026 
4027 static area_id
4028 vm_area_for(addr_t address, bool kernel)
4029 {
4030 	team_id team;
4031 	if (IS_USER_ADDRESS(address)) {
4032 		// we try the user team address space, if any
4033 		team = VMAddressSpace::CurrentID();
4034 		if (team < 0)
4035 			return team;
4036 	} else
4037 		team = VMAddressSpace::KernelID();
4038 
4039 	AddressSpaceReadLocker locker(team);
4040 	if (!locker.IsLocked())
4041 		return B_BAD_TEAM_ID;
4042 
4043 	VMArea* area = locker.AddressSpace()->LookupArea(address);
4044 	if (area != NULL) {
4045 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0
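		// areas that aren't user accessible and are marked B_KERNEL_AREA are
		// not revealed to userland callers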
4046 				&& (area->protection & B_KERNEL_AREA) != 0)
4047 			return B_ERROR;
4048 
4049 		return area->id;
4050 	}
4051 
4052 	return B_ERROR;
4053 }
4054 
4055 
4056 /*!	Frees physical pages that were used during the boot process.
4057 	\a end is inclusive.
4058 */
4059 static void
4060 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
4061 {
4062 	// free all physical pages in the specified range
4063 
4064 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
4065 		phys_addr_t physicalAddress;
4066 		uint32 flags;
4067 
4068 		if (map->Query(current, &physicalAddress, &flags) == B_OK
4069 			&& (flags & PAGE_PRESENT) != 0) {
4070 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
4071 			if (page != NULL && page->State() != PAGE_STATE_FREE
4072 					&& page->State() != PAGE_STATE_CLEAR
4073 					&& page->State() != PAGE_STATE_UNUSED) {
4074 				DEBUG_PAGE_ACCESS_START(page);
4075 				vm_page_set_state(page, PAGE_STATE_FREE);
4076 			}
4077 		}
4078 	}
4079 
4080 	// unmap the memory
4081 	map->Unmap(start, end);
4082 }
4083 
4084 
4085 void
4086 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
4087 {
4088 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
4089 	addr_t end = start + (size - 1);
4090 	addr_t lastEnd = start;
4091 
4092 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
4093 		(void*)start, (void*)end));
4094 
4095 	// The areas are sorted in virtual address space order, so
4096 	// we just have to find the holes between them that fall
4097 	// into the range we should dispose of
4098 
4099 	map->Lock();
4100 
4101 	for (VMAddressSpace::AreaIterator it
4102 				= VMAddressSpace::Kernel()->GetAreaIterator();
4103 			VMArea* area = it.Next();) {
4104 		addr_t areaStart = area->Base();
4105 		addr_t areaEnd = areaStart + (area->Size() - 1);
4106 
4107 		if (areaEnd < start)
4108 			continue;
4109 
4110 		if (areaStart > end) {
4111 			// we are done, the area is already beyond what we have to free
4112 			break;
4113 		}
4114 
4115 		if (areaStart > lastEnd) {
4116 			// this is something we can free
4117 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
4118 				(void*)areaStart));
4119 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
4120 		}
4121 
4122 		if (areaEnd >= end) {
4123 			lastEnd = areaEnd;
4124 				// no +1 to prevent potential overflow
4125 			break;
4126 		}
4127 
4128 		lastEnd = areaEnd + 1;
4129 	}
4130 
4131 	if (lastEnd < end) {
4132 		// we can also get rid of some space at the end of the area
4133 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
4134 			(void*)end));
4135 		unmap_and_free_physical_pages(map, lastEnd, end);
4136 	}
4137 
4138 	map->Unlock();
4139 }
4140 
4141 
4142 static void
4143 create_preloaded_image_areas(struct preloaded_image* _image)
4144 {
4145 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
4146 	char name[B_OS_NAME_LENGTH];
4147 	void* address;
4148 	int32 length;
4149 
4150 	// use file name to create a good area name
4151 	char* fileName = strrchr(image->name, '/');
4152 	if (fileName == NULL)
4153 		fileName = image->name;
4154 	else
4155 		fileName++;
4156 
4157 	length = strlen(fileName);
4158 	// make sure there is enough space for the suffix
4159 	if (length > 25)
4160 		length = 25;
4161 
4162 	memcpy(name, fileName, length);
4163 	strcpy(name + length, "_text");
4164 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
4165 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4166 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
4167 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4168 		// this will later be remapped read-only/executable by the
4169 		// ELF initialization code
4170 
4171 	strcpy(name + length, "_data");
4172 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
4173 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
4174 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
4175 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4176 }
4177 
4178 
4179 /*!	Frees all areas that were previously created for the kernel args ranges.
4180 	Any boot loader resources contained in those ranges must not be accessed
4181 	anymore past this point.
4182 */
4183 void
4184 vm_free_kernel_args(kernel_args* args)
4185 {
4186 	uint32 i;
4187 
4188 	TRACE(("vm_free_kernel_args()\n"));
4189 
4190 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
4191 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
4192 		if (area >= B_OK)
4193 			delete_area(area);
4194 	}
4195 }
4196 
4197 
4198 static void
4199 allocate_kernel_args(kernel_args* args)
4200 {
4201 	TRACE(("allocate_kernel_args()\n"));
4202 
4203 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
4204 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
4205 
4206 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
4207 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
4208 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4209 	}
4210 }
4211 
4212 
4213 static void
4214 unreserve_boot_loader_ranges(kernel_args* args)
4215 {
4216 	TRACE(("unreserve_boot_loader_ranges()\n"));
4217 
4218 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4219 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
4220 			(void*)(addr_t)args->virtual_allocated_range[i].start,
4221 			args->virtual_allocated_range[i].size);
4222 	}
4223 }
4224 
4225 
4226 static void
4227 reserve_boot_loader_ranges(kernel_args* args)
4228 {
4229 	TRACE(("reserve_boot_loader_ranges()\n"));
4230 
4231 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
4232 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
4233 
4234 		// If the address is not a kernel address, we just skip it. The
4235 		// architecture-specific code has to deal with it.
4236 		if (!IS_KERNEL_ADDRESS(address)) {
4237 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
4238 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
4239 			continue;
4240 		}
4241 
4242 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
4243 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
4244 		if (status < B_OK)
4245 			panic("could not reserve boot loader ranges\n");
4246 	}
4247 }
4248 
4249 
4250 static addr_t
4251 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
4252 {
4253 	size = PAGE_ALIGN(size);
4254 
4255 	// find a slot in the virtual allocation addr range
4256 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
4257 		// check to see if the space between this one and the last is big enough
4258 		addr_t rangeStart = args->virtual_allocated_range[i].start;
4259 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
4260 			+ args->virtual_allocated_range[i - 1].size;
4261 
4262 		addr_t base = alignment > 0
4263 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
4264 
4265 		if (base >= KERNEL_BASE && base < rangeStart
4266 				&& rangeStart - base >= size) {
4267 			args->virtual_allocated_range[i - 1].size
4268 				+= base + size - previousRangeEnd;
4269 			return base;
4270 		}
4271 	}
4272 
4273 	// We didn't find a gap between the allocated ranges. That's OK;
4274 	// see if there's a gap after the last one.
4275 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
4276 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
4277 		+ args->virtual_allocated_range[lastEntryIndex].size;
4278 	addr_t base = alignment > 0
4279 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
4280 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
4281 		args->virtual_allocated_range[lastEntryIndex].size
4282 			+= base + size - lastRangeEnd;
4283 		return base;
4284 	}
4285 
4286 	// see if there's a gap before the first one
4287 	addr_t rangeStart = args->virtual_allocated_range[0].start;
4288 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
4289 		base = rangeStart - size;
4290 		if (alignment > 0)
4291 			base = ROUNDDOWN(base, alignment);
4292 
4293 		if (base >= KERNEL_BASE) {
4294 			args->virtual_allocated_range[0].start = base;
4295 			args->virtual_allocated_range[0].size += rangeStart - base;
4296 			return base;
4297 		}
4298 	}
4299 
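	// no suitable gap found in or around the allocated ranges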
4300 	return 0;
4301 }
4302 
4303 
4304 static bool
4305 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
4306 {
4307 	// TODO: horrible brute-force method of determining if the page can be
4308 	// allocated
4309 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
4310 		if (address >= args->physical_memory_range[i].start
4311 			&& address < args->physical_memory_range[i].start
4312 				+ args->physical_memory_range[i].size)
4313 			return true;
4314 	}
4315 	return false;
4316 }
4317 
4318 
4319 page_num_t
4320 vm_allocate_early_physical_page(kernel_args* args)
4321 {
4322 	if (args->num_physical_allocated_ranges == 0) {
4323 		panic("early physical page allocations no longer possible!");
4324 		return 0;
4325 	}
4326 
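	// First try to extend one of the already allocated ranges upwards by one
	// page.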
4327 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4328 		phys_addr_t nextPage;
4329 
4330 		nextPage = args->physical_allocated_range[i].start
4331 			+ args->physical_allocated_range[i].size;
4332 		// see if the page right after this allocated paddr run can be allocated
4333 		if (i + 1 < args->num_physical_allocated_ranges
4334 			&& args->physical_allocated_range[i + 1].size != 0) {
4335 			// see if the next page will collide with the next allocated range
4336 			if (nextPage >= args->physical_allocated_range[i+1].start)
4337 				continue;
4338 		}
4339 		// see if the next physical page fits in the memory block
4340 		if (is_page_in_physical_memory_range(args, nextPage)) {
4341 			// we got one!
4342 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4343 			return nextPage / B_PAGE_SIZE;
4344 		}
4345 	}
4346 
4347 	// Expanding upwards didn't work, try going downwards.
4348 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
4349 		phys_addr_t nextPage;
4350 
4351 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
4352 		// see if the page right before this allocated paddr run can be allocated
4353 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
4354 			// see if this page would collide with the previous allocated range
4355 			if (nextPage < args->physical_allocated_range[i-1].start
4356 				+ args->physical_allocated_range[i-1].size)
4357 				continue;
4358 		}
4359 		// see if the next physical page fits in the memory block
4360 		if (is_page_in_physical_memory_range(args, nextPage)) {
4361 			// we got one!
4362 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
4363 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
4364 			return nextPage / B_PAGE_SIZE;
4365 		}
4366 	}
4367 
4368 	return 0;
4369 		// could not allocate a block
4370 }
4371 
4372 
4373 /*!	This one uses the kernel_args' physical and virtual memory ranges to
4374 	allocate some pages before the VM is completely up.
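
	A hypothetical early-boot caller could, for example, obtain a page-sized
	kernel R/W mapping roughly like this (sketch only, not taken from any
	actual caller):
		addr_t scratch = vm_allocate_early(args, B_PAGE_SIZE, B_PAGE_SIZE,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);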
4375 */
4376 addr_t
4377 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
4378 	uint32 attributes, addr_t alignment)
4379 {
4380 	if (physicalSize > virtualSize)
4381 		physicalSize = virtualSize;
4382 
4383 	// find the vaddr to allocate at
4384 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
4385 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
4386 	if (virtualBase == 0) {
4387 		panic("vm_allocate_early: could not allocate virtual address\n");
4388 		return 0;
4389 	}
4390 
4391 	// map the pages
4392 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
4393 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
4394 		if (physicalAddress == 0)
4395 			panic("error allocating early page!\n");
4396 
4397 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
4398 
4399 		status_t status = arch_vm_translation_map_early_map(args,
4400 			virtualBase + i * B_PAGE_SIZE,
4401 			physicalAddress * B_PAGE_SIZE, attributes,
4402 			&vm_allocate_early_physical_page);
4403 		if (status != B_OK)
4404 			panic("error mapping early page!");
4405 	}
4406 
4407 	return virtualBase;
4408 }
4409 
4410 
4411 /*!	The main entrance point to initialize the VM. */
4412 status_t
4413 vm_init(kernel_args* args)
4414 {
4415 	struct preloaded_image* image;
4416 	void* address;
4417 	status_t err = 0;
4418 	uint32 i;
4419 
4420 	TRACE(("vm_init: entry\n"));
4421 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
4422 	err = arch_vm_init(args);
4423 
4424 	// initialize some globals
4425 	vm_page_init_num_pages(args);
4426 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
4427 
4428 	slab_init(args);
4429 
4430 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4431 	off_t heapSize = INITIAL_HEAP_SIZE;
4432 	// try to accommodate low-memory systems
4433 	while (heapSize > sAvailableMemory / 8)
4434 		heapSize /= 2;
4435 	if (heapSize < 1024 * 1024)
4436 		panic("vm_init: go buy some RAM please.");
4437 
4438 	// map in the new heap and initialize it
4439 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
4440 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
4441 	TRACE(("heap at 0x%lx\n", heapBase));
4442 	heap_init(heapBase, heapSize);
4443 #endif
4444 
4445 	// initialize the free page list and physical page mapper
4446 	vm_page_init(args);
4447 
4448 	// initialize the cache allocators
4449 	vm_cache_init(args);
4450 
4451 	{
4452 		status_t error = VMAreas::Init();
4453 		if (error != B_OK)
4454 			panic("vm_init: error initializing areas map\n");
4455 	}
4456 
4457 	VMAddressSpace::Init();
4458 	reserve_boot_loader_ranges(args);
4459 
4460 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4461 	heap_init_post_area();
4462 #endif
4463 
4464 	// Do any further initialization that the architecture-dependent layers
4465 	// may need now
4466 	arch_vm_translation_map_init_post_area(args);
4467 	arch_vm_init_post_area(args);
4468 	vm_page_init_post_area(args);
4469 	slab_init_post_area();
4470 
4471 	// allocate areas to represent stuff that already exists
4472 
4473 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4474 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4475 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4476 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4477 #endif
4478 
4479 	allocate_kernel_args(args);
4480 
4481 	create_preloaded_image_areas(args->kernel_image);
4482 
4483 	// allocate areas for preloaded images
4484 	for (image = args->preloaded_images; image != NULL; image = image->next)
4485 		create_preloaded_image_areas(image);
4486 
4487 	// allocate kernel stacks
4488 	for (i = 0; i < args->num_cpus; i++) {
4489 		char name[64];
4490 
4491 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4492 		address = (void*)args->cpu_kstack[i].start;
4493 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4494 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4495 	}
4496 
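	// make sure the very last page of the address space can never become part
	// of an area ("overflow protection")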
4497 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4498 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4499 
4500 #if PARANOID_KERNEL_MALLOC
4501 	vm_block_address_range("uninitialized heap memory",
4502 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4503 #endif
4504 #if PARANOID_KERNEL_FREE
4505 	vm_block_address_range("freed heap memory",
4506 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4507 #endif
4508 
4509 	create_page_mappings_object_caches();
4510 
4511 #if DEBUG_CACHE_LIST
4512 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4513 		virtual_address_restrictions virtualRestrictions = {};
4514 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4515 		physical_address_restrictions physicalRestrictions = {};
4516 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4517 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4518 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4519 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4520 			&physicalRestrictions, (void**)&sCacheInfoTable);
4521 	}
4522 #endif	// DEBUG_CACHE_LIST
4523 
4524 	// add some debugger commands
4525 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4526 	add_debugger_command("area", &dump_area,
4527 		"Dump info about a particular area");
4528 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4529 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4530 #if DEBUG_CACHE_LIST
4531 	if (sCacheInfoTable != NULL) {
4532 		add_debugger_command_etc("caches", &dump_caches,
4533 			"List all VMCache trees",
4534 			"[ \"-c\" ]\n"
4535 			"All cache trees are listed sorted in decreasing order by number "
4536 				"of\n"
4537 			"used pages or, if \"-c\" is specified, by size of committed "
4538 				"memory.\n",
4539 			0);
4540 	}
4541 #endif
4542 	add_debugger_command("avail", &dump_available_memory,
4543 		"Dump available memory");
4544 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4545 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4546 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4547 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4548 	add_debugger_command("string", &display_mem, "dump strings");
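	// (For example, assuming display_mem's usual "<address> [num]" argument
	// order, "dw <some address> 8" would dump eight 32-bit words.)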
4549 
4550 	add_debugger_command_etc("mapping", &dump_mapping_info,
4551 		"Print address mapping information",
4552 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4553 		"Prints low-level page mapping information for a given address. If\n"
4554 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4555 		"address that is looked up in the translation map of the current\n"
4556 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4557 		"\"-r\" is specified, <address> is a physical address that is\n"
4558 		"searched in the translation map of all teams, respectively the team\n"
4559 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4560 		"<address> is the address of a vm_page structure. The behavior is\n"
4561 		"equivalent to specifying \"-r\" with the physical address of that\n"
4562 		"page.\n",
4563 		0);
4564 
4565 	TRACE(("vm_init: exit\n"));
4566 
4567 	vm_cache_init_post_heap();
4568 
4569 	return err;
4570 }
4571 
4572 
4573 status_t
4574 vm_init_post_sem(kernel_args* args)
4575 {
4576 	// This frees all unused boot loader resources and makes their space
4577 	// available again
4578 	arch_vm_init_end(args);
4579 	unreserve_boot_loader_ranges(args);
4580 
4581 	// Fill in all of the semaphores that were not allocated before.
4582 	// Since we're still single-threaded and only the kernel address space
4583 	// exists, it isn't that hard to find all of the ones we need to create.
4584 
4585 	arch_vm_translation_map_init_post_sem(args);
4586 
4587 	slab_init_post_sem();
4588 
4589 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4590 	heap_init_post_sem();
4591 #endif
4592 
4593 	return B_OK;
4594 }
4595 
4596 
4597 status_t
4598 vm_init_post_thread(kernel_args* args)
4599 {
4600 	vm_page_init_post_thread(args);
4601 	slab_init_post_thread();
4602 	return heap_init_post_thread();
4603 }
4604 
4605 
4606 status_t
4607 vm_init_post_modules(kernel_args* args)
4608 {
4609 	return arch_vm_init_post_modules(args);
4610 }
4611 
4612 
4613 void
4614 permit_page_faults(void)
4615 {
4616 	Thread* thread = thread_get_current_thread();
4617 	if (thread != NULL)
4618 		atomic_add(&thread->page_faults_allowed, 1);
4619 }
4620 
4621 
4622 void
4623 forbid_page_faults(void)
4624 {
4625 	Thread* thread = thread_get_current_thread();
4626 	if (thread != NULL)
4627 		atomic_add(&thread->page_faults_allowed, -1);
4628 }
4629 
4630 
4631 status_t
4632 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4633 	bool isUser, addr_t* newIP)
4634 {
4635 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4636 		faultAddress));
4637 
4638 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4639 
4640 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4641 	VMAddressSpace* addressSpace = NULL;
4642 
4643 	status_t status = B_OK;
4644 	*newIP = 0;
4645 	atomic_add((int32*)&sPageFaults, 1);
4646 
4647 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4648 		addressSpace = VMAddressSpace::GetKernel();
4649 	} else if (IS_USER_ADDRESS(pageAddress)) {
4650 		addressSpace = VMAddressSpace::GetCurrent();
4651 		if (addressSpace == NULL) {
4652 			if (!isUser) {
4653 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4654 					"memory!\n");
4655 				status = B_BAD_ADDRESS;
4656 				TPF(PageFaultError(-1,
4657 					VMPageFaultTracing
4658 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4659 			} else {
4660 				// XXX weird state.
4661 				panic("vm_page_fault: non kernel thread accessing user memory "
4662 					"that doesn't exist!\n");
4663 				status = B_BAD_ADDRESS;
4664 			}
4665 		}
4666 	} else {
4667 		// The hit was probably in the 64k DMZ between kernel and user space;
4668 		// this keeps a user space thread from passing a buffer that crosses
4669 		// into kernel space.
4670 		status = B_BAD_ADDRESS;
4671 		TPF(PageFaultError(-1,
4672 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4673 	}
4674 
4675 	if (status == B_OK) {
4676 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4677 			isUser, NULL);
4678 	}
4679 
4680 	if (status < B_OK) {
4681 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4682 			"0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n",
4683 			strerror(status), address, faultAddress, isWrite, isUser, isExecute,
4684 			thread_get_current_thread_id());
4685 		if (!isUser) {
4686 			Thread* thread = thread_get_current_thread();
4687 			if (thread != NULL && thread->fault_handler != 0) {
4688 				// this will cause the arch dependant page fault handler to
4689 				// modify the IP on the interrupt frame or whatever to return
4690 				// to this address
4691 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4692 			} else {
4693 				// unhandled page fault in the kernel
4694 				panic("vm_page_fault: unhandled page fault in kernel space at "
4695 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4696 			}
4697 		} else {
4698 			Thread* thread = thread_get_current_thread();
4699 
4700 #ifdef TRACE_FAULTS
4701 			VMArea* area = NULL;
4702 			if (addressSpace != NULL) {
4703 				addressSpace->ReadLock();
4704 				area = addressSpace->LookupArea(faultAddress);
4705 			}
4706 
4707 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4708 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4709 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4710 				thread->team->Name(), thread->team->id,
4711 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4712 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4713 					area->Base() : 0x0));
4714 
4715 			if (addressSpace != NULL)
4716 				addressSpace->ReadUnlock();
4717 #endif
4718 
4719 			// If the thread has a signal handler for SIGSEGV, we simply
4720 			// send it the signal. Otherwise we notify the user debugger
4721 			// first.
4722 			struct sigaction action;
4723 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4724 					&& action.sa_handler != SIG_DFL
4725 					&& action.sa_handler != SIG_IGN)
4726 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4727 					SIGSEGV)) {
4728 				Signal signal(SIGSEGV,
4729 					status == B_PERMISSION_DENIED
4730 						? SEGV_ACCERR : SEGV_MAPERR,
4731 					EFAULT, thread->team->id);
4732 				signal.SetAddress((void*)address);
4733 				send_signal_to_thread(thread, signal, 0);
4734 			}
4735 		}
4736 	}
4737 
4738 	if (addressSpace != NULL)
4739 		addressSpace->Put();
4740 
4741 	return B_HANDLED_INTERRUPT;
4742 }
4743 
4744 
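// Bundles the state needed while resolving a single page fault, so that
// vm_soft_fault() can unlock everything and restart the lookup when needed.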
4745 struct PageFaultContext {
4746 	AddressSpaceReadLocker	addressSpaceLocker;
4747 	VMCacheChainLocker		cacheChainLocker;
4748 
4749 	VMTranslationMap*		map;
4750 	VMCache*				topCache;
4751 	off_t					cacheOffset;
4752 	vm_page_reservation		reservation;
4753 	bool					isWrite;
4754 
4755 	// return values
4756 	vm_page*				page;
4757 	bool					restart;
4758 	bool					pageAllocated;
4759 
4760 
4761 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4762 		:
4763 		addressSpaceLocker(addressSpace, true),
4764 		map(addressSpace->TranslationMap()),
4765 		isWrite(isWrite)
4766 	{
4767 	}
4768 
4769 	~PageFaultContext()
4770 	{
4771 		UnlockAll();
4772 		vm_page_unreserve_pages(&reservation);
4773 	}
4774 
4775 	void Prepare(VMCache* topCache, off_t cacheOffset)
4776 	{
4777 		this->topCache = topCache;
4778 		this->cacheOffset = cacheOffset;
4779 		page = NULL;
4780 		restart = false;
4781 		pageAllocated = false;
4782 
4783 		cacheChainLocker.SetTo(topCache);
4784 	}
4785 
4786 	void UnlockAll(VMCache* exceptCache = NULL)
4787 	{
4788 		topCache = NULL;
4789 		addressSpaceLocker.Unlock();
4790 		cacheChainLocker.Unlock(exceptCache);
4791 	}
4792 };
4793 
4794 
4795 /*!	Gets the page that should be mapped into the area.
4796 	Returns an error code other than \c B_OK, if the page couldn't be found or
4797 	paged in. The locking state of the address space and the caches is undefined
4798 	in that case.
4799 	Returns \c B_OK with \c context.restart set to \c true, if the function
4800 	had to unlock the address space and all caches and is supposed to be called
4801 	again.
4802 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4803 	found. It is returned in \c context.page. The address space will still be
4804 	locked as well as all caches starting from the top cache to at least the
4805 	cache the page lives in.
4806 */
4807 static status_t
4808 fault_get_page(PageFaultContext& context)
4809 {
4810 	VMCache* cache = context.topCache;
4811 	VMCache* lastCache = NULL;
4812 	vm_page* page = NULL;
4813 
4814 	while (cache != NULL) {
4815 		// We already hold the lock of the cache at this point.
4816 
4817 		lastCache = cache;
4818 
4819 		page = cache->LookupPage(context.cacheOffset);
4820 		if (page != NULL && page->busy) {
4821 			// page must be busy -- wait for it to become unbusy
4822 			// the page is busy -- wait for it to become unbusy
4823 			cache->ReleaseRefLocked();
4824 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4825 
4826 			// restart the whole process
4827 			context.restart = true;
4828 			return B_OK;
4829 		}
4830 
4831 		if (page != NULL)
4832 			break;
4833 
4834 		// The current cache does not contain the page we're looking for.
4835 
4836 		// see if the backing store has it
4837 		if (cache->HasPage(context.cacheOffset)) {
4838 			// insert a fresh page and mark it busy -- we're going to read it in
4839 			page = vm_page_allocate_page(&context.reservation,
4840 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4841 			cache->InsertPage(page, context.cacheOffset);
4842 
4843 			// We need to unlock all caches and the address space while reading
4844 			// the page in. Keep a reference to the cache around.
4845 			cache->AcquireRefLocked();
4846 			context.UnlockAll();
4847 
4848 			// read the page in
4849 			generic_io_vec vec;
4850 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4851 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4852 
4853 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4854 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4855 
4856 			cache->Lock();
4857 
4858 			if (status < B_OK) {
4859 				// on error remove and free the page
4860 				dprintf("reading page from cache %p returned: %s!\n",
4861 					cache, strerror(status));
4862 
4863 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4864 				cache->RemovePage(page);
4865 				vm_page_set_state(page, PAGE_STATE_FREE);
4866 
4867 				cache->ReleaseRefAndUnlock();
4868 				return status;
4869 			}
4870 
4871 			// mark the page unbusy again
4872 			cache->MarkPageUnbusy(page);
4873 
4874 			DEBUG_PAGE_ACCESS_END(page);
4875 
4876 			// Since we needed to unlock everything temporarily, the area
4877 			// situation might have changed. So we need to restart the whole
4878 			// process.
4879 			cache->ReleaseRefAndUnlock();
4880 			context.restart = true;
4881 			return B_OK;
4882 		}
4883 
4884 		cache = context.cacheChainLocker.LockSourceCache();
4885 	}
4886 
4887 	if (page == NULL) {
4888 		// There was no adequate page; determine the cache for a clean one.
4889 		// Read-only pages go into the deepest cache; only the topmost cache
4890 		// may have direct write access.
4891 		cache = context.isWrite ? context.topCache : lastCache;
4892 
4893 		// allocate a clean page
4894 		page = vm_page_allocate_page(&context.reservation,
4895 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4896 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4897 			page->physical_page_number));
4898 
4899 		// insert the new page into our cache
4900 		cache->InsertPage(page, context.cacheOffset);
4901 		context.pageAllocated = true;
4902 	} else if (page->Cache() != context.topCache && context.isWrite) {
4903 		// We have a page that has the data we want, but in the wrong cache
4904 		// object so we need to copy it and stick it into the top cache.
4905 		vm_page* sourcePage = page;
4906 
4907 		// TODO: If memory is low, it might be a good idea to steal the page
4908 		// from our source cache -- if possible, that is.
4909 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4910 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4911 
4912 		// To not needlessly kill concurrency we unlock all caches but the top
4913 		// one while copying the page. Lacking another mechanism to ensure that
4914 		// the source page doesn't disappear, we mark it busy.
4915 		sourcePage->busy = true;
4916 		context.cacheChainLocker.UnlockKeepRefs(true);
4917 
4918 		// copy the page
4919 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4920 			sourcePage->physical_page_number * B_PAGE_SIZE);
4921 
4922 		context.cacheChainLocker.RelockCaches(true);
4923 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4924 
4925 		// insert the new page into our cache
4926 		context.topCache->InsertPage(page, context.cacheOffset);
4927 		context.pageAllocated = true;
4928 	} else
4929 		DEBUG_PAGE_ACCESS_START(page);
4930 
4931 	context.page = page;
4932 	return B_OK;
4933 }
4934 
4935 
4936 /*!	Makes sure the address in the given address space is mapped.
4937 
4938 	\param addressSpace The address space.
4939 	\param originalAddress The address. Doesn't need to be page aligned.
4940 	\param isWrite If \c true the address shall be write-accessible.
4941 	\param isUser If \c true the access is requested by a userland team.
4942 	\param wirePage On success, if non \c NULL, the wired count of the page
4943 		mapped at the given address is incremented and the page is returned
4944 		via this parameter.
4945 	\return \c B_OK on success, another error code otherwise.
4946 */
4947 static status_t
4948 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4949 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4950 {
4951 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4952 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4953 		originalAddress, isWrite, isUser));
4954 
4955 	PageFaultContext context(addressSpace, isWrite);
4956 
4957 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4958 	status_t status = B_OK;
4959 
4960 	addressSpace->IncrementFaultCount();
4961 
4962 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4963 	// the pages upfront makes sure we don't have any cache locked, so that the
4964 	// page daemon/thief can do their job without problems.
4965 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4966 		originalAddress);
4967 	context.addressSpaceLocker.Unlock();
4968 	vm_page_reserve_pages(&context.reservation, reservePages,
4969 		addressSpace == VMAddressSpace::Kernel()
4970 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4971 
4972 	while (true) {
4973 		context.addressSpaceLocker.Lock();
4974 
4975 		// get the area the fault was in
4976 		VMArea* area = addressSpace->LookupArea(address);
4977 		if (area == NULL) {
4978 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4979 				"space\n", originalAddress);
4980 			TPF(PageFaultError(-1,
4981 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4982 			status = B_BAD_ADDRESS;
4983 			break;
4984 		}
4985 
4986 		// check permissions
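		// (get_area_page_protection() returns the page's individual protection
		// if the area has per-page protections set, the area's protection
		// otherwise.)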
4987 		uint32 protection = get_area_page_protection(area, address);
4988 		if (isUser && (protection & B_USER_PROTECTION) == 0
4989 				&& (area->protection & B_KERNEL_AREA) != 0) {
4990 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4991 				area->id, (void*)originalAddress);
4992 			TPF(PageFaultError(area->id,
4993 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4994 			status = B_PERMISSION_DENIED;
4995 			break;
4996 		}
4997 		if (isWrite && (protection
4998 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4999 			dprintf("write access attempted on write-protected area 0x%"
5000 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
5001 			TPF(PageFaultError(area->id,
5002 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
5003 			status = B_PERMISSION_DENIED;
5004 			break;
5005 		} else if (isExecute && (protection
5006 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
5007 			dprintf("instruction fetch attempted on execute-protected area 0x%"
5008 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
5009 			TPF(PageFaultError(area->id,
5010 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
5011 			status = B_PERMISSION_DENIED;
5012 			break;
5013 		} else if (!isWrite && !isExecute && (protection
5014 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
5015 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
5016 				" at %p\n", area->id, (void*)originalAddress);
5017 			TPF(PageFaultError(area->id,
5018 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
5019 			status = B_PERMISSION_DENIED;
5020 			break;
5021 		}
5022 
5023 		// We have the area, it was a valid access, so let's try to resolve the
5024 		// page fault now.
5025 		// At first, the top most cache from the area is investigated.
5026 
5027 		context.Prepare(vm_area_get_locked_cache(area),
5028 			address - area->Base() + area->cache_offset);
5029 
5030 		// See if this cache has a fault handler -- this will do all the work
5031 		// for us.
5032 		{
5033 			// Note, since the page fault is resolved with interrupts enabled,
5034 			// the fault handler could be called more than once for the same
5035 			// reason -- the store must take this into account.
5036 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
5037 			if (status != B_BAD_HANDLER)
5038 				break;
5039 		}
5040 
5041 		// The top most cache has no fault handler, so let's see if the cache or
5042 		// its sources already have the page we're searching for (we're going
5043 		// from top to bottom).
5044 		status = fault_get_page(context);
5045 		if (status != B_OK) {
5046 			TPF(PageFaultError(area->id, status));
5047 			break;
5048 		}
5049 
5050 		if (context.restart)
5051 			continue;
5052 
5053 		// All went fine, all there is left to do is to map the page into the
5054 		// address space.
5055 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
5056 			context.page));
5057 
5058 		// If the page doesn't reside in the area's cache, we need to make sure
5059 		// it's mapped in read-only, so that we cannot overwrite someone else's
5060 		// data (copy-on-write)
5061 		uint32 newProtection = protection;
5062 		if (context.page->Cache() != context.topCache && !isWrite)
5063 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
5064 
5065 		bool unmapPage = false;
5066 		bool mapPage = true;
5067 
5068 		// check whether there's already a page mapped at the address
5069 		context.map->Lock();
5070 
5071 		phys_addr_t physicalAddress;
5072 		uint32 flags;
5073 		vm_page* mappedPage = NULL;
5074 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
5075 			&& (flags & PAGE_PRESENT) != 0
5076 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5077 				!= NULL) {
5078 			// Yep there's already a page. If it's ours, we can simply adjust
5079 			// its protection. Otherwise we have to unmap it.
5080 			if (mappedPage == context.page) {
5081 				context.map->ProtectPage(area, address, newProtection);
5082 					// Note: We assume that ProtectPage() is atomic (i.e.
5083 					// the page isn't temporarily unmapped), otherwise we'd have
5084 					// to make sure it isn't wired.
5085 				mapPage = false;
5086 			} else
5087 				unmapPage = true;
5088 		}
5089 
5090 		context.map->Unlock();
5091 
5092 		if (unmapPage) {
5093 			// If the page is wired, we can't unmap it. Wait until it is unwired
5094 			// again and restart. Note that the page cannot be wired for
5095 			// writing, since it isn't in the topmost cache. So we can safely
5096 			// ignore ranges wired for writing (our own and other concurrent
5097 			// wiring attempts in progress) and in fact have to do that to avoid
5098 			// a deadlock.
5099 			VMAreaUnwiredWaiter waiter;
5100 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
5101 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
5102 				// unlock everything and wait
5103 				if (context.pageAllocated) {
5104 					// ... but since we allocated a page and inserted it into
5105 					// the top cache, remove and free it first. Otherwise we'd
5106 					// have a page from a lower cache mapped while an upper
5107 					// cache has a page that would shadow it.
5108 					context.topCache->RemovePage(context.page);
5109 					vm_page_free_etc(context.topCache, context.page,
5110 						&context.reservation);
5111 				} else
5112 					DEBUG_PAGE_ACCESS_END(context.page);
5113 
5114 				context.UnlockAll();
5115 				waiter.waitEntry.Wait();
5116 				continue;
5117 			}
5118 
5119 			// Note: The mapped page is a page of a lower cache. We are
5120 			// guaranteed to have that cache locked, our new page is a copy of
5121 			// that page, and the page is not busy. The logic for that guarantee
5122 			// is as follows: Since the page is mapped, it must live in the top
5123 			// cache (ruled out above) or any of its lower caches, and there is
5124 			// (was before the new page was inserted) no other page in any
5125 			// cache between the top cache and the page's cache (otherwise that
5126 			// would be mapped instead). That in turn means that our algorithm
5127 			// must have found it and therefore it cannot be busy either.
5128 			DEBUG_PAGE_ACCESS_START(mappedPage);
5129 			unmap_page(area, address);
5130 			DEBUG_PAGE_ACCESS_END(mappedPage);
5131 		}
5132 
5133 		if (mapPage) {
5134 			if (map_page(area, context.page, address, newProtection,
5135 					&context.reservation) != B_OK) {
5136 				// Mapping can only fail when the page mapping object couldn't
5137 				// be allocated. Save for the missing mapping, everything is
5138 				// fine, though. If this was a regular page fault, we'll simply
5139 				// leave and probably fault again. To make sure we'll have more
5140 				// luck then, we ensure that the minimum object reserve is
5141 				// available.
5142 				DEBUG_PAGE_ACCESS_END(context.page);
5143 
5144 				context.UnlockAll();
5145 
5146 				if (object_cache_reserve(page_mapping_object_cache_for(
5147 							context.page->physical_page_number), 1, 0)
5148 						!= B_OK) {
5149 					// Apparently the situation is serious. Let's get ourselves
5150 					// killed.
5151 					status = B_NO_MEMORY;
5152 				} else if (wirePage != NULL) {
5153 					// The caller expects us to wire the page. Since
5154 					// object_cache_reserve() succeeded, we should now be able
5155 					// to allocate a mapping structure. Restart.
5156 					continue;
5157 				}
5158 
5159 				break;
5160 			}
5161 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
5162 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
5163 
5164 		// also wire the page, if requested
5165 		if (wirePage != NULL && status == B_OK) {
5166 			increment_page_wired_count(context.page);
5167 			*wirePage = context.page;
5168 		}
5169 
5170 		DEBUG_PAGE_ACCESS_END(context.page);
5171 
5172 		break;
5173 	}
5174 
5175 	return status;
5176 }
5177 
5178 
5179 status_t
5180 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
5181 {
5182 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
5183 }
5184 
5185 status_t
5186 vm_put_physical_page(addr_t vaddr, void* handle)
5187 {
5188 	return sPhysicalPageMapper->PutPage(vaddr, handle);
5189 }
5190 
5191 
5192 status_t
5193 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
5194 	void** _handle)
5195 {
5196 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
5197 }
5198 
5199 status_t
5200 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
5201 {
5202 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
5203 }
5204 
5205 
5206 status_t
5207 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
5208 {
5209 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
5210 }
5211 
5212 status_t
5213 vm_put_physical_page_debug(addr_t vaddr, void* handle)
5214 {
5215 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
5216 }
5217 
5218 
5219 void
5220 vm_get_info(system_info* info)
5221 {
5222 	swap_get_info(info);
5223 
5224 	MutexLocker locker(sAvailableMemoryLock);
5225 	info->needed_memory = sNeededMemory;
5226 	info->free_memory = sAvailableMemory;
5227 }
5228 
5229 
5230 uint32
5231 vm_num_page_faults(void)
5232 {
5233 	return sPageFaults;
5234 }
5235 
5236 
5237 off_t
5238 vm_available_memory(void)
5239 {
5240 	MutexLocker locker(sAvailableMemoryLock);
5241 	return sAvailableMemory;
5242 }
5243 
5244 
5245 off_t
5246 vm_available_not_needed_memory(void)
5247 {
5248 	MutexLocker locker(sAvailableMemoryLock);
5249 	return sAvailableMemory - sNeededMemory;
5250 }
5251 
5252 
5253 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
5254 	debugger.
5255 */
5256 off_t
5257 vm_available_not_needed_memory_debug(void)
5258 {
5259 	return sAvailableMemory - sNeededMemory;
5260 }
5261 
5262 
5263 size_t
5264 vm_kernel_address_space_left(void)
5265 {
5266 	return VMAddressSpace::Kernel()->FreeSpace();
5267 }
5268 
5269 
5270 void
5271 vm_unreserve_memory(size_t amount)
5272 {
5273 	mutex_lock(&sAvailableMemoryLock);
5274 
5275 	sAvailableMemory += amount;
5276 
5277 	mutex_unlock(&sAvailableMemoryLock);
5278 }
5279 
5280 
5281 status_t
5282 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
5283 {
5284 	size_t reserve = kMemoryReserveForPriority[priority];
5285 
5286 	MutexLocker locker(sAvailableMemoryLock);
5287 
5288 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
5289 
5290 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
5291 		sAvailableMemory -= amount;
5292 		return B_OK;
5293 	}
5294 
5295 	if (amount >= (vm_page_num_pages() * B_PAGE_SIZE)) {
5296 		// Do not wait for something that will never happen.
5297 		return B_NO_MEMORY;
5298 	}
5299 
5300 	if (timeout <= 0)
5301 		return B_NO_MEMORY;
5302 
5303 	// turn timeout into an absolute timeout
5304 	timeout += system_time();
5305 
5306 	// loop until we've got the memory or the timeout occurs
5307 	do {
5308 		sNeededMemory += amount;
5309 
5310 		// call the low resource manager
5311 		locker.Unlock();
5312 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
5313 			B_ABSOLUTE_TIMEOUT, timeout);
5314 		locker.Lock();
5315 
5316 		sNeededMemory -= amount;
5317 
5318 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
5319 			sAvailableMemory -= amount;
5320 			return B_OK;
5321 		}
5322 	} while (timeout > system_time());
5323 
5324 	return B_NO_MEMORY;
5325 }
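
/*!	A minimal usage sketch of the reserve/unreserve pairing; the byte count
	and timeout below are arbitrary placeholder values:
	\code
	const size_t bytes = 16 * B_PAGE_SIZE;
	if (vm_try_reserve_memory(bytes, VM_PRIORITY_USER, 1000000) == B_OK) {
		// ... create the consuming cache/area ...
		vm_unreserve_memory(bytes);
	}
	\endcode
*/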
5326 
5327 
5328 status_t
5329 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
5330 {
5331 	// NOTE: The caller is responsible for synchronizing calls to this function!
5332 
5333 	AddressSpaceReadLocker locker;
5334 	VMArea* area;
5335 	status_t status = locker.SetFromArea(id, area);
5336 	if (status != B_OK)
5337 		return status;
5338 
5339 	// nothing to do, if the type doesn't change
5340 	uint32 oldType = area->MemoryType();
5341 	if (type == oldType)
5342 		return B_OK;
5343 
5344 	// set the memory type of the area and the mapped pages
5345 	VMTranslationMap* map = area->address_space->TranslationMap();
5346 	map->Lock();
5347 	area->SetMemoryType(type);
5348 	map->ProtectArea(area, area->protection);
5349 	map->Unlock();
5350 
5351 	// set the physical memory type
5352 	status_t error = arch_vm_set_memory_type(area, physicalBase, type, NULL);
5353 	if (error != B_OK) {
5354 		// reset the memory type of the area and the mapped pages
5355 		map->Lock();
5356 		area->SetMemoryType(oldType);
5357 		map->ProtectArea(area, area->protection);
5358 		map->Unlock();
5359 		return error;
5360 	}
5361 
5362 	return B_OK;
5364 }
5365 
5366 
5367 /*!	This function enforces some protection properties:
5368 	 - kernel areas must be W^X (after kernel startup)
5369 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
5370 	 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
5371 */
5372 static void
5373 fix_protection(uint32* protection)
5374 {
5375 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
5376 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
5377 			|| (*protection & B_WRITE_AREA) != 0)
5378 		&& !gKernelStartup)
5379 		panic("kernel areas cannot be both writable and executable!");
5380 
5381 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
5382 		if ((*protection & B_WRITE_AREA) != 0)
5383 			*protection |= B_KERNEL_WRITE_AREA;
5384 		if ((*protection & B_READ_AREA) != 0)
5385 			*protection |= B_KERNEL_READ_AREA;
5386 	}
5387 }
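
/*!	For example (hypothetical input), a plain userland request of
	\c B_READ_AREA | \c B_WRITE_AREA is extended so the kernel can always
	access the area:
	\code
	uint32 protection = B_READ_AREA | B_WRITE_AREA;
	fix_protection(&protection);
		// protection now also includes B_KERNEL_READ_AREA
		// and B_KERNEL_WRITE_AREA
	\endcode
*/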
5388 
5389 
5390 static void
5391 fill_area_info(struct VMArea* area, area_info* info, size_t size)
5392 {
5393 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
5394 	info->area = area->id;
5395 	info->address = (void*)area->Base();
5396 	info->size = area->Size();
5397 	info->protection = area->protection;
5398 	info->lock = area->wiring;
5399 	info->team = area->address_space->ID();
5400 	info->copy_count = 0;
5401 	info->in_count = 0;
5402 	info->out_count = 0;
5403 		// TODO: retrieve real values here!
5404 
5405 	VMCache* cache = vm_area_get_locked_cache(area);
5406 
5407 	// Note, this is a simplification; the cache could be larger than this area
5408 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5409 
5410 	vm_area_put_locked_cache(cache);
5411 }
5412 
5413 
5414 static status_t
5415 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5416 {
5417 	// is newSize a multiple of B_PAGE_SIZE?
5418 	if (newSize & (B_PAGE_SIZE - 1))
5419 		return B_BAD_VALUE;
5420 
5421 	// lock all affected address spaces and the cache
5422 	VMArea* area;
5423 	VMCache* cache;
5424 
5425 	MultiAddressSpaceLocker locker;
5426 	AreaCacheLocker cacheLocker;
5427 
5428 	status_t status;
5429 	size_t oldSize;
5430 	bool anyKernelArea;
5431 	bool restart;
5432 
5433 	do {
5434 		anyKernelArea = false;
5435 		restart = false;
5436 
5437 		locker.Unset();
5438 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5439 		if (status != B_OK)
5440 			return status;
5441 		cacheLocker.SetTo(cache, true);	// already locked
5442 
5443 		// enforce restrictions
5444 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
5445 				|| (area->protection & B_KERNEL_AREA) != 0)) {
5446 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5447 				"resize kernel area %" B_PRId32 " (%s)\n",
5448 				team_get_current_team_id(), areaID, area->name);
5449 			return B_NOT_ALLOWED;
5450 		}
5451 		// TODO: Enforce all restrictions (team, etc.)!
5452 
5453 		oldSize = area->Size();
5454 		if (newSize == oldSize)
5455 			return B_OK;
5456 
5457 		if (cache->type != CACHE_TYPE_RAM)
5458 			return B_NOT_ALLOWED;
5459 
5460 		if (oldSize < newSize) {
5461 			// We need to check if all areas of this cache can be resized.
5462 			for (VMArea* current = cache->areas; current != NULL;
5463 					current = current->cache_next) {
5464 				if (!current->address_space->CanResizeArea(current, newSize))
5465 					return B_ERROR;
5466 				anyKernelArea
5467 					|= current->address_space == VMAddressSpace::Kernel();
5468 			}
5469 		} else {
5470 			// We're shrinking the areas, so we must make sure the affected
5471 			// ranges are not wired.
5472 			for (VMArea* current = cache->areas; current != NULL;
5473 					current = current->cache_next) {
5474 				anyKernelArea
5475 					|= current->address_space == VMAddressSpace::Kernel();
5476 
5477 				if (wait_if_area_range_is_wired(current,
5478 						current->Base() + newSize, oldSize - newSize, &locker,
5479 						&cacheLocker)) {
5480 					restart = true;
5481 					break;
5482 				}
5483 			}
5484 		}
5485 	} while (restart);
5486 
5487 	// Okay, looks good so far, so let's do it
5488 
5489 	int priority = kernel && anyKernelArea
5490 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5491 	uint32 allocationFlags = kernel && anyKernelArea
5492 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5493 
5494 	if (oldSize < newSize) {
5495 		// Growing the cache can fail, so we do it first.
5496 		status = cache->Resize(cache->virtual_base + newSize, priority);
5497 		if (status != B_OK)
5498 			return status;
5499 	}
5500 
5501 	for (VMArea* current = cache->areas; current != NULL;
5502 			current = current->cache_next) {
5503 		status = current->address_space->ResizeArea(current, newSize,
5504 			allocationFlags);
5505 		if (status != B_OK)
5506 			break;
5507 
5508 		// We also need to unmap all pages beyond the new size, if the area has
5509 		// shrunk
5510 		if (newSize < oldSize) {
5511 			VMCacheChainLocker cacheChainLocker(cache);
5512 			cacheChainLocker.LockAllSourceCaches();
5513 
5514 			unmap_pages(current, current->Base() + newSize,
5515 				oldSize - newSize);
5516 
5517 			cacheChainLocker.Unlock(cache);
5518 		}
5519 	}
5520 
5521 	if (status == B_OK) {
5522 		// Shrink or grow individual page protections if in use.
5523 		if (area->page_protections != NULL) {
5524 			size_t bytes = area_page_protections_size(newSize);
5525 			uint8* newProtections
5526 				= (uint8*)realloc(area->page_protections, bytes);
5527 			if (newProtections == NULL)
5528 				status = B_NO_MEMORY;
5529 			else {
5530 				area->page_protections = newProtections;
5531 
5532 				if (oldSize < newSize) {
5533 					// init the additional page protections to that of the area
5534 					uint32 offset = area_page_protections_size(oldSize);
5535 					uint32 areaProtection = area->protection
5536 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5537 					memset(area->page_protections + offset,
5538 						areaProtection | (areaProtection << 4), bytes - offset);
5539 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5540 						uint8& entry = area->page_protections[offset - 1];
5541 						entry = (entry & 0x0f) | (areaProtection << 4);
5542 					}
5543 				}
5544 			}
5545 		}
5546 	}
5547 
5548 	// shrinking the cache can't fail, so we do it now
5549 	if (status == B_OK && newSize < oldSize)
5550 		status = cache->Resize(cache->virtual_base + newSize, priority);
5551 
5552 	if (status != B_OK) {
5553 		// Something failed -- resize the areas back to their original size.
5554 		// This can fail, too, in which case we're seriously screwed.
5555 		for (VMArea* current = cache->areas; current != NULL;
5556 				current = current->cache_next) {
5557 			if (current->address_space->ResizeArea(current, oldSize,
5558 					allocationFlags) != B_OK) {
5559 				panic("vm_resize_area(): Failed and unable to restore "
5560 					"original state.");
5561 			}
5562 		}
5563 
5564 		cache->Resize(cache->virtual_base + oldSize, priority);
5565 	}
5566 
5567 	// TODO: we must honour the lock restrictions of this area
5568 	return status;
5569 }
5570 
5571 
5572 status_t
5573 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5574 {
5575 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5576 }
5577 
5578 
5579 status_t
5580 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5581 {
5582 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5583 }
5584 
5585 
5586 status_t
5587 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5588 	bool user)
5589 {
5590 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5591 }
5592 
5593 
5594 void
5595 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5596 {
5597 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5598 }
5599 
5600 
5601 /*!	Copies a range of memory directly from/to a page that might not be mapped
5602 	at the moment.
5603 
5604 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5605 	walks through the respective area's cache chain to find the physical page
5606 	and copies from/to it directly.
5607 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5608 	must not cross a page boundary.
5609 
5610 	\param teamID The team ID identifying the address space \a unsafeMemory is
5611 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5612 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5613 		is passed, the address space of the thread returned by
5614 		debug_get_debugged_thread() is used.
5615 	\param unsafeMemory The start of the unsafe memory range to be copied
5616 		from/to.
5617 	\param buffer A safely accessible kernel buffer to be copied from/to.
5618 	\param size The number of bytes to be copied.
5619 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5620 		\a unsafeMemory, the other way around otherwise.
5621 */
5622 status_t
5623 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5624 	size_t size, bool copyToUnsafe)
5625 {
5626 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5627 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5628 		return B_BAD_VALUE;
5629 	}
5630 
5631 	// get the address space for the debugged thread
5632 	VMAddressSpace* addressSpace;
5633 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5634 		addressSpace = VMAddressSpace::Kernel();
5635 	} else if (teamID == B_CURRENT_TEAM) {
5636 		Thread* thread = debug_get_debugged_thread();
5637 		if (thread == NULL || thread->team == NULL)
5638 			return B_BAD_ADDRESS;
5639 
5640 		addressSpace = thread->team->address_space;
5641 	} else
5642 		addressSpace = VMAddressSpace::DebugGet(teamID);
5643 
5644 	if (addressSpace == NULL)
5645 		return B_BAD_ADDRESS;
5646 
5647 	// get the area
5648 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5649 	if (area == NULL)
5650 		return B_BAD_ADDRESS;
5651 
5652 	// search the page
5653 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5654 		+ area->cache_offset;
5655 	VMCache* cache = area->cache;
5656 	vm_page* page = NULL;
5657 	while (cache != NULL) {
5658 		page = cache->DebugLookupPage(cacheOffset);
5659 		if (page != NULL)
5660 			break;
5661 
5662 		// Page not found in this cache -- if it is paged out, we must not try
5663 		// to get it from lower caches.
5664 		if (cache->DebugHasPage(cacheOffset))
5665 			break;
5666 
5667 		cache = cache->source;
5668 	}
5669 
5670 	if (page == NULL)
5671 		return B_UNSUPPORTED;
5672 
5673 	// copy from/to physical memory
5674 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5675 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5676 
5677 	if (copyToUnsafe) {
5678 		if (page->Cache() != area->cache)
5679 			return B_UNSUPPORTED;
5680 
5681 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5682 	}
5683 
5684 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5685 }
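
/*!	Sketch of a kernel debugger command reading a word from the debugged
	team's memory; \c debuggedAddress is a hypothetical placeholder:
	\code
	uint32 value;
	if (vm_debug_copy_page_memory(B_CURRENT_TEAM, (void*)debuggedAddress,
			&value, sizeof(value), false) == B_OK) {
		kprintf("value: %#" B_PRIx32 "\n", value);
	}
	\endcode
*/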
5686 
5687 
5688 /** Validate that a memory range is either fully in kernel space, or fully in
5689  *  userspace */
5690 static inline bool
5691 validate_memory_range(const void* addr, size_t size)
5692 {
5693 	addr_t address = (addr_t)addr;
5694 
5695 	// Check for overflows on all addresses.
5696 	if ((address + size) < address)
5697 		return false;
5698 
5699 	// Validate that the address range does not cross the kernel/user boundary.
5700 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5701 }
5702 
5703 
5704 //	#pragma mark - kernel public API
5705 
5706 
5707 status_t
5708 user_memcpy(void* to, const void* from, size_t size)
5709 {
5710 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5711 		return B_BAD_ADDRESS;
5712 
5713 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5714 		return B_BAD_ADDRESS;
5715 
5716 	return B_OK;
5717 }
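
/*!	Typical syscall usage sketch; \c userInfo is a hypothetical userland
	pointer passed in by the caller:
	\code
	area_info info;
	if (!IS_USER_ADDRESS(userInfo)
		|| user_memcpy(&info, userInfo, sizeof(info)) != B_OK)
		return B_BAD_ADDRESS;
	\endcode
*/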
5718 
5719 
5720 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5721 	the string in \a to, NULL-terminating the result.
5722 
5723 	\param to Pointer to the destination C-string.
5724 	\param from Pointer to the source C-string.
5725 	\param size Size in bytes of the string buffer pointed to by \a to.
5726 
5727 	\return strlen(\a from).
5728 */
5729 ssize_t
5730 user_strlcpy(char* to, const char* from, size_t size)
5731 {
5732 	if (to == NULL && size != 0)
5733 		return B_BAD_VALUE;
5734 	if (from == NULL)
5735 		return B_BAD_ADDRESS;
5736 
5737 	// Protect the source address from overflows.
5738 	size_t maxSize = size;
5739 	if ((addr_t)from + maxSize < (addr_t)from)
5740 		maxSize -= (addr_t)from + maxSize;
5741 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5742 		maxSize = USER_TOP - (addr_t)from;
5743 
5744 	if (!validate_memory_range(to, maxSize))
5745 		return B_BAD_ADDRESS;
5746 
5747 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5748 	if (result < 0)
5749 		return result;
5750 
5751 	// If we hit the address overflow boundary, fail.
5752 	if ((size_t)result >= maxSize && maxSize < size)
5753 		return B_BAD_ADDRESS;
5754 
5755 	return result;
5756 }
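
/*!	Sketch of the usual length/truncation check; \c userName is a
	hypothetical userland pointer:
	\code
	char name[B_OS_NAME_LENGTH];
	ssize_t length = user_strlcpy(name, userName, sizeof(name));
	if (length < 0)
		return B_BAD_ADDRESS;
	if (length >= (ssize_t)sizeof(name))
		return B_NAME_TOO_LONG;
	\endcode
*/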
5757 
5758 
5759 status_t
5760 user_memset(void* s, char c, size_t count)
5761 {
5762 	if (!validate_memory_range(s, count))
5763 		return B_BAD_ADDRESS;
5764 
5765 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5766 		return B_BAD_ADDRESS;
5767 
5768 	return B_OK;
5769 }
5770 
5771 
5772 /*!	Wires a single page at the given address.
5773 
5774 	\param team The team whose address space the address belongs to. Supports
5775 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5776 		parameter is ignored.
5777 	\param address The virtual address to wire down. Does not need to
5778 		be page aligned.
5779 	\param writable If \c true the page shall be writable.
5780 	\param info On success the info is filled in, among other things
5781 		containing the physical address the given virtual one translates to.
5782 	\return \c B_OK, when the page could be wired, another error code otherwise.
5783 */
5784 status_t
5785 vm_wire_page(team_id team, addr_t address, bool writable,
5786 	VMPageWiringInfo* info)
5787 {
5788 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5789 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5790 
5791 	// compute the page protection that is required
5792 	bool isUser = IS_USER_ADDRESS(address);
5793 	uint32 requiredProtection = PAGE_PRESENT
5794 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5795 	if (writable)
5796 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5797 
5798 	// get and read lock the address space
5799 	VMAddressSpace* addressSpace = NULL;
5800 	if (isUser) {
5801 		if (team == B_CURRENT_TEAM)
5802 			addressSpace = VMAddressSpace::GetCurrent();
5803 		else
5804 			addressSpace = VMAddressSpace::Get(team);
5805 	} else
5806 		addressSpace = VMAddressSpace::GetKernel();
5807 	if (addressSpace == NULL)
5808 		return B_ERROR;
5809 
5810 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5811 
5812 	VMTranslationMap* map = addressSpace->TranslationMap();
5813 	status_t error = B_OK;
5814 
5815 	// get the area
5816 	VMArea* area = addressSpace->LookupArea(pageAddress);
5817 	if (area == NULL) {
5818 		addressSpace->Put();
5819 		return B_BAD_ADDRESS;
5820 	}
5821 
5822 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5823 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5824 
5825 	// mark the area range wired
5826 	area->Wire(&info->range);
5827 
5828 	// Lock the area's cache chain and the translation map. Needed to look
5829 	// up the page and play with its wired count.
5830 	cacheChainLocker.LockAllSourceCaches();
5831 	map->Lock();
5832 
5833 	phys_addr_t physicalAddress;
5834 	uint32 flags;
5835 	vm_page* page;
5836 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5837 		&& (flags & requiredProtection) == requiredProtection
5838 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5839 			!= NULL) {
5840 		// Already mapped with the correct permissions -- just increment
5841 		// the page's wired count.
5842 		increment_page_wired_count(page);
5843 
5844 		map->Unlock();
5845 		cacheChainLocker.Unlock();
5846 		addressSpaceLocker.Unlock();
5847 	} else {
5848 		// Let vm_soft_fault() map the page for us, if possible. We need
5849 		// to fully unlock to avoid deadlocks. Since we have already
5850 		// wired the area itself, nothing disturbing will happen with it
5851 		// in the meantime.
5852 		map->Unlock();
5853 		cacheChainLocker.Unlock();
5854 		addressSpaceLocker.Unlock();
5855 
5856 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5857 			isUser, &page);
5858 
5859 		if (error != B_OK) {
5860 			// The page could not be mapped -- clean up.
5861 			VMCache* cache = vm_area_get_locked_cache(area);
5862 			area->Unwire(&info->range);
5863 			cache->ReleaseRefAndUnlock();
5864 			addressSpace->Put();
5865 			return error;
5866 		}
5867 	}
5868 
5869 	info->physicalAddress
5870 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5871 			+ address % B_PAGE_SIZE;
5872 	info->page = page;
5873 
5874 	return B_OK;
5875 }
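
/*!	Sketch of the wire/unwire pairing; \c virtualAddress is a hypothetical,
	already validated address:
	\code
	VMPageWiringInfo info;
	status_t error = vm_wire_page(B_CURRENT_TEAM, virtualAddress, true, &info);
	if (error == B_OK) {
		// use info.physicalAddress while the page stays wired
		vm_unwire_page(&info);
	}
	\endcode
*/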
5876 
5877 
5878 /*!	Unwires a single page previously wired via vm_wire_page().
5879 
5880 	\param info The same object passed to vm_wire_page() before.
5881 */
5882 void
5883 vm_unwire_page(VMPageWiringInfo* info)
5884 {
5885 	// lock the address space
5886 	VMArea* area = info->range.area;
5887 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5888 		// takes over our reference
5889 
5890 	// lock the top cache
5891 	VMCache* cache = vm_area_get_locked_cache(area);
5892 	VMCacheChainLocker cacheChainLocker(cache);
5893 
5894 	if (info->page->Cache() != cache) {
5895 		// The page is not in the top cache, so we lock the whole cache chain
5896 		// before touching the page's wired count.
5897 		cacheChainLocker.LockAllSourceCaches();
5898 	}
5899 
5900 	decrement_page_wired_count(info->page);
5901 
5902 	// remove the wired range from the area
5903 	area->Unwire(&info->range);
5904 
5905 	cacheChainLocker.Unlock();
5906 }
5907 
5908 
5909 /*!	Wires down the given address range in the specified team's address space.
5910 
5911 	If successful the function
5912 	- acquires a reference to the specified team's address space,
5913 	- adds respective wired ranges to all areas that intersect with the given
5914 	  address range,
5915 	- makes sure all pages in the given address range are mapped with the
5916 	  requested access permissions and increments their wired count.
5917 
5918 	It fails, when \a team doesn't specify a valid address space, when any part
5919 	of the specified address range is not covered by areas, when the concerned
5920 	areas don't allow mapping with the requested permissions, or when mapping
5921 	failed for another reason.
5922 
5923 	When successful the call must be balanced by an unlock_memory_etc() call with
5924 	the exact same parameters.
5925 
5926 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5927 		supported.
5928 	\param address The start of the address range to be wired.
5929 	\param numBytes The size of the address range to be wired.
5930 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5931 		requests that the range must be wired writable ("read from device
5932 		into memory").
5933 	\return \c B_OK on success, another error code otherwise.
5934 */
5935 status_t
5936 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5937 {
5938 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5939 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5940 
5941 	// compute the page protection that is required
5942 	bool isUser = IS_USER_ADDRESS(address);
5943 	bool writable = (flags & B_READ_DEVICE) == 0;
5944 	uint32 requiredProtection = PAGE_PRESENT
5945 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5946 	if (writable)
5947 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5948 
5949 	uint32 mallocFlags = isUser
5950 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5951 
5952 	// get and read lock the address space
5953 	VMAddressSpace* addressSpace = NULL;
5954 	if (isUser) {
5955 		if (team == B_CURRENT_TEAM)
5956 			addressSpace = VMAddressSpace::GetCurrent();
5957 		else
5958 			addressSpace = VMAddressSpace::Get(team);
5959 	} else
5960 		addressSpace = VMAddressSpace::GetKernel();
5961 	if (addressSpace == NULL)
5962 		return B_ERROR;
5963 
5964 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5965 		// We get a new address space reference here. The one we got above will
5966 		// be freed by unlock_memory_etc().
5967 
5968 	VMTranslationMap* map = addressSpace->TranslationMap();
5969 	status_t error = B_OK;
5970 
5971 	// iterate through all concerned areas
5972 	addr_t nextAddress = lockBaseAddress;
5973 	while (nextAddress != lockEndAddress) {
5974 		// get the next area
5975 		VMArea* area = addressSpace->LookupArea(nextAddress);
5976 		if (area == NULL) {
5977 			error = B_BAD_ADDRESS;
5978 			break;
5979 		}
5980 
5981 		addr_t areaStart = nextAddress;
5982 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5983 
5984 		// allocate the wired range (do that before locking the cache to avoid
5985 		// deadlocks)
5986 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5987 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5988 		if (range == NULL) {
5989 			error = B_NO_MEMORY;
5990 			break;
5991 		}
5992 
5993 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5994 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5995 
5996 		// mark the area range wired
5997 		area->Wire(range);
5998 
5999 		// Depending on the area cache type and the wiring, we may not need to
6000 		// look at the individual pages.
6001 		if (area->cache_type == CACHE_TYPE_NULL
6002 			|| area->cache_type == CACHE_TYPE_DEVICE
6003 			|| area->wiring == B_FULL_LOCK
6004 			|| area->wiring == B_CONTIGUOUS) {
6005 			nextAddress = areaEnd;
6006 			continue;
6007 		}
6008 
6009 		// Lock the area's cache chain and the translation map. Needed to look
6010 		// up pages and play with their wired count.
6011 		cacheChainLocker.LockAllSourceCaches();
6012 		map->Lock();
6013 
6014 		// iterate through the pages and wire them
6015 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
6016 			phys_addr_t physicalAddress;
6017 			uint32 flags;
6018 
6019 			vm_page* page;
6020 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
6021 				&& (flags & requiredProtection) == requiredProtection
6022 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
6023 					!= NULL) {
6024 				// Already mapped with the correct permissions -- just increment
6025 				// the page's wired count.
6026 				increment_page_wired_count(page);
6027 			} else {
6028 				// Let vm_soft_fault() map the page for us, if possible. We need
6029 				// to fully unlock to avoid deadlocks. Since we have already
6030 				// wired the area itself, nothing disturbing will happen with it
6031 				// in the meantime.
6032 				map->Unlock();
6033 				cacheChainLocker.Unlock();
6034 				addressSpaceLocker.Unlock();
6035 
6036 				error = vm_soft_fault(addressSpace, nextAddress, writable,
6037 					false, isUser, &page);
6038 
6039 				addressSpaceLocker.Lock();
6040 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
6041 				cacheChainLocker.LockAllSourceCaches();
6042 				map->Lock();
6043 			}
6044 
6045 			if (error != B_OK)
6046 				break;
6047 		}
6048 
6049 		map->Unlock();
6050 
6051 		if (error == B_OK) {
6052 			cacheChainLocker.Unlock();
6053 		} else {
6054 			// An error occurred, so abort right here. If the current address
6055 			// is the first in this area, unwire the area, since we won't get
6056 			// to it when reverting what we've done so far.
6057 			if (nextAddress == areaStart) {
6058 				area->Unwire(range);
6059 				cacheChainLocker.Unlock();
6060 				range->~VMAreaWiredRange();
6061 				free_etc(range, mallocFlags);
6062 			} else
6063 				cacheChainLocker.Unlock();
6064 
6065 			break;
6066 		}
6067 	}
6068 
6069 	if (error != B_OK) {
6070 		// An error occurred, so unwire all that we've already wired. Note that
6071 		// even if not a single page was wired, unlock_memory_etc() is called
6072 		// to put the address space reference.
6073 		addressSpaceLocker.Unlock();
6074 		unlock_memory_etc(team, (void*)lockBaseAddress,
6075 			nextAddress - lockBaseAddress, flags);
6076 	}
6077 
6078 	return error;
6079 }
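
/*!	Sketch of a balanced lock/unlock pair, e.g. for wiring a userland I/O
	buffer; \c buffer and \c length are hypothetical:
	\code
	status_t error = lock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
	if (error == B_OK) {
		// ... perform the transfer into/out of the wired range ...
		unlock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
	}
	\endcode
*/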
6080 
6081 
6082 status_t
6083 lock_memory(void* address, size_t numBytes, uint32 flags)
6084 {
6085 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
6086 }
6087 
6088 
6089 /*!	Unwires an address range previously wired with lock_memory_etc().
6090 
6091 	Note that a call to this function must balance a previous lock_memory_etc()
6092 	call with exactly the same parameters.
6093 */
6094 status_t
6095 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
6096 {
6097 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
6098 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
6099 
6100 	// compute the page protection that is required
6101 	bool isUser = IS_USER_ADDRESS(address);
6102 	bool writable = (flags & B_READ_DEVICE) == 0;
6103 	uint32 requiredProtection = PAGE_PRESENT
6104 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
6105 	if (writable)
6106 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
6107 
6108 	uint32 mallocFlags = isUser
6109 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
6110 
6111 	// get and read lock the address space
6112 	VMAddressSpace* addressSpace = NULL;
6113 	if (isUser) {
6114 		if (team == B_CURRENT_TEAM)
6115 			addressSpace = VMAddressSpace::GetCurrent();
6116 		else
6117 			addressSpace = VMAddressSpace::Get(team);
6118 	} else
6119 		addressSpace = VMAddressSpace::GetKernel();
6120 	if (addressSpace == NULL)
6121 		return B_ERROR;
6122 
6123 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
6124 		// Take over the address space reference. We don't unlock until we're
6125 		// done.
6126 
6127 	VMTranslationMap* map = addressSpace->TranslationMap();
6128 	status_t error = B_OK;
6129 
6130 	// iterate through all concerned areas
6131 	addr_t nextAddress = lockBaseAddress;
6132 	while (nextAddress != lockEndAddress) {
6133 		// get the next area
6134 		VMArea* area = addressSpace->LookupArea(nextAddress);
6135 		if (area == NULL) {
6136 			error = B_BAD_ADDRESS;
6137 			break;
6138 		}
6139 
6140 		addr_t areaStart = nextAddress;
6141 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
6142 
6143 		// Lock the area's top cache. This is a requirement for
6144 		// VMArea::Unwire().
6145 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6146 
6147 		// Depending on the area cache type and the wiring, we may not need to
6148 		// look at the individual pages.
6149 		if (area->cache_type == CACHE_TYPE_NULL
6150 			|| area->cache_type == CACHE_TYPE_DEVICE
6151 			|| area->wiring == B_FULL_LOCK
6152 			|| area->wiring == B_CONTIGUOUS) {
6153 			// unwire the range (to avoid deadlocks we delete the range after
6154 			// unlocking the cache)
6155 			nextAddress = areaEnd;
6156 			VMAreaWiredRange* range = area->Unwire(areaStart,
6157 				areaEnd - areaStart, writable);
6158 			cacheChainLocker.Unlock();
6159 			if (range != NULL) {
6160 				range->~VMAreaWiredRange();
6161 				free_etc(range, mallocFlags);
6162 			}
6163 			continue;
6164 		}
6165 
6166 		// Lock the area's cache chain and the translation map. Needed to look
6167 		// up pages and play with their wired count.
6168 		cacheChainLocker.LockAllSourceCaches();
6169 		map->Lock();
6170 
6171 		// iterate through the pages and unwire them
6172 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
6173 			phys_addr_t physicalAddress;
6174 			uint32 flags;
6175 
6176 			vm_page* page;
6177 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
6178 				&& (flags & PAGE_PRESENT) != 0
6179 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
6180 					!= NULL) {
6181 				// The page is still mapped -- just decrement
6182 				// its wired count.
6183 				decrement_page_wired_count(page);
6184 			} else {
6185 				panic("unlock_memory_etc(): Failed to unwire page: address "
6186 					"space %p, address: %#" B_PRIxADDR, addressSpace,
6187 					nextAddress);
6188 				error = B_BAD_VALUE;
6189 				break;
6190 			}
6191 		}
6192 
6193 		map->Unlock();
6194 
6195 		// All pages are unwired. Remove the area's wired range as well (to
6196 		// avoid deadlocks we delete the range after unlocking the cache).
6197 		VMAreaWiredRange* range = area->Unwire(areaStart,
6198 			areaEnd - areaStart, writable);
6199 
6200 		cacheChainLocker.Unlock();
6201 
6202 		if (range != NULL) {
6203 			range->~VMAreaWiredRange();
6204 			free_etc(range, mallocFlags);
6205 		}
6206 
6207 		if (error != B_OK)
6208 			break;
6209 	}
6210 
6211 	// get rid of the address space reference lock_memory_etc() acquired
6212 	addressSpace->Put();
6213 
6214 	return error;
6215 }
6216 
6217 
6218 status_t
6219 unlock_memory(void* address, size_t numBytes, uint32 flags)
6220 {
6221 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
6222 }
6223 
6224 
6225 /*!	Similar to get_memory_map(), but also allows to specify the address space
6226 	for the memory in question and has saner semantics.
6227 	Returns \c B_OK when the complete range could be translated or
6228 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
6229 	case the actual number of entries is written to \c *_numEntries. Any other
6230 	error case indicates complete failure; \c *_numEntries will be set to \c 0
6231 	in this case.
6232 */
6233 status_t
6234 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
6235 	physical_entry* table, uint32* _numEntries)
6236 {
6237 	uint32 numEntries = *_numEntries;
6238 	*_numEntries = 0;
6239 
6240 	VMAddressSpace* addressSpace;
6241 	addr_t virtualAddress = (addr_t)address;
6242 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
6243 	phys_addr_t physicalAddress;
6244 	status_t status = B_OK;
6245 	int32 index = -1;
6246 	addr_t offset = 0;
6247 	bool interrupts = are_interrupts_enabled();
6248 
6249 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6250 		"entries)\n", team, address, numBytes, numEntries));
6251 
6252 	if (numEntries == 0 || numBytes == 0)
6253 		return B_BAD_VALUE;
6254 
6255 	// in which address space is the address to be found?
6256 	if (IS_USER_ADDRESS(virtualAddress)) {
6257 		if (team == B_CURRENT_TEAM)
6258 			addressSpace = VMAddressSpace::GetCurrent();
6259 		else
6260 			addressSpace = VMAddressSpace::Get(team);
6261 	} else
6262 		addressSpace = VMAddressSpace::GetKernel();
6263 
6264 	if (addressSpace == NULL)
6265 		return B_ERROR;
6266 
6267 	VMTranslationMap* map = addressSpace->TranslationMap();
6268 
6269 	if (interrupts)
6270 		map->Lock();
6271 
6272 	while (offset < numBytes) {
6273 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6274 		uint32 flags;
6275 
6276 		if (interrupts) {
6277 			status = map->Query((addr_t)address + offset, &physicalAddress,
6278 				&flags);
6279 		} else {
6280 			status = map->QueryInterrupt((addr_t)address + offset,
6281 				&physicalAddress, &flags);
6282 		}
6283 		if (status < B_OK)
6284 			break;
6285 		if ((flags & PAGE_PRESENT) == 0) {
6286 			panic("get_memory_map() called on unmapped memory!");
6287 			return B_BAD_ADDRESS;
6288 		}
6289 
6290 		if (index < 0 && pageOffset > 0) {
6291 			physicalAddress += pageOffset;
6292 			if (bytes > B_PAGE_SIZE - pageOffset)
6293 				bytes = B_PAGE_SIZE - pageOffset;
6294 		}
6295 
6296 		// need to switch to the next physical_entry?
6297 		if (index < 0 || table[index].address
6298 				!= physicalAddress - table[index].size) {
6299 			if ((uint32)++index + 1 > numEntries) {
6300 				// table too small
6301 				break;
6302 			}
6303 			table[index].address = physicalAddress;
6304 			table[index].size = bytes;
6305 		} else {
6306 			// page does fit in current entry
6307 			table[index].size += bytes;
6308 		}
6309 
6310 		offset += bytes;
6311 	}
6312 
6313 	if (interrupts)
6314 		map->Unlock();
6315 
6316 	if (status != B_OK)
6317 		return status;
6318 
6319 	if ((uint32)index + 1 > numEntries) {
6320 		*_numEntries = index;
6321 		return B_BUFFER_OVERFLOW;
6322 	}
6323 
6324 	*_numEntries = index + 1;
6325 	return B_OK;
6326 }
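
/*!	Sketch of translating a (previously locked) buffer into a scatter/gather
	table; \c buffer and \c length are hypothetical:
	\code
	physical_entry table[8];
	uint32 numEntries = 8;
	status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
		table, &numEntries);
	if (error == B_OK || error == B_BUFFER_OVERFLOW) {
		// table[0 .. numEntries - 1] now holds contiguous physical runs
	}
	\endcode
*/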
6327 
6328 
6329 /*!	According to the BeBook, this function should always succeed.
6330 	This is no longer the case.
6331 */
6332 extern "C" int32
6333 __get_memory_map_haiku(const void* address, size_t numBytes,
6334 	physical_entry* table, int32 numEntries)
6335 {
6336 	uint32 entriesRead = numEntries;
6337 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
6338 		table, &entriesRead);
6339 	if (error != B_OK)
6340 		return error;
6341 
6342 	// close the entry list
6343 
6344 	// if it's only one entry, we will silently accept the missing ending
6345 	if (numEntries == 1)
6346 		return B_OK;
6347 
6348 	if (entriesRead + 1 > (uint32)numEntries)
6349 		return B_BUFFER_OVERFLOW;
6350 
6351 	table[entriesRead].address = 0;
6352 	table[entriesRead].size = 0;
6353 
6354 	return B_OK;
6355 }
6356 
6357 
6358 area_id
6359 area_for(void* address)
6360 {
6361 	return vm_area_for((addr_t)address, true);
6362 }
6363 
6364 
6365 area_id
6366 find_area(const char* name)
6367 {
6368 	return VMAreas::Find(name);
6369 }
6370 
6371 
6372 status_t
6373 _get_area_info(area_id id, area_info* info, size_t size)
6374 {
6375 	if (size != sizeof(area_info) || info == NULL)
6376 		return B_BAD_VALUE;
6377 
6378 	AddressSpaceReadLocker locker;
6379 	VMArea* area;
6380 	status_t status = locker.SetFromArea(id, area);
6381 	if (status != B_OK)
6382 		return status;
6383 
6384 	fill_area_info(area, info, size);
6385 	return B_OK;
6386 }
6387 
6388 
6389 status_t
6390 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
6391 {
6392 	addr_t nextBase = *(addr_t*)cookie;
6393 
6394 	// we're already through the list
6395 	if (nextBase == (addr_t)-1)
6396 		return B_ENTRY_NOT_FOUND;
6397 
6398 	if (team == B_CURRENT_TEAM)
6399 		team = team_get_current_team_id();
6400 
6401 	AddressSpaceReadLocker locker(team);
6402 	if (!locker.IsLocked())
6403 		return B_BAD_TEAM_ID;
6404 
6405 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
6406 	if (area == NULL) {
6407 		nextBase = (addr_t)-1;
6408 		return B_ENTRY_NOT_FOUND;
6409 	}
6410 
6411 	fill_area_info(area, info, size);
6412 	*cookie = (ssize_t)(area->Base() + 1);
6413 
6414 	return B_OK;
6415 }
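
/*!	Sketch of iterating over a team's areas with the cookie based API;
	\c team is a hypothetical team ID:
	\code
	ssize_t cookie = 0;
	area_info info;
	while (_get_next_area_info(team, &cookie, &info, sizeof(info)) == B_OK)
		dprintf("area %" B_PRId32 ": %s\n", info.area, info.name);
	\endcode
*/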
6416 
6417 
6418 status_t
6419 set_area_protection(area_id area, uint32 newProtection)
6420 {
6421 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6422 		newProtection, true);
6423 }
6424 
6425 
6426 status_t
6427 resize_area(area_id areaID, size_t newSize)
6428 {
6429 	return vm_resize_area(areaID, newSize, true);
6430 }
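
/*!	Sketch of growing a kernel area by one page; \c area and \c oldSize are
	hypothetical and the new size must be a multiple of \c B_PAGE_SIZE:
	\code
	status_t error = resize_area(area, oldSize + B_PAGE_SIZE);
	\endcode
*/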
6431 
6432 
6433 /*!	Transfers the specified area to a new team. The caller must be the owner
6434 	of the area.
6435 */
6436 area_id
6437 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6438 	bool kernel)
6439 {
6440 	area_info info;
6441 	status_t status = get_area_info(id, &info);
6442 	if (status != B_OK)
6443 		return status;
6444 
6445 	if (!kernel && info.team != thread_get_current_thread()->team->id)
6446 		return B_PERMISSION_DENIED;
6447 
6448 	// We need to mark the area cloneable so the following operations work.
6449 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
6450 	if (status != B_OK)
6451 		return status;
6452 
6453 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6454 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6455 	if (clonedArea < 0)
6456 		return clonedArea;
6457 
6458 	status = vm_delete_area(info.team, id, kernel);
6459 	if (status != B_OK) {
6460 		vm_delete_area(target, clonedArea, kernel);
6461 		return status;
6462 	}
6463 
6464 	// Now we can reset the protection to whatever it was before.
6465 	set_area_protection(clonedArea, info.protection);
6466 
6467 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6468 
6469 	return clonedArea;
6470 }
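
/*!	Sketch of handing an area over to another team from kernel code;
	\c sourceArea and \c targetTeam are hypothetical:
	\code
	void* address = NULL;
	area_id newArea = transfer_area(sourceArea, &address, B_ANY_ADDRESS,
		targetTeam, true);
	if (newArea < 0)
		return newArea;
	\endcode
*/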
6471 
6472 
6473 extern "C" area_id
6474 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6475 	size_t numBytes, uint32 addressSpec, uint32 protection,
6476 	void** _virtualAddress)
6477 {
6478 	if (!arch_vm_supports_protection(protection))
6479 		return B_NOT_SUPPORTED;
6480 
6481 	fix_protection(&protection);
6482 
6483 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6484 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6485 		false);
6486 }
6487 
6488 
6489 area_id
6490 clone_area(const char* name, void** _address, uint32 addressSpec,
6491 	uint32 protection, area_id source)
6492 {
6493 	if ((protection & B_KERNEL_PROTECTION) == 0)
6494 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6495 
6496 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6497 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6498 }
6499 
6500 
6501 area_id
6502 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6503 	uint32 protection, uint32 flags, uint32 guardSize,
6504 	const virtual_address_restrictions* virtualAddressRestrictions,
6505 	const physical_address_restrictions* physicalAddressRestrictions,
6506 	void** _address)
6507 {
6508 	fix_protection(&protection);
6509 
6510 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6511 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6512 		true, _address);
6513 }
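
/*!	Sketch of creating a wired kernel area via the restrictions based
	interface; the name and size are placeholders:
	\code
	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
	physical_address_restrictions physicalRestrictions = {};
	void* address;
	area_id area = create_area_etc(B_SYSTEM_TEAM, "example buffer",
		16 * B_PAGE_SIZE, B_FULL_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
		&virtualRestrictions, &physicalRestrictions, &address);
	\endcode
*/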
6514 
6515 
6516 extern "C" area_id
6517 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6518 	size_t size, uint32 lock, uint32 protection)
6519 {
6520 	fix_protection(&protection);
6521 
6522 	virtual_address_restrictions virtualRestrictions = {};
6523 	virtualRestrictions.address = *_address;
6524 	virtualRestrictions.address_specification = addressSpec;
6525 	physical_address_restrictions physicalRestrictions = {};
6526 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6527 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6528 		true, _address);
6529 }
6530 
6531 
6532 status_t
6533 delete_area(area_id area)
6534 {
6535 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6536 }
6537 
6538 
6539 //	#pragma mark - Userland syscalls
6540 
6541 
6542 status_t
6543 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6544 	addr_t size)
6545 {
6546 	// filter out some unavailable values (for userland)
6547 	switch (addressSpec) {
6548 		case B_ANY_KERNEL_ADDRESS:
6549 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6550 			return B_BAD_VALUE;
6551 	}
6552 
6553 	addr_t address;
6554 
6555 	if (!IS_USER_ADDRESS(userAddress)
6556 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6557 		return B_BAD_ADDRESS;
6558 
6559 	status_t status = vm_reserve_address_range(
6560 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6561 		RESERVED_AVOID_BASE);
6562 	if (status != B_OK)
6563 		return status;
6564 
6565 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6566 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6567 			(void*)address, size);
6568 		return B_BAD_ADDRESS;
6569 	}
6570 
6571 	return B_OK;
6572 }
6573 
6574 
6575 status_t
6576 _user_unreserve_address_range(addr_t address, addr_t size)
6577 {
6578 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6579 		(void*)address, size);
6580 }
6581 
6582 
6583 area_id
6584 _user_area_for(void* address)
6585 {
6586 	return vm_area_for((addr_t)address, false);
6587 }
6588 
6589 
6590 area_id
6591 _user_find_area(const char* userName)
6592 {
6593 	char name[B_OS_NAME_LENGTH];
6594 
6595 	if (!IS_USER_ADDRESS(userName)
6596 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6597 		return B_BAD_ADDRESS;
6598 
6599 	return find_area(name);
6600 }
6601 
6602 
6603 status_t
6604 _user_get_area_info(area_id area, area_info* userInfo)
6605 {
6606 	if (!IS_USER_ADDRESS(userInfo))
6607 		return B_BAD_ADDRESS;
6608 
6609 	area_info info;
6610 	status_t status = get_area_info(area, &info);
6611 	if (status < B_OK)
6612 		return status;
6613 
6614 	// TODO: do we want to prevent userland from seeing kernel protections?
6615 	//info.protection &= B_USER_PROTECTION;
6616 
6617 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6618 		return B_BAD_ADDRESS;
6619 
6620 	return status;
6621 }
6622 
6623 
6624 status_t
6625 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6626 {
6627 	ssize_t cookie;
6628 
6629 	if (!IS_USER_ADDRESS(userCookie)
6630 		|| !IS_USER_ADDRESS(userInfo)
6631 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6632 		return B_BAD_ADDRESS;
6633 
6634 	area_info info;
6635 	status_t status = _get_next_area_info(team, &cookie, &info,
6636 		sizeof(area_info));
6637 	if (status != B_OK)
6638 		return status;
6639 
6640 	//info.protection &= B_USER_PROTECTION;
6641 
6642 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6643 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6644 		return B_BAD_ADDRESS;
6645 
6646 	return status;
6647 }
6648 
6649 
6650 status_t
6651 _user_set_area_protection(area_id area, uint32 newProtection)
6652 {
6653 	if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0)
6654 		return B_BAD_VALUE;
6655 
6656 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6657 		newProtection, false);
6658 }
6659 
6660 
6661 status_t
6662 _user_resize_area(area_id area, size_t newSize)
6663 {
6664 	// TODO: Since we restrict deleting of areas to those owned by the team,
6665 	// we should also do that for resizing (check other functions, too).
6666 	return vm_resize_area(area, newSize, false);
6667 }
6668 
6669 
6670 area_id
6671 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6672 	team_id target)
6673 {
6674 	// filter out some unavailable values (for userland)
6675 	switch (addressSpec) {
6676 		case B_ANY_KERNEL_ADDRESS:
6677 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6678 			return B_BAD_VALUE;
6679 	}
6680 
6681 	void* address;
6682 	if (!IS_USER_ADDRESS(userAddress)
6683 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6684 		return B_BAD_ADDRESS;
6685 
6686 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6687 	if (newArea < B_OK)
6688 		return newArea;
6689 
6690 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6691 		return B_BAD_ADDRESS;
6692 
6693 	return newArea;
6694 }
6695 
6696 
6697 area_id
6698 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6699 	uint32 protection, area_id sourceArea)
6700 {
6701 	char name[B_OS_NAME_LENGTH];
6702 	void* address;
6703 
6704 	// filter out some unavailable values (for userland)
6705 	switch (addressSpec) {
6706 		case B_ANY_KERNEL_ADDRESS:
6707 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6708 			return B_BAD_VALUE;
6709 	}
6710 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6711 		return B_BAD_VALUE;
6712 
6713 	if (!IS_USER_ADDRESS(userName)
6714 		|| !IS_USER_ADDRESS(userAddress)
6715 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6716 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6717 		return B_BAD_ADDRESS;
6718 
6719 	fix_protection(&protection);
6720 
6721 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6722 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6723 		false);
6724 	if (clonedArea < B_OK)
6725 		return clonedArea;
6726 
6727 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6728 		delete_area(clonedArea);
6729 		return B_BAD_ADDRESS;
6730 	}
6731 
6732 	return clonedArea;
6733 }
6734 
6735 
6736 area_id
6737 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6738 	size_t size, uint32 lock, uint32 protection)
6739 {
6740 	char name[B_OS_NAME_LENGTH];
6741 	void* address;
6742 
6743 	// filter out some unavailable values (for userland)
6744 	switch (addressSpec) {
6745 		case B_ANY_KERNEL_ADDRESS:
6746 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6747 			return B_BAD_VALUE;
6748 	}
6749 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6750 		return B_BAD_VALUE;
6751 
6752 	if (!IS_USER_ADDRESS(userName)
6753 		|| !IS_USER_ADDRESS(userAddress)
6754 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6755 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6756 		return B_BAD_ADDRESS;
6757 
6758 	if (addressSpec == B_EXACT_ADDRESS
6759 		&& IS_KERNEL_ADDRESS(address))
6760 		return B_BAD_VALUE;
6761 
6762 	if (addressSpec == B_ANY_ADDRESS)
6763 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6764 	if (addressSpec == B_BASE_ADDRESS)
6765 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6766 
6767 	fix_protection(&protection);
6768 
6769 	virtual_address_restrictions virtualRestrictions = {};
6770 	virtualRestrictions.address = address;
6771 	virtualRestrictions.address_specification = addressSpec;
6772 	physical_address_restrictions physicalRestrictions = {};
6773 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6774 		size, lock, protection, 0, 0, &virtualRestrictions,
6775 		&physicalRestrictions, false, &address);
6776 
6777 	if (area >= B_OK
6778 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6779 		delete_area(area);
6780 		return B_BAD_ADDRESS;
6781 	}
6782 
6783 	return area;
6784 }
6785 
6786 
6787 status_t
6788 _user_delete_area(area_id area)
6789 {
6790 	// Unlike the BeOS implementation, you can now only delete areas
6791 	// that you have created yourself from userland.
6792 	// The documentation to delete_area() explicitly states that this
6793 	// will be restricted in the future, and so it will.
6794 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6795 }
6796 
6797 
6798 // TODO: create a BeOS style call for this!
6799 
6800 area_id
6801 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6802 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6803 	int fd, off_t offset)
6804 {
6805 	char name[B_OS_NAME_LENGTH];
6806 	void* address;
6807 	area_id area;
6808 
6809 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6810 		return B_BAD_VALUE;
6811 
6812 	fix_protection(&protection);
6813 
6814 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6815 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6816 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6817 		return B_BAD_ADDRESS;
6818 
6819 	if (addressSpec == B_EXACT_ADDRESS) {
6820 		if ((addr_t)address + size < (addr_t)address
6821 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6822 			return B_BAD_VALUE;
6823 		}
6824 		if (!IS_USER_ADDRESS(address)
6825 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6826 			return B_BAD_ADDRESS;
6827 		}
6828 	}
6829 
6830 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6831 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6832 		false);
6833 	if (area < B_OK)
6834 		return area;
6835 
6836 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6837 		return B_BAD_ADDRESS;
6838 
6839 	return area;
6840 }
6841 
6842 
6843 status_t
6844 _user_unmap_memory(void* _address, size_t size)
6845 {
6846 	addr_t address = (addr_t)_address;
6847 
6848 	// check params
6849 	if (size == 0 || (addr_t)address + size < (addr_t)address
6850 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6851 		return B_BAD_VALUE;
6852 	}
6853 
6854 	if (!IS_USER_ADDRESS(address)
6855 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6856 		return B_BAD_ADDRESS;
6857 	}
6858 
6859 	// Write lock the address space and ensure the address range is not wired.
6860 	AddressSpaceWriteLocker locker;
6861 	do {
6862 		status_t status = locker.SetTo(team_get_current_team_id());
6863 		if (status != B_OK)
6864 			return status;
6865 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6866 			size, &locker));
6867 
6868 	// unmap
6869 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6870 }
6871 
6872 
6873 status_t
6874 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6875 {
6876 	// check address range
6877 	addr_t address = (addr_t)_address;
6878 	size = PAGE_ALIGN(size);
6879 
6880 	if ((address % B_PAGE_SIZE) != 0)
6881 		return B_BAD_VALUE;
6882 	if (!is_user_address_range(_address, size)) {
6883 		// weird error code required by POSIX
6884 		return ENOMEM;
6885 	}
6886 
6887 	// extend and check protection
6888 	if ((protection & ~B_USER_PROTECTION) != 0)
6889 		return B_BAD_VALUE;
6890 
6891 	fix_protection(&protection);
6892 
6893 	// We need to write lock the address space, since we're going to play with
6894 	// the areas. Also make sure that none of the areas is wired and that we're
6895 	// actually allowed to change the protection.
6896 	AddressSpaceWriteLocker locker;
6897 
6898 	bool restart;
6899 	do {
6900 		restart = false;
6901 
6902 		status_t status = locker.SetTo(team_get_current_team_id());
6903 		if (status != B_OK)
6904 			return status;
6905 
6906 		// First round: Check whether the whole range is covered by areas and we
6907 		// are allowed to modify them.
6908 		addr_t currentAddress = address;
6909 		size_t sizeLeft = size;
6910 		while (sizeLeft > 0) {
6911 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6912 			if (area == NULL)
6913 				return B_NO_MEMORY;
6914 
6915 			if ((area->protection & B_KERNEL_AREA) != 0)
6916 				return B_NOT_ALLOWED;
6917 			if (area->protection_max != 0
6918 				&& (protection & area->protection_max) != (protection & B_USER_PROTECTION)) {
6919 				return B_NOT_ALLOWED;
6920 			}
6921 
6922 			addr_t offset = currentAddress - area->Base();
6923 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6924 
6925 			AreaCacheLocker cacheLocker(area);
6926 
6927 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6928 					&locker, &cacheLocker)) {
6929 				restart = true;
6930 				break;
6931 			}
6932 
6933 			cacheLocker.Unlock();
6934 
6935 			currentAddress += rangeSize;
6936 			sizeLeft -= rangeSize;
6937 		}
6938 	} while (restart);
6939 
6940 	// Second round: If the protections differ from that of the area, create a
6941 	// page protection array and re-map mapped pages.
6942 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6943 	addr_t currentAddress = address;
6944 	size_t sizeLeft = size;
6945 	while (sizeLeft > 0) {
6946 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6947 		if (area == NULL)
6948 			return B_NO_MEMORY;
6949 
6950 		addr_t offset = currentAddress - area->Base();
6951 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6952 
6953 		currentAddress += rangeSize;
6954 		sizeLeft -= rangeSize;
6955 
6956 		if (area->page_protections == NULL) {
6957 			if (area->protection == protection)
6958 				continue;
6959 			if (offset == 0 && rangeSize == area->Size()) {
6960 				// The whole area is covered: let set_area_protection handle it.
6961 				status_t status = vm_set_area_protection(area->address_space->ID(),
6962 					area->id, protection, false);
6963 				if (status != B_OK)
6964 					return status;
6965 				continue;
6966 			}
6967 
6968 			status_t status = allocate_area_page_protections(area);
6969 			if (status != B_OK)
6970 				return status;
6971 		}
6972 
6973 		// We need to lock the complete cache chain, since we potentially unmap
6974 		// pages of lower caches.
6975 		VMCache* topCache = vm_area_get_locked_cache(area);
6976 		VMCacheChainLocker cacheChainLocker(topCache);
6977 		cacheChainLocker.LockAllSourceCaches();
6978 
6979 		// Adjust the committed size, if necessary.
6980 		if (topCache->source != NULL && topCache->temporary) {
6981 			const bool becomesWritable = (protection & B_WRITE_AREA) != 0;
6982 			ssize_t commitmentChange = 0;
6983 			for (addr_t pageAddress = area->Base() + offset;
6984 					pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6985 				if (topCache->LookupPage(pageAddress) != NULL) {
6986 					// This page should already be accounted for in the commitment.
6987 					continue;
6988 				}
6989 
6990 				const bool isWritable
6991 					= (get_area_page_protection(area, pageAddress) & B_WRITE_AREA) != 0;
6992 
6993 				if (becomesWritable && !isWritable)
6994 					commitmentChange += B_PAGE_SIZE;
6995 				else if (!becomesWritable && isWritable)
6996 					commitmentChange -= B_PAGE_SIZE;
6997 			}
6998 
6999 			if (commitmentChange != 0) {
7000 				const off_t newCommitment = topCache->committed_size + commitmentChange;
7001 				ASSERT(newCommitment <= (topCache->virtual_end - topCache->virtual_base));
7002 				status_t status = topCache->Commit(newCommitment, VM_PRIORITY_USER);
7003 				if (status != B_OK)
7004 					return status;
7005 			}
7006 		}
7007 
7008 		for (addr_t pageAddress = area->Base() + offset;
7009 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
7010 			map->Lock();
7011 
7012 			set_area_page_protection(area, pageAddress, protection);
7013 
7014 			phys_addr_t physicalAddress;
7015 			uint32 flags;
7016 
7017 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
7018 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
7019 				map->Unlock();
7020 				continue;
7021 			}
7022 
7023 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
7024 			if (page == NULL) {
7025 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
7026 					"\n", area, physicalAddress);
7027 				map->Unlock();
7028 				return B_ERROR;
7029 			}
7030 
7031 			// If the page is not in the topmost cache and write access is
7032 			// requested, we have to unmap it to preserve the copy-on-write
7033 			// semantics. Otherwise we can re-map it with the new protection.
7034 			bool unmapPage = page->Cache() != topCache
7035 				&& (protection & B_WRITE_AREA) != 0;
7036 
7037 			if (!unmapPage)
7038 				map->ProtectPage(area, pageAddress, protection);
7039 
7040 			map->Unlock();
7041 
7042 			if (unmapPage) {
7043 				DEBUG_PAGE_ACCESS_START(page);
7044 				unmap_page(area, pageAddress);
7045 				DEBUG_PAGE_ACCESS_END(page);
7046 			}
7047 		}
7048 	}
7049 
7050 	return B_OK;
7051 }
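// Illustrative sketch (assumption): POSIX mprotect() maps onto this syscall,
// e.g.
//
//     mprotect(address, length, PROT_READ);
//
// would presumably arrive here with protection == B_READ_AREA (extended by
// fix_protection() with the matching kernel bits). A change covering a whole
// area is delegated to vm_set_area_protection(); partial changes allocate the
// per-page protection array and re-map or unmap the affected pages as done
// above.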
7052 
7053 
7054 status_t
7055 _user_sync_memory(void* _address, size_t size, uint32 flags)
7056 {
7057 	addr_t address = (addr_t)_address;
7058 	size = PAGE_ALIGN(size);
7059 
7060 	// check params
7061 	if ((address % B_PAGE_SIZE) != 0)
7062 		return B_BAD_VALUE;
7063 	if (!is_user_address_range(_address, size)) {
7064 		// weird error code required by POSIX
7065 		return ENOMEM;
7066 	}
7067 
7068 	bool writeSync = (flags & MS_SYNC) != 0;
7069 	bool writeAsync = (flags & MS_ASYNC) != 0;
7070 	if (writeSync && writeAsync)
7071 		return B_BAD_VALUE;
7072 
7073 	if (size == 0 || (!writeSync && !writeAsync))
7074 		return B_OK;
7075 
7076 	// iterate through the range and sync all concerned areas
7077 	while (size > 0) {
7078 		// read lock the address space
7079 		AddressSpaceReadLocker locker;
7080 		status_t error = locker.SetTo(team_get_current_team_id());
7081 		if (error != B_OK)
7082 			return error;
7083 
7084 		// get the first area
7085 		VMArea* area = locker.AddressSpace()->LookupArea(address);
7086 		if (area == NULL)
7087 			return B_NO_MEMORY;
7088 
7089 		uint32 offset = address - area->Base();
7090 		size_t rangeSize = min_c(area->Size() - offset, size);
7091 		offset += area->cache_offset;
7092 
7093 		// lock the cache
7094 		AreaCacheLocker cacheLocker(area);
7095 		if (!cacheLocker)
7096 			return B_BAD_VALUE;
7097 		VMCache* cache = area->cache;
7098 
7099 		locker.Unlock();
7100 
7101 		uint32 firstPage = offset >> PAGE_SHIFT;
7102 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
7103 
7104 		// write the pages
7105 		if (cache->type == CACHE_TYPE_VNODE) {
7106 			if (writeSync) {
7107 				// synchronous
7108 				error = vm_page_write_modified_page_range(cache, firstPage,
7109 					endPage);
7110 				if (error != B_OK)
7111 					return error;
7112 			} else {
7113 				// asynchronous
7114 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
7115 				// TODO: This is probably not quite what is supposed to happen.
7116 				// Especially when a lot has to be written, it might take ages
7117 				// until it really hits the disk.
7118 			}
7119 		}
7120 
7121 		address += rangeSize;
7122 		size -= rangeSize;
7123 	}
7124 
7125 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
7126 	// synchronize multiple mappings of the same file. In our VM they never get
7127 	// out of sync, though, so we don't have to do anything.
7128 
7129 	return B_OK;
7130 }
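// Illustrative sketch (assumption): POSIX msync() is the expected userland
// entry point for this syscall, e.g.
//
//     msync(address, length, MS_SYNC);
//
// synchronously writes back the modified pages of any file-backed
// (CACHE_TYPE_VNODE) mappings in the range, while MS_ASYNC merely schedules
// the write-back.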
7131 
7132 
7133 status_t
7134 _user_memory_advice(void* _address, size_t size, uint32 advice)
7135 {
7136 	addr_t address = (addr_t)_address;
7137 	if ((address % B_PAGE_SIZE) != 0)
7138 		return B_BAD_VALUE;
7139 
7140 	size = PAGE_ALIGN(size);
7141 	if (!is_user_address_range(_address, size)) {
7142 		// weird error code required by POSIX
7143 		return B_NO_MEMORY;
7144 	}
7145 
7146 	switch (advice) {
7147 		case MADV_NORMAL:
7148 		case MADV_SEQUENTIAL:
7149 		case MADV_RANDOM:
7150 		case MADV_WILLNEED:
7151 		case MADV_DONTNEED:
7152 			// TODO: Implement!
7153 			break;
7154 
7155 		case MADV_FREE:
7156 		{
7157 			AddressSpaceWriteLocker locker;
7158 			do {
7159 				status_t status = locker.SetTo(team_get_current_team_id());
7160 				if (status != B_OK)
7161 					return status;
7162 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
7163 					address, size, &locker));
7164 
7165 			discard_address_range(locker.AddressSpace(), address, size, false);
7166 			break;
7167 		}
7168 
7169 		default:
7170 			return B_BAD_VALUE;
7171 	}
7172 
7173 	return B_OK;
7174 }
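// Illustrative sketch (assumption): madvise()/posix_madvise() end up here,
// e.g.
//
//     madvise(address, length, MADV_FREE);
//
// discards the anonymous pages in the range via discard_address_range(); the
// remaining advice values are accepted but currently ignored (see the TODO
// above).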
7175 
7176 
7177 status_t
7178 _user_get_memory_properties(team_id teamID, const void* address,
7179 	uint32* _protected, uint32* _lock)
7180 {
7181 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
7182 		return B_BAD_ADDRESS;
7183 
7184 	AddressSpaceReadLocker locker;
7185 	status_t error = locker.SetTo(teamID);
7186 	if (error != B_OK)
7187 		return error;
7188 
7189 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
7190 	if (area == NULL)
7191 		return B_NO_MEMORY;
7192 
7193 	uint32 protection = get_area_page_protection(area, (addr_t)address);
7194 	uint32 wiring = area->wiring;
7195 
7196 	locker.Unlock();
7197 
7198 	error = user_memcpy(_protected, &protection, sizeof(protection));
7199 	if (error != B_OK)
7200 		return error;
7201 
7202 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
7203 
7204 	return error;
7205 }
7206 
7207 
7208 static status_t
7209 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
7210 {
7211 #if ENABLE_SWAP_SUPPORT
7212 	// check address range
7213 	addr_t address = (addr_t)_address;
7214 	size = PAGE_ALIGN(size);
7215 
7216 	if ((address % B_PAGE_SIZE) != 0)
7217 		return EINVAL;
7218 	if (!is_user_address_range(_address, size))
7219 		return EINVAL;
7220 
7221 	const addr_t endAddress = address + size;
7222 
7223 	AddressSpaceReadLocker addressSpaceLocker;
7224 	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
7225 	if (error != B_OK)
7226 		return error;
7227 	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
7228 
7229 	// iterate through all concerned areas
7230 	addr_t nextAddress = address;
7231 	while (nextAddress != endAddress) {
7232 		// get the next area
7233 		VMArea* area = addressSpace->LookupArea(nextAddress);
7234 		if (area == NULL) {
7235 			error = B_BAD_ADDRESS;
7236 			break;
7237 		}
7238 
7239 		const addr_t areaStart = nextAddress;
7240 		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
7241 		nextAddress = areaEnd;
7242 
7243 		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7244 		if (error != B_OK) {
7245 			// We don't need to unset or reset things on failure.
7246 			break;
7247 		}
7248 
7249 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
7250 		VMAnonymousCache* anonCache = NULL;
7251 		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
7252 			// This memory can already never be swapped. Nothing to do.
7253 		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
7254 			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
7255 				areaEnd - areaStart, swappable);
7256 		} else {
7257 			// Some other cache type? We cannot affect anything here.
7258 			error = EINVAL;
7259 		}
7260 
7261 		cacheChainLocker.Unlock();
7262 
7263 		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
7264 		if (error != B_OK)
7265 			break;
7266 	}
7267 
7268 	return error;
7269 #else
7270 	// No swap support? Nothing to do.
7271 	return B_OK;
7272 #endif
7273 }
7274 
7275 
7276 status_t
7277 _user_mlock(const void* _address, size_t size)
7278 {
7279 	return user_set_memory_swappable(_address, size, false);
7280 }
7281 
7282 
7283 status_t
7284 _user_munlock(const void* _address, size_t size)
7285 {
7286 	// TODO: B_SHARED_AREAs need to be handled a bit differently:
7287 	// if multiple clones of an area had mlock() called on them,
7288 	// munlock() must also be called on all of them to actually unlock.
7289 	// (At present, the first munlock() will unlock all.)
7290 	// TODO: fork() should automatically unlock memory in the child.
7291 	return user_set_memory_swappable(_address, size, true);
7292 }
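// Illustrative sketch (assumption): POSIX mlock()/munlock() are thin wrappers
// around the two syscalls above, e.g.
//
//     mlock(address, length);
//     munlock(address, length);
//
// With swap support enabled they toggle per-page swappability on the
// underlying VMAnonymousCache via SetCanSwapPages(); the lock_memory_etc()
// call in user_set_memory_swappable() only pins the pages while the flag is
// being changed, not for the lifetime of the lock.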
7293 
7294 
7295 // #pragma mark -- compatibility
7296 
7297 
7298 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7299 
7300 
7301 struct physical_entry_beos {
7302 	uint32	address;
7303 	uint32	size;
7304 };
7305 
7306 
7307 /*!	The physical_entry structure has changed. We need to translate it to the
7308 	old one.
7309 */
7310 extern "C" int32
7311 __get_memory_map_beos(const void* _address, size_t numBytes,
7312 	physical_entry_beos* table, int32 numEntries)
7313 {
7314 	if (numEntries <= 0)
7315 		return B_BAD_VALUE;
7316 
7317 	const uint8* address = (const uint8*)_address;
7318 
7319 	int32 count = 0;
7320 	while (numBytes > 0 && count < numEntries) {
7321 		physical_entry entry;
7322 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
7323 		if (result < 0) {
7324 			if (result != B_BUFFER_OVERFLOW)
7325 				return result;
7326 		}
7327 
7328 		if (entry.address >= (phys_addr_t)1 << 32) {
7329 			panic("get_memory_map(): Address is greater than 4 GB!");
7330 			return B_ERROR;
7331 		}
7332 
7333 		table[count].address = entry.address;
7334 		table[count++].size = entry.size;
7335 
7336 		address += entry.size;
7337 		numBytes -= entry.size;
7338 	}
7339 
7340 	// null-terminate the table, if possible
7341 	if (count < numEntries) {
7342 		table[count].address = 0;
7343 		table[count].size = 0;
7344 	}
7345 
7346 	return B_OK;
7347 }
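// Illustrative note: a binary linked against the BeOS-era headers calls
// get_memory_map() with the 32-bit physical_entry_beos layout above; the
// symbol versioning directives below resolve that call (get_memory_map@,
// "BASE") to __get_memory_map_beos(), which translates each entry from the
// current, wider physical_entry and panics on addresses at or above 4 GB,
// since they cannot be represented in the old structure.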
7348 
7349 
7350 /*!	The type of the \a physicalAddress parameter has changed from void* to
7351 	phys_addr_t.
7352 */
7353 extern "C" area_id
7354 __map_physical_memory_beos(const char* name, void* physicalAddress,
7355 	size_t numBytes, uint32 addressSpec, uint32 protection,
7356 	void** _virtualAddress)
7357 {
7358 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
7359 		addressSpec, protection, _virtualAddress);
7360 }
7361 
7362 
7363 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
7364 	we meddle with the \a lock parameter to force 32 bit.
7365 */
7366 extern "C" area_id
7367 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
7368 	size_t size, uint32 lock, uint32 protection)
7369 {
7370 	switch (lock) {
7371 		case B_NO_LOCK:
7372 			break;
7373 		case B_FULL_LOCK:
7374 		case B_LAZY_LOCK:
7375 			lock = B_32_BIT_FULL_LOCK;
7376 			break;
7377 		case B_CONTIGUOUS:
7378 			lock = B_32_BIT_CONTIGUOUS;
7379 			break;
7380 	}
7381 
7382 	return __create_area_haiku(name, _address, addressSpec, size, lock,
7383 		protection);
7384 }
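// Illustrative note: a legacy caller doing, e.g.,
//
//     create_area("dma buffer", &address, B_ANY_ADDRESS, size, B_CONTIGUOUS,
//         B_READ_AREA | B_WRITE_AREA);
//
// is resolved to __create_area_beos() by the versioning below, so the
// allocation is silently restricted to physical memory below 4 GB
// (B_32_BIT_CONTIGUOUS), matching the assumption that such callers cannot
// deal with physical addresses >= 4 GB.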
7385 
7386 
7387 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
7388 	"BASE");
7389 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
7390 	"map_physical_memory@", "BASE");
7391 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
7392 	"BASE");
7393 
7394 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7395 	"get_memory_map@@", "1_ALPHA3");
7396 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7397 	"map_physical_memory@@", "1_ALPHA3");
7398 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7399 	"1_ALPHA3");
7400 
7401 
7402 #else
7403 
7404 
7405 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
7406 	"get_memory_map@@", "BASE");
7407 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
7408 	"map_physical_memory@@", "BASE");
7409 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
7410 	"BASE");
7411 
7412 
7413 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
7414