xref: /haiku/src/system/kernel/vm/vm.cpp (revision fc7456e9b1ec38c941134ed6d01c438cf289381e)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/BitUtils.h>
51 #include <util/ThreadAutoLock.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_priv.h>
54 #include <vm/VMAddressSpace.h>
55 #include <vm/VMArea.h>
56 #include <vm/VMCache.h>
57 
58 #include "VMAddressSpaceLocking.h"
59 #include "VMAnonymousCache.h"
60 #include "VMAnonymousNoSwapCache.h"
61 #include "IORequest.h"
62 
63 
64 //#define TRACE_VM
65 //#define TRACE_FAULTS
66 #ifdef TRACE_VM
67 #	define TRACE(x) dprintf x
68 #else
69 #	define TRACE(x) ;
70 #endif
71 #ifdef TRACE_FAULTS
72 #	define FTRACE(x) dprintf x
73 #else
74 #	define FTRACE(x) ;
75 #endif
76 
77 
78 namespace {
79 
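// AreaCacheLocking/AreaCacheLocker adapt the AutoLocker pattern to area
// caches: the cache is obtained already locked and referenced via
// vm_area_get_locked_cache(), so Lock() is a no-op that returns false, and
// Unlock() drops both the lock and the reference through
// vm_area_put_locked_cache().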
80 class AreaCacheLocking {
81 public:
82 	inline bool Lock(VMCache* lockable)
83 	{
84 		return false;
85 	}
86 
87 	inline void Unlock(VMCache* lockable)
88 	{
89 		vm_area_put_locked_cache(lockable);
90 	}
91 };
92 
93 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
94 public:
95 	inline AreaCacheLocker(VMCache* cache = NULL)
96 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
97 	{
98 	}
99 
100 	inline AreaCacheLocker(VMArea* area)
101 		: AutoLocker<VMCache, AreaCacheLocking>()
102 	{
103 		SetTo(area);
104 	}
105 
106 	inline void SetTo(VMCache* cache, bool alreadyLocked)
107 	{
108 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
109 	}
110 
111 	inline void SetTo(VMArea* area)
112 	{
113 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
114 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
115 	}
116 };
117 
118 
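// Locks a chain of caches, from a top (consumer) cache down through its
// source caches. While the chain is locked, each cache's UserData() field is
// used to store a pointer back to its consumer, so that the chain can later
// be unlocked in source -> consumer order (see Unlock()).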
119 class VMCacheChainLocker {
120 public:
121 	VMCacheChainLocker()
122 		:
123 		fTopCache(NULL),
124 		fBottomCache(NULL)
125 	{
126 	}
127 
128 	VMCacheChainLocker(VMCache* topCache)
129 		:
130 		fTopCache(topCache),
131 		fBottomCache(topCache)
132 	{
133 	}
134 
135 	~VMCacheChainLocker()
136 	{
137 		Unlock();
138 	}
139 
140 	void SetTo(VMCache* topCache)
141 	{
142 		fTopCache = topCache;
143 		fBottomCache = topCache;
144 
145 		if (topCache != NULL)
146 			topCache->SetUserData(NULL);
147 	}
148 
149 	VMCache* LockSourceCache()
150 	{
151 		if (fBottomCache == NULL || fBottomCache->source == NULL)
152 			return NULL;
153 
154 		VMCache* previousCache = fBottomCache;
155 
156 		fBottomCache = fBottomCache->source;
157 		fBottomCache->Lock();
158 		fBottomCache->AcquireRefLocked();
159 		fBottomCache->SetUserData(previousCache);
160 
161 		return fBottomCache;
162 	}
163 
164 	void LockAllSourceCaches()
165 	{
166 		while (LockSourceCache() != NULL) {
167 		}
168 	}
169 
170 	void Unlock(VMCache* exceptCache = NULL)
171 	{
172 		if (fTopCache == NULL)
173 			return;
174 
175 		// Unlock caches in source -> consumer direction. This is important to
176 		// avoid double-locking and a reversal of locking order in case a cache
177 		// is eligible for merging.
178 		VMCache* cache = fBottomCache;
179 		while (cache != NULL) {
180 			VMCache* nextCache = (VMCache*)cache->UserData();
181 			if (cache != exceptCache)
182 				cache->ReleaseRefAndUnlock(cache != fTopCache);
183 
184 			if (cache == fTopCache)
185 				break;
186 
187 			cache = nextCache;
188 		}
189 
190 		fTopCache = NULL;
191 		fBottomCache = NULL;
192 	}
193 
194 	void UnlockKeepRefs(bool keepTopCacheLocked)
195 	{
196 		if (fTopCache == NULL)
197 			return;
198 
199 		VMCache* nextCache = fBottomCache;
200 		VMCache* cache = NULL;
201 
202 		while (keepTopCacheLocked
203 				? nextCache != fTopCache : cache != fTopCache) {
204 			cache = nextCache;
205 			nextCache = (VMCache*)cache->UserData();
206 			cache->Unlock(cache != fTopCache);
207 		}
208 	}
209 
210 	void RelockCaches(bool topCacheLocked)
211 	{
212 		if (fTopCache == NULL)
213 			return;
214 
215 		VMCache* nextCache = fTopCache;
216 		VMCache* cache = NULL;
217 		if (topCacheLocked) {
218 			cache = nextCache;
219 			nextCache = cache->source;
220 		}
221 
222 		while (cache != fBottomCache && nextCache != NULL) {
223 			VMCache* consumer = cache;
224 			cache = nextCache;
225 			nextCache = cache->source;
226 			cache->Lock();
227 			cache->SetUserData(consumer);
228 		}
229 	}
230 
231 private:
232 	VMCache*	fTopCache;
233 	VMCache*	fBottomCache;
234 };
235 
236 } // namespace
237 
238 
239 // The memory reserve an allocation of a given priority must not touch.
240 static const size_t kMemoryReserveForPriority[] = {
241 	VM_MEMORY_RESERVE_USER,		// user
242 	VM_MEMORY_RESERVE_SYSTEM,	// system
243 	0							// VIP
244 };
245 
246 
247 static ObjectCache** sPageMappingsObjectCaches;
248 static uint32 sPageMappingsMask;
249 
250 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
251 
252 static off_t sAvailableMemory;
253 static off_t sNeededMemory;
254 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
255 static uint32 sPageFaults;
256 
257 static VMPhysicalPageMapper* sPhysicalPageMapper;
258 
259 
260 // function declarations
261 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
262 	bool deletingAddressSpace, bool alreadyRemoved = false);
263 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
264 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
265 static status_t map_backing_store(VMAddressSpace* addressSpace,
266 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
267 	int protection, int protectionMax, int mapping, uint32 flags,
268 	const virtual_address_restrictions* addressRestrictions, bool kernel,
269 	VMArea** _area, void** _virtualAddress);
270 static void fix_protection(uint32* protection);
271 
272 
273 //	#pragma mark -
274 
275 
276 #if VM_PAGE_FAULT_TRACING
277 
278 namespace VMPageFaultTracing {
279 
280 class PageFaultStart : public AbstractTraceEntry {
281 public:
282 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
283 		:
284 		fAddress(address),
285 		fPC(pc),
286 		fWrite(write),
287 		fUser(user)
288 	{
289 		Initialized();
290 	}
291 
292 	virtual void AddDump(TraceOutput& out)
293 	{
294 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
295 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
296 	}
297 
298 private:
299 	addr_t	fAddress;
300 	addr_t	fPC;
301 	bool	fWrite;
302 	bool	fUser;
303 };
304 
305 
306 // page fault errors
307 enum {
308 	PAGE_FAULT_ERROR_NO_AREA		= 0,
309 	PAGE_FAULT_ERROR_KERNEL_ONLY,
310 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
311 	PAGE_FAULT_ERROR_READ_PROTECTED,
312 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
313 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
314 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
315 };
316 
317 
318 class PageFaultError : public AbstractTraceEntry {
319 public:
320 	PageFaultError(area_id area, status_t error)
321 		:
322 		fArea(area),
323 		fError(error)
324 	{
325 		Initialized();
326 	}
327 
328 	virtual void AddDump(TraceOutput& out)
329 	{
330 		switch (fError) {
331 			case PAGE_FAULT_ERROR_NO_AREA:
332 				out.Print("page fault error: no area");
333 				break;
334 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
335 				out.Print("page fault error: area: %ld, kernel only", fArea);
336 				break;
337 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
338 				out.Print("page fault error: area: %ld, write protected",
339 					fArea);
340 				break;
341 			case PAGE_FAULT_ERROR_READ_PROTECTED:
342 				out.Print("page fault error: area: %ld, read protected", fArea);
343 				break;
344 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
345 				out.Print("page fault error: area: %ld, execute protected",
346 					fArea);
347 				break;
348 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
349 				out.Print("page fault error: kernel touching bad user memory");
350 				break;
351 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
352 				out.Print("page fault error: no address space");
353 				break;
354 			default:
355 				out.Print("page fault error: area: %ld, error: %s", fArea,
356 					strerror(fError));
357 				break;
358 		}
359 	}
360 
361 private:
362 	area_id		fArea;
363 	status_t	fError;
364 };
365 
366 
367 class PageFaultDone : public AbstractTraceEntry {
368 public:
369 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
370 			vm_page* page)
371 		:
372 		fArea(area),
373 		fTopCache(topCache),
374 		fCache(cache),
375 		fPage(page)
376 	{
377 		Initialized();
378 	}
379 
380 	virtual void AddDump(TraceOutput& out)
381 	{
382 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
383 			"page: %p", fArea, fTopCache, fCache, fPage);
384 	}
385 
386 private:
387 	area_id		fArea;
388 	VMCache*	fTopCache;
389 	VMCache*	fCache;
390 	vm_page*	fPage;
391 };
392 
393 }	// namespace VMPageFaultTracing
394 
395 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
396 #else
397 #	define TPF(x) ;
398 #endif	// VM_PAGE_FAULT_TRACING
399 
400 
401 //	#pragma mark - page mappings allocation
402 
403 
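/*!	Creates the object caches used for vm_page_mapping allocations. The number
	of caches is the largest power of two not exceeding the CPU count;
	allocate_page_mapping() then picks a cache based on the low bits of the
	physical page number, spreading allocations over the caches (presumably to
	reduce contention on multi-CPU systems).
*/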
404 static void
405 create_page_mappings_object_caches()
406 {
407 	// We want the largest power of 2 that does not exceed the number of CPUs.
408 	const int32 numCPUs = smp_get_num_cpus();
409 	int32 count = next_power_of_2(numCPUs);
410 	if (count > numCPUs)
411 		count >>= 1;
412 	sPageMappingsMask = count - 1;
413 
414 	sPageMappingsObjectCaches = new object_cache*[count];
415 	if (sPageMappingsObjectCaches == NULL)
416 		panic("failed to allocate page mappings object_cache array");
417 
418 	for (int32 i = 0; i < count; i++) {
419 		char name[32];
420 		snprintf(name, sizeof(name), "page mappings %" B_PRId32, i);
421 
422 		object_cache* cache = create_object_cache_etc(name,
423 			sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
424 			NULL, NULL);
425 		if (cache == NULL)
426 			panic("failed to create page mappings object_cache");
427 
428 		object_cache_set_minimum_reserve(cache, 1024);
429 		sPageMappingsObjectCaches[i] = cache;
430 	}
431 }
432 
433 
434 static object_cache*
435 page_mapping_object_cache_for(page_num_t page)
436 {
437 	return sPageMappingsObjectCaches[page & sPageMappingsMask];
438 }
439 
440 
441 static vm_page_mapping*
442 allocate_page_mapping(page_num_t page, uint32 flags = 0)
443 {
444 	return (vm_page_mapping*)object_cache_alloc(page_mapping_object_cache_for(page),
445 		flags);
446 }
447 
448 
449 void
450 vm_free_page_mapping(page_num_t page, vm_page_mapping* mapping, uint32 flags)
451 {
452 	object_cache_free(page_mapping_object_cache_for(page), mapping, flags);
453 }
454 
455 
456 //	#pragma mark -
457 
458 
459 /*!	The page's cache must be locked.
460 */
461 static inline void
462 increment_page_wired_count(vm_page* page)
463 {
464 	if (!page->IsMapped())
465 		atomic_add(&gMappedPagesCount, 1);
466 	page->IncrementWiredCount();
467 }
468 
469 
470 /*!	The page's cache must be locked.
471 */
472 static inline void
473 decrement_page_wired_count(vm_page* page)
474 {
475 	page->DecrementWiredCount();
476 	if (!page->IsMapped())
477 		atomic_add(&gMappedPagesCount, -1);
478 }
479 
480 
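/*!	Returns the virtual address at which \a page is mapped within \a area,
	computed from the page's cache offset relative to the area's cache offset.
*/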
481 static inline addr_t
482 virtual_page_address(VMArea* area, vm_page* page)
483 {
484 	return area->Base()
485 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
486 }
487 
488 
489 static inline bool
490 is_page_in_area(VMArea* area, vm_page* page)
491 {
492 	off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT);
493 	return pageCacheOffsetBytes >= area->cache_offset
494 		&& pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size();
495 }
496 
497 
498 //! You need to have the address space locked when calling this function
499 static VMArea*
500 lookup_area(VMAddressSpace* addressSpace, area_id id)
501 {
502 	VMAreas::ReadLock();
503 
504 	VMArea* area = VMAreas::LookupLocked(id);
505 	if (area != NULL && area->address_space != addressSpace)
506 		area = NULL;
507 
508 	VMAreas::ReadUnlock();
509 
510 	return area;
511 }
512 
513 
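/*!	Returns the number of bytes needed to store the per-page protection
	nibbles for an area of the given size: two pages share one byte. For
	example, a four-page area needs (4 + 1) / 2 = 2 bytes.
*/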
514 static inline size_t
515 area_page_protections_size(size_t areaSize)
516 {
517 	// In the page protections we store only the three user protections,
518 	// so we use 4 bits per page.
519 	return (areaSize / B_PAGE_SIZE + 1) / 2;
520 }
521 
522 
523 static status_t
524 allocate_area_page_protections(VMArea* area)
525 {
526 	size_t bytes = area_page_protections_size(area->Size());
527 	area->page_protections = (uint8*)malloc_etc(bytes,
528 		area->address_space == VMAddressSpace::Kernel()
529 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
530 	if (area->page_protections == NULL)
531 		return B_NO_MEMORY;
532 
533 	// init the page protections for all pages to that of the area
534 	uint32 areaProtection = area->protection
535 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
536 	memset(area->page_protections, areaProtection | (areaProtection << 4), bytes);
537 
538 	// clear protections from the area
539 	area->protection &= ~(B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA
540 		| B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA | B_KERNEL_EXECUTE_AREA);
541 	return B_OK;
542 }
543 
544 
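// Layout of page_protections: each byte holds two 4-bit entries; the low
// nibble belongs to the even-indexed page, the high nibble to the following
// odd-indexed page. E.g. page index 5 uses the high nibble of byte 2.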
545 static inline void
546 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
547 {
548 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
549 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
550 	uint8& entry = area->page_protections[pageIndex / 2];
551 	if (pageIndex % 2 == 0)
552 		entry = (entry & 0xf0) | protection;
553 	else
554 		entry = (entry & 0x0f) | (protection << 4);
555 }
556 
557 
558 static inline uint32
559 get_area_page_protection(VMArea* area, addr_t pageAddress)
560 {
561 	if (area->page_protections == NULL)
562 		return area->protection;
563 
564 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
565 	uint32 protection = area->page_protections[pageIndex / 2];
566 	if (pageIndex % 2 == 0)
567 		protection &= 0x0f;
568 	else
569 		protection >>= 4;
570 
571 	uint32 kernelProtection = 0;
572 	if ((protection & B_READ_AREA) != 0)
573 		kernelProtection |= B_KERNEL_READ_AREA;
574 	if ((protection & B_WRITE_AREA) != 0)
575 		kernelProtection |= B_KERNEL_WRITE_AREA;
576 
577 	// If this is a kernel area we return only the kernel flags.
578 	if (area->address_space == VMAddressSpace::Kernel())
579 		return kernelProtection;
580 
581 	return protection | kernelProtection;
582 }
583 
584 
585 static inline uint8*
586 realloc_page_protections(uint8* pageProtections, size_t areaSize,
587 	uint32 allocationFlags)
588 {
589 	size_t bytes = area_page_protections_size(areaSize);
590 	return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags);
591 }
592 
593 
594 /*!	The caller must have reserved as many pages as the translation map
595 	implementation might need to map this page.
596 	The page's cache must be locked.
597 */
598 static status_t
599 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
600 	vm_page_reservation* reservation)
601 {
602 	VMTranslationMap* map = area->address_space->TranslationMap();
603 
604 	bool wasMapped = page->IsMapped();
605 
606 	if (area->wiring == B_NO_LOCK) {
607 		DEBUG_PAGE_ACCESS_CHECK(page);
608 
609 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
610 		vm_page_mapping* mapping = allocate_page_mapping(page->physical_page_number,
611 			CACHE_DONT_WAIT_FOR_MEMORY
612 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
613 		if (mapping == NULL)
614 			return B_NO_MEMORY;
615 
616 		mapping->page = page;
617 		mapping->area = area;
618 
619 		map->Lock();
620 
621 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
622 			area->MemoryType(), reservation);
623 
624 		// insert mapping into lists
625 		if (!page->IsMapped())
626 			atomic_add(&gMappedPagesCount, 1);
627 
628 		page->mappings.Add(mapping);
629 		area->mappings.Add(mapping);
630 
631 		map->Unlock();
632 	} else {
633 		DEBUG_PAGE_ACCESS_CHECK(page);
634 
635 		map->Lock();
636 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
637 			area->MemoryType(), reservation);
638 		map->Unlock();
639 
640 		increment_page_wired_count(page);
641 	}
642 
643 	if (!wasMapped) {
644 		// The page is mapped now, so it must not remain in the cached queue.
645 		// It also makes sense to move it from the inactive to the active queue,
646 		// since otherwise the page daemon wouldn't come to keep track of it (in
647 		// idle mode) -- if the page isn't touched, it will be deactivated after
648 		// a full iteration through the queue at the latest.
649 		if (page->State() == PAGE_STATE_CACHED
650 				|| page->State() == PAGE_STATE_INACTIVE) {
651 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
652 		}
653 	}
654 
655 	return B_OK;
656 }
657 
658 
659 /*!	The caller must hold the lock of the page's cache when calling this
660 	function.
661 */
662 static inline bool
663 unmap_page(VMArea* area, addr_t virtualAddress)
664 {
665 	return area->address_space->TranslationMap()->UnmapPage(area,
666 		virtualAddress, true);
667 }
668 
669 
670 /*!	The caller must hold the locks of all
671 	mapped pages' caches.
672 */
673 static inline void
674 unmap_pages(VMArea* area, addr_t base, size_t size)
675 {
676 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
677 }
678 
679 
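/*!	Clips the given range against \a area. On success, \a address and \a size
	are adjusted to describe the part of the range that overlaps the area and
	\a offset is set to that part's offset relative to the area's base.
	Returns \c false if the range does not intersect the area at all.
*/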
680 static inline bool
681 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
682 {
683 	if (address < area->Base()) {
684 		offset = area->Base() - address;
685 		if (offset >= size)
686 			return false;
687 
688 		address = area->Base();
689 		size -= offset;
690 		offset = 0;
691 		if (size > area->Size())
692 			size = area->Size();
693 
694 		return true;
695 	}
696 
697 	offset = address - area->Base();
698 	if (offset >= area->Size())
699 		return false;
700 
701 	if (size >= area->Size() - offset)
702 		size = area->Size() - offset;
703 
704 	return true;
705 }
706 
707 
708 /*!	Cuts a piece out of an area. If the given cut range covers the complete
709 	area, it is deleted. If it covers the beginning or the end, the area is
710 	resized accordingly. If the range covers some part in the middle of the
711 	area, it is split in two; in this case the second area is returned via
712 	\a _secondArea (the variable is left untouched in the other cases).
713 	The address space must be write locked.
714 	The caller must ensure that no part of the given range is wired.
715 */
716 static status_t
717 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
718 	addr_t size, VMArea** _secondArea, bool kernel)
719 {
720 	addr_t offset;
721 	if (!intersect_area(area, address, size, offset))
722 		return B_OK;
723 
724 	// Is the area fully covered?
725 	if (address == area->Base() && size == area->Size()) {
726 		delete_area(addressSpace, area, false);
727 		return B_OK;
728 	}
729 
730 	int priority;
731 	uint32 allocationFlags;
732 	if (addressSpace == VMAddressSpace::Kernel()) {
733 		priority = VM_PRIORITY_SYSTEM;
734 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
735 			| HEAP_DONT_LOCK_KERNEL_SPACE;
736 	} else {
737 		priority = VM_PRIORITY_USER;
738 		allocationFlags = 0;
739 	}
740 
741 	VMCache* cache = vm_area_get_locked_cache(area);
742 	VMCacheChainLocker cacheChainLocker(cache);
743 	cacheChainLocker.LockAllSourceCaches();
744 
745 	// If no one else uses the area's cache and it's an anonymous cache, we can
746 	// resize or split it, too.
747 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
748 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
749 
750 	const addr_t oldSize = area->Size();
751 
752 	// Cut the end only?
753 	if (offset > 0 && size == area->Size() - offset) {
754 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
755 			allocationFlags);
756 		if (error != B_OK)
757 			return error;
758 
759 		if (area->page_protections != NULL) {
760 			uint8* newProtections = realloc_page_protections(
761 				area->page_protections, area->Size(), allocationFlags);
762 
763 			if (newProtections == NULL) {
764 				addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
765 				return B_NO_MEMORY;
766 			}
767 
768 			area->page_protections = newProtections;
769 		}
770 
771 		// unmap pages
772 		unmap_pages(area, address, size);
773 
774 		if (onlyCacheUser) {
775 			// Since VMCache::Resize() can temporarily drop the lock, we must
776 			// unlock all lower caches to prevent locking order inversion.
777 			cacheChainLocker.Unlock(cache);
778 			cache->Resize(cache->virtual_base + offset, priority);
779 			cache->ReleaseRefAndUnlock();
780 		}
781 
782 		return B_OK;
783 	}
784 
785 	// Cut the beginning only?
786 	if (area->Base() == address) {
787 		uint8* newProtections = NULL;
788 		if (area->page_protections != NULL) {
789 			// Allocate all memory before shifting as the shift might lose some
790 			// bits.
791 			newProtections = realloc_page_protections(NULL, area->Size(),
792 				allocationFlags);
793 
794 			if (newProtections == NULL)
795 				return B_NO_MEMORY;
796 		}
797 
798 		// resize the area
799 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
800 			allocationFlags);
801 		if (error != B_OK) {
802 			if (newProtections != NULL)
803 				free_etc(newProtections, allocationFlags);
804 			return error;
805 		}
806 
807 		if (area->page_protections != NULL) {
808 			size_t oldBytes = area_page_protections_size(oldSize);
809 			ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE;
810 			bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4));
811 
812 			size_t bytes = area_page_protections_size(area->Size());
813 			memcpy(newProtections, area->page_protections, bytes);
814 			free_etc(area->page_protections, allocationFlags);
815 			area->page_protections = newProtections;
816 		}
817 
818 		// unmap pages
819 		unmap_pages(area, address, size);
820 
821 		if (onlyCacheUser) {
822 			// Since VMCache::Rebase() can temporarily drop the lock, we must
823 			// unlock all lower caches to prevent locking order inversion.
824 			cacheChainLocker.Unlock(cache);
825 			cache->Rebase(cache->virtual_base + size, priority);
826 			cache->ReleaseRefAndUnlock();
827 		}
828 		area->cache_offset += size;
829 
830 		return B_OK;
831 	}
832 
833 	// The tough part -- cut a piece out of the middle of the area.
834 	// We do that by shrinking the area to the beginning section and creating a
835 	// new area for the end section.
836 	addr_t firstNewSize = offset;
837 	addr_t secondBase = address + size;
838 	addr_t secondSize = area->Size() - offset - size;
839 
840 	// unmap pages
841 	unmap_pages(area, address, area->Size() - firstNewSize);
842 
843 	// resize the area
844 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
845 		allocationFlags);
846 	if (error != B_OK)
847 		return error;
848 
849 	uint8* areaNewProtections = NULL;
850 	uint8* secondAreaNewProtections = NULL;
851 
852 	// Try to allocate the new memory before making some hard-to-reverse
853 	// changes.
854 	if (area->page_protections != NULL) {
855 		areaNewProtections = realloc_page_protections(NULL, area->Size(),
856 			allocationFlags);
857 		secondAreaNewProtections = realloc_page_protections(NULL, secondSize,
858 			allocationFlags);
859 
860 		if (areaNewProtections == NULL || secondAreaNewProtections == NULL) {
861 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
862 			free_etc(areaNewProtections, allocationFlags);
863 			free_etc(secondAreaNewProtections, allocationFlags);
864 			return B_NO_MEMORY;
865 		}
866 	}
867 
868 	virtual_address_restrictions addressRestrictions = {};
869 	addressRestrictions.address = (void*)secondBase;
870 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
871 	VMArea* secondArea;
872 
873 	if (onlyCacheUser) {
874 		// Create a new cache for the second area.
875 		VMCache* secondCache;
876 		error = VMCacheFactory::CreateAnonymousCache(secondCache,
877 			area->protection & B_OVERCOMMITTING_AREA, 0, 0,
878 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
879 		if (error != B_OK) {
880 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
881 			free_etc(areaNewProtections, allocationFlags);
882 			free_etc(secondAreaNewProtections, allocationFlags);
883 			return error;
884 		}
885 
886 		secondCache->Lock();
887 		secondCache->temporary = cache->temporary;
888 		secondCache->virtual_base = area->cache_offset;
889 		secondCache->virtual_end = area->cache_offset + secondSize;
890 
891 		// Transfer the concerned pages from the first cache.
892 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
893 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
894 			area->cache_offset);
895 
896 		if (error == B_OK) {
897 			// Since VMCache::Resize() can temporarily drop the lock, we must
898 			// unlock all lower caches to prevent locking order inversion.
899 			cacheChainLocker.Unlock(cache);
900 			cache->Resize(cache->virtual_base + firstNewSize, priority);
901 			// Don't unlock the cache yet because we might have to resize it
902 			// back.
903 
904 			// Map the second area.
905 			error = map_backing_store(addressSpace, secondCache,
906 				area->cache_offset, area->name, secondSize, area->wiring,
907 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
908 				&addressRestrictions, kernel, &secondArea, NULL);
909 		}
910 
911 		if (error != B_OK) {
912 			// Restore the original cache.
913 			cache->Resize(cache->virtual_base + oldSize, priority);
914 
915 			// Move the pages back.
916 			status_t readoptStatus = cache->Adopt(secondCache,
917 				area->cache_offset, secondSize, adoptOffset);
918 			if (readoptStatus != B_OK) {
919 				// Some (swap) pages have not been moved back and will be lost
920 				// once the second cache is deleted.
921 				panic("failed to restore cache range: %s",
922 					strerror(readoptStatus));
923 
924 				// TODO: Handle out of memory cases by freeing memory and
925 				// retrying.
926 			}
927 
928 			cache->ReleaseRefAndUnlock();
929 			secondCache->ReleaseRefAndUnlock();
930 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
931 			free_etc(areaNewProtections, allocationFlags);
932 			free_etc(secondAreaNewProtections, allocationFlags);
933 			return error;
934 		}
935 
936 		// Now we can unlock it.
937 		cache->ReleaseRefAndUnlock();
938 		secondCache->Unlock();
939 	} else {
940 		error = map_backing_store(addressSpace, cache, area->cache_offset
941 			+ (secondBase - area->Base()),
942 			area->name, secondSize, area->wiring, area->protection,
943 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
944 			&addressRestrictions, kernel, &secondArea, NULL);
945 		if (error != B_OK) {
946 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
947 			free_etc(areaNewProtections, allocationFlags);
948 			free_etc(secondAreaNewProtections, allocationFlags);
949 			return error;
950 		}
951 		// We need a cache reference for the new area.
952 		cache->AcquireRefLocked();
953 	}
954 
955 	if (area->page_protections != NULL) {
956 		// Copy the protection bits of the first area.
957 		size_t areaBytes = area_page_protections_size(area->Size());
958 		memcpy(areaNewProtections, area->page_protections, areaBytes);
959 		uint8* areaOldProtections = area->page_protections;
960 		area->page_protections = areaNewProtections;
961 
962 		// Shift the protection bits of the second area to the start of
963 		// the old array.
964 		size_t oldBytes = area_page_protections_size(oldSize);
965 		addr_t secondAreaOffset = secondBase - area->Base();
966 		ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE;
967 		bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4));
968 
969 		// Copy the protection bits of the second area.
970 		size_t secondAreaBytes = area_page_protections_size(secondSize);
971 		memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes);
972 		secondArea->page_protections = secondAreaNewProtections;
973 
974 		// We don't need this anymore.
975 		free_etc(areaOldProtections, allocationFlags);
976 
977 		// Set the correct page protections for the second area.
978 		VMTranslationMap* map = addressSpace->TranslationMap();
979 		map->Lock();
980 		for (VMCachePagesTree::Iterator it
981 				= secondArea->cache->pages.GetIterator();
982 				vm_page* page = it.Next();) {
983 			if (is_page_in_area(secondArea, page)) {
984 				addr_t address = virtual_page_address(secondArea, page);
985 				uint32 pageProtection
986 					= get_area_page_protection(secondArea, address);
987 				map->ProtectPage(secondArea, address, pageProtection);
988 			}
989 		}
990 		map->Unlock();
991 	}
992 
993 	if (_secondArea != NULL)
994 		*_secondArea = secondArea;
995 
996 	return B_OK;
997 }
998 
999 
1000 /*!	Deletes or cuts all areas in the given address range.
1001 	The address space must be write-locked.
1002 	The caller must ensure that no part of the given range is wired.
1003 */
1004 static status_t
1005 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1006 	bool kernel)
1007 {
1008 	size = PAGE_ALIGN(size);
1009 
1010 	// Check whether the caller is allowed to modify the concerned areas.
1011 	if (!kernel) {
1012 		for (VMAddressSpace::AreaRangeIterator it
1013 				= addressSpace->GetAreaRangeIterator(address, size);
1014 			VMArea* area = it.Next();) {
1015 
1016 			if ((area->protection & B_KERNEL_AREA) != 0) {
1017 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
1018 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
1019 					team_get_current_team_id(), area->id, area->name);
1020 				return B_NOT_ALLOWED;
1021 			}
1022 		}
1023 	}
1024 
1025 	for (VMAddressSpace::AreaRangeIterator it
1026 			= addressSpace->GetAreaRangeIterator(address, size);
1027 		VMArea* area = it.Next();) {
1028 
1029 		status_t error = cut_area(addressSpace, area, address, size, NULL,
1030 			kernel);
1031 		if (error != B_OK)
1032 			return error;
1033 			// Failing after already messing with areas is ugly, but we
1034 			// can't do anything about it.
1035 	}
1036 
1037 	return B_OK;
1038 }
1039 
1040 
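/*!	Discards the pages of the given range from the area's cache, but only if
	the area is the cache's sole user and the cache is an anonymous (RAM)
	cache; otherwise the range is left untouched and \c B_OK is returned.
*/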
1041 static status_t
1042 discard_area_range(VMArea* area, addr_t address, addr_t size)
1043 {
1044 	addr_t offset;
1045 	if (!intersect_area(area, address, size, offset))
1046 		return B_OK;
1047 
1048 	// If someone else uses the area's cache or it's not an anonymous cache, we
1049 	// can't discard.
1050 	VMCache* cache = vm_area_get_locked_cache(area);
1051 	if (cache->areas != area || area->cache_next != NULL
1052 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
1053 		return B_OK;
1054 	}
1055 
1056 	VMCacheChainLocker cacheChainLocker(cache);
1057 	cacheChainLocker.LockAllSourceCaches();
1058 
1059 	unmap_pages(area, address, size);
1060 
1061 	// Since VMCache::Discard() can temporarily drop the lock, we must
1062 	// unlock all lower caches to prevent locking order inversion.
1063 	cacheChainLocker.Unlock(cache);
1064 	cache->Discard(cache->virtual_base + offset, size);
1065 	cache->ReleaseRefAndUnlock();
1066 
1067 	return B_OK;
1068 }
1069 
1070 
1071 static status_t
1072 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1073 	bool kernel)
1074 {
1075 	for (VMAddressSpace::AreaRangeIterator it
1076 		= addressSpace->GetAreaRangeIterator(address, size);
1077 			VMArea* area = it.Next();) {
1078 		status_t error = discard_area_range(area, address, size);
1079 		if (error != B_OK)
1080 			return error;
1081 	}
1082 
1083 	return B_OK;
1084 }
1085 
1086 
1087 /*! You need to hold the lock of the cache and the write lock of the address
1088 	space when calling this function.
1089 	Note that in case of error your cache will be temporarily unlocked.
1090 	If \a addressSpec is \c B_EXACT_ADDRESS and the
1091 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
1092 	that no part of the specified address range (base \c *_virtualAddress, size
1093 	\a size) is wired. The cache will also be temporarily unlocked.
1094 */
1095 static status_t
1096 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
1097 	const char* areaName, addr_t size, int wiring, int protection,
1098 	int protectionMax, int mapping,
1099 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
1100 	bool kernel, VMArea** _area, void** _virtualAddress)
1101 {
1102 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
1103 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
1104 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
1105 		addressSpace, cache, addressRestrictions->address, offset, size,
1106 		addressRestrictions->address_specification, wiring, protection,
1107 		protectionMax, _area, areaName));
1108 	cache->AssertLocked();
1109 
1110 	if (size == 0) {
1111 #if KDEBUG
1112 		panic("map_backing_store(): called with size=0 for area '%s'!",
1113 			areaName);
1114 #endif
1115 		return B_BAD_VALUE;
1116 	}
1117 	if (offset < 0)
1118 		return B_BAD_VALUE;
1119 
1120 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
1121 		| HEAP_DONT_LOCK_KERNEL_SPACE;
1122 	int priority;
1123 	if (addressSpace != VMAddressSpace::Kernel()) {
1124 		priority = VM_PRIORITY_USER;
1125 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
1126 		priority = VM_PRIORITY_VIP;
1127 		allocationFlags |= HEAP_PRIORITY_VIP;
1128 	} else
1129 		priority = VM_PRIORITY_SYSTEM;
1130 
1131 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
1132 		allocationFlags);
1133 	if (area == NULL)
1134 		return B_NO_MEMORY;
1135 	if (mapping != REGION_PRIVATE_MAP)
1136 		area->protection_max = protectionMax & B_USER_PROTECTION;
1137 
1138 	status_t status;
1139 
1140 	// if this is a private map, we need to create a new cache
1141 	// to handle the private copies of pages as they are written to
1142 	VMCache* sourceCache = cache;
1143 	if (mapping == REGION_PRIVATE_MAP) {
1144 		VMCache* newCache;
1145 
1146 		// create an anonymous cache
1147 		status = VMCacheFactory::CreateAnonymousCache(newCache,
1148 			(protection & B_STACK_AREA) != 0
1149 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
1150 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
1151 		if (status != B_OK)
1152 			goto err1;
1153 
1154 		newCache->Lock();
1155 		newCache->temporary = 1;
1156 		newCache->virtual_base = offset;
1157 		newCache->virtual_end = offset + size;
1158 
1159 		cache->AddConsumer(newCache);
1160 
1161 		cache = newCache;
1162 	}
1163 
1164 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
1165 		status = cache->SetMinimalCommitment(size, priority);
1166 		if (status != B_OK)
1167 			goto err2;
1168 	}
1169 
1170 	// check to see if this address space has entered DELETE state
1171 	if (addressSpace->IsBeingDeleted()) {
1172 		// okay, someone is trying to delete this address space now, so we can't
1173 		// insert the area, so back out
1174 		status = B_BAD_TEAM_ID;
1175 		goto err2;
1176 	}
1177 
1178 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1179 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1180 		// temporarily unlock the current cache since it might be mapped to
1181 		// some existing area, and unmap_address_range also needs to lock that
1182 		// cache to delete the area.
1183 		cache->Unlock();
1184 		status = unmap_address_range(addressSpace,
1185 			(addr_t)addressRestrictions->address, size, kernel);
1186 		cache->Lock();
1187 		if (status != B_OK)
1188 			goto err2;
1189 	}
1190 
1191 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1192 		allocationFlags, _virtualAddress);
1193 	if (status == B_NO_MEMORY
1194 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1195 		// Due to how many locks are held, we cannot wait here for space to be
1196 		// freed up, but we can at least notify the low_resource handler.
1197 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1198 	}
1199 	if (status != B_OK)
1200 		goto err2;
1201 
1202 	// attach the cache to the area
1203 	area->cache = cache;
1204 	area->cache_offset = offset;
1205 
1206 	// point the cache back to the area
1207 	cache->InsertAreaLocked(area);
1208 	if (mapping == REGION_PRIVATE_MAP)
1209 		cache->Unlock();
1210 
1211 	// insert the area in the global areas map
1212 	status = VMAreas::Insert(area);
1213 	if (status != B_OK)
1214 		goto err3;
1215 
1216 	// grab a ref to the address space (the area holds this)
1217 	addressSpace->Get();
1218 
1219 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1220 //		cache, sourceCache, areaName, area);
1221 
1222 	*_area = area;
1223 	return B_OK;
1224 
1225 err3:
1226 	cache->Lock();
1227 	cache->RemoveArea(area);
1228 	area->cache = NULL;
1229 err2:
1230 	if (mapping == REGION_PRIVATE_MAP) {
1231 		// We created this cache, so we must delete it again. Note that we
1232 		// need to temporarily unlock the source cache or we'll otherwise
1233 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1234 		sourceCache->Unlock();
1235 		cache->ReleaseRefAndUnlock();
1236 		sourceCache->Lock();
1237 	}
1238 err1:
1239 	addressSpace->DeleteArea(area, allocationFlags);
1240 	return status;
1241 }
1242 
1243 
1244 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1245 	  locker1, locker2).
1246 */
1247 template<typename LockerType1, typename LockerType2>
1248 static inline bool
1249 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1250 {
1251 	area->cache->AssertLocked();
1252 
1253 	VMAreaUnwiredWaiter waiter;
1254 	if (!area->AddWaiterIfWired(&waiter))
1255 		return false;
1256 
1257 	// unlock everything and wait
1258 	if (locker1 != NULL)
1259 		locker1->Unlock();
1260 	if (locker2 != NULL)
1261 		locker2->Unlock();
1262 
1263 	waiter.waitEntry.Wait();
1264 
1265 	return true;
1266 }
1267 
1268 
1269 /*!	Checks whether the given area has any wired ranges intersecting with the
1270 	specified range and waits, if so.
1271 
1272 	When it has to wait, the function calls \c Unlock() on both \a locker1
1273 	and \a locker2, if given.
1274 	The area's top cache must be locked and must be unlocked as a side effect
1275 	of calling \c Unlock() on either \a locker1 or \a locker2.
1276 
1277 	If the function does not have to wait it does not modify or unlock any
1278 	object.
1279 
1280 	\param area The area to be checked.
1281 	\param base The base address of the range to check.
1282 	\param locker1 An object to be unlocked before starting to wait (may
1283 		be \c NULL).
1284 	\param locker2 An object to be unlocked before starting to wait (may
1285 		be \c NULL).
1286 		be \c NULL).
1287 	\return \c true, if the function had to wait, \c false otherwise.
1288 */
1289 template<typename LockerType1, typename LockerType2>
1290 static inline bool
1291 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1292 	LockerType1* locker1, LockerType2* locker2)
1293 {
1294 	area->cache->AssertLocked();
1295 
1296 	VMAreaUnwiredWaiter waiter;
1297 	if (!area->AddWaiterIfWired(&waiter, base, size))
1298 		return false;
1299 
1300 	// unlock everything and wait
1301 	if (locker1 != NULL)
1302 		locker1->Unlock();
1303 	if (locker2 != NULL)
1304 		locker2->Unlock();
1305 
1306 	waiter.waitEntry.Wait();
1307 
1308 	return true;
1309 }
1310 
1311 
1312 /*!	Checks whether the given address space has any wired ranges intersecting
1313 	with the specified range and waits, if so.
1314 
1315 	Similar to wait_if_area_range_is_wired(), with the following differences:
1316 	- All areas intersecting with the range are checked (respectively all until
1317 	  one is found that contains a wired range intersecting with the given
1318 	  range).
1319 	- The given address space must at least be read-locked and must be unlocked
1320 	  when \c Unlock() is called on \a locker.
1321 	- None of the areas' caches are allowed to be locked.
1322 */
1323 template<typename LockerType>
1324 static inline bool
1325 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1326 	size_t size, LockerType* locker)
1327 {
1328 	for (VMAddressSpace::AreaRangeIterator it
1329 		= addressSpace->GetAreaRangeIterator(base, size);
1330 			VMArea* area = it.Next();) {
1331 
1332 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1333 
1334 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1335 			return true;
1336 	}
1337 
1338 	return false;
1339 }
1340 
1341 
1342 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1343 	It must be called in a situation where the kernel address space may be
1344 	locked.
1345 */
1346 status_t
1347 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1348 {
1349 	AddressSpaceReadLocker locker;
1350 	VMArea* area;
1351 	status_t status = locker.SetFromArea(id, area);
1352 	if (status != B_OK)
1353 		return status;
1354 
1355 	if (area->page_protections == NULL) {
1356 		status = allocate_area_page_protections(area);
1357 		if (status != B_OK)
1358 			return status;
1359 	}
1360 
1361 	*cookie = (void*)area;
1362 	return B_OK;
1363 }
1364 
1365 
1366 /*!	This is a debug helper function that can only be used in very specific
1367 	use cases.
1368 	Sets protection for the given address range to the protection specified.
1369 	If \a protection is 0 then the involved pages will be marked non-present
1370 	in the translation map to cause a fault on access. The pages aren't
1371 	actually unmapped however so that they can be marked present again with
1372 	additional calls to this function. For this to work the area must be
1373 	fully locked in memory so that the pages aren't otherwise touched.
1374 	This function does not lock the kernel address space and needs to be
1375 	supplied with a \a cookie retrieved from a successful call to
1376 	vm_prepare_kernel_area_debug_protection().
1377 */
1378 status_t
1379 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1380 	uint32 protection)
1381 {
1382 	// check address range
1383 	addr_t address = (addr_t)_address;
1384 	size = PAGE_ALIGN(size);
1385 
1386 	if ((address % B_PAGE_SIZE) != 0
1387 		|| (addr_t)address + size < (addr_t)address
1388 		|| !IS_KERNEL_ADDRESS(address)
1389 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1390 		return B_BAD_VALUE;
1391 	}
1392 
1393 	// Translate the kernel protection to user protection as we only store that.
1394 	if ((protection & B_KERNEL_READ_AREA) != 0)
1395 		protection |= B_READ_AREA;
1396 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1397 		protection |= B_WRITE_AREA;
1398 
1399 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1400 	VMTranslationMap* map = addressSpace->TranslationMap();
1401 	VMArea* area = (VMArea*)cookie;
1402 
1403 	addr_t offset = address - area->Base();
1404 	if (area->Size() - offset < size) {
1405 		panic("protect range not fully within supplied area");
1406 		return B_BAD_VALUE;
1407 	}
1408 
1409 	if (area->page_protections == NULL) {
1410 		panic("area has no page protections");
1411 		return B_BAD_VALUE;
1412 	}
1413 
1414 	// Invalidate the mapping entries so any access to them will fault, or
1415 	// restore the mapping entries unchanged so that lookup will succeed again.
1416 	map->Lock();
1417 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1418 	map->Unlock();
1419 
1420 	// And set the proper page protections so that the fault case will actually
1421 	// fail and not simply try to map a new page.
1422 	for (addr_t pageAddress = address; pageAddress < address + size;
1423 			pageAddress += B_PAGE_SIZE) {
1424 		set_area_page_protection(area, pageAddress, protection);
1425 	}
1426 
1427 	return B_OK;
1428 }
1429 
1430 
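/*!	Blocks the given kernel address range by covering it with a zero-
	protection, B_ALREADY_WIRED area backed by an empty anonymous cache, so
	that the range cannot be reused or successfully accessed.
*/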
1431 status_t
1432 vm_block_address_range(const char* name, void* address, addr_t size)
1433 {
1434 	if (!arch_vm_supports_protection(0))
1435 		return B_NOT_SUPPORTED;
1436 
1437 	AddressSpaceWriteLocker locker;
1438 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1439 	if (status != B_OK)
1440 		return status;
1441 
1442 	VMAddressSpace* addressSpace = locker.AddressSpace();
1443 
1444 	// create an anonymous cache
1445 	VMCache* cache;
1446 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1447 		VM_PRIORITY_SYSTEM);
1448 	if (status != B_OK)
1449 		return status;
1450 
1451 	cache->temporary = 1;
1452 	cache->virtual_end = size;
1453 	cache->Lock();
1454 
1455 	VMArea* area;
1456 	virtual_address_restrictions addressRestrictions = {};
1457 	addressRestrictions.address = address;
1458 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1459 	status = map_backing_store(addressSpace, cache, 0, name, size,
1460 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
1461 		true, &area, NULL);
1462 	if (status != B_OK) {
1463 		cache->ReleaseRefAndUnlock();
1464 		return status;
1465 	}
1466 
1467 	cache->Unlock();
1468 	area->cache_type = CACHE_TYPE_RAM;
1469 	return area->id;
1470 }
1471 
1472 
1473 status_t
1474 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1475 {
1476 	AddressSpaceWriteLocker locker(team);
1477 	if (!locker.IsLocked())
1478 		return B_BAD_TEAM_ID;
1479 
1480 	VMAddressSpace* addressSpace = locker.AddressSpace();
1481 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1482 		addressSpace == VMAddressSpace::Kernel()
1483 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1484 }
1485 
1486 
1487 status_t
1488 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1489 	addr_t size, uint32 flags)
1490 {
1491 	if (size == 0)
1492 		return B_BAD_VALUE;
1493 
1494 	AddressSpaceWriteLocker locker(team);
1495 	if (!locker.IsLocked())
1496 		return B_BAD_TEAM_ID;
1497 
1498 	virtual_address_restrictions addressRestrictions = {};
1499 	addressRestrictions.address = *_address;
1500 	addressRestrictions.address_specification = addressSpec;
1501 	VMAddressSpace* addressSpace = locker.AddressSpace();
1502 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1503 		addressSpace == VMAddressSpace::Kernel()
1504 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1505 		_address);
1506 }
1507 
1508 
1509 area_id
1510 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1511 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1512 	const virtual_address_restrictions* virtualAddressRestrictions,
1513 	const physical_address_restrictions* physicalAddressRestrictions,
1514 	bool kernel, void** _address)
1515 {
1516 	VMArea* area;
1517 	VMCache* cache;
1518 	vm_page* page = NULL;
1519 	bool isStack = (protection & B_STACK_AREA) != 0;
1520 	page_num_t guardPages;
1521 	bool canOvercommit = false;
1522 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1523 		? VM_PAGE_ALLOC_CLEAR : 0;
1524 
1525 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1526 		team, name, size));
1527 
1528 	size = PAGE_ALIGN(size);
1529 	guardSize = PAGE_ALIGN(guardSize);
1530 	guardPages = guardSize / B_PAGE_SIZE;
1531 
1532 	if (size == 0 || size < guardSize)
1533 		return B_BAD_VALUE;
1534 	if (!arch_vm_supports_protection(protection))
1535 		return B_NOT_SUPPORTED;
1536 
1537 	if (team == B_CURRENT_TEAM)
1538 		team = VMAddressSpace::CurrentID();
1539 	if (team < 0)
1540 		return B_BAD_TEAM_ID;
1541 
1542 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1543 		canOvercommit = true;
1544 
1545 #ifdef DEBUG_KERNEL_STACKS
1546 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1547 		isStack = true;
1548 #endif
1549 
1550 	// check parameters
1551 	switch (virtualAddressRestrictions->address_specification) {
1552 		case B_ANY_ADDRESS:
1553 		case B_EXACT_ADDRESS:
1554 		case B_BASE_ADDRESS:
1555 		case B_ANY_KERNEL_ADDRESS:
1556 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1557 		case B_RANDOMIZED_ANY_ADDRESS:
1558 		case B_RANDOMIZED_BASE_ADDRESS:
1559 			break;
1560 
1561 		default:
1562 			return B_BAD_VALUE;
1563 	}
1564 
1565 	// If low or high physical address restrictions are given, we force
1566 	// B_CONTIGUOUS wiring, since only then we'll use
1567 	// vm_page_allocate_page_run() which deals with those restrictions.
1568 	if (physicalAddressRestrictions->low_address != 0
1569 		|| physicalAddressRestrictions->high_address != 0) {
1570 		wiring = B_CONTIGUOUS;
1571 	}
1572 
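	// The switch below normalizes the wiring mode: B_LOMEM and the 32-bit
	// variants are mapped to B_CONTIGUOUS/B_FULL_LOCK with a suitable
	// physical high-address restriction, and doReserveMemory is set for the
	// modes that must reserve their memory upfront.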
1573 	physical_address_restrictions stackPhysicalRestrictions;
1574 	bool doReserveMemory = false;
1575 	switch (wiring) {
1576 		case B_NO_LOCK:
1577 			break;
1578 		case B_FULL_LOCK:
1579 		case B_LAZY_LOCK:
1580 		case B_CONTIGUOUS:
1581 			doReserveMemory = true;
1582 			break;
1583 		case B_ALREADY_WIRED:
1584 			break;
1585 		case B_LOMEM:
1586 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1587 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1588 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1589 			wiring = B_CONTIGUOUS;
1590 			doReserveMemory = true;
1591 			break;
1592 		case B_32_BIT_FULL_LOCK:
1593 			if (B_HAIKU_PHYSICAL_BITS <= 32
1594 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1595 				wiring = B_FULL_LOCK;
1596 				doReserveMemory = true;
1597 				break;
1598 			}
1599 			// TODO: We don't really support this mode efficiently. Just fall
1600 			// through for now ...
1601 		case B_32_BIT_CONTIGUOUS:
1602 			#if B_HAIKU_PHYSICAL_BITS > 32
1603 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1604 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1605 					stackPhysicalRestrictions.high_address
1606 						= (phys_addr_t)1 << 32;
1607 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1608 				}
1609 			#endif
1610 			wiring = B_CONTIGUOUS;
1611 			doReserveMemory = true;
1612 			break;
1613 		default:
1614 			return B_BAD_VALUE;
1615 	}
1616 
1617 	// Optimization: For a single-page contiguous allocation without low/high
1618 	// memory restriction B_FULL_LOCK wiring suffices.
1619 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1620 		&& physicalAddressRestrictions->low_address == 0
1621 		&& physicalAddressRestrictions->high_address == 0) {
1622 		wiring = B_FULL_LOCK;
1623 	}
1624 
1625 	// For full lock or contiguous areas we're also going to map the pages and
1626 	// thus need to reserve pages for the mapping backend upfront.
1627 	addr_t reservedMapPages = 0;
1628 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1629 		AddressSpaceWriteLocker locker;
1630 		status_t status = locker.SetTo(team);
1631 		if (status != B_OK)
1632 			return status;
1633 
1634 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1635 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1636 	}
1637 
1638 	int priority;
1639 	if (team != VMAddressSpace::KernelID())
1640 		priority = VM_PRIORITY_USER;
1641 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1642 		priority = VM_PRIORITY_VIP;
1643 	else
1644 		priority = VM_PRIORITY_SYSTEM;
1645 
1646 	// Reserve memory before acquiring the address space lock. This reduces the
1647 	// chances of failure, since while holding the write lock to the address
1648 	// space (if it is the kernel address space that is), the low memory handler
1649 	// won't be able to free anything for us.
1650 	addr_t reservedMemory = 0;
1651 	if (doReserveMemory) {
1652 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1653 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1654 			return B_NO_MEMORY;
1655 		reservedMemory = size;
1656 		// TODO: We don't reserve the memory for the pages for the page
1657 		// directories/tables. We actually need to do so, since we currently
1658 		// don't reclaim them (and probably can't reclaim all of them anyway).
1659 		// Thus there are actually fewer physical pages than there should be,
1660 		// which can get the VM into trouble in low memory situations.
1661 	}
1662 
1663 	AddressSpaceWriteLocker locker;
1664 	VMAddressSpace* addressSpace;
1665 	status_t status;
1666 
1667 	// For full lock areas reserve the pages before locking the address
1668 	// space. E.g. block caches can't release their memory while we hold the
1669 	// address space lock.
1670 	page_num_t reservedPages = reservedMapPages;
1671 	if (wiring == B_FULL_LOCK)
1672 		reservedPages += size / B_PAGE_SIZE;
1673 
1674 	vm_page_reservation reservation;
1675 	if (reservedPages > 0) {
1676 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1677 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1678 					priority)) {
1679 				reservedPages = 0;
1680 				status = B_WOULD_BLOCK;
1681 				goto err0;
1682 			}
1683 		} else
1684 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1685 	}
1686 
1687 	if (wiring == B_CONTIGUOUS) {
1688 		// we try to allocate the page run here upfront as this may easily
1689 		// fail for obvious reasons
1690 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1691 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1692 		if (page == NULL) {
1693 			status = B_NO_MEMORY;
1694 			goto err0;
1695 		}
1696 	}
1697 
1698 	// Lock the address space and, if B_EXACT_ADDRESS and
1699 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1700 	// is not wired.
1701 	do {
1702 		status = locker.SetTo(team);
1703 		if (status != B_OK)
1704 			goto err1;
1705 
1706 		addressSpace = locker.AddressSpace();
1707 	} while (virtualAddressRestrictions->address_specification
1708 			== B_EXACT_ADDRESS
1709 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1710 		&& wait_if_address_range_is_wired(addressSpace,
1711 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1712 
1713 	// create an anonymous cache
1714 	// if it's a stack, make sure that at least two pages are available
1715 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1716 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1717 		wiring == B_NO_LOCK, priority);
1718 	if (status != B_OK)
1719 		goto err1;
1720 
1721 	cache->temporary = 1;
1722 	cache->virtual_end = size;
1723 	cache->committed_size = reservedMemory;
1724 		// TODO: This should be done via a method.
1725 	reservedMemory = 0;
1726 
1727 	cache->Lock();
1728 
1729 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1730 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1731 		virtualAddressRestrictions, kernel, &area, _address);
1732 
1733 	if (status != B_OK) {
1734 		cache->ReleaseRefAndUnlock();
1735 		goto err1;
1736 	}
1737 
1738 	locker.DegradeToReadLock();
1739 
1740 	switch (wiring) {
1741 		case B_NO_LOCK:
1742 		case B_LAZY_LOCK:
1743 			// do nothing - the pages are mapped in as needed
1744 			break;
1745 
1746 		case B_FULL_LOCK:
1747 		{
1748 			// Allocate and map all pages for this area
1749 
1750 			off_t offset = 0;
1751 			for (addr_t address = area->Base();
1752 					address < area->Base() + (area->Size() - 1);
1753 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1754 #ifdef DEBUG_KERNEL_STACKS
1755 #	ifdef STACK_GROWS_DOWNWARDS
1756 				if (isStack && address < area->Base()
1757 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1758 #	else
1759 				if (isStack && address >= area->Base() + area->Size()
1760 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1761 #	endif
1762 					continue;
1763 #endif
1764 				vm_page* page = vm_page_allocate_page(&reservation,
1765 					PAGE_STATE_WIRED | pageAllocFlags);
1766 				cache->InsertPage(page, offset);
1767 				map_page(area, page, address, protection, &reservation);
1768 
1769 				DEBUG_PAGE_ACCESS_END(page);
1770 			}
1771 
1772 			break;
1773 		}
1774 
1775 		case B_ALREADY_WIRED:
1776 		{
1777 			// The pages should already be mapped. This is only really useful
1778 			// during boot time. Find the appropriate vm_page objects and stick
1779 			// them in the cache object.
1780 			VMTranslationMap* map = addressSpace->TranslationMap();
1781 			off_t offset = 0;
1782 
1783 			if (!gKernelStartup)
1784 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1785 
1786 			map->Lock();
1787 
1788 			for (addr_t virtualAddress = area->Base();
1789 					virtualAddress < area->Base() + (area->Size() - 1);
1790 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1791 				phys_addr_t physicalAddress;
1792 				uint32 flags;
1793 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1794 				if (status < B_OK) {
1795 					panic("looking up mapping failed for va 0x%lx\n",
1796 						virtualAddress);
1797 				}
1798 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1799 				if (page == NULL) {
1800 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1801 						"\n", physicalAddress);
1802 				}
1803 
1804 				DEBUG_PAGE_ACCESS_START(page);
1805 
1806 				cache->InsertPage(page, offset);
1807 				increment_page_wired_count(page);
1808 				vm_page_set_state(page, PAGE_STATE_WIRED);
1809 				page->busy = false;
1810 
1811 				DEBUG_PAGE_ACCESS_END(page);
1812 			}
1813 
1814 			map->Unlock();
1815 			break;
1816 		}
1817 
1818 		case B_CONTIGUOUS:
1819 		{
1820 			// We have already allocated our contiguous page run, so we can now
1821 			// simply map it into the address space
1822 			VMTranslationMap* map = addressSpace->TranslationMap();
1823 			phys_addr_t physicalAddress
1824 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1825 			addr_t virtualAddress = area->Base();
1826 			off_t offset = 0;
1827 
1828 			map->Lock();
1829 
1830 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1831 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1832 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1833 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1834 				if (page == NULL)
1835 					panic("couldn't lookup physical page just allocated\n");
1836 
1837 				status = map->Map(virtualAddress, physicalAddress, protection,
1838 					area->MemoryType(), &reservation);
1839 				if (status < B_OK)
1840 					panic("couldn't map physical page in page run\n");
1841 
1842 				cache->InsertPage(page, offset);
1843 				increment_page_wired_count(page);
1844 
1845 				DEBUG_PAGE_ACCESS_END(page);
1846 			}
1847 
1848 			map->Unlock();
1849 			break;
1850 		}
1851 
1852 		default:
1853 			break;
1854 	}
1855 
1856 	cache->Unlock();
1857 
1858 	if (reservedPages > 0)
1859 		vm_page_unreserve_pages(&reservation);
1860 
1861 	TRACE(("vm_create_anonymous_area: done\n"));
1862 
1863 	area->cache_type = CACHE_TYPE_RAM;
1864 	return area->id;
1865 
1866 err1:
1867 	if (wiring == B_CONTIGUOUS) {
1868 		// we had allocated our contiguous page run upfront -- free it again
1869 		phys_addr_t pageNumber = page->physical_page_number;
1870 		int32 i;
1871 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1872 			page = vm_lookup_page(pageNumber);
1873 			if (page == NULL)
1874 				panic("couldn't lookup physical page just allocated\n");
1875 
1876 			vm_page_set_state(page, PAGE_STATE_FREE);
1877 		}
1878 	}
1879 
1880 err0:
1881 	if (reservedPages > 0)
1882 		vm_page_unreserve_pages(&reservation);
1883 	if (reservedMemory > 0)
1884 		vm_unreserve_memory(reservedMemory);
1885 
1886 	return status;
1887 }
1888 
1889 
1890 area_id
1891 vm_map_physical_memory(team_id team, const char* name, void** _address,
1892 	uint32 addressSpec, addr_t size, uint32 protection,
1893 	phys_addr_t physicalAddress, bool alreadyWired)
1894 {
1895 	VMArea* area;
1896 	VMCache* cache;
1897 	addr_t mapOffset;
1898 
1899 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1900 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1901 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1902 		addressSpec, size, protection, physicalAddress));
1903 
1904 	if (!arch_vm_supports_protection(protection))
1905 		return B_NOT_SUPPORTED;
1906 
1907 	AddressSpaceWriteLocker locker(team);
1908 	if (!locker.IsLocked())
1909 		return B_BAD_TEAM_ID;
1910 
1911 	// if the physical address is not page aligned,
1912 	// move the actual area down to align on a page boundary
1913 	mapOffset = physicalAddress % B_PAGE_SIZE;
1914 	size += mapOffset;
1915 	physicalAddress -= mapOffset;
1916 
1917 	size = PAGE_ALIGN(size);
1918 
1919 	// create a device cache
1920 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1921 	if (status != B_OK)
1922 		return status;
1923 
1924 	cache->virtual_end = size;
1925 
1926 	cache->Lock();
1927 
1928 	virtual_address_restrictions addressRestrictions = {};
1929 	addressRestrictions.address = *_address;
1930 	addressRestrictions.address_specification = addressSpec & ~B_MEMORY_TYPE_MASK;
1931 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1932 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, CREATE_AREA_DONT_COMMIT_MEMORY,
1933 		&addressRestrictions, true, &area, _address);
1934 
1935 	if (status < B_OK)
1936 		cache->ReleaseRefLocked();
1937 
1938 	cache->Unlock();
1939 
1940 	if (status == B_OK) {
1941 		// Set requested memory type -- default to uncached, but allow
1942 		// that to be overridden by ranges that may already exist.
1943 		uint32 memoryType = addressSpec & B_MEMORY_TYPE_MASK;
1944 		const bool weak = (memoryType == 0);
1945 		if (weak)
1946 			memoryType = B_UNCACHED_MEMORY;
1947 
1948 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType,
1949 			weak ? &memoryType : NULL);
1950 
1951 		area->SetMemoryType(memoryType);
1952 
1953 		if (status != B_OK)
1954 			delete_area(locker.AddressSpace(), area, false);
1955 	}
1956 
1957 	if (status != B_OK)
1958 		return status;
1959 
1960 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1961 
1962 	if (alreadyWired) {
1963 		// The area is already mapped, but possibly not with the right
1964 		// memory type.
1965 		map->Lock();
1966 		map->ProtectArea(area, area->protection);
1967 		map->Unlock();
1968 	} else {
1969 		// Map the area completely.
1970 
1971 		// reserve pages needed for the mapping
1972 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1973 			area->Base() + (size - 1));
1974 		vm_page_reservation reservation;
1975 		vm_page_reserve_pages(&reservation, reservePages,
1976 			team == VMAddressSpace::KernelID()
1977 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1978 
1979 		map->Lock();
1980 
1981 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1982 			map->Map(area->Base() + offset, physicalAddress + offset,
1983 				protection, area->MemoryType(), &reservation);
1984 		}
1985 
1986 		map->Unlock();
1987 
1988 		vm_page_unreserve_pages(&reservation);
1989 	}
1990 
1991 	// modify the returned pointer so it is offset into the new area
1992 	// the same way the physical address passed in was offset
1993 	*_address = (void*)((addr_t)*_address + mapOffset);
1994 
1995 	area->cache_type = CACHE_TYPE_DEVICE;
1996 	return area->id;
1997 }
1998 
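// A minimal usage sketch for vm_map_physical_memory() (illustrative only --
// the "frame buffer" name and the caller-provided base/length below are
// assumptions, not taken from this file):
#if 0
static area_id
example_map_frame_buffer(phys_addr_t physicalBase, size_t length)
{
	void* virtualBase = NULL;
	area_id area = vm_map_physical_memory(VMAddressSpace::KernelID(),
		"frame buffer", &virtualBase, B_ANY_KERNEL_ADDRESS, length,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, physicalBase, false);
	if (area < 0)
		return area;

	// virtualBase now points to the first requested byte; it is offset into
	// the area the same way physicalBase was offset within its page.
	return area;
}
#endif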
1999 
2000 /*!	Don't use!
2001 	TODO: This function was introduced to map physical page vecs to
2002 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
2003 	use a device cache and does not track vm_page::wired_count!
2004 */
2005 area_id
2006 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
2007 	uint32 addressSpec, addr_t* _size, uint32 protection,
2008 	struct generic_io_vec* vecs, uint32 vecCount)
2009 {
2010 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
2011 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
2012 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
2013 		addressSpec, _size, protection, vecs, vecCount));
2014 
2015 	if (!arch_vm_supports_protection(protection)
2016 		|| (addressSpec & B_MEMORY_TYPE_MASK) != 0) {
2017 		return B_NOT_SUPPORTED;
2018 	}
2019 
2020 	AddressSpaceWriteLocker locker(team);
2021 	if (!locker.IsLocked())
2022 		return B_BAD_TEAM_ID;
2023 
2024 	if (vecCount == 0)
2025 		return B_BAD_VALUE;
2026 
2027 	addr_t size = 0;
2028 	for (uint32 i = 0; i < vecCount; i++) {
2029 		if (vecs[i].base % B_PAGE_SIZE != 0
2030 			|| vecs[i].length % B_PAGE_SIZE != 0) {
2031 			return B_BAD_VALUE;
2032 		}
2033 
2034 		size += vecs[i].length;
2035 	}
2036 
2037 	// create a device cache
2038 	VMCache* cache;
2039 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
2040 	if (result != B_OK)
2041 		return result;
2042 
2043 	cache->virtual_end = size;
2044 
2045 	cache->Lock();
2046 
2047 	VMArea* area;
2048 	virtual_address_restrictions addressRestrictions = {};
2049 	addressRestrictions.address = *_address;
2050 	addressRestrictions.address_specification = addressSpec & ~B_MEMORY_TYPE_MASK;
2051 	result = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
2052 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, CREATE_AREA_DONT_COMMIT_MEMORY,
2053 		&addressRestrictions, true, &area, _address);
2054 
2055 	if (result != B_OK)
2056 		cache->ReleaseRefLocked();
2057 
2058 	cache->Unlock();
2059 
2060 	if (result != B_OK)
2061 		return result;
2062 
2063 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2064 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
2065 		area->Base() + (size - 1));
2066 
2067 	vm_page_reservation reservation;
2068 	vm_page_reserve_pages(&reservation, reservePages,
2069 			team == VMAddressSpace::KernelID()
2070 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2071 	map->Lock();
2072 
2073 	uint32 vecIndex = 0;
2074 	size_t vecOffset = 0;
2075 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2076 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
2077 			vecOffset = 0;
2078 			vecIndex++;
2079 		}
2080 
2081 		if (vecIndex >= vecCount)
2082 			break;
2083 
2084 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
2085 			protection, area->MemoryType(), &reservation);
2086 
2087 		vecOffset += B_PAGE_SIZE;
2088 	}
2089 
2090 	map->Unlock();
2091 	vm_page_unreserve_pages(&reservation);
2092 
2093 	if (_size != NULL)
2094 		*_size = size;
2095 
2096 	area->cache_type = CACHE_TYPE_DEVICE;
2097 	return area->id;
2098 }
2099 
2100 
2101 area_id
2102 vm_create_null_area(team_id team, const char* name, void** address,
2103 	uint32 addressSpec, addr_t size, uint32 flags)
2104 {
2105 	size = PAGE_ALIGN(size);
2106 
2107 	// Lock the address space and, if B_EXACT_ADDRESS and
2108 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
2109 	// is not wired.
2110 	AddressSpaceWriteLocker locker;
2111 	do {
2112 		if (locker.SetTo(team) != B_OK)
2113 			return B_BAD_TEAM_ID;
2114 	} while (addressSpec == B_EXACT_ADDRESS
2115 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
2116 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2117 			(addr_t)*address, size, &locker));
2118 
2119 	// create a null cache
2120 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
2121 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
2122 	VMCache* cache;
2123 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
2124 	if (status != B_OK)
2125 		return status;
2126 
2127 	cache->temporary = 1;
2128 	cache->virtual_end = size;
2129 
2130 	cache->Lock();
2131 
2132 	VMArea* area;
2133 	virtual_address_restrictions addressRestrictions = {};
2134 	addressRestrictions.address = *address;
2135 	addressRestrictions.address_specification = addressSpec;
2136 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
2137 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
2138 		REGION_NO_PRIVATE_MAP, flags | CREATE_AREA_DONT_COMMIT_MEMORY,
2139 		&addressRestrictions, true, &area, address);
2140 
2141 	if (status < B_OK) {
2142 		cache->ReleaseRefAndUnlock();
2143 		return status;
2144 	}
2145 
2146 	cache->Unlock();
2147 
2148 	area->cache_type = CACHE_TYPE_NULL;
2149 	return area->id;
2150 }
2151 
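// A usage sketch for vm_create_null_area() (illustrative only; the name and
// the idea of using the area as an address space placeholder are assumptions):
#if 0
static area_id
example_reserve_placeholder(size_t size)
{
	void* address = NULL;
	return vm_create_null_area(VMAddressSpace::KernelID(), "placeholder",
		&address, B_ANY_KERNEL_ADDRESS, size, 0);
}
#endif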
2152 
2153 /*!	Creates the vnode cache for the specified \a vnode.
2154 	The vnode has to be marked busy when calling this function.
2155 */
2156 status_t
2157 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
2158 {
2159 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2160 }
2161 
2162 
2163 /*!	\a cache must be locked. The area's address space must be read-locked.
2164 */
2165 static void
2166 pre_map_area_pages(VMArea* area, VMCache* cache,
2167 	vm_page_reservation* reservation, int32 maxCount)
2168 {
2169 	addr_t baseAddress = area->Base();
2170 	addr_t cacheOffset = area->cache_offset;
2171 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2172 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
2173 
2174 	VMCachePagesTree::Iterator it = cache->pages.GetIterator(firstPage, true, true);
2175 	vm_page* page;
2176 	while ((page = it.Next()) != NULL && maxCount > 0) {
2177 		if (page->cache_offset >= endPage)
2178 			break;
2179 
2180 		// skip busy and inactive pages
2181 		if (page->busy || (page->usage_count == 0 && !page->accessed))
2182 			continue;
2183 
2184 		DEBUG_PAGE_ACCESS_START(page);
2185 		map_page(area, page,
2186 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2187 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2188 		maxCount--;
2189 		DEBUG_PAGE_ACCESS_END(page);
2190 	}
2191 }
2192 
2193 
2194 /*!	Will map the file specified by \a fd to an area in memory.
2195 	The file will be mirrored beginning at the specified \a offset. The
2196 	\a offset and \a size arguments have to be page aligned.
2197 */
2198 static area_id
2199 _vm_map_file(team_id team, const char* name, void** _address,
2200 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2201 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2202 {
2203 	// TODO: for binary files, we want to make sure that they get a
2204 	//	consistent copy of the file at mapping time, i.e. later changes
2205 	//	should not make it into the mapped copy -- this will need quite some
2206 	//	changes to be done in a nice way
2207 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2208 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2209 
2210 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2211 	size = PAGE_ALIGN(size);
2212 
2213 	if (mapping == REGION_NO_PRIVATE_MAP)
2214 		protection |= B_SHARED_AREA;
2215 	if (addressSpec != B_EXACT_ADDRESS)
2216 		unmapAddressRange = false;
2217 
2218 	uint32 mappingFlags = 0;
2219 	if (unmapAddressRange)
2220 		mappingFlags |= CREATE_AREA_UNMAP_ADDRESS_RANGE;
2221 
2222 	if (fd < 0) {
2223 		virtual_address_restrictions virtualRestrictions = {};
2224 		virtualRestrictions.address = *_address;
2225 		virtualRestrictions.address_specification = addressSpec;
2226 		physical_address_restrictions physicalRestrictions = {};
2227 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2228 			mappingFlags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2229 			_address);
2230 	}
2231 
2232 	// get the open flags of the FD
2233 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2234 	if (descriptor == NULL)
2235 		return EBADF;
2236 	int32 openMode = descriptor->open_mode;
2237 	put_fd(descriptor);
2238 
2239 	// The FD must be open for reading in any case. For a shared mapping with
2240 	// write access, the FD must additionally be open for writing.
2241 	if ((openMode & O_ACCMODE) == O_WRONLY
2242 		|| (mapping == REGION_NO_PRIVATE_MAP
2243 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2244 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2245 		return EACCES;
2246 	}
2247 
2248 	uint32 protectionMax = 0;
2249 	if (mapping == REGION_NO_PRIVATE_MAP) {
2250 		if ((openMode & O_ACCMODE) == O_RDWR)
2251 			protectionMax = protection | B_USER_PROTECTION;
2252 		else
2253 			protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA);
2254 	} else if (mapping == REGION_PRIVATE_MAP) {
2255 		// For privately mapped read-only regions, skip committing memory.
2256 		// (If protections are changed later on, memory will be committed then.)
2257 		if ((protection & B_WRITE_AREA) == 0)
2258 			mappingFlags |= CREATE_AREA_DONT_COMMIT_MEMORY;
2259 	}
2260 
2261 	// get the vnode for the object, this also grabs a ref to it
2262 	struct vnode* vnode = NULL;
2263 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2264 	if (status < B_OK)
2265 		return status;
2266 	VnodePutter vnodePutter(vnode);
2267 
2268 	// If we're going to pre-map pages, we need to reserve the pages needed by
2269 	// the mapping backend upfront.
2270 	page_num_t reservedPreMapPages = 0;
2271 	vm_page_reservation reservation;
2272 	if ((protection & B_READ_AREA) != 0) {
2273 		AddressSpaceWriteLocker locker;
2274 		status = locker.SetTo(team);
2275 		if (status != B_OK)
2276 			return status;
2277 
2278 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2279 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2280 
2281 		locker.Unlock();
2282 
2283 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2284 			team == VMAddressSpace::KernelID()
2285 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2286 	}
2287 
2288 	struct PageUnreserver {
2289 		PageUnreserver(vm_page_reservation* reservation)
2290 			:
2291 			fReservation(reservation)
2292 		{
2293 		}
2294 
2295 		~PageUnreserver()
2296 		{
2297 			if (fReservation != NULL)
2298 				vm_page_unreserve_pages(fReservation);
2299 		}
2300 
2301 		vm_page_reservation* fReservation;
2302 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2303 
2304 	// Lock the address space and, if the specified address range shall be
2305 	// unmapped, ensure it is not wired.
2306 	AddressSpaceWriteLocker locker;
2307 	do {
2308 		if (locker.SetTo(team) != B_OK)
2309 			return B_BAD_TEAM_ID;
2310 	} while (unmapAddressRange
2311 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2312 			(addr_t)*_address, size, &locker));
2313 
2314 	// TODO: this only works for file systems that use the file cache
2315 	VMCache* cache;
2316 	status = vfs_get_vnode_cache(vnode, &cache, false);
2317 	if (status < B_OK)
2318 		return status;
2319 
2320 	cache->Lock();
2321 
2322 	VMArea* area;
2323 	virtual_address_restrictions addressRestrictions = {};
2324 	addressRestrictions.address = *_address;
2325 	addressRestrictions.address_specification = addressSpec;
2326 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2327 		0, protection, protectionMax, mapping, mappingFlags,
2328 		&addressRestrictions, kernel, &area, _address);
2329 
2330 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2331 		// map_backing_store() cannot know we no longer need the ref
2332 		cache->ReleaseRefLocked();
2333 	}
2334 
2335 	if (status == B_OK && (protection & B_READ_AREA) != 0) {
2336 		// Pre-map at most 10MB worth of pages.
2337 		pre_map_area_pages(area, cache, &reservation,
2338 			(10LL * 1024 * 1024) / B_PAGE_SIZE);
2339 	}
2340 
2341 	cache->Unlock();
2342 
2343 	if (status == B_OK) {
2344 		// TODO: this probably deserves a smarter solution, e.g. triggering
2345 		// the prefetch somewhere else.
2346 
2347 		// Prefetch at most 10MB starting from "offset", but only if the cache
2348 		// doesn't already contain more pages than the prefetch size.
2349 		const size_t prefetch = min_c(size, 10LL * 1024 * 1024);
2350 		if (cache->page_count < (prefetch / B_PAGE_SIZE))
2351 			cache_prefetch_vnode(vnode, offset, prefetch);
2352 	}
2353 
2354 	if (status != B_OK)
2355 		return status;
2356 
2357 	area->cache_type = CACHE_TYPE_VNODE;
2358 	return area->id;
2359 }
2360 
2361 
2362 area_id
2363 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2364 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2365 	int fd, off_t offset)
2366 {
2367 	if (!arch_vm_supports_protection(protection))
2368 		return B_NOT_SUPPORTED;
2369 
2370 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2371 		mapping, unmapAddressRange, fd, offset, true);
2372 }
2373 
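// A usage sketch for vm_map_file() (illustrative only; the fd and size are
// assumed to be supplied by the caller, and only the read-only shared case is
// shown):
#if 0
static area_id
example_map_file_read_only(int fd, size_t size)
{
	void* address = NULL;
	return vm_map_file(VMAddressSpace::KernelID(), "mapped file", &address,
		B_ANY_KERNEL_ADDRESS, size, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP,
		false, fd, 0);
}
#endif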
2374 
2375 VMCache*
2376 vm_area_get_locked_cache(VMArea* area)
2377 {
2378 	rw_lock_read_lock(&sAreaCacheLock);
2379 
2380 	while (true) {
2381 		VMCache* cache = area->cache;
2382 
2383 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2384 			// cache has been deleted
2385 			rw_lock_read_lock(&sAreaCacheLock);
2386 			continue;
2387 		}
2388 
2389 		rw_lock_read_lock(&sAreaCacheLock);
2390 
2391 		if (cache == area->cache) {
2392 			cache->AcquireRefLocked();
2393 			rw_lock_read_unlock(&sAreaCacheLock);
2394 			return cache;
2395 		}
2396 
2397 		// the cache changed in the meantime
2398 		cache->Unlock();
2399 	}
2400 }
2401 
2402 
2403 void
2404 vm_area_put_locked_cache(VMCache* cache)
2405 {
2406 	cache->ReleaseRefAndUnlock();
2407 }
2408 
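// vm_area_get_locked_cache() and vm_area_put_locked_cache() form a get/put
// pair: the former returns the area's cache locked and with an extra
// reference, the latter drops both again. A sketch of the typical pattern
// (illustrative only):
#if 0
static void
example_with_locked_cache(VMArea* area)
{
	VMCache* cache = vm_area_get_locked_cache(area);

	// ... inspect or modify the cache while it is locked ...

	vm_area_put_locked_cache(cache);
		// unlocks the cache and releases the reference
}
#endif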
2409 
2410 area_id
2411 vm_clone_area(team_id team, const char* name, void** address,
2412 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2413 	bool kernel)
2414 {
2415 	VMArea* newArea = NULL;
2416 	VMArea* sourceArea;
2417 
2418 	// Check whether the source area exists and is cloneable. If so, mark it
2419 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2420 	{
2421 		AddressSpaceWriteLocker locker;
2422 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2423 		if (status != B_OK)
2424 			return status;
2425 
2426 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2427 			return B_NOT_ALLOWED;
2428 
2429 		sourceArea->protection |= B_SHARED_AREA;
2430 		protection |= B_SHARED_AREA;
2431 	}
2432 
2433 	// Now lock both address spaces and actually do the cloning.
2434 
2435 	MultiAddressSpaceLocker locker;
2436 	VMAddressSpace* sourceAddressSpace;
2437 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2438 	if (status != B_OK)
2439 		return status;
2440 
2441 	VMAddressSpace* targetAddressSpace;
2442 	status = locker.AddTeam(team, true, &targetAddressSpace);
2443 	if (status != B_OK)
2444 		return status;
2445 
2446 	status = locker.Lock();
2447 	if (status != B_OK)
2448 		return status;
2449 
2450 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2451 	if (sourceArea == NULL)
2452 		return B_BAD_VALUE;
2453 
2454 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2455 		return B_NOT_ALLOWED;
2456 
2457 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2458 
2459 	if (!kernel && sourceAddressSpace != targetAddressSpace
2460 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2461 #if KDEBUG
2462 		Team* team = thread_get_current_thread()->team;
2463 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2464 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2465 #endif
2466 		status = B_NOT_ALLOWED;
2467 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2468 		status = B_NOT_ALLOWED;
2469 	} else {
2470 		uint32 flags = 0;
2471 		if (mapping != REGION_PRIVATE_MAP)
2472 			flags |= CREATE_AREA_DONT_COMMIT_MEMORY;
2473 
2474 		virtual_address_restrictions addressRestrictions = {};
2475 		addressRestrictions.address = *address;
2476 		addressRestrictions.address_specification = addressSpec;
2477 		status = map_backing_store(targetAddressSpace, cache,
2478 			sourceArea->cache_offset, name, sourceArea->Size(),
2479 			sourceArea->wiring, protection, sourceArea->protection_max,
2480 			mapping, flags, &addressRestrictions,
2481 			kernel, &newArea, address);
2482 	}
2483 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2484 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2485 		// to create a new cache, and has therefore already acquired a reference
2486 		// to the source cache - but otherwise it has no idea that we need
2487 		// one.
2488 		cache->AcquireRefLocked();
2489 	}
2490 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2491 		// we need to map in everything at this point
2492 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2493 			// we don't have actual pages to map but a physical area
2494 			VMTranslationMap* map
2495 				= sourceArea->address_space->TranslationMap();
2496 			map->Lock();
2497 
2498 			phys_addr_t physicalAddress;
2499 			uint32 oldProtection;
2500 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2501 
2502 			map->Unlock();
2503 
2504 			map = targetAddressSpace->TranslationMap();
2505 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2506 				newArea->Base() + (newArea->Size() - 1));
2507 
2508 			vm_page_reservation reservation;
2509 			vm_page_reserve_pages(&reservation, reservePages,
2510 				targetAddressSpace == VMAddressSpace::Kernel()
2511 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2512 			map->Lock();
2513 
2514 			for (addr_t offset = 0; offset < newArea->Size();
2515 					offset += B_PAGE_SIZE) {
2516 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2517 					protection, newArea->MemoryType(), &reservation);
2518 			}
2519 
2520 			map->Unlock();
2521 			vm_page_unreserve_pages(&reservation);
2522 		} else {
2523 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2524 			size_t reservePages = map->MaxPagesNeededToMap(
2525 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2526 			vm_page_reservation reservation;
2527 			vm_page_reserve_pages(&reservation, reservePages,
2528 				targetAddressSpace == VMAddressSpace::Kernel()
2529 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2530 
2531 			// map in all pages from source
2532 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2533 					vm_page* page  = it.Next();) {
2534 				if (!page->busy) {
2535 					DEBUG_PAGE_ACCESS_START(page);
2536 					map_page(newArea, page,
2537 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2538 							- newArea->cache_offset),
2539 						protection, &reservation);
2540 					DEBUG_PAGE_ACCESS_END(page);
2541 				}
2542 			}
2543 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2544 			// ensuring that!
2545 
2546 			vm_page_unreserve_pages(&reservation);
2547 		}
2548 	}
2549 	if (status == B_OK)
2550 		newArea->cache_type = sourceArea->cache_type;
2551 
2552 	vm_area_put_locked_cache(cache);
2553 
2554 	if (status < B_OK)
2555 		return status;
2556 
2557 	return newArea->id;
2558 }
2559 
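// A usage sketch for vm_clone_area() (illustrative only; the scenario of a
// kernel component cloning an existing area for its own access is an
// assumption):
#if 0
static area_id
example_clone_into_kernel(area_id sourceArea)
{
	void* address = NULL;
	return vm_clone_area(VMAddressSpace::KernelID(), "cloned area", &address,
		B_ANY_KERNEL_ADDRESS, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
		REGION_NO_PRIVATE_MAP, sourceArea, true);
}
#endif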
2560 
2561 /*!	Deletes the specified area of the given address space.
2562 
2563 	The address space must be write-locked.
2564 	The caller must ensure that the area does not have any wired ranges.
2565 
2566 	\param addressSpace The address space containing the area.
2567 	\param area The area to be deleted.
2568 	\param deletingAddressSpace \c true, if the address space is in the process
2569 		of being deleted.
2570 	\param alreadyRemoved \c true, if the area was already removed from the global
2571 		areas map (and thus had its ID deallocated.)
2572 */
2573 static void
2574 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2575 	bool deletingAddressSpace, bool alreadyRemoved)
2576 {
2577 	ASSERT(!area->IsWired());
2578 
2579 	if (area->id >= 0 && !alreadyRemoved)
2580 		VMAreas::Remove(area);
2581 
2582 	// At this point the area is removed from the global hash table, but
2583 	// still exists in the area list.
2584 
2585 	// Unmap the virtual address space the area occupied.
2586 	{
2587 		// We need to lock the complete cache chain.
2588 		VMCache* topCache = vm_area_get_locked_cache(area);
2589 		VMCacheChainLocker cacheChainLocker(topCache);
2590 		cacheChainLocker.LockAllSourceCaches();
2591 
2592 		// If the area's top cache is a temporary cache and the area is the only
2593 		// one referencing it (besides us currently holding a second reference),
2594 		// the unmapping code doesn't need to care about preserving the accessed
2595 		// and dirty flags of the top cache page mappings.
2596 		bool ignoreTopCachePageFlags
2597 			= topCache->temporary && topCache->RefCount() == 2;
2598 
2599 		area->address_space->TranslationMap()->UnmapArea(area,
2600 			deletingAddressSpace, ignoreTopCachePageFlags);
2601 	}
2602 
2603 	if (!area->cache->temporary)
2604 		area->cache->WriteModified();
2605 
2606 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2607 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2608 
2609 	arch_vm_unset_memory_type(area);
2610 	addressSpace->RemoveArea(area, allocationFlags);
2611 	addressSpace->Put();
2612 
2613 	area->cache->RemoveArea(area);
2614 	area->cache->ReleaseRef();
2615 
2616 	addressSpace->DeleteArea(area, allocationFlags);
2617 }
2618 
2619 
2620 status_t
2621 vm_delete_area(team_id team, area_id id, bool kernel)
2622 {
2623 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2624 		team, id));
2625 
2626 	// lock the address space and make sure the area isn't wired
2627 	AddressSpaceWriteLocker locker;
2628 	VMArea* area;
2629 	AreaCacheLocker cacheLocker;
2630 
2631 	do {
2632 		status_t status = locker.SetFromArea(team, id, area);
2633 		if (status != B_OK)
2634 			return status;
2635 
2636 		cacheLocker.SetTo(area);
2637 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2638 
2639 	cacheLocker.Unlock();
2640 
2641 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2642 		return B_NOT_ALLOWED;
2643 
2644 	delete_area(locker.AddressSpace(), area, false);
2645 	return B_OK;
2646 }
2647 
2648 
2649 /*!	Creates a new cache on top of the given cache, moves all areas from
2650 	the old cache to the new one, and changes the protection of all affected
2651 	areas' pages to read-only. If requested, wired pages are moved up to the
2652 	new cache and copies are added to the old cache in their place.
2653 	Preconditions:
2654 	- The given cache must be locked.
2655 	- All of the cache's areas' address spaces must be read locked.
2656 	- Either the cache must not have any wired ranges or a page reservation for
2657 	  all wired pages must be provided, so they can be copied.
2658 
2659 	\param lowerCache The cache on top of which a new cache shall be created.
2660 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2661 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2662 		has wired pages. The wired pages are copied in this case.
2663 */
2664 static status_t
2665 vm_copy_on_write_area(VMCache* lowerCache,
2666 	vm_page_reservation* wiredPagesReservation)
2667 {
2668 	VMCache* upperCache;
2669 
2670 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2671 
2672 	// We need to separate the cache from its areas. The cache goes one level
2673 	// deeper and we create a new cache in between.
2674 
2675 	// create an anonymous cache
2676 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2677 		lowerCache->GuardSize() / B_PAGE_SIZE,
2678 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2679 		VM_PRIORITY_USER);
2680 	if (status != B_OK)
2681 		return status;
2682 
2683 	upperCache->Lock();
2684 
2685 	upperCache->temporary = 1;
2686 	upperCache->virtual_base = lowerCache->virtual_base;
2687 	upperCache->virtual_end = lowerCache->virtual_end;
2688 
2689 	// transfer the lower cache areas to the upper cache
2690 	rw_lock_write_lock(&sAreaCacheLock);
2691 	upperCache->TransferAreas(lowerCache);
2692 	rw_lock_write_unlock(&sAreaCacheLock);
2693 
2694 	lowerCache->AddConsumer(upperCache);
2695 
2696 	// We now need to remap all pages from all of the cache's areas read-only,
2697 	// so that a copy will be created on next write access. If there are wired
2698 	// pages, we keep their protection, move them to the upper cache and create
2699 	// copies for the lower cache.
2700 	if (wiredPagesReservation != NULL) {
2701 		// We need to handle wired pages -- iterate through the cache's pages.
2702 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2703 				vm_page* page = it.Next();) {
2704 			if (page->WiredCount() > 0) {
2705 				// allocate a new page and copy the wired one
2706 				vm_page* copiedPage = vm_page_allocate_page(
2707 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2708 
2709 				vm_memcpy_physical_page(
2710 					copiedPage->physical_page_number * B_PAGE_SIZE,
2711 					page->physical_page_number * B_PAGE_SIZE);
2712 
2713 				// move the wired page to the upper cache (note: removing is OK
2714 				// with the SplayTree iterator) and insert the copy
2715 				upperCache->MovePage(page);
2716 				lowerCache->InsertPage(copiedPage,
2717 					page->cache_offset * B_PAGE_SIZE);
2718 
2719 				DEBUG_PAGE_ACCESS_END(copiedPage);
2720 			} else {
2721 				// Change the protection of this page in all areas.
2722 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2723 						tempArea = tempArea->cache_next) {
2724 					if (!is_page_in_area(tempArea, page))
2725 						continue;
2726 
2727 					// The area must be readable in the same way it was
2728 					// previously writable.
2729 					addr_t address = virtual_page_address(tempArea, page);
2730 					uint32 protection = 0;
2731 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2732 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2733 						protection |= B_KERNEL_READ_AREA;
2734 					if ((pageProtection & B_READ_AREA) != 0)
2735 						protection |= B_READ_AREA;
2736 
2737 					VMTranslationMap* map
2738 						= tempArea->address_space->TranslationMap();
2739 					map->Lock();
2740 					map->ProtectPage(tempArea, address, protection);
2741 					map->Unlock();
2742 				}
2743 			}
2744 		}
2745 	} else {
2746 		ASSERT(lowerCache->WiredPagesCount() == 0);
2747 
2748 		// just change the protection of all areas
2749 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2750 				tempArea = tempArea->cache_next) {
2751 			if (tempArea->page_protections != NULL) {
2752 				// Change the protection of all pages in this area.
2753 				VMTranslationMap* map = tempArea->address_space->TranslationMap();
2754 				map->Lock();
2755 				for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2756 					vm_page* page = it.Next();) {
2757 					if (!is_page_in_area(tempArea, page))
2758 						continue;
2759 
2760 					// The area must be readable in the same way it was
2761 					// previously writable.
2762 					addr_t address = virtual_page_address(tempArea, page);
2763 					uint32 protection = 0;
2764 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2765 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2766 						protection |= B_KERNEL_READ_AREA;
2767 					if ((pageProtection & B_READ_AREA) != 0)
2768 						protection |= B_READ_AREA;
2769 
2770 					map->ProtectPage(tempArea, address, protection);
2771 				}
2772 				map->Unlock();
2773 				continue;
2774 			}
2775 			// The area must be readable in the same way it was previously
2776 			// writable.
2777 			uint32 protection = 0;
2778 			if ((tempArea->protection & B_KERNEL_READ_AREA) != 0)
2779 				protection |= B_KERNEL_READ_AREA;
2780 			if ((tempArea->protection & B_READ_AREA) != 0)
2781 				protection |= B_READ_AREA;
2782 
2783 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2784 			map->Lock();
2785 			map->ProtectArea(tempArea, protection);
2786 			map->Unlock();
2787 		}
2788 	}
2789 
2790 	vm_area_put_locked_cache(upperCache);
2791 
2792 	return B_OK;
2793 }
2794 
2795 
2796 area_id
2797 vm_copy_area(team_id team, const char* name, void** _address,
2798 	uint32 addressSpec, area_id sourceID)
2799 {
2800 	// Do the locking: target address space, all address spaces associated with
2801 	// the source cache, and the cache itself.
2802 	MultiAddressSpaceLocker locker;
2803 	VMAddressSpace* targetAddressSpace;
2804 	VMCache* cache;
2805 	VMArea* source;
2806 	AreaCacheLocker cacheLocker;
2807 	status_t status;
2808 	bool sharedArea;
2809 
2810 	page_num_t wiredPages = 0;
2811 	vm_page_reservation wiredPagesReservation;
2812 
2813 	bool restart;
2814 	do {
2815 		restart = false;
2816 
2817 		locker.Unset();
2818 		status = locker.AddTeam(team, true, &targetAddressSpace);
2819 		if (status == B_OK) {
2820 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2821 				&cache);
2822 		}
2823 		if (status != B_OK)
2824 			return status;
2825 
2826 		cacheLocker.SetTo(cache, true);	// already locked
2827 
2828 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2829 
2830 		page_num_t oldWiredPages = wiredPages;
2831 		wiredPages = 0;
2832 
2833 		// If the source area isn't shared, count the number of wired pages in
2834 		// the cache and reserve as many pages.
2835 		if (!sharedArea) {
2836 			wiredPages = cache->WiredPagesCount();
2837 
2838 			if (wiredPages > oldWiredPages) {
2839 				cacheLocker.Unlock();
2840 				locker.Unlock();
2841 
2842 				if (oldWiredPages > 0)
2843 					vm_page_unreserve_pages(&wiredPagesReservation);
2844 
2845 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2846 					VM_PRIORITY_USER);
2847 
2848 				restart = true;
2849 			}
2850 		} else if (oldWiredPages > 0)
2851 			vm_page_unreserve_pages(&wiredPagesReservation);
2852 	} while (restart);
2853 
2854 	// unreserve pages later
2855 	struct PagesUnreserver {
2856 		PagesUnreserver(vm_page_reservation* reservation)
2857 			:
2858 			fReservation(reservation)
2859 		{
2860 		}
2861 
2862 		~PagesUnreserver()
2863 		{
2864 			if (fReservation != NULL)
2865 				vm_page_unreserve_pages(fReservation);
2866 		}
2867 
2868 	private:
2869 		vm_page_reservation*	fReservation;
2870 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2871 
2872 	bool writableCopy
2873 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2874 	uint8* targetPageProtections = NULL;
2875 
2876 	if (source->page_protections != NULL) {
2877 		size_t bytes = area_page_protections_size(source->Size());
2878 		targetPageProtections = (uint8*)malloc_etc(bytes,
2879 			(source->address_space == VMAddressSpace::Kernel()
2880 					|| targetAddressSpace == VMAddressSpace::Kernel())
2881 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2882 		if (targetPageProtections == NULL)
2883 			return B_NO_MEMORY;
2884 
2885 		memcpy(targetPageProtections, source->page_protections, bytes);
2886 
2887 		if (!writableCopy) {
2888 			for (size_t i = 0; i < bytes; i++) {
2889 				if ((targetPageProtections[i]
2890 						& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2891 					writableCopy = true;
2892 					break;
2893 				}
2894 			}
2895 		}
2896 	}
2897 
2898 	if (addressSpec == B_CLONE_ADDRESS) {
2899 		addressSpec = B_EXACT_ADDRESS;
2900 		*_address = (void*)source->Base();
2901 	}
2902 
2903 	// First, create a cache on top of the source area, or use the existing
2904 	// one if this is a shared area.
2905 
2906 	VMArea* target;
2907 	virtual_address_restrictions addressRestrictions = {};
2908 	addressRestrictions.address = *_address;
2909 	addressRestrictions.address_specification = addressSpec;
2910 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2911 		name, source->Size(), source->wiring, source->protection,
2912 		source->protection_max,
2913 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2914 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2915 		&addressRestrictions, true, &target, _address);
2916 	if (status < B_OK) {
2917 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2918 		return status;
2919 	}
2920 
2921 	if (targetPageProtections != NULL)
2922 		target->page_protections = targetPageProtections;
2923 
2924 	if (sharedArea) {
2925 		// The new area uses the old area's cache, but map_backing_store()
2926 		// hasn't acquired a ref. So we have to do that now.
2927 		cache->AcquireRefLocked();
2928 	}
2929 
2930 	// If the source area is writable, we need to move it one layer up as well
2931 
2932 	if (!sharedArea) {
2933 		if (writableCopy) {
2934 			// TODO: do something more useful if this fails!
2935 			if (vm_copy_on_write_area(cache,
2936 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2937 				panic("vm_copy_on_write_area() failed!\n");
2938 			}
2939 		}
2940 	}
2941 
2942 	// we return the ID of the newly created area
2943 	return target->id;
2944 }
2945 
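// A usage sketch for vm_copy_area() (illustrative only): create a copy of an
// existing area in the current team at an address of the VM's choosing. For
// non-shared writable sources the copy is made copy-on-write as described
// above.
#if 0
static area_id
example_copy_area(area_id sourceArea)
{
	void* address = NULL;
	return vm_copy_area(VMAddressSpace::CurrentID(), "area copy", &address,
		B_ANY_ADDRESS, sourceArea);
}
#endif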
2946 
2947 status_t
2948 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2949 	bool kernel)
2950 {
2951 	fix_protection(&newProtection);
2952 
2953 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2954 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2955 
2956 	if (!arch_vm_supports_protection(newProtection))
2957 		return B_NOT_SUPPORTED;
2958 
2959 	bool becomesWritable
2960 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2961 
2962 	// lock address spaces and cache
2963 	MultiAddressSpaceLocker locker;
2964 	VMCache* cache;
2965 	VMArea* area;
2966 	status_t status;
2967 	AreaCacheLocker cacheLocker;
2968 	bool isWritable;
2969 
2970 	bool restart;
2971 	do {
2972 		restart = false;
2973 
2974 		locker.Unset();
2975 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2976 		if (status != B_OK)
2977 			return status;
2978 
2979 		cacheLocker.SetTo(cache, true);	// already locked
2980 
2981 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
2982 				|| (area->protection & B_KERNEL_AREA) != 0)) {
2983 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2984 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2985 				" (%s)\n", team, newProtection, areaID, area->name);
2986 			return B_NOT_ALLOWED;
2987 		}
2988 		if (!kernel && area->protection_max != 0
2989 			&& (newProtection & area->protection_max)
2990 				!= (newProtection & B_USER_PROTECTION)) {
2991 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2992 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
2993 				"area %" B_PRId32 " (%s)\n", team, newProtection,
2994 				area->protection_max, areaID, area->name);
2995 			return B_NOT_ALLOWED;
2996 		}
2997 
2998 		if (team != VMAddressSpace::KernelID()
2999 			&& area->address_space->ID() != team) {
3000 			// unless you're the kernel, you are only allowed to set
3001 			// the protection of your own areas
3002 			return B_NOT_ALLOWED;
3003 		}
3004 
3005 		if (area->protection == newProtection)
3006 			return B_OK;
3007 
3008 		isWritable
3009 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
3010 
3011 		// Make sure the area (respectively, if we're going to call
3012 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
3013 		// wired ranges.
3014 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
3015 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
3016 					otherArea = otherArea->cache_next) {
3017 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
3018 					restart = true;
3019 					break;
3020 				}
3021 			}
3022 		} else {
3023 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
3024 				restart = true;
3025 		}
3026 	} while (restart);
3027 
3028 	if (area->page_protections != NULL) {
3029 		// Get rid of the per-page protections.
3030 		free_etc(area->page_protections,
3031 			area->address_space == VMAddressSpace::Kernel() ? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
3032 		area->page_protections = NULL;
3033 
3034 		// Assume the existing protections don't match the new ones.
3035 		isWritable = !becomesWritable;
3036 	}
3037 
3038 	bool changePageProtection = true;
3039 	bool changeTopCachePagesOnly = false;
3040 
3041 	if (isWritable && !becomesWritable) {
3042 		// writable -> !writable
3043 
3044 		if (cache->source != NULL && cache->temporary) {
3045 			if (cache->CountWritableAreas(area) == 0) {
3046 				// Since this cache is now backed by the pages in its source
3047 				// cache, we can change the cache's commitment to take only those
3048 				// pages into account that really are in this cache.
3049 
3050 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
3051 					team == VMAddressSpace::KernelID()
3052 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3053 
3054 				// TODO: we may be able to join with our source cache, if
3055 				// count == 0
3056 			}
3057 		}
3058 
3059 		// If only the writability changes, we can just remap the pages of the
3060 		// top cache, since the pages of lower caches are mapped read-only
3061 		// anyway. That is only advantageous, though, if the number of pages
3062 		// in the cache is significantly smaller than the number of pages in
3063 		// the area.
3064 		if (newProtection
3065 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
3066 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
3067 			changeTopCachePagesOnly = true;
3068 		}
3069 	} else if (!isWritable && becomesWritable) {
3070 		// !writable -> writable
3071 
3072 		if (!cache->consumers.IsEmpty()) {
3073 			// There are consumers -- we have to insert a new cache. Fortunately
3074 			// vm_copy_on_write_area() does everything that's needed.
3075 			changePageProtection = false;
3076 			status = vm_copy_on_write_area(cache, NULL);
3077 		} else {
3078 			// No consumers, so we don't need to insert a new one.
3079 			if (cache->source != NULL && cache->temporary) {
3080 				// the cache's commitment must contain all possible pages
3081 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
3082 					team == VMAddressSpace::KernelID()
3083 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3084 			}
3085 
3086 			if (status == B_OK && cache->source != NULL) {
3087 				// There's a source cache, hence we can't just change all pages'
3088 				// protection or we might allow writing into pages belonging to
3089 				// a lower cache.
3090 				changeTopCachePagesOnly = true;
3091 			}
3092 		}
3093 	} else {
3094 		// we don't have anything special to do in all other cases
3095 	}
3096 
3097 	if (status == B_OK) {
3098 		// remap existing pages in this cache
3099 		if (changePageProtection) {
3100 			VMTranslationMap* map = area->address_space->TranslationMap();
3101 			map->Lock();
3102 
3103 			if (changeTopCachePagesOnly) {
3104 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
3105 				page_num_t lastPageOffset
3106 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
3107 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3108 						vm_page* page = it.Next();) {
3109 					if (page->cache_offset >= firstPageOffset
3110 						&& page->cache_offset <= lastPageOffset) {
3111 						addr_t address = virtual_page_address(area, page);
3112 						map->ProtectPage(area, address, newProtection);
3113 					}
3114 				}
3115 			} else
3116 				map->ProtectArea(area, newProtection);
3117 
3118 			map->Unlock();
3119 		}
3120 
3121 		area->protection = newProtection;
3122 	}
3123 
3124 	return status;
3125 }
3126 
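// A usage sketch for vm_set_area_protection() (illustrative only): make a
// kernel area read-only once its contents are final.
#if 0
static status_t
example_make_read_only(area_id area)
{
	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
		B_KERNEL_READ_AREA, true);
}
#endif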
3127 
3128 status_t
3129 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
3130 {
3131 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
3132 	if (addressSpace == NULL)
3133 		return B_BAD_TEAM_ID;
3134 
3135 	VMTranslationMap* map = addressSpace->TranslationMap();
3136 
3137 	map->Lock();
3138 	uint32 dummyFlags;
3139 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
3140 	map->Unlock();
3141 
3142 	addressSpace->Put();
3143 	return status;
3144 }
3145 
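// A usage sketch for vm_get_page_mapping() (illustrative only): translate a
// kernel virtual address into the physical address it is currently mapped to.
#if 0
static status_t
example_virtual_to_physical(addr_t virtualAddress, phys_addr_t* _physical)
{
	return vm_get_page_mapping(VMAddressSpace::KernelID(), virtualAddress,
		_physical);
}
#endif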
3146 
3147 /*!	The page's cache must be locked.
3148 */
3149 bool
3150 vm_test_map_modification(vm_page* page)
3151 {
3152 	if (page->modified)
3153 		return true;
3154 
3155 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3156 	vm_page_mapping* mapping;
3157 	while ((mapping = iterator.Next()) != NULL) {
3158 		VMArea* area = mapping->area;
3159 		VMTranslationMap* map = area->address_space->TranslationMap();
3160 
3161 		phys_addr_t physicalAddress;
3162 		uint32 flags;
3163 		map->Lock();
3164 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
3165 		map->Unlock();
3166 
3167 		if ((flags & PAGE_MODIFIED) != 0)
3168 			return true;
3169 	}
3170 
3171 	return false;
3172 }
3173 
3174 
3175 /*!	The page's cache must be locked.
3176 */
3177 void
3178 vm_clear_map_flags(vm_page* page, uint32 flags)
3179 {
3180 	if ((flags & PAGE_ACCESSED) != 0)
3181 		page->accessed = false;
3182 	if ((flags & PAGE_MODIFIED) != 0)
3183 		page->modified = false;
3184 
3185 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3186 	vm_page_mapping* mapping;
3187 	while ((mapping = iterator.Next()) != NULL) {
3188 		VMArea* area = mapping->area;
3189 		VMTranslationMap* map = area->address_space->TranslationMap();
3190 
3191 		map->Lock();
3192 		map->ClearFlags(virtual_page_address(area, page), flags);
3193 		map->Unlock();
3194 	}
3195 }
3196 
3197 
3198 /*!	Removes all mappings from a page.
3199 	After you've called this function, the page is unmapped from memory and
3200 	the page's \c accessed and \c modified flags have been updated according
3201 	to the state of the mappings.
3202 	The page's cache must be locked.
3203 */
3204 void
3205 vm_remove_all_page_mappings(vm_page* page)
3206 {
3207 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3208 		VMArea* area = mapping->area;
3209 		VMTranslationMap* map = area->address_space->TranslationMap();
3210 		addr_t address = virtual_page_address(area, page);
3211 		map->UnmapPage(area, address, false);
3212 	}
3213 }
3214 
3215 
3216 int32
3217 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
3218 {
3219 	int32 count = 0;
3220 
3221 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3222 	vm_page_mapping* mapping;
3223 	while ((mapping = iterator.Next()) != NULL) {
3224 		VMArea* area = mapping->area;
3225 		VMTranslationMap* map = area->address_space->TranslationMap();
3226 
3227 		bool modified;
3228 		if (map->ClearAccessedAndModified(area,
3229 				virtual_page_address(area, page), false, modified)) {
3230 			count++;
3231 		}
3232 
3233 		page->modified |= modified;
3234 	}
3235 
3236 
3237 	if (page->accessed) {
3238 		count++;
3239 		page->accessed = false;
3240 	}
3241 
3242 	return count;
3243 }
3244 
3245 
3246 /*!	Removes all mappings of a page and/or clears the accessed bits of the
3247 	mappings.
3248 	The function iterates through the page mappings and removes them until
3249 	encountering one that has been accessed. From then on it will continue to
3250 	iterate, but only clear the accessed flag of the mapping. The page's
3251 	\c modified bit will be updated accordingly, the \c accessed bit will be
3252 	cleared.
3253 	\return The number of mapping accessed bits encountered, including the
3254 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3255 		of the page have been removed.
3256 */
3257 int32
3258 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3259 {
3260 	ASSERT(page->WiredCount() == 0);
3261 
3262 	if (page->accessed)
3263 		return vm_clear_page_mapping_accessed_flags(page);
3264 
3265 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3266 		VMArea* area = mapping->area;
3267 		VMTranslationMap* map = area->address_space->TranslationMap();
3268 		addr_t address = virtual_page_address(area, page);
3269 		bool modified = false;
3270 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3271 			page->accessed = true;
3272 			page->modified |= modified;
3273 			return vm_clear_page_mapping_accessed_flags(page);
3274 		}
3275 		page->modified |= modified;
3276 	}
3277 
3278 	return 0;
3279 }
3280 
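// A sketch of how a caller might interpret the return value of
// vm_remove_all_page_mappings_if_unaccessed() (illustrative only; real
// page-aging logic is more involved):
#if 0
static bool
example_try_unmap_idle_page(vm_page* page)
{
	// A return value of 0 means no accessed bits were encountered and all
	// mappings were removed; otherwise only the accessed flags were cleared
	// and the page deserves another aging round.
	return vm_remove_all_page_mappings_if_unaccessed(page) == 0;
}
#endif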
3281 
3282 /*!	Deletes all areas and reserved regions in the given address space.
3283 
3284 	The caller must ensure that none of the areas has any wired ranges.
3285 
3286 	\param addressSpace The address space.
3287 	\param deletingAddressSpace \c true, if the address space is in the process
3288 		of being deleted.
3289 */
3290 void
3291 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3292 {
3293 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3294 		addressSpace->ID()));
3295 
3296 	addressSpace->WriteLock();
3297 
3298 	// remove all reserved areas in this address space
3299 	addressSpace->UnreserveAllAddressRanges(0);
3300 
3301 	// remove all areas from the areas map at once (to avoid lock contention)
3302 	VMAreas::WriteLock();
3303 	{
3304 		VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
3305 		while (VMArea* area = it.Next())
3306 			VMAreas::Remove(area);
3307 	}
3308 	VMAreas::WriteUnlock();
3309 
3310 	// delete all the areas in this address space
3311 	while (VMArea* area = addressSpace->FirstArea()) {
3312 		ASSERT(!area->IsWired());
3313 		delete_area(addressSpace, area, deletingAddressSpace, true);
3314 	}
3315 
3316 	addressSpace->WriteUnlock();
3317 }
3318 
3319 
3320 static area_id
3321 vm_area_for(addr_t address, bool kernel)
3322 {
3323 	team_id team;
3324 	if (IS_USER_ADDRESS(address)) {
3325 		// we try the user team address space, if any
3326 		team = VMAddressSpace::CurrentID();
3327 		if (team < 0)
3328 			return team;
3329 	} else
3330 		team = VMAddressSpace::KernelID();
3331 
3332 	AddressSpaceReadLocker locker(team);
3333 	if (!locker.IsLocked())
3334 		return B_BAD_TEAM_ID;
3335 
3336 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3337 	if (area != NULL) {
3338 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0
3339 				&& (area->protection & B_KERNEL_AREA) != 0)
3340 			return B_ERROR;
3341 
3342 		return area->id;
3343 	}
3344 
3345 	return B_ERROR;
3346 }
3347 
3348 
3349 /*!	Frees physical pages that were used during the boot process.
3350 	\a end is inclusive.
3351 */
3352 static void
3353 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3354 {
3355 	// free all physical pages in the specified range
3356 
3357 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3358 		phys_addr_t physicalAddress;
3359 		uint32 flags;
3360 
3361 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3362 			&& (flags & PAGE_PRESENT) != 0) {
3363 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3364 			if (page != NULL && page->State() != PAGE_STATE_FREE
3365 					&& page->State() != PAGE_STATE_CLEAR
3366 					&& page->State() != PAGE_STATE_UNUSED) {
3367 				DEBUG_PAGE_ACCESS_START(page);
3368 				vm_page_set_state(page, PAGE_STATE_FREE);
3369 			}
3370 		}
3371 	}
3372 
3373 	// unmap the memory
3374 	map->Unmap(start, end);
3375 }
3376 
3377 
3378 void
3379 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3380 {
3381 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3382 	addr_t end = start + (size - 1);
3383 	addr_t lastEnd = start;
3384 
3385 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3386 		(void*)start, (void*)end));
3387 
3388 	// The areas are sorted in virtual address space order, so
3389 	// we just have to find the holes between them that fall
3390 	// into the range we should dispose of
3391 
3392 	map->Lock();
3393 
3394 	for (VMAddressSpace::AreaIterator it
3395 				= VMAddressSpace::Kernel()->GetAreaIterator();
3396 			VMArea* area = it.Next();) {
3397 		addr_t areaStart = area->Base();
3398 		addr_t areaEnd = areaStart + (area->Size() - 1);
3399 
3400 		if (areaEnd < start)
3401 			continue;
3402 
3403 		if (areaStart > end) {
3404 			// we are done, the area is already beyond what we have to free
3405 			break;
3406 		}
3407 
3408 		if (areaStart > lastEnd) {
3409 			// this is something we can free
3410 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3411 				(void*)areaStart));
3412 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3413 		}
3414 
3415 		if (areaEnd >= end) {
3416 			lastEnd = areaEnd;
3417 				// no +1 to prevent potential overflow
3418 			break;
3419 		}
3420 
3421 		lastEnd = areaEnd + 1;
3422 	}
3423 
3424 	if (lastEnd < end) {
3425 		// we can also get rid of some space at the end of the area
3426 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3427 			(void*)end));
3428 		unmap_and_free_physical_pages(map, lastEnd, end);
3429 	}
3430 
3431 	map->Unlock();
3432 }
3433 
3434 
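/*!	Creates "_text" and "_data" areas for the given image preloaded by the
	boot loader. The area names are derived from the image's file name.
*/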
3435 static void
3436 create_preloaded_image_areas(struct preloaded_image* _image)
3437 {
3438 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3439 	char name[B_OS_NAME_LENGTH];
3440 	void* address;
3441 	int32 length;
3442 
3443 	// use file name to create a good area name
3444 	char* fileName = strrchr(image->name, '/');
3445 	if (fileName == NULL)
3446 		fileName = image->name;
3447 	else
3448 		fileName++;
3449 
3450 	length = strlen(fileName);
3451 	// make sure there is enough space for the suffix
3452 	if (length > 25)
3453 		length = 25;
3454 
3455 	memcpy(name, fileName, length);
3456 	strcpy(name + length, "_text");
3457 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3458 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3459 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3460 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3461 		// this will later be remapped read-only/executable by the
3462 		// ELF initialization code
3463 
3464 	strcpy(name + length, "_data");
3465 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3466 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3467 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3468 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3469 }
3470 
3471 
3472 /*!	Frees all kernel args areas previously allocated from the kernel_args
3473 	structure. Any boot loader resources contained in those arguments must not
3474 	be accessed anymore past this point.
3475 */
3476 void
3477 vm_free_kernel_args(kernel_args* args)
3478 {
3479 	TRACE(("vm_free_kernel_args()\n"));
3480 
3481 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3482 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3483 		if (area >= B_OK)
3484 			delete_area(area);
3485 	}
3486 }
3487 
3488 
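/*!	Creates areas covering the kernel_args ranges, so that the memory the boot
	loader passed in stays reserved until vm_free_kernel_args() releases it.
*/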
3489 static void
3490 allocate_kernel_args(kernel_args* args)
3491 {
3492 	TRACE(("allocate_kernel_args()\n"));
3493 
3494 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3495 		const addr_range& range = args->kernel_args_range[i];
3496 		void* address = (void*)(addr_t)range.start;
3497 
3498 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3499 			range.size, B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3500 	}
3501 }
3502 
3503 
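/*!	Releases the kernel address space reservations that were made for the boot
	loader's virtual allocations (see reserve_boot_loader_ranges()).
*/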
3504 static void
3505 unreserve_boot_loader_ranges(kernel_args* args)
3506 {
3507 	TRACE(("unreserve_boot_loader_ranges()\n"));
3508 
3509 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3510 		const addr_range& range = args->virtual_allocated_range[i];
3511 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3512 			(void*)(addr_t)range.start, range.size);
3513 	}
3514 }
3515 
3516 
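/*!	Reserves the virtual ranges the boot loader has allocated in the kernel
	address space, so that nothing else is placed there before the kernel has
	taken them over or released them.
*/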
3517 static void
3518 reserve_boot_loader_ranges(kernel_args* args)
3519 {
3520 	TRACE(("reserve_boot_loader_ranges()\n"));
3521 
3522 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3523 		const addr_range& range = args->virtual_allocated_range[i];
3524 		void* address = (void*)(addr_t)range.start;
3525 
3526 		// If the address is not a kernel address, we just skip it. The
3527 		// architecture-specific code has to deal with it.
3528 		if (!IS_KERNEL_ADDRESS(address)) {
3529 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3530 				B_PRIu64 "\n", address, range.size);
3531 			continue;
3532 		}
3533 
3534 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3535 			&address, B_EXACT_ADDRESS, range.size, 0);
3536 		if (status < B_OK)
3537 			panic("could not reserve boot loader ranges\n");
3538 	}
3539 }
3540 
3541 
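/*!	Allocates \a size bytes of kernel virtual address space during early boot
	by finding a gap adjacent to the kernel_args' virtual allocated ranges and
	extending one of them to cover it. Returns the base address, or 0 on
	failure.
*/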
3542 static addr_t
3543 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3544 {
3545 	size = PAGE_ALIGN(size);
3546 	if (alignment <= B_PAGE_SIZE) {
3547 		// All allocations are naturally page-aligned.
3548 		alignment = 0;
3549 	} else {
3550 		ASSERT((alignment % B_PAGE_SIZE) == 0);
3551 	}
3552 
3553 	// Find a slot in the virtual allocation ranges.
3554 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3555 		// Check if the space between this one and the previous is big enough.
3556 		const addr_range& range = args->virtual_allocated_range[i];
3557 		addr_range& previousRange = args->virtual_allocated_range[i - 1];
3558 		const addr_t previousRangeEnd = previousRange.start + previousRange.size;
3559 
3560 		addr_t base = alignment > 0
3561 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3562 
3563 		if (base >= KERNEL_BASE && base < range.start && (range.start - base) >= size) {
3564 			previousRange.size += base + size - previousRangeEnd;
3565 			return base;
3566 		}
3567 	}
3568 
3569 	// We didn't find one between allocation ranges. This is OK.
3570 	// See if there's a gap after the last one.
3571 	addr_range& lastRange
3572 		= args->virtual_allocated_range[args->num_virtual_allocated_ranges - 1];
3573 	const addr_t lastRangeEnd = lastRange.start + lastRange.size;
3574 	addr_t base = alignment > 0
3575 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3576 	if ((KERNEL_BASE + (KERNEL_SIZE - 1) - base) >= size) {
3577 		lastRange.size += base + size - lastRangeEnd;
3578 		return base;
3579 	}
3580 
3581 	// See if there's a gap before the first one.
3582 	addr_range& firstRange = args->virtual_allocated_range[0];
3583 	if (firstRange.start > KERNEL_BASE && (firstRange.start - KERNEL_BASE) >= size) {
3584 		base = firstRange.start - size;
3585 		if (alignment > 0)
3586 			base = ROUNDDOWN(base, alignment);
3587 
3588 		if (base >= KERNEL_BASE) {
3589 			firstRange.size += firstRange.start - base;
3590 			firstRange.start = base;
3591 			return base;
3592 		}
3593 	}
3594 
3595 	return 0;
3596 }
3597 
3598 
3599 static bool
3600 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3601 {
3602 	// TODO: horrible brute-force method of determining if the page can be
3603 	// allocated
3604 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3605 		const addr_range& range = args->physical_memory_range[i];
3606 		if (address >= range.start && address < (range.start + range.size))
3607 			return true;
3608 	}
3609 	return false;
3610 }
3611 
3612 
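/*!	Allocates a single physical page during early boot by forwarding to
	vm_allocate_early_physical_page_etc(). Returns the physical page number,
	or 0 on failure.
*/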
3613 page_num_t
3614 vm_allocate_early_physical_page(kernel_args* args)
3615 {
3616 	return vm_allocate_early_physical_page_etc(args);
3617 }
3618 
3619 
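/*!	Allocates a single physical page from the kernel_args' physical memory
	ranges during early boot, preferably by extending one of the already
	allocated ranges. Pages above \a maxAddress are not considered (0 means
	no limit). Returns the physical page number, or 0 on failure.
*/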
3620 page_num_t
3621 vm_allocate_early_physical_page_etc(kernel_args* args, phys_addr_t maxAddress)
3622 {
3623 	if (args->num_physical_allocated_ranges == 0) {
3624 		panic("early physical page allocations no longer possible!");
3625 		return 0;
3626 	}
3627 	if (maxAddress == 0)
3628 		maxAddress = __HAIKU_PHYS_ADDR_MAX;
3629 
3630 #if defined(__HAIKU_ARCH_PHYSICAL_64_BIT)
3631 	// Check if the last physical range is above the 32-bit maximum.
3632 	const addr_range& lastMemoryRange =
3633 		args->physical_memory_range[args->num_physical_memory_ranges - 1];
3634 	const uint64 post32bitAddr = 0x100000000LL;
3635 	if ((lastMemoryRange.start + lastMemoryRange.size) > post32bitAddr
3636 			&& args->num_physical_allocated_ranges < MAX_PHYSICAL_ALLOCATED_RANGE) {
3637 		// To avoid consuming physical memory in the 32-bit range (which drivers may need),
3638 		// ensure the last allocated range at least ends past the 32-bit boundary.
3639 		const addr_range& lastAllocatedRange =
3640 			args->physical_allocated_range[args->num_physical_allocated_ranges - 1];
3641 		const phys_addr_t lastAllocatedPage = lastAllocatedRange.start + lastAllocatedRange.size;
3642 		if (lastAllocatedPage < post32bitAddr) {
3643 			// Create ranges until we have one at least starting at the first point past 4GB.
3644 			// (Some of the logic here is similar to the new-range code at the end of the method.)
3645 			for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3646 				addr_range& memoryRange = args->physical_memory_range[i];
3647 				if ((memoryRange.start + memoryRange.size) < lastAllocatedPage)
3648 					continue;
3649 				if (memoryRange.size < (B_PAGE_SIZE * 128))
3650 					continue;
3651 
3652 				uint64 rangeStart = memoryRange.start;
3653 				if ((memoryRange.start + memoryRange.size) <= post32bitAddr) {
3654 					if (memoryRange.start < lastAllocatedPage)
3655 						continue;
3656 
3657 					// Range has no pages allocated and ends before the 32-bit boundary.
3658 				} else {
3659 					// Range ends past the 32-bit boundary. It could have some pages allocated,
3660 					// but if we're here, we know that nothing is allocated above the boundary,
3661 					// so we want to create a new range with it regardless.
3662 					if (rangeStart < post32bitAddr)
3663 						rangeStart = post32bitAddr;
3664 				}
3665 
3666 				addr_range& allocatedRange =
3667 					args->physical_allocated_range[args->num_physical_allocated_ranges++];
3668 				allocatedRange.start = rangeStart;
3669 				allocatedRange.size = 0;
3670 
3671 				if (rangeStart >= post32bitAddr)
3672 					break;
3673 				if (args->num_physical_allocated_ranges == MAX_PHYSICAL_ALLOCATED_RANGE)
3674 					break;
3675 			}
3676 		}
3677 	}
3678 #endif
3679 
3680 	// Try expanding the existing physical ranges upwards.
3681 	for (int32 i = args->num_physical_allocated_ranges - 1; i >= 0; i--) {
3682 		addr_range& range = args->physical_allocated_range[i];
3683 		phys_addr_t nextPage = range.start + range.size;
3684 
3685 		// check constraints
3686 		if (nextPage > maxAddress)
3687 			continue;
3688 
3689 		// make sure the page does not collide with the next allocated range
3690 		if ((i + 1) < (int32)args->num_physical_allocated_ranges) {
3691 			addr_range& nextRange = args->physical_allocated_range[i + 1];
3692 			if (nextRange.size != 0 && nextPage >= nextRange.start)
3693 				continue;
3694 		}
3695 		// see if the next page fits in the memory block
3696 		if (is_page_in_physical_memory_range(args, nextPage)) {
3697 			// we got one!
3698 			range.size += B_PAGE_SIZE;
3699 			return nextPage / B_PAGE_SIZE;
3700 		}
3701 	}
3702 
3703 	// Expanding upwards didn't work, try going downwards.
3704 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3705 		addr_range& range = args->physical_allocated_range[i];
3706 		phys_addr_t nextPage = range.start - B_PAGE_SIZE;
3707 
3708 		// check constraints
3709 		if (nextPage > maxAddress)
3710 			continue;
3711 
3712 		// make sure the page does not collide with the previous allocated range
3713 		if (i > 0) {
3714 			addr_range& previousRange = args->physical_allocated_range[i - 1];
3715 			if (previousRange.size != 0 && nextPage < (previousRange.start + previousRange.size))
3716 				continue;
3717 		}
3718 		// see if the next physical page fits in the memory block
3719 		if (is_page_in_physical_memory_range(args, nextPage)) {
3720 			// we got one!
3721 			range.start -= B_PAGE_SIZE;
3722 			range.size += B_PAGE_SIZE;
3723 			return nextPage / B_PAGE_SIZE;
3724 		}
3725 	}
3726 
3727 	// Try starting a new range.
3728 	if (args->num_physical_allocated_ranges < MAX_PHYSICAL_ALLOCATED_RANGE) {
3729 		const addr_range& lastAllocatedRange =
3730 			args->physical_allocated_range[args->num_physical_allocated_ranges - 1];
3731 		const phys_addr_t lastAllocatedPage = lastAllocatedRange.start + lastAllocatedRange.size;
3732 
3733 		phys_addr_t nextPage = 0;
3734 		for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3735 			const addr_range& range = args->physical_memory_range[i];
3736 			// Ignore everything before the last-allocated page, as well as small ranges.
3737 			if (range.start < lastAllocatedPage || range.size < (B_PAGE_SIZE * 128))
3738 				continue;
3739 			if (range.start > maxAddress)
3740 				break;
3741 
3742 			nextPage = range.start;
3743 			break;
3744 		}
3745 
3746 		if (nextPage != 0) {
3747 			// we got one!
3748 			addr_range& range =
3749 				args->physical_allocated_range[args->num_physical_allocated_ranges++];
3750 			range.start = nextPage;
3751 			range.size = B_PAGE_SIZE;
3752 			return nextPage / B_PAGE_SIZE;
3753 		}
3754 	}
3755 
3756 	return 0;
3757 		// could not allocate a block
3758 }
3759 
3760 
3761 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3762 	allocate some pages before the VM is completely up.
3763 */
3764 addr_t
3765 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3766 	uint32 attributes, addr_t alignment)
3767 {
3768 	if (physicalSize > virtualSize)
3769 		physicalSize = virtualSize;
3770 
3771 	// find the vaddr to allocate at
3772 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3773 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3774 	if (virtualBase == 0) {
3775 		panic("vm_allocate_early: could not allocate virtual address\n");
3776 		return 0;
3777 	}
3778 
3779 	// map the pages
3780 	for (uint32 i = 0; i < HOWMANY(physicalSize, B_PAGE_SIZE); i++) {
3781 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3782 		if (physicalAddress == 0)
3783 			panic("error allocating early page!\n");
3784 
3785 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3786 
3787 		status_t status = arch_vm_translation_map_early_map(args,
3788 			virtualBase + i * B_PAGE_SIZE,
3789 			physicalAddress * B_PAGE_SIZE, attributes,
3790 			&vm_allocate_early_physical_page);
3791 		if (status != B_OK)
3792 			panic("error mapping early page!");
3793 	}
3794 
3795 	return virtualBase;
3796 }
3797 
3798 
3799 /*!	The main entry point to initialize the VM. */
3800 status_t
3801 vm_init(kernel_args* args)
3802 {
3803 	struct preloaded_image* image;
3804 	void* address;
3805 	status_t err = 0;
3806 	uint32 i;
3807 
3808 	TRACE(("vm_init: entry\n"));
3809 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3810 	err = arch_vm_init(args);
3811 
3812 	// initialize some globals
3813 	vm_page_init_num_pages(args);
3814 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3815 
3816 	slab_init(args);
3817 
3818 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3819 	off_t heapSize = INITIAL_HEAP_SIZE;
3820 	// try to accommodate low-memory systems
3821 	while (heapSize > sAvailableMemory / 8)
3822 		heapSize /= 2;
3823 	if (heapSize < 1024 * 1024)
3824 		panic("vm_init: go buy some RAM please.");
3825 
3826 	// map in the new heap and initialize it
3827 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3828 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3829 	TRACE(("heap at 0x%lx\n", heapBase));
3830 	heap_init(heapBase, heapSize);
3831 #endif
3832 
3833 	// initialize the free page list and physical page mapper
3834 	vm_page_init(args);
3835 
3836 	// initialize the cache allocators
3837 	vm_cache_init(args);
3838 
3839 	{
3840 		status_t error = VMAreas::Init();
3841 		if (error != B_OK)
3842 			panic("vm_init: error initializing areas map\n");
3843 	}
3844 
3845 	VMAddressSpace::Init();
3846 	reserve_boot_loader_ranges(args);
3847 
3848 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3849 	heap_init_post_area();
3850 #endif
3851 
3852 	// Do any further initialization that the architecture-dependent layers may
3853 	// need now
3854 	arch_vm_translation_map_init_post_area(args);
3855 	arch_vm_init_post_area(args);
3856 	vm_page_init_post_area(args);
3857 	slab_init_post_area();
3858 
3859 	// allocate areas to represent stuff that already exists
3860 
3861 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3862 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
3863 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
3864 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3865 #endif
3866 
3867 	allocate_kernel_args(args);
3868 
3869 	create_preloaded_image_areas(args->kernel_image);
3870 
3871 	// allocate areas for preloaded images
3872 	for (image = args->preloaded_images; image != NULL; image = image->next)
3873 		create_preloaded_image_areas(image);
3874 
3875 	// allocate kernel stacks
3876 	for (i = 0; i < args->num_cpus; i++) {
3877 		char name[64];
3878 
3879 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
3880 		address = (void*)args->cpu_kstack[i].start;
3881 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
3882 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3883 	}
3884 
3885 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
3886 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
3887 
3888 #if PARANOID_KERNEL_MALLOC
3889 	vm_block_address_range("uninitialized heap memory",
3890 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3891 #endif
3892 #if PARANOID_KERNEL_FREE
3893 	vm_block_address_range("freed heap memory",
3894 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3895 #endif
3896 
3897 	create_page_mappings_object_caches();
3898 
3899 	vm_debug_init();
3900 
3901 	TRACE(("vm_init: exit\n"));
3902 
3903 	vm_cache_init_post_heap();
3904 
3905 	return err;
3906 }
3907 
3908 
3909 status_t
3910 vm_init_post_sem(kernel_args* args)
3911 {
3912 	// This frees all unused boot loader resources and makes their space
3913 	// available again
3914 	arch_vm_init_end(args);
3915 	unreserve_boot_loader_ranges(args);
3916 
3917 	// Fill in all of the semaphores that were not allocated before.
3918 	// Since we're still single threaded and only the kernel address space
3919 	// exists, it isn't that hard to find all of the ones we need to create.
3920 
3921 	arch_vm_translation_map_init_post_sem(args);
3922 
3923 	slab_init_post_sem();
3924 
3925 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3926 	heap_init_post_sem();
3927 #endif
3928 
3929 	return B_OK;
3930 }
3931 
3932 
3933 status_t
3934 vm_init_post_thread(kernel_args* args)
3935 {
3936 	vm_page_init_post_thread(args);
3937 	slab_init_post_thread();
3938 	return heap_init_post_thread();
3939 }
3940 
3941 
3942 status_t
3943 vm_init_post_modules(kernel_args* args)
3944 {
3945 	return arch_vm_init_post_modules(args);
3946 }
3947 
3948 
3949 void
3950 permit_page_faults(void)
3951 {
3952 	Thread* thread = thread_get_current_thread();
3953 	if (thread != NULL)
3954 		atomic_add(&thread->page_faults_allowed, 1);
3955 }
3956 
3957 
3958 void
3959 forbid_page_faults(void)
3960 {
3961 	Thread* thread = thread_get_current_thread();
3962 	if (thread != NULL)
3963 		atomic_add(&thread->page_faults_allowed, -1);
3964 }
3965 
3966 
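/*!	The generic page fault handler, called by the architecture specific fault
	handlers. Tries to resolve the fault via vm_soft_fault(). If that fails,
	a kernel fault is redirected to the thread's fault handler (via \a newIP)
	or causes a panic, while a userland fault is reported to the user debugger
	and/or answered with a SIGSEGV for the faulting thread.
*/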
3967 status_t
3968 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
3969 	bool isUser, addr_t* newIP)
3970 {
3971 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
3972 		faultAddress));
3973 
3974 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
3975 
3976 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
3977 	VMAddressSpace* addressSpace = NULL;
3978 
3979 	status_t status = B_OK;
3980 	*newIP = 0;
3981 	atomic_add((int32*)&sPageFaults, 1);
3982 
3983 	if (IS_KERNEL_ADDRESS(pageAddress)) {
3984 		addressSpace = VMAddressSpace::GetKernel();
3985 	} else if (IS_USER_ADDRESS(pageAddress)) {
3986 		addressSpace = VMAddressSpace::GetCurrent();
3987 		if (addressSpace == NULL) {
3988 			if (!isUser) {
3989 				dprintf("vm_page_fault: kernel thread accessing invalid user "
3990 					"memory!\n");
3991 				status = B_BAD_ADDRESS;
3992 				TPF(PageFaultError(-1,
3993 					VMPageFaultTracing
3994 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
3995 			} else {
3996 				// XXX weird state.
3997 				panic("vm_page_fault: non kernel thread accessing user memory "
3998 					"that doesn't exist!\n");
3999 				status = B_BAD_ADDRESS;
4000 			}
4001 		}
4002 	} else {
4003 		// the hit was probably in the 64k DMZ between kernel and user space
4004 		// this keeps a user space thread from passing a buffer that crosses
4005 		// into kernel space
4006 		status = B_BAD_ADDRESS;
4007 		TPF(PageFaultError(-1,
4008 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4009 	}
4010 
4011 	if (status == B_OK) {
4012 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4013 			isUser, NULL);
4014 	}
4015 
4016 	if (status < B_OK) {
4017 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4018 			"0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n",
4019 			strerror(status), address, faultAddress, isWrite, isUser, isExecute,
4020 			thread_get_current_thread_id());
4021 		if (!isUser) {
4022 			Thread* thread = thread_get_current_thread();
4023 			if (thread != NULL && thread->fault_handler != 0) {
4024 				// this will cause the arch-dependent page fault handler to
4025 				// modify the IP on the interrupt frame or whatever to return
4026 				// to this address
4027 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4028 			} else {
4029 				// unhandled page fault in the kernel
4030 				panic("vm_page_fault: unhandled page fault in kernel space at "
4031 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4032 			}
4033 		} else {
4034 			Thread* thread = thread_get_current_thread();
4035 
4036 #ifdef TRACE_FAULTS
4037 			VMArea* area = NULL;
4038 			if (addressSpace != NULL) {
4039 				addressSpace->ReadLock();
4040 				area = addressSpace->LookupArea(faultAddress);
4041 			}
4042 
4043 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4044 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4045 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4046 				thread->team->Name(), thread->team->id,
4047 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4048 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4049 					area->Base() : 0x0));
4050 
4051 			if (addressSpace != NULL)
4052 				addressSpace->ReadUnlock();
4053 #endif
4054 
4055 			// If the thread has a signal handler for SIGSEGV, we simply
4056 			// send it the signal. Otherwise we notify the user debugger
4057 			// first.
4058 			struct sigaction action;
4059 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4060 					&& action.sa_handler != SIG_DFL
4061 					&& action.sa_handler != SIG_IGN)
4062 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4063 					SIGSEGV)) {
4064 				Signal signal(SIGSEGV,
4065 					status == B_PERMISSION_DENIED
4066 						? SEGV_ACCERR : SEGV_MAPERR,
4067 					EFAULT, thread->team->id);
4068 				signal.SetAddress((void*)address);
4069 				send_signal_to_thread(thread, signal, 0);
4070 			}
4071 		}
4072 	}
4073 
4074 	if (addressSpace != NULL)
4075 		addressSpace->Put();
4076 
4077 	return B_HANDLED_INTERRUPT;
4078 }
4079 
4080 
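/*!	Bundles the state needed while resolving a soft page fault: the address
	space and cache chain lockers, the translation map, the page reservation,
	and the results of fault_get_page() (\c page, \c restart,
	\c pageAllocated).
*/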
4081 struct PageFaultContext {
4082 	AddressSpaceReadLocker	addressSpaceLocker;
4083 	VMCacheChainLocker		cacheChainLocker;
4084 
4085 	VMTranslationMap*		map;
4086 	VMCache*				topCache;
4087 	off_t					cacheOffset;
4088 	vm_page_reservation		reservation;
4089 	bool					isWrite;
4090 
4091 	// return values
4092 	vm_page*				page;
4093 	bool					restart;
4094 	bool					pageAllocated;
4095 
4096 
4097 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4098 		:
4099 		addressSpaceLocker(addressSpace, true),
4100 		map(addressSpace->TranslationMap()),
4101 		isWrite(isWrite)
4102 	{
4103 	}
4104 
4105 	~PageFaultContext()
4106 	{
4107 		UnlockAll();
4108 		vm_page_unreserve_pages(&reservation);
4109 	}
4110 
4111 	void Prepare(VMCache* topCache, off_t cacheOffset)
4112 	{
4113 		this->topCache = topCache;
4114 		this->cacheOffset = cacheOffset;
4115 		page = NULL;
4116 		restart = false;
4117 		pageAllocated = false;
4118 
4119 		cacheChainLocker.SetTo(topCache);
4120 	}
4121 
4122 	void UnlockAll(VMCache* exceptCache = NULL)
4123 	{
4124 		topCache = NULL;
4125 		addressSpaceLocker.Unlock();
4126 		cacheChainLocker.Unlock(exceptCache);
4127 	}
4128 };
4129 
4130 
4131 /*!	Gets the page that should be mapped into the area.
4132 	Returns an error code other than \c B_OK, if the page couldn't be found or
4133 	paged in. The locking state of the address space and the caches is undefined
4134 	in that case.
4135 	Returns \c B_OK with \c context.restart set to \c true, if the function
4136 	had to unlock the address space and all caches and is supposed to be called
4137 	again.
4138 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4139 	found. It is returned in \c context.page. The address space will still be
4140 	locked as well as all caches starting from the top cache to at least the
4141 	cache the page lives in.
4142 */
4143 static status_t
4144 fault_get_page(PageFaultContext& context)
4145 {
4146 	VMCache* cache = context.topCache;
4147 	VMCache* lastCache = NULL;
4148 	vm_page* page = NULL;
4149 
4150 	while (cache != NULL) {
4151 		// We already hold the lock of the cache at this point.
4152 
4153 		lastCache = cache;
4154 
4155 		page = cache->LookupPage(context.cacheOffset);
4156 		if (page != NULL && page->busy) {
4157 			// page must be busy -- wait for it to become unbusy
4158 			// the page is busy -- wait for it to become unbusy
4159 			cache->ReleaseRefLocked();
4160 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4161 
4162 			// restart the whole process
4163 			context.restart = true;
4164 			return B_OK;
4165 		}
4166 
4167 		if (page != NULL)
4168 			break;
4169 
4170 		// The current cache does not contain the page we're looking for.
4171 
4172 		// see if the backing store has it
4173 		if (cache->HasPage(context.cacheOffset)) {
4174 			// insert a fresh page and mark it busy -- we're going to read it in
4175 			page = vm_page_allocate_page(&context.reservation,
4176 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4177 			cache->InsertPage(page, context.cacheOffset);
4178 
4179 			// We need to unlock all caches and the address space while reading
4180 			// the page in. Keep a reference to the cache around.
4181 			cache->AcquireRefLocked();
4182 			context.UnlockAll();
4183 
4184 			// read the page in
4185 			generic_io_vec vec;
4186 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4187 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4188 
4189 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4190 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4191 
4192 			cache->Lock();
4193 
4194 			if (status < B_OK) {
4195 				// on error remove and free the page
4196 				dprintf("reading page from cache %p returned: %s!\n",
4197 					cache, strerror(status));
4198 
4199 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4200 				cache->RemovePage(page);
4201 				vm_page_set_state(page, PAGE_STATE_FREE);
4202 
4203 				cache->ReleaseRefAndUnlock();
4204 				return status;
4205 			}
4206 
4207 			// mark the page unbusy again
4208 			cache->MarkPageUnbusy(page);
4209 
4210 			DEBUG_PAGE_ACCESS_END(page);
4211 
4212 			// Since we needed to unlock everything temporarily, the area
4213 			// situation might have changed. So we need to restart the whole
4214 			// process.
4215 			cache->ReleaseRefAndUnlock();
4216 			context.restart = true;
4217 			return B_OK;
4218 		}
4219 
4220 		cache = context.cacheChainLocker.LockSourceCache();
4221 	}
4222 
4223 	if (page == NULL) {
4224 		// There was no adequate page, determine the cache for a clean one.
4225 		// Read-only pages come in the deepest cache; only the topmost cache
4226 		// may have direct write access.
4227 		cache = context.isWrite ? context.topCache : lastCache;
4228 
4229 		// allocate a clean page
4230 		page = vm_page_allocate_page(&context.reservation,
4231 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4232 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4233 			page->physical_page_number));
4234 
4235 		// insert the new page into our cache
4236 		cache->InsertPage(page, context.cacheOffset);
4237 		context.pageAllocated = true;
4238 	} else if (page->Cache() != context.topCache && context.isWrite) {
4239 		// We have a page that has the data we want, but in the wrong cache
4240 		// object so we need to copy it and stick it into the top cache.
4241 		vm_page* sourcePage = page;
4242 
4243 		// TODO: If memory is low, it might be a good idea to steal the page
4244 		// from our source cache -- if possible, that is.
4245 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4246 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4247 
4248 		// To not needlessly kill concurrency we unlock all caches but the top
4249 		// one while copying the page. Lacking another mechanism to ensure that
4250 		// the source page doesn't disappear, we mark it busy.
4251 		sourcePage->busy = true;
4252 		context.cacheChainLocker.UnlockKeepRefs(true);
4253 
4254 		// copy the page
4255 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4256 			sourcePage->physical_page_number * B_PAGE_SIZE);
4257 
4258 		context.cacheChainLocker.RelockCaches(true);
4259 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4260 
4261 		// insert the new page into our cache
4262 		context.topCache->InsertPage(page, context.cacheOffset);
4263 		context.pageAllocated = true;
4264 	} else
4265 		DEBUG_PAGE_ACCESS_START(page);
4266 
4267 	context.page = page;
4268 	return B_OK;
4269 }
4270 
4271 
4272 /*!	Makes sure the address in the given address space is mapped.
4273 
4274 	\param addressSpace The address space.
4275 	\param originalAddress The address. Doesn't need to be page aligned.
4276 	\param isWrite If \c true the address shall be write-accessible.
4277 	\param isUser If \c true the access is requested by a userland team.
4278 	\param wirePage On success, if non \c NULL, the wired count of the page
4279 		mapped at the given address is incremented and the page is returned
4280 		via this parameter.
4281 	\return \c B_OK on success, another error code otherwise.
4282 */
4283 static status_t
4284 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4285 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4286 {
4287 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4288 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4289 		originalAddress, isWrite, isUser));
4290 
4291 	PageFaultContext context(addressSpace, isWrite);
4292 
4293 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4294 	status_t status = B_OK;
4295 
4296 	addressSpace->IncrementFaultCount();
4297 
4298 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4299 	// the pages upfront makes sure we don't have any cache locked, so that the
4300 	// page daemon/thief can do their job without problems.
4301 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4302 		originalAddress);
4303 	context.addressSpaceLocker.Unlock();
4304 	vm_page_reserve_pages(&context.reservation, reservePages,
4305 		addressSpace == VMAddressSpace::Kernel()
4306 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4307 
4308 	while (true) {
4309 		context.addressSpaceLocker.Lock();
4310 
4311 		// get the area the fault was in
4312 		VMArea* area = addressSpace->LookupArea(address);
4313 		if (area == NULL) {
4314 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4315 				"space\n", originalAddress);
4316 			TPF(PageFaultError(-1,
4317 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4318 			status = B_BAD_ADDRESS;
4319 			break;
4320 		}
4321 
4322 		// check permissions
4323 		uint32 protection = get_area_page_protection(area, address);
4324 		if (isUser && (protection & B_USER_PROTECTION) == 0
4325 				&& (area->protection & B_KERNEL_AREA) != 0) {
4326 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4327 				area->id, (void*)originalAddress);
4328 			TPF(PageFaultError(area->id,
4329 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4330 			status = B_PERMISSION_DENIED;
4331 			break;
4332 		}
4333 		if (isWrite && (protection
4334 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4335 			dprintf("write access attempted on write-protected area 0x%"
4336 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4337 			TPF(PageFaultError(area->id,
4338 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4339 			status = B_PERMISSION_DENIED;
4340 			break;
4341 		} else if (isExecute && (protection
4342 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4343 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4344 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4345 			TPF(PageFaultError(area->id,
4346 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4347 			status = B_PERMISSION_DENIED;
4348 			break;
4349 		} else if (!isWrite && !isExecute && (protection
4350 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4351 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4352 				" at %p\n", area->id, (void*)originalAddress);
4353 			TPF(PageFaultError(area->id,
4354 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4355 			status = B_PERMISSION_DENIED;
4356 			break;
4357 		}
4358 
4359 		// We have the area, it was a valid access, so let's try to resolve the
4360 		// page fault now.
4361 		// At first, the top most cache from the area is investigated.
4362 
4363 		context.Prepare(vm_area_get_locked_cache(area),
4364 			address - area->Base() + area->cache_offset);
4365 
4366 		// See if this cache has a fault handler -- this will do all the work
4367 		// for us.
4368 		{
4369 			// Note, since the page fault is resolved with interrupts enabled,
4370 			// the fault handler could be called more than once for the same
4371 			// reason -- the store must take this into account.
4372 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4373 			if (status != B_BAD_HANDLER)
4374 				break;
4375 		}
4376 
4377 		// The top most cache has no fault handler, so let's see if the cache or
4378 		// its sources already have the page we're searching for (we're going
4379 		// from top to bottom).
4380 		status = fault_get_page(context);
4381 		if (status != B_OK) {
4382 			TPF(PageFaultError(area->id, status));
4383 			break;
4384 		}
4385 
4386 		if (context.restart)
4387 			continue;
4388 
4389 		// All went fine, all there is left to do is to map the page into the
4390 		// address space.
4391 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4392 			context.page));
4393 
4394 		// If the page doesn't reside in the area's cache, we need to make sure
4395 		// it's mapped in read-only, so that we cannot overwrite someone else's
4396 		// data (copy-on-write)
4397 		uint32 newProtection = protection;
4398 		if (context.page->Cache() != context.topCache && !isWrite)
4399 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4400 
4401 		bool unmapPage = false;
4402 		bool mapPage = true;
4403 
4404 		// check whether there's already a page mapped at the address
4405 		context.map->Lock();
4406 
4407 		phys_addr_t physicalAddress;
4408 		uint32 flags;
4409 		vm_page* mappedPage = NULL;
4410 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4411 			&& (flags & PAGE_PRESENT) != 0
4412 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4413 				!= NULL) {
4414 			// Yep there's already a page. If it's ours, we can simply adjust
4415 			// its protection. Otherwise we have to unmap it.
4416 			if (mappedPage == context.page) {
4417 				context.map->ProtectPage(area, address, newProtection);
4418 					// Note: We assume that ProtectPage() is atomic (i.e.
4419 					// the page isn't temporarily unmapped), otherwise we'd have
4420 					// to make sure it isn't wired.
4421 				mapPage = false;
4422 			} else
4423 				unmapPage = true;
4424 		}
4425 
4426 		context.map->Unlock();
4427 
4428 		if (unmapPage) {
4429 			// If the page is wired, we can't unmap it. Wait until it is unwired
4430 			// again and restart. Note that the page cannot be wired for
4431 			// writing, since it isn't in the topmost cache. So we can safely
4432 			// ignore ranges wired for writing (our own and other concurrent
4433 			// wiring attempts in progress) and in fact have to do that to avoid
4434 			// a deadlock.
4435 			VMAreaUnwiredWaiter waiter;
4436 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4437 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4438 				// unlock everything and wait
4439 				if (context.pageAllocated) {
4440 					// ... but since we allocated a page and inserted it into
4441 					// the top cache, remove and free it first. Otherwise we'd
4442 					// have a page from a lower cache mapped while an upper
4443 					// cache has a page that would shadow it.
4444 					context.topCache->RemovePage(context.page);
4445 					vm_page_free_etc(context.topCache, context.page,
4446 						&context.reservation);
4447 				} else
4448 					DEBUG_PAGE_ACCESS_END(context.page);
4449 
4450 				context.UnlockAll();
4451 				waiter.waitEntry.Wait();
4452 				continue;
4453 			}
4454 
4455 			// Note: The mapped page is a page of a lower cache. We are
4456 			// guaranteed to have that cache locked, our new page is a copy of
4457 			// that page, and the page is not busy. The logic for that guarantee
4458 			// is as follows: Since the page is mapped, it must live in the top
4459 			// cache (ruled out above) or any of its lower caches, and there is
4460 			// (was before the new page was inserted) no other page in any
4461 			// cache between the top cache and the page's cache (otherwise that
4462 			// would be mapped instead). That in turn means that our algorithm
4463 			// must have found it and therefore it cannot be busy either.
4464 			DEBUG_PAGE_ACCESS_START(mappedPage);
4465 			unmap_page(area, address);
4466 			DEBUG_PAGE_ACCESS_END(mappedPage);
4467 		}
4468 
4469 		if (mapPage) {
4470 			if (map_page(area, context.page, address, newProtection,
4471 					&context.reservation) != B_OK) {
4472 				// Mapping can only fail when the page mapping object couldn't
4473 				// be allocated. Save for the missing mapping everything is
4474 				// fine, though. If this was a regular page fault, we'll simply
4475 				// leave and probably fault again. To make sure we'll have more
4476 				// luck then, we ensure that the minimum object reserve is
4477 				// available.
4478 				DEBUG_PAGE_ACCESS_END(context.page);
4479 
4480 				context.UnlockAll();
4481 
4482 				if (object_cache_reserve(page_mapping_object_cache_for(
4483 							context.page->physical_page_number), 1, 0)
4484 						!= B_OK) {
4485 					// Apparently the situation is serious. Let's get ourselves
4486 					// killed.
4487 					status = B_NO_MEMORY;
4488 				} else if (wirePage != NULL) {
4489 					// The caller expects us to wire the page. Since
4490 					// object_cache_reserve() succeeded, we should now be able
4491 					// to allocate a mapping structure. Restart.
4492 					continue;
4493 				}
4494 
4495 				break;
4496 			}
4497 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4498 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4499 
4500 		// also wire the page, if requested
4501 		if (wirePage != NULL && status == B_OK) {
4502 			increment_page_wired_count(context.page);
4503 			*wirePage = context.page;
4504 		}
4505 
4506 		DEBUG_PAGE_ACCESS_END(context.page);
4507 
4508 		break;
4509 	}
4510 
4511 	return status;
4512 }
4513 
4514 
4515 status_t
4516 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4517 {
4518 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4519 }
4520 
4521 status_t
4522 vm_put_physical_page(addr_t vaddr, void* handle)
4523 {
4524 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4525 }
4526 
4527 
4528 status_t
4529 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4530 	void** _handle)
4531 {
4532 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4533 }
4534 
4535 status_t
4536 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4537 {
4538 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4539 }
4540 
4541 
4542 status_t
4543 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4544 {
4545 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4546 }
4547 
4548 status_t
4549 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4550 {
4551 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4552 }
4553 
4554 
4555 void
4556 vm_get_info(system_info* info)
4557 {
4558 	swap_get_info(info);
4559 
4560 	MutexLocker locker(sAvailableMemoryLock);
4561 	info->needed_memory = sNeededMemory;
4562 	info->free_memory = sAvailableMemory;
4563 }
4564 
4565 
4566 uint32
4567 vm_num_page_faults(void)
4568 {
4569 	return sPageFaults;
4570 }
4571 
4572 
4573 off_t
4574 vm_available_memory(void)
4575 {
4576 	MutexLocker locker(sAvailableMemoryLock);
4577 	return sAvailableMemory;
4578 }
4579 
4580 
4581 /*!	Like vm_available_memory(), but only for use in the kernel
4582 	debugger.
4583 */
4584 off_t
4585 vm_available_memory_debug(void)
4586 {
4587 	return sAvailableMemory;
4588 }
4589 
4590 
4591 off_t
4592 vm_available_not_needed_memory(void)
4593 {
4594 	MutexLocker locker(sAvailableMemoryLock);
4595 	return sAvailableMemory - sNeededMemory;
4596 }
4597 
4598 
4599 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4600 	debugger.
4601 */
4602 off_t
4603 vm_available_not_needed_memory_debug(void)
4604 {
4605 	return sAvailableMemory - sNeededMemory;
4606 }
4607 
4608 
4609 size_t
4610 vm_kernel_address_space_left(void)
4611 {
4612 	return VMAddressSpace::Kernel()->FreeSpace();
4613 }
4614 
4615 
4616 void
4617 vm_unreserve_memory(size_t amount)
4618 {
4619 	mutex_lock(&sAvailableMemoryLock);
4620 
4621 	sAvailableMemory += amount;
4622 
4623 	mutex_unlock(&sAvailableMemoryLock);
4624 }
4625 
4626 
4627 status_t
4628 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4629 {
4630 	size_t reserve = kMemoryReserveForPriority[priority];
4631 
4632 	MutexLocker locker(sAvailableMemoryLock);
4633 
4634 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4635 
4636 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4637 		sAvailableMemory -= amount;
4638 		return B_OK;
4639 	}
4640 
4641 	if (amount >= (vm_page_num_pages() * B_PAGE_SIZE)) {
4642 		// Do not wait for something that will never happen.
4643 		return B_NO_MEMORY;
4644 	}
4645 
4646 	if (timeout <= 0)
4647 		return B_NO_MEMORY;
4648 
4649 	// turn timeout into an absolute timeout
4650 	timeout += system_time();
4651 
4652 	// loop until we've got the memory or the timeout occurs
4653 	do {
4654 		sNeededMemory += amount;
4655 
4656 		// call the low resource manager
4657 		locker.Unlock();
4658 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4659 			B_ABSOLUTE_TIMEOUT, timeout);
4660 		locker.Lock();
4661 
4662 		sNeededMemory -= amount;
4663 
4664 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4665 			sAvailableMemory -= amount;
4666 			return B_OK;
4667 		}
4668 	} while (timeout > system_time());
4669 
4670 	return B_NO_MEMORY;
4671 }
4672 
4673 
4674 status_t
4675 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4676 {
4677 	// NOTE: The caller is responsible for synchronizing calls to this function!
4678 
4679 	AddressSpaceReadLocker locker;
4680 	VMArea* area;
4681 	status_t status = locker.SetFromArea(id, area);
4682 	if (status != B_OK)
4683 		return status;
4684 
4685 	// nothing to do, if the type doesn't change
4686 	uint32 oldType = area->MemoryType();
4687 	if (type == oldType)
4688 		return B_OK;
4689 
4690 	// set the memory type of the area and the mapped pages
4691 	VMTranslationMap* map = area->address_space->TranslationMap();
4692 	map->Lock();
4693 	area->SetMemoryType(type);
4694 	map->ProtectArea(area, area->protection);
4695 	map->Unlock();
4696 
4697 	// set the physical memory type
4698 	status_t error = arch_vm_set_memory_type(area, physicalBase, type, NULL);
4699 	if (error != B_OK) {
4700 		// reset the memory type of the area and the mapped pages
4701 		map->Lock();
4702 		area->SetMemoryType(oldType);
4703 		map->ProtectArea(area, area->protection);
4704 		map->Unlock();
4705 		return error;
4706 	}
4707 
4708 	return B_OK;
4710 }
4711 
4712 
4713 /*!	This function enforces some protection properties:
4714 	 - kernel areas must be W^X (after kernel startup)
4715 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4716 	 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4717 */
4718 static void
4719 fix_protection(uint32* protection)
4720 {
4721 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
4722 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
4723 			|| (*protection & B_WRITE_AREA) != 0)
4724 		&& !gKernelStartup)
4725 		panic("kernel areas cannot be both writable and executable!");
4726 
4727 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4728 		if ((*protection & B_WRITE_AREA) != 0)
4729 			*protection |= B_KERNEL_WRITE_AREA;
4730 		if ((*protection & B_READ_AREA) != 0)
4731 			*protection |= B_KERNEL_READ_AREA;
4732 	}
4733 }
4734 
4735 
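/*!	Fills in the given area_info structure for \a area. The copy/in/out
	counters are not tracked yet and are always reported as 0.
*/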
4736 static void
4737 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4738 {
4739 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4740 	info->area = area->id;
4741 	info->address = (void*)area->Base();
4742 	info->size = area->Size();
4743 	info->protection = area->protection;
4744 	info->lock = area->wiring;
4745 	info->team = area->address_space->ID();
4746 	info->copy_count = 0;
4747 	info->in_count = 0;
4748 	info->out_count = 0;
4749 		// TODO: retrieve real values here!
4750 
4751 	VMCache* cache = vm_area_get_locked_cache(area);
4752 
4753 	// Note, this is a simplification; the cache could be larger than this area
4754 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4755 
4756 	vm_area_put_locked_cache(cache);
4757 }
4758 
4759 
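/*!	Resizes the area with the given ID to \a newSize. Since all areas attached
	to the same cache must have the same size, the other areas are resized as
	well. Only areas backed by a RAM cache can be resized; when shrinking, the
	function waits until the range being cut off is no longer wired.
*/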
4760 static status_t
4761 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4762 {
4763 	// is newSize a multiple of B_PAGE_SIZE?
4764 	if (newSize & (B_PAGE_SIZE - 1))
4765 		return B_BAD_VALUE;
4766 
4767 	// lock all affected address spaces and the cache
4768 	VMArea* area;
4769 	VMCache* cache;
4770 
4771 	MultiAddressSpaceLocker locker;
4772 	AreaCacheLocker cacheLocker;
4773 
4774 	status_t status;
4775 	size_t oldSize;
4776 	bool anyKernelArea;
4777 	bool restart;
4778 
4779 	do {
4780 		anyKernelArea = false;
4781 		restart = false;
4782 
4783 		locker.Unset();
4784 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4785 		if (status != B_OK)
4786 			return status;
4787 		cacheLocker.SetTo(cache, true);	// already locked
4788 
4789 		// enforce restrictions
4790 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
4791 				|| (area->protection & B_KERNEL_AREA) != 0)) {
4792 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
4793 				"resize kernel area %" B_PRId32 " (%s)\n",
4794 				team_get_current_team_id(), areaID, area->name);
4795 			return B_NOT_ALLOWED;
4796 		}
4797 		// TODO: Enforce all restrictions (team, etc.)!
4798 
4799 		oldSize = area->Size();
4800 		if (newSize == oldSize)
4801 			return B_OK;
4802 
4803 		if (cache->type != CACHE_TYPE_RAM)
4804 			return B_NOT_ALLOWED;
4805 
4806 		if (oldSize < newSize) {
4807 			// We need to check if all areas of this cache can be resized.
4808 			for (VMArea* current = cache->areas; current != NULL;
4809 					current = current->cache_next) {
4810 				if (!current->address_space->CanResizeArea(current, newSize))
4811 					return B_ERROR;
4812 				anyKernelArea
4813 					|= current->address_space == VMAddressSpace::Kernel();
4814 			}
4815 		} else {
4816 			// We're shrinking the areas, so we must make sure the affected
4817 			// ranges are not wired.
4818 			for (VMArea* current = cache->areas; current != NULL;
4819 					current = current->cache_next) {
4820 				anyKernelArea
4821 					|= current->address_space == VMAddressSpace::Kernel();
4822 
4823 				if (wait_if_area_range_is_wired(current,
4824 						current->Base() + newSize, oldSize - newSize, &locker,
4825 						&cacheLocker)) {
4826 					restart = true;
4827 					break;
4828 				}
4829 			}
4830 		}
4831 	} while (restart);
4832 
4833 	// Okay, looks good so far, so let's do it
4834 
4835 	int priority = kernel && anyKernelArea
4836 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
4837 	uint32 allocationFlags = kernel && anyKernelArea
4838 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
4839 
4840 	if (oldSize < newSize) {
4841 		// Growing the cache can fail, so we do it first.
4842 		status = cache->Resize(cache->virtual_base + newSize, priority);
4843 		if (status != B_OK)
4844 			return status;
4845 	}
4846 
4847 	for (VMArea* current = cache->areas; current != NULL;
4848 			current = current->cache_next) {
4849 		status = current->address_space->ResizeArea(current, newSize,
4850 			allocationFlags);
4851 		if (status != B_OK)
4852 			break;
4853 
4854 		// We also need to unmap all pages beyond the new size, if the area has
4855 		// shrunk
4856 		if (newSize < oldSize) {
4857 			VMCacheChainLocker cacheChainLocker(cache);
4858 			cacheChainLocker.LockAllSourceCaches();
4859 
4860 			unmap_pages(current, current->Base() + newSize,
4861 				oldSize - newSize);
4862 
4863 			cacheChainLocker.Unlock(cache);
4864 		}
4865 	}
4866 
4867 	if (status == B_OK) {
4868 		// Shrink or grow individual page protections if in use.
4869 		if (area->page_protections != NULL) {
4870 			size_t bytes = area_page_protections_size(newSize);
4871 			uint8* newProtections
4872 				= (uint8*)realloc(area->page_protections, bytes);
4873 			if (newProtections == NULL)
4874 				status = B_NO_MEMORY;
4875 			else {
4876 				area->page_protections = newProtections;
4877 
4878 				if (oldSize < newSize) {
4879 					// init the additional page protections to that of the area
4880 					uint32 offset = area_page_protections_size(oldSize);
4881 					uint32 areaProtection = area->protection
4882 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
4883 					memset(area->page_protections + offset,
4884 						areaProtection | (areaProtection << 4), bytes - offset);
4885 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
4886 						uint8& entry = area->page_protections[offset - 1];
4887 						entry = (entry & 0x0f) | (areaProtection << 4);
4888 					}
4889 				}
4890 			}
4891 		}
4892 	}
4893 
4894 	// shrinking the cache can't fail, so we do it now
4895 	if (status == B_OK && newSize < oldSize)
4896 		status = cache->Resize(cache->virtual_base + newSize, priority);
4897 
4898 	if (status != B_OK) {
4899 		// Something failed -- resize the areas back to their original size.
4900 		// This can fail, too, in which case we're seriously screwed.
4901 		for (VMArea* current = cache->areas; current != NULL;
4902 				current = current->cache_next) {
4903 			if (current->address_space->ResizeArea(current, oldSize,
4904 					allocationFlags) != B_OK) {
4905 				panic("vm_resize_area(): Failed and not being able to restore "
4906 					"original state.");
4907 			}
4908 		}
4909 
4910 		cache->Resize(cache->virtual_base + oldSize, priority);
4911 	}
4912 
4913 	// TODO: we must honour the lock restrictions of this area
4914 	return status;
4915 }
4916 
4917 
4918 status_t
4919 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
4920 {
4921 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
4922 }
4923 
4924 
4925 status_t
4926 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
4927 {
4928 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
4929 }
4930 
4931 
4932 status_t
4933 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
4934 	bool user)
4935 {
4936 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
4937 }
4938 
4939 
4940 void
4941 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
4942 {
4943 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
4944 }
4945 
4946 
4947 /*!	Validates that a memory range is either fully in kernel space or fully
4948 	in userspace. */
4949 static inline bool
4950 validate_memory_range(const void* addr, size_t size)
4951 {
4952 	addr_t address = (addr_t)addr;
4953 
4954 	// Check for overflows on all addresses.
4955 	if ((address + size) < address)
4956 		return false;
4957 
4958 	// Validate that the address range does not cross the kernel/user boundary.
4959 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
4960 }
4961 
4962 
4963 //	#pragma mark - kernel public API
4964 
4965 
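/*!	Copies \a size bytes from \a from to \a to, where either pointer may refer
	to userland memory. Returns \c B_BAD_ADDRESS, if a range crosses the
	kernel/user boundary or the copy faults.
*/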
4966 status_t
4967 user_memcpy(void* to, const void* from, size_t size)
4968 {
4969 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
4970 		return B_BAD_ADDRESS;
4971 
4972 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
4973 		return B_BAD_ADDRESS;
4974 
4975 	return B_OK;
4976 }
4977 
4978 
4979 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
4980 	the string in \a to, NULL-terminating the result.
4981 
4982 	\param to Pointer to the destination C-string.
4983 	\param from Pointer to the source C-string.
4984 	\param size Size in bytes of the string buffer pointed to by \a to.
4985 
4986 	\return strlen(\a from).
4987 */
4988 ssize_t
4989 user_strlcpy(char* to, const char* from, size_t size)
4990 {
4991 	if (to == NULL && size != 0)
4992 		return B_BAD_VALUE;
4993 	if (from == NULL)
4994 		return B_BAD_ADDRESS;
4995 
4996 	// Protect the source address from overflows.
4997 	size_t maxSize = size;
4998 	if ((addr_t)from + maxSize < (addr_t)from)
4999 		maxSize -= (addr_t)from + maxSize;
5000 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5001 		maxSize = USER_TOP - (addr_t)from;
5002 
5003 	if (!validate_memory_range(to, maxSize))
5004 		return B_BAD_ADDRESS;
5005 
5006 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5007 	if (result < 0)
5008 		return result;
5009 
5010 	// If we hit the address overflow boundary, fail.
5011 	if ((size_t)result >= maxSize && maxSize < size)
5012 		return B_BAD_ADDRESS;
5013 
5014 	return result;
5015 }
5016 
5017 
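/*!	Sets \a count bytes starting at \a s to the value \a c, where \a s may
	point to userland memory. Returns \c B_BAD_ADDRESS, if the range crosses
	the kernel/user boundary or the write faults.
*/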
5018 status_t
5019 user_memset(void* s, char c, size_t count)
5020 {
5021 	if (!validate_memory_range(s, count))
5022 		return B_BAD_ADDRESS;
5023 
5024 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5025 		return B_BAD_ADDRESS;
5026 
5027 	return B_OK;
5028 }
5029 
5030 
5031 /*!	Wires a single page at the given address.
5032 
5033 	\param team The team whose address space the address belongs to. Supports
5034 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5035 		parameter is ignored.
5036 	\param address The virtual address to wire down. Does not need to
5037 		be page aligned.
5038 	\param writable If \c true the page shall be writable.
5039 	\param info On success the info is filled in, among other things
5040 		containing the physical address the given virtual one translates to.
5041 	\return \c B_OK, when the page could be wired, another error code otherwise.
5042 */
5043 status_t
5044 vm_wire_page(team_id team, addr_t address, bool writable,
5045 	VMPageWiringInfo* info)
5046 {
5047 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5048 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5049 
5050 	// compute the page protection that is required
5051 	bool isUser = IS_USER_ADDRESS(address);
5052 	uint32 requiredProtection = PAGE_PRESENT
5053 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5054 	if (writable)
5055 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5056 
5057 	// get and read lock the address space
5058 	VMAddressSpace* addressSpace = NULL;
5059 	if (isUser) {
5060 		if (team == B_CURRENT_TEAM)
5061 			addressSpace = VMAddressSpace::GetCurrent();
5062 		else
5063 			addressSpace = VMAddressSpace::Get(team);
5064 	} else
5065 		addressSpace = VMAddressSpace::GetKernel();
5066 	if (addressSpace == NULL)
5067 		return B_ERROR;
5068 
5069 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5070 
5071 	VMTranslationMap* map = addressSpace->TranslationMap();
5072 	status_t error = B_OK;
5073 
5074 	// get the area
5075 	VMArea* area = addressSpace->LookupArea(pageAddress);
5076 	if (area == NULL) {
5077 		addressSpace->Put();
5078 		return B_BAD_ADDRESS;
5079 	}
5080 
5081 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5082 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5083 
5084 	// mark the area range wired
5085 	area->Wire(&info->range);
5086 
5087 	// Lock the area's cache chain and the translation map. Needed to look
5088 	// up the page and play with its wired count.
5089 	cacheChainLocker.LockAllSourceCaches();
5090 	map->Lock();
5091 
5092 	phys_addr_t physicalAddress;
5093 	uint32 flags;
5094 	vm_page* page;
5095 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5096 		&& (flags & requiredProtection) == requiredProtection
5097 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5098 			!= NULL) {
5099 		// Already mapped with the correct permissions -- just increment
5100 		// the page's wired count.
5101 		increment_page_wired_count(page);
5102 
5103 		map->Unlock();
5104 		cacheChainLocker.Unlock();
5105 		addressSpaceLocker.Unlock();
5106 	} else {
5107 		// Let vm_soft_fault() map the page for us, if possible. We need
5108 		// to fully unlock to avoid deadlocks. Since we have already
5109 		// wired the area itself, nothing disturbing will happen with it
5110 		// in the meantime.
5111 		map->Unlock();
5112 		cacheChainLocker.Unlock();
5113 		addressSpaceLocker.Unlock();
5114 
5115 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5116 			isUser, &page);
5117 
5118 		if (error != B_OK) {
5119 			// The page could not be mapped -- clean up.
5120 			VMCache* cache = vm_area_get_locked_cache(area);
5121 			area->Unwire(&info->range);
5122 			cache->ReleaseRefAndUnlock();
5123 			addressSpace->Put();
5124 			return error;
5125 		}
5126 	}
5127 
5128 	info->physicalAddress
5129 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5130 			+ address % B_PAGE_SIZE;
5131 	info->page = page;
5132 
5133 	return B_OK;
5134 }
5135 
5136 
5137 /*!	Unwires a single page previously wired via vm_wire_page().
5138 
5139 	\param info The same object passed to vm_wire_page() before.
5140 */
5141 void
5142 vm_unwire_page(VMPageWiringInfo* info)
5143 {
5144 	// lock the address space
5145 	VMArea* area = info->range.area;
5146 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5147 		// takes over our reference
5148 
5149 	// lock the top cache
5150 	VMCache* cache = vm_area_get_locked_cache(area);
5151 	VMCacheChainLocker cacheChainLocker(cache);
5152 
5153 	if (info->page->Cache() != cache) {
5154 		// The page is not in the top cache, so we lock the whole cache chain
5155 		// before touching the page's wired count.
5156 		cacheChainLocker.LockAllSourceCaches();
5157 	}
5158 
5159 	decrement_page_wired_count(info->page);
5160 
5161 	// remove the wired range from the area
5162 	area->Unwire(&info->range);
5163 
5164 	cacheChainLocker.Unlock();
5165 }
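

/*	A minimal sketch of the vm_wire_page()/vm_unwire_page() pairing
	("userAddress" being a hypothetical, not necessarily page aligned
	address):

		VMPageWiringInfo info;
		status_t status = vm_wire_page(B_CURRENT_TEAM, userAddress, true,
			&info);
		if (status == B_OK) {
			// info.physicalAddress now holds the physical address the
			// virtual one translates to; the page stays wired until
			// vm_unwire_page() is called.
			vm_unwire_page(&info);
		}
*/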
5166 
5167 
5168 /*!	Wires down the given address range in the specified team's address space.
5169 
5170 	If successful the function
5171 	- acquires a reference to the specified team's address space,
5172 	- adds respective wired ranges to all areas that intersect with the given
5173 	  address range,
5174 	- makes sure all pages in the given address range are mapped with the
5175 	  requested access permissions and increments their wired count.
5176 
5177 	It fails when \a team doesn't specify a valid address space, when any part
5178 	of the specified address range is not covered by areas, when the concerned
5179 	areas don't allow mapping with the requested permissions, or when mapping
5180 	failed for another reason.
5181 
5182 	When successful, the call must be balanced by an unlock_memory_etc() call with
5183 	the exact same parameters.
5184 
5185 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5186 		supported.
5187 	\param address The start of the address range to be wired.
5188 	\param numBytes The size of the address range to be wired.
5189 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5190 		requests that the range must be wired writable ("read from device
5191 		into memory").
5192 	\return \c B_OK on success, another error code otherwise.
5193 */
5194 status_t
5195 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5196 {
5197 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5198 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5199 
5200 	// compute the page protection that is required
5201 	bool isUser = IS_USER_ADDRESS(address);
5202 	bool writable = (flags & B_READ_DEVICE) == 0;
5203 	uint32 requiredProtection = PAGE_PRESENT
5204 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5205 	if (writable)
5206 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5207 
5208 	uint32 mallocFlags = isUser
5209 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5210 
5211 	// get and read lock the address space
5212 	VMAddressSpace* addressSpace = NULL;
5213 	if (isUser) {
5214 		if (team == B_CURRENT_TEAM)
5215 			addressSpace = VMAddressSpace::GetCurrent();
5216 		else
5217 			addressSpace = VMAddressSpace::Get(team);
5218 	} else
5219 		addressSpace = VMAddressSpace::GetKernel();
5220 	if (addressSpace == NULL)
5221 		return B_ERROR;
5222 
5223 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5224 		// We get a new address space reference here. The one we got above will
5225 		// be freed by unlock_memory_etc().
5226 
5227 	VMTranslationMap* map = addressSpace->TranslationMap();
5228 	status_t error = B_OK;
5229 
5230 	// iterate through all concerned areas
5231 	addr_t nextAddress = lockBaseAddress;
5232 	while (nextAddress != lockEndAddress) {
5233 		// get the next area
5234 		VMArea* area = addressSpace->LookupArea(nextAddress);
5235 		if (area == NULL) {
5236 			error = B_BAD_ADDRESS;
5237 			break;
5238 		}
5239 
5240 		addr_t areaStart = nextAddress;
5241 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5242 
5243 		// allocate the wired range (do that before locking the cache to avoid
5244 		// deadlocks)
5245 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5246 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5247 		if (range == NULL) {
5248 			error = B_NO_MEMORY;
5249 			break;
5250 		}
5251 
5252 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5253 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5254 
5255 		// mark the area range wired
5256 		area->Wire(range);
5257 
5258 		// Depending on the area cache type and the wiring, we may not need to
5259 		// look at the individual pages.
5260 		if (area->cache_type == CACHE_TYPE_NULL
5261 			|| area->cache_type == CACHE_TYPE_DEVICE
5262 			|| area->wiring == B_FULL_LOCK
5263 			|| area->wiring == B_CONTIGUOUS) {
5264 			nextAddress = areaEnd;
5265 			continue;
5266 		}
5267 
5268 		// Lock the area's cache chain and the translation map. Needed to look
5269 		// up pages and play with their wired count.
5270 		cacheChainLocker.LockAllSourceCaches();
5271 		map->Lock();
5272 
5273 		// iterate through the pages and wire them
5274 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5275 			phys_addr_t physicalAddress;
5276 			uint32 flags;
5277 
5278 			vm_page* page;
5279 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5280 				&& (flags & requiredProtection) == requiredProtection
5281 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5282 					!= NULL) {
5283 				// Already mapped with the correct permissions -- just increment
5284 				// the page's wired count.
5285 				increment_page_wired_count(page);
5286 			} else {
5287 				// Let vm_soft_fault() map the page for us, if possible. We need
5288 				// to fully unlock to avoid deadlocks. Since we have already
5289 				// wired the area itself, nothing disturbing will happen with it
5290 				// in the meantime.
5291 				map->Unlock();
5292 				cacheChainLocker.Unlock();
5293 				addressSpaceLocker.Unlock();
5294 
5295 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5296 					false, isUser, &page);
5297 
5298 				addressSpaceLocker.Lock();
5299 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5300 				cacheChainLocker.LockAllSourceCaches();
5301 				map->Lock();
5302 			}
5303 
5304 			if (error != B_OK)
5305 				break;
5306 		}
5307 
5308 		map->Unlock();
5309 
5310 		if (error == B_OK) {
5311 			cacheChainLocker.Unlock();
5312 		} else {
5313 			// An error occurred, so abort right here. If the current address
5314 			// is the first in this area, unwire the area, since we won't get
5315 			// to it when reverting what we've done so far.
5316 			if (nextAddress == areaStart) {
5317 				area->Unwire(range);
5318 				cacheChainLocker.Unlock();
5319 				range->~VMAreaWiredRange();
5320 				free_etc(range, mallocFlags);
5321 			} else
5322 				cacheChainLocker.Unlock();
5323 
5324 			break;
5325 		}
5326 	}
5327 
5328 	if (error != B_OK) {
5329 		// An error occurred, so unwire all that we've already wired. Note that
5330 		// even if not a single page was wired, unlock_memory_etc() is called
5331 		// to put the address space reference.
5332 		addressSpaceLocker.Unlock();
5333 		unlock_memory_etc(team, (void*)lockBaseAddress,
5334 			nextAddress - lockBaseAddress, flags);
5335 	}
5336 
5337 	return error;
5338 }
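

/*	A minimal sketch of the balancing requirement described above ("team",
	"buffer" and "length" being hypothetical values; flags 0 wires the range
	writable):

		status_t status = lock_memory_etc(team, buffer, length, 0);
		if (status != B_OK)
			return status;
		// ... perform the I/O on the now wired range ...
		unlock_memory_etc(team, buffer, length, 0);
*/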
5339 
5340 
5341 status_t
5342 lock_memory(void* address, size_t numBytes, uint32 flags)
5343 {
5344 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5345 }
5346 
5347 
5348 /*!	Unwires an address range previously wired with lock_memory_etc().
5349 
5350 	Note that a call to this function must balance a previous lock_memory_etc()
5351 	call with exactly the same parameters.
5352 */
5353 status_t
5354 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5355 {
5356 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5357 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5358 
5359 	// compute the page protection that is required
5360 	bool isUser = IS_USER_ADDRESS(address);
5361 	bool writable = (flags & B_READ_DEVICE) == 0;
5362 	uint32 requiredProtection = PAGE_PRESENT
5363 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5364 	if (writable)
5365 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5366 
5367 	uint32 mallocFlags = isUser
5368 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5369 
5370 	// get and read lock the address space
5371 	VMAddressSpace* addressSpace = NULL;
5372 	if (isUser) {
5373 		if (team == B_CURRENT_TEAM)
5374 			addressSpace = VMAddressSpace::GetCurrent();
5375 		else
5376 			addressSpace = VMAddressSpace::Get(team);
5377 	} else
5378 		addressSpace = VMAddressSpace::GetKernel();
5379 	if (addressSpace == NULL)
5380 		return B_ERROR;
5381 
5382 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5383 		// Take over the address space reference. We don't unlock until we're
5384 		// done.
5385 
5386 	VMTranslationMap* map = addressSpace->TranslationMap();
5387 	status_t error = B_OK;
5388 
5389 	// iterate through all concerned areas
5390 	addr_t nextAddress = lockBaseAddress;
5391 	while (nextAddress != lockEndAddress) {
5392 		// get the next area
5393 		VMArea* area = addressSpace->LookupArea(nextAddress);
5394 		if (area == NULL) {
5395 			error = B_BAD_ADDRESS;
5396 			break;
5397 		}
5398 
5399 		addr_t areaStart = nextAddress;
5400 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5401 
5402 		// Lock the area's top cache. This is a requirement for
5403 		// VMArea::Unwire().
5404 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5405 
5406 		// Depending on the area cache type and the wiring, we may not need to
5407 		// look at the individual pages.
5408 		if (area->cache_type == CACHE_TYPE_NULL
5409 			|| area->cache_type == CACHE_TYPE_DEVICE
5410 			|| area->wiring == B_FULL_LOCK
5411 			|| area->wiring == B_CONTIGUOUS) {
5412 			// unwire the range (to avoid deadlocks we delete the range after
5413 			// unlocking the cache)
5414 			nextAddress = areaEnd;
5415 			VMAreaWiredRange* range = area->Unwire(areaStart,
5416 				areaEnd - areaStart, writable);
5417 			cacheChainLocker.Unlock();
5418 			if (range != NULL) {
5419 				range->~VMAreaWiredRange();
5420 				free_etc(range, mallocFlags);
5421 			}
5422 			continue;
5423 		}
5424 
5425 		// Lock the area's cache chain and the translation map. Needed to look
5426 		// up pages and play with their wired count.
5427 		cacheChainLocker.LockAllSourceCaches();
5428 		map->Lock();
5429 
5430 		// iterate through the pages and unwire them
5431 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5432 			phys_addr_t physicalAddress;
5433 			uint32 flags;
5434 
5435 			vm_page* page;
5436 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5437 				&& (flags & PAGE_PRESENT) != 0
5438 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5439 					!= NULL) {
5440 				// The page is still mapped -- just decrement
5441 				// the page's wired count.
5442 				decrement_page_wired_count(page);
5443 			} else {
5444 				panic("unlock_memory_etc(): Failed to unwire page: address "
5445 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5446 					nextAddress);
5447 				error = B_BAD_VALUE;
5448 				break;
5449 			}
5450 		}
5451 
5452 		map->Unlock();
5453 
5454 		// All pages are unwired. Remove the area's wired range as well (to
5455 		// avoid deadlocks we delete the range after unlocking the cache).
5456 		VMAreaWiredRange* range = area->Unwire(areaStart,
5457 			areaEnd - areaStart, writable);
5458 
5459 		cacheChainLocker.Unlock();
5460 
5461 		if (range != NULL) {
5462 			range->~VMAreaWiredRange();
5463 			free_etc(range, mallocFlags);
5464 		}
5465 
5466 		if (error != B_OK)
5467 			break;
5468 	}
5469 
5470 	// get rid of the address space reference lock_memory_etc() acquired
5471 	addressSpace->Put();
5472 
5473 	return error;
5474 }
5475 
5476 
5477 status_t
5478 unlock_memory(void* address, size_t numBytes, uint32 flags)
5479 {
5480 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5481 }
5482 
5483 
5484 /*!	Similar to get_memory_map(), but also allows specifying the address space
5485 	for the memory in question and has saner semantics.
5486 	Returns \c B_OK when the complete range could be translated or
5487 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5488 	case the actual number of entries is written to \c *_numEntries. Any other
5489 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5490 	in this case.
5491 */
5492 status_t
5493 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5494 	physical_entry* table, uint32* _numEntries)
5495 {
5496 	uint32 numEntries = *_numEntries;
5497 	*_numEntries = 0;
5498 
5499 	VMAddressSpace* addressSpace;
5500 	addr_t virtualAddress = (addr_t)address;
5501 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5502 	phys_addr_t physicalAddress;
5503 	status_t status = B_OK;
5504 	int32 index = -1;
5505 	addr_t offset = 0;
5506 	bool interrupts = are_interrupts_enabled();
5507 
5508 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5509 		"entries)\n", team, address, numBytes, numEntries));
5510 
5511 	if (numEntries == 0 || numBytes == 0)
5512 		return B_BAD_VALUE;
5513 
5514 	// in which address space is the address to be found?
5515 	if (IS_USER_ADDRESS(virtualAddress)) {
5516 		if (team == B_CURRENT_TEAM)
5517 			addressSpace = VMAddressSpace::GetCurrent();
5518 		else
5519 			addressSpace = VMAddressSpace::Get(team);
5520 	} else
5521 		addressSpace = VMAddressSpace::GetKernel();
5522 
5523 	if (addressSpace == NULL)
5524 		return B_ERROR;
5525 
5526 	VMTranslationMap* map = addressSpace->TranslationMap();
5527 
5528 	if (interrupts)
5529 		map->Lock();
5530 
5531 	while (offset < numBytes) {
5532 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5533 		uint32 flags;
5534 
5535 		if (interrupts) {
5536 			status = map->Query((addr_t)address + offset, &physicalAddress,
5537 				&flags);
5538 		} else {
5539 			status = map->QueryInterrupt((addr_t)address + offset,
5540 				&physicalAddress, &flags);
5541 		}
5542 		if (status < B_OK)
5543 			break;
5544 		if ((flags & PAGE_PRESENT) == 0) {
5545 			panic("get_memory_map() called on unmapped memory!");
5546 			return B_BAD_ADDRESS;
5547 		}
5548 
5549 		if (index < 0 && pageOffset > 0) {
5550 			physicalAddress += pageOffset;
5551 			if (bytes > B_PAGE_SIZE - pageOffset)
5552 				bytes = B_PAGE_SIZE - pageOffset;
5553 		}
5554 
5555 		// need to switch to the next physical_entry?
5556 		if (index < 0 || table[index].address
5557 				!= physicalAddress - table[index].size) {
5558 			if ((uint32)++index + 1 > numEntries) {
5559 				// table too small
5560 				break;
5561 			}
5562 			table[index].address = physicalAddress;
5563 			table[index].size = bytes;
5564 		} else {
5565 			// the page is contiguous with the current entry -- extend it
5566 			table[index].size += bytes;
5567 		}
5568 
5569 		offset += bytes;
5570 	}
5571 
5572 	if (interrupts)
5573 		map->Unlock();
5574 
5575 	if (status != B_OK)
5576 		return status;
5577 
5578 	if ((uint32)index + 1 > numEntries) {
5579 		*_numEntries = index;
5580 		return B_BUFFER_OVERFLOW;
5581 	}
5582 
5583 	*_numEntries = index + 1;
5584 	return B_OK;
5585 }
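

/*	A minimal sketch of translating a buffer into physical runs ("buffer" and
	"length" being hypothetical values); \c B_BUFFER_OVERFLOW only means the
	table was too small, "count" still reports how many entries were filled:

		physical_entry entries[8];
		uint32 count = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
			entries, &count);
		if (status != B_OK && status != B_BUFFER_OVERFLOW)
			return status;
		// entries[0 .. count - 1] now describe the physical ranges.
*/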
5586 
5587 
5588 /*!	According to the BeBook, this function should always succeed.
5589 	This is no longer the case.
5590 */
5591 extern "C" int32
5592 __get_memory_map_haiku(const void* address, size_t numBytes,
5593 	physical_entry* table, int32 numEntries)
5594 {
5595 	uint32 entriesRead = numEntries;
5596 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5597 		table, &entriesRead);
5598 	if (error != B_OK)
5599 		return error;
5600 
5601 	// close the entry list
5602 
5603 	// if there's only one entry, we silently accept the missing terminator
5604 	if (numEntries == 1)
5605 		return B_OK;
5606 
5607 	if (entriesRead + 1 > (uint32)numEntries)
5608 		return B_BUFFER_OVERFLOW;
5609 
5610 	table[entriesRead].address = 0;
5611 	table[entriesRead].size = 0;
5612 
5613 	return B_OK;
5614 }
5615 
5616 
5617 area_id
5618 area_for(void* address)
5619 {
5620 	return vm_area_for((addr_t)address, true);
5621 }
5622 
5623 
5624 area_id
5625 find_area(const char* name)
5626 {
5627 	return VMAreas::Find(name);
5628 }
5629 
5630 
5631 status_t
5632 _get_area_info(area_id id, area_info* info, size_t size)
5633 {
5634 	if (size != sizeof(area_info) || info == NULL)
5635 		return B_BAD_VALUE;
5636 
5637 	AddressSpaceReadLocker locker;
5638 	VMArea* area;
5639 	status_t status = locker.SetFromArea(id, area);
5640 	if (status != B_OK)
5641 		return status;
5642 
5643 	fill_area_info(area, info, size);
5644 	return B_OK;
5645 }
5646 
5647 
5648 status_t
5649 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5650 {
5651 	addr_t nextBase = *(addr_t*)cookie;
5652 
5653 	// we're already through the list
5654 	if (nextBase == (addr_t)-1)
5655 		return B_ENTRY_NOT_FOUND;
5656 
5657 	if (team == B_CURRENT_TEAM)
5658 		team = team_get_current_team_id();
5659 
5660 	AddressSpaceReadLocker locker(team);
5661 	if (!locker.IsLocked())
5662 		return B_BAD_TEAM_ID;
5663 
5664 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
5665 	if (area == NULL) {
5666 		nextBase = (addr_t)-1;
5667 		return B_ENTRY_NOT_FOUND;
5668 	}
5669 
5670 	fill_area_info(area, info, size);
5671 	*cookie = (ssize_t)(area->Base() + 1);
5672 
5673 	return B_OK;
5674 }
5675 
5676 
5677 status_t
5678 set_area_protection(area_id area, uint32 newProtection)
5679 {
5680 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5681 		newProtection, true);
5682 }
5683 
5684 
5685 status_t
5686 resize_area(area_id areaID, size_t newSize)
5687 {
5688 	return vm_resize_area(areaID, newSize, true);
5689 }
5690 
5691 
5692 /*!	Transfers the specified area to a new team. The caller must be the owner
5693 	of the area.
5694 */
5695 area_id
5696 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5697 	bool kernel)
5698 {
5699 	area_info info;
5700 	status_t status = get_area_info(id, &info);
5701 	if (status != B_OK)
5702 		return status;
5703 
5704 	if (!kernel && info.team != thread_get_current_thread()->team->id)
5705 		return B_PERMISSION_DENIED;
5706 
5707 	// We need to mark the area cloneable so the following operations work.
5708 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
5709 	if (status != B_OK)
5710 		return status;
5711 
5712 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5713 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5714 	if (clonedArea < 0)
5715 		return clonedArea;
5716 
5717 	status = vm_delete_area(info.team, id, kernel);
5718 	if (status != B_OK) {
5719 		vm_delete_area(target, clonedArea, kernel);
5720 		return status;
5721 	}
5722 
5723 	// Now we can reset the protection to whatever it was before.
5724 	set_area_protection(clonedArea, info.protection);
5725 
5726 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5727 
5728 	return clonedArea;
5729 }
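

/*	A minimal sketch, analogous to what _user_transfer_area() below does
	("areaID" and "targetTeam" being hypothetical values; the caller must own
	the area):

		void* address = NULL;
		area_id newArea = transfer_area(areaID, &address, B_ANY_ADDRESS,
			targetTeam, false);
		if (newArea < B_OK)
			return newArea;
		// "address" now holds the area's base address in the target team.
*/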
5730 
5731 
5732 extern "C" area_id
5733 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
5734 	size_t numBytes, uint32 addressSpec, uint32 protection,
5735 	void** _virtualAddress)
5736 {
5737 	if (!arch_vm_supports_protection(protection))
5738 		return B_NOT_SUPPORTED;
5739 
5740 	fix_protection(&protection);
5741 
5742 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
5743 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
5744 		false);
5745 }
5746 
5747 
5748 area_id
5749 clone_area(const char* name, void** _address, uint32 addressSpec,
5750 	uint32 protection, area_id source)
5751 {
5752 	if ((protection & B_KERNEL_PROTECTION) == 0)
5753 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5754 
5755 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
5756 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
5757 }
5758 
5759 
5760 area_id
5761 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
5762 	uint32 protection, uint32 flags, uint32 guardSize,
5763 	const virtual_address_restrictions* virtualAddressRestrictions,
5764 	const physical_address_restrictions* physicalAddressRestrictions,
5765 	void** _address)
5766 {
5767 	fix_protection(&protection);
5768 
5769 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
5770 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
5771 		true, _address);
5772 }
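

/*	A minimal sketch of filling in the restriction structures (the "buffer"
	name and size are hypothetical; empty physical restrictions mean "no
	constraints"):

		virtual_address_restrictions virtualRestrictions = {};
		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
		physical_address_restrictions physicalRestrictions = {};
		void* address;
		area_id area = create_area_etc(VMAddressSpace::KernelID(), "buffer",
			16 * B_PAGE_SIZE, B_FULL_LOCK,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
			&virtualRestrictions, &physicalRestrictions, &address);
*/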
5773 
5774 
5775 extern "C" area_id
5776 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
5777 	size_t size, uint32 lock, uint32 protection)
5778 {
5779 	fix_protection(&protection);
5780 
5781 	virtual_address_restrictions virtualRestrictions = {};
5782 	virtualRestrictions.address = *_address;
5783 	virtualRestrictions.address_specification = addressSpec;
5784 	physical_address_restrictions physicalRestrictions = {};
5785 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
5786 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
5787 		true, _address);
5788 }
5789 
5790 
5791 status_t
5792 delete_area(area_id area)
5793 {
5794 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
5795 }
5796 
5797 
5798 //	#pragma mark - Userland syscalls
5799 
5800 
5801 status_t
5802 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
5803 	addr_t size)
5804 {
5805 	// filter out some unavailable values (for userland)
5806 	switch (addressSpec) {
5807 		case B_ANY_KERNEL_ADDRESS:
5808 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5809 			return B_BAD_VALUE;
5810 	}
5811 
5812 	addr_t address;
5813 
5814 	if (!IS_USER_ADDRESS(userAddress)
5815 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
5816 		return B_BAD_ADDRESS;
5817 
5818 	status_t status = vm_reserve_address_range(
5819 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
5820 		RESERVED_AVOID_BASE);
5821 	if (status != B_OK)
5822 		return status;
5823 
5824 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
5825 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5826 			(void*)address, size);
5827 		return B_BAD_ADDRESS;
5828 	}
5829 
5830 	return B_OK;
5831 }
5832 
5833 
5834 status_t
5835 _user_unreserve_address_range(addr_t address, addr_t size)
5836 {
5837 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5838 		(void*)address, size);
5839 }
5840 
5841 
5842 area_id
5843 _user_area_for(void* address)
5844 {
5845 	return vm_area_for((addr_t)address, false);
5846 }
5847 
5848 
5849 area_id
5850 _user_find_area(const char* userName)
5851 {
5852 	char name[B_OS_NAME_LENGTH];
5853 
5854 	if (!IS_USER_ADDRESS(userName)
5855 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5856 		return B_BAD_ADDRESS;
5857 
5858 	return find_area(name);
5859 }
5860 
5861 
5862 status_t
5863 _user_get_area_info(area_id area, area_info* userInfo)
5864 {
5865 	if (!IS_USER_ADDRESS(userInfo))
5866 		return B_BAD_ADDRESS;
5867 
5868 	area_info info;
5869 	status_t status = get_area_info(area, &info);
5870 	if (status < B_OK)
5871 		return status;
5872 
5873 	// TODO: do we want to prevent userland from seeing kernel protections?
5874 	//info.protection &= B_USER_PROTECTION;
5875 
5876 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5877 		return B_BAD_ADDRESS;
5878 
5879 	return status;
5880 }
5881 
5882 
5883 status_t
5884 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
5885 {
5886 	ssize_t cookie;
5887 
5888 	if (!IS_USER_ADDRESS(userCookie)
5889 		|| !IS_USER_ADDRESS(userInfo)
5890 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
5891 		return B_BAD_ADDRESS;
5892 
5893 	area_info info;
5894 	status_t status = _get_next_area_info(team, &cookie, &info,
5895 		sizeof(area_info));
5896 	if (status != B_OK)
5897 		return status;
5898 
5899 	//info.protection &= B_USER_PROTECTION;
5900 
5901 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
5902 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5903 		return B_BAD_ADDRESS;
5904 
5905 	return status;
5906 }
5907 
5908 
5909 status_t
5910 _user_set_area_protection(area_id area, uint32 newProtection)
5911 {
5912 	if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0)
5913 		return B_BAD_VALUE;
5914 
5915 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
5916 		newProtection, false);
5917 }
5918 
5919 
5920 status_t
5921 _user_resize_area(area_id area, size_t newSize)
5922 {
5923 	// TODO: Since we restrict deleting of areas to those owned by the team,
5924 	// we should also do that for resizing (check other functions, too).
5925 	return vm_resize_area(area, newSize, false);
5926 }
5927 
5928 
5929 area_id
5930 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
5931 	team_id target)
5932 {
5933 	// filter out some unavailable values (for userland)
5934 	switch (addressSpec) {
5935 		case B_ANY_KERNEL_ADDRESS:
5936 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5937 			return B_BAD_VALUE;
5938 	}
5939 
5940 	void* address;
5941 	if (!IS_USER_ADDRESS(userAddress)
5942 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5943 		return B_BAD_ADDRESS;
5944 
5945 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
5946 	if (newArea < B_OK)
5947 		return newArea;
5948 
5949 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
5950 		return B_BAD_ADDRESS;
5951 
5952 	return newArea;
5953 }
5954 
5955 
5956 area_id
5957 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
5958 	uint32 protection, area_id sourceArea)
5959 {
5960 	char name[B_OS_NAME_LENGTH];
5961 	void* address;
5962 
5963 	// filter out some unavailable values (for userland)
5964 	switch (addressSpec) {
5965 		case B_ANY_KERNEL_ADDRESS:
5966 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5967 			return B_BAD_VALUE;
5968 	}
5969 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
5970 		return B_BAD_VALUE;
5971 
5972 	if (!IS_USER_ADDRESS(userName)
5973 		|| !IS_USER_ADDRESS(userAddress)
5974 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
5975 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5976 		return B_BAD_ADDRESS;
5977 
5978 	fix_protection(&protection);
5979 
5980 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
5981 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
5982 		false);
5983 	if (clonedArea < B_OK)
5984 		return clonedArea;
5985 
5986 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5987 		delete_area(clonedArea);
5988 		return B_BAD_ADDRESS;
5989 	}
5990 
5991 	return clonedArea;
5992 }
5993 
5994 
5995 area_id
5996 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
5997 	size_t size, uint32 lock, uint32 protection)
5998 {
5999 	char name[B_OS_NAME_LENGTH];
6000 	void* address;
6001 
6002 	// filter out some unavailable values (for userland)
6003 	switch (addressSpec) {
6004 		case B_ANY_KERNEL_ADDRESS:
6005 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6006 			return B_BAD_VALUE;
6007 	}
6008 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6009 		return B_BAD_VALUE;
6010 
6011 	if (!IS_USER_ADDRESS(userName)
6012 		|| !IS_USER_ADDRESS(userAddress)
6013 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6014 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6015 		return B_BAD_ADDRESS;
6016 
6017 	if (addressSpec == B_EXACT_ADDRESS
6018 		&& IS_KERNEL_ADDRESS(address))
6019 		return B_BAD_VALUE;
6020 
6021 	if (addressSpec == B_ANY_ADDRESS)
6022 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6023 	if (addressSpec == B_BASE_ADDRESS)
6024 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6025 
6026 	fix_protection(&protection);
6027 
6028 	virtual_address_restrictions virtualRestrictions = {};
6029 	virtualRestrictions.address = address;
6030 	virtualRestrictions.address_specification = addressSpec;
6031 	physical_address_restrictions physicalRestrictions = {};
6032 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6033 		size, lock, protection, 0, 0, &virtualRestrictions,
6034 		&physicalRestrictions, false, &address);
6035 
6036 	if (area >= B_OK
6037 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6038 		delete_area(area);
6039 		return B_BAD_ADDRESS;
6040 	}
6041 
6042 	return area;
6043 }
6044 
6045 
6046 status_t
6047 _user_delete_area(area_id area)
6048 {
6049 	// Unlike the BeOS implementation, you can now only delete areas
6050 	// that you have created yourself from userland.
6051 	// The documentation to delete_area() explicitly states that this
6052 	// will be restricted in the future, and so it will.
6053 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6054 }
6055 
6056 
6057 // TODO: create a BeOS style call for this!
6058 
6059 area_id
6060 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6061 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6062 	int fd, off_t offset)
6063 {
6064 	char name[B_OS_NAME_LENGTH];
6065 	void* address;
6066 	area_id area;
6067 
6068 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6069 		return B_BAD_VALUE;
6070 
6071 	fix_protection(&protection);
6072 
6073 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6074 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6075 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6076 		return B_BAD_ADDRESS;
6077 
6078 	if (addressSpec == B_EXACT_ADDRESS) {
6079 		if ((addr_t)address + size < (addr_t)address
6080 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6081 			return B_BAD_VALUE;
6082 		}
6083 		if (!IS_USER_ADDRESS(address)
6084 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6085 			return B_BAD_ADDRESS;
6086 		}
6087 	}
6088 
6089 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6090 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6091 		false);
6092 	if (area < B_OK)
6093 		return area;
6094 
6095 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6096 		return B_BAD_ADDRESS;
6097 
6098 	return area;
6099 }
6100 
6101 
6102 status_t
6103 _user_unmap_memory(void* _address, size_t size)
6104 {
6105 	addr_t address = (addr_t)_address;
6106 
6107 	// check params
6108 	if (size == 0 || (addr_t)address + size < (addr_t)address
6109 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6110 		return B_BAD_VALUE;
6111 	}
6112 
6113 	if (!IS_USER_ADDRESS(address)
6114 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6115 		return B_BAD_ADDRESS;
6116 	}
6117 
6118 	// Write lock the address space and ensure the address range is not wired.
6119 	AddressSpaceWriteLocker locker;
6120 	do {
6121 		status_t status = locker.SetTo(team_get_current_team_id());
6122 		if (status != B_OK)
6123 			return status;
6124 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6125 			size, &locker));
6126 
6127 	// unmap
6128 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6129 }
6130 
6131 
6132 status_t
6133 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6134 {
6135 	// check address range
6136 	addr_t address = (addr_t)_address;
6137 	size = PAGE_ALIGN(size);
6138 
6139 	if ((address % B_PAGE_SIZE) != 0)
6140 		return B_BAD_VALUE;
6141 	if (!is_user_address_range(_address, size)) {
6142 		// weird error code required by POSIX
6143 		return ENOMEM;
6144 	}
6145 
6146 	// extend and check protection
6147 	if ((protection & ~B_USER_PROTECTION) != 0)
6148 		return B_BAD_VALUE;
6149 
6150 	fix_protection(&protection);
6151 
6152 	// We need to write lock the address space, since we're going to play with
6153 	// the areas. Also make sure that none of the areas is wired and that we're
6154 	// actually allowed to change the protection.
6155 	AddressSpaceWriteLocker locker;
6156 
6157 	bool restart;
6158 	do {
6159 		restart = false;
6160 
6161 		status_t status = locker.SetTo(team_get_current_team_id());
6162 		if (status != B_OK)
6163 			return status;
6164 
6165 		// First round: Check whether the whole range is covered by areas and we
6166 		// are allowed to modify them.
6167 		addr_t currentAddress = address;
6168 		size_t sizeLeft = size;
6169 		while (sizeLeft > 0) {
6170 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6171 			if (area == NULL)
6172 				return B_NO_MEMORY;
6173 
6174 			if ((area->protection & B_KERNEL_AREA) != 0)
6175 				return B_NOT_ALLOWED;
6176 			if (area->protection_max != 0
6177 				&& (protection & area->protection_max) != (protection & B_USER_PROTECTION)) {
6178 				return B_NOT_ALLOWED;
6179 			}
6180 
6181 			addr_t offset = currentAddress - area->Base();
6182 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6183 
6184 			AreaCacheLocker cacheLocker(area);
6185 
6186 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6187 					&locker, &cacheLocker)) {
6188 				restart = true;
6189 				break;
6190 			}
6191 
6192 			cacheLocker.Unlock();
6193 
6194 			currentAddress += rangeSize;
6195 			sizeLeft -= rangeSize;
6196 		}
6197 	} while (restart);
6198 
6199 	// Second round: If the protections differ from that of the area, create a
6200 	// page protection array and re-map mapped pages.
6201 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6202 	addr_t currentAddress = address;
6203 	size_t sizeLeft = size;
6204 	while (sizeLeft > 0) {
6205 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6206 		if (area == NULL)
6207 			return B_NO_MEMORY;
6208 
6209 		addr_t offset = currentAddress - area->Base();
6210 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6211 
6212 		currentAddress += rangeSize;
6213 		sizeLeft -= rangeSize;
6214 
6215 		if (area->page_protections == NULL) {
6216 			if (area->protection == protection)
6217 				continue;
6218 			if (offset == 0 && rangeSize == area->Size()) {
6219 				// The whole area is covered: let vm_set_area_protection() handle it.
6220 				status_t status = vm_set_area_protection(area->address_space->ID(),
6221 					area->id, protection, false);
6222 				if (status != B_OK)
6223 					return status;
6224 				continue;
6225 			}
6226 
6227 			status_t status = allocate_area_page_protections(area);
6228 			if (status != B_OK)
6229 				return status;
6230 		}
6231 
6232 		// We need to lock the complete cache chain, since we potentially unmap
6233 		// pages of lower caches.
6234 		VMCache* topCache = vm_area_get_locked_cache(area);
6235 		VMCacheChainLocker cacheChainLocker(topCache);
6236 		cacheChainLocker.LockAllSourceCaches();
6237 
6238 		// Adjust the committed size, if necessary.
6239 		if (topCache->source != NULL && topCache->temporary) {
6240 			const bool becomesWritable = (protection & B_WRITE_AREA) != 0;
6241 			ssize_t commitmentChange = 0;
6242 			for (addr_t pageAddress = area->Base() + offset;
6243 					pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6244 				if (topCache->LookupPage(pageAddress) != NULL) {
6245 					// This page should already be accounted for in the commitment.
6246 					continue;
6247 				}
6248 
6249 				const bool isWritable
6250 					= (get_area_page_protection(area, pageAddress) & B_WRITE_AREA) != 0;
6251 
6252 				if (becomesWritable && !isWritable)
6253 					commitmentChange += B_PAGE_SIZE;
6254 				else if (!becomesWritable && isWritable)
6255 					commitmentChange -= B_PAGE_SIZE;
6256 			}
6257 
6258 			if (commitmentChange != 0) {
6259 				const off_t newCommitment = topCache->committed_size + commitmentChange;
6260 				ASSERT(newCommitment <= (topCache->virtual_end - topCache->virtual_base));
6261 				status_t status = topCache->Commit(newCommitment, VM_PRIORITY_USER);
6262 				if (status != B_OK)
6263 					return status;
6264 			}
6265 		}
6266 
6267 		for (addr_t pageAddress = area->Base() + offset;
6268 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6269 			map->Lock();
6270 
6271 			set_area_page_protection(area, pageAddress, protection);
6272 
6273 			phys_addr_t physicalAddress;
6274 			uint32 flags;
6275 
6276 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6277 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6278 				map->Unlock();
6279 				continue;
6280 			}
6281 
6282 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6283 			if (page == NULL) {
6284 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6285 					"\n", area, physicalAddress);
6286 				map->Unlock();
6287 				return B_ERROR;
6288 			}
6289 
6290 			// If the page is not in the topmost cache and write access is
6291 			// requested, we have to unmap it. Otherwise we can re-map it with
6292 			// the new protection.
6293 			bool unmapPage = page->Cache() != topCache
6294 				&& (protection & B_WRITE_AREA) != 0;
6295 
6296 			if (!unmapPage)
6297 				map->ProtectPage(area, pageAddress, protection);
6298 
6299 			map->Unlock();
6300 
6301 			if (unmapPage) {
6302 				DEBUG_PAGE_ACCESS_START(page);
6303 				unmap_page(area, pageAddress);
6304 				DEBUG_PAGE_ACCESS_END(page);
6305 			}
6306 		}
6307 	}
6308 
6309 	return B_OK;
6310 }
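

/*	A minimal userland sketch of the mprotect() call that (presumably via the
	libroot wrapper) ends up in this syscall ("buffer" being a hypothetical
	page aligned mapping):

		if (mprotect(buffer, B_PAGE_SIZE, PROT_READ) != 0)
			return errno;
*/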
6311 
6312 
6313 status_t
6314 _user_sync_memory(void* _address, size_t size, uint32 flags)
6315 {
6316 	addr_t address = (addr_t)_address;
6317 	size = PAGE_ALIGN(size);
6318 
6319 	// check params
6320 	if ((address % B_PAGE_SIZE) != 0)
6321 		return B_BAD_VALUE;
6322 	if (!is_user_address_range(_address, size)) {
6323 		// weird error code required by POSIX
6324 		return ENOMEM;
6325 	}
6326 
6327 	bool writeSync = (flags & MS_SYNC) != 0;
6328 	bool writeAsync = (flags & MS_ASYNC) != 0;
6329 	if (writeSync && writeAsync)
6330 		return B_BAD_VALUE;
6331 
6332 	if (size == 0 || (!writeSync && !writeAsync))
6333 		return B_OK;
6334 
6335 	// iterate through the range and sync all concerned areas
6336 	while (size > 0) {
6337 		// read lock the address space
6338 		AddressSpaceReadLocker locker;
6339 		status_t error = locker.SetTo(team_get_current_team_id());
6340 		if (error != B_OK)
6341 			return error;
6342 
6343 		// get the first area
6344 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6345 		if (area == NULL)
6346 			return B_NO_MEMORY;
6347 
6348 		uint32 offset = address - area->Base();
6349 		size_t rangeSize = min_c(area->Size() - offset, size);
6350 		offset += area->cache_offset;
6351 
6352 		// lock the cache
6353 		AreaCacheLocker cacheLocker(area);
6354 		if (!cacheLocker)
6355 			return B_BAD_VALUE;
6356 		VMCache* cache = area->cache;
6357 
6358 		locker.Unlock();
6359 
6360 		uint32 firstPage = offset >> PAGE_SHIFT;
6361 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6362 
6363 		// write the pages
6364 		if (cache->type == CACHE_TYPE_VNODE) {
6365 			if (writeSync) {
6366 				// synchronous
6367 				error = vm_page_write_modified_page_range(cache, firstPage,
6368 					endPage);
6369 				if (error != B_OK)
6370 					return error;
6371 			} else {
6372 				// asynchronous
6373 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6374 				// TODO: This is probably not quite what is supposed to happen.
6375 				// Especially when a lot has to be written, it might take ages
6376 				// until it really hits the disk.
6377 			}
6378 		}
6379 
6380 		address += rangeSize;
6381 		size -= rangeSize;
6382 	}
6383 
6384 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
6385 	// synchronize multiple mappings of the same file. In our VM they never get
6386 	// out of sync, though, so we don't have to do anything.
6387 
6388 	return B_OK;
6389 }
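

/*	The MS_SYNC/MS_ASYNC handling above is what userland msync() relies on.
	A minimal userland sketch ("mapping" and "length" being hypothetical, page
	aligned values):

		if (msync(mapping, length, MS_ASYNC) != 0)
			return errno;
*/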
6390 
6391 
6392 status_t
6393 _user_memory_advice(void* _address, size_t size, uint32 advice)
6394 {
6395 	addr_t address = (addr_t)_address;
6396 	if ((address % B_PAGE_SIZE) != 0)
6397 		return B_BAD_VALUE;
6398 
6399 	size = PAGE_ALIGN(size);
6400 	if (!is_user_address_range(_address, size)) {
6401 		// weird error code required by POSIX
6402 		return B_NO_MEMORY;
6403 	}
6404 
6405 	switch (advice) {
6406 		case MADV_NORMAL:
6407 		case MADV_SEQUENTIAL:
6408 		case MADV_RANDOM:
6409 		case MADV_WILLNEED:
6410 		case MADV_DONTNEED:
6411 			// TODO: Implement!
6412 			break;
6413 
6414 		case MADV_FREE:
6415 		{
6416 			AddressSpaceWriteLocker locker;
6417 			do {
6418 				status_t status = locker.SetTo(team_get_current_team_id());
6419 				if (status != B_OK)
6420 					return status;
6421 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
6422 					address, size, &locker));
6423 
6424 			discard_address_range(locker.AddressSpace(), address, size, false);
6425 			break;
6426 		}
6427 
6428 		default:
6429 			return B_BAD_VALUE;
6430 	}
6431 
6432 	return B_OK;
6433 }
6434 
6435 
6436 status_t
6437 _user_get_memory_properties(team_id teamID, const void* address,
6438 	uint32* _protected, uint32* _lock)
6439 {
6440 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6441 		return B_BAD_ADDRESS;
6442 
6443 	AddressSpaceReadLocker locker;
6444 	status_t error = locker.SetTo(teamID);
6445 	if (error != B_OK)
6446 		return error;
6447 
6448 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6449 	if (area == NULL)
6450 		return B_NO_MEMORY;
6451 
6452 	uint32 protection = get_area_page_protection(area, (addr_t)address);
6453 	uint32 wiring = area->wiring;
6454 
6455 	locker.Unlock();
6456 
6457 	error = user_memcpy(_protected, &protection, sizeof(protection));
6458 	if (error != B_OK)
6459 		return error;
6460 
6461 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6462 
6463 	return error;
6464 }
6465 
6466 
6467 static status_t
6468 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
6469 {
6470 #if ENABLE_SWAP_SUPPORT
6471 	// check address range
6472 	addr_t address = (addr_t)_address;
6473 	size = PAGE_ALIGN(size);
6474 
6475 	if ((address % B_PAGE_SIZE) != 0)
6476 		return EINVAL;
6477 	if (!is_user_address_range(_address, size))
6478 		return EINVAL;
6479 
6480 	const addr_t endAddress = address + size;
6481 
6482 	AddressSpaceReadLocker addressSpaceLocker;
6483 	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
6484 	if (error != B_OK)
6485 		return error;
6486 	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
6487 
6488 	// iterate through all concerned areas
6489 	addr_t nextAddress = address;
6490 	while (nextAddress != endAddress) {
6491 		// get the next area
6492 		VMArea* area = addressSpace->LookupArea(nextAddress);
6493 		if (area == NULL) {
6494 			error = B_BAD_ADDRESS;
6495 			break;
6496 		}
6497 
6498 		const addr_t areaStart = nextAddress;
6499 		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
6500 		nextAddress = areaEnd;
6501 
6502 		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6503 		if (error != B_OK) {
6504 			// We don't need to unset or reset things on failure.
6505 			break;
6506 		}
6507 
6508 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6509 		VMAnonymousCache* anonCache = NULL;
6510 		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
6511 			// This memory is never swapped anyway. Nothing to do.
6512 		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
6513 			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
6514 				areaEnd - areaStart, swappable);
6515 		} else {
6516 			// Some other cache type? We cannot affect anything here.
6517 			error = EINVAL;
6518 		}
6519 
6520 		cacheChainLocker.Unlock();
6521 
6522 		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6523 		if (error != B_OK)
6524 			break;
6525 	}
6526 
6527 	return error;
6528 #else
6529 	// No swap support? Nothing to do.
6530 	return B_OK;
6531 #endif
6532 }
6533 
6534 
6535 status_t
6536 _user_mlock(const void* _address, size_t size)
6537 {
6538 	return user_set_memory_swappable(_address, size, false);
6539 }
6540 
6541 
6542 status_t
6543 _user_munlock(const void* _address, size_t size)
6544 {
6545 	// TODO: B_SHARED_AREAs need to be handled a bit differently:
6546 	// if multiple clones of an area had mlock() called on them,
6547 	// munlock() must also be called on all of them to actually unlock.
6548 	// (At present, the first munlock() will unlock all.)
6549 	// TODO: fork() should automatically unlock memory in the child.
6550 	return user_set_memory_swappable(_address, size, true);
6551 }
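

/*	A minimal userland sketch of the mlock()/munlock() pairing these two
	syscalls back ("buffer" and "length" being hypothetical values):

		if (mlock(buffer, length) != 0)
			return errno;
		// ... the range is now excluded from swapping ...
		munlock(buffer, length);
*/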
6552 
6553 
6554 // #pragma mark -- compatibility
6555 
6556 
6557 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6558 
6559 
6560 struct physical_entry_beos {
6561 	uint32	address;
6562 	uint32	size;
6563 };
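

/*	For reference: the current physical_entry stores the address as a
	phys_addr_t (cf. the 4 GB check below) together with a size field, which
	is what __get_memory_map_beos() has to narrow down to the two 32 bit
	members above.
*/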
6564 
6565 
6566 /*!	The physical_entry structure has changed. We need to translate it to the
6567 	old one.
6568 */
6569 extern "C" int32
6570 __get_memory_map_beos(const void* _address, size_t numBytes,
6571 	physical_entry_beos* table, int32 numEntries)
6572 {
6573 	if (numEntries <= 0)
6574 		return B_BAD_VALUE;
6575 
6576 	const uint8* address = (const uint8*)_address;
6577 
6578 	int32 count = 0;
6579 	while (numBytes > 0 && count < numEntries) {
6580 		physical_entry entry;
6581 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6582 		if (result < 0) {
6583 			if (result != B_BUFFER_OVERFLOW)
6584 				return result;
6585 		}
6586 
6587 		if (entry.address >= (phys_addr_t)1 << 32) {
6588 			panic("get_memory_map(): Address is greater than 4 GB!");
6589 			return B_ERROR;
6590 		}
6591 
6592 		table[count].address = entry.address;
6593 		table[count++].size = entry.size;
6594 
6595 		address += entry.size;
6596 		numBytes -= entry.size;
6597 	}
6598 
6599 	// null-terminate the table, if possible
6600 	if (count < numEntries) {
6601 		table[count].address = 0;
6602 		table[count].size = 0;
6603 	}
6604 
6605 	return B_OK;
6606 }
6607 
6608 
6609 /*!	The type of the \a physicalAddress parameter has changed from void* to
6610 	phys_addr_t.
6611 */
6612 extern "C" area_id
6613 __map_physical_memory_beos(const char* name, void* physicalAddress,
6614 	size_t numBytes, uint32 addressSpec, uint32 protection,
6615 	void** _virtualAddress)
6616 {
6617 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6618 		addressSpec, protection, _virtualAddress);
6619 }
6620 
6621 
6622 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6623 	we meddle with the \a lock parameter to force 32 bit.
6624 */
6625 extern "C" area_id
6626 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6627 	size_t size, uint32 lock, uint32 protection)
6628 {
6629 	switch (lock) {
6630 		case B_NO_LOCK:
6631 			break;
6632 		case B_FULL_LOCK:
6633 		case B_LAZY_LOCK:
6634 			lock = B_32_BIT_FULL_LOCK;
6635 			break;
6636 		case B_CONTIGUOUS:
6637 			lock = B_32_BIT_CONTIGUOUS;
6638 			break;
6639 	}
6640 
6641 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6642 		protection);
6643 }
6644 
6645 
6646 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6647 	"BASE");
6648 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6649 	"map_physical_memory@", "BASE");
6650 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6651 	"BASE");
6652 
6653 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6654 	"get_memory_map@@", "1_ALPHA3");
6655 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6656 	"map_physical_memory@@", "1_ALPHA3");
6657 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6658 	"1_ALPHA3");
6659 
6660 
6661 #else
6662 
6663 
6664 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6665 	"get_memory_map@@", "BASE");
6666 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6667 	"map_physical_memory@@", "BASE");
6668 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6669 	"BASE");
6670 
6671 
6672 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6673