xref: /haiku/src/system/kernel/vm/vm.cpp (revision 6eafb4b041ad79cb936b2041fdb9c56b1209cc10)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 class AreaCacheLocking {
77 public:
78 	inline bool Lock(VMCache* lockable)
79 	{
80 		return false;
81 	}
82 
83 	inline void Unlock(VMCache* lockable)
84 	{
85 		vm_area_put_locked_cache(lockable);
86 	}
87 };
88 
89 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
90 public:
91 	inline AreaCacheLocker(VMCache* cache = NULL)
92 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
93 	{
94 	}
95 
96 	inline AreaCacheLocker(VMArea* area)
97 		: AutoLocker<VMCache, AreaCacheLocking>()
98 	{
99 		SetTo(area);
100 	}
101 
102 	inline void SetTo(VMCache* cache, bool alreadyLocked)
103 	{
104 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
105 	}
106 
107 	inline void SetTo(VMArea* area)
108 	{
109 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
110 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
111 	}
112 };
113 
114 
115 class VMCacheChainLocker {
116 public:
117 	VMCacheChainLocker()
118 		:
119 		fTopCache(NULL),
120 		fBottomCache(NULL)
121 	{
122 	}
123 
124 	VMCacheChainLocker(VMCache* topCache)
125 		:
126 		fTopCache(topCache),
127 		fBottomCache(topCache)
128 	{
129 	}
130 
131 	~VMCacheChainLocker()
132 	{
133 		Unlock();
134 	}
135 
136 	void SetTo(VMCache* topCache)
137 	{
138 		fTopCache = topCache;
139 		fBottomCache = topCache;
140 
141 		if (topCache != NULL)
142 			topCache->SetUserData(NULL);
143 	}
144 
145 	VMCache* LockSourceCache()
146 	{
147 		if (fBottomCache == NULL || fBottomCache->source == NULL)
148 			return NULL;
149 
150 		VMCache* previousCache = fBottomCache;
151 
152 		fBottomCache = fBottomCache->source;
153 		fBottomCache->Lock();
154 		fBottomCache->AcquireRefLocked();
155 		fBottomCache->SetUserData(previousCache);
156 
157 		return fBottomCache;
158 	}
159 
160 	void LockAllSourceCaches()
161 	{
162 		while (LockSourceCache() != NULL) {
163 		}
164 	}
165 
166 	void Unlock(VMCache* exceptCache = NULL)
167 	{
168 		if (fTopCache == NULL)
169 			return;
170 
171 		// Unlock caches in source -> consumer direction. This is important to
172 		// avoid double-locking and a reversal of locking order in case a cache
173 		// is eligible for merging.
174 		VMCache* cache = fBottomCache;
175 		while (cache != NULL) {
176 			VMCache* nextCache = (VMCache*)cache->UserData();
177 			if (cache != exceptCache)
178 				cache->ReleaseRefAndUnlock(cache != fTopCache);
179 
180 			if (cache == fTopCache)
181 				break;
182 
183 			cache = nextCache;
184 		}
185 
186 		fTopCache = NULL;
187 		fBottomCache = NULL;
188 	}
189 
190 	void UnlockKeepRefs(bool keepTopCacheLocked)
191 	{
192 		if (fTopCache == NULL)
193 			return;
194 
195 		VMCache* nextCache = fBottomCache;
196 		VMCache* cache = NULL;
197 
198 		while (keepTopCacheLocked
199 				? nextCache != fTopCache : cache != fTopCache) {
200 			cache = nextCache;
201 			nextCache = (VMCache*)cache->UserData();
202 			cache->Unlock(cache != fTopCache);
203 		}
204 	}
205 
206 	void RelockCaches(bool topCacheLocked)
207 	{
208 		if (fTopCache == NULL)
209 			return;
210 
211 		VMCache* nextCache = fTopCache;
212 		VMCache* cache = NULL;
213 		if (topCacheLocked) {
214 			cache = nextCache;
215 			nextCache = cache->source;
216 		}
217 
218 		while (cache != fBottomCache && nextCache != NULL) {
219 			VMCache* consumer = cache;
220 			cache = nextCache;
221 			nextCache = cache->source;
222 			cache->Lock();
223 			cache->SetUserData(consumer);
224 		}
225 	}
226 
227 private:
228 	VMCache*	fTopCache;
229 	VMCache*	fBottomCache;
230 };
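
// A typical use of VMCacheChainLocker, mirroring cut_area() below: lock the
// area's (already locked) top cache, then walk and lock every source cache
// before touching pages anywhere in the chain; on destruction the caches are
// unlocked in source -> consumer order (illustrative sketch only):
//
//	VMCache* cache = vm_area_get_locked_cache(area);
//	VMCacheChainLocker cacheChainLocker(cache);
//	cacheChainLocker.LockAllSourceCaches();
//	// ... work on the fully locked chain ...
//	// ~VMCacheChainLocker() unlocks and releases the references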
231 
232 
233 // The memory reserve that an allocation of a given priority must not touch.
234 static const size_t kMemoryReserveForPriority[] = {
235 	VM_MEMORY_RESERVE_USER,		// user
236 	VM_MEMORY_RESERVE_SYSTEM,	// system
237 	0							// VIP
238 };
239 
240 
241 ObjectCache* gPageMappingsObjectCache;
242 
243 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
244 
245 static off_t sAvailableMemory;
246 static off_t sNeededMemory;
247 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
248 static uint32 sPageFaults;
249 
250 static VMPhysicalPageMapper* sPhysicalPageMapper;
251 
252 #if DEBUG_CACHE_LIST
253 
254 struct cache_info {
255 	VMCache*	cache;
256 	addr_t		page_count;
257 	addr_t		committed;
258 };
259 
260 static const int kCacheInfoTableCount = 100 * 1024;
261 static cache_info* sCacheInfoTable;
262 
263 #endif	// DEBUG_CACHE_LIST
264 
265 
266 // function declarations
267 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
268 	bool addressSpaceCleanup);
269 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
270 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
271 static status_t map_backing_store(VMAddressSpace* addressSpace,
272 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
273 	int protection, int mapping, uint32 flags,
274 	const virtual_address_restrictions* addressRestrictions, bool kernel,
275 	VMArea** _area, void** _virtualAddress);
276 static void fix_protection(uint32* protection);
277 
278 
279 //	#pragma mark -
280 
281 
282 #if VM_PAGE_FAULT_TRACING
283 
284 namespace VMPageFaultTracing {
285 
286 class PageFaultStart : public AbstractTraceEntry {
287 public:
288 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
289 		:
290 		fAddress(address),
291 		fPC(pc),
292 		fWrite(write),
293 		fUser(user)
294 	{
295 		Initialized();
296 	}
297 
298 	virtual void AddDump(TraceOutput& out)
299 	{
300 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
301 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
302 	}
303 
304 private:
305 	addr_t	fAddress;
306 	addr_t	fPC;
307 	bool	fWrite;
308 	bool	fUser;
309 };
310 
311 
312 // page fault errors
313 enum {
314 	PAGE_FAULT_ERROR_NO_AREA		= 0,
315 	PAGE_FAULT_ERROR_KERNEL_ONLY,
316 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
317 	PAGE_FAULT_ERROR_READ_PROTECTED,
318 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
319 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
320 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
321 };
322 
323 
324 class PageFaultError : public AbstractTraceEntry {
325 public:
326 	PageFaultError(area_id area, status_t error)
327 		:
328 		fArea(area),
329 		fError(error)
330 	{
331 		Initialized();
332 	}
333 
334 	virtual void AddDump(TraceOutput& out)
335 	{
336 		switch (fError) {
337 			case PAGE_FAULT_ERROR_NO_AREA:
338 				out.Print("page fault error: no area");
339 				break;
340 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
341 				out.Print("page fault error: area: %ld, kernel only", fArea);
342 				break;
343 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
344 				out.Print("page fault error: area: %ld, write protected",
345 					fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_READ_PROTECTED:
348 				out.Print("page fault error: area: %ld, read protected", fArea);
349 				break;
350 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
351 				out.Print("page fault error: area: %ld, execute protected",
352 					fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
355 				out.Print("page fault error: kernel touching bad user memory");
356 				break;
357 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
358 				out.Print("page fault error: no address space");
359 				break;
360 			default:
361 				out.Print("page fault error: area: %ld, error: %s", fArea,
362 					strerror(fError));
363 				break;
364 		}
365 	}
366 
367 private:
368 	area_id		fArea;
369 	status_t	fError;
370 };
371 
372 
373 class PageFaultDone : public AbstractTraceEntry {
374 public:
375 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
376 			vm_page* page)
377 		:
378 		fArea(area),
379 		fTopCache(topCache),
380 		fCache(cache),
381 		fPage(page)
382 	{
383 		Initialized();
384 	}
385 
386 	virtual void AddDump(TraceOutput& out)
387 	{
388 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
389 			"page: %p", fArea, fTopCache, fCache, fPage);
390 	}
391 
392 private:
393 	area_id		fArea;
394 	VMCache*	fTopCache;
395 	VMCache*	fCache;
396 	vm_page*	fPage;
397 };
398 
399 }	// namespace VMPageFaultTracing
400 
401 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
402 #else
403 #	define TPF(x) ;
404 #endif	// VM_PAGE_FAULT_TRACING
405 
406 
407 //	#pragma mark -
408 
409 
410 /*!	The page's cache must be locked.
411 */
412 static inline void
413 increment_page_wired_count(vm_page* page)
414 {
415 	if (!page->IsMapped())
416 		atomic_add(&gMappedPagesCount, 1);
417 	page->IncrementWiredCount();
418 }
419 
420 
421 /*!	The page's cache must be locked.
422 */
423 static inline void
424 decrement_page_wired_count(vm_page* page)
425 {
426 	page->DecrementWiredCount();
427 	if (!page->IsMapped())
428 		atomic_add(&gMappedPagesCount, -1);
429 }
430 
431 
432 static inline addr_t
433 virtual_page_address(VMArea* area, vm_page* page)
434 {
435 	return area->Base()
436 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
437 }
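
// Worked example (assuming 4 KiB pages, i.e. PAGE_SHIFT == 12): for an area
// based at 0x80000000 whose cache_offset is 0x2000, a page with cache_offset
// 3 lies at byte offset 3 << PAGE_SHIFT = 0x3000 in the cache, so its
// virtual address is 0x80000000 + (0x3000 - 0x2000) = 0x80001000.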
438 
439 
440 //! You need to have the address space locked when calling this function
441 static VMArea*
442 lookup_area(VMAddressSpace* addressSpace, area_id id)
443 {
444 	VMAreaHash::ReadLock();
445 
446 	VMArea* area = VMAreaHash::LookupLocked(id);
447 	if (area != NULL && area->address_space != addressSpace)
448 		area = NULL;
449 
450 	VMAreaHash::ReadUnlock();
451 
452 	return area;
453 }
454 
455 
456 static status_t
457 allocate_area_page_protections(VMArea* area)
458 {
459 	// In the page protections we store only the three user protections,
460 	// so we use 4 bits per page.
461 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
462 	area->page_protections = (uint8*)malloc_etc(bytes,
463 		HEAP_DONT_LOCK_KERNEL_SPACE);
464 	if (area->page_protections == NULL)
465 		return B_NO_MEMORY;
466 
467 	// init the page protections for all pages to that of the area
468 	uint32 areaProtection = area->protection
469 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
470 	memset(area->page_protections, areaProtection | (areaProtection << 4),
471 		bytes);
472 	return B_OK;
473 }
474 
475 
476 static inline void
477 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
478 {
479 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
480 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
481 	uint8& entry = area->page_protections[pageIndex / 2];
482 	if (pageIndex % 2 == 0)
483 		entry = (entry & 0xf0) | protection;
484 	else
485 		entry = (entry & 0x0f) | (protection << 4);
486 }
487 
488 
489 static inline uint32
490 get_area_page_protection(VMArea* area, addr_t pageAddress)
491 {
492 	if (area->page_protections == NULL)
493 		return area->protection;
494 
495 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
496 	uint32 protection = area->page_protections[pageIndex / 2];
497 	if (pageIndex % 2 == 0)
498 		protection &= 0x0f;
499 	else
500 		protection >>= 4;
501 
502 	// If this is a kernel area we translate the user flags to kernel flags.
503 	if (area->address_space == VMAddressSpace::Kernel()) {
504 		uint32 kernelProtection = 0;
505 		if ((protection & B_READ_AREA) != 0)
506 			kernelProtection |= B_KERNEL_READ_AREA;
507 		if ((protection & B_WRITE_AREA) != 0)
508 			kernelProtection |= B_KERNEL_WRITE_AREA;
509 
510 		return kernelProtection;
511 	}
512 
513 	return protection | B_KERNEL_READ_AREA
514 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
515 }
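
// The page protections are thus packed two to a byte: the low nibble holds
// the protection of the even page index, the high nibble that of the odd
// one. A minimal lookup sketch (same logic as above, for illustration):
//
//	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
//	uint8 entry = area->page_protections[pageIndex / 2];
//	uint32 protection = pageIndex % 2 == 0 ? (entry & 0x0f) : (entry >> 4);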
516 
517 
518 /*!	The caller must have reserved enough pages that the translation map
519 	implementation might need to map this page.
520 	The page's cache must be locked.
521 */
522 static status_t
523 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
524 	vm_page_reservation* reservation)
525 {
526 	VMTranslationMap* map = area->address_space->TranslationMap();
527 
528 	bool wasMapped = page->IsMapped();
529 
530 	if (area->wiring == B_NO_LOCK) {
531 		DEBUG_PAGE_ACCESS_CHECK(page);
532 
533 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
534 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
535 			gPageMappingsObjectCache,
536 			CACHE_DONT_WAIT_FOR_MEMORY
537 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
538 		if (mapping == NULL)
539 			return B_NO_MEMORY;
540 
541 		mapping->page = page;
542 		mapping->area = area;
543 
544 		map->Lock();
545 
546 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
547 			area->MemoryType(), reservation);
548 
549 		// insert mapping into lists
550 		if (!page->IsMapped())
551 			atomic_add(&gMappedPagesCount, 1);
552 
553 		page->mappings.Add(mapping);
554 		area->mappings.Add(mapping);
555 
556 		map->Unlock();
557 	} else {
558 		DEBUG_PAGE_ACCESS_CHECK(page);
559 
560 		map->Lock();
561 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
562 			area->MemoryType(), reservation);
563 		map->Unlock();
564 
565 		increment_page_wired_count(page);
566 	}
567 
568 	if (!wasMapped) {
569 		// The page is mapped now, so it must not remain in the cached queue.
570 		// It also makes sense to move it from the inactive to the active queue,
571 		// since otherwise the page daemon wouldn't keep track of it (in idle
572 		// mode) -- if the page isn't touched, it will be deactivated after a
573 		// full iteration through the queue at the latest.
574 		if (page->State() == PAGE_STATE_CACHED
575 				|| page->State() == PAGE_STATE_INACTIVE) {
576 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
577 		}
578 	}
579 
580 	return B_OK;
581 }
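
// Note: for B_NO_LOCK areas the mapping is tracked explicitly via a
// vm_page_mapping object linked into both the page's and the area's mappings
// lists, while for all other (wired) wiring types only the page's wired
// count is incremented and no per-mapping object is allocated.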
582 
583 
584 /*!	The caller must hold the lock of the
585 	page's cache.
586 */
587 static inline bool
588 unmap_page(VMArea* area, addr_t virtualAddress)
589 {
590 	return area->address_space->TranslationMap()->UnmapPage(area,
591 		virtualAddress, true);
592 }
593 
594 
595 /*!	The caller must hold the locks of all
596 	mapped pages' caches.
597 */
598 static inline void
599 unmap_pages(VMArea* area, addr_t base, size_t size)
600 {
601 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
602 }
603 
604 
605 /*!	Cuts a piece out of an area. If the given cut range covers the complete
606 	area, it is deleted. If it covers the beginning or the end, the area is
607 	resized accordingly. If the range covers some part in the middle of the
608 	area, it is split in two; in this case the second area is returned via
609 	\a _secondArea (the variable is left untouched in the other cases).
610 	The address space must be write locked.
611 	The caller must ensure that no part of the given range is wired.
612 */
613 static status_t
614 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
615 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
616 {
617 	// Does the cut range intersect with the area at all?
618 	addr_t areaLast = area->Base() + (area->Size() - 1);
619 	if (area->Base() > lastAddress || areaLast < address)
620 		return B_OK;
621 
622 	// Is the area fully covered?
623 	if (area->Base() >= address && areaLast <= lastAddress) {
624 		delete_area(addressSpace, area, false);
625 		return B_OK;
626 	}
627 
628 	int priority;
629 	uint32 allocationFlags;
630 	if (addressSpace == VMAddressSpace::Kernel()) {
631 		priority = VM_PRIORITY_SYSTEM;
632 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
633 			| HEAP_DONT_LOCK_KERNEL_SPACE;
634 	} else {
635 		priority = VM_PRIORITY_USER;
636 		allocationFlags = 0;
637 	}
638 
639 	VMCache* cache = vm_area_get_locked_cache(area);
640 	VMCacheChainLocker cacheChainLocker(cache);
641 	cacheChainLocker.LockAllSourceCaches();
642 
643 	// Cut the end only?
644 	if (areaLast <= lastAddress) {
645 		size_t oldSize = area->Size();
646 		size_t newSize = address - area->Base();
647 
648 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
649 			allocationFlags);
650 		if (error != B_OK)
651 			return error;
652 
653 		// unmap pages
654 		unmap_pages(area, address, oldSize - newSize);
655 
656 		// If no one else uses the area's cache, we can resize it, too.
657 		if (cache->areas == area && area->cache_next == NULL
658 			&& cache->consumers.IsEmpty()
659 			&& cache->type == CACHE_TYPE_RAM) {
660 			// Since VMCache::Resize() can temporarily drop the lock, we must
661 			// unlock all lower caches to prevent locking order inversion.
662 			cacheChainLocker.Unlock(cache);
663 			cache->Resize(cache->virtual_base + newSize, priority);
664 			cache->ReleaseRefAndUnlock();
665 		}
666 
667 		return B_OK;
668 	}
669 
670 	// Cut the beginning only?
671 	if (area->Base() >= address) {
672 		addr_t oldBase = area->Base();
673 		addr_t newBase = lastAddress + 1;
674 		size_t newSize = areaLast - lastAddress;
675 
676 		// unmap pages
677 		unmap_pages(area, oldBase, newBase - oldBase);
678 
679 		// resize the area
680 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
681 			allocationFlags);
682 		if (error != B_OK)
683 			return error;
684 
685 		// TODO: If no one else uses the area's cache, we should resize it, too!
686 
687 		area->cache_offset += newBase - oldBase;
688 
689 		return B_OK;
690 	}
691 
692 	// The tough part -- cut a piece out of the middle of the area.
693 	// We do that by shrinking the area down to the first part and creating a
694 	// new area for the end section.
695 
696 	addr_t firstNewSize = address - area->Base();
697 	addr_t secondBase = lastAddress + 1;
698 	addr_t secondSize = areaLast - lastAddress;
699 
700 	// unmap pages
701 	unmap_pages(area, address, area->Size() - firstNewSize);
702 
703 	// resize the area
704 	addr_t oldSize = area->Size();
705 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
706 		allocationFlags);
707 	if (error != B_OK)
708 		return error;
709 
710 	// TODO: If no one else uses the area's cache, we might want to create a
711 	// new cache for the second area, transfer the concerned pages from the
712 	// first cache to it and resize the first cache.
713 
714 	// map the second area
715 	virtual_address_restrictions addressRestrictions = {};
716 	addressRestrictions.address = (void*)secondBase;
717 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
718 	VMArea* secondArea;
719 	error = map_backing_store(addressSpace, cache,
720 		area->cache_offset + (secondBase - area->Base()), area->name,
721 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
722 		&addressRestrictions, kernel, &secondArea, NULL);
723 	if (error != B_OK) {
724 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
725 		return error;
726 	}
727 
728 	// We need a cache reference for the new area.
729 	cache->AcquireRefLocked();
730 
731 	if (_secondArea != NULL)
732 		*_secondArea = secondArea;
733 
734 	return B_OK;
735 }
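
// The three non-trivial cases handled above, illustrated (A = remaining part
// of the area, X = cut range, S = second area returned via _secondArea):
//
//	cut at the end:       [ AAAAAAXX ]  ->  [ AAAAAA ]
//	cut at the beginning: [ XXAAAAAA ]  ->    [ AAAAAA ]   (base moves up)
//	cut in the middle:    [ AAXXXXAA ]  ->  [ AA ]    [ SS ]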
736 
737 
738 /*!	Deletes all areas in the given address range.
739 	The address space must be write-locked.
740 	The caller must ensure that no part of the given range is wired.
741 */
742 static status_t
743 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
744 	bool kernel)
745 {
746 	size = PAGE_ALIGN(size);
747 	addr_t lastAddress = address + (size - 1);
748 
749 	// Check whether the caller is allowed to modify the concerned areas.
750 	if (!kernel) {
751 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
752 				VMArea* area = it.Next();) {
753 			addr_t areaLast = area->Base() + (area->Size() - 1);
754 			if (area->Base() < lastAddress && address < areaLast) {
755 				if ((area->protection & B_KERNEL_AREA) != 0)
756 					return B_NOT_ALLOWED;
757 			}
758 		}
759 	}
760 
761 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
762 			VMArea* area = it.Next();) {
763 		addr_t areaLast = area->Base() + (area->Size() - 1);
764 		if (area->Base() < lastAddress && address < areaLast) {
765 			status_t error = cut_area(addressSpace, area, address,
766 				lastAddress, NULL, kernel);
767 			if (error != B_OK)
768 				return error;
769 				// Failing after already messing with areas is ugly, but we
770 				// can't do anything about it.
771 		}
772 	}
773 
774 	return B_OK;
775 }
776 
777 
778 /*! You need to hold the lock of the cache and the write lock of the address
779 	space when calling this function.
780 	Note that in case of error your cache will be temporarily unlocked.
781 	If \a addressSpec is \c B_EXACT_ADDRESS and the
782 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
783 	that no part of the specified address range (base \c *_virtualAddress, size
784 	\a size) is wired.
785 */
786 static status_t
787 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
788 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
789 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
790 	bool kernel, VMArea** _area, void** _virtualAddress)
791 {
792 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
793 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
794 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
795 		addressRestrictions->address, offset, size,
796 		addressRestrictions->address_specification, wiring, protection,
797 		_area, areaName));
798 	cache->AssertLocked();
799 
800 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
801 		| HEAP_DONT_LOCK_KERNEL_SPACE;
802 	int priority;
803 	if (addressSpace != VMAddressSpace::Kernel()) {
804 		priority = VM_PRIORITY_USER;
805 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
806 		priority = VM_PRIORITY_VIP;
807 		allocationFlags |= HEAP_PRIORITY_VIP;
808 	} else
809 		priority = VM_PRIORITY_SYSTEM;
810 
811 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
812 		allocationFlags);
813 	if (area == NULL)
814 		return B_NO_MEMORY;
815 
816 	status_t status;
817 
818 	// if this is a private map, we need to create a new cache
819 	// to handle the private copies of pages as they are written to
820 	VMCache* sourceCache = cache;
821 	if (mapping == REGION_PRIVATE_MAP) {
822 		VMCache* newCache;
823 
824 		// create an anonymous cache
825 		status = VMCacheFactory::CreateAnonymousCache(newCache,
826 			(protection & B_STACK_AREA) != 0
827 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
828 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
829 		if (status != B_OK)
830 			goto err1;
831 
832 		newCache->Lock();
833 		newCache->temporary = 1;
834 		newCache->virtual_base = offset;
835 		newCache->virtual_end = offset + size;
836 
837 		cache->AddConsumer(newCache);
838 
839 		cache = newCache;
840 	}
841 
842 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
843 		status = cache->SetMinimalCommitment(size, priority);
844 		if (status != B_OK)
845 			goto err2;
846 	}
847 
848 	// check to see if this address space has entered DELETE state
849 	if (addressSpace->IsBeingDeleted()) {
850 		// okay, someone is trying to delete this address space now, so we can't
851 		// insert the area and have to back out
852 		status = B_BAD_TEAM_ID;
853 		goto err2;
854 	}
855 
856 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
857 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
858 		status = unmap_address_range(addressSpace,
859 			(addr_t)addressRestrictions->address, size, kernel);
860 		if (status != B_OK)
861 			goto err2;
862 	}
863 
864 	status = addressSpace->InsertArea(area, size, addressRestrictions,
865 		allocationFlags, _virtualAddress);
866 	if (status != B_OK) {
867 		// TODO: wait and try again once this is working in the backend
868 #if 0
869 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
870 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
871 				0, 0);
872 		}
873 #endif
874 		goto err2;
875 	}
876 
877 	// attach the cache to the area
878 	area->cache = cache;
879 	area->cache_offset = offset;
880 
881 	// point the cache back to the area
882 	cache->InsertAreaLocked(area);
883 	if (mapping == REGION_PRIVATE_MAP)
884 		cache->Unlock();
885 
886 	// insert the area in the global area hash table
887 	VMAreaHash::Insert(area);
888 
889 	// grab a ref to the address space (the area holds this)
890 	addressSpace->Get();
891 
892 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
893 //		cache, sourceCache, areaName, area);
894 
895 	*_area = area;
896 	return B_OK;
897 
898 err2:
899 	if (mapping == REGION_PRIVATE_MAP) {
900 		// We created this cache, so we must delete it again. Note that we
901 		// need to temporarily unlock the source cache or we'll otherwise
902 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
903 		sourceCache->Unlock();
904 		cache->ReleaseRefAndUnlock();
905 		sourceCache->Lock();
906 	}
907 err1:
908 	addressSpace->DeleteArea(area, allocationFlags);
909 	return status;
910 }
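
// For REGION_PRIVATE_MAP the area thus ends up on top of a freshly created
// anonymous (copy-on-write) cache that consumes the given cache:
//
//	given (source) cache  <--  new anonymous cache  <--  area
//
// whereas REGION_NO_PRIVATE_MAP attaches the area directly to the given
// cache.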
911 
912 
913 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
914 	  locker1, locker2).
915 */
916 template<typename LockerType1, typename LockerType2>
917 static inline bool
918 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
919 {
920 	area->cache->AssertLocked();
921 
922 	VMAreaUnwiredWaiter waiter;
923 	if (!area->AddWaiterIfWired(&waiter))
924 		return false;
925 
926 	// unlock everything and wait
927 	if (locker1 != NULL)
928 		locker1->Unlock();
929 	if (locker2 != NULL)
930 		locker2->Unlock();
931 
932 	waiter.waitEntry.Wait();
933 
934 	return true;
935 }
936 
937 
938 /*!	Checks whether the given area has any wired ranges intersecting with the
939 	specified range and waits, if so.
940 
941 	When it has to wait, the function calls \c Unlock() on both \a locker1
942 	and \a locker2, if given.
943 	The area's top cache must be locked and must be unlocked as a side effect
944 	of calling \c Unlock() on either \a locker1 or \a locker2.
945 
946 	If the function does not have to wait it does not modify or unlock any
947 	object.
948 
949 	\param area The area to be checked.
950 	\param base The base address of the range to check.
951 	\param size The size of the address range to check.
952 	\param locker1 An object to be unlocked before starting to wait (may
953 		be \c NULL).
954 	\param locker2 An object to be unlocked before starting to wait (may
955 		be \c NULL).
956 	\return \c true, if the function had to wait, \c false otherwise.
957 */
958 template<typename LockerType1, typename LockerType2>
959 static inline bool
960 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
961 	LockerType1* locker1, LockerType2* locker2)
962 {
963 	area->cache->AssertLocked();
964 
965 	VMAreaUnwiredWaiter waiter;
966 	if (!area->AddWaiterIfWired(&waiter, base, size))
967 		return false;
968 
969 	// unlock everything and wait
970 	if (locker1 != NULL)
971 		locker1->Unlock();
972 	if (locker2 != NULL)
973 		locker2->Unlock();
974 
975 	waiter.waitEntry.Wait();
976 
977 	return true;
978 }
979 
980 
981 /*!	Checks whether the given address space has any wired ranges intersecting
982 	with the specified range and waits, if so.
983 
984 	Similar to wait_if_area_range_is_wired(), with the following differences:
985 	- All areas intersecting with the range are checked (or rather, all until
986 	  one is found that contains a wired range intersecting with the given
987 	  range).
988 	- The given address space must at least be read-locked and must be unlocked
989 	  when \c Unlock() is called on \a locker.
990 	- None of the areas' caches are allowed to be locked.
991 */
992 template<typename LockerType>
993 static inline bool
994 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
995 	size_t size, LockerType* locker)
996 {
997 	addr_t end = base + size - 1;
998 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
999 			VMArea* area = it.Next();) {
1000 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
1001 		if (area->Base() > end)
1002 			return false;
1003 
1004 		if (base >= area->Base() + area->Size() - 1)
1005 			continue;
1006 
1007 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1008 
1009 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1010 			return true;
1011 	}
1012 
1013 	return false;
1014 }
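
// Callers typically invoke this in a retry loop that re-acquires the address
// space lock after having waited, as vm_create_null_area() below does:
//
//	do {
//		if (locker.SetTo(team) != B_OK)
//			return B_BAD_TEAM_ID;
//	} while (addressSpec == B_EXACT_ADDRESS
//		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
//		&& wait_if_address_range_is_wired(locker.AddressSpace(),
//			(addr_t)*address, size, &locker));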
1015 
1016 
1017 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1018 	It must be called in a situation where the kernel address space may be
1019 	locked.
1020 */
1021 status_t
1022 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1023 {
1024 	AddressSpaceReadLocker locker;
1025 	VMArea* area;
1026 	status_t status = locker.SetFromArea(id, area);
1027 	if (status != B_OK)
1028 		return status;
1029 
1030 	if (area->page_protections == NULL) {
1031 		status = allocate_area_page_protections(area);
1032 		if (status != B_OK)
1033 			return status;
1034 	}
1035 
1036 	*cookie = (void*)area;
1037 	return B_OK;
1038 }
1039 
1040 
1041 /*!	This is a debug helper function that can only be used with very specific
1042 	use cases.
1043 	Sets protection for the given address range to the protection specified.
1044 	If \a protection is 0 then the involved pages will be marked non-present
1045 	in the translation map to cause a fault on access. The pages aren't
1046 	actually unmapped however so that they can be marked present again with
1047 	additional calls to this function. For this to work the area must be
1048 	fully locked in memory so that the pages aren't otherwise touched.
1049 	This function does not lock the kernel address space and needs to be
1050 	supplied with a \a cookie retrieved from a successful call to
1051 	vm_prepare_kernel_area_debug_protection().
1052 */
1053 status_t
1054 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1055 	uint32 protection)
1056 {
1057 	// check address range
1058 	addr_t address = (addr_t)_address;
1059 	size = PAGE_ALIGN(size);
1060 
1061 	if ((address % B_PAGE_SIZE) != 0
1062 		|| (addr_t)address + size < (addr_t)address
1063 		|| !IS_KERNEL_ADDRESS(address)
1064 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1065 		return B_BAD_VALUE;
1066 	}
1067 
1068 	// Translate the kernel protection to user protection as we only store that.
1069 	if ((protection & B_KERNEL_READ_AREA) != 0)
1070 		protection |= B_READ_AREA;
1071 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1072 		protection |= B_WRITE_AREA;
1073 
1074 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1075 	VMTranslationMap* map = addressSpace->TranslationMap();
1076 	VMArea* area = (VMArea*)cookie;
1077 
1078 	addr_t offset = address - area->Base();
1079 	if (area->Size() - offset < size) {
1080 		panic("protect range not fully within supplied area");
1081 		return B_BAD_VALUE;
1082 	}
1083 
1084 	if (area->page_protections == NULL) {
1085 		panic("area has no page protections");
1086 		return B_BAD_VALUE;
1087 	}
1088 
1089 	// Invalidate the mapping entries so any access to them will fault or
1090 	// restore the mapping entries unchanged so that lookups will succeed again.
1091 	map->Lock();
1092 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1093 	map->Unlock();
1094 
1095 	// And set the proper page protections so that the fault case will actually
1096 	// fail and not simply try to map a new page.
1097 	for (addr_t pageAddress = address; pageAddress < address + size;
1098 			pageAddress += B_PAGE_SIZE) {
1099 		set_area_page_protection(area, pageAddress, protection);
1100 	}
1101 
1102 	return B_OK;
1103 }
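
// Intended usage, as a sketch (the area id, address and range are purely
// illustrative): obtain a cookie first, while locking the kernel address
// space is still permissible, then toggle the protection later:
//
//	void* cookie;
//	if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
//		// make the first page of the range fault on any access
//		vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE, 0);
//		// ... and later make it accessible again
//		vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE,
//			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	}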
1104 
1105 
1106 status_t
1107 vm_block_address_range(const char* name, void* address, addr_t size)
1108 {
1109 	if (!arch_vm_supports_protection(0))
1110 		return B_NOT_SUPPORTED;
1111 
1112 	AddressSpaceWriteLocker locker;
1113 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1114 	if (status != B_OK)
1115 		return status;
1116 
1117 	VMAddressSpace* addressSpace = locker.AddressSpace();
1118 
1119 	// create an anonymous cache
1120 	VMCache* cache;
1121 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1122 		VM_PRIORITY_SYSTEM);
1123 	if (status != B_OK)
1124 		return status;
1125 
1126 	cache->temporary = 1;
1127 	cache->virtual_end = size;
1128 	cache->Lock();
1129 
1130 	VMArea* area;
1131 	virtual_address_restrictions addressRestrictions = {};
1132 	addressRestrictions.address = address;
1133 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1134 	status = map_backing_store(addressSpace, cache, 0, name, size,
1135 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1136 		true, &area, NULL);
1137 	if (status != B_OK) {
1138 		cache->ReleaseRefAndUnlock();
1139 		return status;
1140 	}
1141 
1142 	cache->Unlock();
1143 	area->cache_type = CACHE_TYPE_RAM;
1144 	return area->id;
1145 }
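
// Usage sketch (name, address and size are illustrative; the address must be
// page aligned): permanently reserve a range so nothing can be mapped there:
//
//	// blockedBase is a page-aligned kernel address chosen by the caller
//	status_t result = vm_block_address_range("blocked range", blockedBase,
//		4 * B_PAGE_SIZE);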
1146 
1147 
1148 status_t
1149 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1150 {
1151 	AddressSpaceWriteLocker locker(team);
1152 	if (!locker.IsLocked())
1153 		return B_BAD_TEAM_ID;
1154 
1155 	VMAddressSpace* addressSpace = locker.AddressSpace();
1156 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1157 		addressSpace == VMAddressSpace::Kernel()
1158 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1159 }
1160 
1161 
1162 status_t
1163 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1164 	addr_t size, uint32 flags)
1165 {
1166 	if (size == 0)
1167 		return B_BAD_VALUE;
1168 
1169 	AddressSpaceWriteLocker locker(team);
1170 	if (!locker.IsLocked())
1171 		return B_BAD_TEAM_ID;
1172 
1173 	virtual_address_restrictions addressRestrictions = {};
1174 	addressRestrictions.address = *_address;
1175 	addressRestrictions.address_specification = addressSpec;
1176 	VMAddressSpace* addressSpace = locker.AddressSpace();
1177 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1178 		addressSpace == VMAddressSpace::Kernel()
1179 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1180 		_address);
1181 }
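
// Usage sketch (team and size are illustrative): reserve, and later release,
// a range of address space without backing it with an area:
//
//	void* base = NULL;
//	status_t status = vm_reserve_address_range(team, &base, B_ANY_ADDRESS,
//		16 * B_PAGE_SIZE, 0);
//	// ...
//	if (status == B_OK)
//		vm_unreserve_address_range(team, base, 16 * B_PAGE_SIZE);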
1182 
1183 
1184 area_id
1185 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1186 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1187 	const virtual_address_restrictions* virtualAddressRestrictions,
1188 	const physical_address_restrictions* physicalAddressRestrictions,
1189 	bool kernel, void** _address)
1190 {
1191 	VMArea* area;
1192 	VMCache* cache;
1193 	vm_page* page = NULL;
1194 	bool isStack = (protection & B_STACK_AREA) != 0;
1195 	page_num_t guardPages;
1196 	bool canOvercommit = false;
1197 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1198 		? VM_PAGE_ALLOC_CLEAR : 0;
1199 
1200 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1201 		team, name, size));
1202 
1203 	size = PAGE_ALIGN(size);
1204 	guardSize = PAGE_ALIGN(guardSize);
1205 	guardPages = guardSize / B_PAGE_SIZE;
1206 
1207 	if (size == 0 || size < guardSize)
1208 		return B_BAD_VALUE;
1209 	if (!arch_vm_supports_protection(protection))
1210 		return B_NOT_SUPPORTED;
1211 
1212 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1213 		canOvercommit = true;
1214 
1215 #ifdef DEBUG_KERNEL_STACKS
1216 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1217 		isStack = true;
1218 #endif
1219 
1220 	// check parameters
1221 	switch (virtualAddressRestrictions->address_specification) {
1222 		case B_ANY_ADDRESS:
1223 		case B_EXACT_ADDRESS:
1224 		case B_BASE_ADDRESS:
1225 		case B_ANY_KERNEL_ADDRESS:
1226 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1227 		case B_RANDOMIZED_ANY_ADDRESS:
1228 		case B_RANDOMIZED_BASE_ADDRESS:
1229 			break;
1230 
1231 		default:
1232 			return B_BAD_VALUE;
1233 	}
1234 
1235 	// If low or high physical address restrictions are given, we force
1236 	// B_CONTIGUOUS wiring, since only then will we use
1237 	// vm_page_allocate_page_run() which deals with those restrictions.
1238 	if (physicalAddressRestrictions->low_address != 0
1239 		|| physicalAddressRestrictions->high_address != 0) {
1240 		wiring = B_CONTIGUOUS;
1241 	}
1242 
1243 	physical_address_restrictions stackPhysicalRestrictions;
1244 	bool doReserveMemory = false;
1245 	switch (wiring) {
1246 		case B_NO_LOCK:
1247 			break;
1248 		case B_FULL_LOCK:
1249 		case B_LAZY_LOCK:
1250 		case B_CONTIGUOUS:
1251 			doReserveMemory = true;
1252 			break;
1253 		case B_ALREADY_WIRED:
1254 			break;
1255 		case B_LOMEM:
1256 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1257 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1258 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1259 			wiring = B_CONTIGUOUS;
1260 			doReserveMemory = true;
1261 			break;
1262 		case B_32_BIT_FULL_LOCK:
1263 			if (B_HAIKU_PHYSICAL_BITS <= 32
1264 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1265 				wiring = B_FULL_LOCK;
1266 				doReserveMemory = true;
1267 				break;
1268 			}
1269 			// TODO: We don't really support this mode efficiently. Just fall
1270 			// through for now ...
1271 		case B_32_BIT_CONTIGUOUS:
1272 			#if B_HAIKU_PHYSICAL_BITS > 32
1273 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1274 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1275 					stackPhysicalRestrictions.high_address
1276 						= (phys_addr_t)1 << 32;
1277 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1278 				}
1279 			#endif
1280 			wiring = B_CONTIGUOUS;
1281 			doReserveMemory = true;
1282 			break;
1283 		default:
1284 			return B_BAD_VALUE;
1285 	}
1286 
1287 	// Optimization: For a single-page contiguous allocation without low/high
1288 	// memory restriction B_FULL_LOCK wiring suffices.
1289 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1290 		&& physicalAddressRestrictions->low_address == 0
1291 		&& physicalAddressRestrictions->high_address == 0) {
1292 		wiring = B_FULL_LOCK;
1293 	}
1294 
1295 	// For full lock or contiguous areas we're also going to map the pages and
1296 	// thus need to reserve pages for the mapping backend upfront.
1297 	addr_t reservedMapPages = 0;
1298 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1299 		AddressSpaceWriteLocker locker;
1300 		status_t status = locker.SetTo(team);
1301 		if (status != B_OK)
1302 			return status;
1303 
1304 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1305 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1306 	}
1307 
1308 	int priority;
1309 	if (team != VMAddressSpace::KernelID())
1310 		priority = VM_PRIORITY_USER;
1311 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1312 		priority = VM_PRIORITY_VIP;
1313 	else
1314 		priority = VM_PRIORITY_SYSTEM;
1315 
1316 	// Reserve memory before acquiring the address space lock. This reduces the
1317 	// chances of failure, since while holding the write lock to the address
1318 	// space (if it is the kernel address space that is), the low memory handler
1319 	// won't be able to free anything for us.
1320 	addr_t reservedMemory = 0;
1321 	if (doReserveMemory) {
1322 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1323 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1324 			return B_NO_MEMORY;
1325 		reservedMemory = size;
1326 		// TODO: We don't reserve the memory for the pages for the page
1327 		// directories/tables. We actually need to, since we currently don't
1328 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1329 		// there are actually fewer physical pages than there should be, which
1330 		// can get the VM into trouble in low memory situations.
1331 	}
1332 
1333 	AddressSpaceWriteLocker locker;
1334 	VMAddressSpace* addressSpace;
1335 	status_t status;
1336 
1337 	// For full lock areas reserve the pages before locking the address
1338 	// space. E.g. block caches can't release their memory while we hold the
1339 	// address space lock.
1340 	page_num_t reservedPages = reservedMapPages;
1341 	if (wiring == B_FULL_LOCK)
1342 		reservedPages += size / B_PAGE_SIZE;
1343 
1344 	vm_page_reservation reservation;
1345 	if (reservedPages > 0) {
1346 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1347 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1348 					priority)) {
1349 				reservedPages = 0;
1350 				status = B_WOULD_BLOCK;
1351 				goto err0;
1352 			}
1353 		} else
1354 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1355 	}
1356 
1357 	if (wiring == B_CONTIGUOUS) {
1358 		// we try to allocate the page run here upfront as this may easily
1359 		// fail for obvious reasons
1360 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1361 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1362 		if (page == NULL) {
1363 			status = B_NO_MEMORY;
1364 			goto err0;
1365 		}
1366 	}
1367 
1368 	// Lock the address space and, if B_EXACT_ADDRESS and
1369 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1370 	// is not wired.
1371 	do {
1372 		status = locker.SetTo(team);
1373 		if (status != B_OK)
1374 			goto err1;
1375 
1376 		addressSpace = locker.AddressSpace();
1377 	} while (virtualAddressRestrictions->address_specification
1378 			== B_EXACT_ADDRESS
1379 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1380 		&& wait_if_address_range_is_wired(addressSpace,
1381 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1382 
1383 	// create an anonymous cache
1384 	// if it's a stack, make sure that two pages are available at least
1385 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1386 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1387 		wiring == B_NO_LOCK, priority);
1388 	if (status != B_OK)
1389 		goto err1;
1390 
1391 	cache->temporary = 1;
1392 	cache->virtual_end = size;
1393 	cache->committed_size = reservedMemory;
1394 		// TODO: This should be done via a method.
1395 	reservedMemory = 0;
1396 
1397 	cache->Lock();
1398 
1399 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1400 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1401 		kernel, &area, _address);
1402 
1403 	if (status != B_OK) {
1404 		cache->ReleaseRefAndUnlock();
1405 		goto err1;
1406 	}
1407 
1408 	locker.DegradeToReadLock();
1409 
1410 	switch (wiring) {
1411 		case B_NO_LOCK:
1412 		case B_LAZY_LOCK:
1413 			// do nothing - the pages are mapped in as needed
1414 			break;
1415 
1416 		case B_FULL_LOCK:
1417 		{
1418 			// Allocate and map all pages for this area
1419 
1420 			off_t offset = 0;
1421 			for (addr_t address = area->Base();
1422 					address < area->Base() + (area->Size() - 1);
1423 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1424 #ifdef DEBUG_KERNEL_STACKS
1425 #	ifdef STACK_GROWS_DOWNWARDS
1426 				if (isStack && address < area->Base()
1427 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1428 #	else
1429 				if (isStack && address >= area->Base() + area->Size()
1430 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1431 #	endif
1432 					continue;
1433 #endif
1434 				vm_page* page = vm_page_allocate_page(&reservation,
1435 					PAGE_STATE_WIRED | pageAllocFlags);
1436 				cache->InsertPage(page, offset);
1437 				map_page(area, page, address, protection, &reservation);
1438 
1439 				DEBUG_PAGE_ACCESS_END(page);
1440 			}
1441 
1442 			break;
1443 		}
1444 
1445 		case B_ALREADY_WIRED:
1446 		{
1447 			// The pages should already be mapped. This is only really useful
1448 			// during boot time. Find the appropriate vm_page objects and stick
1449 			// them in the cache object.
1450 			VMTranslationMap* map = addressSpace->TranslationMap();
1451 			off_t offset = 0;
1452 
1453 			if (!gKernelStartup)
1454 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1455 
1456 			map->Lock();
1457 
1458 			for (addr_t virtualAddress = area->Base();
1459 					virtualAddress < area->Base() + (area->Size() - 1);
1460 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1461 				phys_addr_t physicalAddress;
1462 				uint32 flags;
1463 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1464 				if (status < B_OK) {
1465 					panic("looking up mapping failed for va 0x%lx\n",
1466 						virtualAddress);
1467 				}
1468 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1469 				if (page == NULL) {
1470 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1471 						"\n", physicalAddress);
1472 				}
1473 
1474 				DEBUG_PAGE_ACCESS_START(page);
1475 
1476 				cache->InsertPage(page, offset);
1477 				increment_page_wired_count(page);
1478 				vm_page_set_state(page, PAGE_STATE_WIRED);
1479 				page->busy = false;
1480 
1481 				DEBUG_PAGE_ACCESS_END(page);
1482 			}
1483 
1484 			map->Unlock();
1485 			break;
1486 		}
1487 
1488 		case B_CONTIGUOUS:
1489 		{
1490 			// We have already allocated our contiguous page run, so we can now
1491 			// just map them in the address space
1492 			VMTranslationMap* map = addressSpace->TranslationMap();
1493 			phys_addr_t physicalAddress
1494 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1495 			addr_t virtualAddress = area->Base();
1496 			off_t offset = 0;
1497 
1498 			map->Lock();
1499 
1500 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1501 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1502 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1503 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1504 				if (page == NULL)
1505 					panic("couldn't lookup physical page just allocated\n");
1506 
1507 				status = map->Map(virtualAddress, physicalAddress, protection,
1508 					area->MemoryType(), &reservation);
1509 				if (status < B_OK)
1510 					panic("couldn't map physical page in page run\n");
1511 
1512 				cache->InsertPage(page, offset);
1513 				increment_page_wired_count(page);
1514 
1515 				DEBUG_PAGE_ACCESS_END(page);
1516 			}
1517 
1518 			map->Unlock();
1519 			break;
1520 		}
1521 
1522 		default:
1523 			break;
1524 	}
1525 
1526 	cache->Unlock();
1527 
1528 	if (reservedPages > 0)
1529 		vm_page_unreserve_pages(&reservation);
1530 
1531 	TRACE(("vm_create_anonymous_area: done\n"));
1532 
1533 	area->cache_type = CACHE_TYPE_RAM;
1534 	return area->id;
1535 
1536 err1:
1537 	if (wiring == B_CONTIGUOUS) {
1538 		// we had allocated the contiguous page run upfront; free the pages again
1539 		phys_addr_t pageNumber = page->physical_page_number;
1540 		int32 i;
1541 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1542 			page = vm_lookup_page(pageNumber);
1543 			if (page == NULL)
1544 				panic("couldn't lookup physical page just allocated\n");
1545 
1546 			vm_page_set_state(page, PAGE_STATE_FREE);
1547 		}
1548 	}
1549 
1550 err0:
1551 	if (reservedPages > 0)
1552 		vm_page_unreserve_pages(&reservation);
1553 	if (reservedMemory > 0)
1554 		vm_unreserve_memory(reservedMemory);
1555 
1556 	return status;
1557 }
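
// A minimal caller sketch, modeled on _vm_map_file() below (name, size and
// protection are illustrative): create an unlocked anonymous area in the
// given team's address space:
//
//	virtual_address_restrictions virtualRestrictions = {};
//	virtualRestrictions.address_specification = B_ANY_ADDRESS;
//	physical_address_restrictions physicalRestrictions = {};
//	void* address;
//	area_id area = vm_create_anonymous_area(team, "anon area",
//		16 * B_PAGE_SIZE, B_NO_LOCK, B_READ_AREA | B_WRITE_AREA, 0, 0,
//		&virtualRestrictions, &physicalRestrictions, false, &address);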
1558 
1559 
1560 area_id
1561 vm_map_physical_memory(team_id team, const char* name, void** _address,
1562 	uint32 addressSpec, addr_t size, uint32 protection,
1563 	phys_addr_t physicalAddress, bool alreadyWired)
1564 {
1565 	VMArea* area;
1566 	VMCache* cache;
1567 	addr_t mapOffset;
1568 
1569 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1570 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1571 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1572 		addressSpec, size, protection, physicalAddress));
1573 
1574 	if (!arch_vm_supports_protection(protection))
1575 		return B_NOT_SUPPORTED;
1576 
1577 	AddressSpaceWriteLocker locker(team);
1578 	if (!locker.IsLocked())
1579 		return B_BAD_TEAM_ID;
1580 
1581 	// if the physical address is not page aligned,
1582 	// move the actual area down to align on a page boundary
1583 	mapOffset = physicalAddress % B_PAGE_SIZE;
1584 	size += mapOffset;
1585 	physicalAddress -= mapOffset;
1586 
1587 	size = PAGE_ALIGN(size);
1588 
1589 	// create a device cache
1590 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1591 	if (status != B_OK)
1592 		return status;
1593 
1594 	cache->virtual_end = size;
1595 
1596 	cache->Lock();
1597 
1598 	virtual_address_restrictions addressRestrictions = {};
1599 	addressRestrictions.address = *_address;
1600 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1601 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1602 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1603 		true, &area, _address);
1604 
1605 	if (status < B_OK)
1606 		cache->ReleaseRefLocked();
1607 
1608 	cache->Unlock();
1609 
1610 	if (status == B_OK) {
1611 		// set requested memory type -- use uncached, if not given
1612 		uint32 memoryType = addressSpec & B_MTR_MASK;
1613 		if (memoryType == 0)
1614 			memoryType = B_MTR_UC;
1615 
1616 		area->SetMemoryType(memoryType);
1617 
1618 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1619 		if (status != B_OK)
1620 			delete_area(locker.AddressSpace(), area, false);
1621 	}
1622 
1623 	if (status != B_OK)
1624 		return status;
1625 
1626 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1627 
1628 	if (alreadyWired) {
1629 		// The area is already mapped, but possibly not with the right
1630 		// memory type.
1631 		map->Lock();
1632 		map->ProtectArea(area, area->protection);
1633 		map->Unlock();
1634 	} else {
1635 		// Map the area completely.
1636 
1637 		// reserve pages needed for the mapping
1638 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1639 			area->Base() + (size - 1));
1640 		vm_page_reservation reservation;
1641 		vm_page_reserve_pages(&reservation, reservePages,
1642 			team == VMAddressSpace::KernelID()
1643 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1644 
1645 		map->Lock();
1646 
1647 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1648 			map->Map(area->Base() + offset, physicalAddress + offset,
1649 				protection, area->MemoryType(), &reservation);
1650 		}
1651 
1652 		map->Unlock();
1653 
1654 		vm_page_unreserve_pages(&reservation);
1655 	}
1656 
1657 	// modify the pointer returned to be offset back into the new area
1658 	// the same way the physical address in was offset
1659 	*_address = (void*)((addr_t)*_address + mapOffset);
1660 
1661 	area->cache_type = CACHE_TYPE_DEVICE;
1662 	return area->id;
1663 }
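
// Example of the alignment handling above (assuming 4 KiB pages): a request
// for physicalAddress 0x10000204 with size 0x100 yields mapOffset 0x204; the
// cache and area then cover 0x10000000 .. 0x10000fff, and the returned
// *_address points 0x204 bytes into the area.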
1664 
1665 
1666 /*!	Don't use!
1667 	TODO: This function was introduced to map physical page vecs to
1668 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1669 	use a device cache and does not track vm_page::wired_count!
1670 */
1671 area_id
1672 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1673 	uint32 addressSpec, addr_t* _size, uint32 protection,
1674 	struct generic_io_vec* vecs, uint32 vecCount)
1675 {
1676 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1677 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1678 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1679 		addressSpec, _size, protection, vecs, vecCount));
1680 
1681 	if (!arch_vm_supports_protection(protection)
1682 		|| (addressSpec & B_MTR_MASK) != 0) {
1683 		return B_NOT_SUPPORTED;
1684 	}
1685 
1686 	AddressSpaceWriteLocker locker(team);
1687 	if (!locker.IsLocked())
1688 		return B_BAD_TEAM_ID;
1689 
1690 	if (vecCount == 0)
1691 		return B_BAD_VALUE;
1692 
1693 	addr_t size = 0;
1694 	for (uint32 i = 0; i < vecCount; i++) {
1695 		if (vecs[i].base % B_PAGE_SIZE != 0
1696 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1697 			return B_BAD_VALUE;
1698 		}
1699 
1700 		size += vecs[i].length;
1701 	}
1702 
1703 	// create a device cache
1704 	VMCache* cache;
1705 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1706 	if (result != B_OK)
1707 		return result;
1708 
1709 	cache->virtual_end = size;
1710 
1711 	cache->Lock();
1712 
1713 	VMArea* area;
1714 	virtual_address_restrictions addressRestrictions = {};
1715 	addressRestrictions.address = *_address;
1716 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1717 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1718 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1719 		&addressRestrictions, true, &area, _address);
1720 
1721 	if (result != B_OK)
1722 		cache->ReleaseRefLocked();
1723 
1724 	cache->Unlock();
1725 
1726 	if (result != B_OK)
1727 		return result;
1728 
1729 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1730 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1731 		area->Base() + (size - 1));
1732 
1733 	vm_page_reservation reservation;
1734 	vm_page_reserve_pages(&reservation, reservePages,
1735 			team == VMAddressSpace::KernelID()
1736 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1737 	map->Lock();
1738 
1739 	uint32 vecIndex = 0;
1740 	size_t vecOffset = 0;
1741 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1742 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
1743 			vecOffset = 0;
1744 			vecIndex++;
1745 		}
1746 
1747 		if (vecIndex >= vecCount)
1748 			break;
1749 
1750 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1751 			protection, area->MemoryType(), &reservation);
1752 
1753 		vecOffset += B_PAGE_SIZE;
1754 	}
1755 
1756 	map->Unlock();
1757 	vm_page_unreserve_pages(&reservation);
1758 
1759 	if (_size != NULL)
1760 		*_size = size;
1761 
1762 	area->cache_type = CACHE_TYPE_DEVICE;
1763 	return area->id;
1764 }
1765 
1766 
1767 area_id
1768 vm_create_null_area(team_id team, const char* name, void** address,
1769 	uint32 addressSpec, addr_t size, uint32 flags)
1770 {
1771 	size = PAGE_ALIGN(size);
1772 
1773 	// Lock the address space and, if B_EXACT_ADDRESS and
1774 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1775 	// is not wired.
1776 	AddressSpaceWriteLocker locker;
1777 	do {
1778 		if (locker.SetTo(team) != B_OK)
1779 			return B_BAD_TEAM_ID;
1780 	} while (addressSpec == B_EXACT_ADDRESS
1781 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1782 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1783 			(addr_t)*address, size, &locker));
1784 
1785 	// create a null cache
1786 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1787 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1788 	VMCache* cache;
1789 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1790 	if (status != B_OK)
1791 		return status;
1792 
1793 	cache->temporary = 1;
1794 	cache->virtual_end = size;
1795 
1796 	cache->Lock();
1797 
1798 	VMArea* area;
1799 	virtual_address_restrictions addressRestrictions = {};
1800 	addressRestrictions.address = *address;
1801 	addressRestrictions.address_specification = addressSpec;
1802 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1803 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1804 		&addressRestrictions, true, &area, address);
1805 
1806 	if (status < B_OK) {
1807 		cache->ReleaseRefAndUnlock();
1808 		return status;
1809 	}
1810 
1811 	cache->Unlock();
1812 
1813 	area->cache_type = CACHE_TYPE_NULL;
1814 	return area->id;
1815 }
1816 
1817 
1818 /*!	Creates the vnode cache for the specified \a vnode.
1819 	The vnode has to be marked busy when calling this function.
1820 */
1821 status_t
1822 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1823 {
1824 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1825 }
1826 
1827 
1828 /*!	\a cache must be locked. The area's address space must be read-locked.
1829 */
1830 static void
1831 pre_map_area_pages(VMArea* area, VMCache* cache,
1832 	vm_page_reservation* reservation)
1833 {
1834 	addr_t baseAddress = area->Base();
1835 	addr_t cacheOffset = area->cache_offset;
1836 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1837 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1838 
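	// Pre-map all resident pages of the cache that fall within the area's
	// range, skipping busy and inactive ones. The pages are mapped read-only
	// here.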
1839 	for (VMCachePagesTree::Iterator it
1840 				= cache->pages.GetIterator(firstPage, true, true);
1841 			vm_page* page = it.Next();) {
1842 		if (page->cache_offset >= endPage)
1843 			break;
1844 
1845 		// skip busy and inactive pages
1846 		if (page->busy || page->usage_count == 0)
1847 			continue;
1848 
1849 		DEBUG_PAGE_ACCESS_START(page);
1850 		map_page(area, page,
1851 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1852 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1853 		DEBUG_PAGE_ACCESS_END(page);
1854 	}
1855 }
1856 
1857 
1858 /*!	Will map the file specified by \a fd to an area in memory.
1859 	The file will be mirrored beginning at the specified \a offset. The
1860 	\a offset and \a size arguments are rounded to page boundaries.
1861 */
1862 static area_id
1863 _vm_map_file(team_id team, const char* name, void** _address,
1864 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1865 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1866 {
1867 	// TODO: for binary files, we want to make sure that they get a
1868 	//	copy of the file at a given point in time, i.e. later changes should
1869 	//	not make it into the mapped copy -- doing this in a nice way will
1870 	//	need quite some changes
1871 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1872 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1873 
1874 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1875 	size = PAGE_ALIGN(size);
1876 
1877 	if (mapping == REGION_NO_PRIVATE_MAP)
1878 		protection |= B_SHARED_AREA;
1879 	if (addressSpec != B_EXACT_ADDRESS)
1880 		unmapAddressRange = false;
1881 
1882 	if (fd < 0) {
1883 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1884 		virtual_address_restrictions virtualRestrictions = {};
1885 		virtualRestrictions.address = *_address;
1886 		virtualRestrictions.address_specification = addressSpec;
1887 		physical_address_restrictions physicalRestrictions = {};
1888 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1889 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1890 			_address);
1891 	}
1892 
1893 	// get the open flags of the FD
1894 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1895 	if (descriptor == NULL)
1896 		return EBADF;
1897 	int32 openMode = descriptor->open_mode;
1898 	put_fd(descriptor);
1899 
1900 	// The FD must be open for reading at any rate. For a shared mapping with
1901 	// write access, the FD must additionally be open for writing.
1902 	if ((openMode & O_ACCMODE) == O_WRONLY
1903 		|| (mapping == REGION_NO_PRIVATE_MAP
1904 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1905 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1906 		return EACCES;
1907 	}
1908 
1909 	// get the vnode for the object, this also grabs a ref to it
1910 	struct vnode* vnode = NULL;
1911 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1912 	if (status < B_OK)
1913 		return status;
1914 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1915 
1916 	// If we're going to pre-map pages, we need to reserve the pages needed by
1917 	// the mapping backend upfront.
1918 	page_num_t reservedPreMapPages = 0;
1919 	vm_page_reservation reservation;
1920 	if ((protection & B_READ_AREA) != 0) {
1921 		AddressSpaceWriteLocker locker;
1922 		status = locker.SetTo(team);
1923 		if (status != B_OK)
1924 			return status;
1925 
1926 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1927 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1928 
1929 		locker.Unlock();
1930 
1931 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1932 			team == VMAddressSpace::KernelID()
1933 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1934 	}
1935 
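	// Make sure the pages reserved for pre-mapping are unreserved again on
	// every return path.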
1936 	struct PageUnreserver {
1937 		PageUnreserver(vm_page_reservation* reservation)
1938 			:
1939 			fReservation(reservation)
1940 		{
1941 		}
1942 
1943 		~PageUnreserver()
1944 		{
1945 			if (fReservation != NULL)
1946 				vm_page_unreserve_pages(fReservation);
1947 		}
1948 
1949 		vm_page_reservation* fReservation;
1950 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1951 
1952 	// Lock the address space and, if the specified address range shall be
1953 	// unmapped, ensure it is not wired.
1954 	AddressSpaceWriteLocker locker;
1955 	do {
1956 		if (locker.SetTo(team) != B_OK)
1957 			return B_BAD_TEAM_ID;
1958 	} while (unmapAddressRange
1959 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1960 			(addr_t)*_address, size, &locker));
1961 
1962 	// TODO: this only works for file systems that use the file cache
1963 	VMCache* cache;
1964 	status = vfs_get_vnode_cache(vnode, &cache, false);
1965 	if (status < B_OK)
1966 		return status;
1967 
1968 	cache->Lock();
1969 
1970 	VMArea* area;
1971 	virtual_address_restrictions addressRestrictions = {};
1972 	addressRestrictions.address = *_address;
1973 	addressRestrictions.address_specification = addressSpec;
1974 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1975 		0, protection, mapping,
1976 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1977 		&addressRestrictions, kernel, &area, _address);
1978 
1979 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1980 		// map_backing_store() cannot know we no longer need the ref
1981 		cache->ReleaseRefLocked();
1982 	}
1983 
1984 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1985 		pre_map_area_pages(area, cache, &reservation);
1986 
1987 	cache->Unlock();
1988 
1989 	if (status == B_OK) {
1990 		// TODO: this probably deserves a smarter solution, i.e. don't always
1991 		// prefetch stuff, and probably don't trigger it at this place either.
1992 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1993 			// prefetches at max 10 MB starting from "offset"
1994 	}
1995 
1996 	if (status != B_OK)
1997 		return status;
1998 
1999 	area->cache_type = CACHE_TYPE_VNODE;
2000 	return area->id;
2001 }
2002 
2003 
2004 area_id
2005 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2006 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2007 	int fd, off_t offset)
2008 {
2009 	if (!arch_vm_supports_protection(protection))
2010 		return B_NOT_SUPPORTED;
2011 
2012 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2013 		mapping, unmapAddressRange, fd, offset, true);
2014 }
2015 
2016 
2017 VMCache*
2018 vm_area_get_locked_cache(VMArea* area)
2019 {
2020 	rw_lock_read_lock(&sAreaCacheLock);
2021 
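	// Locking the cache requires giving up sAreaCacheLock first, so after
	// acquiring the cache lock we have to re-check that the cache is still
	// the one the area refers to.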
2022 	while (true) {
2023 		VMCache* cache = area->cache;
2024 
2025 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2026 			// cache has been deleted
2027 			rw_lock_read_lock(&sAreaCacheLock);
2028 			continue;
2029 		}
2030 
2031 		rw_lock_read_lock(&sAreaCacheLock);
2032 
2033 		if (cache == area->cache) {
2034 			cache->AcquireRefLocked();
2035 			rw_lock_read_unlock(&sAreaCacheLock);
2036 			return cache;
2037 		}
2038 
2039 		// the cache changed in the meantime
2040 		cache->Unlock();
2041 	}
2042 }
2043 
2044 
2045 void
2046 vm_area_put_locked_cache(VMCache* cache)
2047 {
2048 	cache->ReleaseRefAndUnlock();
2049 }
2050 
2051 
2052 area_id
2053 vm_clone_area(team_id team, const char* name, void** address,
2054 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2055 	bool kernel)
2056 {
2057 	VMArea* newArea = NULL;
2058 	VMArea* sourceArea;
2059 
2060 	// Check whether the source area exists and is cloneable. If so, mark it
2061 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2062 	{
2063 		AddressSpaceWriteLocker locker;
2064 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2065 		if (status != B_OK)
2066 			return status;
2067 
2068 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2069 			return B_NOT_ALLOWED;
2070 
2071 		sourceArea->protection |= B_SHARED_AREA;
2072 		protection |= B_SHARED_AREA;
2073 	}
2074 
2075 	// Now lock both address spaces and actually do the cloning.
2076 
2077 	MultiAddressSpaceLocker locker;
2078 	VMAddressSpace* sourceAddressSpace;
2079 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2080 	if (status != B_OK)
2081 		return status;
2082 
2083 	VMAddressSpace* targetAddressSpace;
2084 	status = locker.AddTeam(team, true, &targetAddressSpace);
2085 	if (status != B_OK)
2086 		return status;
2087 
2088 	status = locker.Lock();
2089 	if (status != B_OK)
2090 		return status;
2091 
2092 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2093 	if (sourceArea == NULL)
2094 		return B_BAD_VALUE;
2095 
2096 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2097 		return B_NOT_ALLOWED;
2098 
2099 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2100 
2101 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2102 	//	have been adapted. Maybe it should be part of the kernel settings
2103 	//	anyway (so that old drivers can always work).
2104 #if 0
2105 	if (sourceArea->aspace == VMAddressSpace::Kernel()
2106 		&& addressSpace != VMAddressSpace::Kernel()
2107 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2108 		// kernel areas must not be cloned in userland, unless explicitly
2109 		// declared user-cloneable upon construction
2110 		status = B_NOT_ALLOWED;
2111 	} else
2112 #endif
2113 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2114 		status = B_NOT_ALLOWED;
2115 	else {
2116 		virtual_address_restrictions addressRestrictions = {};
2117 		addressRestrictions.address = *address;
2118 		addressRestrictions.address_specification = addressSpec;
2119 		status = map_backing_store(targetAddressSpace, cache,
2120 			sourceArea->cache_offset, name, sourceArea->Size(),
2121 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2122 			kernel, &newArea, address);
2123 	}
2124 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2125 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2126 		// to create a new cache, and has therefore already acquired a reference
2127 		// to the source cache - but otherwise it has no idea that we need
2128 		// one.
2129 		cache->AcquireRefLocked();
2130 	}
2131 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2132 		// we need to map in everything at this point
2133 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
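			// Querying the source area's base yields the physical base
			// address; this assumes the area covers a single physically
			// contiguous range.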
2134 			// we don't have actual pages to map but a physical area
2135 			VMTranslationMap* map
2136 				= sourceArea->address_space->TranslationMap();
2137 			map->Lock();
2138 
2139 			phys_addr_t physicalAddress;
2140 			uint32 oldProtection;
2141 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2142 
2143 			map->Unlock();
2144 
2145 			map = targetAddressSpace->TranslationMap();
2146 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2147 				newArea->Base() + (newArea->Size() - 1));
2148 
2149 			vm_page_reservation reservation;
2150 			vm_page_reserve_pages(&reservation, reservePages,
2151 				targetAddressSpace == VMAddressSpace::Kernel()
2152 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2153 			map->Lock();
2154 
2155 			for (addr_t offset = 0; offset < newArea->Size();
2156 					offset += B_PAGE_SIZE) {
2157 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2158 					protection, newArea->MemoryType(), &reservation);
2159 			}
2160 
2161 			map->Unlock();
2162 			vm_page_unreserve_pages(&reservation);
2163 		} else {
2164 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2165 			size_t reservePages = map->MaxPagesNeededToMap(
2166 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2167 			vm_page_reservation reservation;
2168 			vm_page_reserve_pages(&reservation, reservePages,
2169 				targetAddressSpace == VMAddressSpace::Kernel()
2170 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2171 
2172 			// map in all pages from source
2173 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2174 					vm_page* page  = it.Next();) {
2175 				if (!page->busy) {
2176 					DEBUG_PAGE_ACCESS_START(page);
2177 					map_page(newArea, page,
2178 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2179 							- newArea->cache_offset),
2180 						protection, &reservation);
2181 					DEBUG_PAGE_ACCESS_END(page);
2182 				}
2183 			}
2184 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2185 			// ensuring that!
2186 
2187 			vm_page_unreserve_pages(&reservation);
2188 		}
2189 	}
2190 	if (status == B_OK)
2191 		newArea->cache_type = sourceArea->cache_type;
2192 
2193 	vm_area_put_locked_cache(cache);
2194 
2195 	if (status < B_OK)
2196 		return status;
2197 
2198 	return newArea->id;
2199 }
2200 
2201 
2202 /*!	Deletes the specified area of the given address space.
2203 
2204 	The address space must be write-locked.
2205 	The caller must ensure that the area does not have any wired ranges.
2206 
2207 	\param addressSpace The address space containing the area.
2208 	\param area The area to be deleted.
2209 	\param deletingAddressSpace \c true, if the address space is in the process
2210 		of being deleted.
2211 */
2212 static void
2213 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2214 	bool deletingAddressSpace)
2215 {
2216 	ASSERT(!area->IsWired());
2217 
2218 	VMAreaHash::Remove(area);
2219 
2220 	// At this point the area is removed from the global hash table, but
2221 	// still exists in the area list.
2222 
2223 	// Unmap the virtual address space the area occupied.
2224 	{
2225 		// We need to lock the complete cache chain.
2226 		VMCache* topCache = vm_area_get_locked_cache(area);
2227 		VMCacheChainLocker cacheChainLocker(topCache);
2228 		cacheChainLocker.LockAllSourceCaches();
2229 
2230 		// If the area's top cache is a temporary cache and the area is the only
2231 		// one referencing it (besides us currently holding a second reference),
2232 		// the unmapping code doesn't need to care about preserving the accessed
2233 		// and dirty flags of the top cache page mappings.
2234 		bool ignoreTopCachePageFlags
2235 			= topCache->temporary && topCache->RefCount() == 2;
2236 
2237 		area->address_space->TranslationMap()->UnmapArea(area,
2238 			deletingAddressSpace, ignoreTopCachePageFlags);
2239 	}
2240 
2241 	if (!area->cache->temporary)
2242 		area->cache->WriteModified();
2243 
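	// When operating on the kernel address space, the heap must neither wait
	// for memory nor lock the kernel address space again (we already hold it
	// write-locked).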
2244 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2245 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2246 
2247 	arch_vm_unset_memory_type(area);
2248 	addressSpace->RemoveArea(area, allocationFlags);
2249 	addressSpace->Put();
2250 
2251 	area->cache->RemoveArea(area);
2252 	area->cache->ReleaseRef();
2253 
2254 	addressSpace->DeleteArea(area, allocationFlags);
2255 }
2256 
2257 
2258 status_t
2259 vm_delete_area(team_id team, area_id id, bool kernel)
2260 {
2261 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2262 		team, id));
2263 
2264 	// lock the address space and make sure the area isn't wired
2265 	AddressSpaceWriteLocker locker;
2266 	VMArea* area;
2267 	AreaCacheLocker cacheLocker;
2268 
2269 	do {
2270 		status_t status = locker.SetFromArea(team, id, area);
2271 		if (status != B_OK)
2272 			return status;
2273 
2274 		cacheLocker.SetTo(area);
2275 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2276 
2277 	cacheLocker.Unlock();
2278 
2279 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2280 		return B_NOT_ALLOWED;
2281 
2282 	delete_area(locker.AddressSpace(), area, false);
2283 	return B_OK;
2284 }
2285 
2286 
2287 /*!	Creates a new cache on top of the given cache, moves all areas from
2288 	the old cache to the new one, and changes the protection of all affected
2289 	areas' pages to read-only. If requested, wired pages are moved up to the
2290 	new cache and copies are added to the old cache in their place.
2291 	Preconditions:
2292 	- The given cache must be locked.
2293 	- All of the cache's areas' address spaces must be read-locked.
2294 	- Either the cache must not have any wired ranges or a page reservation for
2295 	  all wired pages must be provided, so they can be copied.
2296 
2297 	\param lowerCache The cache on top of which a new cache shall be created.
2298 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2299 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2300 		has wired pages. The wired pages are copied in this case.
2301 */
2302 static status_t
2303 vm_copy_on_write_area(VMCache* lowerCache,
2304 	vm_page_reservation* wiredPagesReservation)
2305 {
2306 	VMCache* upperCache;
2307 
2308 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2309 
2310 	// We need to separate the cache from its areas. The cache goes one level
2311 	// deeper and we create a new cache in between.
2312 
2313 	// create an anonymous cache
2314 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2315 		lowerCache->GuardSize() / B_PAGE_SIZE,
2316 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2317 		VM_PRIORITY_USER);
2318 	if (status != B_OK)
2319 		return status;
2320 
2321 	upperCache->Lock();
2322 
2323 	upperCache->temporary = 1;
2324 	upperCache->virtual_base = lowerCache->virtual_base;
2325 	upperCache->virtual_end = lowerCache->virtual_end;
2326 
2327 	// transfer the lower cache areas to the upper cache
2328 	rw_lock_write_lock(&sAreaCacheLock);
2329 	upperCache->TransferAreas(lowerCache);
2330 	rw_lock_write_unlock(&sAreaCacheLock);
2331 
2332 	lowerCache->AddConsumer(upperCache);
2333 
2334 	// We now need to remap all pages from all of the cache's areas read-only,
2335 	// so that a copy will be created on next write access. If there are wired
2336 	// pages, we keep their protection, move them to the upper cache and create
2337 	// copies for the lower cache.
2338 	if (wiredPagesReservation != NULL) {
2339 		// We need to handle wired pages -- iterate through the cache's pages.
2340 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2341 				vm_page* page = it.Next();) {
2342 			if (page->WiredCount() > 0) {
2343 				// allocate a new page and copy the wired one
2344 				vm_page* copiedPage = vm_page_allocate_page(
2345 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2346 
2347 				vm_memcpy_physical_page(
2348 					copiedPage->physical_page_number * B_PAGE_SIZE,
2349 					page->physical_page_number * B_PAGE_SIZE);
2350 
2351 				// move the wired page to the upper cache (note: removing is OK
2352 				// with the SplayTree iterator) and insert the copy
2353 				upperCache->MovePage(page);
2354 				lowerCache->InsertPage(copiedPage,
2355 					page->cache_offset * B_PAGE_SIZE);
2356 
2357 				DEBUG_PAGE_ACCESS_END(copiedPage);
2358 			} else {
2359 				// Change the protection of this page in all areas.
2360 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2361 						tempArea = tempArea->cache_next) {
2362 					// The area must be readable in the same way it was
2363 					// previously writable.
2364 					uint32 protection = B_KERNEL_READ_AREA;
2365 					if ((tempArea->protection & B_READ_AREA) != 0)
2366 						protection |= B_READ_AREA;
2367 
2368 					VMTranslationMap* map
2369 						= tempArea->address_space->TranslationMap();
2370 					map->Lock();
2371 					map->ProtectPage(tempArea,
2372 						virtual_page_address(tempArea, page), protection);
2373 					map->Unlock();
2374 				}
2375 			}
2376 		}
2377 	} else {
2378 		ASSERT(lowerCache->WiredPagesCount() == 0);
2379 
2380 		// just change the protection of all areas
2381 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2382 				tempArea = tempArea->cache_next) {
2383 			// The area must be readable in the same way it was previously
2384 			// writable.
2385 			uint32 protection = B_KERNEL_READ_AREA;
2386 			if ((tempArea->protection & B_READ_AREA) != 0)
2387 				protection |= B_READ_AREA;
2388 
2389 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2390 			map->Lock();
2391 			map->ProtectArea(tempArea, protection);
2392 			map->Unlock();
2393 		}
2394 	}
2395 
2396 	vm_area_put_locked_cache(upperCache);
2397 
2398 	return B_OK;
2399 }
2400 
2401 
2402 area_id
2403 vm_copy_area(team_id team, const char* name, void** _address,
2404 	uint32 addressSpec, uint32 protection, area_id sourceID)
2405 {
2406 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2407 
2408 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2409 		// set the same protection for the kernel as for userland
2410 		protection |= B_KERNEL_READ_AREA;
2411 		if (writableCopy)
2412 			protection |= B_KERNEL_WRITE_AREA;
2413 	}
2414 
2415 	// Do the locking: target address space, all address spaces associated with
2416 	// the source cache, and the cache itself.
2417 	MultiAddressSpaceLocker locker;
2418 	VMAddressSpace* targetAddressSpace;
2419 	VMCache* cache;
2420 	VMArea* source;
2421 	AreaCacheLocker cacheLocker;
2422 	status_t status;
2423 	bool sharedArea;
2424 
2425 	page_num_t wiredPages = 0;
2426 	vm_page_reservation wiredPagesReservation;
2427 
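	// If the source cache contains wired pages, we will have to copy them, so
	// we need to reserve pages for the copies. Reserving may block, hence we
	// drop the locks, reserve, and then re-check the wired page count.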
2428 	bool restart;
2429 	do {
2430 		restart = false;
2431 
2432 		locker.Unset();
2433 		status = locker.AddTeam(team, true, &targetAddressSpace);
2434 		if (status == B_OK) {
2435 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2436 				&cache);
2437 		}
2438 		if (status != B_OK)
2439 			return status;
2440 
2441 		cacheLocker.SetTo(cache, true);	// already locked
2442 
2443 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2444 
2445 		page_num_t oldWiredPages = wiredPages;
2446 		wiredPages = 0;
2447 
2448 		// If the source area isn't shared, count the number of wired pages in
2449 		// the cache and reserve as many pages.
2450 		if (!sharedArea) {
2451 			wiredPages = cache->WiredPagesCount();
2452 
2453 			if (wiredPages > oldWiredPages) {
2454 				cacheLocker.Unlock();
2455 				locker.Unlock();
2456 
2457 				if (oldWiredPages > 0)
2458 					vm_page_unreserve_pages(&wiredPagesReservation);
2459 
2460 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2461 					VM_PRIORITY_USER);
2462 
2463 				restart = true;
2464 			}
2465 		} else if (oldWiredPages > 0)
2466 			vm_page_unreserve_pages(&wiredPagesReservation);
2467 	} while (restart);
2468 
2469 	// unreserve pages later
2470 	struct PagesUnreserver {
2471 		PagesUnreserver(vm_page_reservation* reservation)
2472 			:
2473 			fReservation(reservation)
2474 		{
2475 		}
2476 
2477 		~PagesUnreserver()
2478 		{
2479 			if (fReservation != NULL)
2480 				vm_page_unreserve_pages(fReservation);
2481 		}
2482 
2483 	private:
2484 		vm_page_reservation*	fReservation;
2485 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2486 
2487 	if (addressSpec == B_CLONE_ADDRESS) {
2488 		addressSpec = B_EXACT_ADDRESS;
2489 		*_address = (void*)source->Base();
2490 	}
2491 
2492 	// First, create a cache on top of the source area, or use the existing
2493 	// one if this is a shared area.
2494 
2495 	VMArea* target;
2496 	virtual_address_restrictions addressRestrictions = {};
2497 	addressRestrictions.address = *_address;
2498 	addressRestrictions.address_specification = addressSpec;
2499 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2500 		name, source->Size(), source->wiring, protection,
2501 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2502 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2503 		&addressRestrictions, true, &target, _address);
2504 	if (status < B_OK)
2505 		return status;
2506 
2507 	if (sharedArea) {
2508 		// The new area uses the old area's cache, but map_backing_store()
2509 		// hasn't acquired a ref. So we have to do that now.
2510 		cache->AcquireRefLocked();
2511 	}
2512 
2513 	// If the source area is writable, we need to move it one layer up as well
2514 
2515 	if (!sharedArea) {
2516 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2517 			// TODO: do something more useful if this fails!
2518 			if (vm_copy_on_write_area(cache,
2519 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2520 				panic("vm_copy_on_write_area() failed!\n");
2521 			}
2522 		}
2523 	}
2524 
2525 	// we return the ID of the newly created area
2526 	return target->id;
2527 }
2528 
2529 
2530 status_t
2531 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2532 	bool kernel)
2533 {
2534 	fix_protection(&newProtection);
2535 
2536 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2537 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2538 
2539 	if (!arch_vm_supports_protection(newProtection))
2540 		return B_NOT_SUPPORTED;
2541 
2542 	bool becomesWritable
2543 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2544 
2545 	// lock address spaces and cache
2546 	MultiAddressSpaceLocker locker;
2547 	VMCache* cache;
2548 	VMArea* area;
2549 	status_t status;
2550 	AreaCacheLocker cacheLocker;
2551 	bool isWritable;
2552 
2553 	bool restart;
2554 	do {
2555 		restart = false;
2556 
2557 		locker.Unset();
2558 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2559 		if (status != B_OK)
2560 			return status;
2561 
2562 		cacheLocker.SetTo(cache, true);	// already locked
2563 
2564 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2565 			return B_NOT_ALLOWED;
2566 
2567 		if (area->protection == newProtection)
2568 			return B_OK;
2569 
2570 		if (team != VMAddressSpace::KernelID()
2571 			&& area->address_space->ID() != team) {
2572 			// unless you're the kernel, you are only allowed to set
2573 			// the protection of your own areas
2574 			return B_NOT_ALLOWED;
2575 		}
2576 
2577 		isWritable
2578 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2579 
2580 		// Make sure the area (or, if we're going to call
2581 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2582 		// wired ranges.
2583 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2584 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2585 					otherArea = otherArea->cache_next) {
2586 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2587 					restart = true;
2588 					break;
2589 				}
2590 			}
2591 		} else {
2592 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2593 				restart = true;
2594 		}
2595 	} while (restart);
2596 
2597 	bool changePageProtection = true;
2598 	bool changeTopCachePagesOnly = false;
2599 
2600 	if (isWritable && !becomesWritable) {
2601 		// writable -> !writable
2602 
2603 		if (cache->source != NULL && cache->temporary) {
2604 			if (cache->CountWritableAreas(area) == 0) {
2605 				// Since this cache now lives from the pages in its source cache,
2606 				// we can change the cache's commitment to take only those pages
2607 				// into account that really are in this cache.
2608 
2609 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2610 					team == VMAddressSpace::KernelID()
2611 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2612 
2613 				// TODO: we may be able to join with our source cache, if
2614 				// count == 0
2615 			}
2616 		}
2617 
2618 		// If only the writability changes, we can just remap the pages of the
2619 		// top cache, since the pages of lower caches are mapped read-only
2620 		// anyway. That's only advantageous if the number of pages in the cache
2621 		// is significantly smaller than the number of pages in the area,
2622 		// though.
2623 		if (newProtection
2624 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2625 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2626 			changeTopCachePagesOnly = true;
2627 		}
2628 	} else if (!isWritable && becomesWritable) {
2629 		// !writable -> writable
2630 
2631 		if (!cache->consumers.IsEmpty()) {
2632 			// There are consumers -- we have to insert a new cache. Fortunately
2633 			// vm_copy_on_write_area() does everything that's needed.
2634 			changePageProtection = false;
2635 			status = vm_copy_on_write_area(cache, NULL);
2636 		} else {
2637 			// No consumers, so we don't need to insert a new one.
2638 			if (cache->source != NULL && cache->temporary) {
2639 				// the cache's commitment must contain all possible pages
2640 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2641 					team == VMAddressSpace::KernelID()
2642 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2643 			}
2644 
2645 			if (status == B_OK && cache->source != NULL) {
2646 				// There's a source cache, hence we can't just change all pages'
2647 				// protection or we might allow writing into pages belonging to
2648 				// a lower cache.
2649 				changeTopCachePagesOnly = true;
2650 			}
2651 		}
2652 	} else {
2653 		// we don't have anything special to do in all other cases
2654 	}
2655 
2656 	if (status == B_OK) {
2657 		// remap existing pages in this cache
2658 		if (changePageProtection) {
2659 			VMTranslationMap* map = area->address_space->TranslationMap();
2660 			map->Lock();
2661 
2662 			if (changeTopCachePagesOnly) {
2663 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2664 				page_num_t lastPageOffset
2665 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2666 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2667 						vm_page* page = it.Next();) {
2668 					if (page->cache_offset >= firstPageOffset
2669 						&& page->cache_offset < lastPageOffset) {
2670 						addr_t address = virtual_page_address(area, page);
2671 						map->ProtectPage(area, address, newProtection);
2672 					}
2673 				}
2674 			} else
2675 				map->ProtectArea(area, newProtection);
2676 
2677 			map->Unlock();
2678 		}
2679 
2680 		area->protection = newProtection;
2681 	}
2682 
2683 	return status;
2684 }
2685 
2686 
2687 status_t
2688 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2689 {
2690 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2691 	if (addressSpace == NULL)
2692 		return B_BAD_TEAM_ID;
2693 
2694 	VMTranslationMap* map = addressSpace->TranslationMap();
2695 
2696 	map->Lock();
2697 	uint32 dummyFlags;
2698 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2699 	map->Unlock();
2700 
2701 	addressSpace->Put();
2702 	return status;
2703 }
2704 
2705 
2706 /*!	The page's cache must be locked.
2707 */
2708 bool
2709 vm_test_map_modification(vm_page* page)
2710 {
2711 	if (page->modified)
2712 		return true;
2713 
2714 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2715 	vm_page_mapping* mapping;
2716 	while ((mapping = iterator.Next()) != NULL) {
2717 		VMArea* area = mapping->area;
2718 		VMTranslationMap* map = area->address_space->TranslationMap();
2719 
2720 		phys_addr_t physicalAddress;
2721 		uint32 flags;
2722 		map->Lock();
2723 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2724 		map->Unlock();
2725 
2726 		if ((flags & PAGE_MODIFIED) != 0)
2727 			return true;
2728 	}
2729 
2730 	return false;
2731 }
2732 
2733 
2734 /*!	The page's cache must be locked.
2735 */
2736 void
2737 vm_clear_map_flags(vm_page* page, uint32 flags)
2738 {
2739 	if ((flags & PAGE_ACCESSED) != 0)
2740 		page->accessed = false;
2741 	if ((flags & PAGE_MODIFIED) != 0)
2742 		page->modified = false;
2743 
2744 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2745 	vm_page_mapping* mapping;
2746 	while ((mapping = iterator.Next()) != NULL) {
2747 		VMArea* area = mapping->area;
2748 		VMTranslationMap* map = area->address_space->TranslationMap();
2749 
2750 		map->Lock();
2751 		map->ClearFlags(virtual_page_address(area, page), flags);
2752 		map->Unlock();
2753 	}
2754 }
2755 
2756 
2757 /*!	Removes all mappings from a page.
2758 	After you've called this function, the page is unmapped from memory and
2759 	the page's \c accessed and \c modified flags have been updated according
2760 	to the state of the mappings.
2761 	The page's cache must be locked.
2762 */
2763 void
2764 vm_remove_all_page_mappings(vm_page* page)
2765 {
2766 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2767 		VMArea* area = mapping->area;
2768 		VMTranslationMap* map = area->address_space->TranslationMap();
2769 		addr_t address = virtual_page_address(area, page);
2770 		map->UnmapPage(area, address, false);
2771 	}
2772 }
2773 
2774 
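/*!	Clears the accessed flags of all of the page's mappings and of the page
	itself. Modified flags encountered in the mappings are transferred to the
	page. The page's cache must be locked.
	\return The number of accessed flags that were set, including the page's
		own \c accessed flag.
*/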
2775 int32
2776 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2777 {
2778 	int32 count = 0;
2779 
2780 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2781 	vm_page_mapping* mapping;
2782 	while ((mapping = iterator.Next()) != NULL) {
2783 		VMArea* area = mapping->area;
2784 		VMTranslationMap* map = area->address_space->TranslationMap();
2785 
2786 		bool modified;
2787 		if (map->ClearAccessedAndModified(area,
2788 				virtual_page_address(area, page), false, modified)) {
2789 			count++;
2790 		}
2791 
2792 		page->modified |= modified;
2793 	}
2794 
2795 
2796 	if (page->accessed) {
2797 		count++;
2798 		page->accessed = false;
2799 	}
2800 
2801 	return count;
2802 }
2803 
2804 
2805 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2806 	mappings.
2807 	The function iterates through the page mappings and removes them until
2808 	encountering one that has been accessed. From then on it will continue to
2809 	iterate, but only clear the accessed flag of the mapping. The page's
2810 	\c modified bit will be updated accordingly, the \c accessed bit will be
2811 	cleared.
2812 	\return The number of mapping accessed bits encountered, including the
2813 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2814 		of the page have been removed.
2815 */
2816 int32
2817 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2818 {
2819 	ASSERT(page->WiredCount() == 0);
2820 
2821 	if (page->accessed)
2822 		return vm_clear_page_mapping_accessed_flags(page);
2823 
2824 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2825 		VMArea* area = mapping->area;
2826 		VMTranslationMap* map = area->address_space->TranslationMap();
2827 		addr_t address = virtual_page_address(area, page);
2828 		bool modified = false;
2829 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2830 			page->accessed = true;
2831 			page->modified |= modified;
2832 			return vm_clear_page_mapping_accessed_flags(page);
2833 		}
2834 		page->modified |= modified;
2835 	}
2836 
2837 	return 0;
2838 }
2839 
2840 
2841 static int
2842 display_mem(int argc, char** argv)
2843 {
2844 	bool physical = false;
2845 	addr_t copyAddress;
2846 	int32 displayWidth;
2847 	int32 itemSize;
2848 	int32 num = -1;
2849 	addr_t address;
2850 	int i = 1, j;
2851 
2852 	if (argc > 1 && argv[1][0] == '-') {
2853 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2854 			physical = true;
2855 			i++;
2856 		} else
2857 			i = 99;
2858 	}
2859 
2860 	if (argc < i + 1 || argc > i + 2) {
2861 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2862 			"\tdl - 8 bytes\n"
2863 			"\tdw - 4 bytes\n"
2864 			"\tds - 2 bytes\n"
2865 			"\tdb - 1 byte\n"
2866 			"\tstring - a whole string\n"
2867 			"  -p or --physical only allows memory from a single page to be "
2868 			"displayed.\n");
2869 		return 0;
2870 	}
2871 
2872 	address = parse_expression(argv[i]);
2873 
2874 	if (argc > i + 1)
2875 		num = parse_expression(argv[i + 1]);
2876 
2877 	// build the format string
2878 	if (strcmp(argv[0], "db") == 0) {
2879 		itemSize = 1;
2880 		displayWidth = 16;
2881 	} else if (strcmp(argv[0], "ds") == 0) {
2882 		itemSize = 2;
2883 		displayWidth = 8;
2884 	} else if (strcmp(argv[0], "dw") == 0) {
2885 		itemSize = 4;
2886 		displayWidth = 4;
2887 	} else if (strcmp(argv[0], "dl") == 0) {
2888 		itemSize = 8;
2889 		displayWidth = 2;
2890 	} else if (strcmp(argv[0], "string") == 0) {
2891 		itemSize = 1;
2892 		displayWidth = -1;
2893 	} else {
2894 		kprintf("display_mem called in an invalid way!\n");
2895 		return 0;
2896 	}
2897 
2898 	if (num <= 0)
2899 		num = displayWidth;
2900 
2901 	void* physicalPageHandle = NULL;
2902 
2903 	if (physical) {
2904 		int32 offset = address & (B_PAGE_SIZE - 1);
2905 		if (num * itemSize + offset > B_PAGE_SIZE) {
2906 			num = (B_PAGE_SIZE - offset) / itemSize;
2907 			kprintf("NOTE: number of bytes has been cut to page size\n");
2908 		}
2909 
2910 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2911 
2912 		if (vm_get_physical_page_debug(address, &copyAddress,
2913 				&physicalPageHandle) != B_OK) {
2914 			kprintf("getting the hardware page failed.\n");
2915 			return 0;
2916 		}
2917 
2918 		address += offset;
2919 		copyAddress += offset;
2920 	} else
2921 		copyAddress = address;
2922 
2923 	if (!strcmp(argv[0], "string")) {
2924 		kprintf("%p \"", (char*)copyAddress);
2925 
2926 		// string mode
2927 		for (i = 0; true; i++) {
2928 			char c;
2929 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2930 					!= B_OK
2931 				|| c == '\0') {
2932 				break;
2933 			}
2934 
2935 			if (c == '\n')
2936 				kprintf("\\n");
2937 			else if (c == '\t')
2938 				kprintf("\\t");
2939 			else {
2940 				if (!isprint(c))
2941 					c = '.';
2942 
2943 				kprintf("%c", c);
2944 			}
2945 		}
2946 
2947 		kprintf("\"\n");
2948 	} else {
2949 		// number mode
2950 		for (i = 0; i < num; i++) {
2951 			uint32 value;
2952 
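			// At the start of each output line, print the address and an
			// ASCII preview of the bytes belonging to that line.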
2953 			if ((i % displayWidth) == 0) {
2954 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2955 				if (i != 0)
2956 					kprintf("\n");
2957 
2958 				kprintf("[0x%lx]  ", address + i * itemSize);
2959 
2960 				for (j = 0; j < displayed; j++) {
2961 					char c;
2962 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2963 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2964 						displayed = j;
2965 						break;
2966 					}
2967 					if (!isprint(c))
2968 						c = '.';
2969 
2970 					kprintf("%c", c);
2971 				}
2972 				if (num > displayWidth) {
2973 					// make sure the spacing in the last line is correct
2974 					for (j = displayed; j < displayWidth * itemSize; j++)
2975 						kprintf(" ");
2976 				}
2977 				kprintf("  ");
2978 			}
2979 
2980 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2981 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2982 				kprintf("read fault");
2983 				break;
2984 			}
2985 
2986 			switch (itemSize) {
2987 				case 1:
2988 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
2989 					break;
2990 				case 2:
2991 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
2992 					break;
2993 				case 4:
2994 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
2995 					break;
2996 				case 8:
2997 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
2998 					break;
2999 			}
3000 		}
3001 
3002 		kprintf("\n");
3003 	}
3004 
3005 	if (physical) {
3006 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3007 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3008 	}
3009 	return 0;
3010 }
3011 
3012 
3013 static void
3014 dump_cache_tree_recursively(VMCache* cache, int level,
3015 	VMCache* highlightCache)
3016 {
3017 	// print this cache
3018 	for (int i = 0; i < level; i++)
3019 		kprintf("  ");
3020 	if (cache == highlightCache)
3021 		kprintf("%p <--\n", cache);
3022 	else
3023 		kprintf("%p\n", cache);
3024 
3025 	// recursively print its consumers
3026 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3027 			VMCache* consumer = it.Next();) {
3028 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3029 	}
3030 }
3031 
3032 
3033 static int
3034 dump_cache_tree(int argc, char** argv)
3035 {
3036 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3037 		kprintf("usage: %s <address>\n", argv[0]);
3038 		return 0;
3039 	}
3040 
3041 	addr_t address = parse_expression(argv[1]);
3042 	if (address == 0)
3043 		return 0;
3044 
3045 	VMCache* cache = (VMCache*)address;
3046 	VMCache* root = cache;
3047 
3048 	// find the root cache (the transitive source)
3049 	while (root->source != NULL)
3050 		root = root->source;
3051 
3052 	dump_cache_tree_recursively(root, 0, cache);
3053 
3054 	return 0;
3055 }
3056 
3057 
3058 const char*
3059 vm_cache_type_to_string(int32 type)
3060 {
3061 	switch (type) {
3062 		case CACHE_TYPE_RAM:
3063 			return "RAM";
3064 		case CACHE_TYPE_DEVICE:
3065 			return "device";
3066 		case CACHE_TYPE_VNODE:
3067 			return "vnode";
3068 		case CACHE_TYPE_NULL:
3069 			return "null";
3070 
3071 		default:
3072 			return "unknown";
3073 	}
3074 }
3075 
3076 
3077 #if DEBUG_CACHE_LIST
3078 
3079 static void
3080 update_cache_info_recursively(VMCache* cache, cache_info& info)
3081 {
3082 	info.page_count += cache->page_count;
3083 	if (cache->type == CACHE_TYPE_RAM)
3084 		info.committed += cache->committed_size;
3085 
3086 	// recurse
3087 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3088 			VMCache* consumer = it.Next();) {
3089 		update_cache_info_recursively(consumer, info);
3090 	}
3091 }
3092 
3093 
3094 static int
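// qsort() comparator sorting cache_infos by page count in descending order.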
3095 cache_info_compare_page_count(const void* _a, const void* _b)
3096 {
3097 	const cache_info* a = (const cache_info*)_a;
3098 	const cache_info* b = (const cache_info*)_b;
3099 	if (a->page_count == b->page_count)
3100 		return 0;
3101 	return a->page_count < b->page_count ? 1 : -1;
3102 }
3103 
3104 
3105 static int
3106 cache_info_compare_committed(const void* _a, const void* _b)
3107 {
3108 	const cache_info* a = (const cache_info*)_a;
3109 	const cache_info* b = (const cache_info*)_b;
3110 	if (a->committed == b->committed)
3111 		return 0;
3112 	return a->committed < b->committed ? 1 : -1;
3113 }
3114 
3115 
3116 static void
3117 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3118 {
3119 	for (int i = 0; i < level; i++)
3120 		kprintf("  ");
3121 
3122 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3123 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3124 		cache->virtual_base, cache->virtual_end, cache->page_count);
3125 
3126 	if (level == 0)
3127 		kprintf("/%lu", info.page_count);
3128 
3129 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3130 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3131 
3132 		if (level == 0)
3133 			kprintf("/%lu", info.committed);
3134 	}
3135 
3136 	// areas
3137 	if (cache->areas != NULL) {
3138 		VMArea* area = cache->areas;
3139 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3140 			area->name, area->address_space->ID());
3141 
3142 		while (area->cache_next != NULL) {
3143 			area = area->cache_next;
3144 			kprintf(", %" B_PRId32, area->id);
3145 		}
3146 	}
3147 
3148 	kputs("\n");
3149 
3150 	// recurse
3151 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3152 			VMCache* consumer = it.Next();) {
3153 		dump_caches_recursively(consumer, info, level + 1);
3154 	}
3155 }
3156 
3157 
3158 static int
3159 dump_caches(int argc, char** argv)
3160 {
3161 	if (sCacheInfoTable == NULL) {
3162 		kprintf("No cache info table!\n");
3163 		return 0;
3164 	}
3165 
3166 	bool sortByPageCount = true;
3167 
3168 	for (int32 i = 1; i < argc; i++) {
3169 		if (strcmp(argv[i], "-c") == 0) {
3170 			sortByPageCount = false;
3171 		} else {
3172 			print_debugger_command_usage(argv[0]);
3173 			return 0;
3174 		}
3175 	}
3176 
3177 	uint32 totalCount = 0;
3178 	uint32 rootCount = 0;
3179 	off_t totalCommitted = 0;
3180 	page_num_t totalPages = 0;
3181 
3182 	VMCache* cache = gDebugCacheList;
3183 	while (cache) {
3184 		totalCount++;
3185 		if (cache->source == NULL) {
3186 			cache_info stackInfo;
3187 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3188 				? sCacheInfoTable[rootCount] : stackInfo;
3189 			rootCount++;
3190 			info.cache = cache;
3191 			info.page_count = 0;
3192 			info.committed = 0;
3193 			update_cache_info_recursively(cache, info);
3194 			totalCommitted += info.committed;
3195 			totalPages += info.page_count;
3196 		}
3197 
3198 		cache = cache->debug_next;
3199 	}
3200 
3201 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3202 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3203 			sortByPageCount
3204 				? &cache_info_compare_page_count
3205 				: &cache_info_compare_committed);
3206 	}
3207 
3208 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3209 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3210 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3211 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3212 			"page count" : "committed size");
3213 
3214 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3215 		for (uint32 i = 0; i < rootCount; i++) {
3216 			cache_info& info = sCacheInfoTable[i];
3217 			dump_caches_recursively(info.cache, info, 0);
3218 		}
3219 	} else
3220 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3221 
3222 	return 0;
3223 }
3224 
3225 #endif	// DEBUG_CACHE_LIST
3226 
3227 
3228 static int
3229 dump_cache(int argc, char** argv)
3230 {
3231 	VMCache* cache;
3232 	bool showPages = false;
3233 	int i = 1;
3234 
3235 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3236 		kprintf("usage: %s [-ps] <address>\n"
3237 			"  if -p is specified, all pages are shown; if -s is used,\n"
3238 			"  only the cache info is shown.\n", argv[0]);
3239 		return 0;
3240 	}
3241 	while (argv[i][0] == '-') {
3242 		char* arg = argv[i] + 1;
3243 		while (arg[0]) {
3244 			if (arg[0] == 'p')
3245 				showPages = true;
3246 			arg++;
3247 		}
3248 		i++;
3249 	}
3250 	if (argv[i] == NULL) {
3251 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3252 		return 0;
3253 	}
3254 
3255 	addr_t address = parse_expression(argv[i]);
3256 	if (address == 0)
3257 		return 0;
3258 
3259 	cache = (VMCache*)address;
3260 
3261 	cache->Dump(showPages);
3262 
3263 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3264 
3265 	return 0;
3266 }
3267 
3268 
3269 static void
3270 dump_area_struct(VMArea* area, bool mappings)
3271 {
3272 	kprintf("AREA: %p\n", area);
3273 	kprintf("name:\t\t'%s'\n", area->name);
3274 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3275 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3276 	kprintf("base:\t\t0x%lx\n", area->Base());
3277 	kprintf("size:\t\t0x%lx\n", area->Size());
3278 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3279 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3280 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3281 	kprintf("cache:\t\t%p\n", area->cache);
3282 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3283 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3284 	kprintf("cache_next:\t%p\n", area->cache_next);
3285 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3286 
3287 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3288 	if (mappings) {
3289 		kprintf("page mappings:\n");
3290 		while (iterator.HasNext()) {
3291 			vm_page_mapping* mapping = iterator.Next();
3292 			kprintf("  %p", mapping->page);
3293 		}
3294 		kprintf("\n");
3295 	} else {
3296 		uint32 count = 0;
3297 		while (iterator.Next() != NULL) {
3298 			count++;
3299 		}
3300 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3301 	}
3302 }
3303 
3304 
3305 static int
3306 dump_area(int argc, char** argv)
3307 {
3308 	bool mappings = false;
3309 	bool found = false;
3310 	int32 index = 1;
3311 	VMArea* area;
3312 	addr_t num;
3313 
3314 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3315 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3316 			"All areas matching either id/address/name are listed. You can\n"
3317 			"force checking only a specific item by prefixing the specifier\n"
3318 			"with one of the id/contains/address/name keywords.\n"
3319 			"-m shows the area's mappings as well.\n");
3320 		return 0;
3321 	}
3322 
3323 	if (!strcmp(argv[1], "-m")) {
3324 		mappings = true;
3325 		index++;
3326 	}
3327 
3328 	int32 mode = 0xf;
3329 	if (!strcmp(argv[index], "id"))
3330 		mode = 1;
3331 	else if (!strcmp(argv[index], "contains"))
3332 		mode = 2;
3333 	else if (!strcmp(argv[index], "name"))
3334 		mode = 4;
3335 	else if (!strcmp(argv[index], "address"))
3336 		mode = 0;
3337 	if (mode != 0xf)
3338 		index++;
3339 
3340 	if (index >= argc) {
3341 		kprintf("No area specifier given.\n");
3342 		return 0;
3343 	}
3344 
3345 	num = parse_expression(argv[index]);
3346 
3347 	if (mode == 0) {
3348 		dump_area_struct((struct VMArea*)num, mappings);
3349 	} else {
3350 		// walk through the area list, looking for the arguments as a name
3351 
3352 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3353 		while ((area = it.Next()) != NULL) {
3354 			if (((mode & 4) != 0 && area->name != NULL
3355 					&& !strcmp(argv[index], area->name))
3356 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3357 					|| (((mode & 2) != 0 && area->Base() <= num
3358 						&& area->Base() + area->Size() > num))))) {
3359 				dump_area_struct(area, mappings);
3360 				found = true;
3361 			}
3362 		}
3363 
3364 		if (!found)
3365 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3366 	}
3367 
3368 	return 0;
3369 }
3370 
3371 
3372 static int
3373 dump_area_list(int argc, char** argv)
3374 {
3375 	VMArea* area;
3376 	const char* name = NULL;
3377 	int32 id = 0;
3378 
3379 	if (argc > 1) {
3380 		id = parse_expression(argv[1]);
3381 		if (id == 0)
3382 			name = argv[1];
3383 	}
3384 
3385 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3386 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3387 		B_PRINTF_POINTER_WIDTH, "size");
3388 
3389 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3390 	while ((area = it.Next()) != NULL) {
3391 		if ((id != 0 && area->address_space->ID() != id)
3392 			|| (name != NULL && strstr(area->name, name) == NULL))
3393 			continue;
3394 
3395 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3396 			area->id, (void*)area->Base(), (void*)area->Size(),
3397 			area->protection, area->wiring, area->name);
3398 	}
3399 	return 0;
3400 }
3401 
3402 
3403 static int
3404 dump_available_memory(int argc, char** argv)
3405 {
3406 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3407 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3408 	return 0;
3409 }
3410 
3411 
3412 static int
3413 dump_mapping_info(int argc, char** argv)
3414 {
3415 	bool reverseLookup = false;
3416 	bool pageLookup = false;
3417 
3418 	int argi = 1;
3419 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3420 		const char* arg = argv[argi];
3421 		if (strcmp(arg, "-r") == 0) {
3422 			reverseLookup = true;
3423 		} else if (strcmp(arg, "-p") == 0) {
3424 			reverseLookup = true;
3425 			pageLookup = true;
3426 		} else {
3427 			print_debugger_command_usage(argv[0]);
3428 			return 0;
3429 		}
3430 	}
3431 
3432 	// We need at least one argument, the address. Optionally a thread ID can be
3433 	// specified.
3434 	if (argi >= argc || argi + 2 < argc) {
3435 		print_debugger_command_usage(argv[0]);
3436 		return 0;
3437 	}
3438 
3439 	uint64 addressValue;
3440 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3441 		return 0;
3442 
3443 	Team* team = NULL;
3444 	if (argi < argc) {
3445 		uint64 threadID;
3446 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3447 			return 0;
3448 
3449 		Thread* thread = Thread::GetDebug(threadID);
3450 		if (thread == NULL) {
3451 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3452 			return 0;
3453 		}
3454 
3455 		team = thread->team;
3456 	}
3457 
3458 	if (reverseLookup) {
3459 		phys_addr_t physicalAddress;
3460 		if (pageLookup) {
3461 			vm_page* page = (vm_page*)(addr_t)addressValue;
3462 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3463 		} else {
3464 			physicalAddress = (phys_addr_t)addressValue;
3465 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3466 		}
3467 
3468 		kprintf("    Team     Virtual Address      Area\n");
3469 		kprintf("--------------------------------------\n");
3470 
3471 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3472 			Callback()
3473 				:
3474 				fAddressSpace(NULL)
3475 			{
3476 			}
3477 
3478 			void SetAddressSpace(VMAddressSpace* addressSpace)
3479 			{
3480 				fAddressSpace = addressSpace;
3481 			}
3482 
3483 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3484 			{
3485 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3486 					virtualAddress);
3487 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3488 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3489 				else
3490 					kprintf("\n");
3491 				return false;
3492 			}
3493 
3494 		private:
3495 			VMAddressSpace*	fAddressSpace;
3496 		} callback;
3497 
3498 		if (team != NULL) {
3499 			// team specified -- get its address space
3500 			VMAddressSpace* addressSpace = team->address_space;
3501 			if (addressSpace == NULL) {
3502 				kprintf("Failed to get address space!\n");
3503 				return 0;
3504 			}
3505 
3506 			callback.SetAddressSpace(addressSpace);
3507 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3508 				physicalAddress, callback);
3509 		} else {
3510 			// no team specified -- iterate through all address spaces
3511 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3512 				addressSpace != NULL;
3513 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3514 				callback.SetAddressSpace(addressSpace);
3515 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3516 					physicalAddress, callback);
3517 			}
3518 		}
3519 	} else {
3520 		// get the address space
3521 		addr_t virtualAddress = (addr_t)addressValue;
3522 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3523 		VMAddressSpace* addressSpace;
3524 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3525 			addressSpace = VMAddressSpace::Kernel();
3526 		} else if (team != NULL) {
3527 			addressSpace = team->address_space;
3528 		} else {
3529 			Thread* thread = debug_get_debugged_thread();
3530 			if (thread == NULL || thread->team == NULL) {
3531 				kprintf("Failed to get team!\n");
3532 				return 0;
3533 			}
3534 
3535 			addressSpace = thread->team->address_space;
3536 		}
3537 
3538 		if (addressSpace == NULL) {
3539 			kprintf("Failed to get address space!\n");
3540 			return 0;
3541 		}
3542 
3543 		// let the translation map implementation do the job
3544 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3545 	}
3546 
3547 	return 0;
3548 }
3549 
3550 
3551 /*!	Deletes all areas and reserved regions in the given address space.
3552 
3553 	The caller must ensure that none of the areas has any wired ranges.
3554 
3555 	\param addressSpace The address space.
3556 	\param deletingAddressSpace \c true, if the address space is in the process
3557 		of being deleted.
3558 */
3559 void
3560 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3561 {
3562 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3563 		addressSpace->ID()));
3564 
3565 	addressSpace->WriteLock();
3566 
3567 	// remove all reserved areas in this address space
3568 	addressSpace->UnreserveAllAddressRanges(0);
3569 
3570 	// delete all the areas in this address space
3571 	while (VMArea* area = addressSpace->FirstArea()) {
3572 		ASSERT(!area->IsWired());
3573 		delete_area(addressSpace, area, deletingAddressSpace);
3574 	}
3575 
3576 	addressSpace->WriteUnlock();
3577 }
3578 
3579 
3580 static area_id
3581 vm_area_for(addr_t address, bool kernel)
3582 {
3583 	team_id team;
3584 	if (IS_USER_ADDRESS(address)) {
3585 		// we try the user team address space, if any
3586 		team = VMAddressSpace::CurrentID();
3587 		if (team < 0)
3588 			return team;
3589 	} else
3590 		team = VMAddressSpace::KernelID();
3591 
3592 	AddressSpaceReadLocker locker(team);
3593 	if (!locker.IsLocked())
3594 		return B_BAD_TEAM_ID;
3595 
3596 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3597 	if (area != NULL) {
3598 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3599 			return B_ERROR;
3600 
3601 		return area->id;
3602 	}
3603 
3604 	return B_ERROR;
3605 }
3606 
3607 
3608 /*!	Frees physical pages that were used during the boot process.
3609 	\a end is inclusive.
3610 */
3611 static void
3612 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3613 {
3614 	// free all physical pages in the specified range
3615 
3616 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3617 		phys_addr_t physicalAddress;
3618 		uint32 flags;
3619 
3620 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3621 			&& (flags & PAGE_PRESENT) != 0) {
3622 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3623 			if (page != NULL && page->State() != PAGE_STATE_FREE
3624 					 && page->State() != PAGE_STATE_CLEAR
3625 					 && page->State() != PAGE_STATE_UNUSED) {
3626 				DEBUG_PAGE_ACCESS_START(page);
3627 				vm_page_set_state(page, PAGE_STATE_FREE);
3628 			}
3629 		}
3630 	}
3631 
3632 	// unmap the memory
3633 	map->Unmap(start, end);
3634 }
3635 
3636 
3637 void
3638 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3639 {
3640 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3641 	addr_t end = start + (size - 1);
3642 	addr_t lastEnd = start;
3643 
3644 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3645 		(void*)start, (void*)end));
3646 
	// The areas are sorted in virtual address space order, so
	// we just have to find the holes between them that fall
	// into the range we should dispose of
3650 
3651 	map->Lock();
3652 
3653 	for (VMAddressSpace::AreaIterator it
3654 				= VMAddressSpace::Kernel()->GetAreaIterator();
3655 			VMArea* area = it.Next();) {
3656 		addr_t areaStart = area->Base();
3657 		addr_t areaEnd = areaStart + (area->Size() - 1);
3658 
3659 		if (areaEnd < start)
3660 			continue;
3661 
3662 		if (areaStart > end) {
			// we are done, the area is already beyond what we have to free
3664 			break;
3665 		}
3666 
3667 		if (areaStart > lastEnd) {
3668 			// this is something we can free
3669 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3670 				(void*)areaStart));
3671 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3672 		}
3673 
3674 		if (areaEnd >= end) {
3675 			lastEnd = areaEnd;
3676 				// no +1 to prevent potential overflow
3677 			break;
3678 		}
3679 
3680 		lastEnd = areaEnd + 1;
3681 	}
3682 
3683 	if (lastEnd < end) {
3684 		// we can also get rid of some space at the end of the area
3685 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3686 			(void*)end));
3687 		unmap_and_free_physical_pages(map, lastEnd, end);
3688 	}
3689 
3690 	map->Unlock();
3691 }
3692 
3693 
3694 static void
3695 create_preloaded_image_areas(struct preloaded_image* _image)
3696 {
3697 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3698 	char name[B_OS_NAME_LENGTH];
3699 	void* address;
3700 	int32 length;
3701 
3702 	// use file name to create a good area name
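	// e.g. a preloaded image named "/boot/system/kernel_x86" (hypothetical
	// path) yields the areas "kernel_x86_text" and "kernel_x86_data"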
3703 	char* fileName = strrchr(image->name, '/');
3704 	if (fileName == NULL)
3705 		fileName = image->name;
3706 	else
3707 		fileName++;
3708 
3709 	length = strlen(fileName);
3710 	// make sure there is enough space for the suffix
3711 	if (length > 25)
3712 		length = 25;
3713 
3714 	memcpy(name, fileName, length);
3715 	strcpy(name + length, "_text");
3716 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3717 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3718 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3719 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3720 		// this will later be remapped read-only/executable by the
3721 		// ELF initialization code
3722 
3723 	strcpy(name + length, "_data");
3724 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3725 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3726 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3727 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3728 }
3729 
3730 
3731 /*!	Frees all previously kernel arguments areas from the kernel_args structure.
3732 	Any boot loader resources contained in that arguments must not be accessed
3733 	anymore past this point.
3734 */
3735 void
3736 vm_free_kernel_args(kernel_args* args)
3737 {
3738 	uint32 i;
3739 
3740 	TRACE(("vm_free_kernel_args()\n"));
3741 
3742 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3743 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3744 		if (area >= B_OK)
3745 			delete_area(area);
3746 	}
3747 }
3748 
3749 
3750 static void
3751 allocate_kernel_args(kernel_args* args)
3752 {
3753 	TRACE(("allocate_kernel_args()\n"));
3754 
3755 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3756 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3757 
3758 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3759 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3760 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3761 	}
3762 }
3763 
3764 
3765 static void
3766 unreserve_boot_loader_ranges(kernel_args* args)
3767 {
3768 	TRACE(("unreserve_boot_loader_ranges()\n"));
3769 
3770 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3771 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3772 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3773 			args->virtual_allocated_range[i].size);
3774 	}
3775 }
3776 
3777 
3778 static void
3779 reserve_boot_loader_ranges(kernel_args* args)
3780 {
3781 	TRACE(("reserve_boot_loader_ranges()\n"));
3782 
3783 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3784 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3785 
		// If the address is not a kernel address, we just skip it. The
		// architecture-specific code has to deal with it.
3788 		if (!IS_KERNEL_ADDRESS(address)) {
3789 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3790 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3791 			continue;
3792 		}
3793 
3794 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3795 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3796 		if (status < B_OK)
3797 			panic("could not reserve boot loader ranges\n");
3798 	}
3799 }
3800 
3801 
3802 static addr_t
3803 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3804 {
3805 	size = PAGE_ALIGN(size);
3806 
3807 	// find a slot in the virtual allocation addr range
3808 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3809 		// check to see if the space between this one and the last is big enough
3810 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3811 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3812 			+ args->virtual_allocated_range[i - 1].size;
3813 
3814 		addr_t base = alignment > 0
3815 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3816 
3817 		if (base >= KERNEL_BASE && base < rangeStart
3818 				&& rangeStart - base >= size) {
3819 			args->virtual_allocated_range[i - 1].size
3820 				+= base + size - previousRangeEnd;
3821 			return base;
3822 		}
3823 	}
3824 
	// We didn't find a gap between the allocation ranges. That's OK;
	// see if there's a gap after the last one.
3827 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3828 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3829 		+ args->virtual_allocated_range[lastEntryIndex].size;
3830 	addr_t base = alignment > 0
3831 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3832 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3833 		args->virtual_allocated_range[lastEntryIndex].size
3834 			+= base + size - lastRangeEnd;
3835 		return base;
3836 	}
3837 
3838 	// see if there's a gap before the first one
3839 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3840 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3841 		base = rangeStart - size;
3842 		if (alignment > 0)
3843 			base = ROUNDDOWN(base, alignment);
3844 
3845 		if (base >= KERNEL_BASE) {
3846 			args->virtual_allocated_range[0].start = base;
3847 			args->virtual_allocated_range[0].size += rangeStart - base;
3848 			return base;
3849 		}
3850 	}
3851 
3852 	return 0;
3853 }
3854 
3855 
3856 static bool
3857 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3858 {
3859 	// TODO: horrible brute-force method of determining if the page can be
3860 	// allocated
3861 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3862 		if (address >= args->physical_memory_range[i].start
3863 			&& address < args->physical_memory_range[i].start
3864 				+ args->physical_memory_range[i].size)
3865 			return true;
3866 	}
3867 	return false;
3868 }
3869 
3870 
3871 page_num_t
3872 vm_allocate_early_physical_page(kernel_args* args)
3873 {
3874 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3875 		phys_addr_t nextPage;
3876 
3877 		nextPage = args->physical_allocated_range[i].start
3878 			+ args->physical_allocated_range[i].size;
		// see if the page after this allocated paddr run can be allocated
3880 		if (i + 1 < args->num_physical_allocated_ranges
3881 			&& args->physical_allocated_range[i + 1].size != 0) {
3882 			// see if the next page will collide with the next allocated range
3883 			if (nextPage >= args->physical_allocated_range[i+1].start)
3884 				continue;
3885 		}
3886 		// see if the next physical page fits in the memory block
3887 		if (is_page_in_physical_memory_range(args, nextPage)) {
3888 			// we got one!
3889 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3890 			return nextPage / B_PAGE_SIZE;
3891 		}
3892 	}
3893 
3894 	// Expanding upwards didn't work, try going downwards.
3895 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3896 		phys_addr_t nextPage;
3897 
3898 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
		// see if the page before this allocated paddr run can be allocated
3900 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
			// see if this page will collide with the previous allocated range
3902 			if (nextPage < args->physical_allocated_range[i-1].start
3903 				+ args->physical_allocated_range[i-1].size)
3904 				continue;
3905 		}
3906 		// see if the next physical page fits in the memory block
3907 		if (is_page_in_physical_memory_range(args, nextPage)) {
3908 			// we got one!
3909 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
3910 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3911 			return nextPage / B_PAGE_SIZE;
3912 		}
3913 	}
3914 
3915 	return 0;
3916 		// could not allocate a block
3917 }
3918 
3919 
3920 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3921 	allocate some pages before the VM is completely up.
3922 */
3923 addr_t
3924 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3925 	uint32 attributes, addr_t alignment)
3926 {
3927 	if (physicalSize > virtualSize)
3928 		physicalSize = virtualSize;
3929 
3930 	// find the vaddr to allocate at
3931 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3932 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3933 	if (virtualBase == 0) {
3934 		panic("vm_allocate_early: could not allocate virtual address\n");
3935 		return 0;
3936 	}
3937 
3938 	// map the pages
3939 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3940 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3941 		if (physicalAddress == 0)
3942 			panic("error allocating early page!\n");
3943 
3944 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3945 
3946 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3947 			physicalAddress * B_PAGE_SIZE, attributes,
3948 			&vm_allocate_early_physical_page);
3949 	}
3950 
3951 	return virtualBase;
3952 }
3953 
3954 
3955 /*!	The main entrance point to initialize the VM. */
3956 status_t
3957 vm_init(kernel_args* args)
3958 {
3959 	struct preloaded_image* image;
3960 	void* address;
3961 	status_t err = 0;
3962 	uint32 i;
3963 
3964 	TRACE(("vm_init: entry\n"));
3965 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3966 	err = arch_vm_init(args);
3967 
3968 	// initialize some globals
3969 	vm_page_init_num_pages(args);
3970 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3971 
3972 	slab_init(args);
3973 
3974 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3975 	off_t heapSize = INITIAL_HEAP_SIZE;
	// try to accommodate low memory systems
3977 	while (heapSize > sAvailableMemory / 8)
3978 		heapSize /= 2;
3979 	if (heapSize < 1024 * 1024)
3980 		panic("vm_init: go buy some RAM please.");
3981 
3982 	// map in the new heap and initialize it
3983 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3984 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3985 	TRACE(("heap at 0x%lx\n", heapBase));
3986 	heap_init(heapBase, heapSize);
3987 #endif
3988 
3989 	// initialize the free page list and physical page mapper
3990 	vm_page_init(args);
3991 
3992 	// initialize the cache allocators
3993 	vm_cache_init(args);
3994 
3995 	{
3996 		status_t error = VMAreaHash::Init();
3997 		if (error != B_OK)
3998 			panic("vm_init: error initializing area hash table\n");
3999 	}
4000 
4001 	VMAddressSpace::Init();
4002 	reserve_boot_loader_ranges(args);
4003 
4004 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4005 	heap_init_post_area();
4006 #endif
4007 
	// Do any further initialization that the architecture-dependent layers may
	// need now
4010 	arch_vm_translation_map_init_post_area(args);
4011 	arch_vm_init_post_area(args);
4012 	vm_page_init_post_area(args);
4013 	slab_init_post_area();
4014 
	// allocate areas to represent memory that is already in use
4016 
4017 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4018 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4019 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4020 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4021 #endif
4022 
4023 	allocate_kernel_args(args);
4024 
4025 	create_preloaded_image_areas(args->kernel_image);
4026 
4027 	// allocate areas for preloaded images
4028 	for (image = args->preloaded_images; image != NULL; image = image->next)
4029 		create_preloaded_image_areas(image);
4030 
4031 	// allocate kernel stacks
4032 	for (i = 0; i < args->num_cpus; i++) {
4033 		char name[64];
4034 
4035 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4036 		address = (void*)args->cpu_kstack[i].start;
4037 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4038 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4039 	}
4040 
4041 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4042 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4043 
4044 #if PARANOID_KERNEL_MALLOC
4045 	vm_block_address_range("uninitialized heap memory",
4046 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4047 #endif
4048 #if PARANOID_KERNEL_FREE
4049 	vm_block_address_range("freed heap memory",
4050 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4051 #endif
4052 
4053 	// create the object cache for the page mappings
4054 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4055 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4056 		NULL, NULL);
4057 	if (gPageMappingsObjectCache == NULL)
4058 		panic("failed to create page mappings object cache");
4059 
4060 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4061 
4062 #if DEBUG_CACHE_LIST
4063 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4064 		virtual_address_restrictions virtualRestrictions = {};
4065 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4066 		physical_address_restrictions physicalRestrictions = {};
4067 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4068 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4069 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4070 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4071 			&physicalRestrictions, (void**)&sCacheInfoTable);
4072 	}
4073 #endif	// DEBUG_CACHE_LIST
4074 
4075 	// add some debugger commands
4076 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4077 	add_debugger_command("area", &dump_area,
4078 		"Dump info about a particular area");
4079 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4080 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4081 #if DEBUG_CACHE_LIST
4082 	if (sCacheInfoTable != NULL) {
4083 		add_debugger_command_etc("caches", &dump_caches,
4084 			"List all VMCache trees",
4085 			"[ \"-c\" ]\n"
4086 			"All cache trees are listed sorted in decreasing order by number "
4087 				"of\n"
4088 			"used pages or, if \"-c\" is specified, by size of committed "
4089 				"memory.\n",
4090 			0);
4091 	}
4092 #endif
4093 	add_debugger_command("avail", &dump_available_memory,
4094 		"Dump available memory");
4095 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4096 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4097 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4098 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4099 	add_debugger_command("string", &display_mem, "dump strings");
4100 
4101 	add_debugger_command_etc("mapping", &dump_mapping_info,
4102 		"Print address mapping information",
4103 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4104 		"Prints low-level page mapping information for a given address. If\n"
4105 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4106 		"address that is looked up in the translation map of the current\n"
4107 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4108 		"\"-r\" is specified, <address> is a physical address that is\n"
4109 		"searched in the translation map of all teams, respectively the team\n"
4110 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4111 		"<address> is the address of a vm_page structure. The behavior is\n"
4112 		"equivalent to specifying \"-r\" with the physical address of that\n"
4113 		"page.\n",
4114 		0);
4115 
4116 	TRACE(("vm_init: exit\n"));
4117 
4118 	vm_cache_init_post_heap();
4119 
4120 	return err;
4121 }
4122 
4123 
4124 status_t
4125 vm_init_post_sem(kernel_args* args)
4126 {
	// This frees all unused boot loader resources and makes their space
	// available again.
4129 	arch_vm_init_end(args);
4130 	unreserve_boot_loader_ranges(args);
4131 
	// Fill in all of the semaphores that were not allocated before.
	// Since we're still single-threaded and only the kernel address space
	// exists, it isn't that hard to find all of the ones we need to create.
4135 
4136 	arch_vm_translation_map_init_post_sem(args);
4137 
4138 	slab_init_post_sem();
4139 
4140 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4141 	heap_init_post_sem();
4142 #endif
4143 
4144 	return B_OK;
4145 }
4146 
4147 
4148 status_t
4149 vm_init_post_thread(kernel_args* args)
4150 {
4151 	vm_page_init_post_thread(args);
4152 	slab_init_post_thread();
4153 	return heap_init_post_thread();
4154 }
4155 
4156 
4157 status_t
4158 vm_init_post_modules(kernel_args* args)
4159 {
4160 	return arch_vm_init_post_modules(args);
4161 }
4162 
4163 
4164 void
4165 permit_page_faults(void)
4166 {
4167 	Thread* thread = thread_get_current_thread();
4168 	if (thread != NULL)
4169 		atomic_add(&thread->page_faults_allowed, 1);
4170 }
4171 
4172 
4173 void
4174 forbid_page_faults(void)
4175 {
4176 	Thread* thread = thread_get_current_thread();
4177 	if (thread != NULL)
4178 		atomic_add(&thread->page_faults_allowed, -1);
4179 }
4180 
4181 
4182 status_t
4183 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4184 	bool isUser, addr_t* newIP)
4185 {
4186 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4187 		faultAddress));
4188 
4189 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4190 
4191 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4192 	VMAddressSpace* addressSpace = NULL;
4193 
4194 	status_t status = B_OK;
4195 	*newIP = 0;
4196 	atomic_add((int32*)&sPageFaults, 1);
4197 
4198 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4199 		addressSpace = VMAddressSpace::GetKernel();
4200 	} else if (IS_USER_ADDRESS(pageAddress)) {
4201 		addressSpace = VMAddressSpace::GetCurrent();
4202 		if (addressSpace == NULL) {
4203 			if (!isUser) {
4204 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4205 					"memory!\n");
4206 				status = B_BAD_ADDRESS;
4207 				TPF(PageFaultError(-1,
4208 					VMPageFaultTracing
4209 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4210 			} else {
4211 				// XXX weird state.
4212 				panic("vm_page_fault: non kernel thread accessing user memory "
4213 					"that doesn't exist!\n");
4214 				status = B_BAD_ADDRESS;
4215 			}
4216 		}
4217 	} else {
		// The hit was probably in the 64k DMZ between kernel and user space;
		// this keeps a user space thread from passing a buffer that crosses
		// into kernel space.
4221 		status = B_BAD_ADDRESS;
4222 		TPF(PageFaultError(-1,
4223 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4224 	}
4225 
4226 	if (status == B_OK) {
4227 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4228 			isUser, NULL);
4229 	}
4230 
4231 	if (status < B_OK) {
4232 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4233 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4234 			strerror(status), address, faultAddress, isWrite, isUser,
4235 			thread_get_current_thread_id());
4236 		if (!isUser) {
4237 			Thread* thread = thread_get_current_thread();
4238 			if (thread != NULL && thread->fault_handler != 0) {
				// This will cause the architecture-dependent page fault
				// handler to modify the IP on the interrupt frame so that
				// execution resumes at this address.
4242 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4243 			} else {
4244 				// unhandled page fault in the kernel
4245 				panic("vm_page_fault: unhandled page fault in kernel space at "
4246 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4247 			}
4248 		} else {
4249 #if 1
4250 			// TODO: remove me once we have proper userland debugging support
4251 			// (and tools)
4252 			VMArea* area = NULL;
4253 			if (addressSpace != NULL) {
4254 				addressSpace->ReadLock();
4255 				area = addressSpace->LookupArea(faultAddress);
4256 			}
4257 
4258 			Thread* thread = thread_get_current_thread();
4259 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4260 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4261 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4262 				thread->team->Name(), thread->team->id,
4263 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4264 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4265 					area->Base() : 0x0));
4266 
4267 			// We can print a stack trace of the userland thread here.
4268 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4269 // fault and someone is already waiting for a write lock on the same address
4270 // space. This thread will then try to acquire the lock again and will
4271 // be queued after the writer.
4272 #	if 0
4273 			if (area) {
4274 				struct stack_frame {
4275 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4276 						struct stack_frame*	previous;
4277 						void*				return_address;
4278 					#else
4279 						// ...
4280 					#warning writeme
4281 					#endif
4282 				} frame;
4283 #		ifdef __INTEL__
4284 				struct iframe* iframe = x86_get_user_iframe();
4285 				if (iframe == NULL)
4286 					panic("iframe is NULL!");
4287 
4288 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4289 					sizeof(struct stack_frame));
4290 #		elif defined(__POWERPC__)
4291 				struct iframe* iframe = ppc_get_user_iframe();
4292 				if (iframe == NULL)
4293 					panic("iframe is NULL!");
4294 
4295 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4296 					sizeof(struct stack_frame));
4297 #		else
4298 #			warning "vm_page_fault() stack trace won't work"
4299 				status = B_ERROR;
4300 #		endif
4301 
4302 				dprintf("stack trace:\n");
4303 				int32 maxFrames = 50;
4304 				while (status == B_OK && --maxFrames >= 0
4305 						&& frame.return_address != NULL) {
4306 					dprintf("  %p", frame.return_address);
4307 					area = addressSpace->LookupArea(
4308 						(addr_t)frame.return_address);
4309 					if (area) {
4310 						dprintf(" (%s + %#lx)", area->name,
4311 							(addr_t)frame.return_address - area->Base());
4312 					}
4313 					dprintf("\n");
4314 
4315 					status = user_memcpy(&frame, frame.previous,
4316 						sizeof(struct stack_frame));
4317 				}
4318 			}
4319 #	endif	// 0 (stack trace)
4320 
4321 			if (addressSpace != NULL)
4322 				addressSpace->ReadUnlock();
4323 #endif
4324 
4325 			// If the thread has a signal handler for SIGSEGV, we simply
4326 			// send it the signal. Otherwise we notify the user debugger
4327 			// first.
4328 			struct sigaction action;
4329 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4330 					&& action.sa_handler != SIG_DFL
4331 					&& action.sa_handler != SIG_IGN)
4332 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4333 					SIGSEGV)) {
4334 				Signal signal(SIGSEGV,
4335 					status == B_PERMISSION_DENIED
4336 						? SEGV_ACCERR : SEGV_MAPERR,
4337 					EFAULT, thread->team->id);
4338 				signal.SetAddress((void*)address);
4339 				send_signal_to_thread(thread, signal, 0);
4340 			}
4341 		}
4342 	}
4343 
4344 	if (addressSpace != NULL)
4345 		addressSpace->Put();
4346 
4347 	return B_HANDLED_INTERRUPT;
4348 }
4349 
4350 
4351 struct PageFaultContext {
4352 	AddressSpaceReadLocker	addressSpaceLocker;
4353 	VMCacheChainLocker		cacheChainLocker;
4354 
4355 	VMTranslationMap*		map;
4356 	VMCache*				topCache;
4357 	off_t					cacheOffset;
4358 	vm_page_reservation		reservation;
4359 	bool					isWrite;
4360 
4361 	// return values
4362 	vm_page*				page;
4363 	bool					restart;
4364 	bool					pageAllocated;
4365 
4366 
4367 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4368 		:
4369 		addressSpaceLocker(addressSpace, true),
4370 		map(addressSpace->TranslationMap()),
4371 		isWrite(isWrite)
4372 	{
4373 	}
4374 
4375 	~PageFaultContext()
4376 	{
4377 		UnlockAll();
4378 		vm_page_unreserve_pages(&reservation);
4379 	}
4380 
4381 	void Prepare(VMCache* topCache, off_t cacheOffset)
4382 	{
4383 		this->topCache = topCache;
4384 		this->cacheOffset = cacheOffset;
4385 		page = NULL;
4386 		restart = false;
4387 		pageAllocated = false;
4388 
4389 		cacheChainLocker.SetTo(topCache);
4390 	}
4391 
4392 	void UnlockAll(VMCache* exceptCache = NULL)
4393 	{
4394 		topCache = NULL;
4395 		addressSpaceLocker.Unlock();
4396 		cacheChainLocker.Unlock(exceptCache);
4397 	}
4398 };
4399 
4400 
4401 /*!	Gets the page that should be mapped into the area.
4402 	Returns an error code other than \c B_OK, if the page couldn't be found or
4403 	paged in. The locking state of the address space and the caches is undefined
4404 	in that case.
	Returns \c B_OK with \c context.restart set to \c true, if the function
	had to unlock the address space and all caches and is supposed to be called
4407 	again.
4408 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4409 	found. It is returned in \c context.page. The address space will still be
4410 	locked as well as all caches starting from the top cache to at least the
4411 	cache the page lives in.
4412 */
4413 static status_t
4414 fault_get_page(PageFaultContext& context)
4415 {
4416 	VMCache* cache = context.topCache;
4417 	VMCache* lastCache = NULL;
4418 	vm_page* page = NULL;
4419 
4420 	while (cache != NULL) {
4421 		// We already hold the lock of the cache at this point.
4422 
4423 		lastCache = cache;
4424 
4425 		page = cache->LookupPage(context.cacheOffset);
4426 		if (page != NULL && page->busy) {
			// the page is busy -- wait for it to become unbusy
4428 			context.UnlockAll(cache);
4429 			cache->ReleaseRefLocked();
4430 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4431 
4432 			// restart the whole process
4433 			context.restart = true;
4434 			return B_OK;
4435 		}
4436 
4437 		if (page != NULL)
4438 			break;
4439 
4440 		// The current cache does not contain the page we're looking for.
4441 
4442 		// see if the backing store has it
4443 		if (cache->HasPage(context.cacheOffset)) {
4444 			// insert a fresh page and mark it busy -- we're going to read it in
4445 			page = vm_page_allocate_page(&context.reservation,
4446 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4447 			cache->InsertPage(page, context.cacheOffset);
4448 
4449 			// We need to unlock all caches and the address space while reading
4450 			// the page in. Keep a reference to the cache around.
4451 			cache->AcquireRefLocked();
4452 			context.UnlockAll();
4453 
4454 			// read the page in
4455 			generic_io_vec vec;
4456 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4457 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4458 
4459 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4460 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4461 
4462 			cache->Lock();
4463 
4464 			if (status < B_OK) {
4465 				// on error remove and free the page
4466 				dprintf("reading page from cache %p returned: %s!\n",
4467 					cache, strerror(status));
4468 
4469 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4470 				cache->RemovePage(page);
4471 				vm_page_set_state(page, PAGE_STATE_FREE);
4472 
4473 				cache->ReleaseRefAndUnlock();
4474 				return status;
4475 			}
4476 
4477 			// mark the page unbusy again
4478 			cache->MarkPageUnbusy(page);
4479 
4480 			DEBUG_PAGE_ACCESS_END(page);
4481 
4482 			// Since we needed to unlock everything temporarily, the area
4483 			// situation might have changed. So we need to restart the whole
4484 			// process.
4485 			cache->ReleaseRefAndUnlock();
4486 			context.restart = true;
4487 			return B_OK;
4488 		}
4489 
4490 		cache = context.cacheChainLocker.LockSourceCache();
4491 	}
4492 
4493 	if (page == NULL) {
4494 		// There was no adequate page, determine the cache for a clean one.
		// Read-only pages go into the deepest cache; only the topmost cache
		// may have direct write access.
4497 		cache = context.isWrite ? context.topCache : lastCache;
4498 
4499 		// allocate a clean page
4500 		page = vm_page_allocate_page(&context.reservation,
4501 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4502 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4503 			page->physical_page_number));
4504 
4505 		// insert the new page into our cache
4506 		cache->InsertPage(page, context.cacheOffset);
4507 		context.pageAllocated = true;
4508 	} else if (page->Cache() != context.topCache && context.isWrite) {
4509 		// We have a page that has the data we want, but in the wrong cache
		// object, so we need to copy it and stick it into the top cache.
4511 		vm_page* sourcePage = page;
4512 
4513 		// TODO: If memory is low, it might be a good idea to steal the page
4514 		// from our source cache -- if possible, that is.
4515 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4516 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4517 
4518 		// To not needlessly kill concurrency we unlock all caches but the top
4519 		// one while copying the page. Lacking another mechanism to ensure that
4520 		// the source page doesn't disappear, we mark it busy.
4521 		sourcePage->busy = true;
4522 		context.cacheChainLocker.UnlockKeepRefs(true);
4523 
4524 		// copy the page
4525 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4526 			sourcePage->physical_page_number * B_PAGE_SIZE);
4527 
4528 		context.cacheChainLocker.RelockCaches(true);
4529 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4530 
4531 		// insert the new page into our cache
4532 		context.topCache->InsertPage(page, context.cacheOffset);
4533 		context.pageAllocated = true;
4534 	} else
4535 		DEBUG_PAGE_ACCESS_START(page);
4536 
4537 	context.page = page;
4538 	return B_OK;
4539 }
4540 
4541 
4542 /*!	Makes sure the address in the given address space is mapped.
4543 
4544 	\param addressSpace The address space.
4545 	\param originalAddress The address. Doesn't need to be page aligned.
	\param isWrite If \c true the address shall be write-accessible.
	\param isExecute If \c true the address shall be executable.
4547 	\param isUser If \c true the access is requested by a userland team.
4548 	\param wirePage On success, if non \c NULL, the wired count of the page
4549 		mapped at the given address is incremented and the page is returned
4550 		via this parameter.
4551 	\return \c B_OK on success, another error code otherwise.
4552 */
4553 static status_t
4554 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4555 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4556 {
4557 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4558 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4559 		originalAddress, isWrite, isUser));
4560 
4561 	PageFaultContext context(addressSpace, isWrite);
4562 
4563 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4564 	status_t status = B_OK;
4565 
4566 	addressSpace->IncrementFaultCount();
4567 
4568 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4569 	// the pages upfront makes sure we don't have any cache locked, so that the
4570 	// page daemon/thief can do their job without problems.
4571 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4572 		originalAddress);
4573 	context.addressSpaceLocker.Unlock();
4574 	vm_page_reserve_pages(&context.reservation, reservePages,
4575 		addressSpace == VMAddressSpace::Kernel()
4576 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4577 
4578 	while (true) {
4579 		context.addressSpaceLocker.Lock();
4580 
4581 		// get the area the fault was in
4582 		VMArea* area = addressSpace->LookupArea(address);
4583 		if (area == NULL) {
4584 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4585 				"space\n", originalAddress);
4586 			TPF(PageFaultError(-1,
4587 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4588 			status = B_BAD_ADDRESS;
4589 			break;
4590 		}
4591 
4592 		// check permissions
4593 		uint32 protection = get_area_page_protection(area, address);
4594 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4595 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4596 				area->id, (void*)originalAddress);
4597 			TPF(PageFaultError(area->id,
4598 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4599 			status = B_PERMISSION_DENIED;
4600 			break;
4601 		}
4602 		if (isWrite && (protection
4603 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4604 			dprintf("write access attempted on write-protected area 0x%"
4605 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4606 			TPF(PageFaultError(area->id,
4607 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4608 			status = B_PERMISSION_DENIED;
4609 			break;
4610 		} else if (isExecute && (protection
4611 				& (B_EXECUTE_AREA
4612 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4613 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4614 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4615 			TPF(PageFaultError(area->id,
4616 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4617 			status = B_PERMISSION_DENIED;
4618 			break;
4619 		} else if (!isWrite && !isExecute && (protection
4620 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4621 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4622 				" at %p\n", area->id, (void*)originalAddress);
4623 			TPF(PageFaultError(area->id,
4624 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4625 			status = B_PERMISSION_DENIED;
4626 			break;
4627 		}
4628 
4629 		// We have the area, it was a valid access, so let's try to resolve the
4630 		// page fault now.
4631 		// At first, the top most cache from the area is investigated.
4632 
4633 		context.Prepare(vm_area_get_locked_cache(area),
4634 			address - area->Base() + area->cache_offset);
4635 
4636 		// See if this cache has a fault handler -- this will do all the work
4637 		// for us.
4638 		{
4639 			// Note, since the page fault is resolved with interrupts enabled,
4640 			// the fault handler could be called more than once for the same
4641 			// reason -- the store must take this into account.
4642 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4643 			if (status != B_BAD_HANDLER)
4644 				break;
4645 		}
4646 
4647 		// The top most cache has no fault handler, so let's see if the cache or
4648 		// its sources already have the page we're searching for (we're going
4649 		// from top to bottom).
4650 		status = fault_get_page(context);
4651 		if (status != B_OK) {
4652 			TPF(PageFaultError(area->id, status));
4653 			break;
4654 		}
4655 
4656 		if (context.restart)
4657 			continue;
4658 
		// All went fine; all that is left to do is to map the page into the
4660 		// address space.
4661 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4662 			context.page));
4663 
4664 		// If the page doesn't reside in the area's cache, we need to make sure
		// it's mapped read-only, so that we cannot overwrite someone else's
4666 		// data (copy-on-write)
4667 		uint32 newProtection = protection;
4668 		if (context.page->Cache() != context.topCache && !isWrite)
4669 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4670 
4671 		bool unmapPage = false;
4672 		bool mapPage = true;
4673 
4674 		// check whether there's already a page mapped at the address
4675 		context.map->Lock();
4676 
4677 		phys_addr_t physicalAddress;
4678 		uint32 flags;
4679 		vm_page* mappedPage = NULL;
4680 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4681 			&& (flags & PAGE_PRESENT) != 0
4682 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4683 				!= NULL) {
			// Yep, there's already a page. If it's ours, we can simply adjust
4685 			// its protection. Otherwise we have to unmap it.
4686 			if (mappedPage == context.page) {
4687 				context.map->ProtectPage(area, address, newProtection);
4688 					// Note: We assume that ProtectPage() is atomic (i.e.
4689 					// the page isn't temporarily unmapped), otherwise we'd have
4690 					// to make sure it isn't wired.
4691 				mapPage = false;
4692 			} else
4693 				unmapPage = true;
4694 		}
4695 
4696 		context.map->Unlock();
4697 
4698 		if (unmapPage) {
4699 			// If the page is wired, we can't unmap it. Wait until it is unwired
4700 			// again and restart. Note that the page cannot be wired for
			// writing, since it isn't in the topmost cache. So we can safely
4702 			// ignore ranges wired for writing (our own and other concurrent
4703 			// wiring attempts in progress) and in fact have to do that to avoid
4704 			// a deadlock.
4705 			VMAreaUnwiredWaiter waiter;
4706 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4707 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4708 				// unlock everything and wait
4709 				if (context.pageAllocated) {
4710 					// ... but since we allocated a page and inserted it into
4711 					// the top cache, remove and free it first. Otherwise we'd
4712 					// have a page from a lower cache mapped while an upper
4713 					// cache has a page that would shadow it.
4714 					context.topCache->RemovePage(context.page);
4715 					vm_page_free_etc(context.topCache, context.page,
4716 						&context.reservation);
4717 				} else
4718 					DEBUG_PAGE_ACCESS_END(context.page);
4719 
4720 				context.UnlockAll();
4721 				waiter.waitEntry.Wait();
4722 				continue;
4723 			}
4724 
4725 			// Note: The mapped page is a page of a lower cache. We are
			// guaranteed to have that cache locked, our new page is a copy of
4727 			// that page, and the page is not busy. The logic for that guarantee
4728 			// is as follows: Since the page is mapped, it must live in the top
4729 			// cache (ruled out above) or any of its lower caches, and there is
4730 			// (was before the new page was inserted) no other page in any
4731 			// cache between the top cache and the page's cache (otherwise that
4732 			// would be mapped instead). That in turn means that our algorithm
4733 			// must have found it and therefore it cannot be busy either.
4734 			DEBUG_PAGE_ACCESS_START(mappedPage);
4735 			unmap_page(area, address);
4736 			DEBUG_PAGE_ACCESS_END(mappedPage);
4737 		}
4738 
4739 		if (mapPage) {
4740 			if (map_page(area, context.page, address, newProtection,
4741 					&context.reservation) != B_OK) {
				// Mapping can only fail when the page mapping object couldn't
				// be allocated. Save for the missing mapping, everything is
4744 				// fine, though. If this was a regular page fault, we'll simply
4745 				// leave and probably fault again. To make sure we'll have more
4746 				// luck then, we ensure that the minimum object reserve is
4747 				// available.
4748 				DEBUG_PAGE_ACCESS_END(context.page);
4749 
4750 				context.UnlockAll();
4751 
4752 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4753 						!= B_OK) {
4754 					// Apparently the situation is serious. Let's get ourselves
4755 					// killed.
4756 					status = B_NO_MEMORY;
4757 				} else if (wirePage != NULL) {
4758 					// The caller expects us to wire the page. Since
4759 					// object_cache_reserve() succeeded, we should now be able
4760 					// to allocate a mapping structure. Restart.
4761 					continue;
4762 				}
4763 
4764 				break;
4765 			}
4766 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4767 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4768 
4769 		// also wire the page, if requested
4770 		if (wirePage != NULL && status == B_OK) {
4771 			increment_page_wired_count(context.page);
4772 			*wirePage = context.page;
4773 		}
4774 
4775 		DEBUG_PAGE_ACCESS_END(context.page);
4776 
4777 		break;
4778 	}
4779 
4780 	return status;
4781 }
4782 
4783 
4784 status_t
4785 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4786 {
4787 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4788 }
4789 
4790 status_t
4791 vm_put_physical_page(addr_t vaddr, void* handle)
4792 {
4793 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4794 }
4795 
4796 
4797 status_t
4798 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4799 	void** _handle)
4800 {
4801 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4802 }
4803 
4804 status_t
4805 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4806 {
4807 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4808 }
4809 
4810 
4811 status_t
4812 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4813 {
4814 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4815 }
4816 
4817 status_t
4818 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4819 {
4820 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4821 }
4822 
4823 
4824 void
4825 vm_get_info(system_info* info)
4826 {
4827 	swap_get_info(info);
4828 
4829 	MutexLocker locker(sAvailableMemoryLock);
4830 	info->needed_memory = sNeededMemory;
4831 	info->free_memory = sAvailableMemory;
4832 }
4833 
4834 
4835 uint32
4836 vm_num_page_faults(void)
4837 {
4838 	return sPageFaults;
4839 }
4840 
4841 
4842 off_t
4843 vm_available_memory(void)
4844 {
4845 	MutexLocker locker(sAvailableMemoryLock);
4846 	return sAvailableMemory;
4847 }
4848 
4849 
4850 off_t
4851 vm_available_not_needed_memory(void)
4852 {
4853 	MutexLocker locker(sAvailableMemoryLock);
4854 	return sAvailableMemory - sNeededMemory;
4855 }
4856 
4857 
4858 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4859 	debugger.
4860 */
4861 off_t
4862 vm_available_not_needed_memory_debug(void)
4863 {
4864 	return sAvailableMemory - sNeededMemory;
4865 }
4866 
4867 
4868 size_t
4869 vm_kernel_address_space_left(void)
4870 {
4871 	return VMAddressSpace::Kernel()->FreeSpace();
4872 }
4873 
4874 
4875 void
4876 vm_unreserve_memory(size_t amount)
4877 {
4878 	mutex_lock(&sAvailableMemoryLock);
4879 
4880 	sAvailableMemory += amount;
4881 
4882 	mutex_unlock(&sAvailableMemoryLock);
4883 }
4884 
4885 
4886 status_t
4887 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4888 {
4889 	size_t reserve = kMemoryReserveForPriority[priority];
4890 
4891 	MutexLocker locker(sAvailableMemoryLock);
4892 
4893 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4894 
4895 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4896 		sAvailableMemory -= amount;
4897 		return B_OK;
4898 	}
4899 
4900 	if (timeout <= 0)
4901 		return B_NO_MEMORY;
4902 
4903 	// turn timeout into an absolute timeout
4904 	timeout += system_time();
4905 
4906 	// loop until we've got the memory or the timeout occurs
4907 	do {
4908 		sNeededMemory += amount;
4909 
4910 		// call the low resource manager
4911 		locker.Unlock();
4912 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4913 			B_ABSOLUTE_TIMEOUT, timeout);
4914 		locker.Lock();
4915 
4916 		sNeededMemory -= amount;
4917 
4918 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4919 			sAvailableMemory -= amount;
4920 			return B_OK;
4921 		}
4922 	} while (timeout > system_time());
4923 
4924 	return B_NO_MEMORY;
4925 }
4926 
4927 
4928 status_t
4929 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4930 {
4931 	// NOTE: The caller is responsible for synchronizing calls to this function!
4932 
4933 	AddressSpaceReadLocker locker;
4934 	VMArea* area;
4935 	status_t status = locker.SetFromArea(id, area);
4936 	if (status != B_OK)
4937 		return status;
4938 
	// nothing to do if the type doesn't change
4940 	uint32 oldType = area->MemoryType();
4941 	if (type == oldType)
4942 		return B_OK;
4943 
4944 	// set the memory type of the area and the mapped pages
4945 	VMTranslationMap* map = area->address_space->TranslationMap();
4946 	map->Lock();
4947 	area->SetMemoryType(type);
4948 	map->ProtectArea(area, area->protection);
4949 	map->Unlock();
4950 
4951 	// set the physical memory type
4952 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4953 	if (error != B_OK) {
4954 		// reset the memory type of the area and the mapped pages
4955 		map->Lock();
4956 		area->SetMemoryType(oldType);
4957 		map->ProtectArea(area, area->protection);
4958 		map->Unlock();
4959 		return error;
4960 	}
4961 
	return B_OK;
}
4965 
4966 
4967 /*!	This function enforces some protection properties:
4968 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4969 	 - if B_EXECUTE_AREA is set, B_KERNEL_EXECUTE_AREA is set as well
4970 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4971 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4972 	   and B_KERNEL_WRITE_AREA.
4973 */
4974 static void
4975 fix_protection(uint32* protection)
4976 {
4977 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4978 		if ((*protection & B_USER_PROTECTION) == 0
4979 			|| (*protection & B_WRITE_AREA) != 0)
4980 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4981 		else
4982 			*protection |= B_KERNEL_READ_AREA;
4983 		if ((*protection & B_EXECUTE_AREA) != 0)
4984 			*protection |= B_KERNEL_EXECUTE_AREA;
4985 	}
4986 }
4987 
4988 
4989 static void
4990 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4991 {
4992 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4993 	info->area = area->id;
4994 	info->address = (void*)area->Base();
4995 	info->size = area->Size();
4996 	info->protection = area->protection;
4997 	info->lock = B_FULL_LOCK;
4998 	info->team = area->address_space->ID();
4999 	info->copy_count = 0;
5000 	info->in_count = 0;
5001 	info->out_count = 0;
5002 		// TODO: retrieve real values here!
5003 
5004 	VMCache* cache = vm_area_get_locked_cache(area);
5005 
5006 	// Note, this is a simplification; the cache could be larger than this area
5007 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5008 
5009 	vm_area_put_locked_cache(cache);
5010 }
5011 
5012 
5013 static status_t
5014 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5015 {
5016 	// is newSize a multiple of B_PAGE_SIZE?
5017 	if (newSize & (B_PAGE_SIZE - 1))
5018 		return B_BAD_VALUE;
5019 
5020 	// lock all affected address spaces and the cache
5021 	VMArea* area;
5022 	VMCache* cache;
5023 
5024 	MultiAddressSpaceLocker locker;
5025 	AreaCacheLocker cacheLocker;
5026 
5027 	status_t status;
5028 	size_t oldSize;
5029 	bool anyKernelArea;
5030 	bool restart;
5031 
5032 	do {
5033 		anyKernelArea = false;
5034 		restart = false;
5035 
5036 		locker.Unset();
5037 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5038 		if (status != B_OK)
5039 			return status;
5040 		cacheLocker.SetTo(cache, true);	// already locked
5041 
5042 		// enforce restrictions
5043 		if (!kernel) {
5044 			if ((area->protection & B_KERNEL_AREA) != 0)
5045 				return B_NOT_ALLOWED;
5046 			// TODO: Enforce all restrictions (team, etc.)!
5047 		}
5048 
5049 		oldSize = area->Size();
5050 		if (newSize == oldSize)
5051 			return B_OK;
5052 
5053 		if (cache->type != CACHE_TYPE_RAM)
5054 			return B_NOT_ALLOWED;
5055 
5056 		if (oldSize < newSize) {
5057 			// We need to check if all areas of this cache can be resized.
5058 			for (VMArea* current = cache->areas; current != NULL;
5059 					current = current->cache_next) {
5060 				if (!current->address_space->CanResizeArea(current, newSize))
5061 					return B_ERROR;
5062 				anyKernelArea
5063 					|= current->address_space == VMAddressSpace::Kernel();
5064 			}
5065 		} else {
5066 			// We're shrinking the areas, so we must make sure the affected
5067 			// ranges are not wired.
5068 			for (VMArea* current = cache->areas; current != NULL;
5069 					current = current->cache_next) {
5070 				anyKernelArea
5071 					|= current->address_space == VMAddressSpace::Kernel();
5072 
5073 				if (wait_if_area_range_is_wired(current,
5074 						current->Base() + newSize, oldSize - newSize, &locker,
5075 						&cacheLocker)) {
5076 					restart = true;
5077 					break;
5078 				}
5079 			}
5080 		}
5081 	} while (restart);
5082 
5083 	// Okay, looks good so far, so let's do it
5084 
5085 	int priority = kernel && anyKernelArea
5086 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5087 	uint32 allocationFlags = kernel && anyKernelArea
5088 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5089 
5090 	if (oldSize < newSize) {
5091 		// Growing the cache can fail, so we do it first.
5092 		status = cache->Resize(cache->virtual_base + newSize, priority);
5093 		if (status != B_OK)
5094 			return status;
5095 	}
5096 
5097 	for (VMArea* current = cache->areas; current != NULL;
5098 			current = current->cache_next) {
5099 		status = current->address_space->ResizeArea(current, newSize,
5100 			allocationFlags);
5101 		if (status != B_OK)
5102 			break;
5103 
5104 		// We also need to unmap all pages beyond the new size, if the area has
5105 		// shrunk
5106 		if (newSize < oldSize) {
5107 			VMCacheChainLocker cacheChainLocker(cache);
5108 			cacheChainLocker.LockAllSourceCaches();
5109 
5110 			unmap_pages(current, current->Base() + newSize,
5111 				oldSize - newSize);
5112 
5113 			cacheChainLocker.Unlock(cache);
5114 		}
5115 	}
5116 
5117 	if (status == B_OK) {
5118 		// Shrink or grow individual page protections if in use.
5119 		if (area->page_protections != NULL) {
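			// Page protections use one nibble per page (two pages per byte),
			// so ceil(pageCount / 2) bytes are needed.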
5120 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5121 			uint8* newProtections
5122 				= (uint8*)realloc(area->page_protections, bytes);
5123 			if (newProtections == NULL)
5124 				status = B_NO_MEMORY;
5125 			else {
5126 				area->page_protections = newProtections;
5127 
5128 				if (oldSize < newSize) {
5129 					// init the additional page protections to that of the area
5130 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5131 					uint32 areaProtection = area->protection
5132 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5133 					memset(area->page_protections + offset,
5134 						areaProtection | (areaProtection << 4), bytes - offset);
5135 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5136 						uint8& entry = area->page_protections[offset - 1];
5137 						entry = (entry & 0x0f) | (areaProtection << 4);
5138 					}
5139 				}
5140 			}
5141 		}
5142 	}
5143 
5144 	// shrinking the cache can't fail, so we do it now
5145 	if (status == B_OK && newSize < oldSize)
5146 		status = cache->Resize(cache->virtual_base + newSize, priority);
5147 
5148 	if (status != B_OK) {
5149 		// Something failed -- resize the areas back to their original size.
5150 		// This can fail, too, in which case we're seriously screwed.
5151 		for (VMArea* current = cache->areas; current != NULL;
5152 				current = current->cache_next) {
5153 			if (current->address_space->ResizeArea(current, oldSize,
5154 					allocationFlags) != B_OK) {
5155 				panic("vm_resize_area(): Failed and not being able to restore "
5156 					"original state.");
5157 			}
5158 		}
5159 
5160 		cache->Resize(cache->virtual_base + oldSize, priority);
5161 	}
5162 
5163 	// TODO: we must honour the lock restrictions of this area
5164 	return status;
5165 }
5166 
5167 
5168 status_t
5169 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5170 {
5171 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5172 }
5173 
5174 
5175 status_t
5176 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5177 {
5178 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5179 }
5180 
5181 
5182 status_t
5183 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5184 	bool user)
5185 {
5186 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5187 }
5188 
5189 
5190 void
5191 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5192 {
5193 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5194 }
5195 
5196 
5197 /*!	Copies a range of memory directly from/to a page that might not be mapped
5198 	at the moment.
5199 
	For \a unsafeMemory the current mapping (if any) is ignored. The function
5201 	walks through the respective area's cache chain to find the physical page
5202 	and copies from/to it directly.
5203 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5204 	must not cross a page boundary.
5205 
5206 	\param teamID The team ID identifying the address space \a unsafeMemory is
5207 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5208 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5209 		is passed, the address space of the thread returned by
5210 		debug_get_debugged_thread() is used.
5211 	\param unsafeMemory The start of the unsafe memory range to be copied
5212 		from/to.
5213 	\param buffer A safely accessible kernel buffer to be copied from/to.
5214 	\param size The number of bytes to be copied.
5215 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5216 		\a unsafeMemory, the other way around otherwise.
5217 */
5218 status_t
5219 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5220 	size_t size, bool copyToUnsafe)
5221 {
5222 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5223 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5224 		return B_BAD_VALUE;
5225 	}
5226 
5227 	// get the address space for the debugged thread
5228 	VMAddressSpace* addressSpace;
5229 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5230 		addressSpace = VMAddressSpace::Kernel();
5231 	} else if (teamID == B_CURRENT_TEAM) {
5232 		Thread* thread = debug_get_debugged_thread();
5233 		if (thread == NULL || thread->team == NULL)
5234 			return B_BAD_ADDRESS;
5235 
5236 		addressSpace = thread->team->address_space;
5237 	} else
5238 		addressSpace = VMAddressSpace::DebugGet(teamID);
5239 
5240 	if (addressSpace == NULL)
5241 		return B_BAD_ADDRESS;
5242 
5243 	// get the area
5244 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5245 	if (area == NULL)
5246 		return B_BAD_ADDRESS;
5247 
5248 	// search the page
5249 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5250 		+ area->cache_offset;
5251 	VMCache* cache = area->cache;
5252 	vm_page* page = NULL;
5253 	while (cache != NULL) {
5254 		page = cache->DebugLookupPage(cacheOffset);
5255 		if (page != NULL)
5256 			break;
5257 
5258 		// Page not found in this cache -- if it is paged out, we must not try
5259 		// to get it from lower caches.
5260 		if (cache->DebugHasPage(cacheOffset))
5261 			break;
5262 
5263 		cache = cache->source;
5264 	}
5265 
5266 	if (page == NULL)
5267 		return B_UNSUPPORTED;
5268 
5269 	// copy from/to physical memory
5270 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5271 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5272 
5273 	if (copyToUnsafe) {
5274 		if (page->Cache() != area->cache)
5275 			return B_UNSUPPORTED;
5276 
5277 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5278 	}
5279 
5280 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5281 }
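
/*	Usage sketch (illustrative only, kernel debugger context): reading a
	32-bit value from a possibly unmapped address of the debugged team. The
	variable "debuggedAddress" is hypothetical and must not make the range
	cross a page boundary.

		uint32 value;
		status_t error = vm_debug_copy_page_memory(B_CURRENT_TEAM,
			(void*)debuggedAddress, &value, sizeof(value), false);
*/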
5282 
5283 
5284 //	#pragma mark - kernel public API
5285 
5286 
5287 status_t
5288 user_memcpy(void* to, const void* from, size_t size)
5289 {
5290 	// don't allow address overflows
5291 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5292 		return B_BAD_ADDRESS;
5293 
5294 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5295 		return B_BAD_ADDRESS;
5296 
5297 	return B_OK;
5298 }
5299 
5300 
5301 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5302 	the string in \a to, NULL-terminating the result.
5303 
5304 	\param to Pointer to the destination C-string.
5305 	\param from Pointer to the source C-string.
5306 	\param size Size in bytes of the string buffer pointed to by \a to.
5307 
5308 	\return strlen(\a from) on success, an error code otherwise.
5309 */
5310 ssize_t
5311 user_strlcpy(char* to, const char* from, size_t size)
5312 {
5313 	if (to == NULL && size != 0)
5314 		return B_BAD_VALUE;
5315 	if (from == NULL)
5316 		return B_BAD_ADDRESS;
5317 
5318 	// limit size to avoid address overflows
5319 	size_t maxSize = std::min(size,
5320 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5321 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5322 		// the source address might still overflow.
5323 
5324 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5325 
5326 	// If we hit the address overflow boundary, fail.
5327 	if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5328 			&& maxSize < size)) {
5329 		return B_BAD_ADDRESS;
5330 	}
5331 
5332 	return result;
5333 }
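
/*	Usage sketch (illustrative only): the typical syscall pattern for copying
	an untrusted, user-supplied name (here the hypothetical pointer
	"userName") into a kernel buffer, as used by several of the _user_*()
	functions further below.

		char name[B_OS_NAME_LENGTH];
		if (!IS_USER_ADDRESS(userName)
			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
			return B_BAD_ADDRESS;
*/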
5334 
5335 
5336 status_t
5337 user_memset(void* s, char c, size_t count)
5338 {
5339 	// don't allow address overflows
5340 	if ((addr_t)s + count < (addr_t)s)
5341 		return B_BAD_ADDRESS;
5342 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5343 		return B_BAD_ADDRESS;
5344 
5345 	return B_OK;
5346 }
5347 
5348 
5349 /*!	Wires a single page at the given address.
5350 
5351 	\param team The team whose address space the address belongs to. Also
5352 		supports \c B_CURRENT_TEAM. If the given address is a kernel address,
5353 		the parameter is ignored.
5354 	\param address The virtual address to wire down. Does not need to be
5355 		page aligned.
5356 	\param writable If \c true the page shall be writable.
5357 	\param info On success the info is filled in, among other things
5358 		containing the physical address the given virtual one translates to.
5359 	\return \c B_OK, when the page could be wired, another error code otherwise.
5360 */
5361 status_t
5362 vm_wire_page(team_id team, addr_t address, bool writable,
5363 	VMPageWiringInfo* info)
5364 {
5365 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5366 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5367 
5368 	// compute the page protection that is required
5369 	bool isUser = IS_USER_ADDRESS(address);
5370 	uint32 requiredProtection = PAGE_PRESENT
5371 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5372 	if (writable)
5373 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5374 
5375 	// get and read lock the address space
5376 	VMAddressSpace* addressSpace = NULL;
5377 	if (isUser) {
5378 		if (team == B_CURRENT_TEAM)
5379 			addressSpace = VMAddressSpace::GetCurrent();
5380 		else
5381 			addressSpace = VMAddressSpace::Get(team);
5382 	} else
5383 		addressSpace = VMAddressSpace::GetKernel();
5384 	if (addressSpace == NULL)
5385 		return B_ERROR;
5386 
5387 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5388 
5389 	VMTranslationMap* map = addressSpace->TranslationMap();
5390 	status_t error = B_OK;
5391 
5392 	// get the area
5393 	VMArea* area = addressSpace->LookupArea(pageAddress);
5394 	if (area == NULL) {
5395 		addressSpace->Put();
5396 		return B_BAD_ADDRESS;
5397 	}
5398 
5399 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5400 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5401 
5402 	// mark the area range wired
5403 	area->Wire(&info->range);
5404 
5405 	// Lock the area's cache chain and the translation map. Needed to look
5406 	// up the page and play with its wired count.
5407 	cacheChainLocker.LockAllSourceCaches();
5408 	map->Lock();
5409 
5410 	phys_addr_t physicalAddress;
5411 	uint32 flags;
5412 	vm_page* page;
5413 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5414 		&& (flags & requiredProtection) == requiredProtection
5415 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5416 			!= NULL) {
5417 		// Already mapped with the correct permissions -- just increment
5418 		// the page's wired count.
5419 		increment_page_wired_count(page);
5420 
5421 		map->Unlock();
5422 		cacheChainLocker.Unlock();
5423 		addressSpaceLocker.Unlock();
5424 	} else {
5425 		// Let vm_soft_fault() map the page for us, if possible. We need
5426 		// to fully unlock to avoid deadlocks. Since we have already
5427 		// wired the area itself, nothing disturbing will happen with it
5428 		// in the meantime.
5429 		map->Unlock();
5430 		cacheChainLocker.Unlock();
5431 		addressSpaceLocker.Unlock();
5432 
5433 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5434 			isUser, &page);
5435 
5436 		if (error != B_OK) {
5437 			// The page could not be mapped -- clean up.
5438 			VMCache* cache = vm_area_get_locked_cache(area);
5439 			area->Unwire(&info->range);
5440 			cache->ReleaseRefAndUnlock();
5441 			addressSpace->Put();
5442 			return error;
5443 		}
5444 	}
5445 
5446 	info->physicalAddress
5447 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5448 			+ address % B_PAGE_SIZE;
5449 	info->page = page;
5450 
5451 	return B_OK;
5452 }
5453 
5454 
5455 /*!	Unwires a single page previously wired via vm_wire_page().
5456 
5457 	\param info The same object passed to vm_wire_page() before.
5458 */
5459 void
5460 vm_unwire_page(VMPageWiringInfo* info)
5461 {
5462 	// lock the address space
5463 	VMArea* area = info->range.area;
5464 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5465 		// takes over our reference
5466 
5467 	// lock the top cache
5468 	VMCache* cache = vm_area_get_locked_cache(area);
5469 	VMCacheChainLocker cacheChainLocker(cache);
5470 
5471 	if (info->page->Cache() != cache) {
5472 		// The page is not in the top cache, so we lock the whole cache chain
5473 		// before touching the page's wired count.
5474 		cacheChainLocker.LockAllSourceCaches();
5475 	}
5476 
5477 	decrement_page_wired_count(info->page);
5478 
5479 	// remove the wired range from the area
5480 	area->Unwire(&info->range);
5481 
5482 	cacheChainLocker.Unlock();
5483 }
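
/*	Usage sketch (illustrative only): wiring a single page to learn its
	physical address and unwiring it again. "virtualAddress" is hypothetical;
	the VMPageWiringInfo must stay alive for as long as the page is wired.

		VMPageWiringInfo info;
		status_t error = vm_wire_page(B_CURRENT_TEAM, virtualAddress, true,
			&info);
		if (error == B_OK) {
			// info.physicalAddress now refers to the wired page
			vm_unwire_page(&info);
		}
*/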
5484 
5485 
5486 /*!	Wires down the given address range in the specified team's address space.
5487 
5488 	If successful the function
5489 	- acquires a reference to the specified team's address space,
5490 	- adds respective wired ranges to all areas that intersect with the given
5491 	  address range,
5492 	- makes sure all pages in the given address range are mapped with the
5493 	  requested access permissions and increments their wired count.
5494 
5495 	It fails when \a team doesn't specify a valid address space, when any part
5496 	of the specified address range is not covered by areas, when the concerned
5497 	areas don't allow mapping with the requested permissions, or when mapping
5498 	failed for another reason.
5499 
5500 	When successful, the call must be balanced by an unlock_memory_etc() call
5501 	with the exact same parameters.
5502 
5503 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5504 		supported.
5505 	\param address The start of the address range to be wired.
5506 	\param numBytes The size of the address range to be wired.
5507 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5508 		requests that the range must be wired writable ("read from device
5509 		into memory").
5510 	\return \c B_OK on success, another error code otherwise.
5511 */
5512 status_t
5513 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5514 {
5515 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5516 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5517 
5518 	// compute the page protection that is required
5519 	bool isUser = IS_USER_ADDRESS(address);
5520 	bool writable = (flags & B_READ_DEVICE) == 0;
5521 	uint32 requiredProtection = PAGE_PRESENT
5522 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5523 	if (writable)
5524 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5525 
5526 	uint32 mallocFlags = isUser
5527 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5528 
5529 	// get and read lock the address space
5530 	VMAddressSpace* addressSpace = NULL;
5531 	if (isUser) {
5532 		if (team == B_CURRENT_TEAM)
5533 			addressSpace = VMAddressSpace::GetCurrent();
5534 		else
5535 			addressSpace = VMAddressSpace::Get(team);
5536 	} else
5537 		addressSpace = VMAddressSpace::GetKernel();
5538 	if (addressSpace == NULL)
5539 		return B_ERROR;
5540 
5541 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5542 		// We get a new address space reference here. The one we got above will
5543 		// be freed by unlock_memory_etc().
5544 
5545 	VMTranslationMap* map = addressSpace->TranslationMap();
5546 	status_t error = B_OK;
5547 
5548 	// iterate through all concerned areas
5549 	addr_t nextAddress = lockBaseAddress;
5550 	while (nextAddress != lockEndAddress) {
5551 		// get the next area
5552 		VMArea* area = addressSpace->LookupArea(nextAddress);
5553 		if (area == NULL) {
5554 			error = B_BAD_ADDRESS;
5555 			break;
5556 		}
5557 
5558 		addr_t areaStart = nextAddress;
5559 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5560 
5561 		// allocate the wired range (do that before locking the cache to avoid
5562 		// deadlocks)
5563 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5564 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5565 		if (range == NULL) {
5566 			error = B_NO_MEMORY;
5567 			break;
5568 		}
5569 
5570 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5571 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5572 
5573 		// mark the area range wired
5574 		area->Wire(range);
5575 
5576 		// Depending on the area cache type and the wiring, we may not need to
5577 		// look at the individual pages.
5578 		if (area->cache_type == CACHE_TYPE_NULL
5579 			|| area->cache_type == CACHE_TYPE_DEVICE
5580 			|| area->wiring == B_FULL_LOCK
5581 			|| area->wiring == B_CONTIGUOUS) {
5582 			nextAddress = areaEnd;
5583 			continue;
5584 		}
5585 
5586 		// Lock the area's cache chain and the translation map. Needed to look
5587 		// up pages and play with their wired count.
5588 		cacheChainLocker.LockAllSourceCaches();
5589 		map->Lock();
5590 
5591 		// iterate through the pages and wire them
5592 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5593 			phys_addr_t physicalAddress;
5594 			uint32 flags;
5595 
5596 			vm_page* page;
5597 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5598 				&& (flags & requiredProtection) == requiredProtection
5599 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5600 					!= NULL) {
5601 				// Already mapped with the correct permissions -- just increment
5602 				// the page's wired count.
5603 				increment_page_wired_count(page);
5604 			} else {
5605 				// Let vm_soft_fault() map the page for us, if possible. We need
5606 				// to fully unlock to avoid deadlocks. Since we have already
5607 				// wired the area itself, nothing disturbing will happen with it
5608 				// in the meantime.
5609 				map->Unlock();
5610 				cacheChainLocker.Unlock();
5611 				addressSpaceLocker.Unlock();
5612 
5613 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5614 					false, isUser, &page);
5615 
5616 				addressSpaceLocker.Lock();
5617 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5618 				cacheChainLocker.LockAllSourceCaches();
5619 				map->Lock();
5620 			}
5621 
5622 			if (error != B_OK)
5623 				break;
5624 		}
5625 
5626 		map->Unlock();
5627 
5628 		if (error == B_OK) {
5629 			cacheChainLocker.Unlock();
5630 		} else {
5631 			// An error occurred, so abort right here. If the current address
5632 			// is the first in this area, unwire the area, since we won't get
5633 			// to it when reverting what we've done so far.
5634 			if (nextAddress == areaStart) {
5635 				area->Unwire(range);
5636 				cacheChainLocker.Unlock();
5637 				range->~VMAreaWiredRange();
5638 				free_etc(range, mallocFlags);
5639 			} else
5640 				cacheChainLocker.Unlock();
5641 
5642 			break;
5643 		}
5644 	}
5645 
5646 	if (error != B_OK) {
5647 		// An error occurred, so unwire all that we've already wired. Note that
5648 		// even if not a single page was wired, unlock_memory_etc() is called
5649 		// to put the address space reference.
5650 		addressSpaceLocker.Unlock();
5651 		unlock_memory_etc(team, (void*)lockBaseAddress,
5652 			nextAddress - lockBaseAddress, flags);
5653 	}
5654 
5655 	return error;
5656 }
5657 
5658 
5659 status_t
5660 lock_memory(void* address, size_t numBytes, uint32 flags)
5661 {
5662 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5663 }
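
/*	Usage sketch (illustrative only): wiring a user buffer for a "read from
	device into memory" transfer (see the \c B_READ_DEVICE description above),
	balanced by an unlock_memory_etc() call with the exact same parameters.
	"buffer" and "length" are hypothetical.

		status_t error = lock_memory_etc(B_CURRENT_TEAM, buffer, length,
			B_READ_DEVICE);
		if (error == B_OK) {
			// ... perform the transfer ...
			unlock_memory_etc(B_CURRENT_TEAM, buffer, length, B_READ_DEVICE);
		}
*/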
5664 
5665 
5666 /*!	Unwires an address range previously wired with lock_memory_etc().
5667 
5668 	Note that a call to this function must balance a previous lock_memory_etc()
5669 	call with exactly the same parameters.
5670 */
5671 status_t
5672 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5673 {
5674 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5675 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5676 
5677 	// compute the page protection that is required
5678 	bool isUser = IS_USER_ADDRESS(address);
5679 	bool writable = (flags & B_READ_DEVICE) == 0;
5680 	uint32 requiredProtection = PAGE_PRESENT
5681 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5682 	if (writable)
5683 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5684 
5685 	uint32 mallocFlags = isUser
5686 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5687 
5688 	// get and read lock the address space
5689 	VMAddressSpace* addressSpace = NULL;
5690 	if (isUser) {
5691 		if (team == B_CURRENT_TEAM)
5692 			addressSpace = VMAddressSpace::GetCurrent();
5693 		else
5694 			addressSpace = VMAddressSpace::Get(team);
5695 	} else
5696 		addressSpace = VMAddressSpace::GetKernel();
5697 	if (addressSpace == NULL)
5698 		return B_ERROR;
5699 
5700 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5701 		// Take over the address space reference. We don't unlock until we're
5702 		// done.
5703 
5704 	VMTranslationMap* map = addressSpace->TranslationMap();
5705 	status_t error = B_OK;
5706 
5707 	// iterate through all concerned areas
5708 	addr_t nextAddress = lockBaseAddress;
5709 	while (nextAddress != lockEndAddress) {
5710 		// get the next area
5711 		VMArea* area = addressSpace->LookupArea(nextAddress);
5712 		if (area == NULL) {
5713 			error = B_BAD_ADDRESS;
5714 			break;
5715 		}
5716 
5717 		addr_t areaStart = nextAddress;
5718 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5719 
5720 		// Lock the area's top cache. This is a requirement for
5721 		// VMArea::Unwire().
5722 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5723 
5724 		// Depending on the area cache type and the wiring, we may not need to
5725 		// look at the individual pages.
5726 		if (area->cache_type == CACHE_TYPE_NULL
5727 			|| area->cache_type == CACHE_TYPE_DEVICE
5728 			|| area->wiring == B_FULL_LOCK
5729 			|| area->wiring == B_CONTIGUOUS) {
5730 			// unwire the range (to avoid deadlocks we delete the range after
5731 			// unlocking the cache)
5732 			nextAddress = areaEnd;
5733 			VMAreaWiredRange* range = area->Unwire(areaStart,
5734 				areaEnd - areaStart, writable);
5735 			cacheChainLocker.Unlock();
5736 			if (range != NULL) {
5737 				range->~VMAreaWiredRange();
5738 				free_etc(range, mallocFlags);
5739 			}
5740 			continue;
5741 		}
5742 
5743 		// Lock the area's cache chain and the translation map. Needed to look
5744 		// up pages and play with their wired count.
5745 		cacheChainLocker.LockAllSourceCaches();
5746 		map->Lock();
5747 
5748 		// iterate through the pages and unwire them
5749 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5750 			phys_addr_t physicalAddress;
5751 			uint32 flags;
5752 
5753 			vm_page* page;
5754 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5755 				&& (flags & PAGE_PRESENT) != 0
5756 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5757 					!= NULL) {
5758 				// The page is still mapped -- just decrement its wired
5759 				// count.
5760 				decrement_page_wired_count(page);
5761 			} else {
5762 				panic("unlock_memory_etc(): Failed to unwire page: address "
5763 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5764 					nextAddress);
5765 				error = B_BAD_VALUE;
5766 				break;
5767 			}
5768 		}
5769 
5770 		map->Unlock();
5771 
5772 		// All pages are unwired. Remove the area's wired range as well (to
5773 		// avoid deadlocks we delete the range after unlocking the cache).
5774 		VMAreaWiredRange* range = area->Unwire(areaStart,
5775 			areaEnd - areaStart, writable);
5776 
5777 		cacheChainLocker.Unlock();
5778 
5779 		if (range != NULL) {
5780 			range->~VMAreaWiredRange();
5781 			free_etc(range, mallocFlags);
5782 		}
5783 
5784 		if (error != B_OK)
5785 			break;
5786 	}
5787 
5788 	// get rid of the address space reference lock_memory_etc() acquired
5789 	addressSpace->Put();
5790 
5791 	return error;
5792 }
5793 
5794 
5795 status_t
5796 unlock_memory(void* address, size_t numBytes, uint32 flags)
5797 {
5798 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5799 }
5800 
5801 
5802 /*!	Similar to get_memory_map(), but also allows specifying the address space
5803 	for the memory in question and has saner semantics.
5804 	Returns \c B_OK when the complete range could be translated or
5805 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5806 	case the actual number of entries is written to \c *_numEntries. Any other
5807 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5808 	in this case.
5809 */
5810 status_t
5811 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5812 	physical_entry* table, uint32* _numEntries)
5813 {
5814 	uint32 numEntries = *_numEntries;
5815 	*_numEntries = 0;
5816 
5817 	VMAddressSpace* addressSpace;
5818 	addr_t virtualAddress = (addr_t)address;
5819 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5820 	phys_addr_t physicalAddress;
5821 	status_t status = B_OK;
5822 	int32 index = -1;
5823 	addr_t offset = 0;
5824 	bool interrupts = are_interrupts_enabled();
5825 
5826 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5827 		"entries)\n", team, address, numBytes, numEntries));
5828 
5829 	if (numEntries == 0 || numBytes == 0)
5830 		return B_BAD_VALUE;
5831 
5832 	// in which address space is the address to be found?
5833 	if (IS_USER_ADDRESS(virtualAddress)) {
5834 		if (team == B_CURRENT_TEAM)
5835 			addressSpace = VMAddressSpace::GetCurrent();
5836 		else
5837 			addressSpace = VMAddressSpace::Get(team);
5838 	} else
5839 		addressSpace = VMAddressSpace::GetKernel();
5840 
5841 	if (addressSpace == NULL)
5842 		return B_ERROR;
5843 
5844 	VMTranslationMap* map = addressSpace->TranslationMap();
5845 
5846 	if (interrupts)
5847 		map->Lock();
5848 
5849 	while (offset < numBytes) {
5850 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5851 		uint32 flags;
5852 
5853 		if (interrupts) {
5854 			status = map->Query((addr_t)address + offset, &physicalAddress,
5855 				&flags);
5856 		} else {
5857 			status = map->QueryInterrupt((addr_t)address + offset,
5858 				&physicalAddress, &flags);
5859 		}
5860 		if (status < B_OK)
5861 			break;
5862 		if ((flags & PAGE_PRESENT) == 0) {
5863 			panic("get_memory_map() called on unmapped memory!");
5864 			return B_BAD_ADDRESS;
5865 		}
5866 
5867 		if (index < 0 && pageOffset > 0) {
5868 			physicalAddress += pageOffset;
5869 			if (bytes > B_PAGE_SIZE - pageOffset)
5870 				bytes = B_PAGE_SIZE - pageOffset;
5871 		}
5872 
5873 		// need to switch to the next physical_entry?
5874 		if (index < 0 || table[index].address
5875 				!= physicalAddress - table[index].size) {
5876 			if ((uint32)++index + 1 > numEntries) {
5877 				// table too small
5878 				break;
5879 			}
5880 			table[index].address = physicalAddress;
5881 			table[index].size = bytes;
5882 		} else {
5883 			// page fits in the current entry
5884 			table[index].size += bytes;
5885 		}
5886 
5887 		offset += bytes;
5888 	}
5889 
5890 	if (interrupts)
5891 		map->Unlock();
5892 
5893 	if (status != B_OK)
5894 		return status;
5895 
5896 	if ((uint32)index + 1 > numEntries) {
5897 		*_numEntries = index;
5898 		return B_BUFFER_OVERFLOW;
5899 	}
5900 
5901 	*_numEntries = index + 1;
5902 	return B_OK;
5903 }
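
/*	Usage sketch (illustrative only): translating a (previously wired) buffer
	into physical entries, e.g. to build a scatter/gather list. "buffer" and
	"length" are hypothetical; the table size of 8 is arbitrary.

		physical_entry entries[8];
		uint32 count = 8;
		status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
			entries, &count);
		if (error == B_OK || error == B_BUFFER_OVERFLOW) {
			// "count" now holds the number of valid entries
		}
*/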
5904 
5905 
5906 /*!	According to the BeBook, this function should always succeed.
5907 	This is no longer the case.
5908 */
5909 extern "C" int32
5910 __get_memory_map_haiku(const void* address, size_t numBytes,
5911 	physical_entry* table, int32 numEntries)
5912 {
5913 	uint32 entriesRead = numEntries;
5914 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5915 		table, &entriesRead);
5916 	if (error != B_OK)
5917 		return error;
5918 
5919 	// close the entry list
5920 
5921 	// if the table holds only one entry, we silently accept the missing
5922 	// terminator
5922 	if (numEntries == 1)
5923 		return B_OK;
5924 
5925 	if (entriesRead + 1 > (uint32)numEntries)
5926 		return B_BUFFER_OVERFLOW;
5927 
5928 	table[entriesRead].address = 0;
5929 	table[entriesRead].size = 0;
5930 
5931 	return B_OK;
5932 }
5933 
5934 
5935 area_id
5936 area_for(void* address)
5937 {
5938 	return vm_area_for((addr_t)address, true);
5939 }
5940 
5941 
5942 area_id
5943 find_area(const char* name)
5944 {
5945 	return VMAreaHash::Find(name);
5946 }
5947 
5948 
5949 status_t
5950 _get_area_info(area_id id, area_info* info, size_t size)
5951 {
5952 	if (size != sizeof(area_info) || info == NULL)
5953 		return B_BAD_VALUE;
5954 
5955 	AddressSpaceReadLocker locker;
5956 	VMArea* area;
5957 	status_t status = locker.SetFromArea(id, area);
5958 	if (status != B_OK)
5959 		return status;
5960 
5961 	fill_area_info(area, info, size);
5962 	return B_OK;
5963 }
5964 
5965 
5966 status_t
5967 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5968 {
5969 	addr_t nextBase = *(addr_t*)cookie;
5970 
5971 	// we're already through the list
5972 	if (nextBase == (addr_t)-1)
5973 		return B_ENTRY_NOT_FOUND;
5974 
5975 	if (team == B_CURRENT_TEAM)
5976 		team = team_get_current_team_id();
5977 
5978 	AddressSpaceReadLocker locker(team);
5979 	if (!locker.IsLocked())
5980 		return B_BAD_TEAM_ID;
5981 
5982 	VMArea* area;
5983 	for (VMAddressSpace::AreaIterator it
5984 				= locker.AddressSpace()->GetAreaIterator();
5985 			(area = it.Next()) != NULL;) {
5986 		if (area->Base() > nextBase)
5987 			break;
5988 	}
5989 
5990 	if (area == NULL) {
5991 		nextBase = (addr_t)-1;
5992 		return B_ENTRY_NOT_FOUND;
5993 	}
5994 
5995 	fill_area_info(area, info, size);
5996 	*cookie = (ssize_t)(area->Base());
5997 
5998 	return B_OK;
5999 }
6000 
6001 
6002 status_t
6003 set_area_protection(area_id area, uint32 newProtection)
6004 {
6005 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6006 		newProtection, true);
6007 }
6008 
6009 
6010 status_t
6011 resize_area(area_id areaID, size_t newSize)
6012 {
6013 	return vm_resize_area(areaID, newSize, true);
6014 }
6015 
6016 
6017 /*!	Transfers the specified area to a new team. The caller must be the owner
6018 	of the area.
6019 */
6020 area_id
6021 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6022 	bool kernel)
6023 {
6024 	area_info info;
6025 	status_t status = get_area_info(id, &info);
6026 	if (status != B_OK)
6027 		return status;
6028 
6029 	if (info.team != thread_get_current_thread()->team->id)
6030 		return B_PERMISSION_DENIED;
6031 
6032 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6033 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6034 	if (clonedArea < 0)
6035 		return clonedArea;
6036 
6037 	status = vm_delete_area(info.team, id, kernel);
6038 	if (status != B_OK) {
6039 		vm_delete_area(target, clonedArea, kernel);
6040 		return status;
6041 	}
6042 
6043 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6044 
6045 	return clonedArea;
6046 }
6047 
6048 
6049 extern "C" area_id
6050 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6051 	size_t numBytes, uint32 addressSpec, uint32 protection,
6052 	void** _virtualAddress)
6053 {
6054 	if (!arch_vm_supports_protection(protection))
6055 		return B_NOT_SUPPORTED;
6056 
6057 	fix_protection(&protection);
6058 
6059 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6060 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6061 		false);
6062 }
6063 
6064 
6065 area_id
6066 clone_area(const char* name, void** _address, uint32 addressSpec,
6067 	uint32 protection, area_id source)
6068 {
6069 	if ((protection & B_KERNEL_PROTECTION) == 0)
6070 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6071 
6072 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6073 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6074 }
6075 
6076 
6077 area_id
6078 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock,
6079 	uint32 protection, uint32 flags, uint32 guardSize,
6080 	const virtual_address_restrictions* virtualAddressRestrictions,
6081 	const physical_address_restrictions* physicalAddressRestrictions,
6082 	void** _address)
6083 {
6084 	fix_protection(&protection);
6085 
6086 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6087 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6088 		true, _address);
6089 }
6090 
6091 
6092 extern "C" area_id
6093 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6094 	size_t size, uint32 lock, uint32 protection)
6095 {
6096 	fix_protection(&protection);
6097 
6098 	virtual_address_restrictions virtualRestrictions = {};
6099 	virtualRestrictions.address = *_address;
6100 	virtualRestrictions.address_specification = addressSpec;
6101 	physical_address_restrictions physicalRestrictions = {};
6102 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6103 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6104 		true, _address);
6105 }
6106 
6107 
6108 status_t
6109 delete_area(area_id area)
6110 {
6111 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6112 }
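
/*	Usage sketch (illustrative only): creating and later deleting an anonymous
	kernel area via the public API. Name and size are hypothetical.

		void* address;
		area_id area = create_area("example buffer", &address,
			B_ANY_KERNEL_ADDRESS, 4 * B_PAGE_SIZE, B_FULL_LOCK,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		if (area >= B_OK) {
			// ... use the mapping at "address" ...
			delete_area(area);
		}
*/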
6113 
6114 
6115 //	#pragma mark - Userland syscalls
6116 
6117 
6118 status_t
6119 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6120 	addr_t size)
6121 {
6122 	// filter out some unavailable values (for userland)
6123 	switch (addressSpec) {
6124 		case B_ANY_KERNEL_ADDRESS:
6125 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6126 			return B_BAD_VALUE;
6127 	}
6128 
6129 	addr_t address;
6130 
6131 	if (!IS_USER_ADDRESS(userAddress)
6132 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6133 		return B_BAD_ADDRESS;
6134 
6135 	status_t status = vm_reserve_address_range(
6136 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6137 		RESERVED_AVOID_BASE);
6138 	if (status != B_OK)
6139 		return status;
6140 
6141 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6142 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6143 			(void*)address, size);
6144 		return B_BAD_ADDRESS;
6145 	}
6146 
6147 	return B_OK;
6148 }
6149 
6150 
6151 status_t
6152 _user_unreserve_address_range(addr_t address, addr_t size)
6153 {
6154 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6155 		(void*)address, size);
6156 }
6157 
6158 
6159 area_id
6160 _user_area_for(void* address)
6161 {
6162 	return vm_area_for((addr_t)address, false);
6163 }
6164 
6165 
6166 area_id
6167 _user_find_area(const char* userName)
6168 {
6169 	char name[B_OS_NAME_LENGTH];
6170 
6171 	if (!IS_USER_ADDRESS(userName)
6172 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6173 		return B_BAD_ADDRESS;
6174 
6175 	return find_area(name);
6176 }
6177 
6178 
6179 status_t
6180 _user_get_area_info(area_id area, area_info* userInfo)
6181 {
6182 	if (!IS_USER_ADDRESS(userInfo))
6183 		return B_BAD_ADDRESS;
6184 
6185 	area_info info;
6186 	status_t status = get_area_info(area, &info);
6187 	if (status < B_OK)
6188 		return status;
6189 
6190 	// TODO: do we want to prevent userland from seeing kernel protections?
6191 	//info.protection &= B_USER_PROTECTION;
6192 
6193 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6194 		return B_BAD_ADDRESS;
6195 
6196 	return status;
6197 }
6198 
6199 
6200 status_t
6201 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6202 {
6203 	ssize_t cookie;
6204 
6205 	if (!IS_USER_ADDRESS(userCookie)
6206 		|| !IS_USER_ADDRESS(userInfo)
6207 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6208 		return B_BAD_ADDRESS;
6209 
6210 	area_info info;
6211 	status_t status = _get_next_area_info(team, &cookie, &info,
6212 		sizeof(area_info));
6213 	if (status != B_OK)
6214 		return status;
6215 
6216 	//info.protection &= B_USER_PROTECTION;
6217 
6218 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6219 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6220 		return B_BAD_ADDRESS;
6221 
6222 	return status;
6223 }
6224 
6225 
6226 status_t
6227 _user_set_area_protection(area_id area, uint32 newProtection)
6228 {
6229 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6230 		return B_BAD_VALUE;
6231 
6232 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6233 		newProtection, false);
6234 }
6235 
6236 
6237 status_t
6238 _user_resize_area(area_id area, size_t newSize)
6239 {
6240 	// TODO: Since we restrict deleting of areas to those owned by the team,
6241 	// we should also do that for resizing (check other functions, too).
6242 	return vm_resize_area(area, newSize, false);
6243 }
6244 
6245 
6246 area_id
6247 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6248 	team_id target)
6249 {
6250 	// filter out some unavailable values (for userland)
6251 	switch (addressSpec) {
6252 		case B_ANY_KERNEL_ADDRESS:
6253 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6254 			return B_BAD_VALUE;
6255 	}
6256 
6257 	void* address;
6258 	if (!IS_USER_ADDRESS(userAddress)
6259 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6260 		return B_BAD_ADDRESS;
6261 
6262 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6263 	if (newArea < B_OK)
6264 		return newArea;
6265 
6266 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6267 		return B_BAD_ADDRESS;
6268 
6269 	return newArea;
6270 }
6271 
6272 
6273 area_id
6274 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6275 	uint32 protection, area_id sourceArea)
6276 {
6277 	char name[B_OS_NAME_LENGTH];
6278 	void* address;
6279 
6280 	// filter out some unavailable values (for userland)
6281 	switch (addressSpec) {
6282 		case B_ANY_KERNEL_ADDRESS:
6283 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6284 			return B_BAD_VALUE;
6285 	}
6286 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6287 		return B_BAD_VALUE;
6288 
6289 	if (!IS_USER_ADDRESS(userName)
6290 		|| !IS_USER_ADDRESS(userAddress)
6291 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6292 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6293 		return B_BAD_ADDRESS;
6294 
6295 	fix_protection(&protection);
6296 
6297 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6298 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6299 		false);
6300 	if (clonedArea < B_OK)
6301 		return clonedArea;
6302 
6303 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6304 		delete_area(clonedArea);
6305 		return B_BAD_ADDRESS;
6306 	}
6307 
6308 	return clonedArea;
6309 }
6310 
6311 
6312 area_id
6313 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6314 	size_t size, uint32 lock, uint32 protection)
6315 {
6316 	char name[B_OS_NAME_LENGTH];
6317 	void* address;
6318 
6319 	// filter out some unavailable values (for userland)
6320 	switch (addressSpec) {
6321 		case B_ANY_KERNEL_ADDRESS:
6322 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6323 			return B_BAD_VALUE;
6324 	}
6325 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6326 		return B_BAD_VALUE;
6327 
6328 	if (!IS_USER_ADDRESS(userName)
6329 		|| !IS_USER_ADDRESS(userAddress)
6330 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6331 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6332 		return B_BAD_ADDRESS;
6333 
6334 	if (addressSpec == B_EXACT_ADDRESS
6335 		&& IS_KERNEL_ADDRESS(address))
6336 		return B_BAD_VALUE;
6337 
6338 	if (addressSpec == B_ANY_ADDRESS)
6339 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6340 	if (addressSpec == B_BASE_ADDRESS)
6341 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6342 
6343 	fix_protection(&protection);
6344 
6345 	virtual_address_restrictions virtualRestrictions = {};
6346 	virtualRestrictions.address = address;
6347 	virtualRestrictions.address_specification = addressSpec;
6348 	physical_address_restrictions physicalRestrictions = {};
6349 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6350 		size, lock, protection, 0, 0, &virtualRestrictions,
6351 		&physicalRestrictions, false, &address);
6352 
6353 	if (area >= B_OK
6354 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6355 		delete_area(area);
6356 		return B_BAD_ADDRESS;
6357 	}
6358 
6359 	return area;
6360 }
6361 
6362 
6363 status_t
6364 _user_delete_area(area_id area)
6365 {
6366 	// Unlike the BeOS implementation, you can now only delete areas
6367 	// that you have created yourself from userland.
6368 	// The documentation to delete_area() explicitly states that this
6369 	// will be restricted in the future, and so it will.
6370 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6371 }
6372 
6373 
6374 // TODO: create a BeOS style call for this!
6375 
6376 area_id
6377 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6378 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6379 	int fd, off_t offset)
6380 {
6381 	char name[B_OS_NAME_LENGTH];
6382 	void* address;
6383 	area_id area;
6384 
6385 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6386 		return B_BAD_VALUE;
6387 
6388 	fix_protection(&protection);
6389 
6390 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6391 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6392 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6393 		return B_BAD_ADDRESS;
6394 
6395 	if (addressSpec == B_EXACT_ADDRESS) {
6396 		if ((addr_t)address + size < (addr_t)address
6397 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6398 			return B_BAD_VALUE;
6399 		}
6400 		if (!IS_USER_ADDRESS(address)
6401 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6402 			return B_BAD_ADDRESS;
6403 		}
6404 	}
6405 
6406 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6407 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6408 		false);
6409 	if (area < B_OK)
6410 		return area;
6411 
6412 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6413 		return B_BAD_ADDRESS;
6414 
6415 	return area;
6416 }
6417 
6418 
6419 status_t
6420 _user_unmap_memory(void* _address, size_t size)
6421 {
6422 	addr_t address = (addr_t)_address;
6423 
6424 	// check params
6425 	if (size == 0 || (addr_t)address + size < (addr_t)address
6426 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6427 		return B_BAD_VALUE;
6428 	}
6429 
6430 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6431 		return B_BAD_ADDRESS;
6432 
6433 	// Write lock the address space and ensure the address range is not wired.
6434 	AddressSpaceWriteLocker locker;
6435 	do {
6436 		status_t status = locker.SetTo(team_get_current_team_id());
6437 		if (status != B_OK)
6438 			return status;
6439 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6440 			size, &locker));
6441 
6442 	// unmap
6443 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6444 }
6445 
6446 
6447 status_t
6448 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6449 {
6450 	// check address range
6451 	addr_t address = (addr_t)_address;
6452 	size = PAGE_ALIGN(size);
6453 
6454 	if ((address % B_PAGE_SIZE) != 0)
6455 		return B_BAD_VALUE;
6456 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6457 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6458 		// weird error code required by POSIX
6459 		return ENOMEM;
6460 	}
6461 
6462 	// extend and check protection
6463 	if ((protection & ~B_USER_PROTECTION) != 0)
6464 		return B_BAD_VALUE;
6465 
6466 	fix_protection(&protection);
6467 
6468 	// We need to write lock the address space, since we're going to play with
6469 	// the areas. Also make sure that none of the areas is wired and that we're
6470 	// actually allowed to change the protection.
6471 	AddressSpaceWriteLocker locker;
6472 
6473 	bool restart;
6474 	do {
6475 		restart = false;
6476 
6477 		status_t status = locker.SetTo(team_get_current_team_id());
6478 		if (status != B_OK)
6479 			return status;
6480 
6481 		// First round: Check whether the whole range is covered by areas and we
6482 		// are allowed to modify them.
6483 		addr_t currentAddress = address;
6484 		size_t sizeLeft = size;
6485 		while (sizeLeft > 0) {
6486 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6487 			if (area == NULL)
6488 				return B_NO_MEMORY;
6489 
6490 			if ((area->protection & B_KERNEL_AREA) != 0)
6491 				return B_NOT_ALLOWED;
6492 
6493 			// TODO: For (shared) mapped files we should check whether the new
6494 			// protections are compatible with the file permissions. We don't
6495 			// have a way to do that yet, though.
6496 
6497 			addr_t offset = currentAddress - area->Base();
6498 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6499 
6500 			AreaCacheLocker cacheLocker(area);
6501 
6502 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6503 					&locker, &cacheLocker)) {
6504 				restart = true;
6505 				break;
6506 			}
6507 
6508 			cacheLocker.Unlock();
6509 
6510 			currentAddress += rangeSize;
6511 			sizeLeft -= rangeSize;
6512 		}
6513 	} while (restart);
6514 
6515 	// Second round: If the protections differ from that of the area, create a
6516 	// page protection array and re-map mapped pages.
6517 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6518 	addr_t currentAddress = address;
6519 	size_t sizeLeft = size;
6520 	while (sizeLeft > 0) {
6521 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6522 		if (area == NULL)
6523 			return B_NO_MEMORY;
6524 
6525 		addr_t offset = currentAddress - area->Base();
6526 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6527 
6528 		currentAddress += rangeSize;
6529 		sizeLeft -= rangeSize;
6530 
6531 		if (area->page_protections == NULL) {
6532 			if (area->protection == protection)
6533 				continue;
6534 
6535 			status_t status = allocate_area_page_protections(area);
6536 			if (status != B_OK)
6537 				return status;
6538 		}
6539 
6540 		// We need to lock the complete cache chain, since we potentially unmap
6541 		// pages of lower caches.
6542 		VMCache* topCache = vm_area_get_locked_cache(area);
6543 		VMCacheChainLocker cacheChainLocker(topCache);
6544 		cacheChainLocker.LockAllSourceCaches();
6545 
6546 		for (addr_t pageAddress = area->Base() + offset;
6547 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6548 			map->Lock();
6549 
6550 			set_area_page_protection(area, pageAddress, protection);
6551 
6552 			phys_addr_t physicalAddress;
6553 			uint32 flags;
6554 
6555 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6556 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6557 				map->Unlock();
6558 				continue;
6559 			}
6560 
6561 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6562 			if (page == NULL) {
6563 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6564 					"\n", area, physicalAddress);
6565 				map->Unlock();
6566 				return B_ERROR;
6567 			}
6568 
6569 			// If the page is not in the topmost cache and write access is
6570 			// requested, we have to unmap it. Otherwise we can re-map it with
6571 			// the new protection.
6572 			bool unmapPage = page->Cache() != topCache
6573 				&& (protection & B_WRITE_AREA) != 0;
6574 
6575 			if (!unmapPage)
6576 				map->ProtectPage(area, pageAddress, protection);
6577 
6578 			map->Unlock();
6579 
6580 			if (unmapPage) {
6581 				DEBUG_PAGE_ACCESS_START(page);
6582 				unmap_page(area, pageAddress);
6583 				DEBUG_PAGE_ACCESS_END(page);
6584 			}
6585 		}
6586 	}
6587 
6588 	return B_OK;
6589 }
6590 
6591 
6592 status_t
6593 _user_sync_memory(void* _address, size_t size, uint32 flags)
6594 {
6595 	addr_t address = (addr_t)_address;
6596 	size = PAGE_ALIGN(size);
6597 
6598 	// check params
6599 	if ((address % B_PAGE_SIZE) != 0)
6600 		return B_BAD_VALUE;
6601 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6602 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6603 		// weird error code required by POSIX
6604 		return ENOMEM;
6605 	}
6606 
6607 	bool writeSync = (flags & MS_SYNC) != 0;
6608 	bool writeAsync = (flags & MS_ASYNC) != 0;
6609 	if (writeSync && writeAsync)
6610 		return B_BAD_VALUE;
6611 
6612 	if (size == 0 || (!writeSync && !writeAsync))
6613 		return B_OK;
6614 
6615 	// iterate through the range and sync all concerned areas
6616 	while (size > 0) {
6617 		// read lock the address space
6618 		AddressSpaceReadLocker locker;
6619 		status_t error = locker.SetTo(team_get_current_team_id());
6620 		if (error != B_OK)
6621 			return error;
6622 
6623 		// get the first area
6624 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6625 		if (area == NULL)
6626 			return B_NO_MEMORY;
6627 
6628 		uint32 offset = address - area->Base();
6629 		size_t rangeSize = min_c(area->Size() - offset, size);
6630 		offset += area->cache_offset;
6631 
6632 		// lock the cache
6633 		AreaCacheLocker cacheLocker(area);
6634 		if (!cacheLocker)
6635 			return B_BAD_VALUE;
6636 		VMCache* cache = area->cache;
6637 
6638 		locker.Unlock();
6639 
6640 		uint32 firstPage = offset >> PAGE_SHIFT;
6641 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6642 
6643 		// write the pages
6644 		if (cache->type == CACHE_TYPE_VNODE) {
6645 			if (writeSync) {
6646 				// synchronous
6647 				error = vm_page_write_modified_page_range(cache, firstPage,
6648 					endPage);
6649 				if (error != B_OK)
6650 					return error;
6651 			} else {
6652 				// asynchronous
6653 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6654 				// TODO: This is probably not quite what is supposed to happen.
6655 				// Especially when a lot has to be written, it might take ages
6656 				// until it really hits the disk.
6657 			}
6658 		}
6659 
6660 		address += rangeSize;
6661 		size -= rangeSize;
6662 	}
6663 
6664 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6665 	// synchronize multiple mappings of the same file. In our VM they never get
6666 	// out of sync, though, so we don't have to do anything.
6667 
6668 	return B_OK;
6669 }
6670 
6671 
6672 status_t
6673 _user_memory_advice(void* address, size_t size, uint32 advice)
6674 {
6675 	// TODO: Implement!
6676 	return B_OK;
6677 }
6678 
6679 
6680 status_t
6681 _user_get_memory_properties(team_id teamID, const void* address,
6682 	uint32* _protected, uint32* _lock)
6683 {
6684 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6685 		return B_BAD_ADDRESS;
6686 
6687 	AddressSpaceReadLocker locker;
6688 	status_t error = locker.SetTo(teamID);
6689 	if (error != B_OK)
6690 		return error;
6691 
6692 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6693 	if (area == NULL)
6694 		return B_NO_MEMORY;
6695 
6697 	uint32 protection = area->protection;
6698 	if (area->page_protections != NULL)
6699 		protection = get_area_page_protection(area, (addr_t)address);
6700 
6701 	uint32 wiring = area->wiring;
6702 
6703 	locker.Unlock();
6704 
6705 	error = user_memcpy(_protected, &protection, sizeof(protection));
6706 	if (error != B_OK)
6707 		return error;
6708 
6709 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6710 
6711 	return error;
6712 }
6713 
6714 
6715 // #pragma mark -- compatibility
6716 
6717 
6718 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6719 
6720 
6721 struct physical_entry_beos {
6722 	uint32	address;
6723 	uint32	size;
6724 };
6725 
6726 
6727 /*!	The physical_entry structure has changed. We need to translate it to the
6728 	old one.
6729 */
6730 extern "C" int32
6731 __get_memory_map_beos(const void* _address, size_t numBytes,
6732 	physical_entry_beos* table, int32 numEntries)
6733 {
6734 	if (numEntries <= 0)
6735 		return B_BAD_VALUE;
6736 
6737 	const uint8* address = (const uint8*)_address;
6738 
6739 	int32 count = 0;
6740 	while (numBytes > 0 && count < numEntries) {
6741 		physical_entry entry;
6742 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6743 		if (result < 0) {
6744 			if (result != B_BUFFER_OVERFLOW)
6745 				return result;
6746 		}
6747 
6748 		if (entry.address >= (phys_addr_t)1 << 32) {
6749 			panic("get_memory_map(): Address is greater than 4 GB!");
6750 			return B_ERROR;
6751 		}
6752 
6753 		table[count].address = entry.address;
6754 		table[count++].size = entry.size;
6755 
6756 		address += entry.size;
6757 		numBytes -= entry.size;
6758 	}
6759 
6760 	// null-terminate the table, if possible
6761 	if (count < numEntries) {
6762 		table[count].address = 0;
6763 		table[count].size = 0;
6764 	}
6765 
6766 	return B_OK;
6767 }
6768 
6769 
6770 /*!	The type of the \a physicalAddress parameter has changed from void* to
6771 	phys_addr_t.
6772 */
6773 extern "C" area_id
6774 __map_physical_memory_beos(const char* name, void* physicalAddress,
6775 	size_t numBytes, uint32 addressSpec, uint32 protection,
6776 	void** _virtualAddress)
6777 {
6778 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6779 		addressSpec, protection, _virtualAddress);
6780 }
6781 
6782 
6783 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6784 	we meddle with the \a lock parameter to force 32 bit.
6785 */
6786 extern "C" area_id
6787 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6788 	size_t size, uint32 lock, uint32 protection)
6789 {
6790 	switch (lock) {
6791 		case B_NO_LOCK:
6792 			break;
6793 		case B_FULL_LOCK:
6794 		case B_LAZY_LOCK:
6795 			lock = B_32_BIT_FULL_LOCK;
6796 			break;
6797 		case B_CONTIGUOUS:
6798 			lock = B_32_BIT_CONTIGUOUS;
6799 			break;
6800 	}
6801 
6802 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6803 		protection);
6804 }
6805 
6806 
6807 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6808 	"BASE");
6809 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6810 	"map_physical_memory@", "BASE");
6811 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6812 	"BASE");
6813 
6814 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6815 	"get_memory_map@@", "1_ALPHA3");
6816 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6817 	"map_physical_memory@@", "1_ALPHA3");
6818 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6819 	"1_ALPHA3");
6820 
6821 
6822 #else
6823 
6824 
6825 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6826 	"get_memory_map@@", "BASE");
6827 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6828 	"map_physical_memory@@", "BASE");
6829 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6830 	"BASE");
6831 
6832 
6833 #endif	// defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6834