xref: /haiku/src/system/kernel/vm/vm.cpp (revision a629567a9001547736cfe892cdf992be16868fed)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <boot/elf.h>
31 #include <boot/stage2.h>
32 #include <condition_variable.h>
33 #include <console.h>
34 #include <debug.h>
35 #include <file_cache.h>
36 #include <fs/fd.h>
37 #include <heap.h>
38 #include <kernel.h>
39 #include <int.h>
40 #include <lock.h>
41 #include <low_resource_manager.h>
42 #include <slab/Slab.h>
43 #include <smp.h>
44 #include <system_info.h>
45 #include <thread.h>
46 #include <team.h>
47 #include <tracing.h>
48 #include <util/AutoLock.h>
49 #include <util/khash.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 class AreaCacheLocking {
77 public:
78 	inline bool Lock(VMCache* lockable)
79 	{
80 		return false;
81 	}
82 
83 	inline void Unlock(VMCache* lockable)
84 	{
85 		vm_area_put_locked_cache(lockable);
86 	}
87 };
88 
89 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
90 public:
91 	inline AreaCacheLocker(VMCache* cache = NULL)
92 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
93 	{
94 	}
95 
96 	inline AreaCacheLocker(VMArea* area)
97 		: AutoLocker<VMCache, AreaCacheLocking>()
98 	{
99 		SetTo(area);
100 	}
101 
102 	inline void SetTo(VMCache* cache, bool alreadyLocked)
103 	{
104 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
105 	}
106 
107 	inline void SetTo(VMArea* area)
108 	{
109 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
110 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
111 	}
112 };
113 
114 
115 class VMCacheChainLocker {
116 public:
117 	VMCacheChainLocker()
118 		:
119 		fTopCache(NULL),
120 		fBottomCache(NULL)
121 	{
122 	}
123 
124 	VMCacheChainLocker(VMCache* topCache)
125 		:
126 		fTopCache(topCache),
127 		fBottomCache(topCache)
128 	{
129 	}
130 
131 	~VMCacheChainLocker()
132 	{
133 		Unlock();
134 	}
135 
136 	void SetTo(VMCache* topCache)
137 	{
138 		fTopCache = topCache;
139 		fBottomCache = topCache;
140 
141 		if (topCache != NULL)
142 			topCache->SetUserData(NULL);
143 	}
144 
145 	VMCache* LockSourceCache()
146 	{
147 		if (fBottomCache == NULL || fBottomCache->source == NULL)
148 			return NULL;
149 
150 		VMCache* previousCache = fBottomCache;
151 
152 		fBottomCache = fBottomCache->source;
153 		fBottomCache->Lock();
154 		fBottomCache->AcquireRefLocked();
155 		fBottomCache->SetUserData(previousCache);
156 
157 		return fBottomCache;
158 	}
159 
160 	void LockAllSourceCaches()
161 	{
162 		while (LockSourceCache() != NULL) {
163 		}
164 	}
165 
166 	void Unlock(VMCache* exceptCache = NULL)
167 	{
168 		if (fTopCache == NULL)
169 			return;
170 
171 		// Unlock caches in source -> consumer direction. This is important to
172 		// avoid double-locking and a reversal of locking order in case a cache
173 		// is eligible for merging.
174 		VMCache* cache = fBottomCache;
175 		while (cache != NULL) {
176 			VMCache* nextCache = (VMCache*)cache->UserData();
177 			if (cache != exceptCache)
178 				cache->ReleaseRefAndUnlock(cache != fTopCache);
179 
180 			if (cache == fTopCache)
181 				break;
182 
183 			cache = nextCache;
184 		}
185 
186 		fTopCache = NULL;
187 		fBottomCache = NULL;
188 	}
189 
190 	void UnlockKeepRefs(bool keepTopCacheLocked)
191 	{
192 		if (fTopCache == NULL)
193 			return;
194 
195 		VMCache* nextCache = fBottomCache;
196 		VMCache* cache = NULL;
197 
198 		while (keepTopCacheLocked
199 				? nextCache != fTopCache : cache != fTopCache) {
200 			cache = nextCache;
201 			nextCache = (VMCache*)cache->UserData();
202 			cache->Unlock(cache != fTopCache);
203 		}
204 	}
205 
206 	void RelockCaches(bool topCacheLocked)
207 	{
208 		if (fTopCache == NULL)
209 			return;
210 
211 		VMCache* nextCache = fTopCache;
212 		VMCache* cache = NULL;
213 		if (topCacheLocked) {
214 			cache = nextCache;
215 			nextCache = cache->source;
216 		}
217 
218 		while (cache != fBottomCache && nextCache != NULL) {
219 			VMCache* consumer = cache;
220 			cache = nextCache;
221 			nextCache = cache->source;
222 			cache->Lock();
223 			cache->SetUserData(consumer);
224 		}
225 	}
226 
227 private:
228 	VMCache*	fTopCache;
229 	VMCache*	fBottomCache;
230 };
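// Illustrative usage sketch for VMCacheChainLocker (added comment, not part of
// the original source; it mirrors how cut_area() below uses the class):
//
//   VMCache* cache = vm_area_get_locked_cache(area);
//   VMCacheChainLocker cacheChainLocker(cache);
//       // adopts the already locked top cache
//   cacheChainLocker.LockAllSourceCaches();
//       // walks and locks the whole source chain
//   ...
//   // The destructor (or an explicit Unlock()) releases the chain again,
//   // unlocking in source -> consumer order.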
231 
232 
233 // The memory reserve that an allocation of a given priority must not touch.
234 static const size_t kMemoryReserveForPriority[] = {
235 	VM_MEMORY_RESERVE_USER,		// user
236 	VM_MEMORY_RESERVE_SYSTEM,	// system
237 	0							// VIP
238 };
239 
240 
241 ObjectCache* gPageMappingsObjectCache;
242 
243 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
244 
245 static off_t sAvailableMemory;
246 static off_t sNeededMemory;
247 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
248 static uint32 sPageFaults;
249 
250 static VMPhysicalPageMapper* sPhysicalPageMapper;
251 
252 #if DEBUG_CACHE_LIST
253 
254 struct cache_info {
255 	VMCache*	cache;
256 	addr_t		page_count;
257 	addr_t		committed;
258 };
259 
260 static const int kCacheInfoTableCount = 100 * 1024;
261 static cache_info* sCacheInfoTable;
262 
263 #endif	// DEBUG_CACHE_LIST
264 
265 
266 // function declarations
267 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
268 	bool addressSpaceCleanup);
269 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
270 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage,
271 	VMAreaWiredRange* wiredRange = NULL);
272 static status_t map_backing_store(VMAddressSpace* addressSpace,
273 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
274 	int protection, int mapping, uint32 flags,
275 	const virtual_address_restrictions* addressRestrictions, bool kernel,
276 	VMArea** _area, void** _virtualAddress);
277 static void fix_protection(uint32* protection);
278 
279 
280 //	#pragma mark -
281 
282 
283 #if VM_PAGE_FAULT_TRACING
284 
285 namespace VMPageFaultTracing {
286 
287 class PageFaultStart : public AbstractTraceEntry {
288 public:
289 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
290 		:
291 		fAddress(address),
292 		fPC(pc),
293 		fWrite(write),
294 		fUser(user)
295 	{
296 		Initialized();
297 	}
298 
299 	virtual void AddDump(TraceOutput& out)
300 	{
301 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
302 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
303 	}
304 
305 private:
306 	addr_t	fAddress;
307 	addr_t	fPC;
308 	bool	fWrite;
309 	bool	fUser;
310 };
311 
312 
313 // page fault errors
314 enum {
315 	PAGE_FAULT_ERROR_NO_AREA		= 0,
316 	PAGE_FAULT_ERROR_KERNEL_ONLY,
317 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
318 	PAGE_FAULT_ERROR_READ_PROTECTED,
319 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
320 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
321 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
322 };
323 
324 
325 class PageFaultError : public AbstractTraceEntry {
326 public:
327 	PageFaultError(area_id area, status_t error)
328 		:
329 		fArea(area),
330 		fError(error)
331 	{
332 		Initialized();
333 	}
334 
335 	virtual void AddDump(TraceOutput& out)
336 	{
337 		switch (fError) {
338 			case PAGE_FAULT_ERROR_NO_AREA:
339 				out.Print("page fault error: no area");
340 				break;
341 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
342 				out.Print("page fault error: area: %ld, kernel only", fArea);
343 				break;
344 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
345 				out.Print("page fault error: area: %ld, write protected",
346 					fArea);
347 				break;
348 			case PAGE_FAULT_ERROR_READ_PROTECTED:
349 				out.Print("page fault error: area: %ld, read protected", fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
352 				out.Print("page fault error: area: %ld, execute protected",
353 					fArea);
354 				break;
355 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
356 				out.Print("page fault error: kernel touching bad user memory");
357 				break;
358 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
359 				out.Print("page fault error: no address space");
360 				break;
361 			default:
362 				out.Print("page fault error: area: %ld, error: %s", fArea,
363 					strerror(fError));
364 				break;
365 		}
366 	}
367 
368 private:
369 	area_id		fArea;
370 	status_t	fError;
371 };
372 
373 
374 class PageFaultDone : public AbstractTraceEntry {
375 public:
376 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
377 			vm_page* page)
378 		:
379 		fArea(area),
380 		fTopCache(topCache),
381 		fCache(cache),
382 		fPage(page)
383 	{
384 		Initialized();
385 	}
386 
387 	virtual void AddDump(TraceOutput& out)
388 	{
389 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
390 			"page: %p", fArea, fTopCache, fCache, fPage);
391 	}
392 
393 private:
394 	area_id		fArea;
395 	VMCache*	fTopCache;
396 	VMCache*	fCache;
397 	vm_page*	fPage;
398 };
399 
400 }	// namespace VMPageFaultTracing
401 
402 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
403 #else
404 #	define TPF(x) ;
405 #endif	// VM_PAGE_FAULT_TRACING
406 
407 
408 //	#pragma mark -
409 
410 
411 /*!	The page's cache must be locked.
412 */
413 static inline void
414 increment_page_wired_count(vm_page* page)
415 {
416 	if (!page->IsMapped())
417 		atomic_add(&gMappedPagesCount, 1);
418 	page->IncrementWiredCount();
419 }
420 
421 
422 /*!	The page's cache must be locked.
423 */
424 static inline void
425 decrement_page_wired_count(vm_page* page)
426 {
427 	page->DecrementWiredCount();
428 	if (!page->IsMapped())
429 		atomic_add(&gMappedPagesCount, -1);
430 }
431 
432 
433 static inline addr_t
434 virtual_page_address(VMArea* area, vm_page* page)
435 {
436 	return area->Base()
437 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
438 }
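// Worked example for virtual_page_address() (added comment, not part of the
// original source; assumes 4 KiB pages, i.e. PAGE_SHIFT == 12): for an area
// based at 0x80000000 with cache_offset 0x2000, a page at cache_offset 3
// (0x3000 bytes into the cache) maps to 0x80000000 + (0x3000 - 0x2000)
// = 0x80001000.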
439 
440 
441 //! You need to have the address space locked when calling this function
442 static VMArea*
443 lookup_area(VMAddressSpace* addressSpace, area_id id)
444 {
445 	VMAreaHash::ReadLock();
446 
447 	VMArea* area = VMAreaHash::LookupLocked(id);
448 	if (area != NULL && area->address_space != addressSpace)
449 		area = NULL;
450 
451 	VMAreaHash::ReadUnlock();
452 
453 	return area;
454 }
455 
456 
457 static status_t
458 allocate_area_page_protections(VMArea* area)
459 {
460 	// In the page protections we store only the three user protections,
461 	// so we use 4 bits per page.
462 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
463 	area->page_protections = (uint8*)malloc_etc(bytes,
464 		HEAP_DONT_LOCK_KERNEL_SPACE);
465 	if (area->page_protections == NULL)
466 		return B_NO_MEMORY;
467 
468 	// init the page protections for all pages to that of the area
469 	uint32 areaProtection = area->protection
470 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
471 	memset(area->page_protections, areaProtection | (areaProtection << 4),
472 		bytes);
473 	return B_OK;
474 }
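// Worked example for the page protection packing (added comment, not part of
// the original source; assumes 4 KiB pages): a 16-page (64 KiB) area needs
// (16 + 1) / 2 = 8 bytes of page_protections. Page i's protection nibble lives
// in byte i / 2 -- in the low nibble for even i and in the high nibble for odd
// i, as implemented by set_area_page_protection() and
// get_area_page_protection() below.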
475 
476 
477 static inline void
478 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
479 {
480 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
481 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
482 	uint8& entry = area->page_protections[pageIndex / 2];
483 	if (pageIndex % 2 == 0)
484 		entry = (entry & 0xf0) | protection;
485 	else
486 		entry = (entry & 0x0f) | (protection << 4);
487 }
488 
489 
490 static inline uint32
491 get_area_page_protection(VMArea* area, addr_t pageAddress)
492 {
493 	if (area->page_protections == NULL)
494 		return area->protection;
495 
496 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
497 	uint32 protection = area->page_protections[pageIndex / 2];
498 	if (pageIndex % 2 == 0)
499 		protection &= 0x0f;
500 	else
501 		protection >>= 4;
502 
503 	// If this is a kernel area we translate the user flags to kernel flags.
504 	if (area->address_space == VMAddressSpace::Kernel()) {
505 		uint32 kernelProtection = 0;
506 		if ((protection & B_READ_AREA) != 0)
507 			kernelProtection |= B_KERNEL_READ_AREA;
508 		if ((protection & B_WRITE_AREA) != 0)
509 			kernelProtection |= B_KERNEL_WRITE_AREA;
510 
511 		return kernelProtection;
512 	}
513 
514 	return protection | B_KERNEL_READ_AREA
515 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
516 }
517 
518 
519 /*!	The caller must have reserved as many pages as the translation map
520 	implementation might need to map this page.
521 	The page's cache must be locked.
522 */
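// Added note (not part of the original source): map_page() below has two paths.
// For B_NO_LOCK areas it allocates a vm_page_mapping and links it into both the
// page's and the area's mapping lists, so the mapping can be found and torn
// down later; for all other (wired) wiring modes it just maps the page and
// increments its wired count instead.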
523 static status_t
524 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
525 	vm_page_reservation* reservation)
526 {
527 	VMTranslationMap* map = area->address_space->TranslationMap();
528 
529 	bool wasMapped = page->IsMapped();
530 
531 	if (area->wiring == B_NO_LOCK) {
532 		DEBUG_PAGE_ACCESS_CHECK(page);
533 
534 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
535 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
536 			gPageMappingsObjectCache,
537 			CACHE_DONT_WAIT_FOR_MEMORY
538 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
539 		if (mapping == NULL)
540 			return B_NO_MEMORY;
541 
542 		mapping->page = page;
543 		mapping->area = area;
544 
545 		map->Lock();
546 
547 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
548 			area->MemoryType(), reservation);
549 
550 		// insert mapping into lists
551 		if (!page->IsMapped())
552 			atomic_add(&gMappedPagesCount, 1);
553 
554 		page->mappings.Add(mapping);
555 		area->mappings.Add(mapping);
556 
557 		map->Unlock();
558 	} else {
559 		DEBUG_PAGE_ACCESS_CHECK(page);
560 
561 		map->Lock();
562 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
563 			area->MemoryType(), reservation);
564 		map->Unlock();
565 
566 		increment_page_wired_count(page);
567 	}
568 
569 	if (!wasMapped) {
570 		// The page is mapped now, so it must not remain in the cached queue.
571 		// It also makes sense to move it from the inactive to the active queue,
572 		// since otherwise the page daemon wouldn't keep track of it (in idle
573 		// mode) -- if the page isn't touched, it will be deactivated after a
574 		// full iteration through the queue at the latest.
575 		if (page->State() == PAGE_STATE_CACHED
576 				|| page->State() == PAGE_STATE_INACTIVE) {
577 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
578 		}
579 	}
580 
581 	return B_OK;
582 }
583 
584 
585 /*!	The caller must hold the lock of the page's cache; UnmapPage() is always
586 	called with its last argument set to \c true here.
587 */
588 static inline bool
589 unmap_page(VMArea* area, addr_t virtualAddress)
590 {
591 	return area->address_space->TranslationMap()->UnmapPage(area,
592 		virtualAddress, true);
593 }
594 
595 
596 /*!	The caller must hold the lock of all mapped pages' caches; UnmapPages()
597 	is always called with its last argument set to \c true here.
598 */
599 static inline void
600 unmap_pages(VMArea* area, addr_t base, size_t size)
601 {
602 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
603 }
604 
605 
606 /*!	Cuts a piece out of an area. If the given cut range covers the complete
607 	area, it is deleted. If it covers the beginning or the end, the area is
608 	resized accordingly. If the range covers some part in the middle of the
609 	area, it is split in two; in this case the second area is returned via
610 	\a _secondArea (the variable is left untouched in the other cases).
611 	The address space must be write locked.
612 	The caller must ensure that no part of the given range is wired.
613 */
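// Illustrative example for cut_area() (added comment, not part of the original
// source; assumes 4 KiB pages): cutting [0x3000, 0x4fff] out of an area
// spanning [0x1000, 0x8fff] is the "middle" case -- the area is shrunk to
// [0x1000, 0x2fff] and a new second area is created for [0x5000, 0x8fff],
// returned via _secondArea. A cut range covering the whole area deletes it
// instead, and one overlapping only the beginning or the end merely shrinks it.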
614 static status_t
615 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
616 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
617 {
618 	// Does the cut range intersect with the area at all?
619 	addr_t areaLast = area->Base() + (area->Size() - 1);
620 	if (area->Base() > lastAddress || areaLast < address)
621 		return B_OK;
622 
623 	// Is the area fully covered?
624 	if (area->Base() >= address && areaLast <= lastAddress) {
625 		delete_area(addressSpace, area, false);
626 		return B_OK;
627 	}
628 
629 	int priority;
630 	uint32 allocationFlags;
631 	if (addressSpace == VMAddressSpace::Kernel()) {
632 		priority = VM_PRIORITY_SYSTEM;
633 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
634 			| HEAP_DONT_LOCK_KERNEL_SPACE;
635 	} else {
636 		priority = VM_PRIORITY_USER;
637 		allocationFlags = 0;
638 	}
639 
640 	VMCache* cache = vm_area_get_locked_cache(area);
641 	VMCacheChainLocker cacheChainLocker(cache);
642 	cacheChainLocker.LockAllSourceCaches();
643 
644 	// Cut the end only?
645 	if (areaLast <= lastAddress) {
646 		size_t oldSize = area->Size();
647 		size_t newSize = address - area->Base();
648 
649 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
650 			allocationFlags);
651 		if (error != B_OK)
652 			return error;
653 
654 		// unmap pages
655 		unmap_pages(area, address, oldSize - newSize);
656 
657 		// If no one else uses the area's cache, we can resize it, too.
658 		if (cache->areas == area && area->cache_next == NULL
659 			&& cache->consumers.IsEmpty()
660 			&& cache->type == CACHE_TYPE_RAM) {
661 			// Since VMCache::Resize() can temporarily drop the lock, we must
662 			// unlock all lower caches to prevent locking order inversion.
663 			cacheChainLocker.Unlock(cache);
664 			cache->Resize(cache->virtual_base + newSize, priority);
665 			cache->ReleaseRefAndUnlock();
666 		}
667 
668 		return B_OK;
669 	}
670 
671 	// Cut the beginning only?
672 	if (area->Base() >= address) {
673 		addr_t oldBase = area->Base();
674 		addr_t newBase = lastAddress + 1;
675 		size_t newSize = areaLast - lastAddress;
676 
677 		// unmap pages
678 		unmap_pages(area, oldBase, newBase - oldBase);
679 
680 		// resize the area
681 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
682 			allocationFlags);
683 		if (error != B_OK)
684 			return error;
685 
686 		// TODO: If no one else uses the area's cache, we should resize it, too!
687 
688 		area->cache_offset += newBase - oldBase;
689 
690 		return B_OK;
691 	}
692 
693 	// The tough part -- cut a piece out of the middle of the area.
694 	// We do that by shrinking the area to the beginning section and creating a
695 	// new area for the end section.
696 
697 	addr_t firstNewSize = address - area->Base();
698 	addr_t secondBase = lastAddress + 1;
699 	addr_t secondSize = areaLast - lastAddress;
700 
701 	// unmap pages
702 	unmap_pages(area, address, area->Size() - firstNewSize);
703 
704 	// resize the area
705 	addr_t oldSize = area->Size();
706 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
707 		allocationFlags);
708 	if (error != B_OK)
709 		return error;
710 
711 	// TODO: If no one else uses the area's cache, we might want to create a
712 	// new cache for the second area, transfer the concerned pages from the
713 	// first cache to it and resize the first cache.
714 
715 	// map the second area
716 	virtual_address_restrictions addressRestrictions = {};
717 	addressRestrictions.address = (void*)secondBase;
718 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
719 	VMArea* secondArea;
720 	error = map_backing_store(addressSpace, cache,
721 		area->cache_offset + (secondBase - area->Base()), area->name,
722 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
723 		&addressRestrictions, kernel, &secondArea, NULL);
724 	if (error != B_OK) {
725 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
726 		return error;
727 	}
728 
729 	// We need a cache reference for the new area.
730 	cache->AcquireRefLocked();
731 
732 	if (_secondArea != NULL)
733 		*_secondArea = secondArea;
734 
735 	return B_OK;
736 }
737 
738 
739 /*!	Deletes all areas in the given address range.
740 	The address space must be write-locked.
741 	The caller must ensure that no part of the given range is wired.
742 */
743 static status_t
744 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
745 	bool kernel)
746 {
747 	size = PAGE_ALIGN(size);
748 	addr_t lastAddress = address + (size - 1);
749 
750 	// Check whether the caller is allowed to modify the concerned areas.
751 	if (!kernel) {
752 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
753 				VMArea* area = it.Next();) {
754 			addr_t areaLast = area->Base() + (area->Size() - 1);
755 			if (area->Base() < lastAddress && address < areaLast) {
756 				if ((area->protection & B_KERNEL_AREA) != 0)
757 					return B_NOT_ALLOWED;
758 			}
759 		}
760 	}
761 
762 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
763 			VMArea* area = it.Next();) {
764 		addr_t areaLast = area->Base() + (area->Size() - 1);
765 		if (area->Base() < lastAddress && address < areaLast) {
766 			status_t error = cut_area(addressSpace, area, address,
767 				lastAddress, NULL, kernel);
768 			if (error != B_OK)
769 				return error;
770 				// Failing after already messing with areas is ugly, but we
771 				// can't do anything about it.
772 		}
773 	}
774 
775 	return B_OK;
776 }
777 
778 
779 /*! You need to hold the lock of the cache and the write lock of the address
780 	space when calling this function.
781 	Note that in case of error the cache will be temporarily unlocked.
782 	If \a addressSpec is \c B_EXACT_ADDRESS and the
783 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
784 	that no part of the specified address range (base \c *_virtualAddress, size
785 	\a size) is wired.
786 */
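// Added note (not part of the original source): with REGION_PRIVATE_MAP,
// map_backing_store() layers a new temporary anonymous cache on top of the
// passed cache (see below), so pages written through the area get private
// copies while the source cache stays untouched; with REGION_NO_PRIVATE_MAP
// the area maps the passed cache directly.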
787 static status_t
788 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
789 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
790 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
791 	bool kernel, VMArea** _area, void** _virtualAddress)
792 {
793 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
794 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
795 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
796 		addressRestrictions->address, offset, size,
797 		addressRestrictions->address_specification, wiring, protection,
798 		_area, areaName));
799 	cache->AssertLocked();
800 
801 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
802 		| HEAP_DONT_LOCK_KERNEL_SPACE;
803 	int priority;
804 	if (addressSpace != VMAddressSpace::Kernel()) {
805 		priority = VM_PRIORITY_USER;
806 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
807 		priority = VM_PRIORITY_VIP;
808 		allocationFlags |= HEAP_PRIORITY_VIP;
809 	} else
810 		priority = VM_PRIORITY_SYSTEM;
811 
812 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
813 		allocationFlags);
814 	if (area == NULL)
815 		return B_NO_MEMORY;
816 
817 	status_t status;
818 
819 	// if this is a private map, we need to create a new cache
820 	// to handle the private copies of pages as they are written to
821 	VMCache* sourceCache = cache;
822 	if (mapping == REGION_PRIVATE_MAP) {
823 		VMCache* newCache;
824 
825 		// create an anonymous cache
826 		status = VMCacheFactory::CreateAnonymousCache(newCache,
827 			(protection & B_STACK_AREA) != 0
828 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
829 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
830 		if (status != B_OK)
831 			goto err1;
832 
833 		newCache->Lock();
834 		newCache->temporary = 1;
835 		newCache->virtual_base = offset;
836 		newCache->virtual_end = offset + size;
837 
838 		cache->AddConsumer(newCache);
839 
840 		cache = newCache;
841 	}
842 
843 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
844 		status = cache->SetMinimalCommitment(size, priority);
845 		if (status != B_OK)
846 			goto err2;
847 	}
848 
849 	// check to see if this address space has entered DELETE state
850 	if (addressSpace->IsBeingDeleted()) {
851 		// okay, someone is trying to delete this address space now, so we can't
852 		// insert the area; back out
853 		status = B_BAD_TEAM_ID;
854 		goto err2;
855 	}
856 
857 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
858 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
859 		status = unmap_address_range(addressSpace,
860 			(addr_t)addressRestrictions->address, size, kernel);
861 		if (status != B_OK)
862 			goto err2;
863 	}
864 
865 	status = addressSpace->InsertArea(area, size, addressRestrictions,
866 		allocationFlags, _virtualAddress);
867 	if (status != B_OK) {
868 		// TODO: wait and try again once this is working in the backend
869 #if 0
870 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
871 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
872 				0, 0);
873 		}
874 #endif
875 		goto err2;
876 	}
877 
878 	// attach the cache to the area
879 	area->cache = cache;
880 	area->cache_offset = offset;
881 
882 	// point the cache back to the area
883 	cache->InsertAreaLocked(area);
884 	if (mapping == REGION_PRIVATE_MAP)
885 		cache->Unlock();
886 
887 	// insert the area in the global area hash table
888 	VMAreaHash::Insert(area);
889 
890 	// grab a ref to the address space (the area holds this)
891 	addressSpace->Get();
892 
893 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
894 //		cache, sourceCache, areaName, area);
895 
896 	*_area = area;
897 	return B_OK;
898 
899 err2:
900 	if (mapping == REGION_PRIVATE_MAP) {
901 		// We created this cache, so we must delete it again. Note that we
902 		// need to temporarily unlock the source cache or we'll otherwise
903 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
904 		sourceCache->Unlock();
905 		cache->ReleaseRefAndUnlock();
906 		sourceCache->Lock();
907 	}
908 err1:
909 	addressSpace->DeleteArea(area, allocationFlags);
910 	return status;
911 }
912 
913 
914 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
915 	  locker1, locker2).
916 */
917 template<typename LockerType1, typename LockerType2>
918 static inline bool
919 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
920 {
921 	area->cache->AssertLocked();
922 
923 	VMAreaUnwiredWaiter waiter;
924 	if (!area->AddWaiterIfWired(&waiter))
925 		return false;
926 
927 	// unlock everything and wait
928 	if (locker1 != NULL)
929 		locker1->Unlock();
930 	if (locker2 != NULL)
931 		locker2->Unlock();
932 
933 	waiter.waitEntry.Wait();
934 
935 	return true;
936 }
937 
938 
939 /*!	Checks whether the given area has any wired ranges intersecting with the
940 	specified range and waits, if so.
941 
942 	When it has to wait, the function calls \c Unlock() on both \a locker1
943 	and \a locker2, if given.
944 	The area's top cache must be locked and must be unlocked as a side effect
945 	of calling \c Unlock() on either \a locker1 or \a locker2.
946 
947 	If the function does not have to wait it does not modify or unlock any
948 	object.
949 
950 	\param area The area to be checked.
951 	\param base The base address of the range to check.
952 	\param size The size of the address range to check.
953 	\param locker1 An object to be unlocked before starting to wait (may
954 		be \c NULL).
955 	\param locker2 An object to be unlocked before starting to wait (may
956 		be \c NULL).
957 	\return \c true, if the function had to wait, \c false otherwise.
958 */
959 template<typename LockerType1, typename LockerType2>
960 static inline bool
961 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
962 	LockerType1* locker1, LockerType2* locker2)
963 {
964 	area->cache->AssertLocked();
965 
966 	VMAreaUnwiredWaiter waiter;
967 	if (!area->AddWaiterIfWired(&waiter, base, size))
968 		return false;
969 
970 	// unlock everything and wait
971 	if (locker1 != NULL)
972 		locker1->Unlock();
973 	if (locker2 != NULL)
974 		locker2->Unlock();
975 
976 	waiter.waitEntry.Wait();
977 
978 	return true;
979 }
980 
981 
982 /*!	Checks whether the given address space has any wired ranges intersecting
983 	with the specified range and waits, if so.
984 
985 	Similar to wait_if_area_range_is_wired(), with the following differences:
986 	- All areas intersecting with the range are checked (or rather, all up to
987 	  the first one found that contains a wired range intersecting with the
988 	  given range).
989 	- The given address space must at least be read-locked and must be unlocked
990 	  when \c Unlock() is called on \a locker.
991 	- None of the areas' caches are allowed to be locked.
992 */
993 template<typename LockerType>
994 static inline bool
995 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
996 	size_t size, LockerType* locker)
997 {
998 	addr_t end = base + size - 1;
999 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
1000 			VMArea* area = it.Next();) {
1001 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
1002 		if (area->Base() > end)
1003 			return false;
1004 
1005 		if (base >= area->Base() + area->Size() - 1)
1006 			continue;
1007 
1008 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1009 
1010 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1011 			return true;
1012 	}
1013 
1014 	return false;
1015 }
1016 
1017 
1018 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1019 	It must be called in a situation where the kernel address space may be
1020 	locked.
1021 */
1022 status_t
1023 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1024 {
1025 	AddressSpaceReadLocker locker;
1026 	VMArea* area;
1027 	status_t status = locker.SetFromArea(id, area);
1028 	if (status != B_OK)
1029 		return status;
1030 
1031 	if (area->page_protections == NULL) {
1032 		status = allocate_area_page_protections(area);
1033 		if (status != B_OK)
1034 			return status;
1035 	}
1036 
1037 	*cookie = (void*)area;
1038 	return B_OK;
1039 }
1040 
1041 
1042 /*!	This is a debug helper function that can only be used with very specific
1043 	use cases.
1044 	Sets protection for the given address range to the protection specified.
1045 	If \a protection is 0 then the involved pages will be marked non-present
1046 	in the translation map to cause a fault on access. The pages aren't
1047 	actually unmapped however so that they can be marked present again with
1048 	additional calls to this function. For this to work the area must be
1049 	fully locked in memory so that the pages aren't otherwise touched.
1050 	This function does not lock the kernel address space and needs to be
1051 	supplied with a \a cookie retrieved from a successful call to
1052 	vm_prepare_kernel_area_debug_protection().
1053 */
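// Illustrative usage sketch (added comment, not part of the original source;
// only the two functions defined in this file are used, while areaID and
// pageAddress are placeholders):
//
//   void* cookie;
//   if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
//       // make one page fault on any access ...
//       vm_set_kernel_area_debug_protection(cookie, pageAddress, B_PAGE_SIZE,
//           0);
//       // ... and later make it accessible again
//       vm_set_kernel_area_debug_protection(cookie, pageAddress, B_PAGE_SIZE,
//           B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//   }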
1054 status_t
1055 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1056 	uint32 protection)
1057 {
1058 	// check address range
1059 	addr_t address = (addr_t)_address;
1060 	size = PAGE_ALIGN(size);
1061 
1062 	if ((address % B_PAGE_SIZE) != 0
1063 		|| (addr_t)address + size < (addr_t)address
1064 		|| !IS_KERNEL_ADDRESS(address)
1065 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1066 		return B_BAD_VALUE;
1067 	}
1068 
1069 	// Translate the kernel protection to user protection as we only store that.
1070 	if ((protection & B_KERNEL_READ_AREA) != 0)
1071 		protection |= B_READ_AREA;
1072 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1073 		protection |= B_WRITE_AREA;
1074 
1075 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1076 	VMTranslationMap* map = addressSpace->TranslationMap();
1077 	VMArea* area = (VMArea*)cookie;
1078 
1079 	addr_t offset = address - area->Base();
1080 	if (area->Size() - offset < size) {
1081 		panic("protect range not fully within supplied area");
1082 		return B_BAD_VALUE;
1083 	}
1084 
1085 	if (area->page_protections == NULL) {
1086 		panic("area has no page protections");
1087 		return B_BAD_VALUE;
1088 	}
1089 
1090 	// Invalidate the mapping entries so any access to them will fault, or
1091 	// restore the mapping entries unchanged so that lookups will succeed again.
1092 	map->Lock();
1093 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1094 	map->Unlock();
1095 
1096 	// And set the proper page protections so that the fault case will actually
1097 	// fail and not simply try to map a new page.
1098 	for (addr_t pageAddress = address; pageAddress < address + size;
1099 			pageAddress += B_PAGE_SIZE) {
1100 		set_area_page_protection(area, pageAddress, protection);
1101 	}
1102 
1103 	return B_OK;
1104 }
1105 
1106 
1107 status_t
1108 vm_block_address_range(const char* name, void* address, addr_t size)
1109 {
1110 	if (!arch_vm_supports_protection(0))
1111 		return B_NOT_SUPPORTED;
1112 
1113 	AddressSpaceWriteLocker locker;
1114 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1115 	if (status != B_OK)
1116 		return status;
1117 
1118 	VMAddressSpace* addressSpace = locker.AddressSpace();
1119 
1120 	// create an anonymous cache
1121 	VMCache* cache;
1122 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1123 		VM_PRIORITY_SYSTEM);
1124 	if (status != B_OK)
1125 		return status;
1126 
1127 	cache->temporary = 1;
1128 	cache->virtual_end = size;
1129 	cache->Lock();
1130 
1131 	VMArea* area;
1132 	virtual_address_restrictions addressRestrictions = {};
1133 	addressRestrictions.address = address;
1134 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1135 	status = map_backing_store(addressSpace, cache, 0, name, size,
1136 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1137 		true, &area, NULL);
1138 	if (status != B_OK) {
1139 		cache->ReleaseRefAndUnlock();
1140 		return status;
1141 	}
1142 
1143 	cache->Unlock();
1144 	area->cache_type = CACHE_TYPE_RAM;
1145 	return area->id;
1146 }
1147 
1148 
1149 status_t
1150 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1151 {
1152 	AddressSpaceWriteLocker locker(team);
1153 	if (!locker.IsLocked())
1154 		return B_BAD_TEAM_ID;
1155 
1156 	VMAddressSpace* addressSpace = locker.AddressSpace();
1157 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1158 		addressSpace == VMAddressSpace::Kernel()
1159 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1160 }
1161 
1162 
1163 status_t
1164 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1165 	addr_t size, uint32 flags)
1166 {
1167 	if (size == 0)
1168 		return B_BAD_VALUE;
1169 
1170 	AddressSpaceWriteLocker locker(team);
1171 	if (!locker.IsLocked())
1172 		return B_BAD_TEAM_ID;
1173 
1174 	virtual_address_restrictions addressRestrictions = {};
1175 	addressRestrictions.address = *_address;
1176 	addressRestrictions.address_specification = addressSpec;
1177 	VMAddressSpace* addressSpace = locker.AddressSpace();
1178 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1179 		addressSpace == VMAddressSpace::Kernel()
1180 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1181 		_address);
1182 }
1183 
1184 
1185 area_id
1186 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1187 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1188 	const virtual_address_restrictions* virtualAddressRestrictions,
1189 	const physical_address_restrictions* physicalAddressRestrictions,
1190 	bool kernel, void** _address)
1191 {
1192 	VMArea* area;
1193 	VMCache* cache;
1194 	vm_page* page = NULL;
1195 	bool isStack = (protection & B_STACK_AREA) != 0;
1196 	page_num_t guardPages;
1197 	bool canOvercommit = false;
1198 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1199 		? VM_PAGE_ALLOC_CLEAR : 0;
1200 
1201 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1202 		team, name, size));
1203 
1204 	size = PAGE_ALIGN(size);
1205 	guardSize = PAGE_ALIGN(guardSize);
1206 	guardPages = guardSize / B_PAGE_SIZE;
1207 
1208 	if (size == 0 || size < guardSize)
1209 		return B_BAD_VALUE;
1210 	if (!arch_vm_supports_protection(protection))
1211 		return B_NOT_SUPPORTED;
1212 
1213 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1214 		canOvercommit = true;
1215 
1216 #ifdef DEBUG_KERNEL_STACKS
1217 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1218 		isStack = true;
1219 #endif
1220 
1221 	// check parameters
1222 	switch (virtualAddressRestrictions->address_specification) {
1223 		case B_ANY_ADDRESS:
1224 		case B_EXACT_ADDRESS:
1225 		case B_BASE_ADDRESS:
1226 		case B_ANY_KERNEL_ADDRESS:
1227 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1228 		case B_RANDOMIZED_ANY_ADDRESS:
1229 		case B_RANDOMIZED_BASE_ADDRESS:
1230 			break;
1231 
1232 		default:
1233 			return B_BAD_VALUE;
1234 	}
1235 
1236 	// If low or high physical address restrictions are given, we force
1237 	// B_CONTIGUOUS wiring, since only then will we use
1238 	// vm_page_allocate_page_run() which deals with those restrictions.
1239 	if (physicalAddressRestrictions->low_address != 0
1240 		|| physicalAddressRestrictions->high_address != 0) {
1241 		wiring = B_CONTIGUOUS;
1242 	}
1243 
1244 	physical_address_restrictions stackPhysicalRestrictions;
1245 	bool doReserveMemory = false;
1246 	switch (wiring) {
1247 		case B_NO_LOCK:
1248 			break;
1249 		case B_FULL_LOCK:
1250 		case B_LAZY_LOCK:
1251 		case B_CONTIGUOUS:
1252 			doReserveMemory = true;
1253 			break;
1254 		case B_ALREADY_WIRED:
1255 			break;
1256 		case B_LOMEM:
1257 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1258 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1259 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1260 			wiring = B_CONTIGUOUS;
1261 			doReserveMemory = true;
1262 			break;
1263 		case B_32_BIT_FULL_LOCK:
1264 			if (B_HAIKU_PHYSICAL_BITS <= 32
1265 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1266 				wiring = B_FULL_LOCK;
1267 				doReserveMemory = true;
1268 				break;
1269 			}
1270 			// TODO: We don't really support this mode efficiently. Just fall
1271 			// through for now ...
1272 		case B_32_BIT_CONTIGUOUS:
1273 			#if B_HAIKU_PHYSICAL_BITS > 32
1274 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1275 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1276 					stackPhysicalRestrictions.high_address
1277 						= (phys_addr_t)1 << 32;
1278 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1279 				}
1280 			#endif
1281 			wiring = B_CONTIGUOUS;
1282 			doReserveMemory = true;
1283 			break;
1284 		default:
1285 			return B_BAD_VALUE;
1286 	}
1287 
1288 	// Optimization: For a single-page contiguous allocation without low/high
1289 	// memory restrictions, B_FULL_LOCK wiring suffices.
1290 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1291 		&& physicalAddressRestrictions->low_address == 0
1292 		&& physicalAddressRestrictions->high_address == 0) {
1293 		wiring = B_FULL_LOCK;
1294 	}
1295 
1296 	// For full lock or contiguous areas we're also going to map the pages and
1297 	// thus need to reserve pages for the mapping backend upfront.
1298 	addr_t reservedMapPages = 0;
1299 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1300 		AddressSpaceWriteLocker locker;
1301 		status_t status = locker.SetTo(team);
1302 		if (status != B_OK)
1303 			return status;
1304 
1305 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1306 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1307 	}
1308 
1309 	int priority;
1310 	if (team != VMAddressSpace::KernelID())
1311 		priority = VM_PRIORITY_USER;
1312 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1313 		priority = VM_PRIORITY_VIP;
1314 	else
1315 		priority = VM_PRIORITY_SYSTEM;
1316 
1317 	// Reserve memory before acquiring the address space lock. This reduces the
1318 	// chances of failure, since while we hold the write lock on the address
1319 	// space (at least if it is the kernel address space), the low memory handler
1320 	// won't be able to free anything for us.
1321 	addr_t reservedMemory = 0;
1322 	if (doReserveMemory) {
1323 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1324 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1325 			return B_NO_MEMORY;
1326 		reservedMemory = size;
1327 		// TODO: We don't reserve the memory for the pages for the page
1328 		// directories/tables. We actually need to, since we currently don't
1329 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1330 		// there are actually fewer physical pages than there should be, which
1331 		// can get the VM into trouble in low memory situations.
1332 	}
1333 
1334 	AddressSpaceWriteLocker locker;
1335 	VMAddressSpace* addressSpace;
1336 	status_t status;
1337 
1338 	// For full lock areas reserve the pages before locking the address
1339 	// space. E.g. block caches can't release their memory while we hold the
1340 	// address space lock.
1341 	page_num_t reservedPages = reservedMapPages;
1342 	if (wiring == B_FULL_LOCK)
1343 		reservedPages += size / B_PAGE_SIZE;
1344 
1345 	vm_page_reservation reservation;
1346 	if (reservedPages > 0) {
1347 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1348 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1349 					priority)) {
1350 				reservedPages = 0;
1351 				status = B_WOULD_BLOCK;
1352 				goto err0;
1353 			}
1354 		} else
1355 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1356 	}
1357 
1358 	if (wiring == B_CONTIGUOUS) {
1359 		// we try to allocate the page run here upfront as this may easily
1360 		// fail for obvious reasons
1361 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1362 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1363 		if (page == NULL) {
1364 			status = B_NO_MEMORY;
1365 			goto err0;
1366 		}
1367 	}
1368 
1369 	// Lock the address space and, if B_EXACT_ADDRESS and
1370 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1371 	// is not wired.
1372 	do {
1373 		status = locker.SetTo(team);
1374 		if (status != B_OK)
1375 			goto err1;
1376 
1377 		addressSpace = locker.AddressSpace();
1378 	} while (virtualAddressRestrictions->address_specification
1379 			== B_EXACT_ADDRESS
1380 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1381 		&& wait_if_address_range_is_wired(addressSpace,
1382 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1383 
1384 	// create an anonymous cache
1385 	// if it's a stack, make sure that at least two pages are available
1386 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1387 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1388 		wiring == B_NO_LOCK, priority);
1389 	if (status != B_OK)
1390 		goto err1;
1391 
1392 	cache->temporary = 1;
1393 	cache->virtual_end = size;
1394 	cache->committed_size = reservedMemory;
1395 		// TODO: This should be done via a method.
1396 	reservedMemory = 0;
1397 
1398 	cache->Lock();
1399 
1400 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1401 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1402 		kernel, &area, _address);
1403 
1404 	if (status != B_OK) {
1405 		cache->ReleaseRefAndUnlock();
1406 		goto err1;
1407 	}
1408 
1409 	locker.DegradeToReadLock();
1410 
1411 	switch (wiring) {
1412 		case B_NO_LOCK:
1413 		case B_LAZY_LOCK:
1414 			// do nothing - the pages are mapped in as needed
1415 			break;
1416 
1417 		case B_FULL_LOCK:
1418 		{
1419 			// Allocate and map all pages for this area
1420 
1421 			off_t offset = 0;
1422 			for (addr_t address = area->Base();
1423 					address < area->Base() + (area->Size() - 1);
1424 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1425 #ifdef DEBUG_KERNEL_STACKS
1426 #	ifdef STACK_GROWS_DOWNWARDS
1427 				if (isStack && address < area->Base()
1428 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1429 #	else
1430 				if (isStack && address >= area->Base() + area->Size()
1431 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1432 #	endif
1433 					continue;
1434 #endif
1435 				vm_page* page = vm_page_allocate_page(&reservation,
1436 					PAGE_STATE_WIRED | pageAllocFlags);
1437 				cache->InsertPage(page, offset);
1438 				map_page(area, page, address, protection, &reservation);
1439 
1440 				DEBUG_PAGE_ACCESS_END(page);
1441 			}
1442 
1443 			break;
1444 		}
1445 
1446 		case B_ALREADY_WIRED:
1447 		{
1448 			// The pages should already be mapped. This is only really useful
1449 			// during boot time. Find the appropriate vm_page objects and stick
1450 			// them in the cache object.
1451 			VMTranslationMap* map = addressSpace->TranslationMap();
1452 			off_t offset = 0;
1453 
1454 			if (!gKernelStartup)
1455 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1456 
1457 			map->Lock();
1458 
1459 			for (addr_t virtualAddress = area->Base();
1460 					virtualAddress < area->Base() + (area->Size() - 1);
1461 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1462 				phys_addr_t physicalAddress;
1463 				uint32 flags;
1464 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1465 				if (status < B_OK) {
1466 					panic("looking up mapping failed for va 0x%lx\n",
1467 						virtualAddress);
1468 				}
1469 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1470 				if (page == NULL) {
1471 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1472 						"\n", physicalAddress);
1473 				}
1474 
1475 				DEBUG_PAGE_ACCESS_START(page);
1476 
1477 				cache->InsertPage(page, offset);
1478 				increment_page_wired_count(page);
1479 				vm_page_set_state(page, PAGE_STATE_WIRED);
1480 				page->busy = false;
1481 
1482 				DEBUG_PAGE_ACCESS_END(page);
1483 			}
1484 
1485 			map->Unlock();
1486 			break;
1487 		}
1488 
1489 		case B_CONTIGUOUS:
1490 		{
1491 			// We have already allocated our contiguous page run, so we can now
1492 			// just map its pages into the address space
1493 			VMTranslationMap* map = addressSpace->TranslationMap();
1494 			phys_addr_t physicalAddress
1495 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1496 			addr_t virtualAddress = area->Base();
1497 			off_t offset = 0;
1498 
1499 			map->Lock();
1500 
1501 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1502 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1503 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1504 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1505 				if (page == NULL)
1506 					panic("couldn't lookup physical page just allocated\n");
1507 
1508 				status = map->Map(virtualAddress, physicalAddress, protection,
1509 					area->MemoryType(), &reservation);
1510 				if (status < B_OK)
1511 					panic("couldn't map physical page in page run\n");
1512 
1513 				cache->InsertPage(page, offset);
1514 				increment_page_wired_count(page);
1515 
1516 				DEBUG_PAGE_ACCESS_END(page);
1517 			}
1518 
1519 			map->Unlock();
1520 			break;
1521 		}
1522 
1523 		default:
1524 			break;
1525 	}
1526 
1527 	cache->Unlock();
1528 
1529 	if (reservedPages > 0)
1530 		vm_page_unreserve_pages(&reservation);
1531 
1532 	TRACE(("vm_create_anonymous_area: done\n"));
1533 
1534 	area->cache_type = CACHE_TYPE_RAM;
1535 	return area->id;
1536 
1537 err1:
1538 	if (wiring == B_CONTIGUOUS) {
1539 		// we had allocated the contiguous page run upfront; free its pages again
1540 		phys_addr_t pageNumber = page->physical_page_number;
1541 		int32 i;
1542 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1543 			page = vm_lookup_page(pageNumber);
1544 			if (page == NULL)
1545 				panic("couldn't lookup physical page just allocated\n");
1546 
1547 			vm_page_set_state(page, PAGE_STATE_FREE);
1548 		}
1549 	}
1550 
1551 err0:
1552 	if (reservedPages > 0)
1553 		vm_page_unreserve_pages(&reservation);
1554 	if (reservedMemory > 0)
1555 		vm_unreserve_memory(reservedMemory);
1556 
1557 	return status;
1558 }
1559 
1560 
1561 area_id
1562 vm_map_physical_memory(team_id team, const char* name, void** _address,
1563 	uint32 addressSpec, addr_t size, uint32 protection,
1564 	phys_addr_t physicalAddress, bool alreadyWired)
1565 {
1566 	VMArea* area;
1567 	VMCache* cache;
1568 	addr_t mapOffset;
1569 
1570 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1571 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1572 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1573 		addressSpec, size, protection, physicalAddress));
1574 
1575 	if (!arch_vm_supports_protection(protection))
1576 		return B_NOT_SUPPORTED;
1577 
1578 	AddressSpaceWriteLocker locker(team);
1579 	if (!locker.IsLocked())
1580 		return B_BAD_TEAM_ID;
1581 
1582 	// if the physical address is not page aligned,
1583 	// move the area base down to align on a page boundary
1584 	mapOffset = physicalAddress % B_PAGE_SIZE;
1585 	size += mapOffset;
1586 	physicalAddress -= mapOffset;
1587 
1588 	size = PAGE_ALIGN(size);
1589 
1590 	// create a device cache
1591 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1592 	if (status != B_OK)
1593 		return status;
1594 
1595 	cache->virtual_end = size;
1596 
1597 	cache->Lock();
1598 
1599 	virtual_address_restrictions addressRestrictions = {};
1600 	addressRestrictions.address = *_address;
1601 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1602 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1603 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1604 		true, &area, _address);
1605 
1606 	if (status < B_OK)
1607 		cache->ReleaseRefLocked();
1608 
1609 	cache->Unlock();
1610 
1611 	if (status == B_OK) {
1612 		// set requested memory type -- use uncached, if not given
1613 		uint32 memoryType = addressSpec & B_MTR_MASK;
1614 		if (memoryType == 0)
1615 			memoryType = B_MTR_UC;
1616 
1617 		area->SetMemoryType(memoryType);
1618 
1619 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1620 		if (status != B_OK)
1621 			delete_area(locker.AddressSpace(), area, false);
1622 	}
1623 
1624 	if (status != B_OK)
1625 		return status;
1626 
1627 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1628 
1629 	if (alreadyWired) {
1630 		// The area is already mapped, but possibly not with the right
1631 		// memory type.
1632 		map->Lock();
1633 		map->ProtectArea(area, area->protection);
1634 		map->Unlock();
1635 	} else {
1636 		// Map the area completely.
1637 
1638 		// reserve pages needed for the mapping
1639 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1640 			area->Base() + (size - 1));
1641 		vm_page_reservation reservation;
1642 		vm_page_reserve_pages(&reservation, reservePages,
1643 			team == VMAddressSpace::KernelID()
1644 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1645 
1646 		map->Lock();
1647 
1648 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1649 			map->Map(area->Base() + offset, physicalAddress + offset,
1650 				protection, area->MemoryType(), &reservation);
1651 		}
1652 
1653 		map->Unlock();
1654 
1655 		vm_page_unreserve_pages(&reservation);
1656 	}
1657 
1658 	// modify the pointer returned to be offset back into the new area
1659 	// the same way the input physical address was offset
1660 	*_address = (void*)((addr_t)*_address + mapOffset);
1661 
1662 	area->cache_type = CACHE_TYPE_DEVICE;
1663 	return area->id;
1664 }
1665 
1666 
1667 /*!	Don't use!
1668 	TODO: This function was introduced to map physical page vecs to
1669 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1670 	use a device cache and does not track vm_page::wired_count!
1671 */
1672 area_id
1673 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1674 	uint32 addressSpec, addr_t* _size, uint32 protection,
1675 	struct generic_io_vec* vecs, uint32 vecCount)
1676 {
1677 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1678 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1679 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1680 		addressSpec, _size, protection, vecs, vecCount));
1681 
1682 	if (!arch_vm_supports_protection(protection)
1683 		|| (addressSpec & B_MTR_MASK) != 0) {
1684 		return B_NOT_SUPPORTED;
1685 	}
1686 
1687 	AddressSpaceWriteLocker locker(team);
1688 	if (!locker.IsLocked())
1689 		return B_BAD_TEAM_ID;
1690 
1691 	if (vecCount == 0)
1692 		return B_BAD_VALUE;
1693 
1694 	addr_t size = 0;
1695 	for (uint32 i = 0; i < vecCount; i++) {
1696 		if (vecs[i].base % B_PAGE_SIZE != 0
1697 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1698 			return B_BAD_VALUE;
1699 		}
1700 
1701 		size += vecs[i].length;
1702 	}
1703 
1704 	// create a device cache
1705 	VMCache* cache;
1706 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1707 	if (result != B_OK)
1708 		return result;
1709 
1710 	cache->virtual_end = size;
1711 
1712 	cache->Lock();
1713 
1714 	VMArea* area;
1715 	virtual_address_restrictions addressRestrictions = {};
1716 	addressRestrictions.address = *_address;
1717 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1718 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1719 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1720 		&addressRestrictions, true, &area, _address);
1721 
1722 	if (result != B_OK)
1723 		cache->ReleaseRefLocked();
1724 
1725 	cache->Unlock();
1726 
1727 	if (result != B_OK)
1728 		return result;
1729 
1730 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1731 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1732 		area->Base() + (size - 1));
1733 
1734 	vm_page_reservation reservation;
1735 	vm_page_reserve_pages(&reservation, reservePages,
1736 			team == VMAddressSpace::KernelID()
1737 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1738 	map->Lock();
1739 
1740 	uint32 vecIndex = 0;
1741 	size_t vecOffset = 0;
1742 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1743 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1744 			vecOffset = 0;
1745 			vecIndex++;
1746 		}
1747 
1748 		if (vecIndex >= vecCount)
1749 			break;
1750 
1751 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1752 			protection, area->MemoryType(), &reservation);
1753 
1754 		vecOffset += B_PAGE_SIZE;
1755 	}
1756 
1757 	map->Unlock();
1758 	vm_page_unreserve_pages(&reservation);
1759 
1760 	if (_size != NULL)
1761 		*_size = size;
1762 
1763 	area->cache_type = CACHE_TYPE_DEVICE;
1764 	return area->id;
1765 }
1766 
1767 
1768 area_id
1769 vm_create_null_area(team_id team, const char* name, void** address,
1770 	uint32 addressSpec, addr_t size, uint32 flags)
1771 {
1772 	size = PAGE_ALIGN(size);
1773 
1774 	// Lock the address space and, if B_EXACT_ADDRESS and
1775 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1776 	// is not wired.
1777 	AddressSpaceWriteLocker locker;
1778 	do {
1779 		if (locker.SetTo(team) != B_OK)
1780 			return B_BAD_TEAM_ID;
1781 	} while (addressSpec == B_EXACT_ADDRESS
1782 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1783 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1784 			(addr_t)*address, size, &locker));
1785 
1786 	// create a null cache
1787 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1788 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1789 	VMCache* cache;
1790 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1791 	if (status != B_OK)
1792 		return status;
1793 
1794 	cache->temporary = 1;
1795 	cache->virtual_end = size;
1796 
1797 	cache->Lock();
1798 
1799 	VMArea* area;
1800 	virtual_address_restrictions addressRestrictions = {};
1801 	addressRestrictions.address = *address;
1802 	addressRestrictions.address_specification = addressSpec;
1803 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1804 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1805 		&addressRestrictions, true, &area, address);
1806 
1807 	if (status < B_OK) {
1808 		cache->ReleaseRefAndUnlock();
1809 		return status;
1810 	}
1811 
1812 	cache->Unlock();
1813 
1814 	area->cache_type = CACHE_TYPE_NULL;
1815 	return area->id;
1816 }
1817 
1818 
1819 /*!	Creates the vnode cache for the specified \a vnode.
1820 	The vnode has to be marked busy when calling this function.
1821 */
1822 status_t
1823 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1824 {
1825 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1826 }
1827 
1828 
1829 /*!	\a cache must be locked. The area's address space must be read-locked.
1830 */
1831 static void
1832 pre_map_area_pages(VMArea* area, VMCache* cache,
1833 	vm_page_reservation* reservation)
1834 {
1835 	addr_t baseAddress = area->Base();
1836 	addr_t cacheOffset = area->cache_offset;
1837 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1838 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1839 
1840 	for (VMCachePagesTree::Iterator it
1841 				= cache->pages.GetIterator(firstPage, true, true);
1842 			vm_page* page = it.Next();) {
1843 		if (page->cache_offset >= endPage)
1844 			break;
1845 
1846 		// skip busy and inactive pages
1847 		if (page->busy || page->usage_count == 0)
1848 			continue;
1849 
1850 		DEBUG_PAGE_ACCESS_START(page);
1851 		map_page(area, page,
1852 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1853 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1854 		DEBUG_PAGE_ACCESS_END(page);
1855 	}
1856 }
1857 
1858 
1859 /*!	Will map the file specified by \a fd to an area in memory.
1860 	The file will be mirrored beginning at the specified \a offset. The
1861 	\a offset and \a size arguments have to be page aligned.
1862 */
1863 static area_id
1864 _vm_map_file(team_id team, const char* name, void** _address,
1865 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1866 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1867 {
1868 	// TODO: for binary files, we want to make sure that they get a
1869 	//	copy of the file as it is at a given point in time, i.e. later
1870 	//	changes should not make it into the mapped copy -- doing this
1871 	//	nicely will need quite some changes
1872 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1873 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1874 
1875 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1876 	size = PAGE_ALIGN(size);
1877 
1878 	if (mapping == REGION_NO_PRIVATE_MAP)
1879 		protection |= B_SHARED_AREA;
1880 	if (addressSpec != B_EXACT_ADDRESS)
1881 		unmapAddressRange = false;
1882 
1883 	if (fd < 0) {
1884 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1885 		virtual_address_restrictions virtualRestrictions = {};
1886 		virtualRestrictions.address = *_address;
1887 		virtualRestrictions.address_specification = addressSpec;
1888 		physical_address_restrictions physicalRestrictions = {};
1889 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1890 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1891 			_address);
1892 	}
1893 
1894 	// get the open flags of the FD
1895 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1896 	if (descriptor == NULL)
1897 		return EBADF;
1898 	int32 openMode = descriptor->open_mode;
1899 	put_fd(descriptor);
1900 
1901 	// The FD must be open for reading in any case. For a shared mapping with
1902 	// write access, the FD additionally has to be open for writing.
1903 	if ((openMode & O_ACCMODE) == O_WRONLY
1904 		|| (mapping == REGION_NO_PRIVATE_MAP
1905 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1906 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1907 		return EACCES;
1908 	}
1909 
1910 	// get the vnode for the object, this also grabs a ref to it
1911 	struct vnode* vnode = NULL;
1912 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1913 	if (status < B_OK)
1914 		return status;
1915 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1916 
1917 	// If we're going to pre-map pages, we need to reserve the pages needed by
1918 	// the mapping backend upfront.
1919 	page_num_t reservedPreMapPages = 0;
1920 	vm_page_reservation reservation;
1921 	if ((protection & B_READ_AREA) != 0) {
1922 		AddressSpaceWriteLocker locker;
1923 		status = locker.SetTo(team);
1924 		if (status != B_OK)
1925 			return status;
1926 
1927 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1928 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1929 
1930 		locker.Unlock();
1931 
1932 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1933 			team == VMAddressSpace::KernelID()
1934 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1935 	}
1936 
1937 	struct PageUnreserver {
1938 		PageUnreserver(vm_page_reservation* reservation)
1939 			:
1940 			fReservation(reservation)
1941 		{
1942 		}
1943 
1944 		~PageUnreserver()
1945 		{
1946 			if (fReservation != NULL)
1947 				vm_page_unreserve_pages(fReservation);
1948 		}
1949 
1950 		vm_page_reservation* fReservation;
1951 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1952 
1953 	// Lock the address space and, if the specified address range shall be
1954 	// unmapped, ensure it is not wired.
1955 	AddressSpaceWriteLocker locker;
1956 	do {
1957 		if (locker.SetTo(team) != B_OK)
1958 			return B_BAD_TEAM_ID;
1959 	} while (unmapAddressRange
1960 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1961 			(addr_t)*_address, size, &locker));
1962 
1963 	// TODO: this only works for file systems that use the file cache
1964 	VMCache* cache;
1965 	status = vfs_get_vnode_cache(vnode, &cache, false);
1966 	if (status < B_OK)
1967 		return status;
1968 
1969 	cache->Lock();
1970 
1971 	VMArea* area;
1972 	virtual_address_restrictions addressRestrictions = {};
1973 	addressRestrictions.address = *_address;
1974 	addressRestrictions.address_specification = addressSpec;
1975 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1976 		0, protection, mapping,
1977 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1978 		&addressRestrictions, kernel, &area, _address);
1979 
1980 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1981 		// map_backing_store() cannot know we no longer need the ref
1982 		cache->ReleaseRefLocked();
1983 	}
1984 
1985 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1986 		pre_map_area_pages(area, cache, &reservation);
1987 
1988 	cache->Unlock();
1989 
1990 	if (status == B_OK) {
1991 		// TODO: this probably deserves a smarter solution, i.e. don't always
1992 		// prefetch stuff, and probably don't trigger it at this place either.
1993 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1994 			// prefetches at max 10 MB starting from "offset"
1995 	}
1996 
1997 	if (status != B_OK)
1998 		return status;
1999 
2000 	area->cache_type = CACHE_TYPE_VNODE;
2001 	return area->id;
2002 }
2003 
2004 
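/*!	Kernel-internal entry point for mapping the file referred to by \a fd
	into the given team's address space. Checks that the architecture
	supports the requested \a protection and then calls _vm_map_file()
	with kernel privileges.
*/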
2005 area_id
2006 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2007 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2008 	int fd, off_t offset)
2009 {
2010 	if (!arch_vm_supports_protection(protection))
2011 		return B_NOT_SUPPORTED;
2012 
2013 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2014 		mapping, unmapAddressRange, fd, offset, true);
2015 }
2016 
2017 
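/*!	Returns the given area's cache, locked and with a reference acquired.
	Since the area's cache may be exchanged while we wait for the lock,
	the lookup is retried until the locked cache is still the area's cache.
	Release it with vm_area_put_locked_cache().
*/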
2018 VMCache*
2019 vm_area_get_locked_cache(VMArea* area)
2020 {
2021 	rw_lock_read_lock(&sAreaCacheLock);
2022 
2023 	while (true) {
2024 		VMCache* cache = area->cache;
2025 
2026 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2027 			// cache has been deleted
2028 			rw_lock_read_lock(&sAreaCacheLock);
2029 			continue;
2030 		}
2031 
2032 		rw_lock_read_lock(&sAreaCacheLock);
2033 
2034 		if (cache == area->cache) {
2035 			cache->AcquireRefLocked();
2036 			rw_lock_read_unlock(&sAreaCacheLock);
2037 			return cache;
2038 		}
2039 
2040 		// the cache changed in the meantime
2041 		cache->Unlock();
2042 	}
2043 }
2044 
2045 
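/*!	Releases the reference and the lock acquired by
	vm_area_get_locked_cache().
*/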
2046 void
2047 vm_area_put_locked_cache(VMCache* cache)
2048 {
2049 	cache->ReleaseRefAndUnlock();
2050 }
2051 
2052 
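/*!	Clones the area with ID \a sourceID into the address space of the given
	team. Both the source area and the clone are marked B_SHARED_AREA, so
	they share the same cache and copy-on-write is not involved. If the new
	area is B_FULL_LOCK wired, its pages are mapped in right away.
	Returns the ID of the new area on success, or an error code.
*/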
2053 area_id
2054 vm_clone_area(team_id team, const char* name, void** address,
2055 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2056 	bool kernel)
2057 {
2058 	VMArea* newArea = NULL;
2059 	VMArea* sourceArea;
2060 
2061 	// Check whether the source area exists and is cloneable. If so, mark it
2062 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2063 	{
2064 		AddressSpaceWriteLocker locker;
2065 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2066 		if (status != B_OK)
2067 			return status;
2068 
2069 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2070 			return B_NOT_ALLOWED;
2071 
2072 		sourceArea->protection |= B_SHARED_AREA;
2073 		protection |= B_SHARED_AREA;
2074 	}
2075 
2076 	// Now lock both address spaces and actually do the cloning.
2077 
2078 	MultiAddressSpaceLocker locker;
2079 	VMAddressSpace* sourceAddressSpace;
2080 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2081 	if (status != B_OK)
2082 		return status;
2083 
2084 	VMAddressSpace* targetAddressSpace;
2085 	status = locker.AddTeam(team, true, &targetAddressSpace);
2086 	if (status != B_OK)
2087 		return status;
2088 
2089 	status = locker.Lock();
2090 	if (status != B_OK)
2091 		return status;
2092 
2093 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2094 	if (sourceArea == NULL)
2095 		return B_BAD_VALUE;
2096 
2097 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2098 		return B_NOT_ALLOWED;
2099 
2100 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2101 
2102 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2103 	//	have been adapted. Maybe it should be part of the kernel settings,
2104 	//	anyway (so that old drivers can always work).
2105 #if 0
2106 	if (sourceArea->aspace == VMAddressSpace::Kernel()
2107 		&& addressSpace != VMAddressSpace::Kernel()
2108 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2109 		// kernel areas must not be cloned in userland, unless explicitly
2110 		// declared user-cloneable upon construction
2111 		status = B_NOT_ALLOWED;
2112 	} else
2113 #endif
2114 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2115 		status = B_NOT_ALLOWED;
2116 	else {
2117 		virtual_address_restrictions addressRestrictions = {};
2118 		addressRestrictions.address = *address;
2119 		addressRestrictions.address_specification = addressSpec;
2120 		status = map_backing_store(targetAddressSpace, cache,
2121 			sourceArea->cache_offset, name, sourceArea->Size(),
2122 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2123 			kernel, &newArea, address);
2124 	}
2125 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2126 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2127 		// to create a new cache, and has therefore already acquired a reference
2128 		// to the source cache - but otherwise it has no idea that we need
2129 		// one.
2130 		cache->AcquireRefLocked();
2131 	}
2132 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2133 		// we need to map in everything at this point
2134 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2135 			// we don't have actual pages to map but a physical area
2136 			VMTranslationMap* map
2137 				= sourceArea->address_space->TranslationMap();
2138 			map->Lock();
2139 
2140 			phys_addr_t physicalAddress;
2141 			uint32 oldProtection;
2142 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2143 
2144 			map->Unlock();
2145 
2146 			map = targetAddressSpace->TranslationMap();
2147 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2148 				newArea->Base() + (newArea->Size() - 1));
2149 
2150 			vm_page_reservation reservation;
2151 			vm_page_reserve_pages(&reservation, reservePages,
2152 				targetAddressSpace == VMAddressSpace::Kernel()
2153 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2154 			map->Lock();
2155 
2156 			for (addr_t offset = 0; offset < newArea->Size();
2157 					offset += B_PAGE_SIZE) {
2158 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2159 					protection, newArea->MemoryType(), &reservation);
2160 			}
2161 
2162 			map->Unlock();
2163 			vm_page_unreserve_pages(&reservation);
2164 		} else {
2165 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2166 			size_t reservePages = map->MaxPagesNeededToMap(
2167 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2168 			vm_page_reservation reservation;
2169 			vm_page_reserve_pages(&reservation, reservePages,
2170 				targetAddressSpace == VMAddressSpace::Kernel()
2171 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2172 
2173 			// map in all pages from source
2174 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2175 					vm_page* page  = it.Next();) {
2176 				if (!page->busy) {
2177 					DEBUG_PAGE_ACCESS_START(page);
2178 					map_page(newArea, page,
2179 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2180 							- newArea->cache_offset),
2181 						protection, &reservation);
2182 					DEBUG_PAGE_ACCESS_END(page);
2183 				}
2184 			}
2185 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2186 			// ensuring that!
2187 
2188 			vm_page_unreserve_pages(&reservation);
2189 		}
2190 	}
2191 	if (status == B_OK)
2192 		newArea->cache_type = sourceArea->cache_type;
2193 
2194 	vm_area_put_locked_cache(cache);
2195 
2196 	if (status < B_OK)
2197 		return status;
2198 
2199 	return newArea->id;
2200 }
2201 
2202 
2203 /*!	Deletes the specified area of the given address space.
2204 
2205 	The address space must be write-locked.
2206 	The caller must ensure that the area does not have any wired ranges.
2207 
2208 	\param addressSpace The address space containing the area.
2209 	\param area The area to be deleted.
2210 	\param deletingAddressSpace \c true, if the address space is in the process
2211 		of being deleted.
2212 */
2213 static void
2214 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2215 	bool deletingAddressSpace)
2216 {
2217 	ASSERT(!area->IsWired());
2218 
2219 	VMAreaHash::Remove(area);
2220 
2221 	// At this point the area is removed from the global hash table, but
2222 	// still exists in the area list.
2223 
2224 	// Unmap the virtual address space the area occupied.
2225 	{
2226 		// We need to lock the complete cache chain.
2227 		VMCache* topCache = vm_area_get_locked_cache(area);
2228 		VMCacheChainLocker cacheChainLocker(topCache);
2229 		cacheChainLocker.LockAllSourceCaches();
2230 
2231 		// If the area's top cache is a temporary cache and the area is the only
2232 		// one referencing it (besides us currently holding a second reference),
2233 		// the unmapping code doesn't need to care about preserving the accessed
2234 		// and dirty flags of the top cache page mappings.
2235 		bool ignoreTopCachePageFlags
2236 			= topCache->temporary && topCache->RefCount() == 2;
2237 
2238 		area->address_space->TranslationMap()->UnmapArea(area,
2239 			deletingAddressSpace, ignoreTopCachePageFlags);
2240 	}
2241 
2242 	if (!area->cache->temporary)
2243 		area->cache->WriteModified();
2244 
2245 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2246 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2247 
2248 	arch_vm_unset_memory_type(area);
2249 	addressSpace->RemoveArea(area, allocationFlags);
2250 	addressSpace->Put();
2251 
2252 	area->cache->RemoveArea(area);
2253 	area->cache->ReleaseRef();
2254 
2255 	addressSpace->DeleteArea(area, allocationFlags);
2256 }
2257 
2258 
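/*!	Deletes the area with ID \a id in the given team's address space.
	Waits until the area is no longer wired before deleting it. Unless
	\a kernel is \c true, areas protected with B_KERNEL_AREA cannot be
	deleted.
*/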
2259 status_t
2260 vm_delete_area(team_id team, area_id id, bool kernel)
2261 {
2262 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2263 		team, id));
2264 
2265 	// lock the address space and make sure the area isn't wired
2266 	AddressSpaceWriteLocker locker;
2267 	VMArea* area;
2268 	AreaCacheLocker cacheLocker;
2269 
2270 	do {
2271 		status_t status = locker.SetFromArea(team, id, area);
2272 		if (status != B_OK)
2273 			return status;
2274 
2275 		cacheLocker.SetTo(area);
2276 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2277 
2278 	cacheLocker.Unlock();
2279 
2280 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2281 		return B_NOT_ALLOWED;
2282 
2283 	delete_area(locker.AddressSpace(), area, false);
2284 	return B_OK;
2285 }
2286 
2287 
2288 /*!	Creates a new cache on top of the given cache, moves all areas from
2289 	the old cache to the new one, and changes the protection of all affected
2290 	areas' pages to read-only. If requested, wired pages are moved up to the
2291 	new cache and copies are added to the old cache in their place.
2292 	Preconditions:
2293 	- The given cache must be locked.
2294 	- All of the cache's areas' address spaces must be read locked.
2295 	- Either the cache must not have any wired ranges or a page reservation for
2296 	  all wired pages must be provided, so they can be copied.
2297 
2298 	\param lowerCache The cache on top of which a new cache shall be created.
2299 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2300 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2301 		has wired pages. The wired pages are copied in this case.
2302 */
2303 static status_t
2304 vm_copy_on_write_area(VMCache* lowerCache,
2305 	vm_page_reservation* wiredPagesReservation)
2306 {
2307 	VMCache* upperCache;
2308 
2309 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2310 
2311 	// We need to separate the cache from its areas. The cache goes one level
2312 	// deeper and we create a new cache in between.
2313 
2314 	// create an anonymous cache
2315 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2316 		lowerCache->GuardSize() / B_PAGE_SIZE,
2317 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2318 		VM_PRIORITY_USER);
2319 	if (status != B_OK)
2320 		return status;
2321 
2322 	upperCache->Lock();
2323 
2324 	upperCache->temporary = 1;
2325 	upperCache->virtual_base = lowerCache->virtual_base;
2326 	upperCache->virtual_end = lowerCache->virtual_end;
2327 
2328 	// transfer the lower cache areas to the upper cache
2329 	rw_lock_write_lock(&sAreaCacheLock);
2330 	upperCache->TransferAreas(lowerCache);
2331 	rw_lock_write_unlock(&sAreaCacheLock);
2332 
2333 	lowerCache->AddConsumer(upperCache);
2334 
2335 	// We now need to remap all pages from all of the cache's areas read-only,
2336 	// so that a copy will be created on next write access. If there are wired
2337 	// pages, we keep their protection, move them to the upper cache and create
2338 	// copies for the lower cache.
2339 	if (wiredPagesReservation != NULL) {
2340 		// We need to handle wired pages -- iterate through the cache's pages.
2341 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2342 				vm_page* page = it.Next();) {
2343 			if (page->WiredCount() > 0) {
2344 				// allocate a new page and copy the wired one
2345 				vm_page* copiedPage = vm_page_allocate_page(
2346 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2347 
2348 				vm_memcpy_physical_page(
2349 					copiedPage->physical_page_number * B_PAGE_SIZE,
2350 					page->physical_page_number * B_PAGE_SIZE);
2351 
2352 				// move the wired page to the upper cache (note: removing is OK
2353 				// with the SplayTree iterator) and insert the copy
2354 				upperCache->MovePage(page);
2355 				lowerCache->InsertPage(copiedPage,
2356 					page->cache_offset * B_PAGE_SIZE);
2357 
2358 				DEBUG_PAGE_ACCESS_END(copiedPage);
2359 			} else {
2360 				// Change the protection of this page in all areas.
2361 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2362 						tempArea = tempArea->cache_next) {
2363 					// The area must be readable in the same way it was
2364 					// previously writable.
2365 					uint32 protection = B_KERNEL_READ_AREA;
2366 					if ((tempArea->protection & B_READ_AREA) != 0)
2367 						protection |= B_READ_AREA;
2368 
2369 					VMTranslationMap* map
2370 						= tempArea->address_space->TranslationMap();
2371 					map->Lock();
2372 					map->ProtectPage(tempArea,
2373 						virtual_page_address(tempArea, page), protection);
2374 					map->Unlock();
2375 				}
2376 			}
2377 		}
2378 	} else {
2379 		ASSERT(lowerCache->WiredPagesCount() == 0);
2380 
2381 		// just change the protection of all areas
2382 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2383 				tempArea = tempArea->cache_next) {
2384 			// The area must be readable in the same way it was previously
2385 			// writable.
2386 			uint32 protection = B_KERNEL_READ_AREA;
2387 			if ((tempArea->protection & B_READ_AREA) != 0)
2388 				protection |= B_READ_AREA;
2389 
2390 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2391 			map->Lock();
2392 			map->ProtectArea(tempArea, protection);
2393 			map->Unlock();
2394 		}
2395 	}
2396 
2397 	vm_area_put_locked_cache(upperCache);
2398 
2399 	return B_OK;
2400 }
2401 
2402 
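/*!	Creates a copy of the area with ID \a sourceID in the given team's
	address space. If the source area is shared, the new area simply uses
	the same cache; otherwise, if the source is writable, the source cache
	is turned copy-on-write via vm_copy_on_write_area(), with wired pages
	being copied up front.
	Returns the ID of the new area on success, or an error code.
*/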
2403 area_id
2404 vm_copy_area(team_id team, const char* name, void** _address,
2405 	uint32 addressSpec, uint32 protection, area_id sourceID)
2406 {
2407 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2408 
2409 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2410 		// set the same protection for the kernel as for userland
2411 		protection |= B_KERNEL_READ_AREA;
2412 		if (writableCopy)
2413 			protection |= B_KERNEL_WRITE_AREA;
2414 	}
2415 
2416 	// Do the locking: target address space, all address spaces associated with
2417 	// the source cache, and the cache itself.
2418 	MultiAddressSpaceLocker locker;
2419 	VMAddressSpace* targetAddressSpace;
2420 	VMCache* cache;
2421 	VMArea* source;
2422 	AreaCacheLocker cacheLocker;
2423 	status_t status;
2424 	bool sharedArea;
2425 
2426 	page_num_t wiredPages = 0;
2427 	vm_page_reservation wiredPagesReservation;
2428 
2429 	bool restart;
2430 	do {
2431 		restart = false;
2432 
2433 		locker.Unset();
2434 		status = locker.AddTeam(team, true, &targetAddressSpace);
2435 		if (status == B_OK) {
2436 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2437 				&cache);
2438 		}
2439 		if (status != B_OK)
2440 			return status;
2441 
2442 		cacheLocker.SetTo(cache, true);	// already locked
2443 
2444 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2445 
2446 		page_num_t oldWiredPages = wiredPages;
2447 		wiredPages = 0;
2448 
2449 		// If the source area isn't shared, count the number of wired pages in
2450 		// the cache and reserve as many pages.
2451 		if (!sharedArea) {
2452 			wiredPages = cache->WiredPagesCount();
2453 
2454 			if (wiredPages > oldWiredPages) {
2455 				cacheLocker.Unlock();
2456 				locker.Unlock();
2457 
2458 				if (oldWiredPages > 0)
2459 					vm_page_unreserve_pages(&wiredPagesReservation);
2460 
2461 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2462 					VM_PRIORITY_USER);
2463 
2464 				restart = true;
2465 			}
2466 		} else if (oldWiredPages > 0)
2467 			vm_page_unreserve_pages(&wiredPagesReservation);
2468 	} while (restart);
2469 
2470 	// unreserve pages later
2471 	struct PagesUnreserver {
2472 		PagesUnreserver(vm_page_reservation* reservation)
2473 			:
2474 			fReservation(reservation)
2475 		{
2476 		}
2477 
2478 		~PagesUnreserver()
2479 		{
2480 			if (fReservation != NULL)
2481 				vm_page_unreserve_pages(fReservation);
2482 		}
2483 
2484 	private:
2485 		vm_page_reservation*	fReservation;
2486 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2487 
2488 	if (addressSpec == B_CLONE_ADDRESS) {
2489 		addressSpec = B_EXACT_ADDRESS;
2490 		*_address = (void*)source->Base();
2491 	}
2492 
2493 	// First, create a cache on top of the source area, or use the existing
2494 	// one if this is a shared area.
2495 
2496 	VMArea* target;
2497 	virtual_address_restrictions addressRestrictions = {};
2498 	addressRestrictions.address = *_address;
2499 	addressRestrictions.address_specification = addressSpec;
2500 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2501 		name, source->Size(), source->wiring, protection,
2502 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2503 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2504 		&addressRestrictions, true, &target, _address);
2505 	if (status < B_OK)
2506 		return status;
2507 
2508 	if (sharedArea) {
2509 		// The new area uses the old area's cache, but map_backing_store()
2510 		// hasn't acquired a ref. So we have to do that now.
2511 		cache->AcquireRefLocked();
2512 	}
2513 
2514 	// If the source area is writable, we need to move it one layer up as well
2515 
2516 	if (!sharedArea) {
2517 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2518 			// TODO: do something more useful if this fails!
2519 			if (vm_copy_on_write_area(cache,
2520 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2521 				panic("vm_copy_on_write_area() failed!\n");
2522 			}
2523 		}
2524 	}
2525 
2526 	// we return the ID of the newly created area
2527 	return target->id;
2528 }
2529 
2530 
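/*!	Changes the protection of the area with ID \a areaID to
	\a newProtection. Depending on whether the area becomes writable or
	read-only, this may insert a copy-on-write cache (via
	vm_copy_on_write_area()) or adjust the cache's commitment, and the
	already mapped pages are remapped accordingly. Unless \a kernel is
	\c true, B_KERNEL_AREA areas cannot be changed, and unless \a team is
	the kernel team, only the team's own areas may be changed.
*/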
2531 status_t
2532 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2533 	bool kernel)
2534 {
2535 	fix_protection(&newProtection);
2536 
2537 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2538 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2539 
2540 	if (!arch_vm_supports_protection(newProtection))
2541 		return B_NOT_SUPPORTED;
2542 
2543 	bool becomesWritable
2544 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2545 
2546 	// lock address spaces and cache
2547 	MultiAddressSpaceLocker locker;
2548 	VMCache* cache;
2549 	VMArea* area;
2550 	status_t status;
2551 	AreaCacheLocker cacheLocker;
2552 	bool isWritable;
2553 
2554 	bool restart;
2555 	do {
2556 		restart = false;
2557 
2558 		locker.Unset();
2559 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2560 		if (status != B_OK)
2561 			return status;
2562 
2563 		cacheLocker.SetTo(cache, true);	// already locked
2564 
2565 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2566 			return B_NOT_ALLOWED;
2567 
2568 		if (area->protection == newProtection)
2569 			return B_OK;
2570 
2571 		if (team != VMAddressSpace::KernelID()
2572 			&& area->address_space->ID() != team) {
2573 			// unless you're the kernel, you are only allowed to set
2574 			// the protection of your own areas
2575 			return B_NOT_ALLOWED;
2576 		}
2577 
2578 		isWritable
2579 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2580 
2581 		// Make sure the area (or, if we're going to call
2582 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2583 		// wired ranges.
2584 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2585 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2586 					otherArea = otherArea->cache_next) {
2587 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2588 					restart = true;
2589 					break;
2590 				}
2591 			}
2592 		} else {
2593 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2594 				restart = true;
2595 		}
2596 	} while (restart);
2597 
2598 	bool changePageProtection = true;
2599 	bool changeTopCachePagesOnly = false;
2600 
2601 	if (isWritable && !becomesWritable) {
2602 		// writable -> !writable
2603 
2604 		if (cache->source != NULL && cache->temporary) {
2605 			if (cache->CountWritableAreas(area) == 0) {
2606 				// Since this cache is now backed by the pages of its source
2607 				// cache, we can change its commitment to account only for the
2608 				// pages that really are in this cache.
2609 
2610 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2611 					team == VMAddressSpace::KernelID()
2612 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2613 
2614 				// TODO: we may be able to join with our source cache, if
2615 				// count == 0
2616 			}
2617 		}
2618 
2619 		// If only the writability changes, we can just remap the pages of the
2620 		// top cache, since the pages of lower caches are mapped read-only
2621 		// anyway. That is only advantageous, though, if the number of pages
2622 		// in the cache is significantly smaller than the number of pages in
2623 		// the area.
2624 		if (newProtection
2625 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2626 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2627 			changeTopCachePagesOnly = true;
2628 		}
2629 	} else if (!isWritable && becomesWritable) {
2630 		// !writable -> writable
2631 
2632 		if (!cache->consumers.IsEmpty()) {
2633 			// There are consumers -- we have to insert a new cache. Fortunately
2634 			// vm_copy_on_write_area() does everything that's needed.
2635 			changePageProtection = false;
2636 			status = vm_copy_on_write_area(cache, NULL);
2637 		} else {
2638 			// No consumers, so we don't need to insert a new one.
2639 			if (cache->source != NULL && cache->temporary) {
2640 				// the cache's commitment must contain all possible pages
2641 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2642 					team == VMAddressSpace::KernelID()
2643 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2644 			}
2645 
2646 			if (status == B_OK && cache->source != NULL) {
2647 				// There's a source cache, hence we can't just change all pages'
2648 				// protection or we might allow writing into pages belonging to
2649 				// a lower cache.
2650 				changeTopCachePagesOnly = true;
2651 			}
2652 		}
2653 	} else {
2654 		// we don't have anything special to do in all other cases
2655 	}
2656 
2657 	if (status == B_OK) {
2658 		// remap existing pages in this cache
2659 		if (changePageProtection) {
2660 			VMTranslationMap* map = area->address_space->TranslationMap();
2661 			map->Lock();
2662 
2663 			if (changeTopCachePagesOnly) {
2664 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2665 				page_num_t lastPageOffset
2666 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2667 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2668 						vm_page* page = it.Next();) {
2669 					if (page->cache_offset >= firstPageOffset
2670 						&& page->cache_offset <= lastPageOffset) {
2671 						addr_t address = virtual_page_address(area, page);
2672 						map->ProtectPage(area, address, newProtection);
2673 					}
2674 				}
2675 			} else
2676 				map->ProtectArea(area, newProtection);
2677 
2678 			map->Unlock();
2679 		}
2680 
2681 		area->protection = newProtection;
2682 	}
2683 
2684 	return status;
2685 }
2686 
2687 
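/*!	Looks up the physical address that the virtual address \a vaddr is
	mapped to in the address space of the given team and returns it in
	\a paddr.
*/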
2688 status_t
2689 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2690 {
2691 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2692 	if (addressSpace == NULL)
2693 		return B_BAD_TEAM_ID;
2694 
2695 	VMTranslationMap* map = addressSpace->TranslationMap();
2696 
2697 	map->Lock();
2698 	uint32 dummyFlags;
2699 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2700 	map->Unlock();
2701 
2702 	addressSpace->Put();
2703 	return status;
2704 }
2705 
2706 
2707 /*!	The page's cache must be locked.
2708 */
2709 bool
2710 vm_test_map_modification(vm_page* page)
2711 {
2712 	if (page->modified)
2713 		return true;
2714 
2715 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2716 	vm_page_mapping* mapping;
2717 	while ((mapping = iterator.Next()) != NULL) {
2718 		VMArea* area = mapping->area;
2719 		VMTranslationMap* map = area->address_space->TranslationMap();
2720 
2721 		phys_addr_t physicalAddress;
2722 		uint32 flags;
2723 		map->Lock();
2724 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2725 		map->Unlock();
2726 
2727 		if ((flags & PAGE_MODIFIED) != 0)
2728 			return true;
2729 	}
2730 
2731 	return false;
2732 }
2733 
2734 
2735 /*!	The page's cache must be locked.
2736 */
2737 void
2738 vm_clear_map_flags(vm_page* page, uint32 flags)
2739 {
2740 	if ((flags & PAGE_ACCESSED) != 0)
2741 		page->accessed = false;
2742 	if ((flags & PAGE_MODIFIED) != 0)
2743 		page->modified = false;
2744 
2745 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2746 	vm_page_mapping* mapping;
2747 	while ((mapping = iterator.Next()) != NULL) {
2748 		VMArea* area = mapping->area;
2749 		VMTranslationMap* map = area->address_space->TranslationMap();
2750 
2751 		map->Lock();
2752 		map->ClearFlags(virtual_page_address(area, page), flags);
2753 		map->Unlock();
2754 	}
2755 }
2756 
2757 
2758 /*!	Removes all mappings from a page.
2759 	After you've called this function, the page is unmapped from memory and
2760 	the page's \c accessed and \c modified flags have been updated according
2761 	to the state of the mappings.
2762 	The page's cache must be locked.
2763 */
2764 void
2765 vm_remove_all_page_mappings(vm_page* page)
2766 {
2767 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2768 		VMArea* area = mapping->area;
2769 		VMTranslationMap* map = area->address_space->TranslationMap();
2770 		addr_t address = virtual_page_address(area, page);
2771 		map->UnmapPage(area, address, false);
2772 	}
2773 }
2774 
2775 
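/*!	Clears the accessed flags of all of the page's mappings as well as the
	page's own \c accessed flag, updating its \c modified flag from the
	mappings on the way.
	The page's cache must be locked.
	\return The number of accessed flags that were set, including the
		page's own \c accessed flag.
*/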
2776 int32
2777 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2778 {
2779 	int32 count = 0;
2780 
2781 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2782 	vm_page_mapping* mapping;
2783 	while ((mapping = iterator.Next()) != NULL) {
2784 		VMArea* area = mapping->area;
2785 		VMTranslationMap* map = area->address_space->TranslationMap();
2786 
2787 		bool modified;
2788 		if (map->ClearAccessedAndModified(area,
2789 				virtual_page_address(area, page), false, modified)) {
2790 			count++;
2791 		}
2792 
2793 		page->modified |= modified;
2794 	}
2795 
2797 	if (page->accessed) {
2798 		count++;
2799 		page->accessed = false;
2800 	}
2801 
2802 	return count;
2803 }
2804 
2805 
2806 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2807 	mappings.
2808 	The function iterates through the page mappings and removes them until
2809 	encountering one that has been accessed. From then on it will continue to
2810 	iterate, but only clear the accessed flag of the mapping. The page's
2811 	\c modified bit will be updated accordingly, the \c accessed bit will be
2812 	cleared.
2813 	\return The number of mapping accessed bits encountered, including the
2814 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2815 		of the page have been removed.
2816 */
2817 int32
2818 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2819 {
2820 	ASSERT(page->WiredCount() == 0);
2821 
2822 	if (page->accessed)
2823 		return vm_clear_page_mapping_accessed_flags(page);
2824 
2825 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2826 		VMArea* area = mapping->area;
2827 		VMTranslationMap* map = area->address_space->TranslationMap();
2828 		addr_t address = virtual_page_address(area, page);
2829 		bool modified = false;
2830 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2831 			page->accessed = true;
2832 			page->modified |= modified;
2833 			return vm_clear_page_mapping_accessed_flags(page);
2834 		}
2835 		page->modified |= modified;
2836 	}
2837 
2838 	return 0;
2839 }
2840 
2841 
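/*!	Kernel debugger command backing "dl", "dw", "ds", "db", and "string":
	dumps memory at the given address in the respective item size, or as a
	string. With -p/--physical the address is interpreted as a physical
	address and only memory within a single page is displayed.
*/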
2842 static int
2843 display_mem(int argc, char** argv)
2844 {
2845 	bool physical = false;
2846 	addr_t copyAddress;
2847 	int32 displayWidth;
2848 	int32 itemSize;
2849 	int32 num = -1;
2850 	addr_t address;
2851 	int i = 1, j;
2852 
2853 	if (argc > 1 && argv[1][0] == '-') {
2854 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2855 			physical = true;
2856 			i++;
2857 		} else
2858 			i = 99;
2859 	}
2860 
2861 	if (argc < i + 1 || argc > i + 2) {
2862 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2863 			"\tdl - 8 bytes\n"
2864 			"\tdw - 4 bytes\n"
2865 			"\tds - 2 bytes\n"
2866 			"\tdb - 1 byte\n"
2867 			"\tstring - a whole string\n"
2868 			"  -p or --physical only allows memory from a single page to be "
2869 			"displayed.\n");
2870 		return 0;
2871 	}
2872 
2873 	address = parse_expression(argv[i]);
2874 
2875 	if (argc > i + 1)
2876 		num = parse_expression(argv[i + 1]);
2877 
2878 	// build the format string
2879 	if (strcmp(argv[0], "db") == 0) {
2880 		itemSize = 1;
2881 		displayWidth = 16;
2882 	} else if (strcmp(argv[0], "ds") == 0) {
2883 		itemSize = 2;
2884 		displayWidth = 8;
2885 	} else if (strcmp(argv[0], "dw") == 0) {
2886 		itemSize = 4;
2887 		displayWidth = 4;
2888 	} else if (strcmp(argv[0], "dl") == 0) {
2889 		itemSize = 8;
2890 		displayWidth = 2;
2891 	} else if (strcmp(argv[0], "string") == 0) {
2892 		itemSize = 1;
2893 		displayWidth = -1;
2894 	} else {
2895 		kprintf("display_mem called in an invalid way!\n");
2896 		return 0;
2897 	}
2898 
2899 	if (num <= 0)
2900 		num = displayWidth;
2901 
2902 	void* physicalPageHandle = NULL;
2903 
2904 	if (physical) {
2905 		int32 offset = address & (B_PAGE_SIZE - 1);
2906 		if (num * itemSize + offset > B_PAGE_SIZE) {
2907 			num = (B_PAGE_SIZE - offset) / itemSize;
2908 			kprintf("NOTE: number of bytes has been cut to page size\n");
2909 		}
2910 
2911 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2912 
2913 		if (vm_get_physical_page_debug(address, &copyAddress,
2914 				&physicalPageHandle) != B_OK) {
2915 			kprintf("getting the hardware page failed.\n");
2916 			return 0;
2917 		}
2918 
2919 		address += offset;
2920 		copyAddress += offset;
2921 	} else
2922 		copyAddress = address;
2923 
2924 	if (!strcmp(argv[0], "string")) {
2925 		kprintf("%p \"", (char*)copyAddress);
2926 
2927 		// string mode
2928 		for (i = 0; true; i++) {
2929 			char c;
2930 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2931 					!= B_OK
2932 				|| c == '\0') {
2933 				break;
2934 			}
2935 
2936 			if (c == '\n')
2937 				kprintf("\\n");
2938 			else if (c == '\t')
2939 				kprintf("\\t");
2940 			else {
2941 				if (!isprint(c))
2942 					c = '.';
2943 
2944 				kprintf("%c", c);
2945 			}
2946 		}
2947 
2948 		kprintf("\"\n");
2949 	} else {
2950 		// number mode
2951 		for (i = 0; i < num; i++) {
2952 			uint32 value;
2953 
2954 			if ((i % displayWidth) == 0) {
2955 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2956 				if (i != 0)
2957 					kprintf("\n");
2958 
2959 				kprintf("[0x%lx]  ", address + i * itemSize);
2960 
2961 				for (j = 0; j < displayed; j++) {
2962 					char c;
2963 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2964 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2965 						displayed = j;
2966 						break;
2967 					}
2968 					if (!isprint(c))
2969 						c = '.';
2970 
2971 					kprintf("%c", c);
2972 				}
2973 				if (num > displayWidth) {
2974 					// make sure the spacing in the last line is correct
2975 					for (j = displayed; j < displayWidth * itemSize; j++)
2976 						kprintf(" ");
2977 				}
2978 				kprintf("  ");
2979 			}
2980 
2981 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2982 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2983 				kprintf("read fault");
2984 				break;
2985 			}
2986 
2987 			switch (itemSize) {
2988 				case 1:
2989 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
2990 					break;
2991 				case 2:
2992 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
2993 					break;
2994 				case 4:
2995 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
2996 					break;
2997 				case 8:
2998 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
2999 					break;
3000 			}
3001 		}
3002 
3003 		kprintf("\n");
3004 	}
3005 
3006 	if (physical) {
3007 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3008 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3009 	}
3010 	return 0;
3011 }
3012 
3013 
3014 static void
3015 dump_cache_tree_recursively(VMCache* cache, int level,
3016 	VMCache* highlightCache)
3017 {
3018 	// print this cache
3019 	for (int i = 0; i < level; i++)
3020 		kprintf("  ");
3021 	if (cache == highlightCache)
3022 		kprintf("%p <--\n", cache);
3023 	else
3024 		kprintf("%p\n", cache);
3025 
3026 	// recursively print its consumers
3027 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3028 			VMCache* consumer = it.Next();) {
3029 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3030 	}
3031 }
3032 
3033 
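/*!	Kernel debugger command: prints the tree of caches the given cache
	belongs to, starting at the root (transitive source) cache and marking
	the given cache in the output.
*/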
3034 static int
3035 dump_cache_tree(int argc, char** argv)
3036 {
3037 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3038 		kprintf("usage: %s <address>\n", argv[0]);
3039 		return 0;
3040 	}
3041 
3042 	addr_t address = parse_expression(argv[1]);
3043 	if (address == 0)
3044 		return 0;
3045 
3046 	VMCache* cache = (VMCache*)address;
3047 	VMCache* root = cache;
3048 
3049 	// find the root cache (the transitive source)
3050 	while (root->source != NULL)
3051 		root = root->source;
3052 
3053 	dump_cache_tree_recursively(root, 0, cache);
3054 
3055 	return 0;
3056 }
3057 
3058 
3059 const char*
3060 vm_cache_type_to_string(int32 type)
3061 {
3062 	switch (type) {
3063 		case CACHE_TYPE_RAM:
3064 			return "RAM";
3065 		case CACHE_TYPE_DEVICE:
3066 			return "device";
3067 		case CACHE_TYPE_VNODE:
3068 			return "vnode";
3069 		case CACHE_TYPE_NULL:
3070 			return "null";
3071 
3072 		default:
3073 			return "unknown";
3074 	}
3075 }
3076 
3077 
3078 #if DEBUG_CACHE_LIST
3079 
3080 static void
3081 update_cache_info_recursively(VMCache* cache, cache_info& info)
3082 {
3083 	info.page_count += cache->page_count;
3084 	if (cache->type == CACHE_TYPE_RAM)
3085 		info.committed += cache->committed_size;
3086 
3087 	// recurse
3088 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3089 			VMCache* consumer = it.Next();) {
3090 		update_cache_info_recursively(consumer, info);
3091 	}
3092 }
3093 
3094 
3095 static int
3096 cache_info_compare_page_count(const void* _a, const void* _b)
3097 {
3098 	const cache_info* a = (const cache_info*)_a;
3099 	const cache_info* b = (const cache_info*)_b;
3100 	if (a->page_count == b->page_count)
3101 		return 0;
3102 	return a->page_count < b->page_count ? 1 : -1;
3103 }
3104 
3105 
3106 static int
3107 cache_info_compare_committed(const void* _a, const void* _b)
3108 {
3109 	const cache_info* a = (const cache_info*)_a;
3110 	const cache_info* b = (const cache_info*)_b;
3111 	if (a->committed == b->committed)
3112 		return 0;
3113 	return a->committed < b->committed ? 1 : -1;
3114 }
3115 
3116 
3117 static void
3118 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3119 {
3120 	for (int i = 0; i < level; i++)
3121 		kprintf("  ");
3122 
3123 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3124 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3125 		cache->virtual_base, cache->virtual_end, cache->page_count);
3126 
3127 	if (level == 0)
3128 		kprintf("/%lu", info.page_count);
3129 
3130 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3131 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3132 
3133 		if (level == 0)
3134 			kprintf("/%lu", info.committed);
3135 	}
3136 
3137 	// areas
3138 	if (cache->areas != NULL) {
3139 		VMArea* area = cache->areas;
3140 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3141 			area->name, area->address_space->ID());
3142 
3143 		while (area->cache_next != NULL) {
3144 			area = area->cache_next;
3145 			kprintf(", %" B_PRId32, area->id);
3146 		}
3147 	}
3148 
3149 	kputs("\n");
3150 
3151 	// recurse
3152 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3153 			VMCache* consumer = it.Next();) {
3154 		dump_caches_recursively(consumer, info, level + 1);
3155 	}
3156 }
3157 
3158 
3159 static int
3160 dump_caches(int argc, char** argv)
3161 {
3162 	if (sCacheInfoTable == NULL) {
3163 		kprintf("No cache info table!\n");
3164 		return 0;
3165 	}
3166 
3167 	bool sortByPageCount = true;
3168 
3169 	for (int32 i = 1; i < argc; i++) {
3170 		if (strcmp(argv[i], "-c") == 0) {
3171 			sortByPageCount = false;
3172 		} else {
3173 			print_debugger_command_usage(argv[0]);
3174 			return 0;
3175 		}
3176 	}
3177 
3178 	uint32 totalCount = 0;
3179 	uint32 rootCount = 0;
3180 	off_t totalCommitted = 0;
3181 	page_num_t totalPages = 0;
3182 
3183 	VMCache* cache = gDebugCacheList;
3184 	while (cache) {
3185 		totalCount++;
3186 		if (cache->source == NULL) {
3187 			cache_info stackInfo;
3188 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3189 				? sCacheInfoTable[rootCount] : stackInfo;
3190 			rootCount++;
3191 			info.cache = cache;
3192 			info.page_count = 0;
3193 			info.committed = 0;
3194 			update_cache_info_recursively(cache, info);
3195 			totalCommitted += info.committed;
3196 			totalPages += info.page_count;
3197 		}
3198 
3199 		cache = cache->debug_next;
3200 	}
3201 
3202 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3203 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3204 			sortByPageCount
3205 				? &cache_info_compare_page_count
3206 				: &cache_info_compare_committed);
3207 	}
3208 
3209 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3210 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3211 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3212 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3213 			"page count" : "committed size");
3214 
3215 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3216 		for (uint32 i = 0; i < rootCount; i++) {
3217 			cache_info& info = sCacheInfoTable[i];
3218 			dump_caches_recursively(info.cache, info, 0);
3219 		}
3220 	} else
3221 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3222 
3223 	return 0;
3224 }
3225 
3226 #endif	// DEBUG_CACHE_LIST
3227 
3228 
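/*!	Kernel debugger command: dumps the VMCache at the given address; with
	-p all of its pages are listed as well. Also sets the "_sourceCache"
	debug variable to the cache's source.
*/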
3229 static int
3230 dump_cache(int argc, char** argv)
3231 {
3232 	VMCache* cache;
3233 	bool showPages = false;
3234 	int i = 1;
3235 
3236 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3237 		kprintf("usage: %s [-ps] <address>\n"
3238 			"  if -p is specified, all pages are shown; if -s is used,\n"
3239 			"  only the cache info is shown.\n", argv[0]);
3240 		return 0;
3241 	}
3242 	while (argv[i][0] == '-') {
3243 		char* arg = argv[i] + 1;
3244 		while (arg[0]) {
3245 			if (arg[0] == 'p')
3246 				showPages = true;
3247 			arg++;
3248 		}
3249 		i++;
3250 	}
3251 	if (argv[i] == NULL) {
3252 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3253 		return 0;
3254 	}
3255 
3256 	addr_t address = parse_expression(argv[i]);
3257 	if (address == 0)
3258 		return 0;
3259 
3260 	cache = (VMCache*)address;
3261 
3262 	cache->Dump(showPages);
3263 
3264 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3265 
3266 	return 0;
3267 }
3268 
3269 
3270 static void
3271 dump_area_struct(VMArea* area, bool mappings)
3272 {
3273 	kprintf("AREA: %p\n", area);
3274 	kprintf("name:\t\t'%s'\n", area->name);
3275 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3276 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3277 	kprintf("base:\t\t0x%lx\n", area->Base());
3278 	kprintf("size:\t\t0x%lx\n", area->Size());
3279 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3280 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3281 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3282 	kprintf("cache:\t\t%p\n", area->cache);
3283 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3284 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3285 	kprintf("cache_next:\t%p\n", area->cache_next);
3286 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3287 
3288 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3289 	if (mappings) {
3290 		kprintf("page mappings:\n");
3291 		while (iterator.HasNext()) {
3292 			vm_page_mapping* mapping = iterator.Next();
3293 			kprintf("  %p", mapping->page);
3294 		}
3295 		kprintf("\n");
3296 	} else {
3297 		uint32 count = 0;
3298 		while (iterator.Next() != NULL) {
3299 			count++;
3300 		}
3301 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3302 	}
3303 }
3304 
3305 
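/*!	Kernel debugger command: dumps all areas matching the given ID,
	containing address, or name. One of the "id", "contains", "address",
	and "name" keywords restricts the match to that criterion; -m
	additionally lists the areas' page mappings.
*/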
3306 static int
3307 dump_area(int argc, char** argv)
3308 {
3309 	bool mappings = false;
3310 	bool found = false;
3311 	int32 index = 1;
3312 	VMArea* area;
3313 	addr_t num;
3314 
3315 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3316 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3317 			"All areas matching either id/address/name are listed. You can\n"
3318 			"restrict the check to a specific item by prefixing the specifier\n"
3319 			"with one of the id/contains/address/name keywords.\n"
3320 			"-m shows the area's mappings as well.\n");
3321 		return 0;
3322 	}
3323 
3324 	if (!strcmp(argv[1], "-m")) {
3325 		mappings = true;
3326 		index++;
3327 	}
3328 
3329 	int32 mode = 0xf;
3330 	if (!strcmp(argv[index], "id"))
3331 		mode = 1;
3332 	else if (!strcmp(argv[index], "contains"))
3333 		mode = 2;
3334 	else if (!strcmp(argv[index], "name"))
3335 		mode = 4;
3336 	else if (!strcmp(argv[index], "address"))
3337 		mode = 0;
3338 	if (mode != 0xf)
3339 		index++;
3340 
3341 	if (index >= argc) {
3342 		kprintf("No area specifier given.\n");
3343 		return 0;
3344 	}
3345 
3346 	num = parse_expression(argv[index]);
3347 
3348 	if (mode == 0) {
3349 		dump_area_struct((struct VMArea*)num, mappings);
3350 	} else {
3351 		// walk through the area list, looking for the arguments as a name
3352 
3353 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3354 		while ((area = it.Next()) != NULL) {
3355 			if (((mode & 4) != 0 && area->name != NULL
3356 					&& !strcmp(argv[index], area->name))
3357 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3358 					|| (((mode & 2) != 0 && area->Base() <= num
3359 						&& area->Base() + area->Size() > num))))) {
3360 				dump_area_struct(area, mappings);
3361 				found = true;
3362 			}
3363 		}
3364 
3365 		if (!found)
3366 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3367 	}
3368 
3369 	return 0;
3370 }
3371 
3372 
3373 static int
3374 dump_area_list(int argc, char** argv)
3375 {
3376 	VMArea* area;
3377 	const char* name = NULL;
3378 	int32 id = 0;
3379 
3380 	if (argc > 1) {
3381 		id = parse_expression(argv[1]);
3382 		if (id == 0)
3383 			name = argv[1];
3384 	}
3385 
3386 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3387 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3388 		B_PRINTF_POINTER_WIDTH, "size");
3389 
3390 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3391 	while ((area = it.Next()) != NULL) {
3392 		if ((id != 0 && area->address_space->ID() != id)
3393 			|| (name != NULL && strstr(area->name, name) == NULL))
3394 			continue;
3395 
3396 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3397 			area->id, (void*)area->Base(), (void*)area->Size(),
3398 			area->protection, area->wiring, area->name);
3399 	}
3400 	return 0;
3401 }
3402 
3403 
3404 static int
3405 dump_available_memory(int argc, char** argv)
3406 {
3407 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3408 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3409 	return 0;
3410 }
3411 
3412 
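/*!	Kernel debugger command: prints the translation map information for a
	virtual address, optionally in the context of the given thread's team.
	With -r a reverse lookup of a physical address is performed across the
	address spaces; with -p the argument is interpreted as a vm_page
	pointer instead.
*/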
3413 static int
3414 dump_mapping_info(int argc, char** argv)
3415 {
3416 	bool reverseLookup = false;
3417 	bool pageLookup = false;
3418 
3419 	int argi = 1;
3420 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3421 		const char* arg = argv[argi];
3422 		if (strcmp(arg, "-r") == 0) {
3423 			reverseLookup = true;
3424 		} else if (strcmp(arg, "-p") == 0) {
3425 			reverseLookup = true;
3426 			pageLookup = true;
3427 		} else {
3428 			print_debugger_command_usage(argv[0]);
3429 			return 0;
3430 		}
3431 	}
3432 
3433 	// We need at least one argument, the address. Optionally a thread ID can be
3434 	// specified.
3435 	if (argi >= argc || argi + 2 < argc) {
3436 		print_debugger_command_usage(argv[0]);
3437 		return 0;
3438 	}
3439 
3440 	uint64 addressValue;
3441 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3442 		return 0;
3443 
3444 	Team* team = NULL;
3445 	if (argi < argc) {
3446 		uint64 threadID;
3447 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3448 			return 0;
3449 
3450 		Thread* thread = Thread::GetDebug(threadID);
3451 		if (thread == NULL) {
3452 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3453 			return 0;
3454 		}
3455 
3456 		team = thread->team;
3457 	}
3458 
3459 	if (reverseLookup) {
3460 		phys_addr_t physicalAddress;
3461 		if (pageLookup) {
3462 			vm_page* page = (vm_page*)(addr_t)addressValue;
3463 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3464 		} else {
3465 			physicalAddress = (phys_addr_t)addressValue;
3466 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3467 		}
3468 
3469 		kprintf("    Team     Virtual Address      Area\n");
3470 		kprintf("--------------------------------------\n");
3471 
3472 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3473 			Callback()
3474 				:
3475 				fAddressSpace(NULL)
3476 			{
3477 			}
3478 
3479 			void SetAddressSpace(VMAddressSpace* addressSpace)
3480 			{
3481 				fAddressSpace = addressSpace;
3482 			}
3483 
3484 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3485 			{
3486 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3487 					virtualAddress);
3488 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3489 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3490 				else
3491 					kprintf("\n");
3492 				return false;
3493 			}
3494 
3495 		private:
3496 			VMAddressSpace*	fAddressSpace;
3497 		} callback;
3498 
3499 		if (team != NULL) {
3500 			// team specified -- get its address space
3501 			VMAddressSpace* addressSpace = team->address_space;
3502 			if (addressSpace == NULL) {
3503 				kprintf("Failed to get address space!\n");
3504 				return 0;
3505 			}
3506 
3507 			callback.SetAddressSpace(addressSpace);
3508 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3509 				physicalAddress, callback);
3510 		} else {
3511 			// no team specified -- iterate through all address spaces
3512 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3513 				addressSpace != NULL;
3514 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3515 				callback.SetAddressSpace(addressSpace);
3516 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3517 					physicalAddress, callback);
3518 			}
3519 		}
3520 	} else {
3521 		// get the address space
3522 		addr_t virtualAddress = (addr_t)addressValue;
3523 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3524 		VMAddressSpace* addressSpace;
3525 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3526 			addressSpace = VMAddressSpace::Kernel();
3527 		} else if (team != NULL) {
3528 			addressSpace = team->address_space;
3529 		} else {
3530 			Thread* thread = debug_get_debugged_thread();
3531 			if (thread == NULL || thread->team == NULL) {
3532 				kprintf("Failed to get team!\n");
3533 				return 0;
3534 			}
3535 
3536 			addressSpace = thread->team->address_space;
3537 		}
3538 
3539 		if (addressSpace == NULL) {
3540 			kprintf("Failed to get address space!\n");
3541 			return 0;
3542 		}
3543 
3544 		// let the translation map implementation do the job
3545 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3546 	}
3547 
3548 	return 0;
3549 }
3550 
3551 
3552 /*!	Deletes all areas and reserved regions in the given address space.
3553 
3554 	The caller must ensure that none of the areas has any wired ranges.
3555 
3556 	\param addressSpace The address space.
3557 	\param deletingAddressSpace \c true, if the address space is in the process
3558 		of being deleted.
3559 */
3560 void
3561 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3562 {
3563 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3564 		addressSpace->ID()));
3565 
3566 	addressSpace->WriteLock();
3567 
3568 	// remove all reserved areas in this address space
3569 	addressSpace->UnreserveAllAddressRanges(0);
3570 
3571 	// delete all the areas in this address space
3572 	while (VMArea* area = addressSpace->FirstArea()) {
3573 		ASSERT(!area->IsWired());
3574 		delete_area(addressSpace, area, deletingAddressSpace);
3575 	}
3576 
3577 	addressSpace->WriteUnlock();
3578 }
3579 
3580 
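/*!	Looks up the area containing the given address in the calling team's
	address space (or in the kernel address space for kernel addresses) and
	returns its ID. For userland (non-\a kernel) requests the area must grant
	at least \c B_READ_AREA or \c B_WRITE_AREA access; otherwise, or if no
	area is found, an error code is returned.
*/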
3581 static area_id
3582 vm_area_for(addr_t address, bool kernel)
3583 {
3584 	team_id team;
3585 	if (IS_USER_ADDRESS(address)) {
3586 		// we try the user team address space, if any
3587 		team = VMAddressSpace::CurrentID();
3588 		if (team < 0)
3589 			return team;
3590 	} else
3591 		team = VMAddressSpace::KernelID();
3592 
3593 	AddressSpaceReadLocker locker(team);
3594 	if (!locker.IsLocked())
3595 		return B_BAD_TEAM_ID;
3596 
3597 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3598 	if (area != NULL) {
3599 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3600 			return B_ERROR;
3601 
3602 		return area->id;
3603 	}
3604 
3605 	return B_ERROR;
3606 }
3607 
3608 
3609 /*!	Frees physical pages that were used during the boot process.
3610 	\a end is inclusive.
3611 */
3612 static void
3613 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3614 {
3615 	// free all physical pages in the specified range
3616 
3617 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3618 		phys_addr_t physicalAddress;
3619 		uint32 flags;
3620 
3621 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3622 			&& (flags & PAGE_PRESENT) != 0) {
3623 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3624 			if (page != NULL && page->State() != PAGE_STATE_FREE
3625 					 && page->State() != PAGE_STATE_CLEAR
3626 					 && page->State() != PAGE_STATE_UNUSED) {
3627 				DEBUG_PAGE_ACCESS_START(page);
3628 				vm_page_set_state(page, PAGE_STATE_FREE);
3629 			}
3630 		}
3631 	}
3632 
3633 	// unmap the memory
3634 	map->Unmap(start, end);
3635 }
3636 
3637 
3638 void
3639 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3640 {
3641 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3642 	addr_t end = start + (size - 1);
3643 	addr_t lastEnd = start;
3644 
3645 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3646 		(void*)start, (void*)end));
3647 
3648 	// The areas are sorted in virtual address space order, so
3649 	// we just have to find the holes between them that fall
3650 	// into the area we should dispose
3651 
3652 	map->Lock();
3653 
3654 	for (VMAddressSpace::AreaIterator it
3655 				= VMAddressSpace::Kernel()->GetAreaIterator();
3656 			VMArea* area = it.Next();) {
3657 		addr_t areaStart = area->Base();
3658 		addr_t areaEnd = areaStart + (area->Size() - 1);
3659 
3660 		if (areaEnd < start)
3661 			continue;
3662 
3663 		if (areaStart > end) {
3664 			// we are done, the area is already beyond what we have to free
3665 			break;
3666 		}
3667 
3668 		if (areaStart > lastEnd) {
3669 			// this is something we can free
3670 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3671 				(void*)areaStart));
3672 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3673 		}
3674 
3675 		if (areaEnd >= end) {
3676 			lastEnd = areaEnd;
3677 				// no +1 to prevent potential overflow
3678 			break;
3679 		}
3680 
3681 		lastEnd = areaEnd + 1;
3682 	}
3683 
3684 	if (lastEnd < end) {
3685 		// we can also get rid of some space at the end of the area
3686 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3687 			(void*)end));
3688 		unmap_and_free_physical_pages(map, lastEnd, end);
3689 	}
3690 
3691 	map->Unlock();
3692 }
3693 
3694 
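/*!	Creates "<name>_text" and "<name>_data" areas for the given preloaded
	ELF image, so that the regions the boot loader already mapped and wired
	for it are represented by proper kernel areas. The text area is created
	read/write here and remapped later by the ELF initialization code.
*/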
3695 static void
3696 create_preloaded_image_areas(struct preloaded_image* _image)
3697 {
3698 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3699 	char name[B_OS_NAME_LENGTH];
3700 	void* address;
3701 	int32 length;
3702 
3703 	// use file name to create a good area name
3704 	char* fileName = strrchr(image->name, '/');
3705 	if (fileName == NULL)
3706 		fileName = image->name;
3707 	else
3708 		fileName++;
3709 
3710 	length = strlen(fileName);
3711 	// make sure there is enough space for the suffix
3712 	if (length > 25)
3713 		length = 25;
3714 
3715 	memcpy(name, fileName, length);
3716 	strcpy(name + length, "_text");
3717 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3718 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3719 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3720 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3721 		// this will later be remapped read-only/executable by the
3722 		// ELF initialization code
3723 
3724 	strcpy(name + length, "_data");
3725 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3726 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3727 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3728 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3729 }
3730 
3731 
3732 /*!	Frees all kernel arguments areas that were previously created for the
3733 	kernel_args ranges. Any boot loader resources contained in those
3734 	arguments must not be accessed anymore past this point.
3735 */
3736 void
3737 vm_free_kernel_args(kernel_args* args)
3738 {
3739 	uint32 i;
3740 
3741 	TRACE(("vm_free_kernel_args()\n"));
3742 
3743 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3744 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3745 		if (area >= B_OK)
3746 			delete_area(area);
3747 	}
3748 }
3749 
3750 
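/*!	Creates "_kernel args_" areas for the ranges the boot loader allocated
	for the kernel_args, so they are tracked by the VM and can later be freed
	again via vm_free_kernel_args().
*/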
3751 static void
3752 allocate_kernel_args(kernel_args* args)
3753 {
3754 	TRACE(("allocate_kernel_args()\n"));
3755 
3756 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3757 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3758 
3759 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3760 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3761 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3762 	}
3763 }
3764 
3765 
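/*!	Releases the kernel address range reservations that were made by
	reserve_boot_loader_ranges().
*/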
3766 static void
3767 unreserve_boot_loader_ranges(kernel_args* args)
3768 {
3769 	TRACE(("unreserve_boot_loader_ranges()\n"));
3770 
3771 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3772 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3773 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3774 			args->virtual_allocated_range[i].size);
3775 	}
3776 }
3777 
3778 
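/*!	Reserves the virtual ranges the boot loader has allocated in the kernel
	address space, so that the VM won't hand them out to anyone else.
*/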
3779 static void
3780 reserve_boot_loader_ranges(kernel_args* args)
3781 {
3782 	TRACE(("reserve_boot_loader_ranges()\n"));
3783 
3784 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3785 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3786 
3787 		// If the address is not a kernel address, we just skip it. The
3788 		// architecture specific code has to deal with it.
3789 		if (!IS_KERNEL_ADDRESS(address)) {
3790 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3791 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3792 			continue;
3793 		}
3794 
3795 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3796 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3797 		if (status < B_OK)
3798 			panic("could not reserve boot loader ranges\n");
3799 	}
3800 }
3801 
3802 
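/*!	Allocates \a size bytes (rounded up to whole pages) of kernel virtual
	address space by finding a suitably sized (and, if requested, aligned)
	gap between or around the ranges recorded in
	\c args->virtual_allocated_range, and extends the neighboring range to
	cover the allocation.
	Returns the base address of the allocation, or 0 on failure.
*/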
3803 static addr_t
3804 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3805 {
3806 	size = PAGE_ALIGN(size);
3807 
3808 	// find a slot in the virtual allocation addr range
3809 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3810 		// check to see if the space between this one and the last is big enough
3811 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3812 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3813 			+ args->virtual_allocated_range[i - 1].size;
3814 
3815 		addr_t base = alignment > 0
3816 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3817 
3818 		if (base >= KERNEL_BASE && base < rangeStart
3819 				&& rangeStart - base >= size) {
3820 			args->virtual_allocated_range[i - 1].size
3821 				+= base + size - previousRangeEnd;
3822 			return base;
3823 		}
3824 	}
3825 
3826 	// We didn't find a slot between the allocation ranges; this is OK.
3827 	// See if there's a gap after the last one.
3828 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3829 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3830 		+ args->virtual_allocated_range[lastEntryIndex].size;
3831 	addr_t base = alignment > 0
3832 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3833 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3834 		args->virtual_allocated_range[lastEntryIndex].size
3835 			+= base + size - lastRangeEnd;
3836 		return base;
3837 	}
3838 
3839 	// see if there's a gap before the first one
3840 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3841 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3842 		base = rangeStart - size;
3843 		if (alignment > 0)
3844 			base = ROUNDDOWN(base, alignment);
3845 
3846 		if (base >= KERNEL_BASE) {
3847 			args->virtual_allocated_range[0].start = base;
3848 			args->virtual_allocated_range[0].size += rangeStart - base;
3849 			return base;
3850 		}
3851 	}
3852 
3853 	return 0;
3854 }
3855 
3856 
3857 static bool
3858 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3859 {
3860 	// TODO: horrible brute-force method of determining if the page can be
3861 	// allocated
3862 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3863 		if (address >= args->physical_memory_range[i].start
3864 			&& address < args->physical_memory_range[i].start
3865 				+ args->physical_memory_range[i].size)
3866 			return true;
3867 	}
3868 	return false;
3869 }
3870 
3871 
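/*!	Allocates a single physical page during early boot by appending it to
	one of the kernel_args' \c physical_allocated_range entries.
	Returns the physical page number, or 0 if no page could be allocated.
*/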
3872 page_num_t
3873 vm_allocate_early_physical_page(kernel_args* args)
3874 {
3875 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3876 		phys_addr_t nextPage;
3877 
3878 		nextPage = args->physical_allocated_range[i].start
3879 			+ args->physical_allocated_range[i].size;
3880 		// see if the page right after this allocated paddr run can be allocated
3881 		if (i + 1 < args->num_physical_allocated_ranges
3882 			&& args->physical_allocated_range[i + 1].size != 0) {
3883 			// see if the next page will collide with the next allocated range
3884 			if (nextPage >= args->physical_allocated_range[i+1].start)
3885 				continue;
3886 		}
3887 		// see if the next physical page fits in the memory block
3888 		if (is_page_in_physical_memory_range(args, nextPage)) {
3889 			// we got one!
3890 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3891 			return nextPage / B_PAGE_SIZE;
3892 		}
3893 	}
3894 
3895 	return 0;
3896 		// could not allocate a block
3897 }
3898 
3899 
3900 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3901 	allocate some pages before the VM is completely up.
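
	A minimal usage sketch (the flags and sizes are assumptions, not taken
	from a specific caller):
		addr_t base = vm_allocate_early(args, B_PAGE_SIZE * 4, B_PAGE_SIZE * 4,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);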
3902 */
3903 addr_t
3904 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3905 	uint32 attributes, addr_t alignment)
3906 {
3907 	if (physicalSize > virtualSize)
3908 		physicalSize = virtualSize;
3909 
3910 	// find the vaddr to allocate at
3911 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3912 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3913 	if (virtualBase == 0) {
3914 		panic("vm_allocate_early: could not allocate virtual address\n");
3915 		return 0;
3916 	}
3917 
3918 	// map the pages
3919 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3920 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3921 		if (physicalAddress == 0)
3922 			panic("error allocating early page!\n");
3923 
3924 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3925 
3926 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3927 			physicalAddress * B_PAGE_SIZE, attributes,
3928 			&vm_allocate_early_physical_page);
3929 	}
3930 
3931 	return virtualBase;
3932 }
3933 
3934 
3935 /*!	The main entry point for initializing the VM. */
3936 status_t
3937 vm_init(kernel_args* args)
3938 {
3939 	struct preloaded_image* image;
3940 	void* address;
3941 	status_t err = 0;
3942 	uint32 i;
3943 
3944 	TRACE(("vm_init: entry\n"));
3945 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3946 	err = arch_vm_init(args);
3947 
3948 	// initialize some globals
3949 	vm_page_init_num_pages(args);
3950 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3951 
3952 	slab_init(args);
3953 
3954 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3955 	size_t heapSize = INITIAL_HEAP_SIZE;
3956 	// try to accommodate low memory systems
3957 	while (heapSize > sAvailableMemory / 8)
3958 		heapSize /= 2;
3959 	if (heapSize < 1024 * 1024)
3960 		panic("vm_init: go buy some RAM please.");
3961 
3962 	// map in the new heap and initialize it
3963 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3964 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3965 	TRACE(("heap at 0x%lx\n", heapBase));
3966 	heap_init(heapBase, heapSize);
3967 #endif
3968 
3969 	// initialize the free page list and physical page mapper
3970 	vm_page_init(args);
3971 
3972 	// initialize the cache allocators
3973 	vm_cache_init(args);
3974 
3975 	{
3976 		status_t error = VMAreaHash::Init();
3977 		if (error != B_OK)
3978 			panic("vm_init: error initializing area hash table\n");
3979 	}
3980 
3981 	VMAddressSpace::Init();
3982 	reserve_boot_loader_ranges(args);
3983 
3984 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3985 	heap_init_post_area();
3986 #endif
3987 
3988 	// Do any further initialization that the architecture dependent layers may
3989 	// need now
3990 	arch_vm_translation_map_init_post_area(args);
3991 	arch_vm_init_post_area(args);
3992 	vm_page_init_post_area(args);
3993 	slab_init_post_area();
3994 
3995 	// allocate areas to represent stuff that already exists
3996 
3997 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3998 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
3999 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4000 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4001 #endif
4002 
4003 	allocate_kernel_args(args);
4004 
4005 	create_preloaded_image_areas(args->kernel_image);
4006 
4007 	// allocate areas for preloaded images
4008 	for (image = args->preloaded_images; image != NULL; image = image->next)
4009 		create_preloaded_image_areas(image);
4010 
4011 	// allocate kernel stacks
4012 	for (i = 0; i < args->num_cpus; i++) {
4013 		char name[64];
4014 
4015 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4016 		address = (void*)args->cpu_kstack[i].start;
4017 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4018 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4019 	}
4020 
4021 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4022 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4023 
4024 #if PARANOID_KERNEL_MALLOC
4025 	vm_block_address_range("uninitialized heap memory",
4026 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4027 #endif
4028 #if PARANOID_KERNEL_FREE
4029 	vm_block_address_range("freed heap memory",
4030 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4031 #endif
4032 
4033 	// create the object cache for the page mappings
4034 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4035 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4036 		NULL, NULL);
4037 	if (gPageMappingsObjectCache == NULL)
4038 		panic("failed to create page mappings object cache");
4039 
4040 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4041 
4042 #if DEBUG_CACHE_LIST
4043 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4044 		virtual_address_restrictions virtualRestrictions = {};
4045 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4046 		physical_address_restrictions physicalRestrictions = {};
4047 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4048 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4049 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4050 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4051 			&physicalRestrictions, (void**)&sCacheInfoTable);
4052 	}
4053 #endif	// DEBUG_CACHE_LIST
4054 
4055 	// add some debugger commands
4056 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4057 	add_debugger_command("area", &dump_area,
4058 		"Dump info about a particular area");
4059 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4060 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4061 #if DEBUG_CACHE_LIST
4062 	if (sCacheInfoTable != NULL) {
4063 		add_debugger_command_etc("caches", &dump_caches,
4064 			"List all VMCache trees",
4065 			"[ \"-c\" ]\n"
4066 			"All cache trees are listed sorted in decreasing order by number "
4067 				"of\n"
4068 			"used pages or, if \"-c\" is specified, by size of committed "
4069 				"memory.\n",
4070 			0);
4071 	}
4072 #endif
4073 	add_debugger_command("avail", &dump_available_memory,
4074 		"Dump available memory");
4075 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4076 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4077 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4078 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4079 	add_debugger_command("string", &display_mem, "dump strings");
4080 
4081 	add_debugger_command_etc("mapping", &dump_mapping_info,
4082 		"Print address mapping information",
4083 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4084 		"Prints low-level page mapping information for a given address. If\n"
4085 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4086 		"address that is looked up in the translation map of the current\n"
4087 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4088 		"\"-r\" is specified, <address> is a physical address that is\n"
4089 		"searched in the translation map of all teams, respectively the team\n"
4090 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4091 		"<address> is the address of a vm_page structure. The behavior is\n"
4092 		"equivalent to specifying \"-r\" with the physical address of that\n"
4093 		"page.\n",
4094 		0);
4095 
4096 	TRACE(("vm_init: exit\n"));
4097 
4098 	vm_cache_init_post_heap();
4099 
4100 	return err;
4101 }
4102 
4103 
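/*!	Second initialization stage, called once semaphores are available:
	returns unused boot loader resources and lets the lower layers
	(translation map, slab, heap) create the locks and semaphores they could
	not allocate earlier.
*/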
4104 status_t
4105 vm_init_post_sem(kernel_args* args)
4106 {
4107 	// This frees all unused boot loader resources and makes their space
4108 	// available again
4109 	arch_vm_init_end(args);
4110 	unreserve_boot_loader_ranges(args);
4111 
4112 	// fill in all of the semaphores that were not allocated before
4113 	// since we're still single threaded and only the kernel address space
4114 	// exists, it isn't that hard to find all of the ones we need to create
4115 
4116 	arch_vm_translation_map_init_post_sem(args);
4117 
4118 	slab_init_post_sem();
4119 
4120 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4121 	heap_init_post_sem();
4122 #endif
4123 
4124 	return B_OK;
4125 }
4126 
4127 
4128 status_t
4129 vm_init_post_thread(kernel_args* args)
4130 {
4131 	vm_page_init_post_thread(args);
4132 	slab_init_post_thread();
4133 	return heap_init_post_thread();
4134 }
4135 
4136 
4137 status_t
4138 vm_init_post_modules(kernel_args* args)
4139 {
4140 	return arch_vm_init_post_modules(args);
4141 }
4142 
4143 
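/*!	Increments the current thread's \c page_faults_allowed counter, which
	the architecture specific fault handling code uses to decide whether a
	page fault may be handled for this thread. forbid_page_faults() is the
	matching decrement.
*/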
4144 void
4145 permit_page_faults(void)
4146 {
4147 	Thread* thread = thread_get_current_thread();
4148 	if (thread != NULL)
4149 		atomic_add(&thread->page_faults_allowed, 1);
4150 }
4151 
4152 
4153 void
4154 forbid_page_faults(void)
4155 {
4156 	Thread* thread = thread_get_current_thread();
4157 	if (thread != NULL)
4158 		atomic_add(&thread->page_faults_allowed, -1);
4159 }
4160 
4161 
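/*!	Generic page fault handler, called by the architecture specific fault
	code. Tries to resolve the fault via vm_soft_fault(). On failure, kernel
	faults are redirected to the thread's fault handler (via \a newIP) or
	cause a panic, while userland faults result in a SIGSEGV being sent to
	the thread (after giving the user debugger a chance to intervene).
*/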
4162 status_t
4163 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4164 	bool isUser, addr_t* newIP)
4165 {
4166 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4167 		faultAddress));
4168 
4169 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4170 
4171 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4172 	VMAddressSpace* addressSpace = NULL;
4173 
4174 	status_t status = B_OK;
4175 	*newIP = 0;
4176 	atomic_add((int32*)&sPageFaults, 1);
4177 
4178 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4179 		addressSpace = VMAddressSpace::GetKernel();
4180 	} else if (IS_USER_ADDRESS(pageAddress)) {
4181 		addressSpace = VMAddressSpace::GetCurrent();
4182 		if (addressSpace == NULL) {
4183 			if (!isUser) {
4184 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4185 					"memory!\n");
4186 				status = B_BAD_ADDRESS;
4187 				TPF(PageFaultError(-1,
4188 					VMPageFaultTracing
4189 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4190 			} else {
4191 				// XXX weird state.
4192 				panic("vm_page_fault: non kernel thread accessing user memory "
4193 					"that doesn't exist!\n");
4194 				status = B_BAD_ADDRESS;
4195 			}
4196 		}
4197 	} else {
4198 		// the hit was probably in the 64k DMZ between kernel and user space
4199 		// this keeps a user space thread from passing a buffer that crosses
4200 		// into kernel space
4201 		status = B_BAD_ADDRESS;
4202 		TPF(PageFaultError(-1,
4203 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4204 	}
4205 
4206 	if (status == B_OK) {
4207 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4208 			isUser, NULL);
4209 	}
4210 
4211 	if (status < B_OK) {
4212 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4213 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4214 			strerror(status), address, faultAddress, isWrite, isUser,
4215 			thread_get_current_thread_id());
4216 		if (!isUser) {
4217 			Thread* thread = thread_get_current_thread();
4218 			if (thread != NULL && thread->fault_handler != 0) {
4219 				// this will cause the arch dependent page fault handler to
4220 				// modify the IP on the interrupt frame or whatever to return
4221 				// to this address
4222 				*newIP = thread->fault_handler;
4223 			} else {
4224 				// unhandled page fault in the kernel
4225 				panic("vm_page_fault: unhandled page fault in kernel space at "
4226 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4227 			}
4228 		} else {
4229 #if 1
4230 			// TODO: remove me once we have proper userland debugging support
4231 			// (and tools)
4232 			VMArea* area = NULL;
4233 			if (addressSpace != NULL) {
4234 				addressSpace->ReadLock();
4235 				area = addressSpace->LookupArea(faultAddress);
4236 			}
4237 
4238 			Thread* thread = thread_get_current_thread();
4239 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4240 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4241 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4242 				thread->team->Name(), thread->team->id,
4243 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4244 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4245 					area->Base() : 0x0));
4246 
4247 			// We can print a stack trace of the userland thread here.
4248 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4249 // fault and someone is already waiting for a write lock on the same address
4250 // space. This thread will then try to acquire the lock again and will
4251 // be queued after the writer.
4252 #	if 0
4253 			if (area) {
4254 				struct stack_frame {
4255 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4256 						struct stack_frame*	previous;
4257 						void*				return_address;
4258 					#else
4259 						// ...
4260 					#warning writeme
4261 					#endif
4262 				} frame;
4263 #		ifdef __INTEL__
4264 				struct iframe* iframe = x86_get_user_iframe();
4265 				if (iframe == NULL)
4266 					panic("iframe is NULL!");
4267 
4268 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4269 					sizeof(struct stack_frame));
4270 #		elif defined(__POWERPC__)
4271 				struct iframe* iframe = ppc_get_user_iframe();
4272 				if (iframe == NULL)
4273 					panic("iframe is NULL!");
4274 
4275 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4276 					sizeof(struct stack_frame));
4277 #		else
4278 #			warning "vm_page_fault() stack trace won't work"
4279 				status = B_ERROR;
4280 #		endif
4281 
4282 				dprintf("stack trace:\n");
4283 				int32 maxFrames = 50;
4284 				while (status == B_OK && --maxFrames >= 0
4285 						&& frame.return_address != NULL) {
4286 					dprintf("  %p", frame.return_address);
4287 					area = addressSpace->LookupArea(
4288 						(addr_t)frame.return_address);
4289 					if (area) {
4290 						dprintf(" (%s + %#lx)", area->name,
4291 							(addr_t)frame.return_address - area->Base());
4292 					}
4293 					dprintf("\n");
4294 
4295 					status = user_memcpy(&frame, frame.previous,
4296 						sizeof(struct stack_frame));
4297 				}
4298 			}
4299 #	endif	// 0 (stack trace)
4300 
4301 			if (addressSpace != NULL)
4302 				addressSpace->ReadUnlock();
4303 #endif
4304 
4305 			// If the thread has a signal handler for SIGSEGV, we simply
4306 			// send it the signal. Otherwise we notify the user debugger
4307 			// first.
4308 			struct sigaction action;
4309 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4310 					&& action.sa_handler != SIG_DFL
4311 					&& action.sa_handler != SIG_IGN)
4312 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4313 					SIGSEGV)) {
4314 				Signal signal(SIGSEGV,
4315 					status == B_PERMISSION_DENIED
4316 						? SEGV_ACCERR : SEGV_MAPERR,
4317 					EFAULT, thread->team->id);
4318 				signal.SetAddress((void*)address);
4319 				send_signal_to_thread(thread, signal, 0);
4320 			}
4321 		}
4322 	}
4323 
4324 	if (addressSpace != NULL)
4325 		addressSpace->Put();
4326 
4327 	return B_HANDLED_INTERRUPT;
4328 }
4329 
4330 
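/*!	Bundles the locks, the page reservation and the intermediate state that
	vm_soft_fault() needs while resolving a single fault, so the whole
	operation can easily be restarted.
*/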
4331 struct PageFaultContext {
4332 	AddressSpaceReadLocker	addressSpaceLocker;
4333 	VMCacheChainLocker		cacheChainLocker;
4334 
4335 	VMTranslationMap*		map;
4336 	VMCache*				topCache;
4337 	off_t					cacheOffset;
4338 	vm_page_reservation		reservation;
4339 	bool					isWrite;
4340 
4341 	// return values
4342 	vm_page*				page;
4343 	bool					restart;
4344 
4345 
4346 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4347 		:
4348 		addressSpaceLocker(addressSpace, true),
4349 		map(addressSpace->TranslationMap()),
4350 		isWrite(isWrite)
4351 	{
4352 	}
4353 
4354 	~PageFaultContext()
4355 	{
4356 		UnlockAll();
4357 		vm_page_unreserve_pages(&reservation);
4358 	}
4359 
4360 	void Prepare(VMCache* topCache, off_t cacheOffset)
4361 	{
4362 		this->topCache = topCache;
4363 		this->cacheOffset = cacheOffset;
4364 		page = NULL;
4365 		restart = false;
4366 
4367 		cacheChainLocker.SetTo(topCache);
4368 	}
4369 
4370 	void UnlockAll(VMCache* exceptCache = NULL)
4371 	{
4372 		topCache = NULL;
4373 		addressSpaceLocker.Unlock();
4374 		cacheChainLocker.Unlock(exceptCache);
4375 	}
4376 };
4377 
4378 
4379 /*!	Gets the page that should be mapped into the area.
4380 	Returns an error code other than \c B_OK, if the page couldn't be found or
4381 	paged in. The locking state of the address space and the caches is undefined
4382 	in that case.
4383 	Returns \c B_OK with \c context.restart set to \c true, if the function
4384 	had to unlock the address space and all caches and is supposed to be called
4385 	again.
4386 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4387 	found. It is returned in \c context.page. The address space will still be
4388 	locked as well as all caches starting from the top cache to at least the
4389 	cache the page lives in.
4390 */
4391 static status_t
4392 fault_get_page(PageFaultContext& context)
4393 {
4394 	VMCache* cache = context.topCache;
4395 	VMCache* lastCache = NULL;
4396 	vm_page* page = NULL;
4397 
4398 	while (cache != NULL) {
4399 		// We already hold the lock of the cache at this point.
4400 
4401 		lastCache = cache;
4402 
4403 		page = cache->LookupPage(context.cacheOffset);
4404 		if (page != NULL && page->busy) {
4405 			// page is busy -- wait for it to become unbusy
4406 			context.UnlockAll(cache);
4407 			cache->ReleaseRefLocked();
4408 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4409 
4410 			// restart the whole process
4411 			context.restart = true;
4412 			return B_OK;
4413 		}
4414 
4415 		if (page != NULL)
4416 			break;
4417 
4418 		// The current cache does not contain the page we're looking for.
4419 
4420 		// see if the backing store has it
4421 		if (cache->HasPage(context.cacheOffset)) {
4422 			// insert a fresh page and mark it busy -- we're going to read it in
4423 			page = vm_page_allocate_page(&context.reservation,
4424 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4425 			cache->InsertPage(page, context.cacheOffset);
4426 
4427 			// We need to unlock all caches and the address space while reading
4428 			// the page in. Keep a reference to the cache around.
4429 			cache->AcquireRefLocked();
4430 			context.UnlockAll();
4431 
4432 			// read the page in
4433 			generic_io_vec vec;
4434 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4435 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4436 
4437 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4438 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4439 
4440 			cache->Lock();
4441 
4442 			if (status < B_OK) {
4443 				// on error remove and free the page
4444 				dprintf("reading page from cache %p returned: %s!\n",
4445 					cache, strerror(status));
4446 
4447 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4448 				cache->RemovePage(page);
4449 				vm_page_set_state(page, PAGE_STATE_FREE);
4450 
4451 				cache->ReleaseRefAndUnlock();
4452 				return status;
4453 			}
4454 
4455 			// mark the page unbusy again
4456 			cache->MarkPageUnbusy(page);
4457 
4458 			DEBUG_PAGE_ACCESS_END(page);
4459 
4460 			// Since we needed to unlock everything temporarily, the area
4461 			// situation might have changed. So we need to restart the whole
4462 			// process.
4463 			cache->ReleaseRefAndUnlock();
4464 			context.restart = true;
4465 			return B_OK;
4466 		}
4467 
4468 		cache = context.cacheChainLocker.LockSourceCache();
4469 	}
4470 
4471 	if (page == NULL) {
4472 		// There was no adequate page, determine the cache for a clean one.
4473 		// Read-only pages come in the deepest cache, only the top most cache
4474 		// Read-only pages go into the deepest cache; only the topmost cache
4475 		cache = context.isWrite ? context.topCache : lastCache;
4476 
4477 		// allocate a clean page
4478 		page = vm_page_allocate_page(&context.reservation,
4479 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4480 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4481 			page->physical_page_number));
4482 
4483 		// insert the new page into our cache
4484 		cache->InsertPage(page, context.cacheOffset);
4485 	} else if (page->Cache() != context.topCache && context.isWrite) {
4486 		// We have a page that has the data we want, but in the wrong cache
4487 		// object so we need to copy it and stick it into the top cache.
4488 		vm_page* sourcePage = page;
4489 
4490 		// TODO: If memory is low, it might be a good idea to steal the page
4491 		// from our source cache -- if possible, that is.
4492 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4493 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4494 
4495 		// To not needlessly kill concurrency we unlock all caches but the top
4496 		// one while copying the page. Lacking another mechanism to ensure that
4497 		// the source page doesn't disappear, we mark it busy.
4498 		sourcePage->busy = true;
4499 		context.cacheChainLocker.UnlockKeepRefs(true);
4500 
4501 		// copy the page
4502 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4503 			sourcePage->physical_page_number * B_PAGE_SIZE);
4504 
4505 		context.cacheChainLocker.RelockCaches(true);
4506 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4507 
4508 		// insert the new page into our cache
4509 		context.topCache->InsertPage(page, context.cacheOffset);
4510 	} else
4511 		DEBUG_PAGE_ACCESS_START(page);
4512 
4513 	context.page = page;
4514 	return B_OK;
4515 }
4516 
4517 
4518 /*!	Makes sure the address in the given address space is mapped.
4519 
4520 	\param addressSpace The address space.
4521 	\param originalAddress The address. Doesn't need to be page aligned.
4522 	\param isWrite If \c true the address shall be write-accessible.
	\param isExecute If \c true the address shall be executable.
4523 	\param isUser If \c true the access is requested by a userland team.
4524 	\param wirePage On success, if non \c NULL, the wired count of the page
4525 		mapped at the given address is incremented and the page is returned
4526 		via this parameter.
4527 	\param wiredRange If given, this wiredRange is ignored when checking whether
4528 		an already mapped page at the virtual address can be unmapped.
4529 	\return \c B_OK on success, another error code otherwise.
4530 */
4531 static status_t
4532 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4533 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage,
4534 	VMAreaWiredRange* wiredRange)
4535 {
4536 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4537 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4538 		originalAddress, isWrite, isUser));
4539 
4540 	PageFaultContext context(addressSpace, isWrite);
4541 
4542 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4543 	status_t status = B_OK;
4544 
4545 	addressSpace->IncrementFaultCount();
4546 
4547 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4548 	// the pages upfront makes sure we don't have any cache locked, so that the
4549 	// page daemon/thief can do their job without problems.
4550 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4551 		originalAddress);
4552 	context.addressSpaceLocker.Unlock();
4553 	vm_page_reserve_pages(&context.reservation, reservePages,
4554 		addressSpace == VMAddressSpace::Kernel()
4555 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4556 
4557 	while (true) {
4558 		context.addressSpaceLocker.Lock();
4559 
4560 		// get the area the fault was in
4561 		VMArea* area = addressSpace->LookupArea(address);
4562 		if (area == NULL) {
4563 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4564 				"space\n", originalAddress);
4565 			TPF(PageFaultError(-1,
4566 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4567 			status = B_BAD_ADDRESS;
4568 			break;
4569 		}
4570 
4571 		// check permissions
4572 		uint32 protection = get_area_page_protection(area, address);
4573 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4574 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4575 				area->id, (void*)originalAddress);
4576 			TPF(PageFaultError(area->id,
4577 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4578 			status = B_PERMISSION_DENIED;
4579 			break;
4580 		}
4581 		if (isWrite && (protection
4582 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4583 			dprintf("write access attempted on write-protected area 0x%"
4584 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4585 			TPF(PageFaultError(area->id,
4586 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4587 			status = B_PERMISSION_DENIED;
4588 			break;
4589 		} else if (isExecute && (protection
4590 				& (B_EXECUTE_AREA
4591 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4592 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4593 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4594 			TPF(PageFaultError(area->id,
4595 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4596 			status = B_PERMISSION_DENIED;
4597 			break;
4598 		} else if (!isWrite && !isExecute && (protection
4599 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4600 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4601 				" at %p\n", area->id, (void*)originalAddress);
4602 			TPF(PageFaultError(area->id,
4603 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4604 			status = B_PERMISSION_DENIED;
4605 			break;
4606 		}
4607 
4608 		// We have the area, it was a valid access, so let's try to resolve the
4609 		// page fault now.
4610 		// At first, the top most cache from the area is investigated.
4611 
4612 		context.Prepare(vm_area_get_locked_cache(area),
4613 			address - area->Base() + area->cache_offset);
4614 
4615 		// See if this cache has a fault handler -- this will do all the work
4616 		// for us.
4617 		{
4618 			// Note, since the page fault is resolved with interrupts enabled,
4619 			// the fault handler could be called more than once for the same
4620 			// reason -- the store must take this into account.
4621 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4622 			if (status != B_BAD_HANDLER)
4623 				break;
4624 		}
4625 
4626 		// The top most cache has no fault handler, so let's see if the cache or
4627 		// its sources already have the page we're searching for (we're going
4628 		// from top to bottom).
4629 		status = fault_get_page(context);
4630 		if (status != B_OK) {
4631 			TPF(PageFaultError(area->id, status));
4632 			break;
4633 		}
4634 
4635 		if (context.restart)
4636 			continue;
4637 
4638 		// All went fine, all there is left to do is to map the page into the
4639 		// address space.
4640 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4641 			context.page));
4642 
4643 		// If the page doesn't reside in the area's cache, we need to make sure
4644 		// it's mapped in read-only, so that we cannot overwrite someone else's
4645 		// data (copy-on-write)
4646 		uint32 newProtection = protection;
4647 		if (context.page->Cache() != context.topCache && !isWrite)
4648 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4649 
4650 		bool unmapPage = false;
4651 		bool mapPage = true;
4652 
4653 		// check whether there's already a page mapped at the address
4654 		context.map->Lock();
4655 
4656 		phys_addr_t physicalAddress;
4657 		uint32 flags;
4658 		vm_page* mappedPage = NULL;
4659 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4660 			&& (flags & PAGE_PRESENT) != 0
4661 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4662 				!= NULL) {
4663 			// Yep there's already a page. If it's ours, we can simply adjust
4664 			// its protection. Otherwise we have to unmap it.
4665 			if (mappedPage == context.page) {
4666 				context.map->ProtectPage(area, address, newProtection);
4667 					// Note: We assume that ProtectPage() is atomic (i.e.
4668 					// the page isn't temporarily unmapped), otherwise we'd have
4669 					// to make sure it isn't wired.
4670 				mapPage = false;
4671 			} else
4672 				unmapPage = true;
4673 		}
4674 
4675 		context.map->Unlock();
4676 
4677 		if (unmapPage) {
4678 			// If the page is wired, we can't unmap it. Wait until it is unwired
4679 			// again and restart.
4680 			VMAreaUnwiredWaiter waiter;
4681 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4682 					wiredRange)) {
4683 				// unlock everything and wait
4684 				context.UnlockAll();
4685 				waiter.waitEntry.Wait();
4686 				continue;
4687 			}
4688 
4689 			// Note: The mapped page is a page of a lower cache. We are
4690 			// guaranteed to have that cache locked, our new page is a copy of
4691 			// that page, and the page is not busy. The logic for that guarantee
4692 			// is as follows: Since the page is mapped, it must live in the top
4693 			// cache (ruled out above) or any of its lower caches, and there is
4694 			// (was before the new page was inserted) no other page in any
4695 			// cache between the top cache and the page's cache (otherwise that
4696 			// would be mapped instead). That in turn means that our algorithm
4697 			// must have found it and therefore it cannot be busy either.
4698 			DEBUG_PAGE_ACCESS_START(mappedPage);
4699 			unmap_page(area, address);
4700 			DEBUG_PAGE_ACCESS_END(mappedPage);
4701 		}
4702 
4703 		if (mapPage) {
4704 			if (map_page(area, context.page, address, newProtection,
4705 					&context.reservation) != B_OK) {
4706 				// Mapping can only fail when the page mapping object couldn't
4707 				// be allocated. Save for the missing mapping everything is
4708 				// fine, though. If this was a regular page fault, we'll simply
4709 				// leave and probably fault again. To make sure we'll have more
4710 				// luck then, we ensure that the minimum object reserve is
4711 				// available.
4712 				DEBUG_PAGE_ACCESS_END(context.page);
4713 
4714 				context.UnlockAll();
4715 
4716 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4717 						!= B_OK) {
4718 					// Apparently the situation is serious. Let's get ourselves
4719 					// killed.
4720 					status = B_NO_MEMORY;
4721 				} else if (wirePage != NULL) {
4722 					// The caller expects us to wire the page. Since
4723 					// object_cache_reserve() succeeded, we should now be able
4724 					// to allocate a mapping structure. Restart.
4725 					continue;
4726 				}
4727 
4728 				break;
4729 			}
4730 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4731 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4732 
4733 		// also wire the page, if requested
4734 		if (wirePage != NULL && status == B_OK) {
4735 			increment_page_wired_count(context.page);
4736 			*wirePage = context.page;
4737 		}
4738 
4739 		DEBUG_PAGE_ACCESS_END(context.page);
4740 
4741 		break;
4742 	}
4743 
4744 	return status;
4745 }
4746 
4747 
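// The following functions are thin wrappers around the physical page
// mapper; they provide temporary virtual mappings of arbitrary physical
// pages (with CPU-local and debug-context variants).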
4748 status_t
4749 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4750 {
4751 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4752 }
4753 
4754 status_t
4755 vm_put_physical_page(addr_t vaddr, void* handle)
4756 {
4757 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4758 }
4759 
4760 
4761 status_t
4762 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4763 	void** _handle)
4764 {
4765 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4766 }
4767 
4768 status_t
4769 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4770 {
4771 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4772 }
4773 
4774 
4775 status_t
4776 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4777 {
4778 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4779 }
4780 
4781 status_t
4782 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4783 {
4784 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4785 }
4786 
4787 
4788 void
4789 vm_get_info(system_info* info)
4790 {
4791 	swap_get_info(info);
4792 
4793 	MutexLocker locker(sAvailableMemoryLock);
4794 	info->needed_memory = sNeededMemory;
4795 	info->free_memory = sAvailableMemory;
4796 }
4797 
4798 
4799 uint32
4800 vm_num_page_faults(void)
4801 {
4802 	return sPageFaults;
4803 }
4804 
4805 
4806 off_t
4807 vm_available_memory(void)
4808 {
4809 	MutexLocker locker(sAvailableMemoryLock);
4810 	return sAvailableMemory;
4811 }
4812 
4813 
4814 off_t
4815 vm_available_not_needed_memory(void)
4816 {
4817 	MutexLocker locker(sAvailableMemoryLock);
4818 	return sAvailableMemory - sNeededMemory;
4819 }
4820 
4821 
4822 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4823 	debugger.
4824 */
4825 off_t
4826 vm_available_not_needed_memory_debug(void)
4827 {
4828 	return sAvailableMemory - sNeededMemory;
4829 }
4830 
4831 
4832 size_t
4833 vm_kernel_address_space_left(void)
4834 {
4835 	return VMAddressSpace::Kernel()->FreeSpace();
4836 }
4837 
4838 
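/*!	Returns \a amount bytes of previously reserved memory to the pool of
	available memory (the counterpart of vm_try_reserve_memory()).
*/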
4839 void
4840 vm_unreserve_memory(size_t amount)
4841 {
4842 	mutex_lock(&sAvailableMemoryLock);
4843 
4844 	sAvailableMemory += amount;
4845 
4846 	mutex_unlock(&sAvailableMemoryLock);
4847 }
4848 
4849 
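/*!	Tries to reserve \a amount bytes of memory, keeping the reserve defined
	for the given \a priority untouched. If the memory is not immediately
	available, the low resource manager is notified and the function waits
	up to the given relative \a timeout for memory to become available.
	Returns \c B_OK on success, \c B_NO_MEMORY otherwise.

	A minimal usage sketch (sizes and timeout are assumptions, not taken
	from a real caller):
		if (vm_try_reserve_memory(16 * B_PAGE_SIZE, VM_PRIORITY_USER,
				1000000) != B_OK)
			return B_NO_MEMORY;
		// ... use the memory ...
		vm_unreserve_memory(16 * B_PAGE_SIZE);
*/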
4850 status_t
4851 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4852 {
4853 	size_t reserve = kMemoryReserveForPriority[priority];
4854 
4855 	MutexLocker locker(sAvailableMemoryLock);
4856 
4857 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4858 
4859 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4860 		sAvailableMemory -= amount;
4861 		return B_OK;
4862 	}
4863 
4864 	if (timeout <= 0)
4865 		return B_NO_MEMORY;
4866 
4867 	// turn timeout into an absolute timeout
4868 	timeout += system_time();
4869 
4870 	// loop until we've got the memory or the timeout occurs
4871 	do {
4872 		sNeededMemory += amount;
4873 
4874 		// call the low resource manager
4875 		locker.Unlock();
4876 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4877 			B_ABSOLUTE_TIMEOUT, timeout);
4878 		locker.Lock();
4879 
4880 		sNeededMemory -= amount;
4881 
4882 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4883 			sAvailableMemory -= amount;
4884 			return B_OK;
4885 		}
4886 	} while (timeout > system_time());
4887 
4888 	return B_NO_MEMORY;
4889 }
4890 
4891 
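/*!	Changes the memory type of the given area and of its already mapped
	pages, and forwards the new type to the architecture specific code for
	the underlying physical range. If the latter fails, the previous type is
	restored.
*/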
4892 status_t
4893 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4894 {
4895 	// NOTE: The caller is responsible for synchronizing calls to this function!
4896 
4897 	AddressSpaceReadLocker locker;
4898 	VMArea* area;
4899 	status_t status = locker.SetFromArea(id, area);
4900 	if (status != B_OK)
4901 		return status;
4902 
4903 	// nothing to do, if the type doesn't change
4904 	uint32 oldType = area->MemoryType();
4905 	if (type == oldType)
4906 		return B_OK;
4907 
4908 	// set the memory type of the area and the mapped pages
4909 	VMTranslationMap* map = area->address_space->TranslationMap();
4910 	map->Lock();
4911 	area->SetMemoryType(type);
4912 	map->ProtectArea(area, area->protection);
4913 	map->Unlock();
4914 
4915 	// set the physical memory type
4916 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4917 	if (error != B_OK) {
4918 		// reset the memory type of the area and the mapped pages
4919 		map->Lock();
4920 		area->SetMemoryType(oldType);
4921 		map->ProtectArea(area, area->protection);
4922 		map->Unlock();
4923 		return error;
4924 	}
4925 
4926 	return B_OK;
4928 }
4929 
4930 
4931 /*!	This function enforces some protection properties:
4932 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4933 	 - if B_EXECUTE_AREA is set, B_KERNEL_EXECUTE_AREA is set as well
4934 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4935 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4936 	   and B_KERNEL_WRITE_AREA.
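
	For example, a request for \c B_READ_AREA | \c B_WRITE_AREA is expanded
	to \c B_READ_AREA | \c B_WRITE_AREA | \c B_KERNEL_READ_AREA
	| \c B_KERNEL_WRITE_AREA.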
4937 */
4938 static void
4939 fix_protection(uint32* protection)
4940 {
4941 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4942 		if ((*protection & B_USER_PROTECTION) == 0
4943 			|| (*protection & B_WRITE_AREA) != 0)
4944 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4945 		else
4946 			*protection |= B_KERNEL_READ_AREA;
4947 		if ((*protection & B_EXECUTE_AREA) != 0)
4948 			*protection |= B_KERNEL_EXECUTE_AREA;
4949 	}
4950 }
4951 
4952 
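/*!	Fills in the given \c area_info structure from the given area. The
	copy/in/out counters are not tracked yet and are reported as 0, and
	\c ram_size is approximated by the size of the area's cache.
*/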
4953 static void
4954 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4955 {
4956 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4957 	info->area = area->id;
4958 	info->address = (void*)area->Base();
4959 	info->size = area->Size();
4960 	info->protection = area->protection;
4961 	info->lock = B_FULL_LOCK;
4962 	info->team = area->address_space->ID();
4963 	info->copy_count = 0;
4964 	info->in_count = 0;
4965 	info->out_count = 0;
4966 		// TODO: retrieve real values here!
4967 
4968 	VMCache* cache = vm_area_get_locked_cache(area);
4969 
4970 	// Note, this is a simplification; the cache could be larger than this area
4971 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4972 
4973 	vm_area_put_locked_cache(cache);
4974 }
4975 
4976 
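/*!	Resizes the given RAM area, and all other areas attached to its cache,
	to \a newSize, growing or shrinking the cache accordingly. \a newSize
	must be a multiple of \c B_PAGE_SIZE. When shrinking, the function first
	waits for the cut-off ranges of all affected areas to become unwired.
*/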
4977 static status_t
4978 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4979 {
4980 	// is newSize a multiple of B_PAGE_SIZE?
4981 	if (newSize & (B_PAGE_SIZE - 1))
4982 		return B_BAD_VALUE;
4983 
4984 	// lock all affected address spaces and the cache
4985 	VMArea* area;
4986 	VMCache* cache;
4987 
4988 	MultiAddressSpaceLocker locker;
4989 	AreaCacheLocker cacheLocker;
4990 
4991 	status_t status;
4992 	size_t oldSize;
4993 	bool anyKernelArea;
4994 	bool restart;
4995 
4996 	do {
4997 		anyKernelArea = false;
4998 		restart = false;
4999 
5000 		locker.Unset();
5001 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5002 		if (status != B_OK)
5003 			return status;
5004 		cacheLocker.SetTo(cache, true);	// already locked
5005 
5006 		// enforce restrictions
5007 		if (!kernel) {
5008 			if ((area->protection & B_KERNEL_AREA) != 0)
5009 				return B_NOT_ALLOWED;
5010 			// TODO: Enforce all restrictions (team, etc.)!
5011 		}
5012 
5013 		oldSize = area->Size();
5014 		if (newSize == oldSize)
5015 			return B_OK;
5016 
5017 		if (cache->type != CACHE_TYPE_RAM)
5018 			return B_NOT_ALLOWED;
5019 
5020 		if (oldSize < newSize) {
5021 			// We need to check if all areas of this cache can be resized.
5022 			for (VMArea* current = cache->areas; current != NULL;
5023 					current = current->cache_next) {
5024 				if (!current->address_space->CanResizeArea(current, newSize))
5025 					return B_ERROR;
5026 				anyKernelArea
5027 					|= current->address_space == VMAddressSpace::Kernel();
5028 			}
5029 		} else {
5030 			// We're shrinking the areas, so we must make sure the affected
5031 			// ranges are not wired.
5032 			for (VMArea* current = cache->areas; current != NULL;
5033 					current = current->cache_next) {
5034 				anyKernelArea
5035 					|= current->address_space == VMAddressSpace::Kernel();
5036 
5037 				if (wait_if_area_range_is_wired(current,
5038 						current->Base() + newSize, oldSize - newSize, &locker,
5039 						&cacheLocker)) {
5040 					restart = true;
5041 					break;
5042 				}
5043 			}
5044 		}
5045 	} while (restart);
5046 
5047 	// Okay, looks good so far, so let's do it
5048 
5049 	int priority = kernel && anyKernelArea
5050 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5051 	uint32 allocationFlags = kernel && anyKernelArea
5052 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5053 
5054 	if (oldSize < newSize) {
5055 		// Growing the cache can fail, so we do it first.
5056 		status = cache->Resize(cache->virtual_base + newSize, priority);
5057 		if (status != B_OK)
5058 			return status;
5059 	}
5060 
5061 	for (VMArea* current = cache->areas; current != NULL;
5062 			current = current->cache_next) {
5063 		status = current->address_space->ResizeArea(current, newSize,
5064 			allocationFlags);
5065 		if (status != B_OK)
5066 			break;
5067 
5068 		// We also need to unmap all pages beyond the new size, if the area has
5069 		// shrunk
5070 		if (newSize < oldSize) {
5071 			VMCacheChainLocker cacheChainLocker(cache);
5072 			cacheChainLocker.LockAllSourceCaches();
5073 
5074 			unmap_pages(current, current->Base() + newSize,
5075 				oldSize - newSize);
5076 
5077 			cacheChainLocker.Unlock(cache);
5078 		}
5079 	}
5080 
5081 	if (status == B_OK) {
5082 		// Shrink or grow individual page protections if in use.
5083 		if (area->page_protections != NULL) {
5084 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5085 			uint8* newProtections
5086 				= (uint8*)realloc(area->page_protections, bytes);
5087 			if (newProtections == NULL)
5088 				status = B_NO_MEMORY;
5089 			else {
5090 				area->page_protections = newProtections;
5091 
5092 				if (oldSize < newSize) {
5093 					// init the additional page protections to that of the area
5094 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5095 					uint32 areaProtection = area->protection
5096 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5097 					memset(area->page_protections + offset,
5098 						areaProtection | (areaProtection << 4), bytes - offset);
5099 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5100 						uint8& entry = area->page_protections[offset - 1];
5101 						entry = (entry & 0x0f) | (areaProtection << 4);
5102 					}
5103 				}
5104 			}
5105 		}
5106 	}
5107 
5108 	// shrinking the cache can't fail, so we do it now
5109 	if (status == B_OK && newSize < oldSize)
5110 		status = cache->Resize(cache->virtual_base + newSize, priority);
5111 
5112 	if (status != B_OK) {
5113 		// Something failed -- resize the areas back to their original size.
5114 		// This can fail, too, in which case we're seriously screwed.
5115 		for (VMArea* current = cache->areas; current != NULL;
5116 				current = current->cache_next) {
5117 			if (current->address_space->ResizeArea(current, oldSize,
5118 					allocationFlags) != B_OK) {
5119 				panic("vm_resize_area(): Failed and not being able to restore "
5120 					"original state.");
5121 			}
5122 		}
5123 
5124 		cache->Resize(cache->virtual_base + oldSize, priority);
5125 	}
5126 
5127 	// TODO: we must honour the lock restrictions of this area
5128 	return status;
5129 }
5130 
5131 
5132 status_t
5133 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5134 {
5135 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5136 }
5137 
5138 
5139 status_t
5140 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5141 {
5142 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5143 }
5144 
5145 
5146 status_t
5147 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5148 	bool user)
5149 {
5150 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5151 }
5152 
5153 
5154 void
5155 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5156 {
5157 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5158 }
5159 
5160 
5161 /*!	Copies a range of memory directly from/to a page that might not be mapped
5162 	at the moment.
5163 
5164 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5165 	walks through the respective area's cache chain to find the physical page
5166 	and copies from/to it directly.
5167 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5168 	must not cross a page boundary.
5169 
5170 	\param teamID The team ID identifying the address space \a unsafeMemory is
5171 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5172 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5173 		is passed, the address space of the thread returned by
5174 		debug_get_debugged_thread() is used.
5175 	\param unsafeMemory The start of the unsafe memory range to be copied
5176 		from/to.
5177 	\param buffer A safely accessible kernel buffer to be copied from/to.
5178 	\param size The number of bytes to be copied.
5179 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5180 		\a unsafeMemory, the other way around otherwise.
5181 */
5182 status_t
5183 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5184 	size_t size, bool copyToUnsafe)
5185 {
5186 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5187 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5188 		return B_BAD_VALUE;
5189 	}
5190 
5191 	// get the address space for the debugged thread
5192 	VMAddressSpace* addressSpace;
5193 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5194 		addressSpace = VMAddressSpace::Kernel();
5195 	} else if (teamID == B_CURRENT_TEAM) {
5196 		Thread* thread = debug_get_debugged_thread();
5197 		if (thread == NULL || thread->team == NULL)
5198 			return B_BAD_ADDRESS;
5199 
5200 		addressSpace = thread->team->address_space;
5201 	} else
5202 		addressSpace = VMAddressSpace::DebugGet(teamID);
5203 
5204 	if (addressSpace == NULL)
5205 		return B_BAD_ADDRESS;
5206 
5207 	// get the area
5208 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5209 	if (area == NULL)
5210 		return B_BAD_ADDRESS;
5211 
5212 	// search the page
5213 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5214 		+ area->cache_offset;
5215 	VMCache* cache = area->cache;
5216 	vm_page* page = NULL;
5217 	while (cache != NULL) {
5218 		page = cache->DebugLookupPage(cacheOffset);
5219 		if (page != NULL)
5220 			break;
5221 
5222 		// Page not found in this cache -- if it is paged out, we must not try
5223 		// to get it from lower caches.
5224 		if (cache->DebugHasPage(cacheOffset))
5225 			break;
5226 
5227 		cache = cache->source;
5228 	}
5229 
5230 	if (page == NULL)
5231 		return B_UNSUPPORTED;
5232 
5233 	// copy from/to physical memory
5234 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5235 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5236 
5237 	if (copyToUnsafe) {
5238 		if (page->Cache() != area->cache)
5239 			return B_UNSUPPORTED;
5240 
5241 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5242 	}
5243 
5244 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5245 }
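
/*	Illustrative sketch (comment only): a hypothetical kernel debugger helper
	reading a 32 bit value from a possibly unmapped user page via the function
	above. Note that the accessed range must not cross a page boundary.

		static status_t
		debug_peek_user32(team_id team, void* unsafeAddress, uint32* _value)
		{
			// copy from the (potentially unmapped) address into a kernel buffer
			return vm_debug_copy_page_memory(team, unsafeAddress, _value,
				sizeof(*_value), false);
		}
*/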
5246 
5247 
5248 //	#pragma mark - kernel public API
5249 
5250 
5251 status_t
5252 user_memcpy(void* to, const void* from, size_t size)
5253 {
5254 	// don't allow address overflows
5255 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5256 		return B_BAD_ADDRESS;
5257 
5258 	if (arch_cpu_user_memcpy(to, from, size,
5259 			&thread_get_current_thread()->fault_handler) < B_OK)
5260 		return B_BAD_ADDRESS;
5261 
5262 	return B_OK;
5263 }
5264 
5265 
5266 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5267 	the string in \a to, NULL-terminating the result.
5268 
5269 	\param to Pointer to the destination C-string.
5270 	\param from Pointer to the source C-string.
5271 	\param size Size in bytes of the string buffer pointed to by \a to.
5272 
5273 	\return strlen(\a from) on success, or an error code on failure.
5274 */
5275 ssize_t
5276 user_strlcpy(char* to, const char* from, size_t size)
5277 {
5278 	if (to == NULL && size != 0)
5279 		return B_BAD_VALUE;
5280 	if (from == NULL)
5281 		return B_BAD_ADDRESS;
5282 
5283 	// limit size to avoid address overflows
5284 	size_t maxSize = std::min(size,
5285 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5286 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5287 		// the source address might still overflow.
5288 
5289 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize,
5290 		&thread_get_current_thread()->fault_handler);
5291 
5292 	// If we hit the address overflow boundary, fail.
5293 	if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5294 			&& maxSize < size)) {
5295 		return B_BAD_ADDRESS;
5296 	}
5297 
5298 	return result;
5299 }
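
/*	Illustrative sketch (comment only): the usual syscall pattern for pulling
	arguments out of userland with the helpers above -- the same pattern the
	_user_*() functions below follow. The syscall name is hypothetical.

		status_t
		_user_example_call(const char* userName, void** userAddress)
		{
			char name[B_OS_NAME_LENGTH];
			void* address;

			if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
				|| user_strlcpy(name, userName, sizeof(name)) < B_OK
				|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
				return B_BAD_ADDRESS;

			// ... work with the kernel-side copies "name" and "address" only ...
			return B_OK;
		}
*/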
5300 
5301 
5302 status_t
5303 user_memset(void* s, char c, size_t count)
5304 {
5305 	// don't allow address overflows
5306 	if ((addr_t)s + count < (addr_t)s)
5307 		return B_BAD_ADDRESS;
5308 
5309 	if (arch_cpu_user_memset(s, c, count,
5310 			&thread_get_current_thread()->fault_handler) < B_OK)
5311 		return B_BAD_ADDRESS;
5312 
5313 	return B_OK;
5314 }
5315 
5316 
5317 /*!	Wires a single page at the given address.
5318 
5319 	\param team The team whose address space the address belongs to. Supports
5320 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5321 		parameter is ignored.
5322 	\param address The virtual address to wire down. Does not need to be
5323 		page aligned.
5324 	\param writable If \c true the page shall be writable.
5325 	\param info On success the info is filled in, among other things
5326 		containing the physical address the given virtual one translates to.
5327 	\return \c B_OK if the page could be wired, another error code otherwise.
5328 */
5329 status_t
5330 vm_wire_page(team_id team, addr_t address, bool writable,
5331 	VMPageWiringInfo* info)
5332 {
5333 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5334 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5335 
5336 	// compute the page protection that is required
5337 	bool isUser = IS_USER_ADDRESS(address);
5338 	uint32 requiredProtection = PAGE_PRESENT
5339 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5340 	if (writable)
5341 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5342 
5343 	// get and read lock the address space
5344 	VMAddressSpace* addressSpace = NULL;
5345 	if (isUser) {
5346 		if (team == B_CURRENT_TEAM)
5347 			addressSpace = VMAddressSpace::GetCurrent();
5348 		else
5349 			addressSpace = VMAddressSpace::Get(team);
5350 	} else
5351 		addressSpace = VMAddressSpace::GetKernel();
5352 	if (addressSpace == NULL)
5353 		return B_ERROR;
5354 
5355 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5356 
5357 	VMTranslationMap* map = addressSpace->TranslationMap();
5358 	status_t error = B_OK;
5359 
5360 	// get the area
5361 	VMArea* area = addressSpace->LookupArea(pageAddress);
5362 	if (area == NULL) {
5363 		addressSpace->Put();
5364 		return B_BAD_ADDRESS;
5365 	}
5366 
5367 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5368 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5369 
5370 	// mark the area range wired
5371 	area->Wire(&info->range);
5372 
5373 	// Lock the area's cache chain and the translation map. Needed to look
5374 	// up the page and play with its wired count.
5375 	cacheChainLocker.LockAllSourceCaches();
5376 	map->Lock();
5377 
5378 	phys_addr_t physicalAddress;
5379 	uint32 flags;
5380 	vm_page* page;
5381 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5382 		&& (flags & requiredProtection) == requiredProtection
5383 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5384 			!= NULL) {
5385 		// Already mapped with the correct permissions -- just increment
5386 		// the page's wired count.
5387 		increment_page_wired_count(page);
5388 
5389 		map->Unlock();
5390 		cacheChainLocker.Unlock();
5391 		addressSpaceLocker.Unlock();
5392 	} else {
5393 		// Let vm_soft_fault() map the page for us, if possible. We need
5394 		// to fully unlock to avoid deadlocks. Since we have already
5395 		// wired the area itself, nothing disturbing will happen with it
5396 		// in the meantime.
5397 		map->Unlock();
5398 		cacheChainLocker.Unlock();
5399 		addressSpaceLocker.Unlock();
5400 
5401 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5402 			isUser, &page, &info->range);
5403 
5404 		if (error != B_OK) {
5405 			// The page could not be mapped -- clean up.
5406 			VMCache* cache = vm_area_get_locked_cache(area);
5407 			area->Unwire(&info->range);
5408 			cache->ReleaseRefAndUnlock();
5409 			addressSpace->Put();
5410 			return error;
5411 		}
5412 	}
5413 
5414 	info->physicalAddress
5415 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5416 			+ address % B_PAGE_SIZE;
5417 	info->page = page;
5418 
5419 	return B_OK;
5420 }
5421 
5422 
5423 /*!	Unwires a single page previously wired via vm_wire_page().
5424 
5425 	\param info The same object passed to vm_wire_page() before.
5426 */
5427 void
5428 vm_unwire_page(VMPageWiringInfo* info)
5429 {
5430 	// lock the address space
5431 	VMArea* area = info->range.area;
5432 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5433 		// takes over our reference
5434 
5435 	// lock the top cache
5436 	VMCache* cache = vm_area_get_locked_cache(area);
5437 	VMCacheChainLocker cacheChainLocker(cache);
5438 
5439 	if (info->page->Cache() != cache) {
5440 		// The page is not in the top cache, so we lock the whole cache chain
5441 		// before touching the page's wired count.
5442 		cacheChainLocker.LockAllSourceCaches();
5443 	}
5444 
5445 	decrement_page_wired_count(info->page);
5446 
5447 	// remove the wired range from the area
5448 	area->Unwire(&info->range);
5449 
5450 	cacheChainLocker.Unlock();
5451 }
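
/*	Illustrative sketch (comment only): the intended pairing of vm_wire_page()
	and vm_unwire_page(). The helper name is hypothetical; \c info has to stay
	alive between the two calls.

		static status_t
		with_wired_page(team_id team, addr_t address)
		{
			VMPageWiringInfo info;
			status_t error = vm_wire_page(team, address, true, &info);
			if (error != B_OK)
				return error;

			// ... access the page via info.physicalAddress ...

			vm_unwire_page(&info);
			return B_OK;
		}
*/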
5452 
5453 
5454 /*!	Wires down the given address range in the specified team's address space.
5455 
5456 	If successful the function
5457 	- acquires a reference to the specified team's address space,
5458 	- adds respective wired ranges to all areas that intersect with the given
5459 	  address range,
5460 	- makes sure all pages in the given address range are mapped with the
5461 	  requested access permissions and increments their wired count.
5462 
5463 	It fails when \a team doesn't specify a valid address space, when any part
5464 	of the specified address range is not covered by areas, when the concerned
5465 	areas don't allow mapping with the requested permissions, or when mapping
5466 	failed for another reason.
5467 
5468 	When successful, the call must be balanced by an unlock_memory_etc() call
5469 	with the exact same parameters.
5470 
5471 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5472 		supported.
5473 	\param address The start of the address range to be wired.
5474 	\param numBytes The size of the address range to be wired.
5475 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5476 		requests that the range must be wired writable ("read from device
5477 		into memory").
5478 	\return \c B_OK on success, another error code otherwise.
5479 */
5480 status_t
5481 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5482 {
5483 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5484 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5485 
5486 	// compute the page protection that is required
5487 	bool isUser = IS_USER_ADDRESS(address);
5488 	bool writable = (flags & B_READ_DEVICE) == 0;
5489 	uint32 requiredProtection = PAGE_PRESENT
5490 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5491 	if (writable)
5492 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5493 
5494 	uint32 mallocFlags = isUser
5495 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5496 
5497 	// get and read lock the address space
5498 	VMAddressSpace* addressSpace = NULL;
5499 	if (isUser) {
5500 		if (team == B_CURRENT_TEAM)
5501 			addressSpace = VMAddressSpace::GetCurrent();
5502 		else
5503 			addressSpace = VMAddressSpace::Get(team);
5504 	} else
5505 		addressSpace = VMAddressSpace::GetKernel();
5506 	if (addressSpace == NULL)
5507 		return B_ERROR;
5508 
5509 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5510 		// We get a new address space reference here. The one we got above will
5511 		// be freed by unlock_memory_etc().
5512 
5513 	VMTranslationMap* map = addressSpace->TranslationMap();
5514 	status_t error = B_OK;
5515 
5516 	// iterate through all concerned areas
5517 	addr_t nextAddress = lockBaseAddress;
5518 	while (nextAddress != lockEndAddress) {
5519 		// get the next area
5520 		VMArea* area = addressSpace->LookupArea(nextAddress);
5521 		if (area == NULL) {
5522 			error = B_BAD_ADDRESS;
5523 			break;
5524 		}
5525 
5526 		addr_t areaStart = nextAddress;
5527 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5528 
5529 		// allocate the wired range (do that before locking the cache to avoid
5530 		// deadlocks)
5531 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5532 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5533 		if (range == NULL) {
5534 			error = B_NO_MEMORY;
5535 			break;
5536 		}
5537 
5538 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5539 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5540 
5541 		// mark the area range wired
5542 		area->Wire(range);
5543 
5544 		// Depending on the area cache type and the wiring, we may not need to
5545 		// look at the individual pages.
5546 		if (area->cache_type == CACHE_TYPE_NULL
5547 			|| area->cache_type == CACHE_TYPE_DEVICE
5548 			|| area->wiring == B_FULL_LOCK
5549 			|| area->wiring == B_CONTIGUOUS) {
5550 			nextAddress = areaEnd;
5551 			continue;
5552 		}
5553 
5554 		// Lock the area's cache chain and the translation map. Needed to look
5555 		// up pages and play with their wired count.
5556 		cacheChainLocker.LockAllSourceCaches();
5557 		map->Lock();
5558 
5559 		// iterate through the pages and wire them
5560 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5561 			phys_addr_t physicalAddress;
5562 			uint32 flags;
5563 
5564 			vm_page* page;
5565 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5566 				&& (flags & requiredProtection) == requiredProtection
5567 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5568 					!= NULL) {
5569 				// Already mapped with the correct permissions -- just increment
5570 				// the page's wired count.
5571 				increment_page_wired_count(page);
5572 			} else {
5573 				// Let vm_soft_fault() map the page for us, if possible. We need
5574 				// to fully unlock to avoid deadlocks. Since we have already
5575 				// wired the area itself, nothing disturbing will happen with it
5576 				// in the meantime.
5577 				map->Unlock();
5578 				cacheChainLocker.Unlock();
5579 				addressSpaceLocker.Unlock();
5580 
5581 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5582 					false, isUser, &page, range);
5583 
5584 				addressSpaceLocker.Lock();
5585 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5586 				cacheChainLocker.LockAllSourceCaches();
5587 				map->Lock();
5588 			}
5589 
5590 			if (error != B_OK)
5591 				break;
5592 		}
5593 
5594 		map->Unlock();
5595 
5596 		if (error == B_OK) {
5597 			cacheChainLocker.Unlock();
5598 		} else {
5599 			// An error occurred, so abort right here. If the current address
5600 			// is the first in this area, unwire the area, since we won't get
5601 			// to it when reverting what we've done so far.
5602 			if (nextAddress == areaStart) {
5603 				area->Unwire(range);
5604 				cacheChainLocker.Unlock();
5605 				range->~VMAreaWiredRange();
5606 				free_etc(range, mallocFlags);
5607 			} else
5608 				cacheChainLocker.Unlock();
5609 
5610 			break;
5611 		}
5612 	}
5613 
5614 	if (error != B_OK) {
5615 		// An error occurred, so unwire all that we've already wired. Note that
5616 		// even if not a single page was wired, unlock_memory_etc() is called
5617 		// to put the address space reference.
5618 		addressSpaceLocker.Unlock();
5619 		unlock_memory_etc(team, (void*)lockBaseAddress,
5620 			nextAddress - lockBaseAddress, flags);
5621 	}
5622 
5623 	return error;
5624 }
5625 
5626 
5627 status_t
5628 lock_memory(void* address, size_t numBytes, uint32 flags)
5629 {
5630 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5631 }
5632 
5633 
5634 /*!	Unwires an address range previously wired with lock_memory_etc().
5635 
5636 	Note that a call to this function must balance a previous lock_memory_etc()
5637 	call with exactly the same parameters.
5638 */
5639 status_t
5640 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5641 {
5642 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5643 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5644 
5645 	// compute the page protection that is required
5646 	bool isUser = IS_USER_ADDRESS(address);
5647 	bool writable = (flags & B_READ_DEVICE) == 0;
5648 	uint32 requiredProtection = PAGE_PRESENT
5649 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5650 	if (writable)
5651 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5652 
5653 	uint32 mallocFlags = isUser
5654 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5655 
5656 	// get and read lock the address space
5657 	VMAddressSpace* addressSpace = NULL;
5658 	if (isUser) {
5659 		if (team == B_CURRENT_TEAM)
5660 			addressSpace = VMAddressSpace::GetCurrent();
5661 		else
5662 			addressSpace = VMAddressSpace::Get(team);
5663 	} else
5664 		addressSpace = VMAddressSpace::GetKernel();
5665 	if (addressSpace == NULL)
5666 		return B_ERROR;
5667 
5668 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5669 		// Take over the address space reference. We don't unlock until we're
5670 		// done.
5671 
5672 	VMTranslationMap* map = addressSpace->TranslationMap();
5673 	status_t error = B_OK;
5674 
5675 	// iterate through all concerned areas
5676 	addr_t nextAddress = lockBaseAddress;
5677 	while (nextAddress != lockEndAddress) {
5678 		// get the next area
5679 		VMArea* area = addressSpace->LookupArea(nextAddress);
5680 		if (area == NULL) {
5681 			error = B_BAD_ADDRESS;
5682 			break;
5683 		}
5684 
5685 		addr_t areaStart = nextAddress;
5686 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5687 
5688 		// Lock the area's top cache. This is a requirement for
5689 		// VMArea::Unwire().
5690 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5691 
5692 		// Depending on the area cache type and the wiring, we may not need to
5693 		// look at the individual pages.
5694 		if (area->cache_type == CACHE_TYPE_NULL
5695 			|| area->cache_type == CACHE_TYPE_DEVICE
5696 			|| area->wiring == B_FULL_LOCK
5697 			|| area->wiring == B_CONTIGUOUS) {
5698 			// unwire the range (to avoid deadlocks we delete the range after
5699 			// unlocking the cache)
5700 			nextAddress = areaEnd;
5701 			VMAreaWiredRange* range = area->Unwire(areaStart,
5702 				areaEnd - areaStart, writable);
5703 			cacheChainLocker.Unlock();
5704 			if (range != NULL) {
5705 				range->~VMAreaWiredRange();
5706 				free_etc(range, mallocFlags);
5707 			}
5708 			continue;
5709 		}
5710 
5711 		// Lock the area's cache chain and the translation map. Needed to look
5712 		// up pages and play with their wired count.
5713 		cacheChainLocker.LockAllSourceCaches();
5714 		map->Lock();
5715 
5716 		// iterate through the pages and unwire them
5717 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5718 			phys_addr_t physicalAddress;
5719 			uint32 flags;
5720 
5721 			vm_page* page;
5722 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5723 				&& (flags & PAGE_PRESENT) != 0
5724 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5725 					!= NULL) {
5726 				// The page is still mapped -- just decrement its wired
5727 				// count.
5728 				decrement_page_wired_count(page);
5729 			} else {
5730 				panic("unlock_memory_etc(): Failed to unwire page: address "
5731 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5732 					nextAddress);
5733 				error = B_BAD_VALUE;
5734 				break;
5735 			}
5736 		}
5737 
5738 		map->Unlock();
5739 
5740 		// All pages are unwired. Remove the area's wired range as well (to
5741 		// avoid deadlocks we delete the range after unlocking the cache).
5742 		VMAreaWiredRange* range = area->Unwire(areaStart,
5743 			areaEnd - areaStart, writable);
5744 
5745 		cacheChainLocker.Unlock();
5746 
5747 		if (range != NULL) {
5748 			range->~VMAreaWiredRange();
5749 			free_etc(range, mallocFlags);
5750 		}
5751 
5752 		if (error != B_OK)
5753 			break;
5754 	}
5755 
5756 	// get rid of the address space reference lock_memory_etc() acquired
5757 	addressSpace->Put();
5758 
5759 	return error;
5760 }
5761 
5762 
5763 status_t
5764 unlock_memory(void* address, size_t numBytes, uint32 flags)
5765 {
5766 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5767 }
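
/*	Illustrative sketch (comment only): wiring a buffer for I/O and releasing
	it again with the exactly matching unlock call, as required above. The
	helper name and the actual transfer are hypothetical.

		static status_t
		do_io_on_buffer(void* buffer, size_t length, uint32 flags)
		{
			status_t error = lock_memory_etc(B_CURRENT_TEAM, buffer, length,
				flags);
			if (error != B_OK)
				return error;

			// ... perform the transfer on the now-wired buffer ...

			// must be balanced with the exact same parameters
			return unlock_memory_etc(B_CURRENT_TEAM, buffer, length, flags);
		}
*/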
5768 
5769 
5770 /*!	Similar to get_memory_map(), but also allows to specify the address space
5771 	for the memory in question and has saner semantics.
5772 	Returns \c B_OK when the complete range could be translated, or
5773 	\c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5774 	case the actual number of entries is written to \c *_numEntries. Any other
5775 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5776 	in this case.
5777 */
5778 status_t
5779 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5780 	physical_entry* table, uint32* _numEntries)
5781 {
5782 	uint32 numEntries = *_numEntries;
5783 	*_numEntries = 0;
5784 
5785 	VMAddressSpace* addressSpace;
5786 	addr_t virtualAddress = (addr_t)address;
5787 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5788 	phys_addr_t physicalAddress;
5789 	status_t status = B_OK;
5790 	int32 index = -1;
5791 	addr_t offset = 0;
5792 	bool interrupts = are_interrupts_enabled();
5793 
5794 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5795 		"entries)\n", team, address, numBytes, numEntries));
5796 
5797 	if (numEntries == 0 || numBytes == 0)
5798 		return B_BAD_VALUE;
5799 
5800 	// in which address space is the address to be found?
5801 	if (IS_USER_ADDRESS(virtualAddress)) {
5802 		if (team == B_CURRENT_TEAM)
5803 			addressSpace = VMAddressSpace::GetCurrent();
5804 		else
5805 			addressSpace = VMAddressSpace::Get(team);
5806 	} else
5807 		addressSpace = VMAddressSpace::GetKernel();
5808 
5809 	if (addressSpace == NULL)
5810 		return B_ERROR;
5811 
5812 	VMTranslationMap* map = addressSpace->TranslationMap();
5813 
5814 	if (interrupts)
5815 		map->Lock();
5816 
5817 	while (offset < numBytes) {
5818 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5819 		uint32 flags;
5820 
5821 		if (interrupts) {
5822 			status = map->Query((addr_t)address + offset, &physicalAddress,
5823 				&flags);
5824 		} else {
5825 			status = map->QueryInterrupt((addr_t)address + offset,
5826 				&physicalAddress, &flags);
5827 		}
5828 		if (status < B_OK)
5829 			break;
5830 		if ((flags & PAGE_PRESENT) == 0) {
5831 			panic("get_memory_map() called on unmapped memory!");
5832 			return B_BAD_ADDRESS;
5833 		}
5834 
5835 		if (index < 0 && pageOffset > 0) {
5836 			physicalAddress += pageOffset;
5837 			if (bytes > B_PAGE_SIZE - pageOffset)
5838 				bytes = B_PAGE_SIZE - pageOffset;
5839 		}
5840 
5841 		// need to switch to the next physical_entry?
5842 		if (index < 0 || table[index].address
5843 				!= physicalAddress - table[index].size) {
5844 			if ((uint32)++index + 1 > numEntries) {
5845 				// table too small
5846 				break;
5847 			}
5848 			table[index].address = physicalAddress;
5849 			table[index].size = bytes;
5850 		} else {
5851 			// the page is contiguous with the current entry -- extend it
5852 			table[index].size += bytes;
5853 		}
5854 
5855 		offset += bytes;
5856 	}
5857 
5858 	if (interrupts)
5859 		map->Unlock();
5860 
5861 	if (status != B_OK)
5862 		return status;
5863 
5864 	if ((uint32)index + 1 > numEntries) {
5865 		*_numEntries = index;
5866 		return B_BUFFER_OVERFLOW;
5867 	}
5868 
5869 	*_numEntries = index + 1;
5870 	return B_OK;
5871 }
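
/*	Illustrative sketch (comment only): translating a (wired) buffer into a
	list of physical runs and tolerating \c B_BUFFER_OVERFLOW when the table
	is too small. The helper name and the table size of 8 are arbitrary.

		static status_t
		dump_physical_runs(const void* buffer, size_t length)
		{
			physical_entry table[8];
			uint32 count = 8;
			status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
				table, &count);
			if (error != B_OK && error != B_BUFFER_OVERFLOW)
				return error;

			// on B_BUFFER_OVERFLOW only the first "count" runs were returned
			for (uint32 i = 0; i < count; i++) {
				dprintf("run %" B_PRIu32 ": %#" B_PRIxPHYSADDR ", %#"
					B_PRIxPHYSADDR " bytes\n", i, table[i].address,
					table[i].size);
			}

			return error;
		}
*/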
5872 
5873 
5874 /*!	According to the BeBook, this function should always succeed.
5875 	This is no longer the case.
5876 */
5877 extern "C" int32
5878 __get_memory_map_haiku(const void* address, size_t numBytes,
5879 	physical_entry* table, int32 numEntries)
5880 {
5881 	uint32 entriesRead = numEntries;
5882 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5883 		table, &entriesRead);
5884 	if (error != B_OK)
5885 		return error;
5886 
5887 	// close the entry list
5888 
5889 	// if it's only one entry, we will silently accept the missing ending
5890 	if (numEntries == 1)
5891 		return B_OK;
5892 
5893 	if (entriesRead + 1 > (uint32)numEntries)
5894 		return B_BUFFER_OVERFLOW;
5895 
5896 	table[entriesRead].address = 0;
5897 	table[entriesRead].size = 0;
5898 
5899 	return B_OK;
5900 }
5901 
5902 
5903 area_id
5904 area_for(void* address)
5905 {
5906 	return vm_area_for((addr_t)address, true);
5907 }
5908 
5909 
5910 area_id
5911 find_area(const char* name)
5912 {
5913 	return VMAreaHash::Find(name);
5914 }
5915 
5916 
5917 status_t
5918 _get_area_info(area_id id, area_info* info, size_t size)
5919 {
5920 	if (size != sizeof(area_info) || info == NULL)
5921 		return B_BAD_VALUE;
5922 
5923 	AddressSpaceReadLocker locker;
5924 	VMArea* area;
5925 	status_t status = locker.SetFromArea(id, area);
5926 	if (status != B_OK)
5927 		return status;
5928 
5929 	fill_area_info(area, info, size);
5930 	return B_OK;
5931 }
5932 
5933 
5934 status_t
5935 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5936 {
5937 	addr_t nextBase = *(addr_t*)cookie;
5938 
5939 	// we're already through the list
5940 	if (nextBase == (addr_t)-1)
5941 		return B_ENTRY_NOT_FOUND;
5942 
5943 	if (team == B_CURRENT_TEAM)
5944 		team = team_get_current_team_id();
5945 
5946 	AddressSpaceReadLocker locker(team);
5947 	if (!locker.IsLocked())
5948 		return B_BAD_TEAM_ID;
5949 
5950 	VMArea* area;
5951 	for (VMAddressSpace::AreaIterator it
5952 				= locker.AddressSpace()->GetAreaIterator();
5953 			(area = it.Next()) != NULL;) {
5954 		if (area->Base() > nextBase)
5955 			break;
5956 	}
5957 
5958 	if (area == NULL) {
5959 		nextBase = (addr_t)-1;
5960 		return B_ENTRY_NOT_FOUND;
5961 	}
5962 
5963 	fill_area_info(area, info, size);
5964 	*cookie = (ssize_t)(area->Base());
5965 
5966 	return B_OK;
5967 }
5968 
5969 
5970 status_t
5971 set_area_protection(area_id area, uint32 newProtection)
5972 {
5973 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5974 		newProtection, true);
5975 }
5976 
5977 
5978 status_t
5979 resize_area(area_id areaID, size_t newSize)
5980 {
5981 	return vm_resize_area(areaID, newSize, true);
5982 }
5983 
5984 
5985 /*!	Transfers the specified area to a new team. The caller must be the owner
5986 	of the area.
5987 */
5988 area_id
5989 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5990 	bool kernel)
5991 {
5992 	area_info info;
5993 	status_t status = get_area_info(id, &info);
5994 	if (status != B_OK)
5995 		return status;
5996 
5997 	if (info.team != thread_get_current_thread()->team->id)
5998 		return B_PERMISSION_DENIED;
5999 
6000 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6001 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6002 	if (clonedArea < 0)
6003 		return clonedArea;
6004 
6005 	status = vm_delete_area(info.team, id, kernel);
6006 	if (status != B_OK) {
6007 		vm_delete_area(target, clonedArea, kernel);
6008 		return status;
6009 	}
6010 
6011 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6012 
6013 	return clonedArea;
6014 }
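
/*	Illustrative sketch (comment only): handing an area over to another team
	and letting the kernel pick the address in the target address space. The
	area ID and target team are assumed to come from elsewhere.

		void* address = NULL;
		area_id newArea = transfer_area(areaID, &address, B_ANY_ADDRESS,
			targetTeam, true);
		if (newArea < 0)
			dprintf("transfer failed: %s\n", strerror(newArea));
*/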
6015 
6016 
6017 extern "C" area_id
6018 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6019 	size_t numBytes, uint32 addressSpec, uint32 protection,
6020 	void** _virtualAddress)
6021 {
6022 	if (!arch_vm_supports_protection(protection))
6023 		return B_NOT_SUPPORTED;
6024 
6025 	fix_protection(&protection);
6026 
6027 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6028 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6029 		false);
6030 }
6031 
6032 
6033 area_id
6034 clone_area(const char* name, void** _address, uint32 addressSpec,
6035 	uint32 protection, area_id source)
6036 {
6037 	if ((protection & B_KERNEL_PROTECTION) == 0)
6038 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6039 
6040 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6041 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6042 }
6043 
6044 
6045 area_id
6046 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock,
6047 	uint32 protection, uint32 flags, uint32 guardSize,
6048 	const virtual_address_restrictions* virtualAddressRestrictions,
6049 	const physical_address_restrictions* physicalAddressRestrictions,
6050 	void** _address)
6051 {
6052 	fix_protection(&protection);
6053 
6054 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6055 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6056 		true, _address);
6057 }
6058 
6059 
6060 extern "C" area_id
6061 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6062 	size_t size, uint32 lock, uint32 protection)
6063 {
6064 	fix_protection(&protection);
6065 
6066 	virtual_address_restrictions virtualRestrictions = {};
6067 	virtualRestrictions.address = *_address;
6068 	virtualRestrictions.address_specification = addressSpec;
6069 	physical_address_restrictions physicalRestrictions = {};
6070 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6071 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6072 		true, _address);
6073 }
6074 
6075 
6076 status_t
6077 delete_area(area_id area)
6078 {
6079 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6080 }
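
/*	Illustrative sketch (comment only): the common kernel/driver pattern of
	creating an anonymous area, using it, and deleting it again. The area name
	and size are arbitrary.

		void* address;
		area_id area = create_area("example buffer", &address,
			B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE, B_FULL_LOCK,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		if (area < 0)
			return area;	// creation failed

		// ... use the memory at "address" ...

		delete_area(area);
*/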
6081 
6082 
6083 //	#pragma mark - Userland syscalls
6084 
6085 
6086 status_t
6087 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6088 	addr_t size)
6089 {
6090 	// filter out some unavailable values (for userland)
6091 	switch (addressSpec) {
6092 		case B_ANY_KERNEL_ADDRESS:
6093 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6094 			return B_BAD_VALUE;
6095 	}
6096 
6097 	addr_t address;
6098 
6099 	if (!IS_USER_ADDRESS(userAddress)
6100 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6101 		return B_BAD_ADDRESS;
6102 
6103 	status_t status = vm_reserve_address_range(
6104 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6105 		RESERVED_AVOID_BASE);
6106 	if (status != B_OK)
6107 		return status;
6108 
6109 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6110 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6111 			(void*)address, size);
6112 		return B_BAD_ADDRESS;
6113 	}
6114 
6115 	return B_OK;
6116 }
6117 
6118 
6119 status_t
6120 _user_unreserve_address_range(addr_t address, addr_t size)
6121 {
6122 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6123 		(void*)address, size);
6124 }
6125 
6126 
6127 area_id
6128 _user_area_for(void* address)
6129 {
6130 	return vm_area_for((addr_t)address, false);
6131 }
6132 
6133 
6134 area_id
6135 _user_find_area(const char* userName)
6136 {
6137 	char name[B_OS_NAME_LENGTH];
6138 
6139 	if (!IS_USER_ADDRESS(userName)
6140 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6141 		return B_BAD_ADDRESS;
6142 
6143 	return find_area(name);
6144 }
6145 
6146 
6147 status_t
6148 _user_get_area_info(area_id area, area_info* userInfo)
6149 {
6150 	if (!IS_USER_ADDRESS(userInfo))
6151 		return B_BAD_ADDRESS;
6152 
6153 	area_info info;
6154 	status_t status = get_area_info(area, &info);
6155 	if (status < B_OK)
6156 		return status;
6157 
6158 	// TODO: do we want to prevent userland from seeing kernel protections?
6159 	//info.protection &= B_USER_PROTECTION;
6160 
6161 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6162 		return B_BAD_ADDRESS;
6163 
6164 	return status;
6165 }
6166 
6167 
6168 status_t
6169 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6170 {
6171 	ssize_t cookie;
6172 
6173 	if (!IS_USER_ADDRESS(userCookie)
6174 		|| !IS_USER_ADDRESS(userInfo)
6175 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6176 		return B_BAD_ADDRESS;
6177 
6178 	area_info info;
6179 	status_t status = _get_next_area_info(team, &cookie, &info,
6180 		sizeof(area_info));
6181 	if (status != B_OK)
6182 		return status;
6183 
6184 	//info.protection &= B_USER_PROTECTION;
6185 
6186 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6187 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6188 		return B_BAD_ADDRESS;
6189 
6190 	return status;
6191 }
6192 
6193 
6194 status_t
6195 _user_set_area_protection(area_id area, uint32 newProtection)
6196 {
6197 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6198 		return B_BAD_VALUE;
6199 
6200 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6201 		newProtection, false);
6202 }
6203 
6204 
6205 status_t
6206 _user_resize_area(area_id area, size_t newSize)
6207 {
6208 	// TODO: Since we restrict deleting of areas to those owned by the team,
6209 	// we should also do that for resizing (check other functions, too).
6210 	return vm_resize_area(area, newSize, false);
6211 }
6212 
6213 
6214 area_id
6215 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6216 	team_id target)
6217 {
6218 	// filter out some unavailable values (for userland)
6219 	switch (addressSpec) {
6220 		case B_ANY_KERNEL_ADDRESS:
6221 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6222 			return B_BAD_VALUE;
6223 	}
6224 
6225 	void* address;
6226 	if (!IS_USER_ADDRESS(userAddress)
6227 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6228 		return B_BAD_ADDRESS;
6229 
6230 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6231 	if (newArea < B_OK)
6232 		return newArea;
6233 
6234 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6235 		return B_BAD_ADDRESS;
6236 
6237 	return newArea;
6238 }
6239 
6240 
6241 area_id
6242 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6243 	uint32 protection, area_id sourceArea)
6244 {
6245 	char name[B_OS_NAME_LENGTH];
6246 	void* address;
6247 
6248 	// filter out some unavailable values (for userland)
6249 	switch (addressSpec) {
6250 		case B_ANY_KERNEL_ADDRESS:
6251 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6252 			return B_BAD_VALUE;
6253 	}
6254 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6255 		return B_BAD_VALUE;
6256 
6257 	if (!IS_USER_ADDRESS(userName)
6258 		|| !IS_USER_ADDRESS(userAddress)
6259 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6260 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6261 		return B_BAD_ADDRESS;
6262 
6263 	fix_protection(&protection);
6264 
6265 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6266 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6267 		false);
6268 	if (clonedArea < B_OK)
6269 		return clonedArea;
6270 
6271 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6272 		delete_area(clonedArea);
6273 		return B_BAD_ADDRESS;
6274 	}
6275 
6276 	return clonedArea;
6277 }
6278 
6279 
6280 area_id
6281 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6282 	size_t size, uint32 lock, uint32 protection)
6283 {
6284 	char name[B_OS_NAME_LENGTH];
6285 	void* address;
6286 
6287 	// filter out some unavailable values (for userland)
6288 	switch (addressSpec) {
6289 		case B_ANY_KERNEL_ADDRESS:
6290 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6291 			return B_BAD_VALUE;
6292 	}
6293 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6294 		return B_BAD_VALUE;
6295 
6296 	if (!IS_USER_ADDRESS(userName)
6297 		|| !IS_USER_ADDRESS(userAddress)
6298 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6299 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6300 		return B_BAD_ADDRESS;
6301 
6302 	if (addressSpec == B_EXACT_ADDRESS
6303 		&& IS_KERNEL_ADDRESS(address))
6304 		return B_BAD_VALUE;
6305 
6306 	if (addressSpec == B_ANY_ADDRESS)
6307 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6308 	if (addressSpec == B_BASE_ADDRESS)
6309 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6310 
6311 	fix_protection(&protection);
6312 
6313 	virtual_address_restrictions virtualRestrictions = {};
6314 	virtualRestrictions.address = address;
6315 	virtualRestrictions.address_specification = addressSpec;
6316 	physical_address_restrictions physicalRestrictions = {};
6317 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6318 		size, lock, protection, 0, 0, &virtualRestrictions,
6319 		&physicalRestrictions, false, &address);
6320 
6321 	if (area >= B_OK
6322 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6323 		delete_area(area);
6324 		return B_BAD_ADDRESS;
6325 	}
6326 
6327 	return area;
6328 }
6329 
6330 
6331 status_t
6332 _user_delete_area(area_id area)
6333 {
6334 	// Unlike the BeOS implementation, you can now only delete areas
6335 	// that you have created yourself from userland.
6336 	// The documentation to delete_area() explicitly states that this
6337 	// will be restricted in the future, and so it will.
6338 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6339 }
6340 
6341 
6342 // TODO: create a BeOS style call for this!
6343 
6344 area_id
6345 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6346 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6347 	int fd, off_t offset)
6348 {
6349 	char name[B_OS_NAME_LENGTH];
6350 	void* address;
6351 	area_id area;
6352 
6353 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6354 		return B_BAD_VALUE;
6355 
6356 	fix_protection(&protection);
6357 
6358 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6359 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6360 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6361 		return B_BAD_ADDRESS;
6362 
6363 	if (addressSpec == B_EXACT_ADDRESS) {
6364 		if ((addr_t)address + size < (addr_t)address
6365 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6366 			return B_BAD_VALUE;
6367 		}
6368 		if (!IS_USER_ADDRESS(address)
6369 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6370 			return B_BAD_ADDRESS;
6371 		}
6372 	}
6373 
6374 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6375 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6376 		false);
6377 	if (area < B_OK)
6378 		return area;
6379 
6380 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6381 		return B_BAD_ADDRESS;
6382 
6383 	return area;
6384 }
6385 
6386 
6387 status_t
6388 _user_unmap_memory(void* _address, size_t size)
6389 {
6390 	addr_t address = (addr_t)_address;
6391 
6392 	// check params
6393 	if (size == 0 || (addr_t)address + size < (addr_t)address
6394 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6395 		return B_BAD_VALUE;
6396 	}
6397 
6398 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6399 		return B_BAD_ADDRESS;
6400 
6401 	// Write lock the address space and ensure the address range is not wired.
6402 	AddressSpaceWriteLocker locker;
6403 	do {
6404 		status_t status = locker.SetTo(team_get_current_team_id());
6405 		if (status != B_OK)
6406 			return status;
6407 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6408 			size, &locker));
6409 
6410 	// unmap
6411 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6412 }
6413 
6414 
6415 status_t
6416 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6417 {
6418 	// check address range
6419 	addr_t address = (addr_t)_address;
6420 	size = PAGE_ALIGN(size);
6421 
6422 	if ((address % B_PAGE_SIZE) != 0)
6423 		return B_BAD_VALUE;
6424 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6425 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6426 		// weird error code required by POSIX
6427 		return ENOMEM;
6428 	}
6429 
6430 	// extend and check protection
6431 	if ((protection & ~B_USER_PROTECTION) != 0)
6432 		return B_BAD_VALUE;
6433 
6434 	fix_protection(&protection);
6435 
6436 	// We need to write lock the address space, since we're going to play with
6437 	// the areas. Also make sure that none of the areas is wired and that we're
6438 	// actually allowed to change the protection.
6439 	AddressSpaceWriteLocker locker;
6440 
6441 	bool restart;
6442 	do {
6443 		restart = false;
6444 
6445 		status_t status = locker.SetTo(team_get_current_team_id());
6446 		if (status != B_OK)
6447 			return status;
6448 
6449 		// First round: Check whether the whole range is covered by areas and
6450 		// whether we are allowed to modify them.
6451 		addr_t currentAddress = address;
6452 		size_t sizeLeft = size;
6453 		while (sizeLeft > 0) {
6454 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6455 			if (area == NULL)
6456 				return B_NO_MEMORY;
6457 
6458 			if ((area->protection & B_KERNEL_AREA) != 0)
6459 				return B_NOT_ALLOWED;
6460 
6461 			// TODO: For (shared) mapped files we should check whether the new
6462 			// protections are compatible with the file permissions. We don't
6463 			// have a way to do that yet, though.
6464 
6465 			addr_t offset = currentAddress - area->Base();
6466 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6467 
6468 			AreaCacheLocker cacheLocker(area);
6469 
6470 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6471 					&locker, &cacheLocker)) {
6472 				restart = true;
6473 				break;
6474 			}
6475 
6476 			cacheLocker.Unlock();
6477 
6478 			currentAddress += rangeSize;
6479 			sizeLeft -= rangeSize;
6480 		}
6481 	} while (restart);
6482 
6483 	// Second round: If the protections differ from that of the area, create a
6484 	// page protection array and re-map mapped pages.
6485 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6486 	addr_t currentAddress = address;
6487 	size_t sizeLeft = size;
6488 	while (sizeLeft > 0) {
6489 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6490 		if (area == NULL)
6491 			return B_NO_MEMORY;
6492 
6493 		addr_t offset = currentAddress - area->Base();
6494 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6495 
6496 		currentAddress += rangeSize;
6497 		sizeLeft -= rangeSize;
6498 
6499 		if (area->page_protections == NULL) {
6500 			if (area->protection == protection)
6501 				continue;
6502 
6503 			status_t status = allocate_area_page_protections(area);
6504 			if (status != B_OK)
6505 				return status;
6506 		}
6507 
6508 		// We need to lock the complete cache chain, since we potentially unmap
6509 		// pages of lower caches.
6510 		VMCache* topCache = vm_area_get_locked_cache(area);
6511 		VMCacheChainLocker cacheChainLocker(topCache);
6512 		cacheChainLocker.LockAllSourceCaches();
6513 
6514 		for (addr_t pageAddress = area->Base() + offset;
6515 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6516 			map->Lock();
6517 
6518 			set_area_page_protection(area, pageAddress, protection);
6519 
6520 			phys_addr_t physicalAddress;
6521 			uint32 flags;
6522 
6523 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6524 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6525 				map->Unlock();
6526 				continue;
6527 			}
6528 
6529 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6530 			if (page == NULL) {
6531 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6532 					"\n", area, physicalAddress);
6533 				map->Unlock();
6534 				return B_ERROR;
6535 			}
6536 
6537 			// If the page is not in the topmost cache and write access is
6538 			// requested, we have to unmap it. Otherwise we can re-map it with
6539 			// the new protection.
6540 			bool unmapPage = page->Cache() != topCache
6541 				&& (protection & B_WRITE_AREA) != 0;
6542 
6543 			if (!unmapPage)
6544 				map->ProtectPage(area, pageAddress, protection);
6545 
6546 			map->Unlock();
6547 
6548 			if (unmapPage) {
6549 				DEBUG_PAGE_ACCESS_START(page);
6550 				unmap_page(area, pageAddress);
6551 				DEBUG_PAGE_ACCESS_END(page);
6552 			}
6553 		}
6554 	}
6555 
6556 	return B_OK;
6557 }
6558 
6559 
6560 status_t
6561 _user_sync_memory(void* _address, size_t size, uint32 flags)
6562 {
6563 	addr_t address = (addr_t)_address;
6564 	size = PAGE_ALIGN(size);
6565 
6566 	// check params
6567 	if ((address % B_PAGE_SIZE) != 0)
6568 		return B_BAD_VALUE;
6569 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6570 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6571 		// weird error code required by POSIX
6572 		return ENOMEM;
6573 	}
6574 
6575 	bool writeSync = (flags & MS_SYNC) != 0;
6576 	bool writeAsync = (flags & MS_ASYNC) != 0;
6577 	if (writeSync && writeAsync)
6578 		return B_BAD_VALUE;
6579 
6580 	if (size == 0 || (!writeSync && !writeAsync))
6581 		return B_OK;
6582 
6583 	// iterate through the range and sync all concerned areas
6584 	while (size > 0) {
6585 		// read lock the address space
6586 		AddressSpaceReadLocker locker;
6587 		status_t error = locker.SetTo(team_get_current_team_id());
6588 		if (error != B_OK)
6589 			return error;
6590 
6591 		// get the first area
6592 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6593 		if (area == NULL)
6594 			return B_NO_MEMORY;
6595 
6596 		uint32 offset = address - area->Base();
6597 		size_t rangeSize = min_c(area->Size() - offset, size);
6598 		offset += area->cache_offset;
6599 
6600 		// lock the cache
6601 		AreaCacheLocker cacheLocker(area);
6602 		if (!cacheLocker)
6603 			return B_BAD_VALUE;
6604 		VMCache* cache = area->cache;
6605 
6606 		locker.Unlock();
6607 
6608 		uint32 firstPage = offset >> PAGE_SHIFT;
6609 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6610 
6611 		// write the pages
6612 		if (cache->type == CACHE_TYPE_VNODE) {
6613 			if (writeSync) {
6614 				// synchronous
6615 				error = vm_page_write_modified_page_range(cache, firstPage,
6616 					endPage);
6617 				if (error != B_OK)
6618 					return error;
6619 			} else {
6620 				// asynchronous
6621 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6622 				// TODO: This is probably not quite what is supposed to happen.
6623 				// Especially when a lot has to be written, it might take ages
6624 				// until it really hits the disk.
6625 			}
6626 		}
6627 
6628 		address += rangeSize;
6629 		size -= rangeSize;
6630 	}
6631 
6632 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6633 	// synchronize multiple mappings of the same file. In our VM they never get
6634 	// out of sync, though, so we don't have to do anything.
6635 
6636 	return B_OK;
6637 }
6638 
6639 
6640 status_t
6641 _user_memory_advice(void* address, size_t size, uint32 advice)
6642 {
6643 	// TODO: Implement!
6644 	return B_OK;
6645 }
6646 
6647 
6648 status_t
6649 _user_get_memory_properties(team_id teamID, const void* address,
6650 	uint32* _protected, uint32* _lock)
6651 {
6652 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6653 		return B_BAD_ADDRESS;
6654 
6655 	AddressSpaceReadLocker locker;
6656 	status_t error = locker.SetTo(teamID);
6657 	if (error != B_OK)
6658 		return error;
6659 
6660 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6661 	if (area == NULL)
6662 
6664 
6665 	uint32 protection = area->protection;
6666 	if (area->page_protections != NULL)
6667 		protection = get_area_page_protection(area, (addr_t)address);
6668 
6669 	uint32 wiring = area->wiring;
6670 
6671 	locker.Unlock();
6672 
6673 	error = user_memcpy(_protected, &protection, sizeof(protection));
6674 	if (error != B_OK)
6675 		return error;
6676 
6677 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6678 
6679 	return error;
6680 }
6681 
6682 
6683 // #pragma mark -- compatibility
6684 
6685 
6686 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6687 
6688 
6689 struct physical_entry_beos {
6690 	uint32	address;
6691 	uint32	size;
6692 };
6693 
6694 
6695 /*!	The physical_entry structure has changed. We need to translate it to the
6696 	old one.
6697 */
6698 extern "C" int32
6699 __get_memory_map_beos(const void* _address, size_t numBytes,
6700 	physical_entry_beos* table, int32 numEntries)
6701 {
6702 	if (numEntries <= 0)
6703 		return B_BAD_VALUE;
6704 
6705 	const uint8* address = (const uint8*)_address;
6706 
6707 	int32 count = 0;
6708 	while (numBytes > 0 && count < numEntries) {
6709 		physical_entry entry;
6710 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6711 		if (result < 0) {
6712 			if (result != B_BUFFER_OVERFLOW)
6713 				return result;
6714 		}
6715 
6716 		if (entry.address >= (phys_addr_t)1 << 32) {
6717 			panic("get_memory_map(): Address is greater than 4 GB!");
6718 			return B_ERROR;
6719 		}
6720 
6721 		table[count].address = entry.address;
6722 		table[count++].size = entry.size;
6723 
6724 		address += entry.size;
6725 		numBytes -= entry.size;
6726 	}
6727 
6728 	// null-terminate the table, if possible
6729 	if (count < numEntries) {
6730 		table[count].address = 0;
6731 		table[count].size = 0;
6732 	}
6733 
6734 	return B_OK;
6735 }
6736 
6737 
6738 /*!	The type of the \a physicalAddress parameter has changed from void* to
6739 	phys_addr_t.
6740 */
6741 extern "C" area_id
6742 __map_physical_memory_beos(const char* name, void* physicalAddress,
6743 	size_t numBytes, uint32 addressSpec, uint32 protection,
6744 	void** _virtualAddress)
6745 {
6746 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6747 		addressSpec, protection, _virtualAddress);
6748 }
6749 
6750 
6751 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6752 	we meddle with the \a lock parameter to force 32 bit.
6753 */
6754 extern "C" area_id
6755 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6756 	size_t size, uint32 lock, uint32 protection)
6757 {
6758 	switch (lock) {
6759 		case B_NO_LOCK:
6760 			break;
6761 		case B_FULL_LOCK:
6762 		case B_LAZY_LOCK:
6763 			lock = B_32_BIT_FULL_LOCK;
6764 			break;
6765 		case B_CONTIGUOUS:
6766 			lock = B_32_BIT_CONTIGUOUS;
6767 			break;
6768 	}
6769 
6770 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6771 		protection);
6772 }
6773 
6774 
6775 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6776 	"BASE");
6777 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6778 	"map_physical_memory@", "BASE");
6779 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6780 	"BASE");
6781 
6782 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6783 	"get_memory_map@@", "1_ALPHA3");
6784 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6785 	"map_physical_memory@@", "1_ALPHA3");
6786 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6787 	"1_ALPHA3");
6788 
6789 
6790 #else
6791 
6792 
6793 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6794 	"get_memory_map@@", "BASE");
6795 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6796 	"map_physical_memory@@", "BASE");
6797 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6798 	"BASE");
6799 
6800 
6801 #endif	// defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6802