xref: /haiku/src/system/kernel/vm/vm.cpp (revision 44d19f4d32b8f7e9c01f00294c87ca5cc2e057f7)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_priv.h>
52 #include <vm/VMAddressSpace.h>
53 #include <vm/VMArea.h>
54 #include <vm/VMCache.h>
55 
56 #include "VMAddressSpaceLocking.h"
57 #include "VMAnonymousCache.h"
58 #include "VMAnonymousNoSwapCache.h"
59 #include "IORequest.h"
60 
61 
62 //#define TRACE_VM
63 //#define TRACE_FAULTS
64 #ifdef TRACE_VM
65 #	define TRACE(x) dprintf x
66 #else
67 #	define TRACE(x) ;
68 #endif
69 #ifdef TRACE_FAULTS
70 #	define FTRACE(x) dprintf x
71 #else
72 #	define FTRACE(x) ;
73 #endif
74 
75 
76 namespace {
77 
78 class AreaCacheLocking {
79 public:
80 	inline bool Lock(VMCache* lockable)
81 	{
82 		return false;
83 	}
84 
85 	inline void Unlock(VMCache* lockable)
86 	{
87 		vm_area_put_locked_cache(lockable);
88 	}
89 };
90 
91 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92 public:
93 	inline AreaCacheLocker(VMCache* cache = NULL)
94 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95 	{
96 	}
97 
98 	inline AreaCacheLocker(VMArea* area)
99 		: AutoLocker<VMCache, AreaCacheLocking>()
100 	{
101 		SetTo(area);
102 	}
103 
104 	inline void SetTo(VMCache* cache, bool alreadyLocked)
105 	{
106 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107 	}
108 
109 	inline void SetTo(VMArea* area)
110 	{
111 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113 	}
114 };
115 
116 
117 class VMCacheChainLocker {
118 public:
119 	VMCacheChainLocker()
120 		:
121 		fTopCache(NULL),
122 		fBottomCache(NULL)
123 	{
124 	}
125 
126 	VMCacheChainLocker(VMCache* topCache)
127 		:
128 		fTopCache(topCache),
129 		fBottomCache(topCache)
130 	{
131 	}
132 
133 	~VMCacheChainLocker()
134 	{
135 		Unlock();
136 	}
137 
138 	void SetTo(VMCache* topCache)
139 	{
140 		fTopCache = topCache;
141 		fBottomCache = topCache;
142 
143 		if (topCache != NULL)
144 			topCache->SetUserData(NULL);
145 	}
146 
147 	VMCache* LockSourceCache()
148 	{
149 		if (fBottomCache == NULL || fBottomCache->source == NULL)
150 			return NULL;
151 
152 		VMCache* previousCache = fBottomCache;
153 
154 		fBottomCache = fBottomCache->source;
155 		fBottomCache->Lock();
156 		fBottomCache->AcquireRefLocked();
157 		fBottomCache->SetUserData(previousCache);
158 
159 		return fBottomCache;
160 	}
161 
162 	void LockAllSourceCaches()
163 	{
164 		while (LockSourceCache() != NULL) {
165 		}
166 	}
167 
168 	void Unlock(VMCache* exceptCache = NULL)
169 	{
170 		if (fTopCache == NULL)
171 			return;
172 
173 		// Unlock caches in source -> consumer direction. This is important to
174 		// avoid double-locking and a reversal of locking order in case a cache
175 		// is eligible for merging.
176 		VMCache* cache = fBottomCache;
177 		while (cache != NULL) {
178 			VMCache* nextCache = (VMCache*)cache->UserData();
179 			if (cache != exceptCache)
180 				cache->ReleaseRefAndUnlock(cache != fTopCache);
181 
182 			if (cache == fTopCache)
183 				break;
184 
185 			cache = nextCache;
186 		}
187 
188 		fTopCache = NULL;
189 		fBottomCache = NULL;
190 	}
191 
192 	void UnlockKeepRefs(bool keepTopCacheLocked)
193 	{
194 		if (fTopCache == NULL)
195 			return;
196 
197 		VMCache* nextCache = fBottomCache;
198 		VMCache* cache = NULL;
199 
200 		while (keepTopCacheLocked
201 				? nextCache != fTopCache : cache != fTopCache) {
202 			cache = nextCache;
203 			nextCache = (VMCache*)cache->UserData();
204 			cache->Unlock(cache != fTopCache);
205 		}
206 	}
207 
208 	void RelockCaches(bool topCacheLocked)
209 	{
210 		if (fTopCache == NULL)
211 			return;
212 
213 		VMCache* nextCache = fTopCache;
214 		VMCache* cache = NULL;
215 		if (topCacheLocked) {
216 			cache = nextCache;
217 			nextCache = cache->source;
218 		}
219 
220 		while (cache != fBottomCache && nextCache != NULL) {
221 			VMCache* consumer = cache;
222 			cache = nextCache;
223 			nextCache = cache->source;
224 			cache->Lock();
225 			cache->SetUserData(consumer);
226 		}
227 	}
228 
229 private:
230 	VMCache*	fTopCache;
231 	VMCache*	fBottomCache;
232 };
233 
234 } // namespace
235 
236 
237 // The memory reserve that an allocation of the given priority must not touch.
238 static const size_t kMemoryReserveForPriority[] = {
239 	VM_MEMORY_RESERVE_USER,		// user
240 	VM_MEMORY_RESERVE_SYSTEM,	// system
241 	0							// VIP
242 };
243 
244 
245 ObjectCache* gPageMappingsObjectCache;
246 
247 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248 
249 static off_t sAvailableMemory;
250 static off_t sNeededMemory;
251 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252 static uint32 sPageFaults;
253 
254 static VMPhysicalPageMapper* sPhysicalPageMapper;
255 
256 #if DEBUG_CACHE_LIST
257 
258 struct cache_info {
259 	VMCache*	cache;
260 	addr_t		page_count;
261 	addr_t		committed;
262 };
263 
264 static const int kCacheInfoTableCount = 100 * 1024;
265 static cache_info* sCacheInfoTable;
266 
267 #endif	// DEBUG_CACHE_LIST
268 
269 
270 // function declarations
271 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272 	bool addressSpaceCleanup);
273 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275 static status_t map_backing_store(VMAddressSpace* addressSpace,
276 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277 	int protection, int mapping, uint32 flags,
278 	const virtual_address_restrictions* addressRestrictions, bool kernel,
279 	VMArea** _area, void** _virtualAddress);
280 static void fix_protection(uint32* protection);
281 
282 
283 //	#pragma mark -
284 
285 
286 #if VM_PAGE_FAULT_TRACING
287 
288 namespace VMPageFaultTracing {
289 
290 class PageFaultStart : public AbstractTraceEntry {
291 public:
292 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293 		:
294 		fAddress(address),
295 		fPC(pc),
296 		fWrite(write),
297 		fUser(user)
298 	{
299 		Initialized();
300 	}
301 
302 	virtual void AddDump(TraceOutput& out)
303 	{
304 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306 	}
307 
308 private:
309 	addr_t	fAddress;
310 	addr_t	fPC;
311 	bool	fWrite;
312 	bool	fUser;
313 };
314 
315 
316 // page fault errors
317 enum {
318 	PAGE_FAULT_ERROR_NO_AREA		= 0,
319 	PAGE_FAULT_ERROR_KERNEL_ONLY,
320 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321 	PAGE_FAULT_ERROR_READ_PROTECTED,
322 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325 };
326 
327 
328 class PageFaultError : public AbstractTraceEntry {
329 public:
330 	PageFaultError(area_id area, status_t error)
331 		:
332 		fArea(area),
333 		fError(error)
334 	{
335 		Initialized();
336 	}
337 
338 	virtual void AddDump(TraceOutput& out)
339 	{
340 		switch (fError) {
341 			case PAGE_FAULT_ERROR_NO_AREA:
342 				out.Print("page fault error: no area");
343 				break;
344 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345 				out.Print("page fault error: area: %ld, kernel only", fArea);
346 				break;
347 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348 				out.Print("page fault error: area: %ld, write protected",
349 					fArea);
350 				break;
351 			case PAGE_FAULT_ERROR_READ_PROTECTED:
352 				out.Print("page fault error: area: %ld, read protected", fArea);
353 				break;
354 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355 				out.Print("page fault error: area: %ld, execute protected",
356 					fArea);
357 				break;
358 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359 				out.Print("page fault error: kernel touching bad user memory");
360 				break;
361 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362 				out.Print("page fault error: no address space");
363 				break;
364 			default:
365 				out.Print("page fault error: area: %ld, error: %s", fArea,
366 					strerror(fError));
367 				break;
368 		}
369 	}
370 
371 private:
372 	area_id		fArea;
373 	status_t	fError;
374 };
375 
376 
377 class PageFaultDone : public AbstractTraceEntry {
378 public:
379 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380 			vm_page* page)
381 		:
382 		fArea(area),
383 		fTopCache(topCache),
384 		fCache(cache),
385 		fPage(page)
386 	{
387 		Initialized();
388 	}
389 
390 	virtual void AddDump(TraceOutput& out)
391 	{
392 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393 			"page: %p", fArea, fTopCache, fCache, fPage);
394 	}
395 
396 private:
397 	area_id		fArea;
398 	VMCache*	fTopCache;
399 	VMCache*	fCache;
400 	vm_page*	fPage;
401 };
402 
403 }	// namespace VMPageFaultTracing
404 
405 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406 #else
407 #	define TPF(x) ;
408 #endif	// VM_PAGE_FAULT_TRACING
409 
410 
411 //	#pragma mark -
412 
413 
414 /*!	The page's cache must be locked.
415 */
416 static inline void
417 increment_page_wired_count(vm_page* page)
418 {
419 	if (!page->IsMapped())
420 		atomic_add(&gMappedPagesCount, 1);
421 	page->IncrementWiredCount();
422 }
423 
424 
425 /*!	The page's cache must be locked.
426 */
427 static inline void
428 decrement_page_wired_count(vm_page* page)
429 {
430 	page->DecrementWiredCount();
431 	if (!page->IsMapped())
432 		atomic_add(&gMappedPagesCount, -1);
433 }
434 
435 
436 static inline addr_t
437 virtual_page_address(VMArea* area, vm_page* page)
438 {
439 	return area->Base()
440 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441 }
442 
443 
444 //! You need to have the address space locked when calling this function
445 static VMArea*
446 lookup_area(VMAddressSpace* addressSpace, area_id id)
447 {
448 	VMAreaHash::ReadLock();
449 
450 	VMArea* area = VMAreaHash::LookupLocked(id);
451 	if (area != NULL && area->address_space != addressSpace)
452 		area = NULL;
453 
454 	VMAreaHash::ReadUnlock();
455 
456 	return area;
457 }
458 
459 
460 static status_t
461 allocate_area_page_protections(VMArea* area)
462 {
463 	// In the page protections we store only the three user protections,
464 	// so we use 4 bits per page.
465 	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466 	area->page_protections = (uint8*)malloc_etc(bytes,
467 		HEAP_DONT_LOCK_KERNEL_SPACE);
468 	if (area->page_protections == NULL)
469 		return B_NO_MEMORY;
470 
471 	// init the page protections for all pages to that of the area
472 	uint32 areaProtection = area->protection
473 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474 	memset(area->page_protections, areaProtection | (areaProtection << 4),
475 		bytes);
476 	return B_OK;
477 }
478 
479 
480 static inline void
481 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482 {
483 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485 	uint8& entry = area->page_protections[pageIndex / 2];
486 	if (pageIndex % 2 == 0)
487 		entry = (entry & 0xf0) | protection;
488 	else
489 		entry = (entry & 0x0f) | (protection << 4);
490 }
491 
492 
493 static inline uint32
494 get_area_page_protection(VMArea* area, addr_t pageAddress)
495 {
496 	if (area->page_protections == NULL)
497 		return area->protection;
498 
499 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500 	uint32 protection = area->page_protections[pageIndex / 2];
501 	if (pageIndex % 2 == 0)
502 		protection &= 0x0f;
503 	else
504 		protection >>= 4;
505 
506 	// If this is a kernel area we translate the user flags to kernel flags.
507 	if (area->address_space == VMAddressSpace::Kernel()) {
508 		uint32 kernelProtection = 0;
509 		if ((protection & B_READ_AREA) != 0)
510 			kernelProtection |= B_KERNEL_READ_AREA;
511 		if ((protection & B_WRITE_AREA) != 0)
512 			kernelProtection |= B_KERNEL_WRITE_AREA;
513 
514 		return kernelProtection;
515 	}
516 
517 	return protection | B_KERNEL_READ_AREA
518 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519 }
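
// Layout sketch for area->page_protections (example values only): each byte
// holds the protections of two pages -- page 2i in the low nibble, page
// 2i + 1 in the high nibble. Setting page 3 of an area to read-only therefore
// only touches the high nibble of byte 1:
//
//	set_area_page_protection(area, area->Base() + 3 * B_PAGE_SIZE,
//		B_READ_AREA);
//	// equivalent to: page_protections[1]
//	//	= (page_protections[1] & 0x0f) | (B_READ_AREA << 4);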
520 
521 
522 /*!	The caller must have reserved as many pages as the translation map
523 	implementation might need to map this page.
524 	The page's cache must be locked.
525 */
526 static status_t
527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528 	vm_page_reservation* reservation)
529 {
530 	VMTranslationMap* map = area->address_space->TranslationMap();
531 
532 	bool wasMapped = page->IsMapped();
533 
534 	if (area->wiring == B_NO_LOCK) {
535 		DEBUG_PAGE_ACCESS_CHECK(page);
536 
537 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539 			gPageMappingsObjectCache,
540 			CACHE_DONT_WAIT_FOR_MEMORY
541 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542 		if (mapping == NULL)
543 			return B_NO_MEMORY;
544 
545 		mapping->page = page;
546 		mapping->area = area;
547 
548 		map->Lock();
549 
550 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551 			area->MemoryType(), reservation);
552 
553 		// insert mapping into lists
554 		if (!page->IsMapped())
555 			atomic_add(&gMappedPagesCount, 1);
556 
557 		page->mappings.Add(mapping);
558 		area->mappings.Add(mapping);
559 
560 		map->Unlock();
561 	} else {
562 		DEBUG_PAGE_ACCESS_CHECK(page);
563 
564 		map->Lock();
565 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566 			area->MemoryType(), reservation);
567 		map->Unlock();
568 
569 		increment_page_wired_count(page);
570 	}
571 
572 	if (!wasMapped) {
573 		// The page is mapped now, so it must not remain in the cached queue.
574 		// It also makes sense to move it from the inactive to the active queue,
575 		// since otherwise the page daemon wouldn't keep track of it (in idle
576 		// mode) -- if the page isn't touched, it will be deactivated after a
577 		// full iteration through the queue at the latest.
578 		if (page->State() == PAGE_STATE_CACHED
579 				|| page->State() == PAGE_STATE_INACTIVE) {
580 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581 		}
582 	}
583 
584 	return B_OK;
585 }
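
// Typical caller sketch (condensed from the B_FULL_LOCK path of
// vm_create_anonymous_area() below; the variable names are the ones used
// there): both the page reservation and the cache lock have to be in place
// before mapping:
//
//	vm_page_reservation reservation;
//	vm_page_reserve_pages(&reservation, reservedPages, priority);
//	cache->Lock();
//	vm_page* page = vm_page_allocate_page(&reservation, PAGE_STATE_WIRED);
//	cache->InsertPage(page, offset);
//	map_page(area, page, address, protection, &reservation);
//	DEBUG_PAGE_ACCESS_END(page);
//	cache->Unlock();
//	vm_page_unreserve_pages(&reservation);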
586 
587 
588 /*!	The page at \a virtualAddress is unmapped; the caller must hold the
589 	lock of the page's cache.
590 */
591 static inline bool
592 unmap_page(VMArea* area, addr_t virtualAddress)
593 {
594 	return area->address_space->TranslationMap()->UnmapPage(area,
595 		virtualAddress, true);
596 }
597 
598 
599 /*!	All pages in the given range are unmapped; the caller must hold the
600 	lock of all mapped pages' caches.
601 */
602 static inline void
603 unmap_pages(VMArea* area, addr_t base, size_t size)
604 {
605 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606 }
607 
608 
609 /*!	Cuts a piece out of an area. If the given cut range covers the complete
610 	area, it is deleted. If it covers the beginning or the end, the area is
611 	resized accordingly. If the range covers some part in the middle of the
612 	area, it is split in two; in this case the second area is returned via
613 	\a _secondArea (the variable is left untouched in the other cases).
614 	The address space must be write locked.
615 	The caller must ensure that no part of the given range is wired.
616 */
617 static status_t
618 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
619 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
620 {
621 	// Does the cut range intersect with the area at all?
622 	addr_t areaLast = area->Base() + (area->Size() - 1);
623 	if (area->Base() > lastAddress || areaLast < address)
624 		return B_OK;
625 
626 	// Is the area fully covered?
627 	if (area->Base() >= address && areaLast <= lastAddress) {
628 		delete_area(addressSpace, area, false);
629 		return B_OK;
630 	}
631 
632 	int priority;
633 	uint32 allocationFlags;
634 	if (addressSpace == VMAddressSpace::Kernel()) {
635 		priority = VM_PRIORITY_SYSTEM;
636 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
637 			| HEAP_DONT_LOCK_KERNEL_SPACE;
638 	} else {
639 		priority = VM_PRIORITY_USER;
640 		allocationFlags = 0;
641 	}
642 
643 	VMCache* cache = vm_area_get_locked_cache(area);
644 	VMCacheChainLocker cacheChainLocker(cache);
645 	cacheChainLocker.LockAllSourceCaches();
646 
647 	// Cut the end only?
648 	if (areaLast <= lastAddress) {
649 		size_t oldSize = area->Size();
650 		size_t newSize = address - area->Base();
651 
652 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
653 			allocationFlags);
654 		if (error != B_OK)
655 			return error;
656 
657 		// unmap pages
658 		unmap_pages(area, address, oldSize - newSize);
659 
660 		// If no one else uses the area's cache, we can resize it, too.
661 		if (cache->areas == area && area->cache_next == NULL
662 			&& cache->consumers.IsEmpty()
663 			&& cache->type == CACHE_TYPE_RAM) {
664 			// Since VMCache::Resize() can temporarily drop the lock, we must
665 			// unlock all lower caches to prevent locking order inversion.
666 			cacheChainLocker.Unlock(cache);
667 			cache->Resize(cache->virtual_base + newSize, priority);
668 			cache->ReleaseRefAndUnlock();
669 		}
670 
671 		return B_OK;
672 	}
673 
674 	// Cut the beginning only?
675 	if (area->Base() >= address) {
676 		addr_t oldBase = area->Base();
677 		addr_t newBase = lastAddress + 1;
678 		size_t newSize = areaLast - lastAddress;
679 
680 		// unmap pages
681 		unmap_pages(area, oldBase, newBase - oldBase);
682 
683 		// resize the area
684 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
685 			allocationFlags);
686 		if (error != B_OK)
687 			return error;
688 
689 		// TODO: If no one else uses the area's cache, we should resize it, too!
690 
691 		area->cache_offset += newBase - oldBase;
692 
693 		return B_OK;
694 	}
695 
696 	// The tough part -- cut a piece out of the middle of the area.
697 	// We do that by shrinking the area to the beginning section and creating a
698 	// new area for the end section.
699 
700 	addr_t firstNewSize = address - area->Base();
701 	addr_t secondBase = lastAddress + 1;
702 	addr_t secondSize = areaLast - lastAddress;
703 
704 	// unmap pages
705 	unmap_pages(area, address, area->Size() - firstNewSize);
706 
707 	// resize the area
708 	addr_t oldSize = area->Size();
709 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
710 		allocationFlags);
711 	if (error != B_OK)
712 		return error;
713 
714 	// TODO: If no one else uses the area's cache, we might want to create a
715 	// new cache for the second area, transfer the concerned pages from the
716 	// first cache to it and resize the first cache.
717 
718 	// map the second area
719 	virtual_address_restrictions addressRestrictions = {};
720 	addressRestrictions.address = (void*)secondBase;
721 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
722 	VMArea* secondArea;
723 	error = map_backing_store(addressSpace, cache,
724 		area->cache_offset + (secondBase - area->Base()), area->name,
725 		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
726 		&addressRestrictions, kernel, &secondArea, NULL);
727 	if (error != B_OK) {
728 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
729 		return error;
730 	}
731 
732 	// We need a cache reference for the new area.
733 	cache->AcquireRefLocked();
734 
735 	if (_secondArea != NULL)
736 		*_secondArea = secondArea;
737 
738 	return B_OK;
739 }
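
// The cases handled above, sketched (A = remaining part, x = cut range):
//
//	covers the whole area:  [xxxxxxxxxxxx]  -> area deleted
//	cuts the end:           [AAAAAAAAxxxx]  -> ShrinkAreaTail()
//	cuts the beginning:     [xxxxAAAAAAAA]  -> ShrinkAreaHead()
//	cuts the middle:        [AAAAxxxxAAAA]  -> ShrinkAreaTail() plus a second
//	                                           area returned via _secondArea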
740 
741 
742 /*!	Deletes all areas in the given address range.
743 	The address space must be write-locked.
744 	The caller must ensure that no part of the given range is wired.
745 */
746 static status_t
747 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
748 	bool kernel)
749 {
750 	size = PAGE_ALIGN(size);
751 	addr_t lastAddress = address + (size - 1);
752 
753 	// Check whether the caller is allowed to modify the concerned areas.
754 	if (!kernel) {
755 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
756 				VMArea* area = it.Next();) {
757 			addr_t areaLast = area->Base() + (area->Size() - 1);
758 			if (area->Base() < lastAddress && address < areaLast) {
759 				if ((area->protection & B_KERNEL_AREA) != 0)
760 					return B_NOT_ALLOWED;
761 			}
762 		}
763 	}
764 
765 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
766 			VMArea* area = it.Next();) {
767 		addr_t areaLast = area->Base() + (area->Size() - 1);
768 		if (area->Base() < lastAddress && address < areaLast) {
769 			status_t error = cut_area(addressSpace, area, address,
770 				lastAddress, NULL, kernel);
771 			if (error != B_OK)
772 				return error;
773 				// Failing after already messing with areas is ugly, but we
774 				// can't do anything about it.
775 		}
776 	}
777 
778 	return B_OK;
779 }
780 
781 
782 /*! You need to hold the lock of the cache and the write lock of the address
783 	space when calling this function.
784 	Note that in case of an error your cache will be temporarily unlocked.
785 	If \a addressSpec is \c B_EXACT_ADDRESS and the
786 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
787 	that no part of the specified address range (base \c *_virtualAddress, size
788 	\a size) is wired.
789 */
790 static status_t
791 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
792 	const char* areaName, addr_t size, int wiring, int protection, int mapping,
793 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
794 	bool kernel, VMArea** _area, void** _virtualAddress)
795 {
796 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
797 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
798 		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
799 		addressRestrictions->address, offset, size,
800 		addressRestrictions->address_specification, wiring, protection,
801 		_area, areaName));
802 	cache->AssertLocked();
803 
804 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
805 		| HEAP_DONT_LOCK_KERNEL_SPACE;
806 	int priority;
807 	if (addressSpace != VMAddressSpace::Kernel()) {
808 		priority = VM_PRIORITY_USER;
809 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
810 		priority = VM_PRIORITY_VIP;
811 		allocationFlags |= HEAP_PRIORITY_VIP;
812 	} else
813 		priority = VM_PRIORITY_SYSTEM;
814 
815 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
816 		allocationFlags);
817 	if (area == NULL)
818 		return B_NO_MEMORY;
819 
820 	status_t status;
821 
822 	// if this is a private map, we need to create a new cache
823 	// to handle the private copies of pages as they are written to
824 	VMCache* sourceCache = cache;
825 	if (mapping == REGION_PRIVATE_MAP) {
826 		VMCache* newCache;
827 
828 		// create an anonymous cache
829 		status = VMCacheFactory::CreateAnonymousCache(newCache,
830 			(protection & B_STACK_AREA) != 0
831 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
832 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
833 		if (status != B_OK)
834 			goto err1;
835 
836 		newCache->Lock();
837 		newCache->temporary = 1;
838 		newCache->virtual_base = offset;
839 		newCache->virtual_end = offset + size;
840 
841 		cache->AddConsumer(newCache);
842 
843 		cache = newCache;
844 	}
845 
846 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
847 		status = cache->SetMinimalCommitment(size, priority);
848 		if (status != B_OK)
849 			goto err2;
850 	}
851 
852 	// check to see if this address space has entered DELETE state
853 	if (addressSpace->IsBeingDeleted()) {
854 		// someone is trying to delete this address space now, so we can't
855 		// insert the area and have to back out
856 		status = B_BAD_TEAM_ID;
857 		goto err2;
858 	}
859 
860 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
861 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
862 		status = unmap_address_range(addressSpace,
863 			(addr_t)addressRestrictions->address, size, kernel);
864 		if (status != B_OK)
865 			goto err2;
866 	}
867 
868 	status = addressSpace->InsertArea(area, size, addressRestrictions,
869 		allocationFlags, _virtualAddress);
870 	if (status != B_OK) {
871 		// TODO: wait and try again once this is working in the backend
872 #if 0
873 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
874 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
875 				0, 0);
876 		}
877 #endif
878 		goto err2;
879 	}
880 
881 	// attach the cache to the area
882 	area->cache = cache;
883 	area->cache_offset = offset;
884 
885 	// point the cache back to the area
886 	cache->InsertAreaLocked(area);
887 	if (mapping == REGION_PRIVATE_MAP)
888 		cache->Unlock();
889 
890 	// insert the area in the global area hash table
891 	VMAreaHash::Insert(area);
892 
893 	// grab a ref to the address space (the area holds this)
894 	addressSpace->Get();
895 
896 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
897 //		cache, sourceCache, areaName, area);
898 
899 	*_area = area;
900 	return B_OK;
901 
902 err2:
903 	if (mapping == REGION_PRIVATE_MAP) {
904 	// We created this cache, so we must delete it again. Note that we
905 		// need to temporarily unlock the source cache or we'll otherwise
906 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
907 		sourceCache->Unlock();
908 		cache->ReleaseRefAndUnlock();
909 		sourceCache->Lock();
910 	}
911 err1:
912 	addressSpace->DeleteArea(area, allocationFlags);
913 	return status;
914 }
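
// Caller sketch (this is the pattern vm_block_address_range() below uses):
// the caller creates and locks the cache, map_backing_store() attaches it to
// the new area, and the caller drops its cache reference only on failure:
//
//	cache->Lock();
//	status = map_backing_store(addressSpace, cache, 0, name, size,
//		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
//		true, &area, NULL);
//	if (status != B_OK) {
//		cache->ReleaseRefAndUnlock();
//		return status;
//	}
//	cache->Unlock();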
915 
916 
917 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
918 	  locker1, locker2).
919 */
920 template<typename LockerType1, typename LockerType2>
921 static inline bool
922 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
923 {
924 	area->cache->AssertLocked();
925 
926 	VMAreaUnwiredWaiter waiter;
927 	if (!area->AddWaiterIfWired(&waiter))
928 		return false;
929 
930 	// unlock everything and wait
931 	if (locker1 != NULL)
932 		locker1->Unlock();
933 	if (locker2 != NULL)
934 		locker2->Unlock();
935 
936 	waiter.waitEntry.Wait();
937 
938 	return true;
939 }
940 
941 
942 /*!	Checks whether the given area has any wired ranges intersecting with the
943 	specified range and waits, if so.
944 
945 	When it has to wait, the function calls \c Unlock() on both \a locker1
946 	and \a locker2, if given.
947 	The area's top cache must be locked and must be unlocked as a side effect
948 	of calling \c Unlock() on either \a locker1 or \a locker2.
949 
950 	If the function does not have to wait it does not modify or unlock any
951 	object.
952 
953 	\param area The area to be checked.
954 	\param base The base address of the range to check.
955 	\param size The size of the address range to check.
956 	\param locker1 An object to be unlocked before starting to wait (may
957 		be \c NULL).
958 	\param locker2 An object to be unlocked before starting to wait (may
959 		be \c NULL).
960 	\return \c true, if the function had to wait, \c false otherwise.
961 */
962 template<typename LockerType1, typename LockerType2>
963 static inline bool
964 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
965 	LockerType1* locker1, LockerType2* locker2)
966 {
967 	area->cache->AssertLocked();
968 
969 	VMAreaUnwiredWaiter waiter;
970 	if (!area->AddWaiterIfWired(&waiter, base, size))
971 		return false;
972 
973 	// unlock everything and wait
974 	if (locker1 != NULL)
975 		locker1->Unlock();
976 	if (locker2 != NULL)
977 		locker2->Unlock();
978 
979 	waiter.waitEntry.Wait();
980 
981 	return true;
982 }
983 
984 
985 /*!	Checks whether the given address space has any wired ranges intersecting
986 	with the specified range and waits, if so.
987 
988 	Similar to wait_if_area_range_is_wired(), with the following differences:
989 	- All areas intersecting with the range are checked (or rather, all until
990 	  one is found that contains a wired range intersecting with the given
991 	  range).
992 	- The given address space must at least be read-locked and must be unlocked
993 	  when \c Unlock() is called on \a locker.
994 	- None of the areas' caches are allowed to be locked.
995 */
996 template<typename LockerType>
997 static inline bool
998 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
999 	size_t size, LockerType* locker)
1000 {
1001 	addr_t end = base + size - 1;
1002 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
1003 			VMArea* area = it.Next();) {
1004 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
1005 		if (area->Base() > end)
1006 			return false;
1007 
1008 		if (base >= area->Base() + area->Size() - 1)
1009 			continue;
1010 
1011 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1012 
1013 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1014 			return true;
1015 	}
1016 
1017 	return false;
1018 }
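
// Caller sketch (the retry pattern used by vm_create_anonymous_area() and
// vm_create_null_area() below): since waiting drops the address space lock,
// the caller re-locks and re-checks until no wired range intersects anymore:
//
//	AddressSpaceWriteLocker locker;
//	do {
//		if (locker.SetTo(team) != B_OK)
//			return B_BAD_TEAM_ID;
//	} while (wait_if_address_range_is_wired(locker.AddressSpace(),
//		(addr_t)address, size, &locker));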
1019 
1020 
1021 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1022 	It must be called in a situation where the kernel address space may safely
1023 	be locked.
1024 */
1025 status_t
1026 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1027 {
1028 	AddressSpaceReadLocker locker;
1029 	VMArea* area;
1030 	status_t status = locker.SetFromArea(id, area);
1031 	if (status != B_OK)
1032 		return status;
1033 
1034 	if (area->page_protections == NULL) {
1035 		status = allocate_area_page_protections(area);
1036 		if (status != B_OK)
1037 			return status;
1038 	}
1039 
1040 	*cookie = (void*)area;
1041 	return B_OK;
1042 }
1043 
1044 
1045 /*!	This is a debug helper function that is intended only for very specific
1046 	use cases.
1047 	Sets protection for the given address range to the protection specified.
1048 	If \a protection is 0 then the involved pages will be marked non-present
1049 	in the translation map to cause a fault on access. The pages aren't
1050 	actually unmapped however so that they can be marked present again with
1051 	additional calls to this function. For this to work the area must be
1052 	fully locked in memory so that the pages aren't otherwise touched.
1053 	This function does not lock the kernel address space and needs to be
1054 	supplied with a \a cookie retrieved from a successful call to
1055 	vm_prepare_kernel_area_debug_protection().
1056 */
1057 status_t
1058 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1059 	uint32 protection)
1060 {
1061 	// check address range
1062 	addr_t address = (addr_t)_address;
1063 	size = PAGE_ALIGN(size);
1064 
1065 	if ((address % B_PAGE_SIZE) != 0
1066 		|| (addr_t)address + size < (addr_t)address
1067 		|| !IS_KERNEL_ADDRESS(address)
1068 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1069 		return B_BAD_VALUE;
1070 	}
1071 
1072 	// Translate the kernel protection to user protection as we only store that.
1073 	if ((protection & B_KERNEL_READ_AREA) != 0)
1074 		protection |= B_READ_AREA;
1075 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1076 		protection |= B_WRITE_AREA;
1077 
1078 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1079 	VMTranslationMap* map = addressSpace->TranslationMap();
1080 	VMArea* area = (VMArea*)cookie;
1081 
1082 	addr_t offset = address - area->Base();
1083 	if (area->Size() - offset < size) {
1084 		panic("protect range not fully within supplied area");
1085 		return B_BAD_VALUE;
1086 	}
1087 
1088 	if (area->page_protections == NULL) {
1089 		panic("area has no page protections");
1090 		return B_BAD_VALUE;
1091 	}
1092 
1093 	// Invalidate the mapping entries so any access to them will fault or
1094 	// restore the mapping entries unchanged so that lookup will succeed again.
1095 	map->Lock();
1096 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1097 	map->Unlock();
1098 
1099 	// And set the proper page protections so that the fault case will actually
1100 	// fail and not simply try to map a new page.
1101 	for (addr_t pageAddress = address; pageAddress < address + size;
1102 			pageAddress += B_PAGE_SIZE) {
1103 		set_area_page_protection(area, pageAddress, protection);
1104 	}
1105 
1106 	return B_OK;
1107 }
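
// Usage sketch (the area ID and page address are hypothetical): a debug
// facility first prepares the fully locked kernel area, then toggles single
// pages between faulting and accessible:
//
//	void* cookie;
//	if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
//		// make the page fault on any access ...
//		vm_set_kernel_area_debug_protection(cookie, pageAddress,
//			B_PAGE_SIZE, 0);
//		// ... and later restore full kernel access to it
//		vm_set_kernel_area_debug_protection(cookie, pageAddress,
//			B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	}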
1108 
1109 
1110 status_t
1111 vm_block_address_range(const char* name, void* address, addr_t size)
1112 {
1113 	if (!arch_vm_supports_protection(0))
1114 		return B_NOT_SUPPORTED;
1115 
1116 	AddressSpaceWriteLocker locker;
1117 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1118 	if (status != B_OK)
1119 		return status;
1120 
1121 	VMAddressSpace* addressSpace = locker.AddressSpace();
1122 
1123 	// create an anonymous cache
1124 	VMCache* cache;
1125 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1126 		VM_PRIORITY_SYSTEM);
1127 	if (status != B_OK)
1128 		return status;
1129 
1130 	cache->temporary = 1;
1131 	cache->virtual_end = size;
1132 	cache->Lock();
1133 
1134 	VMArea* area;
1135 	virtual_address_restrictions addressRestrictions = {};
1136 	addressRestrictions.address = address;
1137 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1138 	status = map_backing_store(addressSpace, cache, 0, name, size,
1139 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1140 		true, &area, NULL);
1141 	if (status != B_OK) {
1142 		cache->ReleaseRefAndUnlock();
1143 		return status;
1144 	}
1145 
1146 	cache->Unlock();
1147 	area->cache_type = CACHE_TYPE_RAM;
1148 	return area->id;
1149 }
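
// Usage sketch (the name, base address, and size are illustrative only):
// callers use this to keep a kernel virtual range permanently unusable, so
// nothing else can ever be mapped there:
//
//	vm_block_address_range("blocked range", (void*)blockedBase,
//		16 * B_PAGE_SIZE);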
1150 
1151 
1152 status_t
1153 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1154 {
1155 	AddressSpaceWriteLocker locker(team);
1156 	if (!locker.IsLocked())
1157 		return B_BAD_TEAM_ID;
1158 
1159 	VMAddressSpace* addressSpace = locker.AddressSpace();
1160 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1161 		addressSpace == VMAddressSpace::Kernel()
1162 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1163 }
1164 
1165 
1166 status_t
1167 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1168 	addr_t size, uint32 flags)
1169 {
1170 	if (size == 0)
1171 		return B_BAD_VALUE;
1172 
1173 	AddressSpaceWriteLocker locker(team);
1174 	if (!locker.IsLocked())
1175 		return B_BAD_TEAM_ID;
1176 
1177 	virtual_address_restrictions addressRestrictions = {};
1178 	addressRestrictions.address = *_address;
1179 	addressRestrictions.address_specification = addressSpec;
1180 	VMAddressSpace* addressSpace = locker.AddressSpace();
1181 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1182 		addressSpace == VMAddressSpace::Kernel()
1183 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1184 		_address);
1185 }
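
// Usage sketch (team and size are illustrative): reserving a 16 MB window of
// the team's address space and releasing it again later:
//
//	void* base = NULL;
//	status_t status = vm_reserve_address_range(team, &base, B_ANY_ADDRESS,
//		16 * 1024 * 1024, 0);
//	if (status == B_OK)
//		vm_unreserve_address_range(team, base, 16 * 1024 * 1024);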
1186 
1187 
1188 area_id
1189 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1190 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1191 	const virtual_address_restrictions* virtualAddressRestrictions,
1192 	const physical_address_restrictions* physicalAddressRestrictions,
1193 	bool kernel, void** _address)
1194 {
1195 	VMArea* area;
1196 	VMCache* cache;
1197 	vm_page* page = NULL;
1198 	bool isStack = (protection & B_STACK_AREA) != 0;
1199 	page_num_t guardPages;
1200 	bool canOvercommit = false;
1201 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1202 		? VM_PAGE_ALLOC_CLEAR : 0;
1203 
1204 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1205 		team, name, size));
1206 
1207 	size = PAGE_ALIGN(size);
1208 	guardSize = PAGE_ALIGN(guardSize);
1209 	guardPages = guardSize / B_PAGE_SIZE;
1210 
1211 	if (size == 0 || size < guardSize)
1212 		return B_BAD_VALUE;
1213 	if (!arch_vm_supports_protection(protection))
1214 		return B_NOT_SUPPORTED;
1215 
1216 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1217 		canOvercommit = true;
1218 
1219 #ifdef DEBUG_KERNEL_STACKS
1220 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1221 		isStack = true;
1222 #endif
1223 
1224 	// check parameters
1225 	switch (virtualAddressRestrictions->address_specification) {
1226 		case B_ANY_ADDRESS:
1227 		case B_EXACT_ADDRESS:
1228 		case B_BASE_ADDRESS:
1229 		case B_ANY_KERNEL_ADDRESS:
1230 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1231 		case B_RANDOMIZED_ANY_ADDRESS:
1232 		case B_RANDOMIZED_BASE_ADDRESS:
1233 			break;
1234 
1235 		default:
1236 			return B_BAD_VALUE;
1237 	}
1238 
1239 	// If low or high physical address restrictions are given, we force
1240 	// B_CONTIGUOUS wiring, since only then we'll use
1241 	// vm_page_allocate_page_run() which deals with those restrictions.
1242 	if (physicalAddressRestrictions->low_address != 0
1243 		|| physicalAddressRestrictions->high_address != 0) {
1244 		wiring = B_CONTIGUOUS;
1245 	}
1246 
1247 	physical_address_restrictions stackPhysicalRestrictions;
1248 	bool doReserveMemory = false;
1249 	switch (wiring) {
1250 		case B_NO_LOCK:
1251 			break;
1252 		case B_FULL_LOCK:
1253 		case B_LAZY_LOCK:
1254 		case B_CONTIGUOUS:
1255 			doReserveMemory = true;
1256 			break;
1257 		case B_ALREADY_WIRED:
1258 			break;
1259 		case B_LOMEM:
1260 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1261 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1262 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1263 			wiring = B_CONTIGUOUS;
1264 			doReserveMemory = true;
1265 			break;
1266 		case B_32_BIT_FULL_LOCK:
1267 			if (B_HAIKU_PHYSICAL_BITS <= 32
1268 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1269 				wiring = B_FULL_LOCK;
1270 				doReserveMemory = true;
1271 				break;
1272 			}
1273 			// TODO: We don't really support this mode efficiently. Just fall
1274 			// through for now ...
1275 		case B_32_BIT_CONTIGUOUS:
1276 			#if B_HAIKU_PHYSICAL_BITS > 32
1277 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1278 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1279 					stackPhysicalRestrictions.high_address
1280 						= (phys_addr_t)1 << 32;
1281 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1282 				}
1283 			#endif
1284 			wiring = B_CONTIGUOUS;
1285 			doReserveMemory = true;
1286 			break;
1287 		default:
1288 			return B_BAD_VALUE;
1289 	}
1290 
1291 	// Optimization: For a single-page contiguous allocation without low/high
1292 	// memory restriction B_FULL_LOCK wiring suffices.
1293 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1294 		&& physicalAddressRestrictions->low_address == 0
1295 		&& physicalAddressRestrictions->high_address == 0) {
1296 		wiring = B_FULL_LOCK;
1297 	}
1298 
1299 	// For full lock or contiguous areas we're also going to map the pages and
1300 	// thus need to reserve pages for the mapping backend upfront.
1301 	addr_t reservedMapPages = 0;
1302 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1303 		AddressSpaceWriteLocker locker;
1304 		status_t status = locker.SetTo(team);
1305 		if (status != B_OK)
1306 			return status;
1307 
1308 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1309 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1310 	}
1311 
1312 	int priority;
1313 	if (team != VMAddressSpace::KernelID())
1314 		priority = VM_PRIORITY_USER;
1315 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1316 		priority = VM_PRIORITY_VIP;
1317 	else
1318 		priority = VM_PRIORITY_SYSTEM;
1319 
1320 	// Reserve memory before acquiring the address space lock. This reduces the
1321 	// chances of failure, since while we hold the write lock to the address
1322 	// space (at least if it is the kernel address space), the low memory
1323 	// handler won't be able to free anything for us.
1324 	addr_t reservedMemory = 0;
1325 	if (doReserveMemory) {
1326 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1327 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1328 			return B_NO_MEMORY;
1329 		reservedMemory = size;
1330 		// TODO: We don't reserve the memory for the pages for the page
1331 		// directories/tables. We actually need to, since we currently don't
1332 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1333 		// there are actually fewer physical pages than there should be, which
1334 		// can get the VM into trouble in low memory situations.
1335 	}
1336 
1337 	AddressSpaceWriteLocker locker;
1338 	VMAddressSpace* addressSpace;
1339 	status_t status;
1340 
1341 	// For full lock areas reserve the pages before locking the address
1342 	// space. E.g. block caches can't release their memory while we hold the
1343 	// address space lock.
1344 	page_num_t reservedPages = reservedMapPages;
1345 	if (wiring == B_FULL_LOCK)
1346 		reservedPages += size / B_PAGE_SIZE;
1347 
1348 	vm_page_reservation reservation;
1349 	if (reservedPages > 0) {
1350 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1351 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1352 					priority)) {
1353 				reservedPages = 0;
1354 				status = B_WOULD_BLOCK;
1355 				goto err0;
1356 			}
1357 		} else
1358 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1359 	}
1360 
1361 	if (wiring == B_CONTIGUOUS) {
1362 		// we try to allocate the page run here upfront as this may easily
1363 		// fail for obvious reasons
1364 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1365 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1366 		if (page == NULL) {
1367 			status = B_NO_MEMORY;
1368 			goto err0;
1369 		}
1370 	}
1371 
1372 	// Lock the address space and, if B_EXACT_ADDRESS and
1373 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1374 	// is not wired.
1375 	do {
1376 		status = locker.SetTo(team);
1377 		if (status != B_OK)
1378 			goto err1;
1379 
1380 		addressSpace = locker.AddressSpace();
1381 	} while (virtualAddressRestrictions->address_specification
1382 			== B_EXACT_ADDRESS
1383 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1384 		&& wait_if_address_range_is_wired(addressSpace,
1385 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1386 
1387 	// create an anonymous cache
1388 	// if it's a stack, make sure that at least two pages are available
1389 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1390 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1391 		wiring == B_NO_LOCK, priority);
1392 	if (status != B_OK)
1393 		goto err1;
1394 
1395 	cache->temporary = 1;
1396 	cache->virtual_end = size;
1397 	cache->committed_size = reservedMemory;
1398 		// TODO: This should be done via a method.
1399 	reservedMemory = 0;
1400 
1401 	cache->Lock();
1402 
1403 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1404 		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1405 		kernel, &area, _address);
1406 
1407 	if (status != B_OK) {
1408 		cache->ReleaseRefAndUnlock();
1409 		goto err1;
1410 	}
1411 
1412 	locker.DegradeToReadLock();
1413 
1414 	switch (wiring) {
1415 		case B_NO_LOCK:
1416 		case B_LAZY_LOCK:
1417 			// do nothing - the pages are mapped in as needed
1418 			break;
1419 
1420 		case B_FULL_LOCK:
1421 		{
1422 			// Allocate and map all pages for this area
1423 
1424 			off_t offset = 0;
1425 			for (addr_t address = area->Base();
1426 					address < area->Base() + (area->Size() - 1);
1427 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1428 #ifdef DEBUG_KERNEL_STACKS
1429 #	ifdef STACK_GROWS_DOWNWARDS
1430 				if (isStack && address < area->Base()
1431 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1432 #	else
1433 				if (isStack && address >= area->Base() + area->Size()
1434 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1435 #	endif
1436 					continue;
1437 #endif
1438 				vm_page* page = vm_page_allocate_page(&reservation,
1439 					PAGE_STATE_WIRED | pageAllocFlags);
1440 				cache->InsertPage(page, offset);
1441 				map_page(area, page, address, protection, &reservation);
1442 
1443 				DEBUG_PAGE_ACCESS_END(page);
1444 			}
1445 
1446 			break;
1447 		}
1448 
1449 		case B_ALREADY_WIRED:
1450 		{
1451 			// The pages should already be mapped. This is only really useful
1452 			// during boot time. Find the appropriate vm_page objects and stick
1453 			// them in the cache object.
1454 			VMTranslationMap* map = addressSpace->TranslationMap();
1455 			off_t offset = 0;
1456 
1457 			if (!gKernelStartup)
1458 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1459 
1460 			map->Lock();
1461 
1462 			for (addr_t virtualAddress = area->Base();
1463 					virtualAddress < area->Base() + (area->Size() - 1);
1464 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1465 				phys_addr_t physicalAddress;
1466 				uint32 flags;
1467 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1468 				if (status < B_OK) {
1469 					panic("looking up mapping failed for va 0x%lx\n",
1470 						virtualAddress);
1471 				}
1472 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1473 				if (page == NULL) {
1474 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1475 						"\n", physicalAddress);
1476 				}
1477 
1478 				DEBUG_PAGE_ACCESS_START(page);
1479 
1480 				cache->InsertPage(page, offset);
1481 				increment_page_wired_count(page);
1482 				vm_page_set_state(page, PAGE_STATE_WIRED);
1483 				page->busy = false;
1484 
1485 				DEBUG_PAGE_ACCESS_END(page);
1486 			}
1487 
1488 			map->Unlock();
1489 			break;
1490 		}
1491 
1492 		case B_CONTIGUOUS:
1493 			// We have already allocated our contiguous page run, so we can now
1494 			// just map its pages into the address space
1495 			// just map them in the address space
1496 			VMTranslationMap* map = addressSpace->TranslationMap();
1497 			phys_addr_t physicalAddress
1498 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1499 			addr_t virtualAddress = area->Base();
1500 			off_t offset = 0;
1501 
1502 			map->Lock();
1503 
1504 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1505 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1506 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1507 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1508 				if (page == NULL)
1509 					panic("couldn't lookup physical page just allocated\n");
1510 
1511 				status = map->Map(virtualAddress, physicalAddress, protection,
1512 					area->MemoryType(), &reservation);
1513 				if (status < B_OK)
1514 					panic("couldn't map physical page in page run\n");
1515 
1516 				cache->InsertPage(page, offset);
1517 				increment_page_wired_count(page);
1518 
1519 				DEBUG_PAGE_ACCESS_END(page);
1520 			}
1521 
1522 			map->Unlock();
1523 			break;
1524 		}
1525 
1526 		default:
1527 			break;
1528 	}
1529 
1530 	cache->Unlock();
1531 
1532 	if (reservedPages > 0)
1533 		vm_page_unreserve_pages(&reservation);
1534 
1535 	TRACE(("vm_create_anonymous_area: done\n"));
1536 
1537 	area->cache_type = CACHE_TYPE_RAM;
1538 	return area->id;
1539 
1540 err1:
1541 	if (wiring == B_CONTIGUOUS) {
1542 		// we allocated the contiguous page run upfront, so free its pages again
1543 		phys_addr_t pageNumber = page->physical_page_number;
1544 		int32 i;
1545 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1546 			page = vm_lookup_page(pageNumber);
1547 			if (page == NULL)
1548 				panic("couldn't lookup physical page just allocated\n");
1549 
1550 			vm_page_set_state(page, PAGE_STATE_FREE);
1551 		}
1552 	}
1553 
1554 err0:
1555 	if (reservedPages > 0)
1556 		vm_page_unreserve_pages(&reservation);
1557 	if (reservedMemory > 0)
1558 		vm_unreserve_memory(reservedMemory);
1559 
1560 	return status;
1561 }
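
// Caller sketch (mirrors the fd < 0 path of _vm_map_file() below; name and
// size are placeholders): a pageable, unconstrained anonymous area only needs
// empty restriction structures:
//
//	virtual_address_restrictions virtualRestrictions = {};
//	virtualRestrictions.address_specification = B_ANY_ADDRESS;
//	physical_address_restrictions physicalRestrictions = {};
//	void* address;
//	area_id area = vm_create_anonymous_area(team, "anonymous area",
//		4 * B_PAGE_SIZE, B_NO_LOCK, B_READ_AREA | B_WRITE_AREA, 0, 0,
//		&virtualRestrictions, &physicalRestrictions, false, &address);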
1562 
1563 
1564 area_id
1565 vm_map_physical_memory(team_id team, const char* name, void** _address,
1566 	uint32 addressSpec, addr_t size, uint32 protection,
1567 	phys_addr_t physicalAddress, bool alreadyWired)
1568 {
1569 	VMArea* area;
1570 	VMCache* cache;
1571 	addr_t mapOffset;
1572 
1573 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1574 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1575 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1576 		addressSpec, size, protection, physicalAddress));
1577 
1578 	if (!arch_vm_supports_protection(protection))
1579 		return B_NOT_SUPPORTED;
1580 
1581 	AddressSpaceWriteLocker locker(team);
1582 	if (!locker.IsLocked())
1583 		return B_BAD_TEAM_ID;
1584 
1585 	// if the physical address is not page aligned,
1586 	// move the area base down to align it on a page boundary
1587 	mapOffset = physicalAddress % B_PAGE_SIZE;
1588 	size += mapOffset;
1589 	physicalAddress -= mapOffset;
1590 
1591 	size = PAGE_ALIGN(size);
1592 
1593 	// create a device cache
1594 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1595 	if (status != B_OK)
1596 		return status;
1597 
1598 	cache->virtual_end = size;
1599 
1600 	cache->Lock();
1601 
1602 	virtual_address_restrictions addressRestrictions = {};
1603 	addressRestrictions.address = *_address;
1604 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1605 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1606 		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1607 		true, &area, _address);
1608 
1609 	if (status < B_OK)
1610 		cache->ReleaseRefLocked();
1611 
1612 	cache->Unlock();
1613 
1614 	if (status == B_OK) {
1615 		// set requested memory type -- use uncached, if not given
1616 		uint32 memoryType = addressSpec & B_MTR_MASK;
1617 		if (memoryType == 0)
1618 			memoryType = B_MTR_UC;
1619 
1620 		area->SetMemoryType(memoryType);
1621 
1622 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1623 		if (status != B_OK)
1624 			delete_area(locker.AddressSpace(), area, false);
1625 	}
1626 
1627 	if (status != B_OK)
1628 		return status;
1629 
1630 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1631 
1632 	if (alreadyWired) {
1633 		// The area is already mapped, but possibly not with the right
1634 		// memory type.
1635 		map->Lock();
1636 		map->ProtectArea(area, area->protection);
1637 		map->Unlock();
1638 	} else {
1639 		// Map the area completely.
1640 
1641 		// reserve pages needed for the mapping
1642 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1643 			area->Base() + (size - 1));
1644 		vm_page_reservation reservation;
1645 		vm_page_reserve_pages(&reservation, reservePages,
1646 			team == VMAddressSpace::KernelID()
1647 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1648 
1649 		map->Lock();
1650 
1651 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1652 			map->Map(area->Base() + offset, physicalAddress + offset,
1653 				protection, area->MemoryType(), &reservation);
1654 		}
1655 
1656 		map->Unlock();
1657 
1658 		vm_page_unreserve_pages(&reservation);
1659 	}
1660 
1661 	// modify the returned pointer so that it is offset into the new area
1662 	// the same way the passed-in physical address was offset
1663 	*_address = (void*)((addr_t)*_address + mapOffset);
1664 
1665 	area->cache_type = CACHE_TYPE_DEVICE;
1666 	return area->id;
1667 }
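
// Driver-style usage sketch (the physical register address is a made-up
// example): mapping one page of MMIO registers into the kernel; since no
// B_MTR_* flag is given in the address specification, the mapping defaults
// to uncached:
//
//	void* registers;
//	area_id area = vm_map_physical_memory(VMAddressSpace::KernelID(),
//		"device registers", &registers, B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0xfe000000, false);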
1668 
1669 
1670 /*!	Don't use!
1671 	TODO: This function was introduced to map physical page vecs to
1672 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1673 	use a device cache and does not track vm_page::wired_count!
1674 */
1675 area_id
1676 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1677 	uint32 addressSpec, addr_t* _size, uint32 protection,
1678 	struct generic_io_vec* vecs, uint32 vecCount)
1679 {
1680 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1681 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1682 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1683 		addressSpec, _size, protection, vecs, vecCount));
1684 
1685 	if (!arch_vm_supports_protection(protection)
1686 		|| (addressSpec & B_MTR_MASK) != 0) {
1687 		return B_NOT_SUPPORTED;
1688 	}
1689 
1690 	AddressSpaceWriteLocker locker(team);
1691 	if (!locker.IsLocked())
1692 		return B_BAD_TEAM_ID;
1693 
1694 	if (vecCount == 0)
1695 		return B_BAD_VALUE;
1696 
1697 	addr_t size = 0;
1698 	for (uint32 i = 0; i < vecCount; i++) {
1699 		if (vecs[i].base % B_PAGE_SIZE != 0
1700 			|| vecs[i].length % B_PAGE_SIZE != 0) {
1701 			return B_BAD_VALUE;
1702 		}
1703 
1704 		size += vecs[i].length;
1705 	}
1706 
1707 	// create a device cache
1708 	VMCache* cache;
1709 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1710 	if (result != B_OK)
1711 		return result;
1712 
1713 	cache->virtual_end = size;
1714 
1715 	cache->Lock();
1716 
1717 	VMArea* area;
1718 	virtual_address_restrictions addressRestrictions = {};
1719 	addressRestrictions.address = *_address;
1720 	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1721 	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1722 		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1723 		&addressRestrictions, true, &area, _address);
1724 
1725 	if (result != B_OK)
1726 		cache->ReleaseRefLocked();
1727 
1728 	cache->Unlock();
1729 
1730 	if (result != B_OK)
1731 		return result;
1732 
1733 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1734 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1735 		area->Base() + (size - 1));
1736 
1737 	vm_page_reservation reservation;
1738 	vm_page_reserve_pages(&reservation, reservePages,
1739 			team == VMAddressSpace::KernelID()
1740 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1741 	map->Lock();
1742 
1743 	uint32 vecIndex = 0;
1744 	size_t vecOffset = 0;
1745 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1746 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1747 			vecOffset = 0;
1748 			vecIndex++;
1749 		}
1750 
1751 		if (vecIndex >= vecCount)
1752 			break;
1753 
1754 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1755 			protection, area->MemoryType(), &reservation);
1756 
1757 		vecOffset += B_PAGE_SIZE;
1758 	}
1759 
1760 	map->Unlock();
1761 	vm_page_unreserve_pages(&reservation);
1762 
1763 	if (_size != NULL)
1764 		*_size = size;
1765 
1766 	area->cache_type = CACHE_TYPE_DEVICE;
1767 	return area->id;
1768 }
1769 
1770 
1771 area_id
1772 vm_create_null_area(team_id team, const char* name, void** address,
1773 	uint32 addressSpec, addr_t size, uint32 flags)
1774 {
1775 	size = PAGE_ALIGN(size);
1776 
1777 	// Lock the address space and, if B_EXACT_ADDRESS and
1778 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1779 	// is not wired.
1780 	AddressSpaceWriteLocker locker;
1781 	do {
1782 		if (locker.SetTo(team) != B_OK)
1783 			return B_BAD_TEAM_ID;
1784 	} while (addressSpec == B_EXACT_ADDRESS
1785 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1786 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1787 			(addr_t)*address, size, &locker));
1788 
1789 	// create a null cache
1790 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1791 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1792 	VMCache* cache;
1793 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1794 	if (status != B_OK)
1795 		return status;
1796 
1797 	cache->temporary = 1;
1798 	cache->virtual_end = size;
1799 
1800 	cache->Lock();
1801 
1802 	VMArea* area;
1803 	virtual_address_restrictions addressRestrictions = {};
1804 	addressRestrictions.address = *address;
1805 	addressRestrictions.address_specification = addressSpec;
1806 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1807 		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1808 		&addressRestrictions, true, &area, address);
1809 
1810 	if (status < B_OK) {
1811 		cache->ReleaseRefAndUnlock();
1812 		return status;
1813 	}
1814 
1815 	cache->Unlock();
1816 
1817 	area->cache_type = CACHE_TYPE_NULL;
1818 	return area->id;
1819 }
1820 
1821 
1822 /*!	Creates the vnode cache for the specified \a vnode.
1823 	The vnode has to be marked busy when calling this function.
1824 */
1825 status_t
1826 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1827 {
1828 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1829 }
1830 
1831 
1832 /*!	\a cache must be locked. The area's address space must be read-locked.
1833 */
1834 static void
1835 pre_map_area_pages(VMArea* area, VMCache* cache,
1836 	vm_page_reservation* reservation)
1837 {
1838 	addr_t baseAddress = area->Base();
1839 	addr_t cacheOffset = area->cache_offset;
1840 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1841 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1842 
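	// Map only pages that are already resident and in active use; they are
	// mapped read-only, so a write access still goes through the usual fault
	// handling.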
1843 	for (VMCachePagesTree::Iterator it
1844 				= cache->pages.GetIterator(firstPage, true, true);
1845 			vm_page* page = it.Next();) {
1846 		if (page->cache_offset >= endPage)
1847 			break;
1848 
1849 		// skip busy and inactive pages
1850 		if (page->busy || page->usage_count == 0)
1851 			continue;
1852 
1853 		DEBUG_PAGE_ACCESS_START(page);
1854 		map_page(area, page,
1855 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1856 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1857 		DEBUG_PAGE_ACCESS_END(page);
1858 	}
1859 }
1860 
1861 
1862 /*!	Will map the file specified by \a fd to an area in memory.
1863 	The file will be mirrored beginning at the specified \a offset. The
1864 	\a offset and \a size arguments have to be page aligned.
1865 */
1866 static area_id
1867 _vm_map_file(team_id team, const char* name, void** _address,
1868 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1869 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1870 {
1871 	// TODO: for binary files, we want to make sure that they get a snapshot
1872 	//	of the file at mapping time, i.e. later changes should not make it
1873 	//	into the mapped copy -- this will need quite some changes to be done
1874 	//	in a nice way
1875 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1876 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1877 
1878 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1879 	size = PAGE_ALIGN(size);
1880 
1881 	if (mapping == REGION_NO_PRIVATE_MAP)
1882 		protection |= B_SHARED_AREA;
1883 	if (addressSpec != B_EXACT_ADDRESS)
1884 		unmapAddressRange = false;
1885 
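	// A negative fd requests an anonymous mapping; there is no file involved,
	// so just create an anonymous area.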
1886 	if (fd < 0) {
1887 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1888 		virtual_address_restrictions virtualRestrictions = {};
1889 		virtualRestrictions.address = *_address;
1890 		virtualRestrictions.address_specification = addressSpec;
1891 		physical_address_restrictions physicalRestrictions = {};
1892 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1893 			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1894 			_address);
1895 	}
1896 
1897 	// get the open flags of the FD
1898 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1899 	if (descriptor == NULL)
1900 		return EBADF;
1901 	int32 openMode = descriptor->open_mode;
1902 	put_fd(descriptor);
1903 
1904 	// The FD must be open for reading in any case. For a shared mapping with
1905 	// write access, the FD additionally has to be open for writing.
1906 	if ((openMode & O_ACCMODE) == O_WRONLY
1907 		|| (mapping == REGION_NO_PRIVATE_MAP
1908 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1909 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1910 		return EACCES;
1911 	}
1912 
1913 	// get the vnode for the object, this also grabs a ref to it
1914 	struct vnode* vnode = NULL;
1915 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1916 	if (status < B_OK)
1917 		return status;
1918 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1919 
1920 	// If we're going to pre-map pages, we need to reserve the pages needed by
1921 	// the mapping backend upfront.
1922 	page_num_t reservedPreMapPages = 0;
1923 	vm_page_reservation reservation;
1924 	if ((protection & B_READ_AREA) != 0) {
1925 		AddressSpaceWriteLocker locker;
1926 		status = locker.SetTo(team);
1927 		if (status != B_OK)
1928 			return status;
1929 
1930 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1931 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1932 
1933 		locker.Unlock();
1934 
1935 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1936 			team == VMAddressSpace::KernelID()
1937 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1938 	}
1939 
1940 	struct PageUnreserver {
1941 		PageUnreserver(vm_page_reservation* reservation)
1942 			:
1943 			fReservation(reservation)
1944 		{
1945 		}
1946 
1947 		~PageUnreserver()
1948 		{
1949 			if (fReservation != NULL)
1950 				vm_page_unreserve_pages(fReservation);
1951 		}
1952 
1953 		vm_page_reservation* fReservation;
1954 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1955 
1956 	// Lock the address space and, if the specified address range shall be
1957 	// unmapped, ensure it is not wired.
1958 	AddressSpaceWriteLocker locker;
1959 	do {
1960 		if (locker.SetTo(team) != B_OK)
1961 			return B_BAD_TEAM_ID;
1962 	} while (unmapAddressRange
1963 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1964 			(addr_t)*_address, size, &locker));
1965 
1966 	// TODO: this only works for file systems that use the file cache
1967 	VMCache* cache;
1968 	status = vfs_get_vnode_cache(vnode, &cache, false);
1969 	if (status < B_OK)
1970 		return status;
1971 
1972 	cache->Lock();
1973 
1974 	VMArea* area;
1975 	virtual_address_restrictions addressRestrictions = {};
1976 	addressRestrictions.address = *_address;
1977 	addressRestrictions.address_specification = addressSpec;
1978 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1979 		0, protection, mapping,
1980 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1981 		&addressRestrictions, kernel, &area, _address);
1982 
1983 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1984 		// map_backing_store() cannot know we no longer need the ref
1985 		cache->ReleaseRefLocked();
1986 	}
1987 
1988 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1989 		pre_map_area_pages(area, cache, &reservation);
1990 
1991 	cache->Unlock();
1992 
1993 	if (status == B_OK) {
1994 		// TODO: this probably deserves a smarter solution, ie. don't always
1995 		// prefetch stuff, and also, probably don't trigger it at this place.
1996 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1997 			// prefetches at max 10 MB starting from "offset"
1998 	}
1999 
2000 	if (status != B_OK)
2001 		return status;
2002 
2003 	area->cache_type = CACHE_TYPE_VNODE;
2004 	return area->id;
2005 }
2006 
2007 
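/*!	Kernel entry point for mapping a file: checks the requested protection and
	calls _vm_map_file() with \c kernel set to \c true.
	Purely illustrative example (assuming \c fd is an already opened file
	descriptor and the other values suit the caller):
		void* address = NULL;
		area_id area = vm_map_file(VMAddressSpace::KernelID(), "mapped file",
			&address, B_ANY_KERNEL_ADDRESS, 64 * 1024, B_KERNEL_READ_AREA,
			REGION_NO_PRIVATE_MAP, false, fd, 0);
*/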
2008 area_id
2009 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2010 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2011 	int fd, off_t offset)
2012 {
2013 	if (!arch_vm_supports_protection(protection))
2014 		return B_NOT_SUPPORTED;
2015 
2016 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2017 		mapping, unmapAddressRange, fd, offset, true);
2018 }
2019 
2020 
2021 VMCache*
2022 vm_area_get_locked_cache(VMArea* area)
2023 {
2024 	rw_lock_read_lock(&sAreaCacheLock);
2025 
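	// Locking the cache requires giving up sAreaCacheLock temporarily
	// (SwitchFromReadLock() does that), so the area's cache may change in the
	// meantime; retry until the cache we locked is still the area's cache.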
2026 	while (true) {
2027 		VMCache* cache = area->cache;
2028 
2029 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2030 			// cache has been deleted
2031 			rw_lock_read_lock(&sAreaCacheLock);
2032 			continue;
2033 		}
2034 
2035 		rw_lock_read_lock(&sAreaCacheLock);
2036 
2037 		if (cache == area->cache) {
2038 			cache->AcquireRefLocked();
2039 			rw_lock_read_unlock(&sAreaCacheLock);
2040 			return cache;
2041 		}
2042 
2043 		// the cache changed in the meantime
2044 		cache->Unlock();
2045 	}
2046 }
2047 
2048 
2049 void
2050 vm_area_put_locked_cache(VMCache* cache)
2051 {
2052 	cache->ReleaseRefAndUnlock();
2053 }
2054 
2055 
2056 area_id
2057 vm_clone_area(team_id team, const char* name, void** address,
2058 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2059 	bool kernel)
2060 {
2061 	VMArea* newArea = NULL;
2062 	VMArea* sourceArea;
2063 
2064 	// Check whether the source area exists and is cloneable. If so, mark it
2065 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2066 	{
2067 		AddressSpaceWriteLocker locker;
2068 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2069 		if (status != B_OK)
2070 			return status;
2071 
2072 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2073 			return B_NOT_ALLOWED;
2074 
2075 		sourceArea->protection |= B_SHARED_AREA;
2076 		protection |= B_SHARED_AREA;
2077 	}
2078 
2079 	// Now lock both address spaces and actually do the cloning.
2080 
2081 	MultiAddressSpaceLocker locker;
2082 	VMAddressSpace* sourceAddressSpace;
2083 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2084 	if (status != B_OK)
2085 		return status;
2086 
2087 	VMAddressSpace* targetAddressSpace;
2088 	status = locker.AddTeam(team, true, &targetAddressSpace);
2089 	if (status != B_OK)
2090 		return status;
2091 
2092 	status = locker.Lock();
2093 	if (status != B_OK)
2094 		return status;
2095 
2096 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2097 	if (sourceArea == NULL)
2098 		return B_BAD_VALUE;
2099 
2100 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2101 		return B_NOT_ALLOWED;
2102 
2103 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2104 
2105 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
2106 	//	have been adapted. Maybe it should be part of the kernel settings,
2107 	//	anyway (so that old drivers can always work).
2108 #if 0
2109 	if (sourceArea->aspace == VMAddressSpace::Kernel()
2110 		&& addressSpace != VMAddressSpace::Kernel()
2111 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
2112 		// kernel areas must not be cloned in userland, unless explicitly
2113 		// declared user-cloneable upon construction
2114 		status = B_NOT_ALLOWED;
2115 	} else
2116 #endif
2117 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
2118 		status = B_NOT_ALLOWED;
2119 	else {
2120 		virtual_address_restrictions addressRestrictions = {};
2121 		addressRestrictions.address = *address;
2122 		addressRestrictions.address_specification = addressSpec;
2123 		status = map_backing_store(targetAddressSpace, cache,
2124 			sourceArea->cache_offset, name, sourceArea->Size(),
2125 			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2126 			kernel, &newArea, address);
2127 	}
2128 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2129 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2130 		// to create a new cache, and has therefore already acquired a reference
2131 		// to the source cache - but otherwise it has no idea that we need
2132 		// one.
2133 		cache->AcquireRefLocked();
2134 	}
2135 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2136 		// we need to map in everything at this point
2137 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2138 			// we don't have actual pages to map but a physical area
2139 			VMTranslationMap* map
2140 				= sourceArea->address_space->TranslationMap();
2141 			map->Lock();
2142 
2143 			phys_addr_t physicalAddress;
2144 			uint32 oldProtection;
2145 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2146 
2147 			map->Unlock();
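			// Only the source area's base address was queried, so the mapping
			// loop below assumes the device area is physically contiguous.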
2148 
2149 			map = targetAddressSpace->TranslationMap();
2150 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2151 				newArea->Base() + (newArea->Size() - 1));
2152 
2153 			vm_page_reservation reservation;
2154 			vm_page_reserve_pages(&reservation, reservePages,
2155 				targetAddressSpace == VMAddressSpace::Kernel()
2156 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2157 			map->Lock();
2158 
2159 			for (addr_t offset = 0; offset < newArea->Size();
2160 					offset += B_PAGE_SIZE) {
2161 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2162 					protection, newArea->MemoryType(), &reservation);
2163 			}
2164 
2165 			map->Unlock();
2166 			vm_page_unreserve_pages(&reservation);
2167 		} else {
2168 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2169 			size_t reservePages = map->MaxPagesNeededToMap(
2170 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2171 			vm_page_reservation reservation;
2172 			vm_page_reserve_pages(&reservation, reservePages,
2173 				targetAddressSpace == VMAddressSpace::Kernel()
2174 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2175 
2176 			// map in all pages from source
2177 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2178 					vm_page* page = it.Next();) {
2179 				if (!page->busy) {
2180 					DEBUG_PAGE_ACCESS_START(page);
2181 					map_page(newArea, page,
2182 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2183 							- newArea->cache_offset),
2184 						protection, &reservation);
2185 					DEBUG_PAGE_ACCESS_END(page);
2186 				}
2187 			}
2188 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2189 			// ensuring that!
2190 
2191 			vm_page_unreserve_pages(&reservation);
2192 		}
2193 	}
2194 	if (status == B_OK)
2195 		newArea->cache_type = sourceArea->cache_type;
2196 
2197 	vm_area_put_locked_cache(cache);
2198 
2199 	if (status < B_OK)
2200 		return status;
2201 
2202 	return newArea->id;
2203 }
2204 
2205 
2206 /*!	Deletes the specified area of the given address space.
2207 
2208 	The address space must be write-locked.
2209 	The caller must ensure that the area does not have any wired ranges.
2210 
2211 	\param addressSpace The address space containing the area.
2212 	\param area The area to be deleted.
2213 	\param deletingAddressSpace \c true, if the address space is in the process
2214 		of being deleted.
2215 */
2216 static void
2217 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2218 	bool deletingAddressSpace)
2219 {
2220 	ASSERT(!area->IsWired());
2221 
2222 	VMAreaHash::Remove(area);
2223 
2224 	// At this point the area is removed from the global hash table, but
2225 	// still exists in the area list.
2226 
2227 	// Unmap the virtual address space the area occupied.
2228 	{
2229 		// We need to lock the complete cache chain.
2230 		VMCache* topCache = vm_area_get_locked_cache(area);
2231 		VMCacheChainLocker cacheChainLocker(topCache);
2232 		cacheChainLocker.LockAllSourceCaches();
2233 
2234 		// If the area's top cache is a temporary cache and the area is the only
2235 		// one referencing it (besides us currently holding a second reference),
2236 		// the unmapping code doesn't need to care about preserving the accessed
2237 		// and dirty flags of the top cache page mappings.
2238 		bool ignoreTopCachePageFlags
2239 			= topCache->temporary && topCache->RefCount() == 2;
2240 
2241 		area->address_space->TranslationMap()->UnmapArea(area,
2242 			deletingAddressSpace, ignoreTopCachePageFlags);
2243 	}
2244 
2245 	if (!area->cache->temporary)
2246 		area->cache->WriteModified();
2247 
2248 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2249 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2250 
2251 	arch_vm_unset_memory_type(area);
2252 	addressSpace->RemoveArea(area, allocationFlags);
2253 	addressSpace->Put();
2254 
2255 	area->cache->RemoveArea(area);
2256 	area->cache->ReleaseRef();
2257 
2258 	addressSpace->DeleteArea(area, allocationFlags);
2259 }
2260 
2261 
2262 status_t
2263 vm_delete_area(team_id team, area_id id, bool kernel)
2264 {
2265 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2266 		team, id));
2267 
2268 	// lock the address space and make sure the area isn't wired
2269 	AddressSpaceWriteLocker locker;
2270 	VMArea* area;
2271 	AreaCacheLocker cacheLocker;
2272 
2273 	do {
2274 		status_t status = locker.SetFromArea(team, id, area);
2275 		if (status != B_OK)
2276 			return status;
2277 
2278 		cacheLocker.SetTo(area);
2279 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2280 
2281 	cacheLocker.Unlock();
2282 
2283 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2284 		return B_NOT_ALLOWED;
2285 
2286 	delete_area(locker.AddressSpace(), area, false);
2287 	return B_OK;
2288 }
2289 
2290 
2291 /*!	Creates a new cache on top of the given cache, moves all areas from
2292 	the old cache to the new one, and changes the protection of all affected
2293 	areas' pages to read-only. If requested, wired pages are moved up to the
2294 	new cache and copies are added to the old cache in their place.
2295 	Preconditions:
2296 	- The given cache must be locked.
2297 	- All of the cache's areas' address spaces must be read locked.
2298 	- Either the cache must not have any wired ranges or a page reservation for
2299 	  all wired pages must be provided, so they can be copied.
2300 
2301 	\param lowerCache The cache on top of which a new cache shall be created.
2302 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2303 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2304 		has wired pages. The wired pages are copied in this case.
2305 */
2306 static status_t
2307 vm_copy_on_write_area(VMCache* lowerCache,
2308 	vm_page_reservation* wiredPagesReservation)
2309 {
2310 	VMCache* upperCache;
2311 
2312 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2313 
2314 	// We need to separate the cache from its areas. The cache goes one level
2315 	// deeper and we create a new cache in between.
2316 
2317 	// create an anonymous cache
2318 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2319 		lowerCache->GuardSize() / B_PAGE_SIZE,
2320 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2321 		VM_PRIORITY_USER);
2322 	if (status != B_OK)
2323 		return status;
2324 
2325 	upperCache->Lock();
2326 
2327 	upperCache->temporary = 1;
2328 	upperCache->virtual_base = lowerCache->virtual_base;
2329 	upperCache->virtual_end = lowerCache->virtual_end;
2330 
2331 	// transfer the lower cache areas to the upper cache
2332 	rw_lock_write_lock(&sAreaCacheLock);
2333 	upperCache->TransferAreas(lowerCache);
2334 	rw_lock_write_unlock(&sAreaCacheLock);
2335 
2336 	lowerCache->AddConsumer(upperCache);
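	// The chain is now: areas -> upperCache (new, temporary) -> lowerCache;
	// a write fault will copy the affected page up into upperCache.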
2337 
2338 	// We now need to remap all pages from all of the cache's areas read-only,
2339 	// so that a copy will be created on next write access. If there are wired
2340 	// pages, we keep their protection, move them to the upper cache and create
2341 	// copies for the lower cache.
2342 	if (wiredPagesReservation != NULL) {
2343 		// We need to handle wired pages -- iterate through the cache's pages.
2344 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2345 				vm_page* page = it.Next();) {
2346 			if (page->WiredCount() > 0) {
2347 				// allocate a new page and copy the wired one
2348 				vm_page* copiedPage = vm_page_allocate_page(
2349 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2350 
2351 				vm_memcpy_physical_page(
2352 					copiedPage->physical_page_number * B_PAGE_SIZE,
2353 					page->physical_page_number * B_PAGE_SIZE);
2354 
2355 				// move the wired page to the upper cache (note: removing is OK
2356 				// with the SplayTree iterator) and insert the copy
2357 				upperCache->MovePage(page);
2358 				lowerCache->InsertPage(copiedPage,
2359 					page->cache_offset * B_PAGE_SIZE);
2360 
2361 				DEBUG_PAGE_ACCESS_END(copiedPage);
2362 			} else {
2363 				// Change the protection of this page in all areas.
2364 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2365 						tempArea = tempArea->cache_next) {
2366 					// The area must be readable in the same way it was
2367 					// previously writable.
2368 					uint32 protection = B_KERNEL_READ_AREA;
2369 					if ((tempArea->protection & B_READ_AREA) != 0)
2370 						protection |= B_READ_AREA;
2371 
2372 					VMTranslationMap* map
2373 						= tempArea->address_space->TranslationMap();
2374 					map->Lock();
2375 					map->ProtectPage(tempArea,
2376 						virtual_page_address(tempArea, page), protection);
2377 					map->Unlock();
2378 				}
2379 			}
2380 		}
2381 	} else {
2382 		ASSERT(lowerCache->WiredPagesCount() == 0);
2383 
2384 		// just change the protection of all areas
2385 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2386 				tempArea = tempArea->cache_next) {
2387 			// The area must be readable in the same way it was previously
2388 			// writable.
2389 			uint32 protection = B_KERNEL_READ_AREA;
2390 			if ((tempArea->protection & B_READ_AREA) != 0)
2391 				protection |= B_READ_AREA;
2392 
2393 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2394 			map->Lock();
2395 			map->ProtectArea(tempArea, protection);
2396 			map->Unlock();
2397 		}
2398 	}
2399 
2400 	vm_area_put_locked_cache(upperCache);
2401 
2402 	return B_OK;
2403 }
2404 
2405 
2406 area_id
2407 vm_copy_area(team_id team, const char* name, void** _address,
2408 	uint32 addressSpec, uint32 protection, area_id sourceID)
2409 {
2410 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2411 
2412 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2413 		// set the same protection for the kernel as for userland
2414 		protection |= B_KERNEL_READ_AREA;
2415 		if (writableCopy)
2416 			protection |= B_KERNEL_WRITE_AREA;
2417 	}
2418 
2419 	// Do the locking: target address space, all address spaces associated with
2420 	// the source cache, and the cache itself.
2421 	MultiAddressSpaceLocker locker;
2422 	VMAddressSpace* targetAddressSpace;
2423 	VMCache* cache;
2424 	VMArea* source;
2425 	AreaCacheLocker cacheLocker;
2426 	status_t status;
2427 	bool sharedArea;
2428 
2429 	page_num_t wiredPages = 0;
2430 	vm_page_reservation wiredPagesReservation;
2431 
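	// Reserving pages may block, so it must not happen while the locks are
	// held. If the number of wired pages changed while we were unlocked, we
	// have to retry.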
2432 	bool restart;
2433 	do {
2434 		restart = false;
2435 
2436 		locker.Unset();
2437 		status = locker.AddTeam(team, true, &targetAddressSpace);
2438 		if (status == B_OK) {
2439 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2440 				&cache);
2441 		}
2442 		if (status != B_OK)
2443 			return status;
2444 
2445 		cacheLocker.SetTo(cache, true);	// already locked
2446 
2447 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2448 
2449 		page_num_t oldWiredPages = wiredPages;
2450 		wiredPages = 0;
2451 
2452 		// If the source area isn't shared, count the number of wired pages in
2453 		// the cache and reserve as many pages.
2454 		if (!sharedArea) {
2455 			wiredPages = cache->WiredPagesCount();
2456 
2457 			if (wiredPages > oldWiredPages) {
2458 				cacheLocker.Unlock();
2459 				locker.Unlock();
2460 
2461 				if (oldWiredPages > 0)
2462 					vm_page_unreserve_pages(&wiredPagesReservation);
2463 
2464 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2465 					VM_PRIORITY_USER);
2466 
2467 				restart = true;
2468 			}
2469 		} else if (oldWiredPages > 0)
2470 			vm_page_unreserve_pages(&wiredPagesReservation);
2471 	} while (restart);
2472 
2473 	// unreserve pages later
2474 	struct PagesUnreserver {
2475 		PagesUnreserver(vm_page_reservation* reservation)
2476 			:
2477 			fReservation(reservation)
2478 		{
2479 		}
2480 
2481 		~PagesUnreserver()
2482 		{
2483 			if (fReservation != NULL)
2484 				vm_page_unreserve_pages(fReservation);
2485 		}
2486 
2487 	private:
2488 		vm_page_reservation*	fReservation;
2489 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2490 
2491 	if (addressSpec == B_CLONE_ADDRESS) {
2492 		addressSpec = B_EXACT_ADDRESS;
2493 		*_address = (void*)source->Base();
2494 	}
2495 
2496 	// First, create a cache on top of the source area, or, if this is a
2497 	// shared area, use the existing one.
2498 
2499 	VMArea* target;
2500 	virtual_address_restrictions addressRestrictions = {};
2501 	addressRestrictions.address = *_address;
2502 	addressRestrictions.address_specification = addressSpec;
2503 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2504 		name, source->Size(), source->wiring, protection,
2505 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2506 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2507 		&addressRestrictions, true, &target, _address);
2508 	if (status < B_OK)
2509 		return status;
2510 
2511 	if (sharedArea) {
2512 		// The new area uses the old area's cache, but map_backing_store()
2513 		// hasn't acquired a ref. So we have to do that now.
2514 		cache->AcquireRefLocked();
2515 	}
2516 
2517 	// If the source area is writable, we need to move it one layer up as well
2518 
2519 	if (!sharedArea) {
2520 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2521 			// TODO: do something more useful if this fails!
2522 			if (vm_copy_on_write_area(cache,
2523 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2524 				panic("vm_copy_on_write_area() failed!\n");
2525 			}
2526 		}
2527 	}
2528 
2529 	// we return the ID of the newly created area
2530 	return target->id;
2531 }
2532 
2533 
2534 status_t
2535 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2536 	bool kernel)
2537 {
2538 	fix_protection(&newProtection);
2539 
2540 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2541 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2542 
2543 	if (!arch_vm_supports_protection(newProtection))
2544 		return B_NOT_SUPPORTED;
2545 
2546 	bool becomesWritable
2547 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2548 
2549 	// lock address spaces and cache
2550 	MultiAddressSpaceLocker locker;
2551 	VMCache* cache;
2552 	VMArea* area;
2553 	status_t status;
2554 	AreaCacheLocker cacheLocker;
2555 	bool isWritable;
2556 
2557 	bool restart;
2558 	do {
2559 		restart = false;
2560 
2561 		locker.Unset();
2562 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2563 		if (status != B_OK)
2564 			return status;
2565 
2566 		cacheLocker.SetTo(cache, true);	// already locked
2567 
2568 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2569 			return B_NOT_ALLOWED;
2570 
2571 		if (area->protection == newProtection)
2572 			return B_OK;
2573 
2574 		if (team != VMAddressSpace::KernelID()
2575 			&& area->address_space->ID() != team) {
2576 			// unless you're the kernel, you are only allowed to set
2577 			// the protection of your own areas
2578 			return B_NOT_ALLOWED;
2579 		}
2580 
2581 		isWritable
2582 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2583 
2584 		// Make sure the area (or, if we're going to call
2585 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2586 		// wired ranges.
2587 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2588 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2589 					otherArea = otherArea->cache_next) {
2590 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2591 					restart = true;
2592 					break;
2593 				}
2594 			}
2595 		} else {
2596 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2597 				restart = true;
2598 		}
2599 	} while (restart);
2600 
2601 	bool changePageProtection = true;
2602 	bool changeTopCachePagesOnly = false;
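	// changePageProtection: whether the protection of already mapped pages is
	// adjusted below; changeTopCachePagesOnly: restrict that adjustment to
	// pages living in the top cache (lower cache pages are mapped read-only
	// anyway).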
2603 
2604 	if (isWritable && !becomesWritable) {
2605 		// writable -> !writable
2606 
2607 		if (cache->source != NULL && cache->temporary) {
2608 			if (cache->CountWritableAreas(area) == 0) {
2609 				// Since this cache is now backed by the pages of its source
2610 				// cache, we can reduce the cache's commitment to cover only the
2611 				// pages that really are in this cache.
2612 
2613 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2614 					team == VMAddressSpace::KernelID()
2615 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2616 
2617 				// TODO: we may be able to join with our source cache, if
2618 				// count == 0
2619 			}
2620 		}
2621 
2622 		// If only the writability changes, we can just remap the pages of the
2623 		// top cache, since the pages of lower caches are mapped read-only
2624 		// anyway. That's only advantageous, though, if the number of pages in
2625 		// the cache is significantly smaller than the number of pages in the
2626 		// area.
2627 		if (newProtection
2628 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2629 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2630 			changeTopCachePagesOnly = true;
2631 		}
2632 	} else if (!isWritable && becomesWritable) {
2633 		// !writable -> writable
2634 
2635 		if (!cache->consumers.IsEmpty()) {
2636 			// There are consumers -- we have to insert a new cache. Fortunately
2637 			// vm_copy_on_write_area() does everything that's needed.
2638 			changePageProtection = false;
2639 			status = vm_copy_on_write_area(cache, NULL);
2640 		} else {
2641 			// No consumers, so we don't need to insert a new one.
2642 			if (cache->source != NULL && cache->temporary) {
2643 				// the cache's commitment must contain all possible pages
2644 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2645 					team == VMAddressSpace::KernelID()
2646 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2647 			}
2648 
2649 			if (status == B_OK && cache->source != NULL) {
2650 				// There's a source cache, hence we can't just change all pages'
2651 				// protection or we might allow writing into pages belonging to
2652 				// a lower cache.
2653 				changeTopCachePagesOnly = true;
2654 			}
2655 		}
2656 	} else {
2657 		// we don't have anything special to do in all other cases
2658 	}
2659 
2660 	if (status == B_OK) {
2661 		// remap existing pages in this cache
2662 		if (changePageProtection) {
2663 			VMTranslationMap* map = area->address_space->TranslationMap();
2664 			map->Lock();
2665 
2666 			if (changeTopCachePagesOnly) {
2667 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2668 				page_num_t lastPageOffset
2669 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2670 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2671 						vm_page* page = it.Next();) {
2672 					if (page->cache_offset >= firstPageOffset
2673 						&& page->cache_offset <= lastPageOffset) {
2674 						addr_t address = virtual_page_address(area, page);
2675 						map->ProtectPage(area, address, newProtection);
2676 					}
2677 				}
2678 			} else
2679 				map->ProtectArea(area, newProtection);
2680 
2681 			map->Unlock();
2682 		}
2683 
2684 		area->protection = newProtection;
2685 	}
2686 
2687 	return status;
2688 }
2689 
2690 
2691 status_t
2692 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2693 {
2694 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2695 	if (addressSpace == NULL)
2696 		return B_BAD_TEAM_ID;
2697 
2698 	VMTranslationMap* map = addressSpace->TranslationMap();
2699 
2700 	map->Lock();
2701 	uint32 dummyFlags;
2702 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2703 	map->Unlock();
2704 
2705 	addressSpace->Put();
2706 	return status;
2707 }
2708 
2709 
2710 /*!	The page's cache must be locked.
2711 */
2712 bool
2713 vm_test_map_modification(vm_page* page)
2714 {
2715 	if (page->modified)
2716 		return true;
2717 
2718 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2719 	vm_page_mapping* mapping;
2720 	while ((mapping = iterator.Next()) != NULL) {
2721 		VMArea* area = mapping->area;
2722 		VMTranslationMap* map = area->address_space->TranslationMap();
2723 
2724 		phys_addr_t physicalAddress;
2725 		uint32 flags;
2726 		map->Lock();
2727 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2728 		map->Unlock();
2729 
2730 		if ((flags & PAGE_MODIFIED) != 0)
2731 			return true;
2732 	}
2733 
2734 	return false;
2735 }
2736 
2737 
2738 /*!	The page's cache must be locked.
2739 */
2740 void
2741 vm_clear_map_flags(vm_page* page, uint32 flags)
2742 {
2743 	if ((flags & PAGE_ACCESSED) != 0)
2744 		page->accessed = false;
2745 	if ((flags & PAGE_MODIFIED) != 0)
2746 		page->modified = false;
2747 
2748 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2749 	vm_page_mapping* mapping;
2750 	while ((mapping = iterator.Next()) != NULL) {
2751 		VMArea* area = mapping->area;
2752 		VMTranslationMap* map = area->address_space->TranslationMap();
2753 
2754 		map->Lock();
2755 		map->ClearFlags(virtual_page_address(area, page), flags);
2756 		map->Unlock();
2757 	}
2758 }
2759 
2760 
2761 /*!	Removes all mappings from a page.
2762 	After you've called this function, the page is unmapped from memory and
2763 	the page's \c accessed and \c modified flags have been updated according
2764 	to the state of the mappings.
2765 	The page's cache must be locked.
2766 */
2767 void
2768 vm_remove_all_page_mappings(vm_page* page)
2769 {
2770 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2771 		VMArea* area = mapping->area;
2772 		VMTranslationMap* map = area->address_space->TranslationMap();
2773 		addr_t address = virtual_page_address(area, page);
2774 		map->UnmapPage(area, address, false);
2775 	}
2776 }
2777 
2778 
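/*!	Clears the accessed flags of the page and of all of its mappings; the
	mappings' modified flags are transferred to the page.
	Like the other functions iterating a page's mappings, this expects the
	page's cache to be locked.
	\return The number of accessed flags that were set, including the page's
		own \c accessed flag.
*/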
2779 int32
2780 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2781 {
2782 	int32 count = 0;
2783 
2784 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2785 	vm_page_mapping* mapping;
2786 	while ((mapping = iterator.Next()) != NULL) {
2787 		VMArea* area = mapping->area;
2788 		VMTranslationMap* map = area->address_space->TranslationMap();
2789 
2790 		bool modified;
2791 		if (map->ClearAccessedAndModified(area,
2792 				virtual_page_address(area, page), false, modified)) {
2793 			count++;
2794 		}
2795 
2796 		page->modified |= modified;
2797 	}
2798 
2799 
2800 	if (page->accessed) {
2801 		count++;
2802 		page->accessed = false;
2803 	}
2804 
2805 	return count;
2806 }
2807 
2808 
2809 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2810 	mappings.
2811 	The function iterates through the page mappings and removes them until
2812 	encountering one that has been accessed. From then on it will continue to
2813 	iterate, but only clear the accessed flag of the mapping. The page's
2814 	\c modified bit will be updated accordingly, the \c accessed bit will be
2815 	cleared.
2816 	\return The number of mapping accessed bits encountered, including the
2817 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2818 		of the page have been removed.
2819 */
2820 int32
2821 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2822 {
2823 	ASSERT(page->WiredCount() == 0);
2824 
2825 	if (page->accessed)
2826 		return vm_clear_page_mapping_accessed_flags(page);
2827 
2828 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2829 		VMArea* area = mapping->area;
2830 		VMTranslationMap* map = area->address_space->TranslationMap();
2831 		addr_t address = virtual_page_address(area, page);
2832 		bool modified = false;
2833 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2834 			page->accessed = true;
2835 			page->modified |= modified;
2836 			return vm_clear_page_mapping_accessed_flags(page);
2837 		}
2838 		page->modified |= modified;
2839 	}
2840 
2841 	return 0;
2842 }
2843 
2844 
2845 static int
2846 display_mem(int argc, char** argv)
2847 {
2848 	bool physical = false;
2849 	addr_t copyAddress;
2850 	int32 displayWidth;
2851 	int32 itemSize;
2852 	int32 num = -1;
2853 	addr_t address;
2854 	int i = 1, j;
2855 
2856 	if (argc > 1 && argv[1][0] == '-') {
2857 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2858 			physical = true;
2859 			i++;
2860 		} else
2861 			i = 99;
2862 	}
2863 
2864 	if (argc < i + 1 || argc > i + 2) {
2865 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2866 			"\tdl - 8 bytes\n"
2867 			"\tdw - 4 bytes\n"
2868 			"\tds - 2 bytes\n"
2869 			"\tdb - 1 byte\n"
2870 			"\tstring - a whole string\n"
2871 			"  -p or --physical only allows memory from a single page to be "
2872 			"displayed.\n");
2873 		return 0;
2874 	}
2875 
2876 	address = parse_expression(argv[i]);
2877 
2878 	if (argc > i + 1)
2879 		num = parse_expression(argv[i + 1]);
2880 
2881 	// build the format string
2882 	if (strcmp(argv[0], "db") == 0) {
2883 		itemSize = 1;
2884 		displayWidth = 16;
2885 	} else if (strcmp(argv[0], "ds") == 0) {
2886 		itemSize = 2;
2887 		displayWidth = 8;
2888 	} else if (strcmp(argv[0], "dw") == 0) {
2889 		itemSize = 4;
2890 		displayWidth = 4;
2891 	} else if (strcmp(argv[0], "dl") == 0) {
2892 		itemSize = 8;
2893 		displayWidth = 2;
2894 	} else if (strcmp(argv[0], "string") == 0) {
2895 		itemSize = 1;
2896 		displayWidth = -1;
2897 	} else {
2898 		kprintf("display_mem called in an invalid way!\n");
2899 		return 0;
2900 	}
2901 
2902 	if (num <= 0)
2903 		num = displayWidth;
2904 
2905 	void* physicalPageHandle = NULL;
2906 
2907 	if (physical) {
2908 		int32 offset = address & (B_PAGE_SIZE - 1);
2909 		if (num * itemSize + offset > B_PAGE_SIZE) {
2910 			num = (B_PAGE_SIZE - offset) / itemSize;
2911 			kprintf("NOTE: number of bytes has been cut to page size\n");
2912 		}
2913 
2914 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2915 
2916 		if (vm_get_physical_page_debug(address, &copyAddress,
2917 				&physicalPageHandle) != B_OK) {
2918 			kprintf("getting the hardware page failed.\n");
2919 			return 0;
2920 		}
2921 
2922 		address += offset;
2923 		copyAddress += offset;
2924 	} else
2925 		copyAddress = address;
2926 
2927 	if (!strcmp(argv[0], "string")) {
2928 		kprintf("%p \"", (char*)copyAddress);
2929 
2930 		// string mode
2931 		for (i = 0; true; i++) {
2932 			char c;
2933 			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2934 					!= B_OK
2935 				|| c == '\0') {
2936 				break;
2937 			}
2938 
2939 			if (c == '\n')
2940 				kprintf("\\n");
2941 			else if (c == '\t')
2942 				kprintf("\\t");
2943 			else {
2944 				if (!isprint(c))
2945 					c = '.';
2946 
2947 				kprintf("%c", c);
2948 			}
2949 		}
2950 
2951 		kprintf("\"\n");
2952 	} else {
2953 		// number mode
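		// Each output line starts with the address, followed by an ASCII
		// preview of the line's bytes and then the values in hex.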
2954 		for (i = 0; i < num; i++) {
2955 			uint32 value;
2956 
2957 			if ((i % displayWidth) == 0) {
2958 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2959 				if (i != 0)
2960 					kprintf("\n");
2961 
2962 				kprintf("[0x%lx]  ", address + i * itemSize);
2963 
2964 				for (j = 0; j < displayed; j++) {
2965 					char c;
2966 					if (debug_memcpy(B_CURRENT_TEAM, &c,
2967 							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2968 						displayed = j;
2969 						break;
2970 					}
2971 					if (!isprint(c))
2972 						c = '.';
2973 
2974 					kprintf("%c", c);
2975 				}
2976 				if (num > displayWidth) {
2977 					// make sure the spacing in the last line is correct
2978 					for (j = displayed; j < displayWidth * itemSize; j++)
2979 						kprintf(" ");
2980 				}
2981 				kprintf("  ");
2982 			}
2983 
2984 			if (debug_memcpy(B_CURRENT_TEAM, &value,
2985 					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2986 				kprintf("read fault");
2987 				break;
2988 			}
2989 
2990 			switch (itemSize) {
2991 				case 1:
2992 					kprintf(" %02" B_PRIx8, *(uint8*)&value);
2993 					break;
2994 				case 2:
2995 					kprintf(" %04" B_PRIx16, *(uint16*)&value);
2996 					break;
2997 				case 4:
2998 					kprintf(" %08" B_PRIx32, *(uint32*)&value);
2999 					break;
3000 				case 8:
3001 					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3002 					break;
3003 			}
3004 		}
3005 
3006 		kprintf("\n");
3007 	}
3008 
3009 	if (physical) {
3010 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3011 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3012 	}
3013 	return 0;
3014 }
3015 
3016 
3017 static void
3018 dump_cache_tree_recursively(VMCache* cache, int level,
3019 	VMCache* highlightCache)
3020 {
3021 	// print this cache
3022 	for (int i = 0; i < level; i++)
3023 		kprintf("  ");
3024 	if (cache == highlightCache)
3025 		kprintf("%p <--\n", cache);
3026 	else
3027 		kprintf("%p\n", cache);
3028 
3029 	// recursively print its consumers
3030 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3031 			VMCache* consumer = it.Next();) {
3032 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3033 	}
3034 }
3035 
3036 
3037 static int
3038 dump_cache_tree(int argc, char** argv)
3039 {
3040 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3041 		kprintf("usage: %s <address>\n", argv[0]);
3042 		return 0;
3043 	}
3044 
3045 	addr_t address = parse_expression(argv[1]);
3046 	if (address == 0)
3047 		return 0;
3048 
3049 	VMCache* cache = (VMCache*)address;
3050 	VMCache* root = cache;
3051 
3052 	// find the root cache (the transitive source)
3053 	while (root->source != NULL)
3054 		root = root->source;
3055 
3056 	dump_cache_tree_recursively(root, 0, cache);
3057 
3058 	return 0;
3059 }
3060 
3061 
3062 const char*
3063 vm_cache_type_to_string(int32 type)
3064 {
3065 	switch (type) {
3066 		case CACHE_TYPE_RAM:
3067 			return "RAM";
3068 		case CACHE_TYPE_DEVICE:
3069 			return "device";
3070 		case CACHE_TYPE_VNODE:
3071 			return "vnode";
3072 		case CACHE_TYPE_NULL:
3073 			return "null";
3074 
3075 		default:
3076 			return "unknown";
3077 	}
3078 }
3079 
3080 
3081 #if DEBUG_CACHE_LIST
3082 
3083 static void
3084 update_cache_info_recursively(VMCache* cache, cache_info& info)
3085 {
3086 	info.page_count += cache->page_count;
3087 	if (cache->type == CACHE_TYPE_RAM)
3088 		info.committed += cache->committed_size;
3089 
3090 	// recurse
3091 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3092 			VMCache* consumer = it.Next();) {
3093 		update_cache_info_recursively(consumer, info);
3094 	}
3095 }
3096 
3097 
3098 static int
3099 cache_info_compare_page_count(const void* _a, const void* _b)
3100 {
3101 	const cache_info* a = (const cache_info*)_a;
3102 	const cache_info* b = (const cache_info*)_b;
3103 	if (a->page_count == b->page_count)
3104 		return 0;
3105 	return a->page_count < b->page_count ? 1 : -1;
3106 }
3107 
3108 
3109 static int
3110 cache_info_compare_committed(const void* _a, const void* _b)
3111 {
3112 	const cache_info* a = (const cache_info*)_a;
3113 	const cache_info* b = (const cache_info*)_b;
3114 	if (a->committed == b->committed)
3115 		return 0;
3116 	return a->committed < b->committed ? 1 : -1;
3117 }
3118 
3119 
3120 static void
3121 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3122 {
3123 	for (int i = 0; i < level; i++)
3124 		kprintf("  ");
3125 
3126 	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3127 		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3128 		cache->virtual_base, cache->virtual_end, cache->page_count);
3129 
3130 	if (level == 0)
3131 		kprintf("/%lu", info.page_count);
3132 
3133 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3134 		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3135 
3136 		if (level == 0)
3137 			kprintf("/%lu", info.committed);
3138 	}
3139 
3140 	// areas
3141 	if (cache->areas != NULL) {
3142 		VMArea* area = cache->areas;
3143 		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3144 			area->name, area->address_space->ID());
3145 
3146 		while (area->cache_next != NULL) {
3147 			area = area->cache_next;
3148 			kprintf(", %" B_PRId32, area->id);
3149 		}
3150 	}
3151 
3152 	kputs("\n");
3153 
3154 	// recurse
3155 	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3156 			VMCache* consumer = it.Next();) {
3157 		dump_caches_recursively(consumer, info, level + 1);
3158 	}
3159 }
3160 
3161 
3162 static int
3163 dump_caches(int argc, char** argv)
3164 {
3165 	if (sCacheInfoTable == NULL) {
3166 		kprintf("No cache info table!\n");
3167 		return 0;
3168 	}
3169 
3170 	bool sortByPageCount = true;
3171 
3172 	for (int32 i = 1; i < argc; i++) {
3173 		if (strcmp(argv[i], "-c") == 0) {
3174 			sortByPageCount = false;
3175 		} else {
3176 			print_debugger_command_usage(argv[0]);
3177 			return 0;
3178 		}
3179 	}
3180 
3181 	uint32 totalCount = 0;
3182 	uint32 rootCount = 0;
3183 	off_t totalCommitted = 0;
3184 	page_num_t totalPages = 0;
3185 
3186 	VMCache* cache = gDebugCacheList;
3187 	while (cache) {
3188 		totalCount++;
3189 		if (cache->source == NULL) {
3190 			cache_info stackInfo;
3191 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3192 				? sCacheInfoTable[rootCount] : stackInfo;
3193 			rootCount++;
3194 			info.cache = cache;
3195 			info.page_count = 0;
3196 			info.committed = 0;
3197 			update_cache_info_recursively(cache, info);
3198 			totalCommitted += info.committed;
3199 			totalPages += info.page_count;
3200 		}
3201 
3202 		cache = cache->debug_next;
3203 	}
3204 
3205 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3206 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3207 			sortByPageCount
3208 				? &cache_info_compare_page_count
3209 				: &cache_info_compare_committed);
3210 	}
3211 
3212 	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3213 		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3214 	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3215 		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3216 			"page count" : "committed size");
3217 
3218 	if (rootCount <= (uint32)kCacheInfoTableCount) {
3219 		for (uint32 i = 0; i < rootCount; i++) {
3220 			cache_info& info = sCacheInfoTable[i];
3221 			dump_caches_recursively(info.cache, info, 0);
3222 		}
3223 	} else
3224 		kprintf("Cache info table too small! Can't sort and print caches!\n");
3225 
3226 	return 0;
3227 }
3228 
3229 #endif	// DEBUG_CACHE_LIST
3230 
3231 
3232 static int
3233 dump_cache(int argc, char** argv)
3234 {
3235 	VMCache* cache;
3236 	bool showPages = false;
3237 	int i = 1;
3238 
3239 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3240 		kprintf("usage: %s [-ps] <address>\n"
3241 			"  if -p is specified, all pages are shown; if -s is used,\n"
3242 			"  only the cache info is shown.\n", argv[0]);
3243 		return 0;
3244 	}
3245 	while (argv[i][0] == '-') {
3246 		char* arg = argv[i] + 1;
3247 		while (arg[0]) {
3248 			if (arg[0] == 'p')
3249 				showPages = true;
3250 			arg++;
3251 		}
3252 		i++;
3253 	}
3254 	if (argv[i] == NULL) {
3255 		kprintf("%s: invalid argument, pass address\n", argv[0]);
3256 		return 0;
3257 	}
3258 
3259 	addr_t address = parse_expression(argv[i]);
3260 	if (address == 0)
3261 		return 0;
3262 
3263 	cache = (VMCache*)address;
3264 
3265 	cache->Dump(showPages);
3266 
3267 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3268 
3269 	return 0;
3270 }
3271 
3272 
3273 static void
3274 dump_area_struct(VMArea* area, bool mappings)
3275 {
3276 	kprintf("AREA: %p\n", area);
3277 	kprintf("name:\t\t'%s'\n", area->name);
3278 	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3279 	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3280 	kprintf("base:\t\t0x%lx\n", area->Base());
3281 	kprintf("size:\t\t0x%lx\n", area->Size());
3282 	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3283 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3284 	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3285 	kprintf("cache:\t\t%p\n", area->cache);
3286 	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3287 	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3288 	kprintf("cache_next:\t%p\n", area->cache_next);
3289 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3290 
3291 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3292 	if (mappings) {
3293 		kprintf("page mappings:\n");
3294 		while (iterator.HasNext()) {
3295 			vm_page_mapping* mapping = iterator.Next();
3296 			kprintf("  %p", mapping->page);
3297 		}
3298 		kprintf("\n");
3299 	} else {
3300 		uint32 count = 0;
3301 		while (iterator.Next() != NULL) {
3302 			count++;
3303 		}
3304 		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3305 	}
3306 }
3307 
3308 
3309 static int
3310 dump_area(int argc, char** argv)
3311 {
3312 	bool mappings = false;
3313 	bool found = false;
3314 	int32 index = 1;
3315 	VMArea* area;
3316 	addr_t num;
3317 
3318 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3319 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3320 			"All areas matching either id/address/name are listed. You can\n"
3321 			"restrict the check to a specific attribute by prefixing the\n"
3322 			"specifier with the id/contains/address/name keywords.\n"
3323 			"-m shows the area's mappings as well.\n");
3324 		return 0;
3325 	}
3326 
3327 	if (!strcmp(argv[1], "-m")) {
3328 		mappings = true;
3329 		index++;
3330 	}
3331 
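	// mode is a bit mask: 1 matches the id, 2 matches areas containing the
	// given address, 4 matches the name; 0xf means "match any of these",
	// while 0 treats the argument as a VMArea pointer.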
3332 	int32 mode = 0xf;
3333 	if (!strcmp(argv[index], "id"))
3334 		mode = 1;
3335 	else if (!strcmp(argv[index], "contains"))
3336 		mode = 2;
3337 	else if (!strcmp(argv[index], "name"))
3338 		mode = 4;
3339 	else if (!strcmp(argv[index], "address"))
3340 		mode = 0;
3341 	if (mode != 0xf)
3342 		index++;
3343 
3344 	if (index >= argc) {
3345 		kprintf("No area specifier given.\n");
3346 		return 0;
3347 	}
3348 
3349 	num = parse_expression(argv[index]);
3350 
3351 	if (mode == 0) {
3352 		dump_area_struct((struct VMArea*)num, mappings);
3353 	} else {
3354 		// walk through the area list, matching id, name and/or address
3355 
3356 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3357 		while ((area = it.Next()) != NULL) {
3358 			if (((mode & 4) != 0 && area->name != NULL
3359 					&& !strcmp(argv[index], area->name))
3360 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3361 					|| (((mode & 2) != 0 && area->Base() <= num
3362 						&& area->Base() + area->Size() > num))))) {
3363 				dump_area_struct(area, mappings);
3364 				found = true;
3365 			}
3366 		}
3367 
3368 		if (!found)
3369 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3370 	}
3371 
3372 	return 0;
3373 }
3374 
3375 
3376 static int
3377 dump_area_list(int argc, char** argv)
3378 {
3379 	VMArea* area;
3380 	const char* name = NULL;
3381 	int32 id = 0;
3382 
3383 	if (argc > 1) {
3384 		id = parse_expression(argv[1]);
3385 		if (id == 0)
3386 			name = argv[1];
3387 	}
3388 
3389 	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3390 		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3391 		B_PRINTF_POINTER_WIDTH, "size");
3392 
3393 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3394 	while ((area = it.Next()) != NULL) {
3395 		if ((id != 0 && area->address_space->ID() != id)
3396 			|| (name != NULL && strstr(area->name, name) == NULL))
3397 			continue;
3398 
3399 		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3400 			area->id, (void*)area->Base(), (void*)area->Size(),
3401 			area->protection, area->wiring, area->name);
3402 	}
3403 	return 0;
3404 }
3405 
3406 
3407 static int
3408 dump_available_memory(int argc, char** argv)
3409 {
3410 	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3411 		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3412 	return 0;
3413 }
3414 
3415 
3416 static int
3417 dump_mapping_info(int argc, char** argv)
3418 {
3419 	bool reverseLookup = false;
3420 	bool pageLookup = false;
3421 
3422 	int argi = 1;
3423 	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3424 		const char* arg = argv[argi];
3425 		if (strcmp(arg, "-r") == 0) {
3426 			reverseLookup = true;
3427 		} else if (strcmp(arg, "-p") == 0) {
3428 			reverseLookup = true;
3429 			pageLookup = true;
3430 		} else {
3431 			print_debugger_command_usage(argv[0]);
3432 			return 0;
3433 		}
3434 	}
3435 
3436 	// We need at least one argument, the address. Optionally a thread ID can be
3437 	// specified.
3438 	if (argi >= argc || argi + 2 < argc) {
3439 		print_debugger_command_usage(argv[0]);
3440 		return 0;
3441 	}
3442 
3443 	uint64 addressValue;
3444 	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3445 		return 0;
3446 
3447 	Team* team = NULL;
3448 	if (argi < argc) {
3449 		uint64 threadID;
3450 		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3451 			return 0;
3452 
3453 		Thread* thread = Thread::GetDebug(threadID);
3454 		if (thread == NULL) {
3455 			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3456 			return 0;
3457 		}
3458 
3459 		team = thread->team;
3460 	}
3461 
3462 	if (reverseLookup) {
3463 		phys_addr_t physicalAddress;
3464 		if (pageLookup) {
3465 			vm_page* page = (vm_page*)(addr_t)addressValue;
3466 			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3467 		} else {
3468 			physicalAddress = (phys_addr_t)addressValue;
3469 			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3470 		}
3471 
3472 		kprintf("    Team     Virtual Address      Area\n");
3473 		kprintf("--------------------------------------\n");
3474 
3475 		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3476 			Callback()
3477 				:
3478 				fAddressSpace(NULL)
3479 			{
3480 			}
3481 
3482 			void SetAddressSpace(VMAddressSpace* addressSpace)
3483 			{
3484 				fAddressSpace = addressSpace;
3485 			}
3486 
3487 			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3488 			{
3489 				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3490 					virtualAddress);
3491 				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3492 					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3493 				else
3494 					kprintf("\n");
3495 				return false;
3496 			}
3497 
3498 		private:
3499 			VMAddressSpace*	fAddressSpace;
3500 		} callback;
3501 
3502 		if (team != NULL) {
3503 			// team specified -- get its address space
3504 			VMAddressSpace* addressSpace = team->address_space;
3505 			if (addressSpace == NULL) {
3506 				kprintf("Failed to get address space!\n");
3507 				return 0;
3508 			}
3509 
3510 			callback.SetAddressSpace(addressSpace);
3511 			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3512 				physicalAddress, callback);
3513 		} else {
3514 			// no team specified -- iterate through all address spaces
3515 			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3516 				addressSpace != NULL;
3517 				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3518 				callback.SetAddressSpace(addressSpace);
3519 				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3520 					physicalAddress, callback);
3521 			}
3522 		}
3523 	} else {
3524 		// get the address space
3525 		addr_t virtualAddress = (addr_t)addressValue;
3526 		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3527 		VMAddressSpace* addressSpace;
3528 		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3529 			addressSpace = VMAddressSpace::Kernel();
3530 		} else if (team != NULL) {
3531 			addressSpace = team->address_space;
3532 		} else {
3533 			Thread* thread = debug_get_debugged_thread();
3534 			if (thread == NULL || thread->team == NULL) {
3535 				kprintf("Failed to get team!\n");
3536 				return 0;
3537 			}
3538 
3539 			addressSpace = thread->team->address_space;
3540 		}
3541 
3542 		if (addressSpace == NULL) {
3543 			kprintf("Failed to get address space!\n");
3544 			return 0;
3545 		}
3546 
3547 		// let the translation map implementation do the job
3548 		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3549 	}
3550 
3551 	return 0;
3552 }
3553 
3554 
3555 /*!	Deletes all areas and reserved regions in the given address space.
3556 
3557 	The caller must ensure that none of the areas has any wired ranges.
3558 
3559 	\param addressSpace The address space.
3560 	\param deletingAddressSpace \c true, if the address space is in the process
3561 		of being deleted.
3562 */
3563 void
3564 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3565 {
3566 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3567 		addressSpace->ID()));
3568 
3569 	addressSpace->WriteLock();
3570 
3571 	// remove all reserved areas in this address space
3572 	addressSpace->UnreserveAllAddressRanges(0);
3573 
3574 	// delete all the areas in this address space
3575 	while (VMArea* area = addressSpace->FirstArea()) {
3576 		ASSERT(!area->IsWired());
3577 		delete_area(addressSpace, area, deletingAddressSpace);
3578 	}
3579 
3580 	addressSpace->WriteUnlock();
3581 }
3582 
3583 
3584 static area_id
3585 vm_area_for(addr_t address, bool kernel)
3586 {
3587 	team_id team;
3588 	if (IS_USER_ADDRESS(address)) {
3589 		// we try the user team address space, if any
3590 		team = VMAddressSpace::CurrentID();
3591 		if (team < 0)
3592 			return team;
3593 	} else
3594 		team = VMAddressSpace::KernelID();
3595 
3596 	AddressSpaceReadLocker locker(team);
3597 	if (!locker.IsLocked())
3598 		return B_BAD_TEAM_ID;
3599 
3600 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3601 	if (area != NULL) {
3602 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3603 			return B_ERROR;
3604 
3605 		return area->id;
3606 	}
3607 
3608 	return B_ERROR;
3609 }
3610 
3611 
3612 /*!	Frees physical pages that were used during the boot process.
3613 	\a end is inclusive.
3614 */
3615 static void
3616 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3617 {
3618 	// free all physical pages in the specified range
3619 
3620 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3621 		phys_addr_t physicalAddress;
3622 		uint32 flags;
3623 
3624 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3625 			&& (flags & PAGE_PRESENT) != 0) {
3626 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3627 			if (page != NULL && page->State() != PAGE_STATE_FREE
3628 					 && page->State() != PAGE_STATE_CLEAR
3629 					 && page->State() != PAGE_STATE_UNUSED) {
3630 				DEBUG_PAGE_ACCESS_START(page);
3631 				vm_page_set_state(page, PAGE_STATE_FREE);
3632 			}
3633 		}
3634 	}
3635 
3636 	// unmap the memory
3637 	map->Unmap(start, end);
3638 }
3639 
3640 
3641 void
3642 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3643 {
3644 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3645 	addr_t end = start + (size - 1);
3646 	addr_t lastEnd = start;
3647 
3648 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3649 		(void*)start, (void*)end));
3650 
3651 	// The areas are sorted in virtual address space order, so
3652 	// we just have to find the holes between them that fall
3653 	// into the range we should dispose of
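	// For illustration (addresses invented, assuming a 4 KB page size): if the
	// range to free is [0x1000, 0x8fff] and the kernel has areas covering
	// [0x2000, 0x2fff] and [0x5000, 0x5fff], the loop below frees the holes
	// [0x1000, 0x1fff] and [0x3000, 0x4fff], and the code after the loop frees
	// the remaining tail [0x6000, 0x8fff].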
3654 
3655 	map->Lock();
3656 
3657 	for (VMAddressSpace::AreaIterator it
3658 				= VMAddressSpace::Kernel()->GetAreaIterator();
3659 			VMArea* area = it.Next();) {
3660 		addr_t areaStart = area->Base();
3661 		addr_t areaEnd = areaStart + (area->Size() - 1);
3662 
3663 		if (areaEnd < start)
3664 			continue;
3665 
3666 		if (areaStart > end) {
3667 			// we are done, the area is already beyond what we have to free
3668 			break;
3669 		}
3670 
3671 		if (areaStart > lastEnd) {
3672 			// this is something we can free
3673 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3674 				(void*)areaStart));
3675 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3676 		}
3677 
3678 		if (areaEnd >= end) {
3679 			lastEnd = areaEnd;
3680 				// no +1 to prevent potential overflow
3681 			break;
3682 		}
3683 
3684 		lastEnd = areaEnd + 1;
3685 	}
3686 
3687 	if (lastEnd < end) {
3688 		// we can also get rid of some space at the end of the area
3689 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3690 			(void*)end));
3691 		unmap_and_free_physical_pages(map, lastEnd, end);
3692 	}
3693 
3694 	map->Unlock();
3695 }
3696 
3697 
3698 static void
3699 create_preloaded_image_areas(struct preloaded_image* _image)
3700 {
3701 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3702 	char name[B_OS_NAME_LENGTH];
3703 	void* address;
3704 	int32 length;
3705 
3706 	// use file name to create a good area name
3707 	char* fileName = strrchr(image->name, '/');
3708 	if (fileName == NULL)
3709 		fileName = image->name;
3710 	else
3711 		fileName++;
3712 
3713 	length = strlen(fileName);
3714 	// make sure there is enough space for the suffix
3715 	if (length > 25)
3716 		length = 25;
3717 
3718 	memcpy(name, fileName, length);
3719 	strcpy(name + length, "_text");
3720 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3721 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3722 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3723 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3724 		// this will later be remapped read-only/executable by the
3725 		// ELF initialization code
3726 
3727 	strcpy(name + length, "_data");
3728 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3729 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3730 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3731 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3732 }
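
// For illustration (the file name is invented): an image loaded from
// "/boot/system/lib/libroot.so" yields the areas "libroot.so_text" and
// "libroot.so_data"; file names longer than 25 characters are truncated before
// the suffix is appended, so the result still fits into B_OS_NAME_LENGTH.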
3733 
3734 
3735 /*!	Frees all areas that were previously created for the kernel arguments
3736 	ranges of the kernel_args structure. Any boot loader resources contained
3737 	in those arguments must not be accessed anymore past this point.
3738 */
3739 void
3740 vm_free_kernel_args(kernel_args* args)
3741 {
3742 	uint32 i;
3743 
3744 	TRACE(("vm_free_kernel_args()\n"));
3745 
3746 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3747 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3748 		if (area >= B_OK)
3749 			delete_area(area);
3750 	}
3751 }
3752 
3753 
3754 static void
3755 allocate_kernel_args(kernel_args* args)
3756 {
3757 	TRACE(("allocate_kernel_args()\n"));
3758 
3759 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3760 		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3761 
3762 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3763 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3764 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3765 	}
3766 }
3767 
3768 
3769 static void
3770 unreserve_boot_loader_ranges(kernel_args* args)
3771 {
3772 	TRACE(("unreserve_boot_loader_ranges()\n"));
3773 
3774 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3775 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3776 			(void*)(addr_t)args->virtual_allocated_range[i].start,
3777 			args->virtual_allocated_range[i].size);
3778 	}
3779 }
3780 
3781 
3782 static void
3783 reserve_boot_loader_ranges(kernel_args* args)
3784 {
3785 	TRACE(("reserve_boot_loader_ranges()\n"));
3786 
3787 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3788 		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3789 
3790 		// If the address is not a kernel address, we just skip it. The
3791 		// architecture specific code has to deal with it.
3792 		if (!IS_KERNEL_ADDRESS(address)) {
3793 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3794 				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3795 			continue;
3796 		}
3797 
3798 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3799 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3800 		if (status < B_OK)
3801 			panic("could not reserve boot loader ranges\n");
3802 	}
3803 }
3804 
3805 
3806 static addr_t
3807 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3808 {
3809 	size = PAGE_ALIGN(size);
3810 
3811 	// find a slot in the virtual allocation addr range
3812 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3813 		// check to see if the space between this one and the last is big enough
3814 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3815 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3816 			+ args->virtual_allocated_range[i - 1].size;
3817 
3818 		addr_t base = alignment > 0
3819 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3820 
3821 		if (base >= KERNEL_BASE && base < rangeStart
3822 				&& rangeStart - base >= size) {
3823 			args->virtual_allocated_range[i - 1].size
3824 				+= base + size - previousRangeEnd;
3825 			return base;
3826 		}
3827 	}
3828 
3829 	// We didn't find a gap between the allocation ranges. This is OK;
3830 	// see if there's a gap after the last one.
3831 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3832 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3833 		+ args->virtual_allocated_range[lastEntryIndex].size;
3834 	addr_t base = alignment > 0
3835 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3836 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3837 		args->virtual_allocated_range[lastEntryIndex].size
3838 			+= base + size - lastRangeEnd;
3839 		return base;
3840 	}
3841 
3842 	// see if there's a gap before the first one
3843 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3844 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3845 		base = rangeStart - size;
3846 		if (alignment > 0)
3847 			base = ROUNDDOWN(base, alignment);
3848 
3849 		if (base >= KERNEL_BASE) {
3850 			args->virtual_allocated_range[0].start = base;
3851 			args->virtual_allocated_range[0].size += rangeStart - base;
3852 			return base;
3853 		}
3854 	}
3855 
3856 	return 0;
3857 }
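
// Worked example for the gap search above (all values invented and assumed to
// lie above KERNEL_BASE): with allocated ranges [0x80000000, 0x80010000) and
// [0x80040000, 0x80050000), a request for size 0x8000 with alignment 0x4000
// yields base 0x80010000 (already aligned), which fits into the gap before the
// second range; the first range is then extended to [0x80000000, 0x80018000)
// and 0x80010000 is returned.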
3858 
3859 
3860 static bool
3861 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3862 {
3863 	// TODO: horrible brute-force method of determining if the page can be
3864 	// allocated
3865 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3866 		if (address >= args->physical_memory_range[i].start
3867 			&& address < args->physical_memory_range[i].start
3868 				+ args->physical_memory_range[i].size)
3869 			return true;
3870 	}
3871 	return false;
3872 }
3873 
3874 
3875 page_num_t
3876 vm_allocate_early_physical_page(kernel_args* args)
3877 {
3878 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3879 		phys_addr_t nextPage;
3880 
3881 		nextPage = args->physical_allocated_range[i].start
3882 			+ args->physical_allocated_range[i].size;
3883 		// see if the page right after this allocated paddr run can be allocated
3884 		if (i + 1 < args->num_physical_allocated_ranges
3885 			&& args->physical_allocated_range[i + 1].size != 0) {
3886 			// see if the next page will collide with the next allocated range
3887 			if (nextPage >= args->physical_allocated_range[i+1].start)
3888 				continue;
3889 		}
3890 		// see if the next physical page fits in the memory block
3891 		if (is_page_in_physical_memory_range(args, nextPage)) {
3892 			// we got one!
3893 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3894 			return nextPage / B_PAGE_SIZE;
3895 		}
3896 	}
3897 
3898 	// Expanding upwards didn't work, try going downwards.
3899 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3900 		phys_addr_t nextPage;
3901 
3902 		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
3903 		// see if the page right before this allocated paddr run can be allocated
3904 		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
3905 			// see if that page will collide with the previous allocated range
3906 			if (nextPage < args->physical_allocated_range[i-1].start
3907 				+ args->physical_allocated_range[i-1].size)
3908 				continue;
3909 		}
3910 		// see if the next physical page fits in the memory block
3911 		if (is_page_in_physical_memory_range(args, nextPage)) {
3912 			// we got one!
3913 			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
3914 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3915 			return nextPage / B_PAGE_SIZE;
3916 		}
3917 	}
3918 
3919 	return 0;
3920 		// could not allocate a block
3921 }
3922 
3923 
3924 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3925 	allocate some pages before the VM is completely up.
3926 */
3927 addr_t
3928 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3929 	uint32 attributes, addr_t alignment)
3930 {
3931 	if (physicalSize > virtualSize)
3932 		physicalSize = virtualSize;
3933 
3934 	// find the vaddr to allocate at
3935 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3936 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3937 	if (virtualBase == 0) {
3938 		panic("vm_allocate_early: could not allocate virtual address\n");
3939 		return 0;
3940 	}
3941 
3942 	// map the pages
3943 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3944 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3945 		if (physicalAddress == 0)
3946 			panic("error allocating early page!\n");
3947 
3948 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3949 
3950 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3951 			physicalAddress * B_PAGE_SIZE, attributes,
3952 			&vm_allocate_early_physical_page);
3953 	}
3954 
3955 	return virtualBase;
3956 }
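
// Typical early-boot usage (taken from vm_init() below, where the initial
// kernel heap is set up before the page allocator and areas exist):
//
//	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);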
3957 
3958 
3959 /*!	The main entrance point to initialize the VM. */
3960 status_t
3961 vm_init(kernel_args* args)
3962 {
3963 	struct preloaded_image* image;
3964 	void* address;
3965 	status_t err = 0;
3966 	uint32 i;
3967 
3968 	TRACE(("vm_init: entry\n"));
3969 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3970 	err = arch_vm_init(args);
3971 
3972 	// initialize some globals
3973 	vm_page_init_num_pages(args);
3974 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3975 
3976 	slab_init(args);
3977 
3978 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3979 	off_t heapSize = INITIAL_HEAP_SIZE;
3980 	// try to accommodate low-memory systems
3981 	while (heapSize > sAvailableMemory / 8)
3982 		heapSize /= 2;
3983 	if (heapSize < 1024 * 1024)
3984 		panic("vm_init: go buy some RAM please.");
3985 
3986 	// map in the new heap and initialize it
3987 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3988 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3989 	TRACE(("heap at 0x%lx\n", heapBase));
3990 	heap_init(heapBase, heapSize);
3991 #endif
3992 
3993 	// initialize the free page list and physical page mapper
3994 	vm_page_init(args);
3995 
3996 	// initialize the cache allocators
3997 	vm_cache_init(args);
3998 
3999 	{
4000 		status_t error = VMAreaHash::Init();
4001 		if (error != B_OK)
4002 			panic("vm_init: error initializing area hash table\n");
4003 	}
4004 
4005 	VMAddressSpace::Init();
4006 	reserve_boot_loader_ranges(args);
4007 
4008 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4009 	heap_init_post_area();
4010 #endif
4011 
4012 	// Do any further initialization that the architecture dependent layers may
4013 	// need now
4014 	arch_vm_translation_map_init_post_area(args);
4015 	arch_vm_init_post_area(args);
4016 	vm_page_init_post_area(args);
4017 	slab_init_post_area();
4018 
4019 	// allocate areas to represent stuff that already exists
4020 
4021 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4022 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4023 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4024 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4025 #endif
4026 
4027 	allocate_kernel_args(args);
4028 
4029 	create_preloaded_image_areas(args->kernel_image);
4030 
4031 	// allocate areas for preloaded images
4032 	for (image = args->preloaded_images; image != NULL; image = image->next)
4033 		create_preloaded_image_areas(image);
4034 
4035 	// allocate kernel stacks
4036 	for (i = 0; i < args->num_cpus; i++) {
4037 		char name[64];
4038 
4039 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4040 		address = (void*)args->cpu_kstack[i].start;
4041 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4042 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4043 	}
4044 
4045 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4046 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4047 
4048 #if PARANOID_KERNEL_MALLOC
4049 	vm_block_address_range("uninitialized heap memory",
4050 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4051 #endif
4052 #if PARANOID_KERNEL_FREE
4053 	vm_block_address_range("freed heap memory",
4054 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4055 #endif
4056 
4057 	// create the object cache for the page mappings
4058 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4059 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4060 		NULL, NULL);
4061 	if (gPageMappingsObjectCache == NULL)
4062 		panic("failed to create page mappings object cache");
4063 
4064 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4065 
4066 #if DEBUG_CACHE_LIST
4067 	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4068 		virtual_address_restrictions virtualRestrictions = {};
4069 		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4070 		physical_address_restrictions physicalRestrictions = {};
4071 		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4072 			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4073 			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4074 			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4075 			&physicalRestrictions, (void**)&sCacheInfoTable);
4076 	}
4077 #endif	// DEBUG_CACHE_LIST
4078 
4079 	// add some debugger commands
4080 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4081 	add_debugger_command("area", &dump_area,
4082 		"Dump info about a particular area");
4083 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4084 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4085 #if DEBUG_CACHE_LIST
4086 	if (sCacheInfoTable != NULL) {
4087 		add_debugger_command_etc("caches", &dump_caches,
4088 			"List all VMCache trees",
4089 			"[ \"-c\" ]\n"
4090 			"All cache trees are listed sorted in decreasing order by number "
4091 				"of\n"
4092 			"used pages or, if \"-c\" is specified, by size of committed "
4093 				"memory.\n",
4094 			0);
4095 	}
4096 #endif
4097 	add_debugger_command("avail", &dump_available_memory,
4098 		"Dump available memory");
4099 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4100 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4101 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4102 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4103 	add_debugger_command("string", &display_mem, "dump strings");
4104 
4105 	add_debugger_command_etc("mapping", &dump_mapping_info,
4106 		"Print address mapping information",
4107 		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4108 		"Prints low-level page mapping information for a given address. If\n"
4109 		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4110 		"address that is looked up in the translation map of the current\n"
4111 		"team, respectively the team specified by thread ID <thread ID>. If\n"
4112 		"\"-r\" is specified, <address> is a physical address that is\n"
4113 		"searched in the translation map of all teams, respectively the team\n"
4114 		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4115 		"<address> is the address of a vm_page structure. The behavior is\n"
4116 		"equivalent to specifying \"-r\" with the physical address of that\n"
4117 		"page.\n",
4118 		0);
4119 
4120 	TRACE(("vm_init: exit\n"));
4121 
4122 	vm_cache_init_post_heap();
4123 
4124 	return err;
4125 }
4126 
4127 
4128 status_t
4129 vm_init_post_sem(kernel_args* args)
4130 {
4131 	// This frees all unused boot loader resources and makes their space
4132 	// available again
4133 	arch_vm_init_end(args);
4134 	unreserve_boot_loader_ranges(args);
4135 
4136 	// fill in all of the semaphores that were not allocated before
4137 	// since we're still single threaded and only the kernel address space
4138 	// exists, it isn't that hard to find all of the ones we need to create
4139 
4140 	arch_vm_translation_map_init_post_sem(args);
4141 
4142 	slab_init_post_sem();
4143 
4144 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4145 	heap_init_post_sem();
4146 #endif
4147 
4148 	return B_OK;
4149 }
4150 
4151 
4152 status_t
4153 vm_init_post_thread(kernel_args* args)
4154 {
4155 	vm_page_init_post_thread(args);
4156 	slab_init_post_thread();
4157 	return heap_init_post_thread();
4158 }
4159 
4160 
4161 status_t
4162 vm_init_post_modules(kernel_args* args)
4163 {
4164 	return arch_vm_init_post_modules(args);
4165 }
4166 
4167 
4168 void
4169 permit_page_faults(void)
4170 {
4171 	Thread* thread = thread_get_current_thread();
4172 	if (thread != NULL)
4173 		atomic_add(&thread->page_faults_allowed, 1);
4174 }
4175 
4176 
4177 void
4178 forbid_page_faults(void)
4179 {
4180 	Thread* thread = thread_get_current_thread();
4181 	if (thread != NULL)
4182 		atomic_add(&thread->page_faults_allowed, -1);
4183 }
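
// The two functions above nest via the per-thread page_faults_allowed counter;
// a condensed sketch of how a fault-tolerant section is typically bracketed
// (the code in between is a placeholder):
//
//	permit_page_faults();
//	status_t status = ...;	// access that may legitimately fault
//	forbid_page_faults();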
4184 
4185 
4186 status_t
4187 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4188 	bool isUser, addr_t* newIP)
4189 {
4190 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4191 		faultAddress));
4192 
4193 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4194 
4195 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4196 	VMAddressSpace* addressSpace = NULL;
4197 
4198 	status_t status = B_OK;
4199 	*newIP = 0;
4200 	atomic_add((int32*)&sPageFaults, 1);
4201 
4202 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4203 		addressSpace = VMAddressSpace::GetKernel();
4204 	} else if (IS_USER_ADDRESS(pageAddress)) {
4205 		addressSpace = VMAddressSpace::GetCurrent();
4206 		if (addressSpace == NULL) {
4207 			if (!isUser) {
4208 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4209 					"memory!\n");
4210 				status = B_BAD_ADDRESS;
4211 				TPF(PageFaultError(-1,
4212 					VMPageFaultTracing
4213 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4214 			} else {
4215 				// XXX weird state.
4216 				panic("vm_page_fault: non kernel thread accessing user memory "
4217 					"that doesn't exist!\n");
4218 				status = B_BAD_ADDRESS;
4219 			}
4220 		}
4221 	} else {
4222 		// the hit was probably in the 64k DMZ between kernel and user space
4223 		// this keeps a user space thread from passing a buffer that crosses
4224 		// into kernel space
4225 		status = B_BAD_ADDRESS;
4226 		TPF(PageFaultError(-1,
4227 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4228 	}
4229 
4230 	if (status == B_OK) {
4231 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4232 			isUser, NULL);
4233 	}
4234 
4235 	if (status < B_OK) {
4236 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4237 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4238 			strerror(status), address, faultAddress, isWrite, isUser,
4239 			thread_get_current_thread_id());
4240 		if (!isUser) {
4241 			Thread* thread = thread_get_current_thread();
4242 			if (thread != NULL && thread->fault_handler != 0) {
4243 				// this will cause the arch dependent page fault handler to
4244 				// modify the IP on the interrupt frame or whatever to return
4245 				// to this address
4246 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4247 			} else {
4248 				// unhandled page fault in the kernel
4249 				panic("vm_page_fault: unhandled page fault in kernel space at "
4250 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4251 			}
4252 		} else {
4253 #if 1
4254 			// TODO: remove me once we have proper userland debugging support
4255 			// (and tools)
4256 			VMArea* area = NULL;
4257 			if (addressSpace != NULL) {
4258 				addressSpace->ReadLock();
4259 				area = addressSpace->LookupArea(faultAddress);
4260 			}
4261 
4262 			Thread* thread = thread_get_current_thread();
4263 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4264 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4265 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4266 				thread->team->Name(), thread->team->id,
4267 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4268 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4269 					area->Base() : 0x0));
4270 
4271 			// We can print a stack trace of the userland thread here.
4272 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
4273 // fault and someone is already waiting for a write lock on the same address
4274 // space. This thread will then try to acquire the lock again and will
4275 // be queued after the writer.
4276 #	if 0
4277 			if (area) {
4278 				struct stack_frame {
4279 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
4280 						struct stack_frame*	previous;
4281 						void*				return_address;
4282 					#else
4283 						// ...
4284 					#warning writeme
4285 					#endif
4286 				} frame;
4287 #		ifdef __INTEL__
4288 				struct iframe* iframe = x86_get_user_iframe();
4289 				if (iframe == NULL)
4290 					panic("iframe is NULL!");
4291 
4292 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
4293 					sizeof(struct stack_frame));
4294 #		elif defined(__POWERPC__)
4295 				struct iframe* iframe = ppc_get_user_iframe();
4296 				if (iframe == NULL)
4297 					panic("iframe is NULL!");
4298 
4299 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
4300 					sizeof(struct stack_frame));
4301 #		else
4302 #			warning "vm_page_fault() stack trace won't work"
4303 				status = B_ERROR;
4304 #		endif
4305 
4306 				dprintf("stack trace:\n");
4307 				int32 maxFrames = 50;
4308 				while (status == B_OK && --maxFrames >= 0
4309 						&& frame.return_address != NULL) {
4310 					dprintf("  %p", frame.return_address);
4311 					area = addressSpace->LookupArea(
4312 						(addr_t)frame.return_address);
4313 					if (area) {
4314 						dprintf(" (%s + %#lx)", area->name,
4315 							(addr_t)frame.return_address - area->Base());
4316 					}
4317 					dprintf("\n");
4318 
4319 					status = user_memcpy(&frame, frame.previous,
4320 						sizeof(struct stack_frame));
4321 				}
4322 			}
4323 #	endif	// 0 (stack trace)
4324 
4325 			if (addressSpace != NULL)
4326 				addressSpace->ReadUnlock();
4327 #endif
4328 
4329 			// If the thread has a signal handler for SIGSEGV, we simply
4330 			// send it the signal. Otherwise we notify the user debugger
4331 			// first.
4332 			struct sigaction action;
4333 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4334 					&& action.sa_handler != SIG_DFL
4335 					&& action.sa_handler != SIG_IGN)
4336 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4337 					SIGSEGV)) {
4338 				Signal signal(SIGSEGV,
4339 					status == B_PERMISSION_DENIED
4340 						? SEGV_ACCERR : SEGV_MAPERR,
4341 					EFAULT, thread->team->id);
4342 				signal.SetAddress((void*)address);
4343 				send_signal_to_thread(thread, signal, 0);
4344 			}
4345 		}
4346 	}
4347 
4348 	if (addressSpace != NULL)
4349 		addressSpace->Put();
4350 
4351 	return B_HANDLED_INTERRUPT;
4352 }
4353 
4354 
4355 struct PageFaultContext {
4356 	AddressSpaceReadLocker	addressSpaceLocker;
4357 	VMCacheChainLocker		cacheChainLocker;
4358 
4359 	VMTranslationMap*		map;
4360 	VMCache*				topCache;
4361 	off_t					cacheOffset;
4362 	vm_page_reservation		reservation;
4363 	bool					isWrite;
4364 
4365 	// return values
4366 	vm_page*				page;
4367 	bool					restart;
4368 	bool					pageAllocated;
4369 
4370 
4371 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4372 		:
4373 		addressSpaceLocker(addressSpace, true),
4374 		map(addressSpace->TranslationMap()),
4375 		isWrite(isWrite)
4376 	{
4377 	}
4378 
4379 	~PageFaultContext()
4380 	{
4381 		UnlockAll();
4382 		vm_page_unreserve_pages(&reservation);
4383 	}
4384 
4385 	void Prepare(VMCache* topCache, off_t cacheOffset)
4386 	{
4387 		this->topCache = topCache;
4388 		this->cacheOffset = cacheOffset;
4389 		page = NULL;
4390 		restart = false;
4391 		pageAllocated = false;
4392 
4393 		cacheChainLocker.SetTo(topCache);
4394 	}
4395 
4396 	void UnlockAll(VMCache* exceptCache = NULL)
4397 	{
4398 		topCache = NULL;
4399 		addressSpaceLocker.Unlock();
4400 		cacheChainLocker.Unlock(exceptCache);
4401 	}
4402 };
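
// A condensed sketch of how the context is meant to be used (vm_soft_fault()
// below is the real consumer; error handling and the permission checks are
// omitted here):
//
//	PageFaultContext context(addressSpace, isWrite);
//	while (true) {
//		context.addressSpaceLocker.Lock();
//		VMArea* area = addressSpace->LookupArea(address);
//		...
//		context.Prepare(vm_area_get_locked_cache(area),
//			address - area->Base() + area->cache_offset);
//		if (fault_get_page(context) != B_OK)
//			break;
//		if (context.restart)
//			continue;	// everything was unlocked -- start over
//		// map context.page and finish
//		break;
//	}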
4403 
4404 
4405 /*!	Gets the page that should be mapped into the area.
4406 	Returns an error code other than \c B_OK, if the page couldn't be found or
4407 	paged in. The locking state of the address space and the caches is undefined
4408 	in that case.
4409 	Returns \c B_OK with \c context.restart set to \c true, if the function
4410 	had to unlock the address space and all caches and is supposed to be called
4411 	again.
4412 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4413 	found. It is returned in \c context.page. The address space will still be
4414 	locked as well as all caches starting from the top cache to at least the
4415 	cache the page lives in.
4416 */
4417 static status_t
4418 fault_get_page(PageFaultContext& context)
4419 {
4420 	VMCache* cache = context.topCache;
4421 	VMCache* lastCache = NULL;
4422 	vm_page* page = NULL;
4423 
4424 	while (cache != NULL) {
4425 		// We already hold the lock of the cache at this point.
4426 
4427 		lastCache = cache;
4428 
4429 		page = cache->LookupPage(context.cacheOffset);
4430 		if (page != NULL && page->busy) {
4431 			// the page is busy -- wait for it to become unbusy
4432 			context.UnlockAll(cache);
4433 			cache->ReleaseRefLocked();
4434 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4435 
4436 			// restart the whole process
4437 			context.restart = true;
4438 			return B_OK;
4439 		}
4440 
4441 		if (page != NULL)
4442 			break;
4443 
4444 		// The current cache does not contain the page we're looking for.
4445 
4446 		// see if the backing store has it
4447 		if (cache->HasPage(context.cacheOffset)) {
4448 			// insert a fresh page and mark it busy -- we're going to read it in
4449 			page = vm_page_allocate_page(&context.reservation,
4450 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4451 			cache->InsertPage(page, context.cacheOffset);
4452 
4453 			// We need to unlock all caches and the address space while reading
4454 			// the page in. Keep a reference to the cache around.
4455 			cache->AcquireRefLocked();
4456 			context.UnlockAll();
4457 
4458 			// read the page in
4459 			generic_io_vec vec;
4460 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4461 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4462 
4463 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4464 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4465 
4466 			cache->Lock();
4467 
4468 			if (status < B_OK) {
4469 				// on error remove and free the page
4470 				dprintf("reading page from cache %p returned: %s!\n",
4471 					cache, strerror(status));
4472 
4473 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4474 				cache->RemovePage(page);
4475 				vm_page_set_state(page, PAGE_STATE_FREE);
4476 
4477 				cache->ReleaseRefAndUnlock();
4478 				return status;
4479 			}
4480 
4481 			// mark the page unbusy again
4482 			cache->MarkPageUnbusy(page);
4483 
4484 			DEBUG_PAGE_ACCESS_END(page);
4485 
4486 			// Since we needed to unlock everything temporarily, the area
4487 			// situation might have changed. So we need to restart the whole
4488 			// process.
4489 			cache->ReleaseRefAndUnlock();
4490 			context.restart = true;
4491 			return B_OK;
4492 		}
4493 
4494 		cache = context.cacheChainLocker.LockSourceCache();
4495 	}
4496 
4497 	if (page == NULL) {
4498 		// There was no adequate page, determine the cache for a clean one.
4499 		// Read-only pages go into the deepest cache; only the topmost cache
4500 		// may have direct write access.
4501 		cache = context.isWrite ? context.topCache : lastCache;
4502 
4503 		// allocate a clean page
4504 		page = vm_page_allocate_page(&context.reservation,
4505 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4506 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4507 			page->physical_page_number));
4508 
4509 		// insert the new page into our cache
4510 		cache->InsertPage(page, context.cacheOffset);
4511 		context.pageAllocated = true;
4512 	} else if (page->Cache() != context.topCache && context.isWrite) {
4513 		// We have a page that has the data we want, but in the wrong cache
4514 		// object so we need to copy it and stick it into the top cache.
4515 		vm_page* sourcePage = page;
4516 
4517 		// TODO: If memory is low, it might be a good idea to steal the page
4518 		// from our source cache -- if possible, that is.
4519 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4520 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4521 
4522 		// To not needlessly kill concurrency we unlock all caches but the top
4523 		// one while copying the page. Lacking another mechanism to ensure that
4524 		// the source page doesn't disappear, we mark it busy.
4525 		sourcePage->busy = true;
4526 		context.cacheChainLocker.UnlockKeepRefs(true);
4527 
4528 		// copy the page
4529 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4530 			sourcePage->physical_page_number * B_PAGE_SIZE);
4531 
4532 		context.cacheChainLocker.RelockCaches(true);
4533 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4534 
4535 		// insert the new page into our cache
4536 		context.topCache->InsertPage(page, context.cacheOffset);
4537 		context.pageAllocated = true;
4538 	} else
4539 		DEBUG_PAGE_ACCESS_START(page);
4540 
4541 	context.page = page;
4542 	return B_OK;
4543 }
4544 
4545 
4546 /*!	Makes sure the address in the given address space is mapped.
4547 
4548 	\param addressSpace The address space.
4549 	\param originalAddress The address. Doesn't need to be page aligned.
4550 	\param isWrite If \c true the address shall be write-accessible.
	\param isExecute If \c true the address shall be executable.
4551 	\param isUser If \c true the access is requested by a userland team.
4552 	\param wirePage On success, if non \c NULL, the wired count of the page
4553 		mapped at the given address is incremented and the page is returned
4554 		via this parameter.
4555 	\return \c B_OK on success, another error code otherwise.
4556 */
4557 static status_t
4558 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4559 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4560 {
4561 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4562 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4563 		originalAddress, isWrite, isUser));
4564 
4565 	PageFaultContext context(addressSpace, isWrite);
4566 
4567 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4568 	status_t status = B_OK;
4569 
4570 	addressSpace->IncrementFaultCount();
4571 
4572 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4573 	// the pages upfront makes sure we don't have any cache locked, so that the
4574 	// page daemon/thief can do their job without problems.
4575 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4576 		originalAddress);
4577 	context.addressSpaceLocker.Unlock();
4578 	vm_page_reserve_pages(&context.reservation, reservePages,
4579 		addressSpace == VMAddressSpace::Kernel()
4580 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4581 
4582 	while (true) {
4583 		context.addressSpaceLocker.Lock();
4584 
4585 		// get the area the fault was in
4586 		VMArea* area = addressSpace->LookupArea(address);
4587 		if (area == NULL) {
4588 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4589 				"space\n", originalAddress);
4590 			TPF(PageFaultError(-1,
4591 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4592 			status = B_BAD_ADDRESS;
4593 			break;
4594 		}
4595 
4596 		// check permissions
4597 		uint32 protection = get_area_page_protection(area, address);
4598 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4599 			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4600 				area->id, (void*)originalAddress);
4601 			TPF(PageFaultError(area->id,
4602 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4603 			status = B_PERMISSION_DENIED;
4604 			break;
4605 		}
4606 		if (isWrite && (protection
4607 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4608 			dprintf("write access attempted on write-protected area 0x%"
4609 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4610 			TPF(PageFaultError(area->id,
4611 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4612 			status = B_PERMISSION_DENIED;
4613 			break;
4614 		} else if (isExecute && (protection
4615 				& (B_EXECUTE_AREA
4616 					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4617 			dprintf("instruction fetch attempted on execute-protected area 0x%"
4618 				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4619 			TPF(PageFaultError(area->id,
4620 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4621 			status = B_PERMISSION_DENIED;
4622 			break;
4623 		} else if (!isWrite && !isExecute && (protection
4624 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4625 			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4626 				" at %p\n", area->id, (void*)originalAddress);
4627 			TPF(PageFaultError(area->id,
4628 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4629 			status = B_PERMISSION_DENIED;
4630 			break;
4631 		}
4632 
4633 		// We have the area, it was a valid access, so let's try to resolve the
4634 		// page fault now.
4635 		// At first, the top most cache from the area is investigated.
4636 
4637 		context.Prepare(vm_area_get_locked_cache(area),
4638 			address - area->Base() + area->cache_offset);
4639 
4640 		// See if this cache has a fault handler -- this will do all the work
4641 		// for us.
4642 		{
4643 			// Note, since the page fault is resolved with interrupts enabled,
4644 			// the fault handler could be called more than once for the same
4645 			// reason -- the store must take this into account.
4646 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4647 			if (status != B_BAD_HANDLER)
4648 				break;
4649 		}
4650 
4651 		// The top most cache has no fault handler, so let's see if the cache or
4652 		// its sources already have the page we're searching for (we're going
4653 		// from top to bottom).
4654 		status = fault_get_page(context);
4655 		if (status != B_OK) {
4656 			TPF(PageFaultError(area->id, status));
4657 			break;
4658 		}
4659 
4660 		if (context.restart)
4661 			continue;
4662 
4663 		// All went fine, all there is left to do is to map the page into the
4664 		// address space.
4665 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4666 			context.page));
4667 
4668 		// If the page doesn't reside in the area's cache, we need to make sure
4669 		// it's mapped in read-only, so that we cannot overwrite someone else's
4670 		// data (copy-on-write)
4671 		uint32 newProtection = protection;
4672 		if (context.page->Cache() != context.topCache && !isWrite)
4673 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4674 
4675 		bool unmapPage = false;
4676 		bool mapPage = true;
4677 
4678 		// check whether there's already a page mapped at the address
4679 		context.map->Lock();
4680 
4681 		phys_addr_t physicalAddress;
4682 		uint32 flags;
4683 		vm_page* mappedPage = NULL;
4684 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4685 			&& (flags & PAGE_PRESENT) != 0
4686 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4687 				!= NULL) {
4688 			// Yep there's already a page. If it's ours, we can simply adjust
4689 			// its protection. Otherwise we have to unmap it.
4690 			if (mappedPage == context.page) {
4691 				context.map->ProtectPage(area, address, newProtection);
4692 					// Note: We assume that ProtectPage() is atomic (i.e.
4693 					// the page isn't temporarily unmapped), otherwise we'd have
4694 					// to make sure it isn't wired.
4695 				mapPage = false;
4696 			} else
4697 				unmapPage = true;
4698 		}
4699 
4700 		context.map->Unlock();
4701 
4702 		if (unmapPage) {
4703 			// If the page is wired, we can't unmap it. Wait until it is unwired
4704 			// again and restart. Note that the page cannot be wired for
4705 			// writing, since it isn't in the topmost cache. So we can safely
4706 			// ignore ranges wired for writing (our own and other concurrent
4707 			// wiring attempts in progress) and in fact have to do that to avoid
4708 			// a deadlock.
4709 			VMAreaUnwiredWaiter waiter;
4710 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4711 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4712 				// unlock everything and wait
4713 				if (context.pageAllocated) {
4714 					// ... but since we allocated a page and inserted it into
4715 					// the top cache, remove and free it first. Otherwise we'd
4716 					// have a page from a lower cache mapped while an upper
4717 					// cache has a page that would shadow it.
4718 					context.topCache->RemovePage(context.page);
4719 					vm_page_free_etc(context.topCache, context.page,
4720 						&context.reservation);
4721 				} else
4722 					DEBUG_PAGE_ACCESS_END(context.page);
4723 
4724 				context.UnlockAll();
4725 				waiter.waitEntry.Wait();
4726 				continue;
4727 			}
4728 
4729 			// Note: The mapped page is a page of a lower cache. We are
4730 			// guaranteed to have that cache locked, our new page is a copy of
4731 			// that page, and the page is not busy. The logic for that guarantee
4732 			// is as follows: Since the page is mapped, it must live in the top
4733 			// cache (ruled out above) or any of its lower caches, and there is
4734 			// (was before the new page was inserted) no other page in any
4735 			// cache between the top cache and the page's cache (otherwise that
4736 			// would be mapped instead). That in turn means that our algorithm
4737 			// must have found it and therefore it cannot be busy either.
4738 			DEBUG_PAGE_ACCESS_START(mappedPage);
4739 			unmap_page(area, address);
4740 			DEBUG_PAGE_ACCESS_END(mappedPage);
4741 		}
4742 
4743 		if (mapPage) {
4744 			if (map_page(area, context.page, address, newProtection,
4745 					&context.reservation) != B_OK) {
4746 				// Mapping can only fail when the page mapping object couldn't
4747 				// be allocated. Save for the missing mapping everything is
4748 				// fine, though. If this was a regular page fault, we'll simply
4749 				// leave and probably fault again. To make sure we'll have more
4750 				// luck then, we ensure that the minimum object reserve is
4751 				// available.
4752 				DEBUG_PAGE_ACCESS_END(context.page);
4753 
4754 				context.UnlockAll();
4755 
4756 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4757 						!= B_OK) {
4758 					// Apparently the situation is serious. Let's get ourselves
4759 					// killed.
4760 					status = B_NO_MEMORY;
4761 				} else if (wirePage != NULL) {
4762 					// The caller expects us to wire the page. Since
4763 					// object_cache_reserve() succeeded, we should now be able
4764 					// to allocate a mapping structure. Restart.
4765 					continue;
4766 				}
4767 
4768 				break;
4769 			}
4770 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4771 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4772 
4773 		// also wire the page, if requested
4774 		if (wirePage != NULL && status == B_OK) {
4775 			increment_page_wired_count(context.page);
4776 			*wirePage = context.page;
4777 		}
4778 
4779 		DEBUG_PAGE_ACCESS_END(context.page);
4780 
4781 		break;
4782 	}
4783 
4784 	return status;
4785 }
4786 
4787 
4788 status_t
4789 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4790 {
4791 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4792 }
4793 
4794 status_t
4795 vm_put_physical_page(addr_t vaddr, void* handle)
4796 {
4797 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4798 }
4799 
4800 
4801 status_t
4802 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4803 	void** _handle)
4804 {
4805 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4806 }
4807 
4808 status_t
4809 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4810 {
4811 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4812 }
4813 
4814 
4815 status_t
4816 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4817 {
4818 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4819 }
4820 
4821 status_t
4822 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4823 {
4824 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4825 }
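
// The getters and putters above pair up; a minimal sketch of the usual pattern
// (identifier names are placeholders):
//
//	addr_t virtualAddress;
//	void* handle;
//	if (vm_get_physical_page(physicalAddress, &virtualAddress, &handle)
//			== B_OK) {
//		// ... access the page through virtualAddress ...
//		vm_put_physical_page(virtualAddress, handle);
//	}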
4826 
4827 
4828 void
4829 vm_get_info(system_info* info)
4830 {
4831 	swap_get_info(info);
4832 
4833 	MutexLocker locker(sAvailableMemoryLock);
4834 	info->needed_memory = sNeededMemory;
4835 	info->free_memory = sAvailableMemory;
4836 }
4837 
4838 
4839 uint32
4840 vm_num_page_faults(void)
4841 {
4842 	return sPageFaults;
4843 }
4844 
4845 
4846 off_t
4847 vm_available_memory(void)
4848 {
4849 	MutexLocker locker(sAvailableMemoryLock);
4850 	return sAvailableMemory;
4851 }
4852 
4853 
4854 off_t
4855 vm_available_not_needed_memory(void)
4856 {
4857 	MutexLocker locker(sAvailableMemoryLock);
4858 	return sAvailableMemory - sNeededMemory;
4859 }
4860 
4861 
4862 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4863 	debugger.
4864 */
4865 off_t
4866 vm_available_not_needed_memory_debug(void)
4867 {
4868 	return sAvailableMemory - sNeededMemory;
4869 }
4870 
4871 
4872 size_t
4873 vm_kernel_address_space_left(void)
4874 {
4875 	return VMAddressSpace::Kernel()->FreeSpace();
4876 }
4877 
4878 
4879 void
4880 vm_unreserve_memory(size_t amount)
4881 {
4882 	mutex_lock(&sAvailableMemoryLock);
4883 
4884 	sAvailableMemory += amount;
4885 
4886 	mutex_unlock(&sAvailableMemoryLock);
4887 }
4888 
4889 
4890 status_t
4891 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4892 {
4893 	size_t reserve = kMemoryReserveForPriority[priority];
4894 
4895 	MutexLocker locker(sAvailableMemoryLock);
4896 
4897 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4898 
4899 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4900 		sAvailableMemory -= amount;
4901 		return B_OK;
4902 	}
4903 
4904 	if (timeout <= 0)
4905 		return B_NO_MEMORY;
4906 
4907 	// turn timeout into an absolute timeout
4908 	timeout += system_time();
4909 
4910 	// loop until we've got the memory or the timeout occurs
4911 	do {
4912 		sNeededMemory += amount;
4913 
4914 		// call the low resource manager
4915 		locker.Unlock();
4916 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4917 			B_ABSOLUTE_TIMEOUT, timeout);
4918 		locker.Lock();
4919 
4920 		sNeededMemory -= amount;
4921 
4922 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4923 			sAvailableMemory -= amount;
4924 			return B_OK;
4925 		}
4926 	} while (timeout > system_time());
4927 
4928 	return B_NO_MEMORY;
4929 }
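
// Callers release what they reserved via vm_unreserve_memory(); a minimal
// sketch (the priority and timeout values are only examples):
//
//	if (vm_try_reserve_memory(size, VM_PRIORITY_USER, 1000000) != B_OK)
//		return B_NO_MEMORY;
//	// ... use the memory ...
//	vm_unreserve_memory(size);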
4930 
4931 
4932 status_t
4933 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4934 {
4935 	// NOTE: The caller is responsible for synchronizing calls to this function!
4936 
4937 	AddressSpaceReadLocker locker;
4938 	VMArea* area;
4939 	status_t status = locker.SetFromArea(id, area);
4940 	if (status != B_OK)
4941 		return status;
4942 
4943 	// nothing to do, if the type doesn't change
4944 	uint32 oldType = area->MemoryType();
4945 	if (type == oldType)
4946 		return B_OK;
4947 
4948 	// set the memory type of the area and the mapped pages
4949 	VMTranslationMap* map = area->address_space->TranslationMap();
4950 	map->Lock();
4951 	area->SetMemoryType(type);
4952 	map->ProtectArea(area, area->protection);
4953 	map->Unlock();
4954 
4955 	// set the physical memory type
4956 	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4957 	if (error != B_OK) {
4958 		// reset the memory type of the area and the mapped pages
4959 		map->Lock();
4960 		area->SetMemoryType(oldType);
4961 		map->ProtectArea(area, area->protection);
4962 		map->Unlock();
4963 		return error;
4964 	}
4965 
4966 	return B_OK;
4968 }
4969 
4970 
4971 /*!	This function enforces some protection properties:
4972 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4973 	 - if B_EXECUTE_AREA is set, B_KERNEL_EXECUTE_AREA is set as well
4974 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4975 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4976 	   and B_KERNEL_WRITE_AREA.
4977 */
4978 static void
4979 fix_protection(uint32* protection)
4980 {
4981 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4982 		if ((*protection & B_USER_PROTECTION) == 0
4983 			|| (*protection & B_WRITE_AREA) != 0)
4984 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4985 		else
4986 			*protection |= B_KERNEL_READ_AREA;
4987 		if ((*protection & B_EXECUTE_AREA) != 0)
4988 			*protection |= B_KERNEL_EXECUTE_AREA;
4989 	}
4990 }
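
// For example: a request for just B_READ_AREA becomes
// B_READ_AREA | B_KERNEL_READ_AREA, a request for B_READ_AREA | B_WRITE_AREA
// becomes B_READ_AREA | B_WRITE_AREA | B_KERNEL_READ_AREA
// | B_KERNEL_WRITE_AREA, and a protection of 0 defaults to
// B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA.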
4991 
4992 
4993 static void
4994 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4995 {
4996 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4997 	info->area = area->id;
4998 	info->address = (void*)area->Base();
4999 	info->size = area->Size();
5000 	info->protection = area->protection;
5001 	info->lock = B_FULL_LOCK;
5002 	info->team = area->address_space->ID();
5003 	info->copy_count = 0;
5004 	info->in_count = 0;
5005 	info->out_count = 0;
5006 		// TODO: retrieve real values here!
5007 
5008 	VMCache* cache = vm_area_get_locked_cache(area);
5009 
5010 	// Note, this is a simplification; the cache could be larger than this area
5011 	info->ram_size = cache->page_count * B_PAGE_SIZE;
5012 
5013 	vm_area_put_locked_cache(cache);
5014 }
5015 
5016 
5017 static status_t
5018 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
5019 {
5020 	// is newSize a multiple of B_PAGE_SIZE?
5021 	if (newSize & (B_PAGE_SIZE - 1))
5022 		return B_BAD_VALUE;
5023 
5024 	// lock all affected address spaces and the cache
5025 	VMArea* area;
5026 	VMCache* cache;
5027 
5028 	MultiAddressSpaceLocker locker;
5029 	AreaCacheLocker cacheLocker;
5030 
5031 	status_t status;
5032 	size_t oldSize;
5033 	bool anyKernelArea;
5034 	bool restart;
5035 
5036 	do {
5037 		anyKernelArea = false;
5038 		restart = false;
5039 
5040 		locker.Unset();
5041 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
5042 		if (status != B_OK)
5043 			return status;
5044 		cacheLocker.SetTo(cache, true);	// already locked
5045 
5046 		// enforce restrictions
5047 		if (!kernel) {
5048 			if ((area->protection & B_KERNEL_AREA) != 0)
5049 				return B_NOT_ALLOWED;
5050 			// TODO: Enforce all restrictions (team, etc.)!
5051 		}
5052 
5053 		oldSize = area->Size();
5054 		if (newSize == oldSize)
5055 			return B_OK;
5056 
5057 		if (cache->type != CACHE_TYPE_RAM)
5058 			return B_NOT_ALLOWED;
5059 
5060 		if (oldSize < newSize) {
5061 			// We need to check if all areas of this cache can be resized.
5062 			for (VMArea* current = cache->areas; current != NULL;
5063 					current = current->cache_next) {
5064 				if (!current->address_space->CanResizeArea(current, newSize))
5065 					return B_ERROR;
5066 				anyKernelArea
5067 					|= current->address_space == VMAddressSpace::Kernel();
5068 			}
5069 		} else {
5070 			// We're shrinking the areas, so we must make sure the affected
5071 			// ranges are not wired.
5072 			for (VMArea* current = cache->areas; current != NULL;
5073 					current = current->cache_next) {
5074 				anyKernelArea
5075 					|= current->address_space == VMAddressSpace::Kernel();
5076 
5077 				if (wait_if_area_range_is_wired(current,
5078 						current->Base() + newSize, oldSize - newSize, &locker,
5079 						&cacheLocker)) {
5080 					restart = true;
5081 					break;
5082 				}
5083 			}
5084 		}
5085 	} while (restart);
5086 
5087 	// Okay, looks good so far, so let's do it
5088 
5089 	int priority = kernel && anyKernelArea
5090 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5091 	uint32 allocationFlags = kernel && anyKernelArea
5092 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5093 
5094 	if (oldSize < newSize) {
5095 		// Growing the cache can fail, so we do it first.
5096 		status = cache->Resize(cache->virtual_base + newSize, priority);
5097 		if (status != B_OK)
5098 			return status;
5099 	}
5100 
5101 	for (VMArea* current = cache->areas; current != NULL;
5102 			current = current->cache_next) {
5103 		status = current->address_space->ResizeArea(current, newSize,
5104 			allocationFlags);
5105 		if (status != B_OK)
5106 			break;
5107 
5108 		// We also need to unmap all pages beyond the new size, if the area has
5109 		// shrunk
5110 		if (newSize < oldSize) {
5111 			VMCacheChainLocker cacheChainLocker(cache);
5112 			cacheChainLocker.LockAllSourceCaches();
5113 
5114 			unmap_pages(current, current->Base() + newSize,
5115 				oldSize - newSize);
5116 
5117 			cacheChainLocker.Unlock(cache);
5118 		}
5119 	}
5120 
5121 	if (status == B_OK) {
5122 		// Shrink or grow individual page protections if in use.
5123 		if (area->page_protections != NULL) {
5124 			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5125 			uint8* newProtections
5126 				= (uint8*)realloc(area->page_protections, bytes);
5127 			if (newProtections == NULL)
5128 				status = B_NO_MEMORY;
5129 			else {
5130 				area->page_protections = newProtections;
5131 
5132 				if (oldSize < newSize) {
5133 					// init the additional page protections to that of the area
5134 					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5135 					uint32 areaProtection = area->protection
5136 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5137 					memset(area->page_protections + offset,
5138 						areaProtection | (areaProtection << 4), bytes - offset);
5139 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5140 						uint8& entry = area->page_protections[offset - 1];
5141 						entry = (entry & 0x0f) | (areaProtection << 4);
5142 					}
5143 				}
5144 			}
5145 		}
5146 	}
5147 
5148 	// shrinking the cache can't fail, so we do it now
5149 	if (status == B_OK && newSize < oldSize)
5150 		status = cache->Resize(cache->virtual_base + newSize, priority);
5151 
5152 	if (status != B_OK) {
5153 		// Something failed -- resize the areas back to their original size.
5154 		// This can fail, too, in which case we're seriously screwed.
5155 		for (VMArea* current = cache->areas; current != NULL;
5156 				current = current->cache_next) {
5157 			if (current->address_space->ResizeArea(current, oldSize,
5158 					allocationFlags) != B_OK) {
5159 				panic("vm_resize_area(): Failed and not being able to restore "
5160 					"original state.");
5161 			}
5162 		}
5163 
5164 		cache->Resize(cache->virtual_base + oldSize, priority);
5165 	}
5166 
5167 	// TODO: we must honour the lock restrictions of this area
5168 	return status;
5169 }
5170 
5171 
5172 status_t
5173 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5174 {
5175 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5176 }
5177 
5178 
5179 status_t
5180 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5181 {
5182 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5183 }
5184 
5185 
5186 status_t
5187 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5188 	bool user)
5189 {
5190 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5191 }
5192 
5193 
5194 void
5195 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5196 {
5197 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5198 }
5199 
5200 
5201 /*!	Copies a range of memory directly from/to a page that might not be mapped
5202 	at the moment.
5203 
5204 	For \a unsafeMemory the current mapping (if any) is ignored. The function
5205 	walks through the respective area's cache chain to find the physical page
5206 	and copies from/to it directly.
5207 	The memory range starting at \a unsafeMemory with a length of \a size bytes
5208 	must not cross a page boundary.
5209 
5210 	\param teamID The team ID identifying the address space \a unsafeMemory is
5211 		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5212 		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5213 		is passed, the address space of the thread returned by
5214 		debug_get_debugged_thread() is used.
5215 	\param unsafeMemory The start of the unsafe memory range to be copied
5216 		from/to.
5217 	\param buffer A safely accessible kernel buffer to be copied from/to.
5218 	\param size The number of bytes to be copied.
5219 	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5220 		\a unsafeMemory, the other way around otherwise.
5221 */
5222 status_t
5223 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5224 	size_t size, bool copyToUnsafe)
5225 {
5226 	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5227 			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5228 		return B_BAD_VALUE;
5229 	}
5230 
5231 	// get the address space for the debugged thread
5232 	VMAddressSpace* addressSpace;
5233 	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5234 		addressSpace = VMAddressSpace::Kernel();
5235 	} else if (teamID == B_CURRENT_TEAM) {
5236 		Thread* thread = debug_get_debugged_thread();
5237 		if (thread == NULL || thread->team == NULL)
5238 			return B_BAD_ADDRESS;
5239 
5240 		addressSpace = thread->team->address_space;
5241 	} else
5242 		addressSpace = VMAddressSpace::DebugGet(teamID);
5243 
5244 	if (addressSpace == NULL)
5245 		return B_BAD_ADDRESS;
5246 
5247 	// get the area
5248 	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5249 	if (area == NULL)
5250 		return B_BAD_ADDRESS;
5251 
5252 	// search the page
5253 	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5254 		+ area->cache_offset;
5255 	VMCache* cache = area->cache;
5256 	vm_page* page = NULL;
5257 	while (cache != NULL) {
5258 		page = cache->DebugLookupPage(cacheOffset);
5259 		if (page != NULL)
5260 			break;
5261 
5262 		// Page not found in this cache -- if it is paged out, we must not try
5263 		// to get it from lower caches.
5264 		if (cache->DebugHasPage(cacheOffset))
5265 			break;
5266 
5267 		cache = cache->source;
5268 	}
5269 
5270 	if (page == NULL)
5271 		return B_UNSUPPORTED;
5272 
5273 	// copy from/to physical memory
5274 	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5275 		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5276 
5277 	if (copyToUnsafe) {
5278 		if (page->Cache() != area->cache)
5279 			return B_UNSUPPORTED;
5280 
5281 		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5282 	}
5283 
5284 	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5285 }
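

/*	Illustrative sketch: how a debugger command might read a 32 bit value from
	the currently debugged thread's team via vm_debug_copy_page_memory(). The
	helper name debug_read_user_uint32() is hypothetical.
*/
static status_t
debug_read_user_uint32(void* unsafeAddress, uint32* _value)
{
	// the documented constraint: the range must not cross a page boundary
	if (ROUNDDOWN((addr_t)unsafeAddress, B_PAGE_SIZE)
			!= ROUNDDOWN((addr_t)unsafeAddress + sizeof(uint32) - 1,
				B_PAGE_SIZE)) {
		return B_BAD_VALUE;
	}

	// copy from the (possibly unmapped) page into the kernel buffer
	return vm_debug_copy_page_memory(B_CURRENT_TEAM, unsafeAddress, _value,
		sizeof(uint32), false);
}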
5286 
5287 
5288 //	#pragma mark - kernel public API
5289 
5290 
5291 status_t
5292 user_memcpy(void* to, const void* from, size_t size)
5293 {
5294 	// don't allow address overflows
5295 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5296 		return B_BAD_ADDRESS;
5297 
5298 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5299 		return B_BAD_ADDRESS;
5300 
5301 	return B_OK;
5302 }
5303 
5304 
5305 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5306 	the string in \a to, NULL-terminating the result.
5307 
5308 	\param to Pointer to the destination C-string.
5309 	\param from Pointer to the source C-string.
5310 	\param size Size in bytes of the string buffer pointed to by \a to.
5311 
5312 	\return strlen(\a from) on success, an error code otherwise.
5313 */
5314 ssize_t
5315 user_strlcpy(char* to, const char* from, size_t size)
5316 {
5317 	if (to == NULL && size != 0)
5318 		return B_BAD_VALUE;
5319 	if (from == NULL)
5320 		return B_BAD_ADDRESS;
5321 
5322 	// limit size to avoid address overflows
5323 	size_t maxSize = std::min((addr_t)size,
5324 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5325 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5326 		// the source address might still overflow.
5327 
5328 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5329 
5330 	// If we hit the address overflow boundary, fail.
5331 	if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5332 			&& maxSize < size)) {
5333 		return B_BAD_ADDRESS;
5334 	}
5335 
5336 	return result;
5337 }
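

/*	Illustrative sketch: the usual pattern for fetching a user supplied name
	with user_strlcpy() and detecting truncation. The helper name
	example_copy_area_name() is hypothetical.
*/
static status_t
example_copy_area_name(const char* userName, char name[B_OS_NAME_LENGTH])
{
	if (!IS_USER_ADDRESS(userName))
		return B_BAD_ADDRESS;

	// the return value is the untruncated length of the source (or an error)
	ssize_t length = user_strlcpy(name, userName, B_OS_NAME_LENGTH);
	if (length < 0)
		return (status_t)length;
	if (length >= B_OS_NAME_LENGTH)
		return B_NAME_TOO_LONG;

	return B_OK;
}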
5338 
5339 
5340 status_t
5341 user_memset(void* s, char c, size_t count)
5342 {
5343 	// don't allow address overflows
5344 	if ((addr_t)s + count < (addr_t)s)
5345 		return B_BAD_ADDRESS;
5346 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5347 		return B_BAD_ADDRESS;
5348 
5349 	return B_OK;
5350 }
5351 
5352 
5353 /*!	Wires a single page at the given address.
5354 
5355 	\param team The team whose address space the address belongs to. Supports
5356 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5357 		parameter is ignored.
5358 	\param address The virtual address to wire down. Does not need to
5359 		be page aligned.
5360 	\param writable If \c true the page shall be writable.
5361 	\param info On success filled in, among other things containing the
5362 		physical address the given virtual address translates to.
5363 	\return \c B_OK if the page could be wired, another error code otherwise.
5364 */
5365 status_t
5366 vm_wire_page(team_id team, addr_t address, bool writable,
5367 	VMPageWiringInfo* info)
5368 {
5369 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5370 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5371 
5372 	// compute the page protection that is required
5373 	bool isUser = IS_USER_ADDRESS(address);
5374 	uint32 requiredProtection = PAGE_PRESENT
5375 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5376 	if (writable)
5377 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5378 
5379 	// get and read lock the address space
5380 	VMAddressSpace* addressSpace = NULL;
5381 	if (isUser) {
5382 		if (team == B_CURRENT_TEAM)
5383 			addressSpace = VMAddressSpace::GetCurrent();
5384 		else
5385 			addressSpace = VMAddressSpace::Get(team);
5386 	} else
5387 		addressSpace = VMAddressSpace::GetKernel();
5388 	if (addressSpace == NULL)
5389 		return B_ERROR;
5390 
5391 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5392 
5393 	VMTranslationMap* map = addressSpace->TranslationMap();
5394 	status_t error = B_OK;
5395 
5396 	// get the area
5397 	VMArea* area = addressSpace->LookupArea(pageAddress);
5398 	if (area == NULL) {
5399 		addressSpace->Put();
5400 		return B_BAD_ADDRESS;
5401 	}
5402 
5403 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5404 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5405 
5406 	// mark the area range wired
5407 	area->Wire(&info->range);
5408 
5409 	// Lock the area's cache chain and the translation map. Needed to look
5410 	// up the page and play with its wired count.
5411 	cacheChainLocker.LockAllSourceCaches();
5412 	map->Lock();
5413 
5414 	phys_addr_t physicalAddress;
5415 	uint32 flags;
5416 	vm_page* page;
5417 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5418 		&& (flags & requiredProtection) == requiredProtection
5419 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5420 			!= NULL) {
5421 		// Already mapped with the correct permissions -- just increment
5422 		// the page's wired count.
5423 		increment_page_wired_count(page);
5424 
5425 		map->Unlock();
5426 		cacheChainLocker.Unlock();
5427 		addressSpaceLocker.Unlock();
5428 	} else {
5429 		// Let vm_soft_fault() map the page for us, if possible. We need
5430 		// to fully unlock to avoid deadlocks. Since we have already
5431 		// wired the area itself, nothing disturbing will happen with it
5432 		// in the meantime.
5433 		map->Unlock();
5434 		cacheChainLocker.Unlock();
5435 		addressSpaceLocker.Unlock();
5436 
5437 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5438 			isUser, &page);
5439 
5440 		if (error != B_OK) {
5441 			// The page could not be mapped -- clean up.
5442 			VMCache* cache = vm_area_get_locked_cache(area);
5443 			area->Unwire(&info->range);
5444 			cache->ReleaseRefAndUnlock();
5445 			addressSpace->Put();
5446 			return error;
5447 		}
5448 	}
5449 
5450 	info->physicalAddress
5451 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5452 			+ address % B_PAGE_SIZE;
5453 	info->page = page;
5454 
5455 	return B_OK;
5456 }
5457 
5458 
5459 /*!	Unwires a single page previously wired via vm_wire_page().
5460 
5461 	\param info The same object passed to vm_wire_page() before.
5462 */
5463 void
5464 vm_unwire_page(VMPageWiringInfo* info)
5465 {
5466 	// lock the address space
5467 	VMArea* area = info->range.area;
5468 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5469 		// takes over our reference
5470 
5471 	// lock the top cache
5472 	VMCache* cache = vm_area_get_locked_cache(area);
5473 	VMCacheChainLocker cacheChainLocker(cache);
5474 
5475 	if (info->page->Cache() != cache) {
5476 		// The page is not in the top cache, so we lock the whole cache chain
5477 		// before touching the page's wired count.
5478 		cacheChainLocker.LockAllSourceCaches();
5479 	}
5480 
5481 	decrement_page_wired_count(info->page);
5482 
5483 	// remove the wired range from the area
5484 	area->Unwire(&info->range);
5485 
5486 	cacheChainLocker.Unlock();
5487 }
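

/*	Illustrative sketch: the intended pairing of vm_wire_page() and
	vm_unwire_page(), here used to peek at a 32 bit value that is assumed not
	to straddle a page boundary. The helper name example_peek_wired() is
	hypothetical.
*/
static status_t
example_peek_wired(team_id team, addr_t address, uint32* _value)
{
	// wire the page so it can neither be unmapped nor paged out meanwhile
	VMPageWiringInfo info;
	status_t error = vm_wire_page(team, address, false, &info);
	if (error != B_OK)
		return error;

	// info.physicalAddress already includes the in-page offset
	error = vm_memcpy_from_physical(_value, info.physicalAddress,
		sizeof(uint32), false);

	vm_unwire_page(&info);
	return error;
}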
5488 
5489 
5490 /*!	Wires down the given address range in the specified team's address space.
5491 
5492 	If successful the function
5493 	- acquires a reference to the specified team's address space,
5494 	- adds respective wired ranges to all areas that intersect with the given
5495 	  address range,
5496 	- makes sure all pages in the given address range are mapped with the
5497 	  requested access permissions and increments their wired count.
5498 
5499 	It fails when \a team doesn't specify a valid address space, when any part
5500 	of the specified address range is not covered by areas, when the concerned
5501 	areas don't allow mapping with the requested permissions, or when mapping
5502 	failed for another reason.
5503 
5504 	When successful the call must be balanced by an unlock_memory_etc() call
5505 	with the exact same parameters.
5506 
5507 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
5508 		is supported.
5509 	\param address The start of the address range to be wired.
5510 	\param numBytes The size of the address range to be wired.
5511 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5512 		requests that the range must be wired writable ("read from device
5513 		into memory").
5514 	\return \c B_OK on success, another error code otherwise.
5515 */
5516 status_t
5517 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5518 {
5519 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5520 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5521 
5522 	// compute the page protection that is required
5523 	bool isUser = IS_USER_ADDRESS(address);
5524 	bool writable = (flags & B_READ_DEVICE) == 0;
5525 	uint32 requiredProtection = PAGE_PRESENT
5526 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5527 	if (writable)
5528 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5529 
5530 	uint32 mallocFlags = isUser
5531 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5532 
5533 	// get and read lock the address space
5534 	VMAddressSpace* addressSpace = NULL;
5535 	if (isUser) {
5536 		if (team == B_CURRENT_TEAM)
5537 			addressSpace = VMAddressSpace::GetCurrent();
5538 		else
5539 			addressSpace = VMAddressSpace::Get(team);
5540 	} else
5541 		addressSpace = VMAddressSpace::GetKernel();
5542 	if (addressSpace == NULL)
5543 		return B_ERROR;
5544 
5545 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5546 		// We get a new address space reference here. The one we got above will
5547 		// be freed by unlock_memory_etc().
5548 
5549 	VMTranslationMap* map = addressSpace->TranslationMap();
5550 	status_t error = B_OK;
5551 
5552 	// iterate through all concerned areas
5553 	addr_t nextAddress = lockBaseAddress;
5554 	while (nextAddress != lockEndAddress) {
5555 		// get the next area
5556 		VMArea* area = addressSpace->LookupArea(nextAddress);
5557 		if (area == NULL) {
5558 			error = B_BAD_ADDRESS;
5559 			break;
5560 		}
5561 
5562 		addr_t areaStart = nextAddress;
5563 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5564 
5565 		// allocate the wired range (do that before locking the cache to avoid
5566 		// deadlocks)
5567 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5568 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5569 		if (range == NULL) {
5570 			error = B_NO_MEMORY;
5571 			break;
5572 		}
5573 
5574 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5575 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5576 
5577 		// mark the area range wired
5578 		area->Wire(range);
5579 
5580 		// Depending on the area cache type and the wiring, we may not need to
5581 		// look at the individual pages.
5582 		if (area->cache_type == CACHE_TYPE_NULL
5583 			|| area->cache_type == CACHE_TYPE_DEVICE
5584 			|| area->wiring == B_FULL_LOCK
5585 			|| area->wiring == B_CONTIGUOUS) {
5586 			nextAddress = areaEnd;
5587 			continue;
5588 		}
5589 
5590 		// Lock the area's cache chain and the translation map. Needed to look
5591 		// up pages and play with their wired count.
5592 		cacheChainLocker.LockAllSourceCaches();
5593 		map->Lock();
5594 
5595 		// iterate through the pages and wire them
5596 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5597 			phys_addr_t physicalAddress;
5598 			uint32 flags;
5599 
5600 			vm_page* page;
5601 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5602 				&& (flags & requiredProtection) == requiredProtection
5603 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5604 					!= NULL) {
5605 				// Already mapped with the correct permissions -- just increment
5606 				// the page's wired count.
5607 				increment_page_wired_count(page);
5608 			} else {
5609 				// Let vm_soft_fault() map the page for us, if possible. We need
5610 				// to fully unlock to avoid deadlocks. Since we have already
5611 				// wired the area itself, nothing disturbing will happen with it
5612 				// in the meantime.
5613 				map->Unlock();
5614 				cacheChainLocker.Unlock();
5615 				addressSpaceLocker.Unlock();
5616 
5617 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5618 					false, isUser, &page);
5619 
5620 				addressSpaceLocker.Lock();
5621 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5622 				cacheChainLocker.LockAllSourceCaches();
5623 				map->Lock();
5624 			}
5625 
5626 			if (error != B_OK)
5627 				break;
5628 		}
5629 
5630 		map->Unlock();
5631 
5632 		if (error == B_OK) {
5633 			cacheChainLocker.Unlock();
5634 		} else {
5635 			// An error occurred, so abort right here. If the current address
5636 			// is the first in this area, unwire the area, since we won't get
5637 			// to it when reverting what we've done so far.
5638 			if (nextAddress == areaStart) {
5639 				area->Unwire(range);
5640 				cacheChainLocker.Unlock();
5641 				range->~VMAreaWiredRange();
5642 				free_etc(range, mallocFlags);
5643 			} else
5644 				cacheChainLocker.Unlock();
5645 
5646 			break;
5647 		}
5648 	}
5649 
5650 	if (error != B_OK) {
5651 		// An error occurred, so unwire all that we've already wired. Note that
5652 		// even if not a single page was wired, unlock_memory_etc() is called
5653 		// to put the address space reference.
5654 		addressSpaceLocker.Unlock();
5655 		unlock_memory_etc(team, (void*)lockBaseAddress,
5656 			nextAddress - lockBaseAddress, flags);
5657 	}
5658 
5659 	return error;
5660 }
5661 
5662 
5663 status_t
5664 lock_memory(void* address, size_t numBytes, uint32 flags)
5665 {
5666 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5667 }
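

/*	Illustrative sketch: the balanced lock_memory()/unlock_memory() pattern a
	driver would use to keep a buffer's pages pinned during I/O. The helper
	name example_with_wired_buffer() is hypothetical.
*/
static status_t
example_with_wired_buffer(void* buffer, size_t length, uint32 flags)
{
	// wire all pages of the buffer; see lock_memory_etc() for the flags
	status_t error = lock_memory(buffer, length, flags);
	if (error != B_OK)
		return error;

	// ... the buffer can now be accessed without causing page faults ...

	// must be balanced with the exact same parameters
	return unlock_memory(buffer, length, flags);
}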
5668 
5669 
5670 /*!	Unwires an address range previously wired with lock_memory_etc().
5671 
5672 	Note that a call to this function must balance a previous lock_memory_etc()
5673 	call with exactly the same parameters.
5674 */
5675 status_t
5676 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5677 {
5678 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5679 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5680 
5681 	// compute the page protection that is required
5682 	bool isUser = IS_USER_ADDRESS(address);
5683 	bool writable = (flags & B_READ_DEVICE) == 0;
5684 	uint32 requiredProtection = PAGE_PRESENT
5685 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5686 	if (writable)
5687 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5688 
5689 	uint32 mallocFlags = isUser
5690 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5691 
5692 	// get and read lock the address space
5693 	VMAddressSpace* addressSpace = NULL;
5694 	if (isUser) {
5695 		if (team == B_CURRENT_TEAM)
5696 			addressSpace = VMAddressSpace::GetCurrent();
5697 		else
5698 			addressSpace = VMAddressSpace::Get(team);
5699 	} else
5700 		addressSpace = VMAddressSpace::GetKernel();
5701 	if (addressSpace == NULL)
5702 		return B_ERROR;
5703 
5704 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5705 		// Take over the address space reference. We don't unlock until we're
5706 		// done.
5707 
5708 	VMTranslationMap* map = addressSpace->TranslationMap();
5709 	status_t error = B_OK;
5710 
5711 	// iterate through all concerned areas
5712 	addr_t nextAddress = lockBaseAddress;
5713 	while (nextAddress != lockEndAddress) {
5714 		// get the next area
5715 		VMArea* area = addressSpace->LookupArea(nextAddress);
5716 		if (area == NULL) {
5717 			error = B_BAD_ADDRESS;
5718 			break;
5719 		}
5720 
5721 		addr_t areaStart = nextAddress;
5722 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5723 
5724 		// Lock the area's top cache. This is a requirement for
5725 		// VMArea::Unwire().
5726 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5727 
5728 		// Depending on the area cache type and the wiring, we may not need to
5729 		// look at the individual pages.
5730 		if (area->cache_type == CACHE_TYPE_NULL
5731 			|| area->cache_type == CACHE_TYPE_DEVICE
5732 			|| area->wiring == B_FULL_LOCK
5733 			|| area->wiring == B_CONTIGUOUS) {
5734 			// unwire the range (to avoid deadlocks we delete the range after
5735 			// unlocking the cache)
5736 			nextAddress = areaEnd;
5737 			VMAreaWiredRange* range = area->Unwire(areaStart,
5738 				areaEnd - areaStart, writable);
5739 			cacheChainLocker.Unlock();
5740 			if (range != NULL) {
5741 				range->~VMAreaWiredRange();
5742 				free_etc(range, mallocFlags);
5743 			}
5744 			continue;
5745 		}
5746 
5747 		// Lock the area's cache chain and the translation map. Needed to look
5748 		// up pages and play with their wired count.
5749 		cacheChainLocker.LockAllSourceCaches();
5750 		map->Lock();
5751 
5752 		// iterate through the pages and unwire them
5753 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5754 			phys_addr_t physicalAddress;
5755 			uint32 flags;
5756 
5757 			vm_page* page;
5758 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5759 				&& (flags & PAGE_PRESENT) != 0
5760 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5761 					!= NULL) {
5762 				// The page is still mapped -- just decrement
5763 				// its wired count.
5764 				decrement_page_wired_count(page);
5765 			} else {
5766 				panic("unlock_memory_etc(): Failed to unwire page: address "
5767 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5768 					nextAddress);
5769 				error = B_BAD_VALUE;
5770 				break;
5771 			}
5772 		}
5773 
5774 		map->Unlock();
5775 
5776 		// All pages are unwired. Remove the area's wired range as well (to
5777 		// avoid deadlocks we delete the range after unlocking the cache).
5778 		VMAreaWiredRange* range = area->Unwire(areaStart,
5779 			areaEnd - areaStart, writable);
5780 
5781 		cacheChainLocker.Unlock();
5782 
5783 		if (range != NULL) {
5784 			range->~VMAreaWiredRange();
5785 			free_etc(range, mallocFlags);
5786 		}
5787 
5788 		if (error != B_OK)
5789 			break;
5790 	}
5791 
5792 	// get rid of the address space reference lock_memory_etc() acquired
5793 	addressSpace->Put();
5794 
5795 	return error;
5796 }
5797 
5798 
5799 status_t
5800 unlock_memory(void* address, size_t numBytes, uint32 flags)
5801 {
5802 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5803 }
5804 
5805 
5806 /*!	Similar to get_memory_map(), but also allows specifying the address space
5807 	for the memory in question and has saner semantics.
5808 	Returns \c B_OK when the complete range could be translated or
5809 	\c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5810 	case the actual number of entries is written to \c *_numEntries. Any other
5811 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5812 	in this case.
5813 */
5814 status_t
5815 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5816 	physical_entry* table, uint32* _numEntries)
5817 {
5818 	uint32 numEntries = *_numEntries;
5819 	*_numEntries = 0;
5820 
5821 	VMAddressSpace* addressSpace;
5822 	addr_t virtualAddress = (addr_t)address;
5823 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5824 	phys_addr_t physicalAddress;
5825 	status_t status = B_OK;
5826 	int32 index = -1;
5827 	addr_t offset = 0;
5828 	bool interrupts = are_interrupts_enabled();
5829 
5830 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5831 		"entries)\n", team, address, numBytes, numEntries));
5832 
5833 	if (numEntries == 0 || numBytes == 0)
5834 		return B_BAD_VALUE;
5835 
5836 	// in which address space is the address to be found?
5837 	if (IS_USER_ADDRESS(virtualAddress)) {
5838 		if (team == B_CURRENT_TEAM)
5839 			addressSpace = VMAddressSpace::GetCurrent();
5840 		else
5841 			addressSpace = VMAddressSpace::Get(team);
5842 	} else
5843 		addressSpace = VMAddressSpace::GetKernel();
5844 
5845 	if (addressSpace == NULL)
5846 		return B_ERROR;
5847 
5848 	VMTranslationMap* map = addressSpace->TranslationMap();
5849 
5850 	if (interrupts)
5851 		map->Lock();
5852 
5853 	while (offset < numBytes) {
5854 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5855 		uint32 flags;
5856 
5857 		if (interrupts) {
5858 			status = map->Query((addr_t)address + offset, &physicalAddress,
5859 				&flags);
5860 		} else {
5861 			status = map->QueryInterrupt((addr_t)address + offset,
5862 				&physicalAddress, &flags);
5863 		}
5864 		if (status < B_OK)
5865 			break;
5866 		if ((flags & PAGE_PRESENT) == 0) {
5867 			panic("get_memory_map() called on unmapped memory!");
5868 			return B_BAD_ADDRESS;
5869 		}
5870 
5871 		if (index < 0 && pageOffset > 0) {
5872 			physicalAddress += pageOffset;
5873 			if (bytes > B_PAGE_SIZE - pageOffset)
5874 				bytes = B_PAGE_SIZE - pageOffset;
5875 		}
5876 
5877 		// need to switch to the next physical_entry?
5878 		if (index < 0 || table[index].address
5879 				!= physicalAddress - table[index].size) {
5880 			if ((uint32)++index + 1 > numEntries) {
5881 				// table too small
5882 				break;
5883 			}
5884 			table[index].address = physicalAddress;
5885 			table[index].size = bytes;
5886 		} else {
5887 			// the page fits into the current entry
5888 			table[index].size += bytes;
5889 		}
5890 
5891 		offset += bytes;
5892 	}
5893 
5894 	if (interrupts)
5895 		map->Unlock();
5896 
5897 	if (status != B_OK)
5898 		return status;
5899 
5900 	if ((uint32)index + 1 > numEntries) {
5901 		*_numEntries = index;
5902 		return B_BUFFER_OVERFLOW;
5903 	}
5904 
5905 	*_numEntries = index + 1;
5906 	return B_OK;
5907 }
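

/*	Illustrative sketch: translating a (previously wired) buffer into its
	physical runs, e.g. as a first step towards a scatter/gather list. The
	helper name example_dump_physical_runs() is hypothetical.
*/
static status_t
example_dump_physical_runs(const void* buffer, size_t length)
{
	physical_entry entries[8];
	uint32 count = 8;

	// B_BUFFER_OVERFLOW only means the table was too small; in that case
	// count still holds the number of valid entries
	status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
		entries, &count);
	if (error != B_OK && error != B_BUFFER_OVERFLOW)
		return error;

	for (uint32 i = 0; i < count; i++) {
		dprintf("run %" B_PRIu32 ": %#" B_PRIxPHYSADDR ", %" B_PRIu64
			" bytes\n", i, entries[i].address, (uint64)entries[i].size);
	}

	return error;
}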
5908 
5909 
5910 /*!	According to the BeBook, this function should always succeed.
5911 	This is no longer the case.
5912 */
5913 extern "C" int32
5914 __get_memory_map_haiku(const void* address, size_t numBytes,
5915 	physical_entry* table, int32 numEntries)
5916 {
5917 	uint32 entriesRead = numEntries;
5918 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5919 		table, &entriesRead);
5920 	if (error != B_OK)
5921 		return error;
5922 
5923 	// close the entry list
5924 
5925 	// if it's only one entry, we will silently accept the missing ending
5926 	if (numEntries == 1)
5927 		return B_OK;
5928 
5929 	if (entriesRead + 1 > (uint32)numEntries)
5930 		return B_BUFFER_OVERFLOW;
5931 
5932 	table[entriesRead].address = 0;
5933 	table[entriesRead].size = 0;
5934 
5935 	return B_OK;
5936 }
5937 
5938 
5939 area_id
5940 area_for(void* address)
5941 {
5942 	return vm_area_for((addr_t)address, true);
5943 }
5944 
5945 
5946 area_id
5947 find_area(const char* name)
5948 {
5949 	return VMAreaHash::Find(name);
5950 }
5951 
5952 
5953 status_t
5954 _get_area_info(area_id id, area_info* info, size_t size)
5955 {
5956 	if (size != sizeof(area_info) || info == NULL)
5957 		return B_BAD_VALUE;
5958 
5959 	AddressSpaceReadLocker locker;
5960 	VMArea* area;
5961 	status_t status = locker.SetFromArea(id, area);
5962 	if (status != B_OK)
5963 		return status;
5964 
5965 	fill_area_info(area, info, size);
5966 	return B_OK;
5967 }
5968 
5969 
5970 status_t
5971 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5972 {
5973 	addr_t nextBase = *(addr_t*)cookie;
5974 
5975 	// we're already through the list
5976 	if (nextBase == (addr_t)-1)
5977 		return B_ENTRY_NOT_FOUND;
5978 
5979 	if (team == B_CURRENT_TEAM)
5980 		team = team_get_current_team_id();
5981 
5982 	AddressSpaceReadLocker locker(team);
5983 	if (!locker.IsLocked())
5984 		return B_BAD_TEAM_ID;
5985 
5986 	VMArea* area;
5987 	for (VMAddressSpace::AreaIterator it
5988 				= locker.AddressSpace()->GetAreaIterator();
5989 			(area = it.Next()) != NULL;) {
5990 		if (area->Base() > nextBase)
5991 			break;
5992 	}
5993 
5994 	if (area == NULL) {
5995 		nextBase = (addr_t)-1;
5996 		return B_ENTRY_NOT_FOUND;
5997 	}
5998 
5999 	fill_area_info(area, info, size);
6000 	*cookie = (ssize_t)(area->Base());
6001 
6002 	return B_OK;
6003 }
6004 
6005 
6006 status_t
6007 set_area_protection(area_id area, uint32 newProtection)
6008 {
6009 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
6010 		newProtection, true);
6011 }
6012 
6013 
6014 status_t
6015 resize_area(area_id areaID, size_t newSize)
6016 {
6017 	return vm_resize_area(areaID, newSize, true);
6018 }
6019 
6020 
6021 /*!	Transfers the specified area to a new team. The caller must be the owner
6022 	of the area.
6023 */
6024 area_id
6025 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
6026 	bool kernel)
6027 {
6028 	area_info info;
6029 	status_t status = get_area_info(id, &info);
6030 	if (status != B_OK)
6031 		return status;
6032 
6033 	if (info.team != thread_get_current_thread()->team->id)
6034 		return B_PERMISSION_DENIED;
6035 
6036 	area_id clonedArea = vm_clone_area(target, info.name, _address,
6037 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6038 	if (clonedArea < 0)
6039 		return clonedArea;
6040 
6041 	status = vm_delete_area(info.team, id, kernel);
6042 	if (status != B_OK) {
6043 		vm_delete_area(target, clonedArea, kernel);
6044 		return status;
6045 	}
6046 
6047 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6048 
6049 	return clonedArea;
6050 }
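

/*	Illustrative sketch: handing an area owned by the current team over to
	another team, letting the kernel pick the target address. The helper name
	example_hand_over_area() is hypothetical.
*/
static area_id
example_hand_over_area(area_id area, team_id target)
{
	// transfer_area() clones the area into the target team and then deletes
	// the original, so the returned ID lives in the target team
	void* address = NULL;
	return transfer_area(area, &address, B_ANY_ADDRESS, target, true);
}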
6051 
6052 
6053 extern "C" area_id
6054 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6055 	size_t numBytes, uint32 addressSpec, uint32 protection,
6056 	void** _virtualAddress)
6057 {
6058 	if (!arch_vm_supports_protection(protection))
6059 		return B_NOT_SUPPORTED;
6060 
6061 	fix_protection(&protection);
6062 
6063 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6064 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6065 		false);
6066 }
6067 
6068 
6069 area_id
6070 clone_area(const char* name, void** _address, uint32 addressSpec,
6071 	uint32 protection, area_id source)
6072 {
6073 	if ((protection & B_KERNEL_PROTECTION) == 0)
6074 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6075 
6076 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6077 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6078 }
6079 
6080 
6081 area_id
6082 create_area_etc(team_id team, const char* name, uint32 size, uint32 lock,
6083 	uint32 protection, uint32 flags, uint32 guardSize,
6084 	const virtual_address_restrictions* virtualAddressRestrictions,
6085 	const physical_address_restrictions* physicalAddressRestrictions,
6086 	void** _address)
6087 {
6088 	fix_protection(&protection);
6089 
6090 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6091 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6092 		true, _address);
6093 }
6094 
6095 
6096 extern "C" area_id
6097 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6098 	size_t size, uint32 lock, uint32 protection)
6099 {
6100 	fix_protection(&protection);
6101 
6102 	virtual_address_restrictions virtualRestrictions = {};
6103 	virtualRestrictions.address = *_address;
6104 	virtualRestrictions.address_specification = addressSpec;
6105 	physical_address_restrictions physicalRestrictions = {};
6106 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6107 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6108 		true, _address);
6109 }
6110 
6111 
6112 status_t
6113 delete_area(area_id area)
6114 {
6115 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6116 }
6117 
6118 
6119 //	#pragma mark - Userland syscalls
6120 
6121 
6122 status_t
6123 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6124 	addr_t size)
6125 {
6126 	// filter out some unavailable values (for userland)
6127 	switch (addressSpec) {
6128 		case B_ANY_KERNEL_ADDRESS:
6129 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6130 			return B_BAD_VALUE;
6131 	}
6132 
6133 	addr_t address;
6134 
6135 	if (!IS_USER_ADDRESS(userAddress)
6136 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6137 		return B_BAD_ADDRESS;
6138 
6139 	status_t status = vm_reserve_address_range(
6140 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6141 		RESERVED_AVOID_BASE);
6142 	if (status != B_OK)
6143 		return status;
6144 
6145 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6146 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6147 			(void*)address, size);
6148 		return B_BAD_ADDRESS;
6149 	}
6150 
6151 	return B_OK;
6152 }
6153 
6154 
6155 status_t
6156 _user_unreserve_address_range(addr_t address, addr_t size)
6157 {
6158 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6159 		(void*)address, size);
6160 }
6161 
6162 
6163 area_id
6164 _user_area_for(void* address)
6165 {
6166 	return vm_area_for((addr_t)address, false);
6167 }
6168 
6169 
6170 area_id
6171 _user_find_area(const char* userName)
6172 {
6173 	char name[B_OS_NAME_LENGTH];
6174 
6175 	if (!IS_USER_ADDRESS(userName)
6176 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6177 		return B_BAD_ADDRESS;
6178 
6179 	return find_area(name);
6180 }
6181 
6182 
6183 status_t
6184 _user_get_area_info(area_id area, area_info* userInfo)
6185 {
6186 	if (!IS_USER_ADDRESS(userInfo))
6187 		return B_BAD_ADDRESS;
6188 
6189 	area_info info;
6190 	status_t status = get_area_info(area, &info);
6191 	if (status < B_OK)
6192 		return status;
6193 
6194 	// TODO: do we want to prevent userland from seeing kernel protections?
6195 	//info.protection &= B_USER_PROTECTION;
6196 
6197 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6198 		return B_BAD_ADDRESS;
6199 
6200 	return status;
6201 }
6202 
6203 
6204 status_t
6205 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6206 {
6207 	ssize_t cookie;
6208 
6209 	if (!IS_USER_ADDRESS(userCookie)
6210 		|| !IS_USER_ADDRESS(userInfo)
6211 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6212 		return B_BAD_ADDRESS;
6213 
6214 	area_info info;
6215 	status_t status = _get_next_area_info(team, &cookie, &info,
6216 		sizeof(area_info));
6217 	if (status != B_OK)
6218 		return status;
6219 
6220 	//info.protection &= B_USER_PROTECTION;
6221 
6222 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6223 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6224 		return B_BAD_ADDRESS;
6225 
6226 	return status;
6227 }
6228 
6229 
6230 status_t
6231 _user_set_area_protection(area_id area, uint32 newProtection)
6232 {
6233 	if ((newProtection & ~B_USER_PROTECTION) != 0)
6234 		return B_BAD_VALUE;
6235 
6236 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6237 		newProtection, false);
6238 }
6239 
6240 
6241 status_t
6242 _user_resize_area(area_id area, size_t newSize)
6243 {
6244 	// TODO: Since we restrict deleting of areas to those owned by the team,
6245 	// we should also do that for resizing (check other functions, too).
6246 	return vm_resize_area(area, newSize, false);
6247 }
6248 
6249 
6250 area_id
6251 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6252 	team_id target)
6253 {
6254 	// filter out some unavailable values (for userland)
6255 	switch (addressSpec) {
6256 		case B_ANY_KERNEL_ADDRESS:
6257 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6258 			return B_BAD_VALUE;
6259 	}
6260 
6261 	void* address;
6262 	if (!IS_USER_ADDRESS(userAddress)
6263 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6264 		return B_BAD_ADDRESS;
6265 
6266 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6267 	if (newArea < B_OK)
6268 		return newArea;
6269 
6270 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6271 		return B_BAD_ADDRESS;
6272 
6273 	return newArea;
6274 }
6275 
6276 
6277 area_id
6278 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6279 	uint32 protection, area_id sourceArea)
6280 {
6281 	char name[B_OS_NAME_LENGTH];
6282 	void* address;
6283 
6284 	// filter out some unavailable values (for userland)
6285 	switch (addressSpec) {
6286 		case B_ANY_KERNEL_ADDRESS:
6287 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6288 			return B_BAD_VALUE;
6289 	}
6290 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6291 		return B_BAD_VALUE;
6292 
6293 	if (!IS_USER_ADDRESS(userName)
6294 		|| !IS_USER_ADDRESS(userAddress)
6295 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6296 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6297 		return B_BAD_ADDRESS;
6298 
6299 	fix_protection(&protection);
6300 
6301 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6302 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6303 		false);
6304 	if (clonedArea < B_OK)
6305 		return clonedArea;
6306 
6307 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6308 		delete_area(clonedArea);
6309 		return B_BAD_ADDRESS;
6310 	}
6311 
6312 	return clonedArea;
6313 }
6314 
6315 
6316 area_id
6317 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6318 	size_t size, uint32 lock, uint32 protection)
6319 {
6320 	char name[B_OS_NAME_LENGTH];
6321 	void* address;
6322 
6323 	// filter out some unavailable values (for userland)
6324 	switch (addressSpec) {
6325 		case B_ANY_KERNEL_ADDRESS:
6326 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6327 			return B_BAD_VALUE;
6328 	}
6329 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6330 		return B_BAD_VALUE;
6331 
6332 	if (!IS_USER_ADDRESS(userName)
6333 		|| !IS_USER_ADDRESS(userAddress)
6334 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6335 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6336 		return B_BAD_ADDRESS;
6337 
6338 	if (addressSpec == B_EXACT_ADDRESS
6339 		&& IS_KERNEL_ADDRESS(address))
6340 		return B_BAD_VALUE;
6341 
6342 	if (addressSpec == B_ANY_ADDRESS)
6343 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6344 	if (addressSpec == B_BASE_ADDRESS)
6345 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6346 
6347 	fix_protection(&protection);
6348 
6349 	virtual_address_restrictions virtualRestrictions = {};
6350 	virtualRestrictions.address = address;
6351 	virtualRestrictions.address_specification = addressSpec;
6352 	physical_address_restrictions physicalRestrictions = {};
6353 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6354 		size, lock, protection, 0, 0, &virtualRestrictions,
6355 		&physicalRestrictions, false, &address);
6356 
6357 	if (area >= B_OK
6358 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6359 		delete_area(area);
6360 		return B_BAD_ADDRESS;
6361 	}
6362 
6363 	return area;
6364 }
6365 
6366 
6367 status_t
6368 _user_delete_area(area_id area)
6369 {
6370 	// Unlike the BeOS implementation, you can now only delete areas
6371 	// that you have created yourself from userland.
6372 	// The documentation to delete_area() explicitly states that this
6373 	// will be restricted in the future, and so it will.
6374 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6375 }
6376 
6377 
6378 // TODO: create a BeOS style call for this!
6379 
6380 area_id
6381 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6382 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6383 	int fd, off_t offset)
6384 {
6385 	char name[B_OS_NAME_LENGTH];
6386 	void* address;
6387 	area_id area;
6388 
6389 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6390 		return B_BAD_VALUE;
6391 
6392 	fix_protection(&protection);
6393 
6394 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6395 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6396 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6397 		return B_BAD_ADDRESS;
6398 
6399 	if (addressSpec == B_EXACT_ADDRESS) {
6400 		if ((addr_t)address + size < (addr_t)address
6401 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6402 			return B_BAD_VALUE;
6403 		}
6404 		if (!IS_USER_ADDRESS(address)
6405 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
6406 			return B_BAD_ADDRESS;
6407 		}
6408 	}
6409 
6410 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6411 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6412 		false);
6413 	if (area < B_OK)
6414 		return area;
6415 
6416 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6417 		return B_BAD_ADDRESS;
6418 
6419 	return area;
6420 }
6421 
6422 
6423 status_t
6424 _user_unmap_memory(void* _address, size_t size)
6425 {
6426 	addr_t address = (addr_t)_address;
6427 
6428 	// check params
6429 	if (size == 0 || (addr_t)address + size < (addr_t)address
6430 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6431 		return B_BAD_VALUE;
6432 	}
6433 
6434 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
6435 		return B_BAD_ADDRESS;
6436 
6437 	// Write lock the address space and ensure the address range is not wired.
6438 	AddressSpaceWriteLocker locker;
6439 	do {
6440 		status_t status = locker.SetTo(team_get_current_team_id());
6441 		if (status != B_OK)
6442 			return status;
6443 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6444 			size, &locker));
6445 
6446 	// unmap
6447 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6448 }
6449 
6450 
6451 status_t
6452 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6453 {
6454 	// check address range
6455 	addr_t address = (addr_t)_address;
6456 	size = PAGE_ALIGN(size);
6457 
6458 	if ((address % B_PAGE_SIZE) != 0)
6459 		return B_BAD_VALUE;
6460 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6461 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6462 		// weird error code required by POSIX
6463 		return ENOMEM;
6464 	}
6465 
6466 	// extend and check protection
6467 	if ((protection & ~B_USER_PROTECTION) != 0)
6468 		return B_BAD_VALUE;
6469 
6470 	fix_protection(&protection);
6471 
6472 	// We need to write lock the address space, since we're going to play with
6473 	// the areas. Also make sure that none of the areas is wired and that we're
6474 	// actually allowed to change the protection.
6475 	AddressSpaceWriteLocker locker;
6476 
6477 	bool restart;
6478 	do {
6479 		restart = false;
6480 
6481 		status_t status = locker.SetTo(team_get_current_team_id());
6482 		if (status != B_OK)
6483 			return status;
6484 
6485 		// First round: Check whether the whole range is covered by areas and we
6486 		// are allowed to modify them.
6487 		addr_t currentAddress = address;
6488 		size_t sizeLeft = size;
6489 		while (sizeLeft > 0) {
6490 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6491 			if (area == NULL)
6492 				return B_NO_MEMORY;
6493 
6494 			if ((area->protection & B_KERNEL_AREA) != 0)
6495 				return B_NOT_ALLOWED;
6496 
6497 			// TODO: For (shared) mapped files we should check whether the new
6498 			// protections are compatible with the file permissions. We don't
6499 			// have a way to do that yet, though.
6500 
6501 			addr_t offset = currentAddress - area->Base();
6502 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6503 
6504 			AreaCacheLocker cacheLocker(area);
6505 
6506 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6507 					&locker, &cacheLocker)) {
6508 				restart = true;
6509 				break;
6510 			}
6511 
6512 			cacheLocker.Unlock();
6513 
6514 			currentAddress += rangeSize;
6515 			sizeLeft -= rangeSize;
6516 		}
6517 	} while (restart);
6518 
6519 	// Second round: If the protections differ from that of the area, create a
6520 	// page protection array and re-map mapped pages.
6521 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6522 	addr_t currentAddress = address;
6523 	size_t sizeLeft = size;
6524 	while (sizeLeft > 0) {
6525 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6526 		if (area == NULL)
6527 			return B_NO_MEMORY;
6528 
6529 		addr_t offset = currentAddress - area->Base();
6530 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6531 
6532 		currentAddress += rangeSize;
6533 		sizeLeft -= rangeSize;
6534 
6535 		if (area->page_protections == NULL) {
6536 			if (area->protection == protection)
6537 				continue;
6538 
6539 			status_t status = allocate_area_page_protections(area);
6540 			if (status != B_OK)
6541 				return status;
6542 		}
6543 
6544 		// We need to lock the complete cache chain, since we potentially unmap
6545 		// pages of lower caches.
6546 		VMCache* topCache = vm_area_get_locked_cache(area);
6547 		VMCacheChainLocker cacheChainLocker(topCache);
6548 		cacheChainLocker.LockAllSourceCaches();
6549 
6550 		for (addr_t pageAddress = area->Base() + offset;
6551 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6552 			map->Lock();
6553 
6554 			set_area_page_protection(area, pageAddress, protection);
6555 
6556 			phys_addr_t physicalAddress;
6557 			uint32 flags;
6558 
6559 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6560 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6561 				map->Unlock();
6562 				continue;
6563 			}
6564 
6565 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6566 			if (page == NULL) {
6567 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6568 					"\n", area, physicalAddress);
6569 				map->Unlock();
6570 				return B_ERROR;
6571 			}
6572 
6573 			// If the page is not in the topmost cache and write access is
6574 			// requested, we have to unmap it. Otherwise we can re-map it with
6575 			// the new protection.
6576 			bool unmapPage = page->Cache() != topCache
6577 				&& (protection & B_WRITE_AREA) != 0;
6578 
6579 			if (!unmapPage)
6580 				map->ProtectPage(area, pageAddress, protection);
6581 
6582 			map->Unlock();
6583 
6584 			if (unmapPage) {
6585 				DEBUG_PAGE_ACCESS_START(page);
6586 				unmap_page(area, pageAddress);
6587 				DEBUG_PAGE_ACCESS_END(page);
6588 			}
6589 		}
6590 	}
6591 
6592 	return B_OK;
6593 }
6594 
6595 
6596 status_t
6597 _user_sync_memory(void* _address, size_t size, uint32 flags)
6598 {
6599 	addr_t address = (addr_t)_address;
6600 	size = PAGE_ALIGN(size);
6601 
6602 	// check params
6603 	if ((address % B_PAGE_SIZE) != 0)
6604 		return B_BAD_VALUE;
6605 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
6606 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
6607 		// weird error code required by POSIX
6608 		return ENOMEM;
6609 	}
6610 
6611 	bool writeSync = (flags & MS_SYNC) != 0;
6612 	bool writeAsync = (flags & MS_ASYNC) != 0;
6613 	if (writeSync && writeAsync)
6614 		return B_BAD_VALUE;
6615 
6616 	if (size == 0 || (!writeSync && !writeAsync))
6617 		return B_OK;
6618 
6619 	// iterate through the range and sync all concerned areas
6620 	while (size > 0) {
6621 		// read lock the address space
6622 		AddressSpaceReadLocker locker;
6623 		status_t error = locker.SetTo(team_get_current_team_id());
6624 		if (error != B_OK)
6625 			return error;
6626 
6627 		// get the first area
6628 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6629 		if (area == NULL)
6630 			return B_NO_MEMORY;
6631 
6632 		uint32 offset = address - area->Base();
6633 		size_t rangeSize = min_c(area->Size() - offset, size);
6634 		offset += area->cache_offset;
6635 
6636 		// lock the cache
6637 		AreaCacheLocker cacheLocker(area);
6638 		if (!cacheLocker)
6639 			return B_BAD_VALUE;
6640 		VMCache* cache = area->cache;
6641 
6642 		locker.Unlock();
6643 
6644 		uint32 firstPage = offset >> PAGE_SHIFT;
6645 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6646 
6647 		// write the pages
6648 		if (cache->type == CACHE_TYPE_VNODE) {
6649 			if (writeSync) {
6650 				// synchronous
6651 				error = vm_page_write_modified_page_range(cache, firstPage,
6652 					endPage);
6653 				if (error != B_OK)
6654 					return error;
6655 			} else {
6656 				// asynchronous
6657 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6658 				// TODO: This is probably not quite what is supposed to happen.
6659 				// Especially when a lot has to be written, it might take ages
6660 				// until it really hits the disk.
6661 			}
6662 		}
6663 
6664 		address += rangeSize;
6665 		size -= rangeSize;
6666 	}
6667 
6668 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6669 	// synchronize multiple mappings of the same file. In our VM they never get
6670 	// out of sync, though, so we don't have to do anything.
6671 
6672 	return B_OK;
6673 }
6674 
6675 
6676 status_t
6677 _user_memory_advice(void* address, size_t size, uint32 advice)
6678 {
6679 	// TODO: Implement!
6680 	return B_OK;
6681 }
6682 
6683 
6684 status_t
6685 _user_get_memory_properties(team_id teamID, const void* address,
6686 	uint32* _protected, uint32* _lock)
6687 {
6688 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6689 		return B_BAD_ADDRESS;
6690 
6691 	AddressSpaceReadLocker locker;
6692 	status_t error = locker.SetTo(teamID);
6693 	if (error != B_OK)
6694 		return error;
6695 
6696 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6697 	if (area == NULL)
6698 		return B_NO_MEMORY;
6699 
6700 
6701 	uint32 protection = area->protection;
6702 	if (area->page_protections != NULL)
6703 		protection = get_area_page_protection(area, (addr_t)address);
6704 
6705 	uint32 wiring = area->wiring;
6706 
6707 	locker.Unlock();
6708 
6709 	error = user_memcpy(_protected, &protection, sizeof(protection));
6710 	if (error != B_OK)
6711 		return error;
6712 
6713 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6714 
6715 	return error;
6716 }
6717 
6718 
6719 // #pragma mark -- compatibility
6720 
6721 
6722 #if defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6723 
6724 
6725 struct physical_entry_beos {
6726 	uint32	address;
6727 	uint32	size;
6728 };
6729 
6730 
6731 /*!	The physical_entry structure has changed. We need to translate it to the
6732 	old one.
6733 */
6734 extern "C" int32
6735 __get_memory_map_beos(const void* _address, size_t numBytes,
6736 	physical_entry_beos* table, int32 numEntries)
6737 {
6738 	if (numEntries <= 0)
6739 		return B_BAD_VALUE;
6740 
6741 	const uint8* address = (const uint8*)_address;
6742 
6743 	int32 count = 0;
6744 	while (numBytes > 0 && count < numEntries) {
6745 		physical_entry entry;
6746 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6747 		if (result < 0) {
6748 			if (result != B_BUFFER_OVERFLOW)
6749 				return result;
6750 		}
6751 
6752 		if (entry.address >= (phys_addr_t)1 << 32) {
6753 			panic("get_memory_map(): Address is greater than 4 GB!");
6754 			return B_ERROR;
6755 		}
6756 
6757 		table[count].address = entry.address;
6758 		table[count++].size = entry.size;
6759 
6760 		address += entry.size;
6761 		numBytes -= entry.size;
6762 	}
6763 
6764 	// null-terminate the table, if possible
6765 	if (count < numEntries) {
6766 		table[count].address = 0;
6767 		table[count].size = 0;
6768 	}
6769 
6770 	return B_OK;
6771 }
6772 
6773 
6774 /*!	The type of the \a physicalAddress parameter has changed from void* to
6775 	phys_addr_t.
6776 */
6777 extern "C" area_id
6778 __map_physical_memory_beos(const char* name, void* physicalAddress,
6779 	size_t numBytes, uint32 addressSpec, uint32 protection,
6780 	void** _virtualAddress)
6781 {
6782 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6783 		addressSpec, protection, _virtualAddress);
6784 }
6785 
6786 
6787 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6788 	we meddle with the \a lock parameter to force 32 bit.
6789 */
6790 extern "C" area_id
6791 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6792 	size_t size, uint32 lock, uint32 protection)
6793 {
6794 	switch (lock) {
6795 		case B_NO_LOCK:
6796 			break;
6797 		case B_FULL_LOCK:
6798 		case B_LAZY_LOCK:
6799 			lock = B_32_BIT_FULL_LOCK;
6800 			break;
6801 		case B_CONTIGUOUS:
6802 			lock = B_32_BIT_CONTIGUOUS;
6803 			break;
6804 	}
6805 
6806 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6807 		protection);
6808 }
6809 
6810 
6811 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6812 	"BASE");
6813 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6814 	"map_physical_memory@", "BASE");
6815 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6816 	"BASE");
6817 
6818 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6819 	"get_memory_map@@", "1_ALPHA3");
6820 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6821 	"map_physical_memory@@", "1_ALPHA3");
6822 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6823 	"1_ALPHA3");
6824 
6825 
6826 #else
6827 
6828 
6829 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6830 	"get_memory_map@@", "BASE");
6831 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6832 	"map_physical_memory@@", "BASE");
6833 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6834 	"BASE");
6835 
6836 
6837 #endif	// defined(__INTEL__) && B_HAIKU_PHYSICAL_BITS > 32
6838