xref: /haiku/src/system/kernel/vm/vm.cpp (revision 62f5ba006a08b0df30631375878effaf67ae5dbc)
1 /*
2  * Copyright 2009-2010, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <arch/cpu.h>
27 #include <arch/vm.h>
28 #include <boot/elf.h>
29 #include <boot/stage2.h>
30 #include <condition_variable.h>
31 #include <console.h>
32 #include <debug.h>
33 #include <file_cache.h>
34 #include <fs/fd.h>
35 #include <heap.h>
36 #include <kernel.h>
37 #include <int.h>
38 #include <lock.h>
39 #include <low_resource_manager.h>
40 #include <slab/Slab.h>
41 #include <smp.h>
42 #include <system_info.h>
43 #include <thread.h>
44 #include <team.h>
45 #include <tracing.h>
46 #include <util/AutoLock.h>
47 #include <util/khash.h>
48 #include <vm/vm_page.h>
49 #include <vm/vm_priv.h>
50 #include <vm/VMAddressSpace.h>
51 #include <vm/VMArea.h>
52 #include <vm/VMCache.h>
53 
54 #include "VMAddressSpaceLocking.h"
55 #include "VMAnonymousCache.h"
56 #include "IORequest.h"
57 
58 
59 //#define TRACE_VM
60 //#define TRACE_FAULTS
61 #ifdef TRACE_VM
62 #	define TRACE(x) dprintf x
63 #else
64 #	define TRACE(x) ;
65 #endif
66 #ifdef TRACE_FAULTS
67 #	define FTRACE(x) dprintf x
68 #else
69 #	define FTRACE(x) ;
70 #endif
71 
72 
73 class AreaCacheLocking {
74 public:
75 	inline bool Lock(VMCache* lockable)
76 	{
77 		return false;
78 	}
79 
80 	inline void Unlock(VMCache* lockable)
81 	{
82 		vm_area_put_locked_cache(lockable);
83 	}
84 };
85 
86 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
87 public:
88 	inline AreaCacheLocker(VMCache* cache = NULL)
89 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
90 	{
91 	}
92 
93 	inline AreaCacheLocker(VMArea* area)
94 		: AutoLocker<VMCache, AreaCacheLocking>()
95 	{
96 		SetTo(area);
97 	}
98 
99 	inline void SetTo(VMArea* area)
100 	{
101 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
102 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
103 	}
104 };
105 
106 
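// Locks a chain of caches from a top (consumer) cache down through its
// source caches. Each locked source cache's UserData field is used to
// remember its consumer, so that Unlock() can release the locks and the
// references acquired along the way in source -> consumer order.
// Typical usage (compare cut_area() and delete_area() below):
//
//	VMCache* cache = vm_area_get_locked_cache(area);
//	VMCacheChainLocker cacheChainLocker(cache);
//	cacheChainLocker.LockAllSourceCaches();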
107 class VMCacheChainLocker {
108 public:
109 	VMCacheChainLocker()
110 		:
111 		fTopCache(NULL),
112 		fBottomCache(NULL)
113 	{
114 	}
115 
116 	VMCacheChainLocker(VMCache* topCache)
117 		:
118 		fTopCache(topCache),
119 		fBottomCache(topCache)
120 	{
121 	}
122 
123 	~VMCacheChainLocker()
124 	{
125 		Unlock();
126 	}
127 
128 	void SetTo(VMCache* topCache)
129 	{
130 		fTopCache = topCache;
131 		fBottomCache = topCache;
132 
133 		if (topCache != NULL)
134 			topCache->SetUserData(NULL);
135 	}
136 
137 	VMCache* LockSourceCache()
138 	{
139 		if (fBottomCache == NULL || fBottomCache->source == NULL)
140 			return NULL;
141 
142 		VMCache* previousCache = fBottomCache;
143 
144 		fBottomCache = fBottomCache->source;
145 		fBottomCache->Lock();
146 		fBottomCache->AcquireRefLocked();
147 		fBottomCache->SetUserData(previousCache);
148 
149 		return fBottomCache;
150 	}
151 
152 	void LockAllSourceCaches()
153 	{
154 		while (LockSourceCache() != NULL) {
155 		}
156 	}
157 
158 	void Unlock(VMCache* exceptCache = NULL)
159 	{
160 		if (fTopCache == NULL)
161 			return;
162 
163 		// Unlock caches in source -> consumer direction. This is important to
164 		// avoid double-locking and a reversal of locking order in case a cache
165 		// is eligible for merging.
166 		VMCache* cache = fBottomCache;
167 		while (cache != NULL) {
168 			VMCache* nextCache = (VMCache*)cache->UserData();
169 			if (cache != exceptCache)
170 				cache->ReleaseRefAndUnlock(cache != fTopCache);
171 
172 			if (cache == fTopCache)
173 				break;
174 
175 			cache = nextCache;
176 		}
177 
178 		fTopCache = NULL;
179 		fBottomCache = NULL;
180 	}
181 
182 	void UnlockKeepRefs(bool keepTopCacheLocked)
183 	{
184 		if (fTopCache == NULL)
185 			return;
186 
187 		VMCache* nextCache = fBottomCache;
188 		VMCache* cache = NULL;
189 
190 		while (keepTopCacheLocked
191 				? nextCache != fTopCache : cache != fTopCache) {
192 			cache = nextCache;
193 			nextCache = (VMCache*)cache->UserData();
194 			cache->Unlock(cache != fTopCache);
195 		}
196 	}
197 
198 	void RelockCaches(bool topCacheLocked)
199 	{
200 		if (fTopCache == NULL)
201 			return;
202 
203 		VMCache* nextCache = fTopCache;
204 		VMCache* cache = NULL;
205 		if (topCacheLocked) {
206 			cache = nextCache;
207 			nextCache = cache->source;
208 		}
209 
210 		while (cache != fBottomCache && nextCache != NULL) {
211 			VMCache* consumer = cache;
212 			cache = nextCache;
213 			nextCache = cache->source;
214 			cache->Lock();
215 			cache->SetUserData(consumer);
216 		}
217 	}
218 
219 private:
220 	VMCache*	fTopCache;
221 	VMCache*	fBottomCache;
222 };
223 
224 
225 // The memory reserve an allocation of a given priority must not touch.
226 static const size_t kMemoryReserveForPriority[] = {
227 	VM_MEMORY_RESERVE_USER,		// user
228 	VM_MEMORY_RESERVE_SYSTEM,	// system
229 	0							// VIP
230 };
231 
232 
233 ObjectCache* gPageMappingsObjectCache;
234 
235 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
236 
237 static off_t sAvailableMemory;
238 static off_t sNeededMemory;
239 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
240 static uint32 sPageFaults;
241 
242 static VMPhysicalPageMapper* sPhysicalPageMapper;
243 
244 #if DEBUG_CACHE_LIST
245 
246 struct cache_info {
247 	VMCache*	cache;
248 	addr_t		page_count;
249 	addr_t		committed;
250 };
251 
252 static const int kCacheInfoTableCount = 100 * 1024;
253 static cache_info* sCacheInfoTable;
254 
255 #endif	// DEBUG_CACHE_LIST
256 
257 
258 // function declarations
259 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
260 	bool addressSpaceCleanup);
261 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
262 	bool isWrite, bool isUser);
263 static status_t map_backing_store(VMAddressSpace* addressSpace,
264 	VMCache* cache, void** _virtualAddress, off_t offset, addr_t size,
265 	uint32 addressSpec, int wiring, int protection, int mapping,
266 	VMArea** _area, const char* areaName, uint32 flags, bool kernel);
267 
268 
269 //	#pragma mark -
270 
271 
272 #if VM_PAGE_FAULT_TRACING
273 
274 namespace VMPageFaultTracing {
275 
276 class PageFaultStart : public AbstractTraceEntry {
277 public:
278 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
279 		:
280 		fAddress(address),
281 		fPC(pc),
282 		fWrite(write),
283 		fUser(user)
284 	{
285 		Initialized();
286 	}
287 
288 	virtual void AddDump(TraceOutput& out)
289 	{
290 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
291 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
292 	}
293 
294 private:
295 	addr_t	fAddress;
296 	addr_t	fPC;
297 	bool	fWrite;
298 	bool	fUser;
299 };
300 
301 
302 // page fault errors
303 enum {
304 	PAGE_FAULT_ERROR_NO_AREA		= 0,
305 	PAGE_FAULT_ERROR_KERNEL_ONLY,
306 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
307 	PAGE_FAULT_ERROR_READ_PROTECTED,
308 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
309 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
310 };
311 
312 
313 class PageFaultError : public AbstractTraceEntry {
314 public:
315 	PageFaultError(area_id area, status_t error)
316 		:
317 		fArea(area),
318 		fError(error)
319 	{
320 		Initialized();
321 	}
322 
323 	virtual void AddDump(TraceOutput& out)
324 	{
325 		switch (fError) {
326 			case PAGE_FAULT_ERROR_NO_AREA:
327 				out.Print("page fault error: no area");
328 				break;
329 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
330 				out.Print("page fault error: area: %ld, kernel only", fArea);
331 				break;
332 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
333 				out.Print("page fault error: area: %ld, write protected",
334 					fArea);
335 				break;
336 			case PAGE_FAULT_ERROR_READ_PROTECTED:
337 				out.Print("page fault error: area: %ld, read protected", fArea);
338 				break;
339 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
340 				out.Print("page fault error: kernel touching bad user memory");
341 				break;
342 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
343 				out.Print("page fault error: no address space");
344 				break;
345 			default:
346 				out.Print("page fault error: area: %ld, error: %s", fArea,
347 					strerror(fError));
348 				break;
349 		}
350 	}
351 
352 private:
353 	area_id		fArea;
354 	status_t	fError;
355 };
356 
357 
358 class PageFaultDone : public AbstractTraceEntry {
359 public:
360 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
361 			vm_page* page)
362 		:
363 		fArea(area),
364 		fTopCache(topCache),
365 		fCache(cache),
366 		fPage(page)
367 	{
368 		Initialized();
369 	}
370 
371 	virtual void AddDump(TraceOutput& out)
372 	{
373 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
374 			"page: %p", fArea, fTopCache, fCache, fPage);
375 	}
376 
377 private:
378 	area_id		fArea;
379 	VMCache*	fTopCache;
380 	VMCache*	fCache;
381 	vm_page*	fPage;
382 };
383 
384 }	// namespace VMPageFaultTracing
385 
386 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
387 #else
388 #	define TPF(x) ;
389 #endif	// VM_PAGE_FAULT_TRACING
390 
391 
392 //	#pragma mark -
393 
394 
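// gMappedPagesCount counts pages that are either wired or mapped by at least
// one area. The two helpers below (and map_page()) adjust the counter whenever
// a page gains its first or loses its last reason to be counted.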
395 /*!	The page's cache must be locked.
396 */
397 static inline void
398 increment_page_wired_count(vm_page* page)
399 {
400 	if (page->wired_count++ == 0 && page->mappings.IsEmpty())
401 		atomic_add(&gMappedPagesCount, 1);
402 }
403 
404 
405 /*!	The page's cache must be locked.
406 */
407 static inline void
408 decrement_page_wired_count(vm_page* page)
409 {
410 	if (--page->wired_count == 0 && page->mappings.IsEmpty())
411 		atomic_add(&gMappedPagesCount, -1);
412 }
413 
414 
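// Returns the virtual address within \a area that corresponds to \a page,
// derived from the page's offset in the area's cache.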
415 static inline addr_t
416 virtual_page_address(VMArea* area, vm_page* page)
417 {
418 	return area->Base()
419 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
420 }
421 
422 
423 //! You need to have the address space locked when calling this function
424 static VMArea*
425 lookup_area(VMAddressSpace* addressSpace, area_id id)
426 {
427 	VMAreaHash::ReadLock();
428 
429 	VMArea* area = VMAreaHash::LookupLocked(id);
430 	if (area != NULL && area->address_space != addressSpace)
431 		area = NULL;
432 
433 	VMAreaHash::ReadUnlock();
434 
435 	return area;
436 }
437 
438 
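// An area's optional page_protections array stores one protection nibble
// (4 bits) per page: the page with the even index occupies the low nibble of
// a byte, the following page the high nibble. get_area_page_protection()
// additionally implies kernel read access, plus kernel write access if the
// page is user-writable.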
439 static inline void
440 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
441 {
442 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
443 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
444 	uint8& entry = area->page_protections[pageIndex / 2];
445 	if (pageIndex % 2 == 0)
446 		entry = (entry & 0xf0) | protection;
447 	else
448 		entry = (entry & 0x0f) | (protection << 4);
449 }
450 
451 
452 static inline uint32
453 get_area_page_protection(VMArea* area, addr_t pageAddress)
454 {
455 	if (area->page_protections == NULL)
456 		return area->protection;
457 
458 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
459 	uint32 protection = area->page_protections[pageIndex / 2];
460 	if (pageIndex % 2 == 0)
461 		protection &= 0x0f;
462 	else
463 		protection >>= 4;
464 
465 	return protection | B_KERNEL_READ_AREA
466 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
467 }
468 
469 
470 /*!	The caller must have reserved as many pages as the translation map
471 	implementation might need to map this page.
472 	The page's cache must be locked.
473 */
474 static status_t
475 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
476 	vm_page_reservation* reservation)
477 {
478 	VMTranslationMap* map = area->address_space->TranslationMap();
479 
480 	bool wasMapped = page->wired_count > 0 || !page->mappings.IsEmpty();
481 
482 	if (area->wiring == B_NO_LOCK) {
483 		DEBUG_PAGE_ACCESS_CHECK(page);
484 
485 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
486 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
487 			gPageMappingsObjectCache,
488 			CACHE_DONT_WAIT_FOR_MEMORY
489 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
490 		if (mapping == NULL)
491 			return B_NO_MEMORY;
492 
493 		mapping->page = page;
494 		mapping->area = area;
495 
496 		map->Lock();
497 
498 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
499 			reservation);
500 
501 		// insert mapping into lists
502 		if (page->mappings.IsEmpty() && page->wired_count == 0)
503 			atomic_add(&gMappedPagesCount, 1);
504 
505 		page->mappings.Add(mapping);
506 		area->mappings.Add(mapping);
507 
508 		map->Unlock();
509 	} else {
510 		DEBUG_PAGE_ACCESS_CHECK(page);
511 
512 		map->Lock();
513 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
514 			reservation);
515 		map->Unlock();
516 
517 		increment_page_wired_count(page);
518 	}
519 
520 	if (!wasMapped) {
521 		// The page is mapped now, so it must not remain in the cached queue.
522 		// It also makes sense to move it from the inactive to the active queue,
523 		// since otherwise the page daemon wouldn't keep track of it (in idle
524 		// mode) -- if the page isn't touched, it will be deactivated after a
525 		// full iteration through the queue at the latest.
526 		if (page->State() == PAGE_STATE_CACHED
527 				|| page->State() == PAGE_STATE_INACTIVE) {
528 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
529 		}
530 	}
531 
532 	return B_OK;
533 }
534 
535 
536 /*!	The caller must hold the lock of the page's cache.
538 */
539 static inline bool
540 unmap_page(VMArea* area, addr_t virtualAddress)
541 {
542 	return area->address_space->TranslationMap()->UnmapPage(area,
543 		virtualAddress, true);
544 }
545 
546 
547 /*!	The caller must hold the locks of all mapped pages' caches.
549 */
550 static inline void
551 unmap_pages(VMArea* area, addr_t base, size_t size)
552 {
553 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
554 }
555 
556 
557 /*!	Cuts a piece out of an area. If the given cut range covers the complete
558 	area, it is deleted. If it covers the beginning or the end, the area is
559 	resized accordingly. If the range covers some part in the middle of the
560 	area, it is split in two; in this case the second area is returned via
561 	\a _secondArea (the variable is left untouched in the other cases).
562 	The address space must be write locked.
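	For example, cutting [0x3000, 0x5000) out of an area spanning
	[0x1000, 0x8000) shrinks the area to [0x1000, 0x3000) and returns a new
	area covering [0x5000, 0x8000) via \a _secondArea.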
563 */
564 static status_t
565 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
566 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
567 {
568 	// Does the cut range intersect with the area at all?
569 	addr_t areaLast = area->Base() + (area->Size() - 1);
570 	if (area->Base() > lastAddress || areaLast < address)
571 		return B_OK;
572 
573 	// Is the area fully covered?
574 	if (area->Base() >= address && areaLast <= lastAddress) {
575 		delete_area(addressSpace, area, false);
576 		return B_OK;
577 	}
578 
579 	int priority;
580 	uint32 allocationFlags;
581 	if (addressSpace == VMAddressSpace::Kernel()) {
582 		priority = VM_PRIORITY_SYSTEM;
583 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
584 			| HEAP_DONT_LOCK_KERNEL_SPACE;
585 	} else {
586 		priority = VM_PRIORITY_USER;
587 		allocationFlags = 0;
588 	}
589 
590 	VMCache* cache = vm_area_get_locked_cache(area);
591 	VMCacheChainLocker cacheChainLocker(cache);
592 	cacheChainLocker.LockAllSourceCaches();
593 
594 	// Cut the end only?
595 	if (areaLast <= lastAddress) {
596 		size_t oldSize = area->Size();
597 		size_t newSize = address - area->Base();
598 
599 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
600 			allocationFlags);
601 		if (error != B_OK)
602 			return error;
603 
604 		// unmap pages
605 		unmap_pages(area, address, oldSize - newSize);
606 
607 		// If no one else uses the area's cache, we can resize it, too.
608 		if (cache->areas == area && area->cache_next == NULL
609 			&& list_is_empty(&cache->consumers)) {
610 			// Since VMCache::Resize() can temporarily drop the lock, we must
611 			// unlock all lower caches to prevent locking order inversion.
612 			cacheChainLocker.Unlock(cache);
613 			cache->Resize(cache->virtual_base + newSize, priority);
614 			cache->ReleaseRefAndUnlock();
615 		}
616 
617 		return B_OK;
618 	}
619 
620 	// Cut the beginning only?
621 	if (area->Base() >= address) {
622 		addr_t oldBase = area->Base();
623 		addr_t newBase = lastAddress + 1;
624 		size_t newSize = areaLast - lastAddress;
625 
626 		// unmap pages
627 		unmap_pages(area, oldBase, newBase - oldBase);
628 
629 		// resize the area
630 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
631 			allocationFlags);
632 		if (error != B_OK)
633 			return error;
634 
635 		// TODO: If no one else uses the area's cache, we should resize it, too!
636 
637 		area->cache_offset += newBase - oldBase;
638 
639 		return B_OK;
640 	}
641 
642 	// The tough part -- cut a piece out of the middle of the area.
643 	// We do that by shrinking the area to the beginning section and creating a
644 	// new area for the end section.
645 
646 	addr_t firstNewSize = address - area->Base();
647 	addr_t secondBase = lastAddress + 1;
648 	addr_t secondSize = areaLast - lastAddress;
649 
650 	// unmap pages
651 	unmap_pages(area, address, area->Size() - firstNewSize);
652 
653 	// resize the area
654 	addr_t oldSize = area->Size();
655 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
656 		allocationFlags);
657 	if (error != B_OK)
658 		return error;
659 
660 	// TODO: If no one else uses the area's cache, we might want to create a
661 	// new cache for the second area, transfer the concerned pages from the
662 	// first cache to it and resize the first cache.
663 
664 	// map the second area
665 	VMArea* secondArea;
666 	void* secondBaseAddress = (void*)secondBase;
667 	error = map_backing_store(addressSpace, cache, &secondBaseAddress,
668 		area->cache_offset + (secondBase - area->Base()), secondSize,
669 		B_EXACT_ADDRESS, area->wiring, area->protection, REGION_NO_PRIVATE_MAP,
670 		&secondArea, area->name, 0, kernel);
671 	if (error != B_OK) {
672 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
673 		return error;
674 	}
675 
676 	// We need a cache reference for the new area.
677 	cache->AcquireRefLocked();
678 
679 	if (_secondArea != NULL)
680 		*_secondArea = secondArea;
681 
682 	return B_OK;
683 }
684 
685 
686 /*!	Deletes all areas in the given address range.
687 	The address space must be write-locked.
688 */
689 static status_t
690 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
691 	bool kernel)
692 {
693 	size = PAGE_ALIGN(size);
694 	addr_t lastAddress = address + (size - 1);
695 
696 	// Check whether the caller is allowed to modify the concerned areas.
697 	if (!kernel) {
698 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
699 				VMArea* area = it.Next();) {
700 			addr_t areaLast = area->Base() + (area->Size() - 1);
701 			if (area->Base() < lastAddress && address < areaLast) {
702 				if ((area->protection & B_KERNEL_AREA) != 0)
703 					return B_NOT_ALLOWED;
704 			}
705 		}
706 	}
707 
708 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
709 			VMArea* area = it.Next();) {
710 		addr_t areaLast = area->Base() + (area->Size() - 1);
711 		if (area->Base() < lastAddress && address < areaLast) {
712 			status_t error = cut_area(addressSpace, area, address,
713 				lastAddress, NULL, kernel);
714 			if (error != B_OK)
715 				return error;
716 				// Failing after already messing with areas is ugly, but we
717 				// can't do anything about it.
718 		}
719 	}
720 
721 	return B_OK;
722 }
723 
724 
725 /*! You need to hold the lock of the cache and the write lock of the address
726 	space when calling this function.
727 	Note that in case of error the cache will be temporarily unlocked.
728 */
729 static status_t
730 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache,
731 	void** _virtualAddress, off_t offset, addr_t size, uint32 addressSpec,
732 	int wiring, int protection, int mapping, VMArea** _area,
733 	const char* areaName, uint32 flags, bool kernel)
734 {
735 	TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, "
736 		"size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName "
737 		"'%s'\n", addressSpace, cache, *_virtualAddress, offset, size,
738 		addressSpec, wiring, protection, _area, areaName));
739 	cache->AssertLocked();
740 
741 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
742 		| HEAP_DONT_LOCK_KERNEL_SPACE;
743 	int priority;
744 	if (addressSpace != VMAddressSpace::Kernel()) {
745 		priority = VM_PRIORITY_USER;
746 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
747 		priority = VM_PRIORITY_VIP;
748 		allocationFlags |= HEAP_PRIORITY_VIP;
749 	} else
750 		priority = VM_PRIORITY_SYSTEM;
751 
752 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
753 		allocationFlags);
754 	if (area == NULL)
755 		return B_NO_MEMORY;
756 
757 	status_t status;
758 
759 	// if this is a private map, we need to create a new cache
760 	// to handle the private copies of pages as they are written to
761 	VMCache* sourceCache = cache;
762 	if (mapping == REGION_PRIVATE_MAP) {
763 		VMCache* newCache;
764 
765 		// create an anonymous cache
766 		status = VMCacheFactory::CreateAnonymousCache(newCache,
767 			(protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES, true,
768 			VM_PRIORITY_USER);
769 		if (status != B_OK)
770 			goto err1;
771 
772 		newCache->Lock();
773 		newCache->temporary = 1;
774 		newCache->scan_skip = cache->scan_skip;
775 		newCache->virtual_base = offset;
776 		newCache->virtual_end = offset + size;
777 
778 		cache->AddConsumer(newCache);
779 
780 		cache = newCache;
781 	}
782 
783 	status = cache->SetMinimalCommitment(size, priority);
784 	if (status != B_OK)
785 		goto err2;
786 
787 	// check to see if this address space has entered DELETE state
788 	if (addressSpace->IsBeingDeleted()) {
789 		// okay, someone is trying to delete this address space now, so we
790 		// can't insert the area and have to back out
791 		status = B_BAD_TEAM_ID;
792 		goto err2;
793 	}
794 
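	// If requested, unmap whatever currently occupies the exact target address
	// range before the new area is inserted.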
795 	if (addressSpec == B_EXACT_ADDRESS
796 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
797 		status = unmap_address_range(addressSpace, (addr_t)*_virtualAddress,
798 			size, kernel);
799 		if (status != B_OK)
800 			goto err2;
801 	}
802 
803 	status = addressSpace->InsertArea(_virtualAddress, addressSpec, size, area,
804 		allocationFlags);
805 	if (status != B_OK) {
806 		// TODO: wait and try again once this is working in the backend
807 #if 0
808 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
809 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
810 				0, 0);
811 		}
812 #endif
813 		goto err2;
814 	}
815 
816 	// attach the cache to the area
817 	area->cache = cache;
818 	area->cache_offset = offset;
819 
820 	// point the cache back to the area
821 	cache->InsertAreaLocked(area);
822 	if (mapping == REGION_PRIVATE_MAP)
823 		cache->Unlock();
824 
825 	// insert the area in the global area hash table
826 	VMAreaHash::Insert(area);
827 
828 	// grab a ref to the address space (the area holds this)
829 	addressSpace->Get();
830 
831 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
832 //		cache, sourceCache, areaName, area);
833 
834 	*_area = area;
835 	return B_OK;
836 
837 err2:
838 	if (mapping == REGION_PRIVATE_MAP) {
839 		// We created this cache, so we must delete it again. Note that we
840 		// need to temporarily unlock the source cache or we'll otherwise
841 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
842 		sourceCache->Unlock();
843 		cache->ReleaseRefAndUnlock();
844 		sourceCache->Lock();
845 	}
846 err1:
847 	addressSpace->DeleteArea(area, allocationFlags);
848 	return status;
849 }
850 
851 
852 status_t
853 vm_block_address_range(const char* name, void* address, addr_t size)
854 {
855 	if (!arch_vm_supports_protection(0))
856 		return B_NOT_SUPPORTED;
857 
858 	AddressSpaceWriteLocker locker;
859 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
860 	if (status != B_OK)
861 		return status;
862 
863 	VMAddressSpace* addressSpace = locker.AddressSpace();
864 
865 	// create an anonymous cache
866 	VMCache* cache;
867 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
868 		VM_PRIORITY_SYSTEM);
869 	if (status != B_OK)
870 		return status;
871 
872 	cache->temporary = 1;
873 	cache->virtual_end = size;
874 	cache->scan_skip = 1;
875 	cache->Lock();
876 
877 	VMArea* area;
878 	void* areaAddress = address;
879 	status = map_backing_store(addressSpace, cache, &areaAddress, 0, size,
880 		B_EXACT_ADDRESS, B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, &area, name,
881 		0, true);
882 	if (status != B_OK) {
883 		cache->ReleaseRefAndUnlock();
884 		return status;
885 	}
886 
887 	cache->Unlock();
888 	area->cache_type = CACHE_TYPE_RAM;
889 	return area->id;
890 }
891 
892 
893 status_t
894 vm_unreserve_address_range(team_id team, void* address, addr_t size)
895 {
896 	AddressSpaceWriteLocker locker(team);
897 	if (!locker.IsLocked())
898 		return B_BAD_TEAM_ID;
899 
900 	VMAddressSpace* addressSpace = locker.AddressSpace();
901 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
902 		addressSpace == VMAddressSpace::Kernel()
903 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
904 }
905 
906 
907 status_t
908 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
909 	addr_t size, uint32 flags)
910 {
911 	if (size == 0)
912 		return B_BAD_VALUE;
913 
914 	AddressSpaceWriteLocker locker(team);
915 	if (!locker.IsLocked())
916 		return B_BAD_TEAM_ID;
917 
918 	VMAddressSpace* addressSpace = locker.AddressSpace();
919 	return addressSpace->ReserveAddressRange(_address, addressSpec,
920 		size, flags,
921 		addressSpace == VMAddressSpace::Kernel()
922 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
923 }
924 
925 
926 area_id
927 vm_create_anonymous_area(team_id team, const char* name, void** address,
928 	uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection,
929 	addr_t physicalAddress, uint32 flags, bool kernel)
930 {
931 	VMArea* area;
932 	VMCache* cache;
933 	vm_page* page = NULL;
934 	bool isStack = (protection & B_STACK_AREA) != 0;
935 	page_num_t guardPages;
936 	bool canOvercommit = false;
937 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
938 		? VM_PAGE_ALLOC_CLEAR : 0;
939 
940 	TRACE(("create_anonymous_area [%ld] %s: size 0x%lx\n", team, name, size));
941 
942 	size = PAGE_ALIGN(size);
943 
944 	if (size == 0)
945 		return B_BAD_VALUE;
946 	if (!arch_vm_supports_protection(protection))
947 		return B_NOT_SUPPORTED;
948 
949 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
950 		canOvercommit = true;
951 
952 #ifdef DEBUG_KERNEL_STACKS
953 	if ((protection & B_KERNEL_STACK_AREA) != 0)
954 		isStack = true;
955 #endif
956 
957 	// check parameters
958 	switch (addressSpec) {
959 		case B_ANY_ADDRESS:
960 		case B_EXACT_ADDRESS:
961 		case B_BASE_ADDRESS:
962 		case B_ANY_KERNEL_ADDRESS:
963 		case B_ANY_KERNEL_BLOCK_ADDRESS:
964 			break;
965 		case B_PHYSICAL_BASE_ADDRESS:
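			// The caller passed the desired physical base address in *address;
			// any kernel virtual address will do for the mapping itself.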
966 			physicalAddress = (addr_t)*address;
967 			addressSpec = B_ANY_KERNEL_ADDRESS;
968 			break;
969 
970 		default:
971 			return B_BAD_VALUE;
972 	}
973 
974 	if (physicalAddress != 0)
975 		wiring = B_CONTIGUOUS;
976 
977 	bool doReserveMemory = false;
978 	switch (wiring) {
979 		case B_NO_LOCK:
980 			break;
981 		case B_FULL_LOCK:
982 		case B_LAZY_LOCK:
983 		case B_CONTIGUOUS:
984 			doReserveMemory = true;
985 			break;
986 		case B_ALREADY_WIRED:
987 			break;
988 		case B_LOMEM:
989 		//case B_SLOWMEM:
990 			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
991 			wiring = B_FULL_LOCK;
992 			doReserveMemory = true;
993 			break;
994 		default:
995 			return B_BAD_VALUE;
996 	}
997 
998 	// For full lock or contiguous areas we're also going to map the pages and
999 	// thus need to reserve pages for the mapping backend upfront.
1000 	addr_t reservedMapPages = 0;
1001 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1002 		AddressSpaceWriteLocker locker;
1003 		status_t status = locker.SetTo(team);
1004 		if (status != B_OK)
1005 			return status;
1006 
1007 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1008 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1009 	}
1010 
1011 	int priority;
1012 	if (team != VMAddressSpace::KernelID())
1013 		priority = VM_PRIORITY_USER;
1014 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1015 		priority = VM_PRIORITY_VIP;
1016 	else
1017 		priority = VM_PRIORITY_SYSTEM;
1018 
1019 	// Reserve memory before acquiring the address space lock. This reduces the
1020 	// chances of failure, since while holding the write lock to the address
1021 	// space (if it is the kernel address space that is), the low memory handler
1022 	// won't be able to free anything for us.
1023 	addr_t reservedMemory = 0;
1024 	if (doReserveMemory) {
1025 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1026 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1027 			return B_NO_MEMORY;
1028 		reservedMemory = size;
1029 		// TODO: We don't reserve the memory for the pages for the page
1030 		// directories/tables. We actually need to, since we currently don't
1031 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1032 		// there are actually fewer physical pages than there should be, which
1033 		// can get the VM into trouble in low memory situations.
1034 	}
1035 
1036 	AddressSpaceWriteLocker locker;
1037 	VMAddressSpace* addressSpace;
1038 	status_t status;
1039 
1040 	// For full lock areas reserve the pages before locking the address
1041 	// space. E.g. block caches can't release their memory while we hold the
1042 	// address space lock.
1043 	page_num_t reservedPages = reservedMapPages;
1044 	if (wiring == B_FULL_LOCK)
1045 		reservedPages += size / B_PAGE_SIZE;
1046 
1047 	vm_page_reservation reservation;
1048 	if (reservedPages > 0) {
1049 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1050 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1051 					priority)) {
1052 				reservedPages = 0;
1053 				status = B_WOULD_BLOCK;
1054 				goto err0;
1055 			}
1056 		} else
1057 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1058 	}
1059 
1060 	status = locker.SetTo(team);
1061 	if (status != B_OK)
1062 		goto err0;
1063 
1064 	addressSpace = locker.AddressSpace();
1065 
1066 	if (wiring == B_CONTIGUOUS) {
1067 		// we try to allocate the page run here upfront as this may easily
1068 		// fail for obvious reasons
1069 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1070 			physicalAddress, size / B_PAGE_SIZE, priority);
1071 		if (page == NULL) {
1072 			status = B_NO_MEMORY;
1073 			goto err0;
1074 		}
1075 	}
1076 
1077 	// create an anonymous cache
1078 	// if it's a stack, make sure that at least two pages are available
1079 	guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0
1080 		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
1081 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1082 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1083 		wiring == B_NO_LOCK, priority);
1084 	if (status != B_OK)
1085 		goto err1;
1086 
1087 	cache->temporary = 1;
1088 	cache->virtual_end = size;
1089 	cache->committed_size = reservedMemory;
1090 		// TODO: This should be done via a method.
1091 	reservedMemory = 0;
1092 
1093 	switch (wiring) {
1094 		case B_LAZY_LOCK:
1095 		case B_FULL_LOCK:
1096 		case B_CONTIGUOUS:
1097 		case B_ALREADY_WIRED:
1098 			cache->scan_skip = 1;
1099 			break;
1100 		case B_NO_LOCK:
1101 			cache->scan_skip = 0;
1102 			break;
1103 	}
1104 
1105 	cache->Lock();
1106 
1107 	status = map_backing_store(addressSpace, cache, address, 0, size,
1108 		addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name,
1109 		flags, kernel);
1110 
1111 	if (status != B_OK) {
1112 		cache->ReleaseRefAndUnlock();
1113 		goto err1;
1114 	}
1115 
1116 	locker.DegradeToReadLock();
1117 
1118 	switch (wiring) {
1119 		case B_NO_LOCK:
1120 		case B_LAZY_LOCK:
1121 			// do nothing - the pages are mapped in as needed
1122 			break;
1123 
1124 		case B_FULL_LOCK:
1125 		{
1126 			// Allocate and map all pages for this area
1127 
1128 			off_t offset = 0;
1129 			for (addr_t address = area->Base();
1130 					address < area->Base() + (area->Size() - 1);
1131 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1132 #ifdef DEBUG_KERNEL_STACKS
1133 #	ifdef STACK_GROWS_DOWNWARDS
1134 				if (isStack && address < area->Base()
1135 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1136 #	else
1137 				if (isStack && address >= area->Base() + area->Size()
1138 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1139 #	endif
1140 					continue;
1141 #endif
1142 				vm_page* page = vm_page_allocate_page(&reservation,
1143 					PAGE_STATE_WIRED | pageAllocFlags);
1144 				cache->InsertPage(page, offset);
1145 				map_page(area, page, address, protection, &reservation);
1146 
1147 				DEBUG_PAGE_ACCESS_END(page);
1148 			}
1149 
1150 			break;
1151 		}
1152 
1153 		case B_ALREADY_WIRED:
1154 		{
1155 			// The pages should already be mapped. This is only really useful
1156 			// during boot time. Find the appropriate vm_page objects and stick
1157 			// them in the cache object.
1158 			VMTranslationMap* map = addressSpace->TranslationMap();
1159 			off_t offset = 0;
1160 
1161 			if (!gKernelStartup)
1162 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1163 
1164 			map->Lock();
1165 
1166 			for (addr_t virtualAddress = area->Base();
1167 					virtualAddress < area->Base() + (area->Size() - 1);
1168 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1169 				addr_t physicalAddress;
1170 				uint32 flags;
1171 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1172 				if (status < B_OK) {
1173 					panic("looking up mapping failed for va 0x%lx\n",
1174 						virtualAddress);
1175 				}
1176 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1177 				if (page == NULL) {
1178 					panic("looking up page failed for pa 0x%lx\n",
1179 						physicalAddress);
1180 				}
1181 
1182 				DEBUG_PAGE_ACCESS_START(page);
1183 
1184 				increment_page_wired_count(page);
1185 				cache->InsertPage(page, offset);
1186 				vm_page_set_state(page, PAGE_STATE_WIRED);
1187 				page->busy = false;
1188 
1189 				DEBUG_PAGE_ACCESS_END(page);
1190 			}
1191 
1192 			map->Unlock();
1193 			break;
1194 		}
1195 
1196 		case B_CONTIGUOUS:
1197 		{
1198 			// We have already allocated our contiguous page run, so we can now
1199 			// just map them in the address space
1200 			VMTranslationMap* map = addressSpace->TranslationMap();
1201 			addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE;
1202 			addr_t virtualAddress = area->Base();
1203 			off_t offset = 0;
1204 
1205 			map->Lock();
1206 
1207 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1208 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1209 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1210 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1211 				if (page == NULL)
1212 					panic("couldn't lookup physical page just allocated\n");
1213 
1214 				status = map->Map(virtualAddress, physicalAddress, protection,
1215 					&reservation);
1216 				if (status < B_OK)
1217 					panic("couldn't map physical page in page run\n");
1218 
1219 				increment_page_wired_count(page);
1220 				cache->InsertPage(page, offset);
1221 
1222 				DEBUG_PAGE_ACCESS_END(page);
1223 			}
1224 
1225 			map->Unlock();
1226 			break;
1227 		}
1228 
1229 		default:
1230 			break;
1231 	}
1232 
1233 	cache->Unlock();
1234 
1235 	if (reservedPages > 0)
1236 		vm_page_unreserve_pages(&reservation);
1237 
1238 	TRACE(("vm_create_anonymous_area: done\n"));
1239 
1240 	area->cache_type = CACHE_TYPE_RAM;
1241 	return area->id;
1242 
1243 err1:
1244 	if (wiring == B_CONTIGUOUS) {
1245 		// we allocated the contiguous page run upfront, so free the pages again
1246 		addr_t pageNumber = page->physical_page_number;
1247 		int32 i;
1248 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1249 			page = vm_lookup_page(pageNumber);
1250 			if (page == NULL)
1251 				panic("couldn't lookup physical page just allocated\n");
1252 
1253 			vm_page_set_state(page, PAGE_STATE_FREE);
1254 		}
1255 	}
1256 
1257 err0:
1258 	if (reservedPages > 0)
1259 		vm_page_unreserve_pages(&reservation);
1260 	if (reservedMemory > 0)
1261 		vm_unreserve_memory(reservedMemory);
1262 
1263 	return status;
1264 }
1265 
1266 
1267 area_id
1268 vm_map_physical_memory(team_id team, const char* name, void** _address,
1269 	uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress,
1270 	bool alreadyWired)
1271 {
1272 	VMArea* area;
1273 	VMCache* cache;
1274 	addr_t mapOffset;
1275 
1276 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
1277 		"spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team,
1278 		name, _address, addressSpec, size, protection, physicalAddress));
1279 
1280 	if (!arch_vm_supports_protection(protection))
1281 		return B_NOT_SUPPORTED;
1282 
1283 	AddressSpaceWriteLocker locker(team);
1284 	if (!locker.IsLocked())
1285 		return B_BAD_TEAM_ID;
1286 
1287 	// if the physical address is not page-aligned,
1288 	// move the area down so that it starts on a page boundary
1289 	mapOffset = physicalAddress % B_PAGE_SIZE;
1290 	size += mapOffset;
1291 	physicalAddress -= mapOffset;
1292 
1293 	size = PAGE_ALIGN(size);
1294 
1295 	// create a device cache
1296 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1297 	if (status != B_OK)
1298 		return status;
1299 
1300 	// tell the page scanner to skip over this area, its pages are special
1301 	cache->scan_skip = 1;
1302 	cache->virtual_end = size;
1303 
1304 	cache->Lock();
1305 
1306 	status = map_backing_store(locker.AddressSpace(), cache, _address,
1307 		0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
1308 		REGION_NO_PRIVATE_MAP, &area, name, 0, true);
1309 
1310 	if (status < B_OK)
1311 		cache->ReleaseRefLocked();
1312 
1313 	cache->Unlock();
1314 
1315 	if (status == B_OK) {
1316 		// set requested memory type -- use uncached, if not given
1317 		uint32 memoryType = addressSpec & B_MTR_MASK;
1318 		if (memoryType == 0)
1319 			memoryType = B_MTR_UC;
1320 
1321 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1322 		if (status != B_OK)
1323 			delete_area(locker.AddressSpace(), area, false);
1324 	}
1325 
1326 	if (status >= B_OK && !alreadyWired) {
1327 		// make sure our area is mapped in completely
1328 
1329 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1330 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1331 			area->Base() + (size - 1));
1332 
1333 		vm_page_reservation reservation;
1334 		vm_page_reserve_pages(&reservation, reservePages,
1335 			team == VMAddressSpace::KernelID()
1336 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1337 		map->Lock();
1338 
1339 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1340 			map->Map(area->Base() + offset, physicalAddress + offset,
1341 				protection, &reservation);
1342 		}
1343 
1344 		map->Unlock();
1345 		vm_page_unreserve_pages(&reservation);
1346 	}
1347 
1348 	if (status < B_OK)
1349 		return status;
1350 
1351 	// modify the returned pointer to be offset into the new area
1352 	// by the same amount the physical address was offset
1353 	*_address = (void*)((addr_t)*_address + mapOffset);
1354 
1355 	area->cache_type = CACHE_TYPE_DEVICE;
1356 	return area->id;
1357 }
1358 
1359 
1360 /*!	Don't use!
1361 	TODO: This function was introduced to map physical page vecs to
1362 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1363 	use a device cache and does not track vm_page::wired_count!
1364 */
1365 area_id
1366 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1367 	uint32 addressSpec, addr_t* _size, uint32 protection, struct iovec* vecs,
1368 	uint32 vecCount)
1369 {
1370 	TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, "
1371 		"spec = %ld, _size = %p, protection = %ld, vecs = %p, "
1372 		"vecCount = %ld)\n", team, name, _address, addressSpec, _size,
1373 		protection, vecs, vecCount));
1374 
1375 	if (!arch_vm_supports_protection(protection)
1376 		|| (addressSpec & B_MTR_MASK) != 0) {
1377 		return B_NOT_SUPPORTED;
1378 	}
1379 
1380 	AddressSpaceWriteLocker locker(team);
1381 	if (!locker.IsLocked())
1382 		return B_BAD_TEAM_ID;
1383 
1384 	if (vecCount == 0)
1385 		return B_BAD_VALUE;
1386 
1387 	addr_t size = 0;
1388 	for (uint32 i = 0; i < vecCount; i++) {
1389 		if ((addr_t)vecs[i].iov_base % B_PAGE_SIZE != 0
1390 			|| vecs[i].iov_len % B_PAGE_SIZE != 0) {
1391 			return B_BAD_VALUE;
1392 		}
1393 
1394 		size += vecs[i].iov_len;
1395 	}
1396 
1397 	// create a device cache
1398 	VMCache* cache;
1399 	status_t result = VMCacheFactory::CreateDeviceCache(cache,
1400 		(addr_t)vecs[0].iov_base);
1401 	if (result != B_OK)
1402 		return result;
1403 
1404 	// tell the page scanner to skip over this area, its pages are special
1405 	cache->scan_skip = 1;
1406 	cache->virtual_end = size;
1407 
1408 	cache->Lock();
1409 
1410 	VMArea* area;
1411 	result = map_backing_store(locker.AddressSpace(), cache, _address,
1412 		0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
1413 		REGION_NO_PRIVATE_MAP, &area, name, 0, true);
1414 
1415 	if (result != B_OK)
1416 		cache->ReleaseRefLocked();
1417 
1418 	cache->Unlock();
1419 
1420 	if (result != B_OK)
1421 		return result;
1422 
1423 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1424 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1425 		area->Base() + (size - 1));
1426 
1427 	vm_page_reservation reservation;
1428 	vm_page_reserve_pages(&reservation, reservePages,
1429 			team == VMAddressSpace::KernelID()
1430 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1431 	map->Lock();
1432 
1433 	uint32 vecIndex = 0;
1434 	size_t vecOffset = 0;
1435 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1436 		while (vecOffset >= vecs[vecIndex].iov_len && vecIndex < vecCount) {
1437 			vecOffset = 0;
1438 			vecIndex++;
1439 		}
1440 
1441 		if (vecIndex >= vecCount)
1442 			break;
1443 
1444 		map->Map(area->Base() + offset,
1445 			(addr_t)vecs[vecIndex].iov_base + vecOffset, protection,
1446 			&reservation);
1447 
1448 		vecOffset += B_PAGE_SIZE;
1449 	}
1450 
1451 	map->Unlock();
1452 	vm_page_unreserve_pages(&reservation);
1453 
1454 	if (_size != NULL)
1455 		*_size = size;
1456 
1457 	area->cache_type = CACHE_TYPE_DEVICE;
1458 	return area->id;
1459 }
1460 
1461 
1462 area_id
1463 vm_create_null_area(team_id team, const char* name, void** address,
1464 	uint32 addressSpec, addr_t size, uint32 flags)
1465 {
1466 	AddressSpaceWriteLocker locker(team);
1467 	if (!locker.IsLocked())
1468 		return B_BAD_TEAM_ID;
1469 
1470 	size = PAGE_ALIGN(size);
1471 
1472 	// create a null cache
1473 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1474 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1475 	VMCache* cache;
1476 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1477 	if (status != B_OK)
1478 		return status;
1479 
1480 	// tell the page scanner to skip over this area, no pages will be mapped
1481 	// here
1482 	cache->scan_skip = 1;
1483 	cache->virtual_end = size;
1484 
1485 	cache->Lock();
1486 
1487 	VMArea* area;
1488 	status = map_backing_store(locker.AddressSpace(), cache, address, 0, size,
1489 		addressSpec, B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP,
1490 		&area, name, flags, true);
1491 
1492 	if (status < B_OK) {
1493 		cache->ReleaseRefAndUnlock();
1494 		return status;
1495 	}
1496 
1497 	cache->Unlock();
1498 
1499 	area->cache_type = CACHE_TYPE_NULL;
1500 	return area->id;
1501 }
1502 
1503 
1504 /*!	Creates the vnode cache for the specified \a vnode.
1505 	The vnode has to be marked busy when calling this function.
1506 */
1507 status_t
1508 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1509 {
1510 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1511 }
1512 
1513 
1514 /*!	\a cache must be locked. The area's address space must be read-locked.
1515 */
1516 static void
1517 pre_map_area_pages(VMArea* area, VMCache* cache,
1518 	vm_page_reservation* reservation)
1519 {
1520 	addr_t baseAddress = area->Base();
1521 	addr_t cacheOffset = area->cache_offset;
1522 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1523 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1524 
1525 	for (VMCachePagesTree::Iterator it
1526 				= cache->pages.GetIterator(firstPage, true, true);
1527 			vm_page* page = it.Next();) {
1528 		if (page->cache_offset >= endPage)
1529 			break;
1530 
1531 		// skip busy and inactive pages
1532 		if (page->busy || page->usage_count == 0)
1533 			continue;
1534 
1535 		DEBUG_PAGE_ACCESS_START(page);
1536 		map_page(area, page,
1537 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1538 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1539 		DEBUG_PAGE_ACCESS_END(page);
1540 	}
1541 }
1542 
1543 
1544 /*!	Will map the file specified by \a fd to an area in memory.
1545 	The file will be mirrored beginning at the specified \a offset. The
1546 	\a offset and \a size arguments have to be page aligned.
1547 */
1548 static area_id
1549 _vm_map_file(team_id team, const char* name, void** _address,
1550 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1551 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1552 {
1553 	// TODO: for binary files, we want to make sure that they get a
1554 	//	copy of the file as it is at mapping time, i.e. later changes should not
1555 	//	make it into the mapped copy -- this will need quite some changes
1556 	//	to be done in a nice way
1557 	TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n",
1558 		fd, offset, size, mapping));
1559 
1560 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1561 	size = PAGE_ALIGN(size);
1562 
1563 	if (mapping == REGION_NO_PRIVATE_MAP)
1564 		protection |= B_SHARED_AREA;
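	// unmapping an existing address range only makes sense together with an
	// exact address specification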
1565 	if (addressSpec != B_EXACT_ADDRESS)
1566 		unmapAddressRange = false;
1567 
1568 	if (fd < 0) {
1569 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1570 		return vm_create_anonymous_area(team, name, _address, addressSpec, size,
1571 			B_NO_LOCK, protection, 0, flags, kernel);
1572 	}
1573 
1574 	// get the open flags of the FD
1575 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1576 	if (descriptor == NULL)
1577 		return EBADF;
1578 	int32 openMode = descriptor->open_mode;
1579 	put_fd(descriptor);
1580 
1581 	// The FD must be open for reading in any case. For a shared mapping with
1582 	// write access, the FD must additionally be open for writing.
1583 	if ((openMode & O_ACCMODE) == O_WRONLY
1584 		|| (mapping == REGION_NO_PRIVATE_MAP
1585 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1586 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1587 		return EACCES;
1588 	}
1589 
1590 	// get the vnode for the object, this also grabs a ref to it
1591 	struct vnode* vnode = NULL;
1592 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1593 	if (status < B_OK)
1594 		return status;
1595 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1596 
1597 	// If we're going to pre-map pages, we need to reserve the pages needed by
1598 	// the mapping backend upfront.
1599 	page_num_t reservedPreMapPages = 0;
1600 	vm_page_reservation reservation;
1601 	if ((protection & B_READ_AREA) != 0) {
1602 		AddressSpaceWriteLocker locker;
1603 		status = locker.SetTo(team);
1604 		if (status != B_OK)
1605 			return status;
1606 
1607 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1608 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1609 
1610 		locker.Unlock();
1611 
1612 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1613 			team == VMAddressSpace::KernelID()
1614 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1615 	}
1616 
1617 	struct PageUnreserver {
1618 		PageUnreserver(vm_page_reservation* reservation)
1619 			:
1620 			fReservation(reservation)
1621 		{
1622 		}
1623 
1624 		~PageUnreserver()
1625 		{
1626 			if (fReservation != NULL)
1627 				vm_page_unreserve_pages(fReservation);
1628 		}
1629 
1630 		vm_page_reservation* fReservation;
1631 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1632 
1633 	AddressSpaceWriteLocker locker(team);
1634 	if (!locker.IsLocked())
1635 		return B_BAD_TEAM_ID;
1636 
1637 	// TODO: this only works for file systems that use the file cache
1638 	VMCache* cache;
1639 	status = vfs_get_vnode_cache(vnode, &cache, false);
1640 	if (status < B_OK)
1641 		return status;
1642 
1643 	cache->Lock();
1644 
1645 	VMArea* area;
1646 	status = map_backing_store(locker.AddressSpace(), cache, _address,
1647 		offset, size, addressSpec, 0, protection, mapping, &area, name,
1648 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, kernel);
1649 
1650 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1651 		// map_backing_store() cannot know we no longer need the ref
1652 		cache->ReleaseRefLocked();
1653 	}
1654 
1655 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1656 		pre_map_area_pages(area, cache, &reservation);
1657 
1658 	cache->Unlock();
1659 
1660 	if (status == B_OK) {
1661 		// TODO: this probably deserves a smarter solution, i.e. don't always
1662 		// prefetch stuff, and probably don't trigger it at this place.
1663 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1664 			// prefetches at max 10 MB starting from "offset"
1665 	}
1666 
1667 	if (status != B_OK)
1668 		return status;
1669 
1670 	area->cache_type = CACHE_TYPE_VNODE;
1671 	return area->id;
1672 }
1673 
1674 
1675 area_id
1676 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
1677 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
1678 	int fd, off_t offset)
1679 {
1680 	if (!arch_vm_supports_protection(protection))
1681 		return B_NOT_SUPPORTED;
1682 
1683 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
1684 		mapping, unmapAddressRange, fd, offset, true);
1685 }
1686 
1687 
1688 VMCache*
1689 vm_area_get_locked_cache(VMArea* area)
1690 {
1691 	rw_lock_read_lock(&sAreaCacheLock);
1692 
1693 	while (true) {
1694 		VMCache* cache = area->cache;
1695 
1696 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
1697 			// cache has been deleted
1698 			rw_lock_read_lock(&sAreaCacheLock);
1699 			continue;
1700 		}
1701 
1702 		rw_lock_read_lock(&sAreaCacheLock);
1703 
1704 		if (cache == area->cache) {
1705 			cache->AcquireRefLocked();
1706 			rw_lock_read_unlock(&sAreaCacheLock);
1707 			return cache;
1708 		}
1709 
1710 		// the cache changed in the meantime
1711 		cache->Unlock();
1712 	}
1713 }
1714 
1715 
1716 void
1717 vm_area_put_locked_cache(VMCache* cache)
1718 {
1719 	cache->ReleaseRefAndUnlock();
1720 }
1721 
1722 
1723 area_id
1724 vm_clone_area(team_id team, const char* name, void** address,
1725 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
1726 	bool kernel)
1727 {
1728 	VMArea* newArea = NULL;
1729 	VMArea* sourceArea;
1730 
1731 	// Check whether the source area exists and is cloneable. If so, mark it
1732 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
1733 	{
1734 		AddressSpaceWriteLocker locker;
1735 		status_t status = locker.SetFromArea(sourceID, sourceArea);
1736 		if (status != B_OK)
1737 			return status;
1738 
1739 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
1740 			return B_NOT_ALLOWED;
1741 
1742 		sourceArea->protection |= B_SHARED_AREA;
1743 		protection |= B_SHARED_AREA;
1744 	}
1745 
1746 	// Now lock both address spaces and actually do the cloning.
1747 
1748 	MultiAddressSpaceLocker locker;
1749 	VMAddressSpace* sourceAddressSpace;
1750 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
1751 	if (status != B_OK)
1752 		return status;
1753 
1754 	VMAddressSpace* targetAddressSpace;
1755 	status = locker.AddTeam(team, true, &targetAddressSpace);
1756 	if (status != B_OK)
1757 		return status;
1758 
1759 	status = locker.Lock();
1760 	if (status != B_OK)
1761 		return status;
1762 
1763 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
1764 	if (sourceArea == NULL)
1765 		return B_BAD_VALUE;
1766 
1767 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
1768 		return B_NOT_ALLOWED;
1769 
1770 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
1771 
1772 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
1773 	//	have been adapted. Maybe it should be part of the kernel settings,
1774 	//	anyway (so that old drivers can always work).
1775 #if 0
1776 	if (sourceArea->aspace == VMAddressSpace::Kernel()
1777 		&& addressSpace != VMAddressSpace::Kernel()
1778 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
1779 		// kernel areas must not be cloned in userland, unless explicitly
1780 		// declared user-cloneable upon construction
1781 		status = B_NOT_ALLOWED;
1782 	} else
1783 #endif
1784 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
1785 		status = B_NOT_ALLOWED;
1786 	else {
1787 		status = map_backing_store(targetAddressSpace, cache, address,
1788 			sourceArea->cache_offset, sourceArea->Size(), addressSpec,
1789 			sourceArea->wiring, protection, mapping, &newArea, name, 0, kernel);
1790 	}
1791 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
1792 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
1793 		// to create a new cache, and has therefore already acquired a reference
1794 		// to the source cache - but otherwise it has no idea that we need
1795 		// one.
1796 		cache->AcquireRefLocked();
1797 	}
1798 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
1799 		// we need to map in everything at this point
1800 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
1801 			// we don't have actual pages to map but a physical area
1802 			VMTranslationMap* map
1803 				= sourceArea->address_space->TranslationMap();
1804 			map->Lock();
1805 
1806 			addr_t physicalAddress;
1807 			uint32 oldProtection;
1808 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
1809 
1810 			map->Unlock();
1811 
1812 			map = targetAddressSpace->TranslationMap();
1813 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
1814 				newArea->Base() + (newArea->Size() - 1));
1815 
1816 			vm_page_reservation reservation;
1817 			vm_page_reserve_pages(&reservation, reservePages,
1818 				targetAddressSpace == VMAddressSpace::Kernel()
1819 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1820 			map->Lock();
1821 
1822 			for (addr_t offset = 0; offset < newArea->Size();
1823 					offset += B_PAGE_SIZE) {
1824 				map->Map(newArea->Base() + offset, physicalAddress + offset,
1825 					protection, &reservation);
1826 			}
1827 
1828 			map->Unlock();
1829 			vm_page_unreserve_pages(&reservation);
1830 		} else {
1831 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
1832 			size_t reservePages = map->MaxPagesNeededToMap(
1833 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
1834 			vm_page_reservation reservation;
1835 			vm_page_reserve_pages(&reservation, reservePages,
1836 				targetAddressSpace == VMAddressSpace::Kernel()
1837 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1838 
1839 			// map in all pages from source
1840 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
1841 					vm_page* page  = it.Next();) {
1842 				if (!page->busy) {
1843 					DEBUG_PAGE_ACCESS_START(page);
1844 					map_page(newArea, page,
1845 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
1846 							- newArea->cache_offset),
1847 						protection, &reservation);
1848 					DEBUG_PAGE_ACCESS_END(page);
1849 				}
1850 			}
1851 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
1852 			// ensuring that!
1853 
1854 			vm_page_unreserve_pages(&reservation);
1855 		}
1856 	}
1857 	if (status == B_OK)
1858 		newArea->cache_type = sourceArea->cache_type;
1859 
1860 	vm_area_put_locked_cache(cache);
1861 
1862 	if (status < B_OK)
1863 		return status;
1864 
1865 	return newArea->id;
1866 }
1867 
1868 
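/*!	Deletes \a area from \a addressSpace: the area is unmapped, removed from
	its cache, and all resources associated with it are released.
	The caller must hold a write lock on the address space;
	\a deletingAddressSpace is passed on to the translation map's UnmapArea().
*/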
1869 static void
1870 delete_area(VMAddressSpace* addressSpace, VMArea* area,
1871 	bool deletingAddressSpace)
1872 {
1873 	VMAreaHash::Remove(area);
1874 
1875 	// At this point the area is removed from the global hash table, but
1876 	// still exists in the area list.
1877 
1878 	// Unmap the virtual address space the area occupied.
1879 	{
1880 		// We need to lock the complete cache chain.
1881 		VMCache* topCache = vm_area_get_locked_cache(area);
1882 		VMCacheChainLocker cacheChainLocker(topCache);
1883 		cacheChainLocker.LockAllSourceCaches();
1884 
1885 		// If the area's top cache is a temporary cache and the area is the only
1886 		// one referencing it (besides us currently holding a second reference),
1887 		// the unmapping code doesn't need to care about preserving the accessed
1888 		// and dirty flags of the top cache page mappings.
1889 		bool ignoreTopCachePageFlags
1890 			= topCache->temporary && topCache->RefCount() == 2;
1891 
1892 		area->address_space->TranslationMap()->UnmapArea(area,
1893 			deletingAddressSpace, ignoreTopCachePageFlags);
1894 	}
1895 
1896 	if (!area->cache->temporary)
1897 		area->cache->WriteModified();
1898 
1899 	arch_vm_unset_memory_type(area);
1900 	addressSpace->RemoveArea(area, 0);
1901 	addressSpace->Put();
1902 
1903 	area->cache->RemoveArea(area);
1904 	area->cache->ReleaseRef();
1905 
1906 	addressSpace->DeleteArea(area, 0);
1907 }
1908 
1909 
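/*!	Deletes the area with ID \a id in the address space of team \a team.
	If \a kernel is \c false, areas protected with \c B_KERNEL_AREA cannot be
	deleted this way.
*/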
1910 status_t
1911 vm_delete_area(team_id team, area_id id, bool kernel)
1912 {
1913 	TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id));
1914 
1915 	AddressSpaceWriteLocker locker;
1916 	VMArea* area;
1917 	status_t status = locker.SetFromArea(team, id, area);
1918 	if (status != B_OK)
1919 		return status;
1920 
1921 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
1922 		return B_NOT_ALLOWED;
1923 
1924 	delete_area(locker.AddressSpace(), area, false);
1925 	return B_OK;
1926 }
1927 
1928 
1929 /*!	Creates a new cache on top of the given cache, moves all areas from
1930 	the old cache to the new one, and changes the protection of all affected
1931 	areas' pages to read-only.
1932 	Preconditions:
1933 	- The given cache must be locked.
1934 	- All of the cache's areas' address spaces must be read locked.
1935 */
1936 static status_t
1937 vm_copy_on_write_area(VMCache* lowerCache)
1938 {
1939 	VMCache* upperCache;
1940 
1941 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
1942 
1943 	// We need to separate the cache from its areas. The cache goes one level
1944 	// deeper and we create a new cache in between.
1945 
1946 	// create an anonymous cache
1947 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
1948 		0, true, VM_PRIORITY_USER);
1949 	if (status != B_OK)
1950 		return status;
1951 
1952 	upperCache->Lock();
1953 
1954 	upperCache->temporary = 1;
1955 	upperCache->scan_skip = lowerCache->scan_skip;
1956 	upperCache->virtual_base = lowerCache->virtual_base;
1957 	upperCache->virtual_end = lowerCache->virtual_end;
1958 
1959 	// transfer the lower cache areas to the upper cache
1960 	rw_lock_write_lock(&sAreaCacheLock);
1961 	upperCache->TransferAreas(lowerCache);
1962 	rw_lock_write_unlock(&sAreaCacheLock);
1963 
1964 	lowerCache->AddConsumer(upperCache);
1965 
1966 	// We now need to remap all pages from all of the cache's areas read-only,
1967 	// so that a copy will be created on the next write access.
1968 
1969 	for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
1970 			tempArea = tempArea->cache_next) {
1971 		// The area must be readable in the same way it was previously writable
1972 		uint32 protection = B_KERNEL_READ_AREA;
1973 		if ((tempArea->protection & B_READ_AREA) != 0)
1974 			protection |= B_READ_AREA;
1975 
1976 		VMTranslationMap* map = tempArea->address_space->TranslationMap();
1977 		map->Lock();
1978 		map->ProtectArea(tempArea, protection);
1979 		map->Unlock();
1980 	}
1981 
1982 	vm_area_put_locked_cache(upperCache);
1983 
1984 	return B_OK;
1985 }
1986 
1987 
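/*!	Creates a copy of the area with ID \a sourceID in the address space of
	team \a team. For areas marked \c B_SHARED_AREA the new area maps the
	source area's cache directly; otherwise a writable source is first turned
	copy-on-write via vm_copy_on_write_area().
	\return The ID of the newly created area, or an error code.
*/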
1988 area_id
1989 vm_copy_area(team_id team, const char* name, void** _address,
1990 	uint32 addressSpec, uint32 protection, area_id sourceID)
1991 {
1992 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
1993 
1994 	if ((protection & B_KERNEL_PROTECTION) == 0) {
1995 		// set the same protection for the kernel as for userland
1996 		protection |= B_KERNEL_READ_AREA;
1997 		if (writableCopy)
1998 			protection |= B_KERNEL_WRITE_AREA;
1999 	}
2000 
2001 	// Do the locking: target address space, all address spaces associated with
2002 	// the source cache, and the cache itself.
2003 	MultiAddressSpaceLocker locker;
2004 	VMAddressSpace* targetAddressSpace;
2005 	VMCache* cache;
2006 	VMArea* source;
2007 	status_t status = locker.AddTeam(team, true, &targetAddressSpace);
2008 	if (status == B_OK) {
2009 		status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2010 			&cache);
2011 	}
2012 	if (status != B_OK)
2013 		return status;
2014 
2015 	AreaCacheLocker cacheLocker(cache);	// already locked
2016 
2017 	if (addressSpec == B_CLONE_ADDRESS) {
2018 		addressSpec = B_EXACT_ADDRESS;
2019 		*_address = (void*)source->Base();
2020 	}
2021 
2022 	bool sharedArea = (source->protection & B_SHARED_AREA) != 0;
2023 
2024 	// First, create a cache on top of the source area, or use the existing
2025 	// one if this is a shared area.
2026 
2027 	VMArea* target;
2028 	status = map_backing_store(targetAddressSpace, cache, _address,
2029 		source->cache_offset, source->Size(), addressSpec, source->wiring,
2030 		protection, sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2031 		&target, name, 0, true);
2032 	if (status < B_OK)
2033 		return status;
2034 
2035 	if (sharedArea) {
2036 		// The new area uses the old area's cache, but map_backing_store()
2037 		// hasn't acquired a ref. So we have to do that now.
2038 		cache->AcquireRefLocked();
2039 	}
2040 
2041 	// If the source area is writable, we need to move it one layer up as well
2042 
2043 	if (!sharedArea) {
2044 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2045 			// TODO: do something more useful if this fails!
2046 			if (vm_copy_on_write_area(cache) < B_OK)
2047 				panic("vm_copy_on_write_area() failed!\n");
2048 		}
2049 	}
2050 
2051 	// we return the ID of the newly created area
2052 	return target->id;
2053 }
2054 
2055 
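/*!	Changes the protection of the area with ID \a areaID to \a newProtection
	and remaps its pages accordingly. If a read-only area whose cache has
	consumers is made writable, a copy-on-write cache is inserted via
	vm_copy_on_write_area().
	If \a kernel is \c false, areas protected with \c B_KERNEL_AREA cannot be
	changed.
*/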
2056 static status_t
2057 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2058 	bool kernel)
2059 {
2060 	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = "
2061 		"%#lx)\n", team, areaID, newProtection));
2062 
2063 	if (!arch_vm_supports_protection(newProtection))
2064 		return B_NOT_SUPPORTED;
2065 
2066 	// lock address spaces and cache
2067 	MultiAddressSpaceLocker locker;
2068 	VMCache* cache;
2069 	VMArea* area;
2070 	status_t status = locker.AddAreaCacheAndLock(areaID, true, false, area,
2071 		&cache);
	if (status != B_OK)
		return status;

2072 	AreaCacheLocker cacheLocker(cache);	// already locked
2073 
2074 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2075 		return B_NOT_ALLOWED;
2076 
2077 	if (area->protection == newProtection)
2078 		return B_OK;
2079 
2080 	if (team != VMAddressSpace::KernelID()
2081 		&& area->address_space->ID() != team) {
2082 		// unless you're the kernel, you are only allowed to set
2083 		// the protection of your own areas
2084 		return B_NOT_ALLOWED;
2085 	}
2086 
2087 	bool changePageProtection = true;
2088 	bool changeTopCachePagesOnly = false;
2089 
2090 	if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2091 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0) {
2092 		// writable -> !writable
2093 
2094 		if (cache->source != NULL && cache->temporary) {
2095 			if (cache->CountWritableAreas(area) == 0) {
2096 				// Since this cache is now backed by the pages of its source
2097 				// cache, we can reduce its commitment to cover only the pages
2098 				// that really are in this cache.
2099 
2100 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2101 					team == VMAddressSpace::KernelID()
2102 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2103 
2104 				// TODO: we may be able to join with our source cache, if
2105 				// count == 0
2106 			}
2107 		}
2108 
2109 		// If only the writability changes, we can just remap the pages of the
2110 		// top cache, since the pages of lower caches are mapped read-only
2111 		// anyway. That's only advantageous if the number of pages in the cache
2112 		// is significantly smaller than the number of pages in the area,
2113 		// though.
2114 		if (newProtection
2115 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2116 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2117 			changeTopCachePagesOnly = true;
2118 		}
2119 	} else if ((area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) == 0
2120 		&& (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
2121 		// !writable -> writable
2122 
2123 		if (!list_is_empty(&cache->consumers)) {
2124 			// There are consumers -- we have to insert a new cache. Fortunately
2125 			// vm_copy_on_write_area() does everything that's needed.
2126 			changePageProtection = false;
2127 			status = vm_copy_on_write_area(cache);
2128 		} else {
2129 			// No consumers, so we don't need to insert a new one.
2130 			if (cache->source != NULL && cache->temporary) {
2131 				// the cache's commitment must contain all possible pages
2132 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2133 					team == VMAddressSpace::KernelID()
2134 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2135 			}
2136 
2137 			if (status == B_OK && cache->source != NULL) {
2138 				// There's a source cache, hence we can't just change all pages'
2139 				// protection or we might allow writing into pages belonging to
2140 				// a lower cache.
2141 				changeTopCachePagesOnly = true;
2142 			}
2143 		}
2144 	} else {
2145 		// we don't have anything special to do in all other cases
2146 	}
2147 
2148 	if (status == B_OK) {
2149 		// remap existing pages in this cache
2150 		if (changePageProtection) {
2151 			VMTranslationMap* map = area->address_space->TranslationMap();
2152 			map->Lock();
2153 
2154 			if (changeTopCachePagesOnly) {
2155 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2156 				page_num_t lastPageOffset
2157 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2158 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2159 						vm_page* page = it.Next();) {
2160 					if (page->cache_offset >= firstPageOffset
2161 						&& page->cache_offset <= lastPageOffset) {
2162 						addr_t address = virtual_page_address(area, page);
2163 						map->ProtectPage(area, address, newProtection);
2164 					}
2165 				}
2166 			} else
2167 				map->ProtectArea(area, newProtection);
2168 
2169 			map->Unlock();
2170 		}
2171 
2172 		area->protection = newProtection;
2173 	}
2174 
2175 	return status;
2176 }
2177 
2178 
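/*!	Looks up the physical address that \a vaddr is currently mapped to in the
	address space of team \a team and stores it in \a paddr.
*/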
2179 status_t
2180 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t* paddr)
2181 {
2182 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2183 	if (addressSpace == NULL)
2184 		return B_BAD_TEAM_ID;
2185 
2186 	VMTranslationMap* map = addressSpace->TranslationMap();
2187 
2188 	map->Lock();
2189 	uint32 dummyFlags;
2190 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2191 	map->Unlock();
2192 
2193 	addressSpace->Put();
2194 	return status;
2195 }
2196 
2197 
2198 /*!	The page's cache must be locked.
2199 */
2200 bool
2201 vm_test_map_modification(vm_page* page)
2202 {
2203 	if (page->modified)
2204 		return true;
2205 
2206 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2207 	vm_page_mapping* mapping;
2208 	while ((mapping = iterator.Next()) != NULL) {
2209 		VMArea* area = mapping->area;
2210 		VMTranslationMap* map = area->address_space->TranslationMap();
2211 
2212 		addr_t physicalAddress;
2213 		uint32 flags;
2214 		map->Lock();
2215 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2216 		map->Unlock();
2217 
2218 		if ((flags & PAGE_MODIFIED) != 0)
2219 			return true;
2220 	}
2221 
2222 	return false;
2223 }
2224 
2225 
2226 /*!	The page's cache must be locked.
2227 */
2228 void
2229 vm_clear_map_flags(vm_page* page, uint32 flags)
2230 {
2231 	if ((flags & PAGE_ACCESSED) != 0)
2232 		page->accessed = false;
2233 	if ((flags & PAGE_MODIFIED) != 0)
2234 		page->modified = false;
2235 
2236 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2237 	vm_page_mapping* mapping;
2238 	while ((mapping = iterator.Next()) != NULL) {
2239 		VMArea* area = mapping->area;
2240 		VMTranslationMap* map = area->address_space->TranslationMap();
2241 
2242 		map->Lock();
2243 		map->ClearFlags(virtual_page_address(area, page), flags);
2244 		map->Unlock();
2245 	}
2246 }
2247 
2248 
2249 /*!	Removes all mappings from a page.
2250 	After you've called this function, the page is unmapped from memory and
2251 	the page's \c accessed and \c modified flags have been updated according
2252 	to the state of the mappings.
2253 	The page's cache must be locked.
2254 */
2255 void
2256 vm_remove_all_page_mappings(vm_page* page)
2257 {
2258 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2259 		VMArea* area = mapping->area;
2260 		VMTranslationMap* map = area->address_space->TranslationMap();
2261 		addr_t address = virtual_page_address(area, page);
2262 		map->UnmapPage(area, address, false);
2263 	}
2264 }
2265 
2266 
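/*!	Clears the accessed flags of all of the page's mappings as well as the
	page's own \c accessed flag, and updates the page's \c modified flag from
	the mappings.
	\return The number of accessed flags that were set, including the page's
		own \c accessed flag.
	The page's cache must be locked.
*/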
2267 int32
2268 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2269 {
2270 	int32 count = 0;
2271 
2272 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2273 	vm_page_mapping* mapping;
2274 	while ((mapping = iterator.Next()) != NULL) {
2275 		VMArea* area = mapping->area;
2276 		VMTranslationMap* map = area->address_space->TranslationMap();
2277 
2278 		bool modified;
2279 		if (map->ClearAccessedAndModified(area,
2280 				virtual_page_address(area, page), false, modified)) {
2281 			count++;
2282 		}
2283 
2284 		page->modified |= modified;
2285 	}
2286 
2287 
2289 		count++;
2290 		page->accessed = false;
2291 	}
2292 
2293 	return count;
2294 }
2295 
2296 
2297 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2298 	mappings.
2299 	The function iterates through the page mappings and removes them until
2300 	encountering one that has been accessed. From then on it will continue to
2301 	iterate, but only clear the accessed flag of the mapping. The page's
2302 	\c modified bit will be updated accordingly, and the \c accessed bit
2303 	will be cleared.
2304 	\return The number of mapping accessed bits encountered, including the
2305 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2306 		of the page have been removed.
2307 */
2308 int32
2309 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2310 {
2311 	if (page->accessed)
2312 		return vm_clear_page_mapping_accessed_flags(page);
2313 
2314 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2315 		VMArea* area = mapping->area;
2316 		VMTranslationMap* map = area->address_space->TranslationMap();
2317 		addr_t address = virtual_page_address(area, page);
2318 		bool modified = false;
2319 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2320 			page->accessed = true;
2321 			page->modified |= modified;
2322 			return vm_clear_page_mapping_accessed_flags(page);
2323 		}
2324 		page->modified |= modified;
2325 	}
2326 
2327 	return 0;
2328 }
2329 
2330 
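/*!	Debugger command backing "dl", "dw", "ds", "db" and "string": dumps
	memory at the given address, optionally treating it as a physical address
	(-p/--physical).
*/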
2331 static int
2332 display_mem(int argc, char** argv)
2333 {
2334 	bool physical = false;
2335 	addr_t copyAddress;
2336 	int32 displayWidth;
2337 	int32 itemSize;
2338 	int32 num = -1;
2339 	addr_t address;
2340 	int i = 1, j;
2341 
2342 	if (argc > 1 && argv[1][0] == '-') {
2343 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2344 			physical = true;
2345 			i++;
2346 		} else
2347 			i = 99;
2348 	}
2349 
2350 	if (argc < i + 1 || argc > i + 2) {
2351 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2352 			"\tdl - 8 bytes\n"
2353 			"\tdw - 4 bytes\n"
2354 			"\tds - 2 bytes\n"
2355 			"\tdb - 1 byte\n"
2356 			"\tstring - a whole string\n"
2357 			"  -p or --physical only allows memory from a single page to be "
2358 			"displayed.\n");
2359 		return 0;
2360 	}
2361 
2362 	address = parse_expression(argv[i]);
2363 
2364 	if (argc > i + 1)
2365 		num = parse_expression(argv[i + 1]);
2366 
2367 	// build the format string
2368 	if (strcmp(argv[0], "db") == 0) {
2369 		itemSize = 1;
2370 		displayWidth = 16;
2371 	} else if (strcmp(argv[0], "ds") == 0) {
2372 		itemSize = 2;
2373 		displayWidth = 8;
2374 	} else if (strcmp(argv[0], "dw") == 0) {
2375 		itemSize = 4;
2376 		displayWidth = 4;
2377 	} else if (strcmp(argv[0], "dl") == 0) {
2378 		itemSize = 8;
2379 		displayWidth = 2;
2380 	} else if (strcmp(argv[0], "string") == 0) {
2381 		itemSize = 1;
2382 		displayWidth = -1;
2383 	} else {
2384 		kprintf("display_mem called in an invalid way!\n");
2385 		return 0;
2386 	}
2387 
2388 	if (num <= 0)
2389 		num = displayWidth;
2390 
2391 	void* physicalPageHandle = NULL;
2392 
2393 	if (physical) {
2394 		int32 offset = address & (B_PAGE_SIZE - 1);
2395 		if (num * itemSize + offset > B_PAGE_SIZE) {
2396 			num = (B_PAGE_SIZE - offset) / itemSize;
2397 			kprintf("NOTE: number of bytes has been cut to page size\n");
2398 		}
2399 
2400 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2401 
2402 		if (vm_get_physical_page_debug(address, &copyAddress,
2403 				&physicalPageHandle) != B_OK) {
2404 			kprintf("getting the hardware page failed.");
2405 			return 0;
2406 		}
2407 
2408 		address += offset;
2409 		copyAddress += offset;
2410 	} else
2411 		copyAddress = address;
2412 
2413 	if (!strcmp(argv[0], "string")) {
2414 		kprintf("%p \"", (char*)copyAddress);
2415 
2416 		// string mode
2417 		for (i = 0; true; i++) {
2418 			char c;
2419 			if (debug_memcpy(&c, (char*)copyAddress + i, 1) != B_OK
2420 				|| c == '\0')
2421 				break;
2422 
2423 			if (c == '\n')
2424 				kprintf("\\n");
2425 			else if (c == '\t')
2426 				kprintf("\\t");
2427 			else {
2428 				if (!isprint(c))
2429 					c = '.';
2430 
2431 				kprintf("%c", c);
2432 			}
2433 		}
2434 
2435 		kprintf("\"\n");
2436 	} else {
2437 		// number mode
2438 		for (i = 0; i < num; i++) {
2439 			uint32 value;
2440 
2441 			if ((i % displayWidth) == 0) {
2442 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2443 				if (i != 0)
2444 					kprintf("\n");
2445 
2446 				kprintf("[0x%lx]  ", address + i * itemSize);
2447 
2448 				for (j = 0; j < displayed; j++) {
2449 					char c;
2450 					if (debug_memcpy(&c, (char*)copyAddress + i * itemSize + j,
2451 							1) != B_OK) {
2452 						displayed = j;
2453 						break;
2454 					}
2455 					if (!isprint(c))
2456 						c = '.';
2457 
2458 					kprintf("%c", c);
2459 				}
2460 				if (num > displayWidth) {
2461 					// make sure the spacing in the last line is correct
2462 					for (j = displayed; j < displayWidth * itemSize; j++)
2463 						kprintf(" ");
2464 				}
2465 				kprintf("  ");
2466 			}
2467 
2468 			if (debug_memcpy(&value, (uint8*)copyAddress + i * itemSize,
2469 					itemSize) != B_OK) {
2470 				kprintf("read fault");
2471 				break;
2472 			}
2473 
2474 			switch (itemSize) {
2475 				case 1:
2476 					kprintf(" %02x", *(uint8*)&value);
2477 					break;
2478 				case 2:
2479 					kprintf(" %04x", *(uint16*)&value);
2480 					break;
2481 				case 4:
2482 					kprintf(" %08lx", *(uint32*)&value);
2483 					break;
2484 				case 8:
2485 					kprintf(" %016Lx", *(uint64*)&value);
2486 					break;
2487 			}
2488 		}
2489 
2490 		kprintf("\n");
2491 	}
2492 
2493 	if (physical) {
2494 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
2495 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
2496 	}
2497 	return 0;
2498 }
2499 
2500 
2501 static void
2502 dump_cache_tree_recursively(VMCache* cache, int level,
2503 	VMCache* highlightCache)
2504 {
2505 	// print this cache
2506 	for (int i = 0; i < level; i++)
2507 		kprintf("  ");
2508 	if (cache == highlightCache)
2509 		kprintf("%p <--\n", cache);
2510 	else
2511 		kprintf("%p\n", cache);
2512 
2513 	// recursively print its consumers
2514 	VMCache* consumer = NULL;
2515 	while ((consumer = (VMCache*)list_get_next_item(&cache->consumers,
2516 			consumer)) != NULL) {
2517 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
2518 	}
2519 }
2520 
2521 
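/*!	Debugger command "cache_tree": prints the tree of caches the given cache
	belongs to, starting at its root (transitive source) cache.
*/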
2522 static int
2523 dump_cache_tree(int argc, char** argv)
2524 {
2525 	if (argc != 2 || !strcmp(argv[1], "--help")) {
2526 		kprintf("usage: %s <address>\n", argv[0]);
2527 		return 0;
2528 	}
2529 
2530 	addr_t address = parse_expression(argv[1]);
2531 	if (address == 0)
2532 		return 0;
2533 
2534 	VMCache* cache = (VMCache*)address;
2535 	VMCache* root = cache;
2536 
2537 	// find the root cache (the transitive source)
2538 	while (root->source != NULL)
2539 		root = root->source;
2540 
2541 	dump_cache_tree_recursively(root, 0, cache);
2542 
2543 	return 0;
2544 }
2545 
2546 
2547 static const char*
2548 cache_type_to_string(int32 type)
2549 {
2550 	switch (type) {
2551 		case CACHE_TYPE_RAM:
2552 			return "RAM";
2553 		case CACHE_TYPE_DEVICE:
2554 			return "device";
2555 		case CACHE_TYPE_VNODE:
2556 			return "vnode";
2557 		case CACHE_TYPE_NULL:
2558 			return "null";
2559 
2560 		default:
2561 			return "unknown";
2562 	}
2563 }
2564 
2565 
2566 #if DEBUG_CACHE_LIST
2567 
2568 static void
2569 update_cache_info_recursively(VMCache* cache, cache_info& info)
2570 {
2571 	info.page_count += cache->page_count;
2572 	if (cache->type == CACHE_TYPE_RAM)
2573 		info.committed += cache->committed_size;
2574 
2575 	// recurse
2576 	VMCache* consumer = NULL;
2577 	while ((consumer = (VMCache*)list_get_next_item(&cache->consumers,
2578 			consumer)) != NULL) {
2579 		update_cache_info_recursively(consumer, info);
2580 	}
2581 }
2582 
2583 
2584 static int
2585 cache_info_compare_page_count(const void* _a, const void* _b)
2586 {
2587 	const cache_info* a = (const cache_info*)_a;
2588 	const cache_info* b = (const cache_info*)_b;
2589 	if (a->page_count == b->page_count)
2590 		return 0;
2591 	return a->page_count < b->page_count ? 1 : -1;
2592 }
2593 
2594 
2595 static int
2596 cache_info_compare_committed(const void* _a, const void* _b)
2597 {
2598 	const cache_info* a = (const cache_info*)_a;
2599 	const cache_info* b = (const cache_info*)_b;
2600 	if (a->committed == b->committed)
2601 		return 0;
2602 	return a->committed < b->committed ? 1 : -1;
2603 }
2604 
2605 
2606 static void
2607 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
2608 {
2609 	for (int i = 0; i < level; i++)
2610 		kprintf("  ");
2611 
2612 	kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache,
2613 		cache_type_to_string(cache->type), cache->virtual_base,
2614 		cache->virtual_end, cache->page_count);
2615 
2616 	if (level == 0)
2617 		kprintf("/%lu", info.page_count);
2618 
2619 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
2620 		kprintf(", committed: %lld", cache->committed_size);
2621 
2622 		if (level == 0)
2623 			kprintf("/%lu", info.committed);
2624 	}
2625 
2626 	// areas
2627 	if (cache->areas != NULL) {
2628 		VMArea* area = cache->areas;
2629 		kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name,
2630 			area->address_space->ID());
2631 
2632 		while (area->cache_next != NULL) {
2633 			area = area->cache_next;
2634 			kprintf(", %ld", area->id);
2635 		}
2636 	}
2637 
2638 	kputs("\n");
2639 
2640 	// recurse
2641 	VMCache* consumer = NULL;
2642 	while ((consumer = (VMCache*)list_get_next_item(&cache->consumers,
2643 			consumer)) != NULL) {
2644 		dump_caches_recursively(consumer, info, level + 1);
2645 	}
2646 }
2647 
2648 
2649 static int
2650 dump_caches(int argc, char** argv)
2651 {
2652 	if (sCacheInfoTable == NULL) {
2653 		kprintf("No cache info table!\n");
2654 		return 0;
2655 	}
2656 
2657 	bool sortByPageCount = true;
2658 
2659 	for (int32 i = 1; i < argc; i++) {
2660 		if (strcmp(argv[i], "-c") == 0) {
2661 			sortByPageCount = false;
2662 		} else {
2663 			print_debugger_command_usage(argv[0]);
2664 			return 0;
2665 		}
2666 	}
2667 
2668 	uint32 totalCount = 0;
2669 	uint32 rootCount = 0;
2670 	off_t totalCommitted = 0;
2671 	page_num_t totalPages = 0;
2672 
2673 	VMCache* cache = gDebugCacheList;
2674 	while (cache) {
2675 		totalCount++;
2676 		if (cache->source == NULL) {
2677 			cache_info stackInfo;
2678 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
2679 				? sCacheInfoTable[rootCount] : stackInfo;
2680 			rootCount++;
2681 			info.cache = cache;
2682 			info.page_count = 0;
2683 			info.committed = 0;
2684 			update_cache_info_recursively(cache, info);
2685 			totalCommitted += info.committed;
2686 			totalPages += info.page_count;
2687 		}
2688 
2689 		cache = cache->debug_next;
2690 	}
2691 
2692 	if (rootCount <= (uint32)kCacheInfoTableCount) {
2693 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
2694 			sortByPageCount
2695 				? &cache_info_compare_page_count
2696 				: &cache_info_compare_committed);
2697 	}
2698 
2699 	kprintf("total committed memory: %lld, total used pages: %lu\n",
2700 		totalCommitted, totalPages);
2701 	kprintf("%lu caches (%lu root caches), sorted by %s per cache "
2702 		"tree...\n\n", totalCount, rootCount,
2703 		sortByPageCount ? "page count" : "committed size");
2704 
2705 	if (rootCount <= (uint32)kCacheInfoTableCount) {
2706 		for (uint32 i = 0; i < rootCount; i++) {
2707 			cache_info& info = sCacheInfoTable[i];
2708 			dump_caches_recursively(info.cache, info, 0);
2709 		}
2710 	} else
2711 		kprintf("Cache info table too small! Can't sort and print caches!\n");
2712 
2713 	return 0;
2714 }
2715 
2716 #endif	// DEBUG_CACHE_LIST
2717 
2718 
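/*!	Debugger command "cache": prints the interesting fields of the given
	VMCache as well as its areas and consumers; with -p all of its pages are
	listed, too.
*/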
2719 static int
2720 dump_cache(int argc, char** argv)
2721 {
2722 	VMCache* cache;
2723 	bool showPages = false;
2724 	int i = 1;
2725 
2726 	if (argc < 2 || !strcmp(argv[1], "--help")) {
2727 		kprintf("usage: %s [-ps] <address>\n"
2728 			"  if -p is specified, all pages are shown, if -s is used\n"
2729 			"  only the cache info is shown respectively.\n", argv[0]);
2730 		return 0;
2731 	}
2732 	while (argv[i][0] == '-') {
2733 		char* arg = argv[i] + 1;
2734 		while (arg[0]) {
2735 			if (arg[0] == 'p')
2736 				showPages = true;
2737 			arg++;
2738 		}
2739 		i++;
2740 	}
2741 	if (argv[i] == NULL) {
2742 		kprintf("%s: invalid argument, pass address\n", argv[0]);
2743 		return 0;
2744 	}
2745 
2746 	addr_t address = parse_expression(argv[i]);
2747 	if (address == 0)
2748 		return 0;
2749 
2750 	cache = (VMCache*)address;
2751 
2752 	kprintf("CACHE %p:\n", cache);
2753 	kprintf("  ref_count:    %ld\n", cache->RefCount());
2754 	kprintf("  source:       %p\n", cache->source);
2755 	kprintf("  type:         %s\n", cache_type_to_string(cache->type));
2756 	kprintf("  virtual_base: 0x%Lx\n", cache->virtual_base);
2757 	kprintf("  virtual_end:  0x%Lx\n", cache->virtual_end);
2758 	kprintf("  temporary:    %ld\n", cache->temporary);
2759 	kprintf("  scan_skip:    %ld\n", cache->scan_skip);
2760 	kprintf("  lock:         %p\n", cache->GetLock());
2761 #if KDEBUG
2762 	kprintf("  lock.holder:  %ld\n", cache->GetLock()->holder);
2763 #endif
2764 	kprintf("  areas:\n");
2765 
2766 	for (VMArea* area = cache->areas; area != NULL; area = area->cache_next) {
2767 		kprintf("    area 0x%lx, %s\n", area->id, area->name);
2768 		kprintf("\tbase_addr:  0x%lx, size: 0x%lx\n", area->Base(),
2769 			area->Size());
2770 		kprintf("\tprotection: 0x%lx\n", area->protection);
2771 		kprintf("\towner:      0x%lx\n", area->address_space->ID());
2772 	}
2773 
2774 	kprintf("  consumers:\n");
2775 	VMCache* consumer = NULL;
2776 	while ((consumer = (VMCache*)list_get_next_item(&cache->consumers,
2777 				consumer)) != NULL) {
2778 		kprintf("\t%p\n", consumer);
2779 	}
2780 
2781 	kprintf("  pages:\n");
2782 	if (showPages) {
2783 		for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2784 				vm_page* page = it.Next();) {
2785 			if (!vm_page_is_dummy(page)) {
2786 				kprintf("\t%p ppn 0x%lx offset 0x%lx state %u (%s) "
2787 					"wired_count %u\n", page, page->physical_page_number,
2788 					page->cache_offset, page->State(),
2789 					page_state_to_string(page->State()), page->wired_count);
2790 			} else {
2791 				kprintf("\t%p DUMMY PAGE state %u (%s)\n",
2792 					page, page->State(), page_state_to_string(page->State()));
2793 			}
2794 		}
2795 	} else
2796 		kprintf("\t%ld in cache\n", cache->page_count);
2797 
2798 	set_debug_variable("_sourceCache", (addr_t)cache->source);
2799 
2800 	return 0;
2801 }
2802 
2803 
2804 static void
2805 dump_area_struct(VMArea* area, bool mappings)
2806 {
2807 	kprintf("AREA: %p\n", area);
2808 	kprintf("name:\t\t'%s'\n", area->name);
2809 	kprintf("owner:\t\t0x%lx\n", area->address_space->ID());
2810 	kprintf("id:\t\t0x%lx\n", area->id);
2811 	kprintf("base:\t\t0x%lx\n", area->Base());
2812 	kprintf("size:\t\t0x%lx\n", area->Size());
2813 	kprintf("protection:\t0x%lx\n", area->protection);
2814 	kprintf("wiring:\t\t0x%x\n", area->wiring);
2815 	kprintf("memory_type:\t0x%x\n", area->memory_type);
2816 	kprintf("cache:\t\t%p\n", area->cache);
2817 	kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type));
2818 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
2819 	kprintf("cache_next:\t%p\n", area->cache_next);
2820 	kprintf("cache_prev:\t%p\n", area->cache_prev);
2821 
2822 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
2823 	if (mappings) {
2824 		kprintf("page mappings:\n");
2825 		while (iterator.HasNext()) {
2826 			vm_page_mapping* mapping = iterator.Next();
2827 			kprintf("  %p", mapping->page);
2828 		}
2829 		kprintf("\n");
2830 	} else {
2831 		uint32 count = 0;
2832 		while (iterator.Next() != NULL) {
2833 			count++;
2834 		}
2835 		kprintf("page mappings:\t%lu\n", count);
2836 	}
2837 }
2838 
2839 
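/*!	Debugger command "area": dumps all areas matching the given ID, address,
	or name (see the usage text for details).
*/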
2840 static int
2841 dump_area(int argc, char** argv)
2842 {
2843 	bool mappings = false;
2844 	bool found = false;
2845 	int32 index = 1;
2846 	VMArea* area;
2847 	addr_t num;
2848 
2849 	if (argc < 2 || !strcmp(argv[1], "--help")) {
2850 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
2851 			"All areas matching either id/address/name are listed. You can\n"
2852 			"force to check only a specific item by prefixing the specifier\n"
2853 			"with the id/contains/address/name keywords.\n"
2854 			"-m shows the area's mappings as well.\n");
2855 		return 0;
2856 	}
2857 
2858 	if (!strcmp(argv[1], "-m")) {
2859 		mappings = true;
2860 		index++;
2861 	}
2862 
2863 	int32 mode = 0xf;
2864 	if (!strcmp(argv[index], "id"))
2865 		mode = 1;
2866 	else if (!strcmp(argv[index], "contains"))
2867 		mode = 2;
2868 	else if (!strcmp(argv[index], "name"))
2869 		mode = 4;
2870 	else if (!strcmp(argv[index], "address"))
2871 		mode = 0;
2872 	if (mode != 0xf)
2873 		index++;
2874 
2875 	if (index >= argc) {
2876 		kprintf("No area specifier given.\n");
2877 		return 0;
2878 	}
2879 
2880 	num = parse_expression(argv[index]);
2881 
2882 	if (mode == 0) {
2883 		dump_area_struct((struct VMArea*)num, mappings);
2884 	} else {
2885 		// walk through the area list, looking for the arguments as a name
2886 
2887 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
2888 		while ((area = it.Next()) != NULL) {
2889 			if (((mode & 4) != 0 && area->name != NULL
2890 					&& !strcmp(argv[index], area->name))
2891 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
2892 					|| (((mode & 2) != 0 && area->Base() <= num
2893 						&& area->Base() + area->Size() > num))))) {
2894 				dump_area_struct(area, mappings);
2895 				found = true;
2896 			}
2897 		}
2898 
2899 		if (!found)
2900 			kprintf("could not find area %s (%ld)\n", argv[index], num);
2901 	}
2902 
2903 	return 0;
2904 }
2905 
2906 
2907 static int
2908 dump_area_list(int argc, char** argv)
2909 {
2910 	VMArea* area;
2911 	const char* name = NULL;
2912 	int32 id = 0;
2913 
2914 	if (argc > 1) {
2915 		id = parse_expression(argv[1]);
2916 		if (id == 0)
2917 			name = argv[1];
2918 	}
2919 
2920 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
2921 
2922 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
2923 	while ((area = it.Next()) != NULL) {
2924 		if ((id != 0 && area->address_space->ID() != id)
2925 			|| (name != NULL && strstr(area->name, name) == NULL))
2926 			continue;
2927 
2928 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id,
2929 			(void*)area->Base(), (void*)area->Size(), area->protection,
2930 			area->wiring, area->name);
2931 	}
2932 	return 0;
2933 }
2934 
2935 
2936 static int
2937 dump_available_memory(int argc, char** argv)
2938 {
2939 	kprintf("Available memory: %Ld/%lu bytes\n",
2940 		sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE);
2941 	return 0;
2942 }
2943 
2944 
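/*!	Deletes all areas and reserved address ranges of the given address space.
*/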
2945 status_t
2946 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
2947 {
2948 	TRACE(("vm_delete_areas: called on address space 0x%lx\n",
2949 		addressSpace->ID()));
2950 
2951 	addressSpace->WriteLock();
2952 
2953 	// remove all reserved areas in this address space
2954 	addressSpace->UnreserveAllAddressRanges(0);
2955 
2956 	// delete all the areas in this address space
2957 	while (VMArea* area = addressSpace->FirstArea())
2958 		delete_area(addressSpace, area, deletingAddressSpace);
2959 
2960 	addressSpace->WriteUnlock();
2961 	return B_OK;
2962 }
2963 
2964 
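/*!	Returns the ID of the area containing \a address in the current team's
	address space (or in the kernel address space for kernel addresses).
	If \a kernel is \c false, areas that are not user accessible are not
	reported.
*/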
2965 static area_id
2966 vm_area_for(addr_t address, bool kernel)
2967 {
2968 	team_id team;
2969 	if (IS_USER_ADDRESS(address)) {
2970 		// we try the user team address space, if any
2971 		team = VMAddressSpace::CurrentID();
2972 		if (team < 0)
2973 			return team;
2974 	} else
2975 		team = VMAddressSpace::KernelID();
2976 
2977 	AddressSpaceReadLocker locker(team);
2978 	if (!locker.IsLocked())
2979 		return B_BAD_TEAM_ID;
2980 
2981 	VMArea* area = locker.AddressSpace()->LookupArea(address);
2982 	if (area != NULL) {
2983 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
2984 			return B_ERROR;
2985 
2986 		return area->id;
2987 	}
2988 
2989 	return B_ERROR;
2990 }
2991 
2992 
2993 /*!	Frees physical pages that were used during the boot process.
2994 	\a end is inclusive.
2995 */
2996 static void
2997 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
2998 {
2999 	// free all physical pages in the specified range
3000 
3001 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3002 		addr_t physicalAddress;
3003 		uint32 flags;
3004 
3005 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3006 			&& (flags & PAGE_PRESENT) != 0) {
3007 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3008 			if (page != NULL && page->State() != PAGE_STATE_FREE
3009 					 && page->State() != PAGE_STATE_CLEAR
3010 					 && page->State() != PAGE_STATE_UNUSED) {
3011 				DEBUG_PAGE_ACCESS_START(page);
3012 				vm_page_set_state(page, PAGE_STATE_FREE);
3013 			}
3014 		}
3015 	}
3016 
3017 	// unmap the memory
3018 	map->Unmap(start, end);
3019 }
3020 
3021 
3022 void
3023 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3024 {
3025 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3026 	addr_t end = start + (size - 1);
3027 	addr_t lastEnd = start;
3028 
3029 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3030 		(void*)start, (void*)end));
3031 
3032 	// The areas are sorted in virtual address space order, so
3033 	// we just have to find the holes between them that fall
3034 	// into the range we should dispose of.
3035 
3036 	map->Lock();
3037 
3038 	for (VMAddressSpace::AreaIterator it
3039 				= VMAddressSpace::Kernel()->GetAreaIterator();
3040 			VMArea* area = it.Next();) {
3041 		addr_t areaStart = area->Base();
3042 		addr_t areaEnd = areaStart + (area->Size() - 1);
3043 
3044 		if (areaEnd < start)
3045 			continue;
3046 
3047 		if (areaStart > end) {
3048 			// we are done, the area is already beyond what we have to free
3049 			end = areaStart - 1;
3050 			break;
3051 		}
3052 
3053 		if (areaStart > lastEnd) {
3054 			// this is something we can free
3055 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3056 				(void*)areaStart));
3057 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3058 		}
3059 
3060 		if (areaEnd >= end) {
3061 			lastEnd = areaEnd;
3062 				// no +1 to prevent potential overflow
3063 			break;
3064 		}
3065 
3066 		lastEnd = areaEnd + 1;
3067 	}
3068 
3069 	if (lastEnd < end) {
3070 		// we can also get rid of some space at the end of the area
3071 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3072 			(void*)end));
3073 		unmap_and_free_physical_pages(map, lastEnd, end);
3074 	}
3075 
3076 	map->Unlock();
3077 }
3078 
3079 
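/*!	Creates "<image>_text" and "<image>_data" areas covering the text and
	data segments of a preloaded (boot loader provided) image.
*/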
3080 static void
3081 create_preloaded_image_areas(struct preloaded_image* image)
3082 {
3083 	char name[B_OS_NAME_LENGTH];
3084 	void* address;
3085 	int32 length;
3086 
3087 	// use file name to create a good area name
3088 	char* fileName = strrchr(image->name, '/');
3089 	if (fileName == NULL)
3090 		fileName = image->name;
3091 	else
3092 		fileName++;
3093 
3094 	length = strlen(fileName);
3095 	// make sure there is enough space for the suffix
3096 	if (length > 25)
3097 		length = 25;
3098 
3099 	memcpy(name, fileName, length);
3100 	strcpy(name + length, "_text");
3101 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3102 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3103 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3104 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3105 		// this will later be remapped read-only/executable by the
3106 		// ELF initialization code
3107 
3108 	strcpy(name + length, "_data");
3109 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3110 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3111 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3112 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3113 }
3114 
3115 
3116 /*!	Frees the areas that were created for the kernel_args ranges (see
3117 	allocate_kernel_args()). Any boot loader resources contained in those
3118 	ranges must not be accessed anymore past this point.
3119 */
3120 void
3121 vm_free_kernel_args(kernel_args* args)
3122 {
3123 	uint32 i;
3124 
3125 	TRACE(("vm_free_kernel_args()\n"));
3126 
3127 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3128 		area_id area = area_for((void*)args->kernel_args_range[i].start);
3129 		if (area >= B_OK)
3130 			delete_area(area);
3131 	}
3132 }
3133 
3134 
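/*!	Creates areas covering the kernel_args ranges, so that this memory stays
	reserved until vm_free_kernel_args() is called.
*/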
3135 static void
3136 allocate_kernel_args(kernel_args* args)
3137 {
3138 	TRACE(("allocate_kernel_args()\n"));
3139 
3140 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3141 		void* address = (void*)args->kernel_args_range[i].start;
3142 
3143 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3144 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3145 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3146 	}
3147 }
3148 
3149 
3150 static void
3151 unreserve_boot_loader_ranges(kernel_args* args)
3152 {
3153 	TRACE(("unreserve_boot_loader_ranges()\n"));
3154 
3155 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3156 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3157 			(void*)args->virtual_allocated_range[i].start,
3158 			args->virtual_allocated_range[i].size);
3159 	}
3160 }
3161 
3162 
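/*!	Reserves the kernel address ranges the boot loader has already allocated,
	so that they won't be handed out again before
	unreserve_boot_loader_ranges() is called.
*/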
3163 static void
3164 reserve_boot_loader_ranges(kernel_args* args)
3165 {
3166 	TRACE(("reserve_boot_loader_ranges()\n"));
3167 
3168 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3169 		void* address = (void*)args->virtual_allocated_range[i].start;
3170 
3171 		// If the address is not a kernel address, we just skip it. The
3172 		// architecture-specific code has to deal with it.
3173 		if (!IS_KERNEL_ADDRESS(address)) {
3174 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
3175 				address, args->virtual_allocated_range[i].size);
3176 			continue;
3177 		}
3178 
3179 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3180 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3181 		if (status < B_OK)
3182 			panic("could not reserve boot loader ranges\n");
3183 	}
3184 }
3185 
3186 
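/*!	Allocates \a size bytes of kernel virtual address space from the
	kernel_args' virtual_allocated_range array, for use before the VM is
	fully initialized. If \a blockAlign is \c true, the returned base is
	aligned to \a size.
	\return The allocated base address, or 0 on failure.
*/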
3187 static addr_t
3188 allocate_early_virtual(kernel_args* args, size_t size, bool blockAlign)
3189 {
3190 	size = PAGE_ALIGN(size);
3191 
3192 	// find a slot in the virtual allocation addr range
3193 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3194 		// check to see if the space between this one and the last is big enough
3195 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3196 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3197 			+ args->virtual_allocated_range[i - 1].size;
3198 
3199 		addr_t base = blockAlign
3200 			? ROUNDUP(previousRangeEnd, size) : previousRangeEnd;
3201 
3202 		if (base >= KERNEL_BASE && base < rangeStart
3203 				&& rangeStart - base >= size) {
3204 			args->virtual_allocated_range[i - 1].size
3205 				+= base + size - previousRangeEnd;
3206 			return base;
3207 		}
3208 	}
3209 
3210 	// We didn't find a gap between the allocated ranges. That's OK;
3211 	// see if there's a gap after the last one.
3212 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3213 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3214 		+ args->virtual_allocated_range[lastEntryIndex].size;
3215 	addr_t base = blockAlign ? ROUNDUP(lastRangeEnd, size) : lastRangeEnd;
3216 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3217 		args->virtual_allocated_range[lastEntryIndex].size
3218 			+= base + size - lastRangeEnd;
3219 		return base;
3220 	}
3221 
3222 	// see if there's a gap before the first one
3223 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3224 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3225 		base = rangeStart - size;
3226 		if (blockAlign)
3227 			base = ROUNDDOWN(base, size);
3228 
3229 		if (base >= KERNEL_BASE) {
3230 			args->virtual_allocated_range[0].start = base;
3231 			args->virtual_allocated_range[0].size += rangeStart - base;
3232 			return base;
3233 		}
3234 	}
3235 
3236 	return 0;
3237 }
3238 
3239 
3240 static bool
3241 is_page_in_physical_memory_range(kernel_args* args, addr_t address)
3242 {
3243 	// TODO: horrible brute-force method of determining if the page can be
3244 	// allocated
3245 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3246 		if (address >= args->physical_memory_range[i].start
3247 			&& address < args->physical_memory_range[i].start
3248 				+ args->physical_memory_range[i].size)
3249 			return true;
3250 	}
3251 	return false;
3252 }
3253 
3254 
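/*!	Allocates a physical page by extending one of the kernel_args' already
	allocated physical ranges.
	\return The physical page number of the allocated page, or 0 on failure.
*/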
3255 static addr_t
3256 allocate_early_physical_page(kernel_args* args)
3257 {
3258 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3259 		addr_t nextPage;
3260 
3261 		nextPage = args->physical_allocated_range[i].start
3262 			+ args->physical_allocated_range[i].size;
3263 		// see if the page after the next allocated paddr run can be allocated
3264 		if (i + 1 < args->num_physical_allocated_ranges
3265 			&& args->physical_allocated_range[i + 1].size != 0) {
3266 			// see if the next page will collide with the next allocated range
3267 			if (nextPage >= args->physical_allocated_range[i+1].start)
3268 				continue;
3269 		}
3270 		// see if the next physical page fits in the memory block
3271 		if (is_page_in_physical_memory_range(args, nextPage)) {
3272 			// we got one!
3273 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3274 			return nextPage / B_PAGE_SIZE;
3275 		}
3276 	}
3277 
3278 	return 0;
3279 		// could not allocate a block
3280 }
3281 
3282 
3283 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3284 	allocate some pages before the VM is completely up.
3285 */
3286 addr_t
3287 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3288 	uint32 attributes, bool blockAlign)
3289 {
3290 	if (physicalSize > virtualSize)
3291 		physicalSize = virtualSize;
3292 
3293 	// find the vaddr to allocate at
3294 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, blockAlign);
3295 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress);
3296 
3297 	// map the pages
3298 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3299 		addr_t physicalAddress = allocate_early_physical_page(args);
3300 		if (physicalAddress == 0)
3301 			panic("error allocating early page!\n");
3302 
3303 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3304 
3305 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3306 			physicalAddress * B_PAGE_SIZE, attributes,
3307 			&allocate_early_physical_page);
3308 	}
3309 
3310 	return virtualBase;
3311 }
3312 
3313 
3314 /*!	The main entry point to initialize the VM. */
3315 status_t
3316 vm_init(kernel_args* args)
3317 {
3318 	struct preloaded_image* image;
3319 	void* address;
3320 	status_t err = 0;
3321 	uint32 i;
3322 
3323 	TRACE(("vm_init: entry\n"));
3324 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3325 	err = arch_vm_init(args);
3326 
3327 	// initialize some globals
3328 	vm_page_init_num_pages(args);
3329 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3330 
3331 	size_t heapSize = INITIAL_HEAP_SIZE;
3332 	// try to accommodate low-memory systems
3333 	while (heapSize > sAvailableMemory / 8)
3334 		heapSize /= 2;
3335 	if (heapSize < 1024 * 1024)
3336 		panic("vm_init: go buy some RAM please.");
3337 
3338 	slab_init(args);
3339 
3340 #if	!USE_SLAB_ALLOCATOR_FOR_MALLOC
3341 	// map in the new heap and initialize it
3342 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3343 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, false);
3344 	TRACE(("heap at 0x%lx\n", heapBase));
3345 	heap_init(heapBase, heapSize);
3346 #endif
3347 
3348 	// initialize the free page list and physical page mapper
3349 	vm_page_init(args);
3350 
3351 	// initialize the hash table that stores the pages mapped to caches
3352 	vm_cache_init(args);
3353 
3354 	{
3355 		status_t error = VMAreaHash::Init();
3356 		if (error != B_OK)
3357 			panic("vm_init: error initializing area hash table\n");
3358 	}
3359 
3360 	VMAddressSpace::Init();
3361 	reserve_boot_loader_ranges(args);
3362 
3363 	// Do any further initialization that the architecture-dependent layers may
3364 	// need now
3365 	arch_vm_translation_map_init_post_area(args);
3366 	arch_vm_init_post_area(args);
3367 	vm_page_init_post_area(args);
3368 	slab_init_post_area();
3369 
3370 	// allocate areas to represent stuff that already exists
3371 
3372 #if	!USE_SLAB_ALLOCATOR_FOR_MALLOC
3373 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
3374 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
3375 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3376 #endif
3377 
3378 	allocate_kernel_args(args);
3379 
3380 	create_preloaded_image_areas(&args->kernel_image);
3381 
3382 	// allocate areas for preloaded images
3383 	for (image = args->preloaded_images; image != NULL; image = image->next)
3384 		create_preloaded_image_areas(image);
3385 
3386 	// allocate kernel stacks
3387 	for (i = 0; i < args->num_cpus; i++) {
3388 		char name[64];
3389 
3390 		sprintf(name, "idle thread %lu kstack", i + 1);
3391 		address = (void*)args->cpu_kstack[i].start;
3392 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
3393 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3394 	}
3395 
3396 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
3397 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
3398 
3399 	// create the object cache for the page mappings
3400 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
3401 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
3402 		NULL, NULL);
3403 	if (gPageMappingsObjectCache == NULL)
3404 		panic("failed to create page mappings object cache");
3405 
3406 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
3407 
3408 #if DEBUG_CACHE_LIST
3409 	create_area("cache info table", (void**)&sCacheInfoTable,
3410 		B_ANY_KERNEL_ADDRESS,
3411 		ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
3412 		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3413 #endif	// DEBUG_CACHE_LIST
3414 
3415 	// add some debugger commands
3416 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
3417 	add_debugger_command("area", &dump_area,
3418 		"Dump info about a particular area");
3419 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
3420 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
3421 #if DEBUG_CACHE_LIST
3422 	add_debugger_command_etc("caches", &dump_caches,
3423 		"List all VMCache trees",
3424 		"[ \"-c\" ]\n"
3425 		"All cache trees are listed sorted in decreasing order by number of\n"
3426 		"used pages or, if \"-c\" is specified, by size of committed memory.\n",
3427 		0);
3428 #endif
3429 	add_debugger_command("avail", &dump_available_memory,
3430 		"Dump available memory");
3431 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
3432 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
3433 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
3434 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
3435 	add_debugger_command("string", &display_mem, "dump strings");
3436 
3437 	TRACE(("vm_init: exit\n"));
3438 
3439 	vm_cache_init_post_heap();
3440 
3441 	return err;
3442 }
3443 
3444 
3445 status_t
3446 vm_init_post_sem(kernel_args* args)
3447 {
3448 	// This frees all unused boot loader resources and makes their space
3449 	// available again.
3450 	arch_vm_init_end(args);
3451 	unreserve_boot_loader_ranges(args);
3452 
3453 	// Fill in all of the semaphores that were not allocated before.
3454 	// Since we're still single-threaded and only the kernel address space
3455 	// exists, it isn't that hard to find all of the ones we need to create.
3456 
3457 	arch_vm_translation_map_init_post_sem(args);
3458 	VMAddressSpace::InitPostSem();
3459 
3460 	slab_init_post_sem();
3461 
3462 #if	!USE_SLAB_ALLOCATOR_FOR_MALLOC
3463 	heap_init_post_sem();
3464 #endif
3465 
3466 	return B_OK;
3467 }
3468 
3469 
3470 status_t
3471 vm_init_post_thread(kernel_args* args)
3472 {
3473 	vm_page_init_post_thread(args);
3474 	slab_init_post_thread();
3475 	return heap_init_post_thread();
3476 }
3477 
3478 
3479 status_t
3480 vm_init_post_modules(kernel_args* args)
3481 {
3482 	return arch_vm_init_post_modules(args);
3483 }
3484 
3485 
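/*!	Increments the current thread's page_faults_allowed counter; the
	counterpart of forbid_page_faults().
*/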
3486 void
3487 permit_page_faults(void)
3488 {
3489 	struct thread* thread = thread_get_current_thread();
3490 	if (thread != NULL)
3491 		atomic_add(&thread->page_faults_allowed, 1);
3492 }
3493 
3494 
3495 void
3496 forbid_page_faults(void)
3497 {
3498 	struct thread* thread = thread_get_current_thread();
3499 	if (thread != NULL)
3500 		atomic_add(&thread->page_faults_allowed, -1);
3501 }
3502 
3503 
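/*!	The generic page fault handler, invoked by the architecture-specific
	fault handling code. It tries to resolve the fault via vm_soft_fault();
	on failure it either sets \a newIP to the thread's kernel fault handler,
	panics on unhandled kernel faults, or delivers a SIGSEGV to the faulting
	userland thread.
*/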
3504 status_t
3505 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
3506 	addr_t* newIP)
3507 {
3508 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
3509 		faultAddress));
3510 
3511 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
3512 
3513 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
3514 	VMAddressSpace* addressSpace = NULL;
3515 
3516 	status_t status = B_OK;
3517 	*newIP = 0;
3518 	atomic_add((int32*)&sPageFaults, 1);
3519 
3520 	if (IS_KERNEL_ADDRESS(pageAddress)) {
3521 		addressSpace = VMAddressSpace::GetKernel();
3522 	} else if (IS_USER_ADDRESS(pageAddress)) {
3523 		addressSpace = VMAddressSpace::GetCurrent();
3524 		if (addressSpace == NULL) {
3525 			if (!isUser) {
3526 				dprintf("vm_page_fault: kernel thread accessing invalid user "
3527 					"memory!\n");
3528 				status = B_BAD_ADDRESS;
3529 				TPF(PageFaultError(-1,
3530 					VMPageFaultTracing
3531 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
3532 			} else {
3533 				// XXX weird state.
3534 				panic("vm_page_fault: non kernel thread accessing user memory "
3535 					"that doesn't exist!\n");
3536 				status = B_BAD_ADDRESS;
3537 			}
3538 		}
3539 	} else {
3540 		// The hit was probably in the 64k DMZ between kernel and user space;
3541 		// this keeps a user space thread from passing a buffer that crosses
3542 		// into kernel space.
3543 		status = B_BAD_ADDRESS;
3544 		TPF(PageFaultError(-1,
3545 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
3546 	}
3547 
3548 	if (status == B_OK)
3549 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser);
3550 
3551 	if (status < B_OK) {
3552 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
3553 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
3554 			strerror(status), address, faultAddress, isWrite, isUser,
3555 			thread_get_current_thread_id());
3556 		if (!isUser) {
3557 			struct thread* thread = thread_get_current_thread();
3558 			if (thread != NULL && thread->fault_handler != 0) {
3559 				// this will cause the arch-dependent page fault handler to
3560 				// modify the IP on the interrupt frame or whatever to return
3561 				// to this address
3562 				*newIP = thread->fault_handler;
3563 			} else {
3564 				// unhandled page fault in the kernel
3565 				panic("vm_page_fault: unhandled page fault in kernel space at "
3566 					"0x%lx, ip 0x%lx\n", address, faultAddress);
3567 			}
3568 		} else {
3569 #if 1
3570 			addressSpace->ReadLock();
3571 
3572 			// TODO: remove me once we have proper userland debugging support
3573 			// (and tools)
3574 			VMArea* area = addressSpace->LookupArea(faultAddress);
3575 
3576 			struct thread* thread = thread_get_current_thread();
3577 			dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) "
3578 				"tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n",
3579 				thread->name, thread->id, thread->team->name, thread->team->id,
3580 				isWrite ? "write" : "read", address, faultAddress,
3581 				area ? area->name : "???",
3582 				faultAddress - (area ? area->Base() : 0x0));
3583 
3584 			// We can print a stack trace of the userland thread here.
3585 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
3586 // fault and someone is already waiting for a write lock on the same address
3587 // space. This thread will then try to acquire the lock again and will
3588 // be queued after the writer.
3589 #	if 0
3590 			if (area) {
3591 				struct stack_frame {
3592 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
3593 						struct stack_frame*	previous;
3594 						void*				return_address;
3595 					#else
3596 						// ...
3597 					#warning writeme
3598 					#endif
3599 				} frame;
3600 #		ifdef __INTEL__
3601 				struct iframe* iframe = i386_get_user_iframe();
3602 				if (iframe == NULL)
3603 					panic("iframe is NULL!");
3604 
3605 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
3606 					sizeof(struct stack_frame));
3607 #		elif defined(__POWERPC__)
3608 				struct iframe* iframe = ppc_get_user_iframe();
3609 				if (iframe == NULL)
3610 					panic("iframe is NULL!");
3611 
3612 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
3613 					sizeof(struct stack_frame));
3614 #		else
3615 #			warning "vm_page_fault() stack trace won't work"
3616 				status = B_ERROR;
3617 #		endif
3618 
3619 				dprintf("stack trace:\n");
3620 				int32 maxFrames = 50;
3621 				while (status == B_OK && --maxFrames >= 0
3622 						&& frame.return_address != NULL) {
3623 					dprintf("  %p", frame.return_address);
3624 					area = addressSpace->LookupArea(
3625 						(addr_t)frame.return_address);
3626 					if (area) {
3627 						dprintf(" (%s + %#lx)", area->name,
3628 							(addr_t)frame.return_address - area->Base());
3629 					}
3630 					dprintf("\n");
3631 
3632 					status = user_memcpy(&frame, frame.previous,
3633 						sizeof(struct stack_frame));
3634 				}
3635 			}
3636 #	endif	// 0 (stack trace)
3637 
3638 			addressSpace->ReadUnlock();
3639 #endif
3640 
3641 			// TODO: the fault_callback is a temporary solution for vm86
3642 			if (thread->fault_callback == NULL
3643 				|| thread->fault_callback(address, faultAddress, isWrite)) {
3644 				// If the thread has a signal handler for SIGSEGV, we simply
3645 				// send it the signal. Otherwise we notify the user debugger
3646 				// first.
3647 				struct sigaction action;
3648 				if (sigaction(SIGSEGV, NULL, &action) == 0
3649 					&& action.sa_handler != SIG_DFL
3650 					&& action.sa_handler != SIG_IGN) {
3651 					send_signal(thread->id, SIGSEGV);
3652 				} else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION,
3653 						SIGSEGV)) {
3654 					send_signal(thread->id, SIGSEGV);
3655 				}
3656 			}
3657 		}
3658 	}
3659 
3660 	if (addressSpace != NULL)
3661 		addressSpace->Put();
3662 
3663 	return B_HANDLED_INTERRUPT;
3664 }
3665 
3666 
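/*!	Bundles all the state needed to resolve a single page fault: the address
	space read lock, the cache chain lock, the translation map, the top cache
	and the offset of the faulting page within it, the reserved pages, and the
	results (\c page, \c restart) filled in by fault_get_page(). The
	destructor unlocks everything and unreserves the pages again.
*/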
3667 struct PageFaultContext {
3668 	AddressSpaceReadLocker	addressSpaceLocker;
3669 	VMCacheChainLocker		cacheChainLocker;
3670 
3671 	VMTranslationMap*		map;
3672 	VMCache*				topCache;
3673 	off_t					cacheOffset;
3674 	vm_page_reservation		reservation;
3675 	bool					isWrite;
3676 
3677 	// return values
3678 	vm_page*				page;
3679 	bool					restart;
3680 
3681 
3682 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
3683 		:
3684 		addressSpaceLocker(addressSpace, true),
3685 		map(addressSpace->TranslationMap()),
3686 		isWrite(isWrite)
3687 	{
3688 	}
3689 
3690 	~PageFaultContext()
3691 	{
3692 		UnlockAll();
3693 		vm_page_unreserve_pages(&reservation);
3694 	}
3695 
3696 	void Prepare(VMCache* topCache, off_t cacheOffset)
3697 	{
3698 		this->topCache = topCache;
3699 		this->cacheOffset = cacheOffset;
3700 		page = NULL;
3701 		restart = false;
3702 
3703 		cacheChainLocker.SetTo(topCache);
3704 	}
3705 
3706 	void UnlockAll(VMCache* exceptCache = NULL)
3707 	{
3708 		topCache = NULL;
3709 		addressSpaceLocker.Unlock();
3710 		cacheChainLocker.Unlock(exceptCache);
3711 	}
3712 };
3713 
3714 
3715 /*!	Gets the page that should be mapped into the area.
3716 	Returns an error code other than \c B_OK, if the page couldn't be found or
3717 	paged in. The locking state of the address space and the caches is undefined
3718 	in that case.
3719 	Returns \c B_OK with \c context.restart set to \c true, if the function
3720 	had to unlock the address space and all caches and is supposed to be called
3721 	again.
3722 	Returns \c B_OK with \c context.restart set to \c false, if the page was
3723 	found. It is returned in \c context.page. The address space will still be
3724 	locked as well as all caches starting from the top cache to at least the
3725 	cache the page lives in.
3726 */
3727 static status_t
3728 fault_get_page(PageFaultContext& context)
3729 {
3730 	VMCache* cache = context.topCache;
3731 	VMCache* lastCache = NULL;
3732 	vm_page* page = NULL;
3733 
3734 	while (cache != NULL) {
3735 		// We already hold the lock of the cache at this point.
3736 
3737 		lastCache = cache;
3738 
3739 		for (;;) {
3740 			page = cache->LookupPage(context.cacheOffset);
3741 			if (page == NULL || !page->busy) {
3742 				// Either there is no page or there is one and it is not busy.
3743 				break;
3744 			}
3745 
3746 			// page must be busy -- wait for it to become unbusy
3747 			context.UnlockAll(cache);
3748 			cache->ReleaseRefLocked();
3749 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
3750 
3751 			// restart the whole process
3752 			context.restart = true;
3753 			return B_OK;
3754 		}
3755 
3756 		if (page != NULL)
3757 			break;
3758 
3759 		// The current cache does not contain the page we're looking for.
3760 
3761 		// see if the backing store has it
3762 		if (cache->HasPage(context.cacheOffset)) {
3763 			// insert a fresh page and mark it busy -- we're going to read it in
3764 			page = vm_page_allocate_page(&context.reservation,
3765 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
3766 			cache->InsertPage(page, context.cacheOffset);
3767 
3768 			// We need to unlock all caches and the address space while reading
3769 			// the page in. Keep a reference to the cache around.
3770 			cache->AcquireRefLocked();
3771 			context.UnlockAll();
3772 
3773 			// read the page in
3774 			iovec vec;
3775 			vec.iov_base = (void*)(page->physical_page_number * B_PAGE_SIZE);
3776 			size_t bytesRead = vec.iov_len = B_PAGE_SIZE;
3777 
3778 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
3779 				B_PHYSICAL_IO_REQUEST, &bytesRead);
3780 
3781 			cache->Lock();
3782 
3783 			if (status < B_OK) {
3784 				// on error remove and free the page
3785 				dprintf("reading page from cache %p returned: %s!\n",
3786 					cache, strerror(status));
3787 
3788 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
3789 				cache->RemovePage(page);
3790 				vm_page_set_state(page, PAGE_STATE_FREE);
3791 
3792 				cache->ReleaseRefAndUnlock();
3793 				return status;
3794 			}
3795 
3796 			// mark the page unbusy again
3797 			cache->MarkPageUnbusy(page);
3798 
3799 			DEBUG_PAGE_ACCESS_END(page);
3800 
3801 			// Since we needed to unlock everything temporarily, the area
3802 			// situation might have changed. So we need to restart the whole
3803 			// process.
3804 			cache->ReleaseRefAndUnlock();
3805 			context.restart = true;
3806 			return B_OK;
3807 		}
3808 
3809 		cache = context.cacheChainLocker.LockSourceCache();
3810 	}
3811 
3812 	if (page == NULL) {
3813 		// There was no adequate page; determine the cache for a clean one.
3814 		// Read-only pages go into the deepest cache; only the topmost cache
3815 		// may have direct write access.
3816 		cache = context.isWrite ? context.topCache : lastCache;
3817 
3818 		// allocate a clean page
3819 		page = vm_page_allocate_page(&context.reservation,
3820 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
3821 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n",
3822 			page->physical_page_number));
3823 
3824 		// insert the new page into our cache
3825 		cache->InsertPage(page, context.cacheOffset);
3826 	} else if (page->Cache() != context.topCache && context.isWrite) {
3827 		// We have a page that has the data we want, but in the wrong cache
3828 		// object so we need to copy it and stick it into the top cache.
3829 		vm_page* sourcePage = page;
3830 
3831 		// TODO: If memory is low, it might be a good idea to steal the page
3832 		// from our source cache -- if possible, that is.
3833 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
3834 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
3835 
3836 		// To avoid needlessly killing concurrency, we unlock all caches but the
3837 		// top one while copying the page. Lacking another mechanism to ensure
3838 		// that the source page doesn't disappear, we mark it busy.
3839 		sourcePage->busy = true;
3840 		context.cacheChainLocker.UnlockKeepRefs(true);
3841 
3842 		// copy the page
3843 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
3844 			sourcePage->physical_page_number * B_PAGE_SIZE);
3845 
3846 		context.cacheChainLocker.RelockCaches(true);
3847 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
3848 
3849 		// insert the new page into our cache
3850 		context.topCache->InsertPage(page, context.cacheOffset);
3851 	} else
3852 		DEBUG_PAGE_ACCESS_START(page);
3853 
3854 	context.page = page;
3855 	return B_OK;
3856 }
3857 
3858 
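/*!	Resolves a page fault at \a originalAddress in \a addressSpace: looks up
	the area, validates the access against \a isWrite and \a isUser, lets the
	top cache's Fault() hook or fault_get_page() provide the page, and finally
	maps it. Pages that don't live in the area's top cache are mapped
	read-only, so that copy-on-write semantics are preserved.
	Returns \c B_OK on success, or an error code such as \c B_BAD_ADDRESS or
	\c B_PERMISSION_DENIED.
*/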
3859 static status_t
3860 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
3861 	bool isWrite, bool isUser)
3862 {
3863 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
3864 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
3865 
3866 	PageFaultContext context(addressSpace, isWrite);
3867 
3868 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
3869 	status_t status = B_OK;
3870 
3871 	addressSpace->IncrementFaultCount();
3872 
3873 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
3874 	// the pages upfront makes sure we don't have any cache locked, so that the
3875 	// page daemon/thief can do their job without problems.
3876 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
3877 		originalAddress);
3878 	context.addressSpaceLocker.Unlock();
3879 	vm_page_reserve_pages(&context.reservation, reservePages,
3880 		addressSpace == VMAddressSpace::Kernel()
3881 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3882 
3883 	while (true) {
3884 		context.addressSpaceLocker.Lock();
3885 
3886 		// get the area the fault was in
3887 		VMArea* area = addressSpace->LookupArea(address);
3888 		if (area == NULL) {
3889 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
3890 				"space\n", originalAddress);
3891 			TPF(PageFaultError(-1,
3892 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
3893 			status = B_BAD_ADDRESS;
3894 			break;
3895 		}
3896 
3897 		// check permissions
3898 		uint32 protection = get_area_page_protection(area, address);
3899 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
3900 			dprintf("user access on kernel area 0x%lx at %p\n", area->id,
3901 				(void*)originalAddress);
3902 			TPF(PageFaultError(area->id,
3903 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
3904 			status = B_PERMISSION_DENIED;
3905 			break;
3906 		}
3907 		if (isWrite && (protection
3908 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
3909 			dprintf("write access attempted on write-protected area 0x%lx at"
3910 				" %p\n", area->id, (void*)originalAddress);
3911 			TPF(PageFaultError(area->id,
3912 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
3913 			status = B_PERMISSION_DENIED;
3914 			break;
3915 		} else if (!isWrite && (protection
3916 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
3917 			dprintf("read access attempted on read-protected area 0x%lx at"
3918 				" %p\n", area->id, (void*)originalAddress);
3919 			TPF(PageFaultError(area->id,
3920 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
3921 			status = B_PERMISSION_DENIED;
3922 			break;
3923 		}
3924 
3925 		// We have the area, it was a valid access, so let's try to resolve the
3926 		// page fault now.
3927 		// First, the topmost cache of the area is investigated.
3928 
3929 		context.Prepare(vm_area_get_locked_cache(area),
3930 			address - area->Base() + area->cache_offset);
3931 
3932 		// See if this cache has a fault handler -- this will do all the work
3933 		// for us.
3934 		{
3935 			// Note, since the page fault is resolved with interrupts enabled,
3936 			// the fault handler could be called more than once for the same
3937 			// reason -- the store must take this into account.
3938 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
3939 			if (status != B_BAD_HANDLER)
3940 				break;
3941 		}
3942 
3943 		// The topmost cache has no fault handler, so let's see if the cache or
3944 		// its sources already have the page we're searching for (we're going
3945 		// from top to bottom).
3946 		status = fault_get_page(context);
3947 		if (status != B_OK) {
3948 			TPF(PageFaultError(area->id, status));
3949 			break;
3950 		}
3951 
3952 		if (context.restart)
3953 			continue;
3954 
3955 		// All went fine, all there is left to do is to map the page into the
3956 		// address space.
3957 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
3958 			context.page));
3959 
3960 		// If the page doesn't reside in the area's cache, we need to make sure
3961 		// it's mapped in read-only, so that we cannot overwrite someone else's
3962 		// data (copy-on-write)
3963 		uint32 newProtection = protection;
3964 		if (context.page->Cache() != context.topCache && !isWrite)
3965 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
3966 
3967 		bool unmapPage = false;
3968 		bool mapPage = true;
3969 
3970 		// check whether there's already a page mapped at the address
3971 		context.map->Lock();
3972 
3973 		addr_t physicalAddress;
3974 		uint32 flags;
3975 		vm_page* mappedPage = NULL;
3976 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
3977 			&& (flags & PAGE_PRESENT) != 0
3978 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
3979 				!= NULL) {
3980 			// Yep there's already a page. If it's ours, we can simply adjust
3981 			// its protection. Otherwise we have to unmap it.
3982 			if (mappedPage == context.page) {
3983 				context.map->ProtectPage(area, address, newProtection);
3984 				mapPage = false;
3985 			} else
3986 				unmapPage = true;
3987 		}
3988 
3989 		context.map->Unlock();
3990 
3991 		if (unmapPage) {
3992 			// Note: The mapped page is a page of a lower cache. We are
3993 			// guaranteed to have that cache locked, our new page is a copy of
3994 			// that page, and the page is not busy. The logic for that guarantee
3995 			// is as follows: Since the page is mapped, it must live in the top
3996 			// cache (ruled out above) or any of its lower caches, and there is
3997 			// (was before the new page was inserted) no other page in any
3998 			// cache between the top cache and the page's cache (otherwise that
3999 			// would be mapped instead). That in turn means that our algorithm
4000 			// must have found it and therefore it cannot be busy either.
4001 			DEBUG_PAGE_ACCESS_START(mappedPage);
4002 			unmap_page(area, address);
4003 			DEBUG_PAGE_ACCESS_END(mappedPage);
4004 		}
4005 
4006 		if (mapPage) {
4007 			if (map_page(area, context.page, address, newProtection,
4008 					&context.reservation) != B_OK) {
4009 				// Mapping can only fail when the page mapping object couldn't
4010 				// be allocated. Save for the missing mapping, everything is
4011 				// fine, though. We'll simply leave and probably fault again.
4012 				// To make sure we'll have more luck next time, we ensure that
4013 				// the minimum object reserve is available.
4014 				DEBUG_PAGE_ACCESS_END(context.page);
4015 
4016 				context.UnlockAll();
4017 
4018 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4019 						!= B_OK) {
4020 					// Apparently the situation is serious. Let's get ourselves
4021 					// killed.
4022 					status = B_NO_MEMORY;
4023 				}
4024 
4025 				break;
4026 			}
4027 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4028 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4029 
4030 		DEBUG_PAGE_ACCESS_END(context.page);
4031 
4032 		break;
4033 	}
4034 
4035 	return status;
4036 }
4037 
4038 
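// The following six functions are thin wrappers around the physical page
// mapper (sPhysicalPageMapper): they provide temporary kernel mappings for
// physical pages -- in generic, current-CPU-only, and kernel debugger
// variants -- and release them again.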
4039 status_t
4040 vm_get_physical_page(addr_t paddr, addr_t* _vaddr, void** _handle)
4041 {
4042 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4043 }
4044 
4045 status_t
4046 vm_put_physical_page(addr_t vaddr, void* handle)
4047 {
4048 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4049 }
4050 
4051 
4052 status_t
4053 vm_get_physical_page_current_cpu(addr_t paddr, addr_t* _vaddr, void** _handle)
4054 {
4055 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4056 }
4057 
4058 status_t
4059 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4060 {
4061 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4062 }
4063 
4064 
4065 status_t
4066 vm_get_physical_page_debug(addr_t paddr, addr_t* _vaddr, void** _handle)
4067 {
4068 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4069 }
4070 
4071 status_t
4072 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4073 {
4074 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4075 }
4076 
4077 
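/*!	Fills in the given \c system_memory_info: swap usage, the total amount of
	memory, the page fault count, and -- guarded by \c sAvailableMemoryLock --
	the currently free and needed memory.
*/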
4078 void
4079 vm_get_info(system_memory_info* info)
4080 {
4081 	swap_get_info(info);
4082 
4083 	info->max_memory = vm_page_num_pages() * B_PAGE_SIZE;
4084 	info->page_faults = sPageFaults;
4085 
4086 	MutexLocker locker(sAvailableMemoryLock);
4087 	info->free_memory = sAvailableMemory;
4088 	info->needed_memory = sNeededMemory;
4089 }
4090 
4091 
4092 uint32
4093 vm_num_page_faults(void)
4094 {
4095 	return sPageFaults;
4096 }
4097 
4098 
4099 off_t
4100 vm_available_memory(void)
4101 {
4102 	MutexLocker locker(sAvailableMemoryLock);
4103 	return sAvailableMemory;
4104 }
4105 
4106 
4107 off_t
4108 vm_available_not_needed_memory(void)
4109 {
4110 	MutexLocker locker(sAvailableMemoryLock);
4111 	return sAvailableMemory - sNeededMemory;
4112 }
4113 
4114 
4115 size_t
4116 vm_kernel_address_space_left(void)
4117 {
4118 	return VMAddressSpace::Kernel()->FreeSpace();
4119 }
4120 
4121 
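/*!	Returns \a amount bytes to the pool of available memory. Counterpart to
	vm_try_reserve_memory().
*/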
4122 void
4123 vm_unreserve_memory(size_t amount)
4124 {
4125 	mutex_lock(&sAvailableMemoryLock);
4126 
4127 	sAvailableMemory += amount;
4128 
4129 	mutex_unlock(&sAvailableMemoryLock);
4130 }
4131 
4132 
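/*!	Tries to reserve \a amount bytes of memory, always keeping at least
	\c kMemoryReserveForPriority[priority] bytes untouched. If the memory
	isn't available right away and \a timeout is positive, the low resource
	manager is poked and the attempt is repeated until the (relative) timeout
	expires.
	Returns \c B_OK on success, \c B_NO_MEMORY otherwise. A minimal usage
	sketch (with a hypothetical caller-chosen size and timeout):
	\code
	if (vm_try_reserve_memory(size, VM_PRIORITY_USER, 1000000) != B_OK)
		return B_NO_MEMORY;
	// ... commit the memory; on the matching release path:
	vm_unreserve_memory(size);
	\endcode
*/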
4133 status_t
4134 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4135 {
4136 	size_t reserve = kMemoryReserveForPriority[priority];
4137 
4138 	MutexLocker locker(sAvailableMemoryLock);
4139 
4140 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4141 
4142 	if (sAvailableMemory >= amount + reserve) {
4143 		sAvailableMemory -= amount;
4144 		return B_OK;
4145 	}
4146 
4147 	if (timeout <= 0)
4148 		return B_NO_MEMORY;
4149 
4150 	// turn timeout into an absolute timeout
4151 	timeout += system_time();
4152 
4153 	// loop until we've got the memory or the timeout occurs
4154 	do {
4155 		sNeededMemory += amount;
4156 
4157 		// call the low resource manager
4158 		locker.Unlock();
4159 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4160 			B_ABSOLUTE_TIMEOUT, timeout);
4161 		locker.Lock();
4162 
4163 		sNeededMemory -= amount;
4164 
4165 		if (sAvailableMemory >= amount + reserve) {
4166 			sAvailableMemory -= amount;
4167 			return B_OK;
4168 		}
4169 	} while (timeout > system_time());
4170 
4171 	return B_NO_MEMORY;
4172 }
4173 
4174 
4175 status_t
4176 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type)
4177 {
4178 	AddressSpaceReadLocker locker;
4179 	VMArea* area;
4180 	status_t status = locker.SetFromArea(id, area);
4181 	if (status != B_OK)
4182 		return status;
4183 
4184 	return arch_vm_set_memory_type(area, physicalBase, type);
4185 }
4186 
4187 
4188 /*!	This function enforces some protection properties:
4189 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4190 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4191 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4192 	   and B_KERNEL_WRITE_AREA.
4193 */
4194 static void
4195 fix_protection(uint32* protection)
4196 {
4197 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4198 		if ((*protection & B_USER_PROTECTION) == 0
4199 			|| (*protection & B_WRITE_AREA) != 0)
4200 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4201 		else
4202 			*protection |= B_KERNEL_READ_AREA;
4203 	}
4204 }
4205 
4206 
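/*!	Copies the publicly visible properties of \a area into \a info. The
	area's cache is locked temporarily to compute \c ram_size.
*/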
4207 static void
4208 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4209 {
4210 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4211 	info->area = area->id;
4212 	info->address = (void*)area->Base();
4213 	info->size = area->Size();
4214 	info->protection = area->protection;
4215 	info->lock = B_FULL_LOCK;
4216 	info->team = area->address_space->ID();
4217 	info->copy_count = 0;
4218 	info->in_count = 0;
4219 	info->out_count = 0;
4220 		// TODO: retrieve real values here!
4221 
4222 	VMCache* cache = vm_area_get_locked_cache(area);
4223 
4224 	// Note, this is a simplification; the cache could be larger than this area
4225 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4226 
4227 	vm_area_put_locked_cache(cache);
4228 }
4229 
4230 
4231 /*!
4232 	Tests whether the area that contains the specified address actually
4233 	exists and whether it needs any kind of locking.
4234 	Used by both lock_memory() and unlock_memory().
4235 */
4236 static status_t
4237 test_lock_memory(VMAddressSpace* addressSpace, addr_t address,
4238 	bool& needsLocking)
4239 {
4240 	addressSpace->ReadLock();
4241 
4242 	VMArea* area = addressSpace->LookupArea(address);
4243 	if (area != NULL) {
4244 		// This determines if we need to lock the memory at all
4245 		needsLocking = area->cache_type != CACHE_TYPE_NULL
4246 			&& area->cache_type != CACHE_TYPE_DEVICE
4247 			&& area->wiring != B_FULL_LOCK
4248 			&& area->wiring != B_CONTIGUOUS;
4249 	}
4250 
4251 	addressSpace->ReadUnlock();
4252 
4253 	if (area == NULL)
4254 		return B_BAD_ADDRESS;
4255 
4256 	return B_OK;
4257 }
4258 
4259 
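/*!	Resizes the area with ID \a areaID to \a newSize. Since all areas sharing
	the same (RAM) cache are backed by it, all of them are resized: growing the
	cache is attempted first (and may fail), shrinking additionally unmaps the
	pages beyond the new size. If resizing one of the areas fails, the already
	resized ones are restored to their original size. If \a kernel is \c false,
	areas protected with \c B_KERNEL_AREA cannot be resized.
*/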
4260 static status_t
4261 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4262 {
4263 	// is newSize a multiple of B_PAGE_SIZE?
4264 	if (newSize & (B_PAGE_SIZE - 1))
4265 		return B_BAD_VALUE;
4266 
4267 	// lock all affected address spaces and the cache
4268 	VMArea* area;
4269 	VMCache* cache;
4270 
4271 	MultiAddressSpaceLocker locker;
4272 	status_t status = locker.AddAreaCacheAndLock(areaID, true, true, area,
4273 		&cache);
4274 	if (status != B_OK)
4275 		return status;
4276 	AreaCacheLocker cacheLocker(cache);	// already locked
4277 
4278 	// enforce restrictions
4279 	if (!kernel) {
4280 		if ((area->protection & B_KERNEL_AREA) != 0)
4281 			return B_NOT_ALLOWED;
4282 		// TODO: Enforce all restrictions (team, etc.)!
4283 	}
4284 
4285 	size_t oldSize = area->Size();
4286 	if (newSize == oldSize)
4287 		return B_OK;
4288 
4289 	// Resize all areas of this area's cache
4290 
4291 	if (cache->type != CACHE_TYPE_RAM)
4292 		return B_NOT_ALLOWED;
4293 
4294 	bool anyKernelArea = false;
4295 	if (oldSize < newSize) {
4296 		// We need to check if all areas of this cache can be resized
4297 		for (VMArea* current = cache->areas; current != NULL;
4298 				current = current->cache_next) {
4299 			if (!current->address_space->CanResizeArea(current, newSize))
4300 				return B_ERROR;
4301 			anyKernelArea |= current->address_space == VMAddressSpace::Kernel();
4302 		}
4303 	}
4304 
4305 	// Okay, looks good so far, so let's do it
4306 
4307 	int priority = kernel && anyKernelArea
4308 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
4309 	uint32 allocationFlags = kernel && anyKernelArea
4310 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
4311 
4312 	if (oldSize < newSize) {
4313 		// Growing the cache can fail, so we do it first.
4314 		status = cache->Resize(cache->virtual_base + newSize, priority);
4315 		if (status != B_OK)
4316 			return status;
4317 	}
4318 
4319 	for (VMArea* current = cache->areas; current != NULL;
4320 			current = current->cache_next) {
4321 		status = current->address_space->ResizeArea(current, newSize,
4322 			allocationFlags);
4323 		if (status != B_OK)
4324 			break;
4325 
4326 		// We also need to unmap all pages beyond the new size, if the area has
4327 		// shrunk
4328 		if (newSize < oldSize) {
4329 			VMCacheChainLocker cacheChainLocker(cache);
4330 			cacheChainLocker.LockAllSourceCaches();
4331 
4332 			unmap_pages(current, current->Base() + newSize,
4333 				oldSize - newSize);
4334 
4335 			cacheChainLocker.Unlock(cache);
4336 		}
4337 	}
4338 
4339 	// shrinking the cache can't fail, so we do it now
4340 	if (status == B_OK && newSize < oldSize)
4341 		status = cache->Resize(cache->virtual_base + newSize, priority);
4342 
4343 	if (status != B_OK) {
4344 		// Something failed -- resize the areas back to their original size.
4345 		// This can fail, too, in which case we're seriously screwed.
4346 		for (VMArea* current = cache->areas; current != NULL;
4347 				current = current->cache_next) {
4348 			if (current->address_space->ResizeArea(current, oldSize,
4349 					allocationFlags) != B_OK) {
4350 				panic("vm_resize_area(): Failed and not being able to restore "
4351 					"original state.");
4352 			}
4353 		}
4354 
4355 		cache->Resize(cache->virtual_base + oldSize, priority);
4356 	}
4357 
4358 	// TODO: we must honour the lock restrictions of this area
4359 	return status;
4360 }
4361 
4362 
4363 status_t
4364 vm_memset_physical(addr_t address, int value, size_t length)
4365 {
4366 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
4367 }
4368 
4369 
4370 status_t
4371 vm_memcpy_from_physical(void* to, addr_t from, size_t length, bool user)
4372 {
4373 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
4374 }
4375 
4376 
4377 status_t
4378 vm_memcpy_to_physical(addr_t to, const void* _from, size_t length, bool user)
4379 {
4380 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
4381 }
4382 
4383 
4384 void
4385 vm_memcpy_physical_page(addr_t to, addr_t from)
4386 {
4387 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
4388 }
4389 
4390 
4391 //	#pragma mark - kernel public API
4392 
4393 
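/*!	Copies \a size bytes from \a from to \a to, using the current thread's
	fault handler to catch faults on userland addresses.
	Returns \c B_OK, or \c B_BAD_ADDRESS if the range overflows the address
	space or the copy faults.
*/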
4394 status_t
4395 user_memcpy(void* to, const void* from, size_t size)
4396 {
4397 	// don't allow address overflows
4398 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
4399 		return B_BAD_ADDRESS;
4400 
4401 	if (arch_cpu_user_memcpy(to, from, size,
4402 			&thread_get_current_thread()->fault_handler) < B_OK)
4403 		return B_BAD_ADDRESS;
4404 
4405 	return B_OK;
4406 }
4407 
4408 
4409 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
4410 	the string in \a to, NULL-terminating the result.
4411 
4412 	\param to Pointer to the destination C-string.
4413 	\param from Pointer to the source C-string.
4414 	\param size Size in bytes of the string buffer pointed to by \a to.
4415 
4416 	\return strlen(\a from), or a negative error code if an address was invalid.
4417 */
4418 ssize_t
4419 user_strlcpy(char* to, const char* from, size_t size)
4420 {
4421 	if (to == NULL && size != 0)
4422 		return B_BAD_VALUE;
4423 	if (from == NULL)
4424 		return B_BAD_ADDRESS;
4425 
4426 	// limit size to avoid address overflows
4427 	size_t maxSize = std::min(size,
4428 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
4429 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
4430 		// the source address might still overflow.
4431 
4432 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize,
4433 		&thread_get_current_thread()->fault_handler);
4434 
4435 	// If we hit the address overflow boundary, fail.
4436 	if (result >= 0 && (size_t)result >= maxSize && maxSize < size)
4437 		return B_BAD_ADDRESS;
4438 
4439 	return result;
4440 }
4441 
4442 
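/*!	Fills \a count bytes at \a s with the value \a c, using the current
	thread's fault handler to catch faults on userland addresses.
	Returns \c B_OK, or \c B_BAD_ADDRESS if the range overflows the address
	space or the access faults.
*/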
4443 status_t
4444 user_memset(void* s, char c, size_t count)
4445 {
4446 	// don't allow address overflows
4447 	if ((addr_t)s + count < (addr_t)s)
4448 		return B_BAD_ADDRESS;
4449 
4450 	if (arch_cpu_user_memset(s, c, count,
4451 			&thread_get_current_thread()->fault_handler) < B_OK)
4452 		return B_BAD_ADDRESS;
4453 
4454 	return B_OK;
4455 }
4456 
4457 
4458 status_t
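/*!	Wires the memory range [\a address, \a address + \a numBytes) in the
	address space of \a team (or in the kernel address space for kernel
	addresses). Pages that aren't mapped yet -- or aren't mapped writable
	although \c B_READ_DEVICE announces a write intent -- are soft-faulted in
	first; afterwards each page's wired count is incremented.
	Each successful call should be balanced by unlock_memory_etc() with the
	same parameters, e.g. (sketch):
	\code
	if (lock_memory_etc(team, buffer, length, B_READ_DEVICE) == B_OK) {
		// ... perform the I/O on the buffer ...
		unlock_memory_etc(team, buffer, length, B_READ_DEVICE);
	}
	\endcode
*/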
4459 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
4460 {
4461 	VMAddressSpace* addressSpace = NULL;
4462 	addr_t unalignedBase = (addr_t)address;
4463 	addr_t end = unalignedBase + numBytes;
4464 	addr_t base = ROUNDDOWN(unalignedBase, B_PAGE_SIZE);
4465 	bool isUser = IS_USER_ADDRESS(address);
4466 	bool needsLocking = true;
4467 
4468 	if (isUser) {
4469 		if (team == B_CURRENT_TEAM)
4470 			addressSpace = VMAddressSpace::GetCurrent();
4471 		else
4472 			addressSpace = VMAddressSpace::Get(team);
4473 	} else
4474 		addressSpace = VMAddressSpace::GetKernel();
4475 	if (addressSpace == NULL)
4476 		return B_ERROR;
4477 
4478 	// test if we're on an area that allows faults at all
4479 
4480 	VMTranslationMap* map = addressSpace->TranslationMap();
4481 
4482 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
4483 	if (status < B_OK)
4484 		goto out;
4485 	if (!needsLocking)
4486 		goto out;
4487 
4488 	for (; base < end; base += B_PAGE_SIZE) {
4489 		addr_t physicalAddress;
4490 		uint32 protection;
4491 		status_t status;
4492 
4493 		map->Lock();
4494 		status = map->Query(base, &physicalAddress, &protection);
4495 		map->Unlock();
4496 
4497 		if (status < B_OK)
4498 			goto out;
4499 
4500 		if ((protection & PAGE_PRESENT) != 0) {
4501 			// if B_READ_DEVICE is set, the caller intends to write to the locked
4502 			// memory, so if it hasn't been mapped writable, we'll try the soft
4503 			// fault anyway
4504 			if ((flags & B_READ_DEVICE) == 0
4505 				|| (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
4506 				// update wiring
4507 				vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
4508 				if (page == NULL)
4509 					panic("couldn't lookup physical page just allocated\n");
4510 
4511 				increment_page_wired_count(page);
4512 				continue;
4513 			}
4514 		}
4515 
4516 		status = vm_soft_fault(addressSpace, base, (flags & B_READ_DEVICE) != 0,
4517 			isUser);
4518 		if (status != B_OK)	{
4519 			dprintf("lock_memory(address = %p, numBytes = %lu, flags = %lu) "
4520 				"failed: %s\n", (void*)unalignedBase, numBytes, flags,
4521 				strerror(status));
4522 			goto out;
4523 		}
4524 
4525 		// TODO: Here's a race condition. We should probably add a parameter
4526 		// to vm_soft_fault() that would cause the page's wired count to be
4527 		// incremented immediately.
4528 		// TODO: After memory has been locked in an area, we need to prevent the
4529 		// area from being deleted, resized, cut, etc. That could be done using
4530 		// a "locked pages" count in VMArea, and maybe a condition variable, if
4531 		// we want to allow waiting for the area to become eligible for these
4532 		// operations again.
4533 
4534 		map->Lock();
4535 		status = map->Query(base, &physicalAddress, &protection);
4536 		map->Unlock();
4537 
4538 		if (status < B_OK)
4539 			goto out;
4540 
4541 		// update wiring
4542 		vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
4543 		if (page == NULL)
4544 			panic("couldn't lookup physical page");
4545 
4546 		increment_page_wired_count(page);
4547 			// TODO: We need the cache to be locked at this point! See TODO
4548 			// above for a possible solution.
4549 	}
4550 
4551 out:
4552 	addressSpace->Put();
4553 	return status;
4554 }
4555 
4556 
4557 status_t
4558 lock_memory(void* address, size_t numBytes, uint32 flags)
4559 {
4560 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
4561 }
4562 
4563 
4564 status_t
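/*!	Counterpart to lock_memory_etc(): decrements the wired count of every page
	in the given range again. The range must have been locked before with the
	same parameters.
*/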
4565 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
4566 {
4567 	VMAddressSpace* addressSpace = NULL;
4568 	addr_t unalignedBase = (addr_t)address;
4569 	addr_t end = unalignedBase + numBytes;
4570 	addr_t base = ROUNDDOWN(unalignedBase, B_PAGE_SIZE);
4571 	bool needsLocking = true;
4572 
4573 	if (IS_USER_ADDRESS(address)) {
4574 		if (team == B_CURRENT_TEAM)
4575 			addressSpace = VMAddressSpace::GetCurrent();
4576 		else
4577 			addressSpace = VMAddressSpace::Get(team);
4578 	} else
4579 		addressSpace = VMAddressSpace::GetKernel();
4580 	if (addressSpace == NULL)
4581 		return B_ERROR;
4582 
4583 	VMTranslationMap* map = addressSpace->TranslationMap();
4584 
4585 	status_t status = test_lock_memory(addressSpace, base, needsLocking);
4586 	if (status < B_OK)
4587 		goto out;
4588 	if (!needsLocking)
4589 		goto out;
4590 
4591 	for (; base < end; base += B_PAGE_SIZE) {
4592 		map->Lock();
4593 
4594 		addr_t physicalAddress;
4595 		uint32 protection;
4596 		status = map->Query(base, &physicalAddress, &protection);
4597 			// TODO: ATM there's no mechanism that guarantees that the page
4598 			// we've marked wired in lock_memory_etc() is the one we find here.
4599 			// If we only locked for reading, the original page might stem from
4600 			// a lower cache and a page fault in the meantime might have mapped
4601 			// a page from the top cache.
4602 			// Moreover fork() can insert a new top cache and re-map pages
4603 			// read-only at any time. This would even cause a violation of the
4604 			// lock_memory() guarantee.
4605 
4606 		map->Unlock();
4607 
4608 		if (status < B_OK)
4609 			goto out;
4610 		if ((protection & PAGE_PRESENT) == 0)
4611 			panic("calling unlock_memory() on unmapped memory!");
4612 
4613 		// update wiring
4614 		vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
4615 		if (page == NULL)
4616 			panic("couldn't lookup physical page");
4617 
4618 		decrement_page_wired_count(page);
4619 			// TODO: We need the cache to be locked at this point!
4620 	}
4621 
4622 out:
4623 	addressSpace->Put();
4624 	return status;
4625 }
4626 
4627 
4628 status_t
4629 unlock_memory(void* address, size_t numBytes, uint32 flags)
4630 {
4631 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
4632 }
4633 
4634 
4635 /*!	Similar to get_memory_map(), but also allows specifying the address space
4636 	for the memory in question and has saner semantics.
4637 	Returns \c B_OK when the complete range could be translated or
4638 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
4639 	case the actual number of entries is written to \c *_numEntries. Any other
4640 	error case indicates complete failure; \c *_numEntries will be set to \c 0
4641 	in this case.
4642 */
4643 status_t
4644 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
4645 	physical_entry* table, uint32* _numEntries)
4646 {
4647 	uint32 numEntries = *_numEntries;
4648 	*_numEntries = 0;
4649 
4650 	VMAddressSpace* addressSpace;
4651 	addr_t virtualAddress = (addr_t)address;
4652 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
4653 	addr_t physicalAddress;
4654 	status_t status = B_OK;
4655 	int32 index = -1;
4656 	addr_t offset = 0;
4657 	bool interrupts = are_interrupts_enabled();
4658 
4659 	TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team,
4660 		address, numBytes, numEntries));
4661 
4662 	if (numEntries == 0 || numBytes == 0)
4663 		return B_BAD_VALUE;
4664 
4665 	// in which address space is the address to be found?
4666 	if (IS_USER_ADDRESS(virtualAddress)) {
4667 		if (team == B_CURRENT_TEAM)
4668 			addressSpace = VMAddressSpace::GetCurrent();
4669 		else
4670 			addressSpace = VMAddressSpace::Get(team);
4671 	} else
4672 		addressSpace = VMAddressSpace::GetKernel();
4673 
4674 	if (addressSpace == NULL)
4675 		return B_ERROR;
4676 
4677 	VMTranslationMap* map = addressSpace->TranslationMap();
4678 
4679 	if (interrupts)
4680 		map->Lock();
4681 
4682 	while (offset < numBytes) {
4683 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
4684 		uint32 flags;
4685 
4686 		if (interrupts) {
4687 			status = map->Query((addr_t)address + offset, &physicalAddress,
4688 				&flags);
4689 		} else {
4690 			status = map->QueryInterrupt((addr_t)address + offset,
4691 				&physicalAddress, &flags);
4692 		}
4693 		if (status < B_OK)
4694 			break;
4695 		if ((flags & PAGE_PRESENT) == 0) {
4696 			panic("get_memory_map() called on unmapped memory!");
4697 			return B_BAD_ADDRESS;
4698 		}
4699 
4700 		if (index < 0 && pageOffset > 0) {
4701 			physicalAddress += pageOffset;
4702 			if (bytes > B_PAGE_SIZE - pageOffset)
4703 				bytes = B_PAGE_SIZE - pageOffset;
4704 		}
4705 
4706 		// need to switch to the next physical_entry?
4707 		if (index < 0 || (addr_t)table[index].address
4708 				!= physicalAddress - table[index].size) {
4709 			if ((uint32)++index + 1 > numEntries) {
4710 				// table too small
4711 				status = B_BUFFER_OVERFLOW;
4712 				break;
4713 			}
4714 			table[index].address = (void*)physicalAddress;
4715 			table[index].size = bytes;
4716 		} else {
4717 			// page fits in the current entry
4718 			table[index].size += bytes;
4719 		}
4720 
4721 		offset += bytes;
4722 	}
4723 
4724 	if (interrupts)
4725 		map->Unlock();
4726 
4727 	if (status != B_OK)
4728 		return status;
4729 
4730 	if ((uint32)index + 1 > numEntries) {
4731 		*_numEntries = index;
4732 		return B_BUFFER_OVERFLOW;
4733 	}
4734 
4735 	*_numEntries = index + 1;
4736 	return B_OK;
4737 }
4738 
4739 
4740 /*!	According to the BeBook, this function should always succeed.
4741 	This is no longer the case.
4742 */
4743 long
4744 get_memory_map(const void* address, ulong numBytes, physical_entry* table,
4745 	long numEntries)
4746 {
4747 	uint32 entriesRead = numEntries;
4748 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
4749 		table, &entriesRead);
4750 	if (error != B_OK)
4751 		return error;
4752 
4753 	// close the entry list
4754 
4755 	// if it's only one entry, we will silently accept the missing ending
4756 	if (numEntries == 1)
4757 		return B_OK;
4758 
4759 	if (entriesRead + 1 > (uint32)numEntries)
4760 		return B_BUFFER_OVERFLOW;
4761 
4762 	table[entriesRead].address = NULL;
4763 	table[entriesRead].size = 0;
4764 
4765 	return B_OK;
4766 }
4767 
4768 
4769 area_id
4770 area_for(void* address)
4771 {
4772 	return vm_area_for((addr_t)address, true);
4773 }
4774 
4775 
4776 area_id
4777 find_area(const char* name)
4778 {
4779 	return VMAreaHash::Find(name);
4780 }
4781 
4782 
4783 status_t
4784 _get_area_info(area_id id, area_info* info, size_t size)
4785 {
4786 	if (size != sizeof(area_info) || info == NULL)
4787 		return B_BAD_VALUE;
4788 
4789 	AddressSpaceReadLocker locker;
4790 	VMArea* area;
4791 	status_t status = locker.SetFromArea(id, area);
4792 	if (status != B_OK)
4793 		return status;
4794 
4795 	fill_area_info(area, info, size);
4796 	return B_OK;
4797 }
4798 
4799 
4800 status_t
4801 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size)
4802 {
4803 	addr_t nextBase = *(addr_t*)cookie;
4804 
4805 	// we're already through the list
4806 	if (nextBase == (addr_t)-1)
4807 		return B_ENTRY_NOT_FOUND;
4808 
4809 	if (team == B_CURRENT_TEAM)
4810 		team = team_get_current_team_id();
4811 
4812 	AddressSpaceReadLocker locker(team);
4813 	if (!locker.IsLocked())
4814 		return B_BAD_TEAM_ID;
4815 
4816 	VMArea* area;
4817 	for (VMAddressSpace::AreaIterator it
4818 				= locker.AddressSpace()->GetAreaIterator();
4819 			(area = it.Next()) != NULL;) {
4820 		if (area->Base() > nextBase)
4821 			break;
4822 	}
4823 
4824 	if (area == NULL) {
4825 		nextBase = (addr_t)-1;
4826 		return B_ENTRY_NOT_FOUND;
4827 	}
4828 
4829 	fill_area_info(area, info, size);
4830 	*cookie = (int32)(area->Base());
4831 		// TODO: Not 64 bit safe!
4832 
4833 	return B_OK;
4834 }
4835 
4836 
4837 status_t
4838 set_area_protection(area_id area, uint32 newProtection)
4839 {
4840 	fix_protection(&newProtection);
4841 
4842 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
4843 		newProtection, true);
4844 }
4845 
4846 
4847 status_t
4848 resize_area(area_id areaID, size_t newSize)
4849 {
4850 	return vm_resize_area(areaID, newSize, true);
4851 }
4852 
4853 
4854 /*!	Transfers the specified area to a new team. The caller must be the owner
4855 	of the area.
4856 */
4857 area_id
4858 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
4859 	bool kernel)
4860 {
4861 	area_info info;
4862 	status_t status = get_area_info(id, &info);
4863 	if (status != B_OK)
4864 		return status;
4865 
4866 	if (info.team != thread_get_current_thread()->team->id)
4867 		return B_PERMISSION_DENIED;
4868 
4869 	area_id clonedArea = vm_clone_area(target, info.name, _address,
4870 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
4871 	if (clonedArea < 0)
4872 		return clonedArea;
4873 
4874 	status = vm_delete_area(info.team, id, kernel);
4875 	if (status != B_OK) {
4876 		vm_delete_area(target, clonedArea, kernel);
4877 		return status;
4878 	}
4879 
4880 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
4881 
4882 	return clonedArea;
4883 }
4884 
4885 
4886 area_id
4887 map_physical_memory(const char* name, void* physicalAddress, size_t numBytes,
4888 	uint32 addressSpec, uint32 protection, void** _virtualAddress)
4889 {
4890 	if (!arch_vm_supports_protection(protection))
4891 		return B_NOT_SUPPORTED;
4892 
4893 	fix_protection(&protection);
4894 
4895 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
4896 		_virtualAddress, addressSpec, numBytes, protection,
4897 		(addr_t)physicalAddress, false);
4898 }
4899 
4900 
4901 area_id
4902 clone_area(const char* name, void** _address, uint32 addressSpec,
4903 	uint32 protection, area_id source)
4904 {
4905 	if ((protection & B_KERNEL_PROTECTION) == 0)
4906 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4907 
4908 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
4909 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
4910 }
4911 
4912 
4913 area_id
4914 create_area_etc(team_id team, const char* name, void** address,
4915 	uint32 addressSpec, uint32 size, uint32 lock, uint32 protection,
4916 	addr_t physicalAddress, uint32 flags)
4917 {
4918 	fix_protection(&protection);
4919 
4920 	return vm_create_anonymous_area(team, (char*)name, address, addressSpec,
4921 		size, lock, protection, physicalAddress, flags, true);
4922 }
4923 
4924 
4925 area_id
4926 create_area(const char* name, void** _address, uint32 addressSpec, size_t size,
4927 	uint32 lock, uint32 protection)
4928 {
4929 	fix_protection(&protection);
4930 
4931 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), (char*)name,
4932 		_address, addressSpec, size, lock, protection, 0, 0, true);
4933 }
4934 
4935 
4936 status_t
4937 delete_area(area_id area)
4938 {
4939 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
4940 }
4941 
4942 
4943 //	#pragma mark - Userland syscalls
4944 
4945 
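/*!	Syscall backend: reserves a range of the calling team's address space.
	Kernel-only address specifications are rejected, and the resulting base
	address is copied back to \a userAddress.
*/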
4946 status_t
4947 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
4948 	addr_t size)
4949 {
4950 	// filter out some unavailable values (for userland)
4951 	switch (addressSpec) {
4952 		case B_ANY_KERNEL_ADDRESS:
4953 		case B_ANY_KERNEL_BLOCK_ADDRESS:
4954 			return B_BAD_VALUE;
4955 	}
4956 
4957 	addr_t address;
4958 
4959 	if (!IS_USER_ADDRESS(userAddress)
4960 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
4961 		return B_BAD_ADDRESS;
4962 
4963 	status_t status = vm_reserve_address_range(
4964 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
4965 		RESERVED_AVOID_BASE);
4966 	if (status != B_OK)
4967 		return status;
4968 
4969 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
4970 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
4971 			(void*)address, size);
4972 		return B_BAD_ADDRESS;
4973 	}
4974 
4975 	return B_OK;
4976 }
4977 
4978 
4979 status_t
4980 _user_unreserve_address_range(addr_t address, addr_t size)
4981 {
4982 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
4983 		(void*)address, size);
4984 }
4985 
4986 
4987 area_id
4988 _user_area_for(void* address)
4989 {
4990 	return vm_area_for((addr_t)address, false);
4991 }
4992 
4993 
4994 area_id
4995 _user_find_area(const char* userName)
4996 {
4997 	char name[B_OS_NAME_LENGTH];
4998 
4999 	if (!IS_USER_ADDRESS(userName)
5000 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5001 		return B_BAD_ADDRESS;
5002 
5003 	return find_area(name);
5004 }
5005 
5006 
5007 status_t
5008 _user_get_area_info(area_id area, area_info* userInfo)
5009 {
5010 	if (!IS_USER_ADDRESS(userInfo))
5011 		return B_BAD_ADDRESS;
5012 
5013 	area_info info;
5014 	status_t status = get_area_info(area, &info);
5015 	if (status < B_OK)
5016 		return status;
5017 
5018 	// TODO: do we want to prevent userland from seeing kernel protections?
5019 	//info.protection &= B_USER_PROTECTION;
5020 
5021 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5022 		return B_BAD_ADDRESS;
5023 
5024 	return status;
5025 }
5026 
5027 
5028 status_t
5029 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo)
5030 {
5031 	int32 cookie;
5032 
5033 	if (!IS_USER_ADDRESS(userCookie)
5034 		|| !IS_USER_ADDRESS(userInfo)
5035 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
5036 		return B_BAD_ADDRESS;
5037 
5038 	area_info info;
5039 	status_t status = _get_next_area_info(team, &cookie, &info,
5040 		sizeof(area_info));
5041 	if (status != B_OK)
5042 		return status;
5043 
5044 	//info.protection &= B_USER_PROTECTION;
5045 
5046 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
5047 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5048 		return B_BAD_ADDRESS;
5049 
5050 	return status;
5051 }
5052 
5053 
5054 status_t
5055 _user_set_area_protection(area_id area, uint32 newProtection)
5056 {
5057 	if ((newProtection & ~B_USER_PROTECTION) != 0)
5058 		return B_BAD_VALUE;
5059 
5060 	fix_protection(&newProtection);
5061 
5062 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
5063 		newProtection, false);
5064 }
5065 
5066 
5067 status_t
5068 _user_resize_area(area_id area, size_t newSize)
5069 {
5070 	// TODO: Since we restrict deleting of areas to those owned by the team,
5071 	// we should also do that for resizing (check other functions, too).
5072 	return vm_resize_area(area, newSize, false);
5073 }
5074 
5075 
5076 area_id
5077 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
5078 	team_id target)
5079 {
5080 	// filter out some unavailable values (for userland)
5081 	switch (addressSpec) {
5082 		case B_ANY_KERNEL_ADDRESS:
5083 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5084 			return B_BAD_VALUE;
5085 	}
5086 
5087 	void* address;
5088 	if (!IS_USER_ADDRESS(userAddress)
5089 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5090 		return B_BAD_ADDRESS;
5091 
5092 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
5093 	if (newArea < B_OK)
5094 		return newArea;
5095 
5096 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
5097 		return B_BAD_ADDRESS;
5098 
5099 	return newArea;
5100 }
5101 
5102 
5103 area_id
5104 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
5105 	uint32 protection, area_id sourceArea)
5106 {
5107 	char name[B_OS_NAME_LENGTH];
5108 	void* address;
5109 
5110 	// filter out some unavailable values (for userland)
5111 	switch (addressSpec) {
5112 		case B_ANY_KERNEL_ADDRESS:
5113 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5114 			return B_BAD_VALUE;
5115 	}
5116 	if ((protection & ~B_USER_PROTECTION) != 0)
5117 		return B_BAD_VALUE;
5118 
5119 	if (!IS_USER_ADDRESS(userName)
5120 		|| !IS_USER_ADDRESS(userAddress)
5121 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
5122 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5123 		return B_BAD_ADDRESS;
5124 
5125 	fix_protection(&protection);
5126 
5127 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
5128 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
5129 		false);
5130 	if (clonedArea < B_OK)
5131 		return clonedArea;
5132 
5133 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5134 		delete_area(clonedArea);
5135 		return B_BAD_ADDRESS;
5136 	}
5137 
5138 	return clonedArea;
5139 }
5140 
5141 
5142 area_id
5143 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
5144 	size_t size, uint32 lock, uint32 protection)
5145 {
5146 	char name[B_OS_NAME_LENGTH];
5147 	void* address;
5148 
5149 	// filter out some unavailable values (for userland)
5150 	switch (addressSpec) {
5151 		case B_ANY_KERNEL_ADDRESS:
5152 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5153 			return B_BAD_VALUE;
5154 	}
5155 	if ((protection & ~B_USER_PROTECTION) != 0)
5156 		return B_BAD_VALUE;
5157 
5158 	if (!IS_USER_ADDRESS(userName)
5159 		|| !IS_USER_ADDRESS(userAddress)
5160 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
5161 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5162 		return B_BAD_ADDRESS;
5163 
5164 	if (addressSpec == B_EXACT_ADDRESS
5165 		&& IS_KERNEL_ADDRESS(address))
5166 		return B_BAD_VALUE;
5167 
5168 	fix_protection(&protection);
5169 
5170 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(),
5171 		(char*)name, &address, addressSpec, size, lock, protection, 0, 0,
5172 		false);
5173 
5174 	if (area >= B_OK
5175 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5176 		delete_area(area);
5177 		return B_BAD_ADDRESS;
5178 	}
5179 
5180 	return area;
5181 }
5182 
5183 
5184 status_t
5185 _user_delete_area(area_id area)
5186 {
5187 	// Unlike the BeOS implementation, you can now only delete areas
5188 	// that you have created yourself from userland.
5189 	// The documentation to delete_area() explicitly states that this
5190 	// will be restricted in the future, and so it will.
5191 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
5192 }
5193 
5194 
5195 // TODO: create a BeOS style call for this!
5196 
5197 area_id
5198 _user_map_file(const char* userName, void** userAddress, int addressSpec,
5199 	size_t size, int protection, int mapping, bool unmapAddressRange, int fd,
5200 	off_t offset)
5201 {
5202 	char name[B_OS_NAME_LENGTH];
5203 	void* address;
5204 	area_id area;
5205 
5206 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
5207 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
5208 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5209 		return B_BAD_ADDRESS;
5210 
5211 	if (addressSpec == B_EXACT_ADDRESS) {
5212 		if ((addr_t)address + size < (addr_t)address)
5213 			return B_BAD_VALUE;
5214 		if (!IS_USER_ADDRESS(address)
5215 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
5216 			return B_BAD_ADDRESS;
5217 		}
5218 	}
5219 
5220 	// userland created areas can always be accessed by the kernel
5221 	protection |= B_KERNEL_READ_AREA
5222 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
5223 
5224 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
5225 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
5226 		false);
5227 	if (area < B_OK)
5228 		return area;
5229 
5230 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
5231 		return B_BAD_ADDRESS;
5232 
5233 	return area;
5234 }
5235 
5236 
5237 status_t
5238 _user_unmap_memory(void* _address, size_t size)
5239 {
5240 	addr_t address = (addr_t)_address;
5241 
5242 	// check params
5243 	if (size == 0 || (addr_t)address + size < (addr_t)address)
5244 		return B_BAD_VALUE;
5245 
5246 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
5247 		return B_BAD_ADDRESS;
5248 
5249 	// write lock the address space
5250 	AddressSpaceWriteLocker locker;
5251 	status_t status = locker.SetTo(team_get_current_team_id());
5252 	if (status != B_OK)
5253 		return status;
5254 
5255 	// unmap
5256 	return unmap_address_range(locker.AddressSpace(), address, size, false);
5257 }
5258 
5259 
5260 status_t
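/*!	Sets the protection of the given (page aligned) userland address range,
	mprotect()-style. A first pass checks that the whole range is covered by
	areas the caller is allowed to modify; a second pass stores per-page
	protections in the affected areas and re-maps or unmaps already mapped
	pages as necessary (copy-on-write pages of lower caches are unmapped when
	write access is requested).
*/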
5261 _user_set_memory_protection(void* _address, size_t size, int protection)
5262 {
5263 	// check address range
5264 	addr_t address = (addr_t)_address;
5265 	size = PAGE_ALIGN(size);
5266 
5267 	if ((address % B_PAGE_SIZE) != 0)
5268 		return B_BAD_VALUE;
5269 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
5270 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
5271 		// weird error code required by POSIX
5272 		return ENOMEM;
5273 	}
5274 
5275 	// extend and check protection
5276 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
5277 	uint32 actualProtection = protection | B_KERNEL_READ_AREA
5278 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
5279 
5280 	if (!arch_vm_supports_protection(actualProtection))
5281 		return B_NOT_SUPPORTED;
5282 
5283 	// We need to write lock the address space, since we're going to play with
5284 	// the areas.
5285 	AddressSpaceWriteLocker locker;
5286 	status_t status = locker.SetTo(team_get_current_team_id());
5287 	if (status != B_OK)
5288 		return status;
5289 
5290 	// First round: Check whether the whole range is covered by areas and we are
5291 	// allowed to modify them.
5292 	addr_t currentAddress = address;
5293 	size_t sizeLeft = size;
5294 	while (sizeLeft > 0) {
5295 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
5296 		if (area == NULL)
5297 			return B_NO_MEMORY;
5298 
5299 		if ((area->protection & B_KERNEL_AREA) != 0)
5300 			return B_NOT_ALLOWED;
5301 
5302 		// TODO: For (shared) mapped files we should check whether the new
5303 		// protections are compatible with the file permissions. We don't have
5304 		// a way to do that yet, though.
5305 
5306 		addr_t offset = currentAddress - area->Base();
5307 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
5308 
5309 		currentAddress += rangeSize;
5310 		sizeLeft -= rangeSize;
5311 	}
5312 
5313 	// Second round: If the protections differ from that of the area, create a
5314 	// page protection array and re-map mapped pages.
5315 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
5316 	currentAddress = address;
5317 	sizeLeft = size;
5318 	while (sizeLeft > 0) {
5319 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
5320 		if (area == NULL)
5321 			return B_NO_MEMORY;
5322 
5323 		addr_t offset = currentAddress - area->Base();
5324 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
5325 
5326 		currentAddress += rangeSize;
5327 		sizeLeft -= rangeSize;
5328 
5329 		if (area->page_protections == NULL) {
5330 			if (area->protection == actualProtection)
5331 				continue;
5332 
5333 			// In the page protections we store only the three user protections,
5334 			// so we use 4 bits per page.
5335 			uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
5336 			area->page_protections = (uint8*)malloc(bytes);
5337 			if (area->page_protections == NULL)
5338 				return B_NO_MEMORY;
5339 
5340 			// init the page protections for all pages to that of the area
5341 			uint32 areaProtection = area->protection
5342 				& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5343 			memset(area->page_protections,
5344 				areaProtection | (areaProtection << 4), bytes);
5345 		}
5346 
5347 		// We need to lock the complete cache chain, since we potentially unmap
5348 		// pages of lower caches.
5349 		VMCache* topCache = vm_area_get_locked_cache(area);
5350 		VMCacheChainLocker cacheChainLocker(topCache);
5351 		cacheChainLocker.LockAllSourceCaches();
5352 
5353 		for (addr_t pageAddress = area->Base() + offset;
5354 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
5355 			map->Lock();
5356 
5357 			set_area_page_protection(area, pageAddress, protection);
5358 
5359 			addr_t physicalAddress;
5360 			uint32 flags;
5361 
5362 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
5363 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
5364 				map->Unlock();
5365 				continue;
5366 			}
5367 
5368 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5369 			if (page == NULL) {
5370 				panic("area %p looking up page failed for pa 0x%lx\n", area,
5371 					physicalAddress);
5372 				map->Unlock();
5373 				return B_ERROR;
5374 			}
5375 
5376 			// If the page is not in the topmost cache and write access is
5377 			// requested, we have to unmap it. Otherwise we can re-map it with
5378 			// the new protection.
5379 			bool unmapPage = page->Cache() != topCache
5380 				&& (protection & B_WRITE_AREA) != 0;
5381 
5382 			if (!unmapPage)
5383 				map->ProtectPage(area, pageAddress, actualProtection);
5384 
5385 			map->Unlock();
5386 
5387 			if (unmapPage) {
5388 				DEBUG_PAGE_ACCESS_START(page);
5389 				unmap_page(area, pageAddress);
5390 				DEBUG_PAGE_ACCESS_END(page);
5391 			}
5392 		}
5393 	}
5394 
5395 	return B_OK;
5396 }
5397 
5398 
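/*!	msync()-style syscall backend: writes back modified pages of all vnode
	backed areas intersecting the given range, either synchronously
	(\c MS_SYNC) or asynchronously (\c MS_ASYNC). See the note at the end of
	the function regarding \c MS_INVALIDATE.
*/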
5399 status_t
5400 _user_sync_memory(void* _address, size_t size, int flags)
5401 {
5402 	addr_t address = (addr_t)_address;
5403 	size = PAGE_ALIGN(size);
5404 
5405 	// check params
5406 	if ((address % B_PAGE_SIZE) != 0)
5407 		return B_BAD_VALUE;
5408 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
5409 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
5410 		// weird error code required by POSIX
5411 		return ENOMEM;
5412 	}
5413 
5414 	bool writeSync = (flags & MS_SYNC) != 0;
5415 	bool writeAsync = (flags & MS_ASYNC) != 0;
5416 	if (writeSync && writeAsync)
5417 		return B_BAD_VALUE;
5418 
5419 	if (size == 0 || (!writeSync && !writeAsync))
5420 		return B_OK;
5421 
5422 	// iterate through the range and sync all concerned areas
5423 	while (size > 0) {
5424 		// read lock the address space
5425 		AddressSpaceReadLocker locker;
5426 		status_t error = locker.SetTo(team_get_current_team_id());
5427 		if (error != B_OK)
5428 			return error;
5429 
5430 		// get the first area
5431 		VMArea* area = locker.AddressSpace()->LookupArea(address);
5432 		if (area == NULL)
5433 			return B_NO_MEMORY;
5434 
5435 		uint32 offset = address - area->Base();
5436 		size_t rangeSize = min_c(area->Size() - offset, size);
5437 		offset += area->cache_offset;
5438 
5439 		// lock the cache
5440 		AreaCacheLocker cacheLocker(area);
5441 		if (!cacheLocker)
5442 			return B_BAD_VALUE;
5443 		VMCache* cache = area->cache;
5444 
5445 		locker.Unlock();
5446 
5447 		uint32 firstPage = offset >> PAGE_SHIFT;
5448 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
5449 
5450 		// write the pages
5451 		if (cache->type == CACHE_TYPE_VNODE) {
5452 			if (writeSync) {
5453 				// synchronous
5454 				error = vm_page_write_modified_page_range(cache, firstPage,
5455 					endPage);
5456 				if (error != B_OK)
5457 					return error;
5458 			} else {
5459 				// asynchronous
5460 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
5461 				// TODO: This is probably not quite what is supposed to happen.
5462 				// Especially when a lot has to be written, it might take ages
5463 				// until it really hits the disk.
5464 			}
5465 		}
5466 
5467 		address += rangeSize;
5468 		size -= rangeSize;
5469 	}
5470 
5471 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
5472 	// synchronize multiple mappings of the same file. In our VM they never get
5473 	// out of sync, though, so we don't have to do anything.
5474 
5475 	return B_OK;
5476 }
5477 
5478 
5479 status_t
5480 _user_memory_advice(void* address, size_t size, int advice)
5481 {
5482 	// TODO: Implement!
5483 	return B_OK;
5484 }
5485