xref: /haiku/src/system/kernel/vm/vm.cpp (revision 61ce1824dfdea15a581a1e54b92dc06aadf418c5)
1 /*
2  * Copyright 2009-2010, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleter.h>
25 
26 #include <arch/cpu.h>
27 #include <arch/vm.h>
28 #include <boot/elf.h>
29 #include <boot/stage2.h>
30 #include <condition_variable.h>
31 #include <console.h>
32 #include <debug.h>
33 #include <file_cache.h>
34 #include <fs/fd.h>
35 #include <heap.h>
36 #include <kernel.h>
37 #include <int.h>
38 #include <lock.h>
39 #include <low_resource_manager.h>
40 #include <slab/Slab.h>
41 #include <smp.h>
42 #include <system_info.h>
43 #include <thread.h>
44 #include <team.h>
45 #include <tracing.h>
46 #include <util/AutoLock.h>
47 #include <util/khash.h>
48 #include <vm/vm_page.h>
49 #include <vm/vm_priv.h>
50 #include <vm/VMAddressSpace.h>
51 #include <vm/VMArea.h>
52 #include <vm/VMCache.h>
53 
54 #include "VMAddressSpaceLocking.h"
55 #include "VMAnonymousCache.h"
56 #include "IORequest.h"
57 
58 
59 //#define TRACE_VM
60 //#define TRACE_FAULTS
61 #ifdef TRACE_VM
62 #	define TRACE(x) dprintf x
63 #else
64 #	define TRACE(x) ;
65 #endif
66 #ifdef TRACE_FAULTS
67 #	define FTRACE(x) dprintf x
68 #else
69 #	define FTRACE(x) ;
70 #endif
71 
72 
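// Locking policy used with AutoLocker for area caches: Lock() intentionally
// returns false, since the cache is always acquired already locked via
// vm_area_get_locked_cache(); Unlock() drops both the lock and the reference.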
73 class AreaCacheLocking {
74 public:
75 	inline bool Lock(VMCache* lockable)
76 	{
77 		return false;
78 	}
79 
80 	inline void Unlock(VMCache* lockable)
81 	{
82 		vm_area_put_locked_cache(lockable);
83 	}
84 };
85 
86 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
87 public:
88 	inline AreaCacheLocker(VMCache* cache = NULL)
89 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
90 	{
91 	}
92 
93 	inline AreaCacheLocker(VMArea* area)
94 		: AutoLocker<VMCache, AreaCacheLocking>()
95 	{
96 		SetTo(area);
97 	}
98 
99 	inline void SetTo(VMCache* cache, bool alreadyLocked)
100 	{
101 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
102 	}
103 
104 	inline void SetTo(VMArea* area)
105 	{
106 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
107 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
108 	}
109 };
110 
111 
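// Locks a chain of caches from the topmost (consumer) cache down to its
// source caches. The back links from source to consumer are stored via
// SetUserData(), so that Unlock() can release the chain in source -> consumer
// order and avoid lock order inversions.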
112 class VMCacheChainLocker {
113 public:
114 	VMCacheChainLocker()
115 		:
116 		fTopCache(NULL),
117 		fBottomCache(NULL)
118 	{
119 	}
120 
121 	VMCacheChainLocker(VMCache* topCache)
122 		:
123 		fTopCache(topCache),
124 		fBottomCache(topCache)
125 	{
126 	}
127 
128 	~VMCacheChainLocker()
129 	{
130 		Unlock();
131 	}
132 
133 	void SetTo(VMCache* topCache)
134 	{
135 		fTopCache = topCache;
136 		fBottomCache = topCache;
137 
138 		if (topCache != NULL)
139 			topCache->SetUserData(NULL);
140 	}
141 
142 	VMCache* LockSourceCache()
143 	{
144 		if (fBottomCache == NULL || fBottomCache->source == NULL)
145 			return NULL;
146 
147 		VMCache* previousCache = fBottomCache;
148 
149 		fBottomCache = fBottomCache->source;
150 		fBottomCache->Lock();
151 		fBottomCache->AcquireRefLocked();
152 		fBottomCache->SetUserData(previousCache);
153 
154 		return fBottomCache;
155 	}
156 
157 	void LockAllSourceCaches()
158 	{
159 		while (LockSourceCache() != NULL) {
160 		}
161 	}
162 
163 	void Unlock(VMCache* exceptCache = NULL)
164 	{
165 		if (fTopCache == NULL)
166 			return;
167 
168 		// Unlock caches in source -> consumer direction. This is important to
169 		// avoid double-locking and a reversal of locking order in case a cache
170 		// is eligible for merging.
171 		VMCache* cache = fBottomCache;
172 		while (cache != NULL) {
173 			VMCache* nextCache = (VMCache*)cache->UserData();
174 			if (cache != exceptCache)
175 				cache->ReleaseRefAndUnlock(cache != fTopCache);
176 
177 			if (cache == fTopCache)
178 				break;
179 
180 			cache = nextCache;
181 		}
182 
183 		fTopCache = NULL;
184 		fBottomCache = NULL;
185 	}
186 
187 	void UnlockKeepRefs(bool keepTopCacheLocked)
188 	{
189 		if (fTopCache == NULL)
190 			return;
191 
192 		VMCache* nextCache = fBottomCache;
193 		VMCache* cache = NULL;
194 
195 		while (keepTopCacheLocked
196 				? nextCache != fTopCache : cache != fTopCache) {
197 			cache = nextCache;
198 			nextCache = (VMCache*)cache->UserData();
199 			cache->Unlock(cache != fTopCache);
200 		}
201 	}
202 
203 	void RelockCaches(bool topCacheLocked)
204 	{
205 		if (fTopCache == NULL)
206 			return;
207 
208 		VMCache* nextCache = fTopCache;
209 		VMCache* cache = NULL;
210 		if (topCacheLocked) {
211 			cache = nextCache;
212 			nextCache = cache->source;
213 		}
214 
215 		while (cache != fBottomCache && nextCache != NULL) {
216 			VMCache* consumer = cache;
217 			cache = nextCache;
218 			nextCache = cache->source;
219 			cache->Lock();
220 			cache->SetUserData(consumer);
221 		}
222 	}
223 
224 private:
225 	VMCache*	fTopCache;
226 	VMCache*	fBottomCache;
227 };
228 
229 
230 // The memory reserve an allocation of a given priority must not touch.
231 static const size_t kMemoryReserveForPriority[] = {
232 	VM_MEMORY_RESERVE_USER,		// user
233 	VM_MEMORY_RESERVE_SYSTEM,	// system
234 	0							// VIP
235 };
236 
237 
238 ObjectCache* gPageMappingsObjectCache;
239 
240 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
241 
242 static off_t sAvailableMemory;
243 static off_t sNeededMemory;
244 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
245 static uint32 sPageFaults;
246 
247 static VMPhysicalPageMapper* sPhysicalPageMapper;
248 
249 #if DEBUG_CACHE_LIST
250 
251 struct cache_info {
252 	VMCache*	cache;
253 	addr_t		page_count;
254 	addr_t		committed;
255 };
256 
257 static const int kCacheInfoTableCount = 100 * 1024;
258 static cache_info* sCacheInfoTable;
259 
260 #endif	// DEBUG_CACHE_LIST
261 
262 
263 // function declarations
264 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
265 	bool addressSpaceCleanup);
266 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
267 	bool isWrite, bool isUser, vm_page** wirePage,
268 	VMAreaWiredRange* wiredRange = NULL);
269 static status_t map_backing_store(VMAddressSpace* addressSpace,
270 	VMCache* cache, void** _virtualAddress, off_t offset, addr_t size,
271 	uint32 addressSpec, int wiring, int protection, int mapping,
272 	VMArea** _area, const char* areaName, uint32 flags, bool kernel);
273 
274 
275 //	#pragma mark -
276 
277 
278 #if VM_PAGE_FAULT_TRACING
279 
280 namespace VMPageFaultTracing {
281 
282 class PageFaultStart : public AbstractTraceEntry {
283 public:
284 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
285 		:
286 		fAddress(address),
287 		fPC(pc),
288 		fWrite(write),
289 		fUser(user)
290 	{
291 		Initialized();
292 	}
293 
294 	virtual void AddDump(TraceOutput& out)
295 	{
296 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
297 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
298 	}
299 
300 private:
301 	addr_t	fAddress;
302 	addr_t	fPC;
303 	bool	fWrite;
304 	bool	fUser;
305 };
306 
307 
308 // page fault errors
309 enum {
310 	PAGE_FAULT_ERROR_NO_AREA		= 0,
311 	PAGE_FAULT_ERROR_KERNEL_ONLY,
312 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
313 	PAGE_FAULT_ERROR_READ_PROTECTED,
314 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
315 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
316 };
317 
318 
319 class PageFaultError : public AbstractTraceEntry {
320 public:
321 	PageFaultError(area_id area, status_t error)
322 		:
323 		fArea(area),
324 		fError(error)
325 	{
326 		Initialized();
327 	}
328 
329 	virtual void AddDump(TraceOutput& out)
330 	{
331 		switch (fError) {
332 			case PAGE_FAULT_ERROR_NO_AREA:
333 				out.Print("page fault error: no area");
334 				break;
335 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
336 				out.Print("page fault error: area: %ld, kernel only", fArea);
337 				break;
338 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
339 				out.Print("page fault error: area: %ld, write protected",
340 					fArea);
341 				break;
342 			case PAGE_FAULT_ERROR_READ_PROTECTED:
343 				out.Print("page fault error: area: %ld, read protected", fArea);
344 				break;
345 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
346 				out.Print("page fault error: kernel touching bad user memory");
347 				break;
348 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
349 				out.Print("page fault error: no address space");
350 				break;
351 			default:
352 				out.Print("page fault error: area: %ld, error: %s", fArea,
353 					strerror(fError));
354 				break;
355 		}
356 	}
357 
358 private:
359 	area_id		fArea;
360 	status_t	fError;
361 };
362 
363 
364 class PageFaultDone : public AbstractTraceEntry {
365 public:
366 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
367 			vm_page* page)
368 		:
369 		fArea(area),
370 		fTopCache(topCache),
371 		fCache(cache),
372 		fPage(page)
373 	{
374 		Initialized();
375 	}
376 
377 	virtual void AddDump(TraceOutput& out)
378 	{
379 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
380 			"page: %p", fArea, fTopCache, fCache, fPage);
381 	}
382 
383 private:
384 	area_id		fArea;
385 	VMCache*	fTopCache;
386 	VMCache*	fCache;
387 	vm_page*	fPage;
388 };
389 
390 }	// namespace VMPageFaultTracing
391 
392 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
393 #else
394 #	define TPF(x) ;
395 #endif	// VM_PAGE_FAULT_TRACING
396 
397 
398 //	#pragma mark -
399 
400 
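// gMappedPagesCount counts pages that are either wired or mapped by at least
// one area; the helpers below adjust it when a page's wired count changes
// between zero and non-zero.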
401 /*!	The page's cache must be locked.
402 */
403 static inline void
404 increment_page_wired_count(vm_page* page)
405 {
406 	if (page->wired_count++ == 0 && page->mappings.IsEmpty())
407 		atomic_add(&gMappedPagesCount, 1);
408 }
409 
410 
411 /*!	The page's cache must be locked.
412 */
413 static inline void
414 decrement_page_wired_count(vm_page* page)
415 {
416 	if (--page->wired_count == 0 && page->mappings.IsEmpty())
417 		atomic_add(&gMappedPagesCount, -1);
418 }
419 
420 
421 static inline addr_t
422 virtual_page_address(VMArea* area, vm_page* page)
423 {
424 	return area->Base()
425 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
426 }
427 
428 
429 //! You need to have the address space locked when calling this function
430 static VMArea*
431 lookup_area(VMAddressSpace* addressSpace, area_id id)
432 {
433 	VMAreaHash::ReadLock();
434 
435 	VMArea* area = VMAreaHash::LookupLocked(id);
436 	if (area != NULL && area->address_space != addressSpace)
437 		area = NULL;
438 
439 	VMAreaHash::ReadUnlock();
440 
441 	return area;
442 }
443 
444 
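// An area's page_protections array stores the per-page protection in nibbles:
// each byte holds two pages, with the even page index in the low nibble and
// the odd page index in the high nibble (only the B_READ_AREA, B_WRITE_AREA,
// and B_EXECUTE_AREA bits are kept).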
445 static inline void
446 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
447 {
448 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
449 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
450 	uint8& entry = area->page_protections[pageIndex / 2];
451 	if (pageIndex % 2 == 0)
452 		entry = (entry & 0xf0) | protection;
453 	else
454 		entry = (entry & 0x0f) | (protection << 4);
455 }
456 
457 
458 static inline uint32
459 get_area_page_protection(VMArea* area, addr_t pageAddress)
460 {
461 	if (area->page_protections == NULL)
462 		return area->protection;
463 
464 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
465 	uint32 protection = area->page_protections[pageIndex / 2];
466 	if (pageIndex % 2 == 0)
467 		protection &= 0x0f;
468 	else
469 		protection >>= 4;
470 
471 	return protection | B_KERNEL_READ_AREA
472 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
473 }
474 
475 
476 /*!	The caller must have reserved enough pages that the translation map
477 	implementation might need to map this page.
478 	The page's cache must be locked.
479 */
480 static status_t
481 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
482 	vm_page_reservation* reservation)
483 {
484 	VMTranslationMap* map = area->address_space->TranslationMap();
485 
486 	bool wasMapped = page->wired_count > 0 || !page->mappings.IsEmpty();
487 
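	// For B_NO_LOCK areas the mapping is tracked with a vm_page_mapping object
	// (so it can be found and torn down later); for all other wiring types the
	// page's wired count is incremented instead.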
488 	if (area->wiring == B_NO_LOCK) {
489 		DEBUG_PAGE_ACCESS_CHECK(page);
490 
491 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
492 		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
493 			gPageMappingsObjectCache,
494 			CACHE_DONT_WAIT_FOR_MEMORY
495 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
496 		if (mapping == NULL)
497 			return B_NO_MEMORY;
498 
499 		mapping->page = page;
500 		mapping->area = area;
501 
502 		map->Lock();
503 
504 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
505 			reservation);
506 
507 		// insert mapping into lists
508 		if (page->mappings.IsEmpty() && page->wired_count == 0)
509 			atomic_add(&gMappedPagesCount, 1);
510 
511 		page->mappings.Add(mapping);
512 		area->mappings.Add(mapping);
513 
514 		map->Unlock();
515 	} else {
516 		DEBUG_PAGE_ACCESS_CHECK(page);
517 
518 		map->Lock();
519 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
520 			reservation);
521 		map->Unlock();
522 
523 		increment_page_wired_count(page);
524 	}
525 
526 	if (!wasMapped) {
527 		// The page is mapped now, so it must not remain in the cached queue.
528 		// It also makes sense to move it from the inactive to the active queue, since
529 		// otherwise the page daemon wouldn't come to keep track of it (in idle
530 		// mode) -- if the page isn't touched, it will be deactivated after a
531 		// full iteration through the queue at the latest.
532 		if (page->State() == PAGE_STATE_CACHED
533 				|| page->State() == PAGE_STATE_INACTIVE) {
534 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
535 		}
536 	}
537 
538 	return B_OK;
539 }
540 
541 
542 /*!	The caller must hold the lock of the
543 	page's cache.
544 */
545 static inline bool
546 unmap_page(VMArea* area, addr_t virtualAddress)
547 {
548 	return area->address_space->TranslationMap()->UnmapPage(area,
549 		virtualAddress, true);
550 }
551 
552 
553 /*!	The caller must hold the lock of all
554 	mapped pages' caches.
555 */
556 static inline void
557 unmap_pages(VMArea* area, addr_t base, size_t size)
558 {
559 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
560 }
561 
562 
563 /*!	Cuts a piece out of an area. If the given cut range covers the complete
564 	area, it is deleted. If it covers the beginning or the end, the area is
565 	resized accordingly. If the range covers some part in the middle of the
566 	area, it is split in two; in this case the second area is returned via
567 	\a _secondArea (the variable is left untouched in the other cases).
568 	The address space must be write locked.
569 	The caller must ensure that no part of the given range is wired.
570 */
571 static status_t
572 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
573 	addr_t lastAddress, VMArea** _secondArea, bool kernel)
574 {
575 	// Does the cut range intersect with the area at all?
576 	addr_t areaLast = area->Base() + (area->Size() - 1);
577 	if (area->Base() > lastAddress || areaLast < address)
578 		return B_OK;
579 
580 	// Is the area fully covered?
581 	if (area->Base() >= address && areaLast <= lastAddress) {
582 		delete_area(addressSpace, area, false);
583 		return B_OK;
584 	}
585 
586 	int priority;
587 	uint32 allocationFlags;
588 	if (addressSpace == VMAddressSpace::Kernel()) {
589 		priority = VM_PRIORITY_SYSTEM;
590 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
591 			| HEAP_DONT_LOCK_KERNEL_SPACE;
592 	} else {
593 		priority = VM_PRIORITY_USER;
594 		allocationFlags = 0;
595 	}
596 
597 	VMCache* cache = vm_area_get_locked_cache(area);
598 	VMCacheChainLocker cacheChainLocker(cache);
599 	cacheChainLocker.LockAllSourceCaches();
600 
601 	// Cut the end only?
602 	if (areaLast <= lastAddress) {
603 		size_t oldSize = area->Size();
604 		size_t newSize = address - area->Base();
605 
606 		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
607 			allocationFlags);
608 		if (error != B_OK)
609 			return error;
610 
611 		// unmap pages
612 		unmap_pages(area, address, oldSize - newSize);
613 
614 		// If no one else uses the area's cache, we can resize it, too.
615 		if (cache->areas == area && area->cache_next == NULL
616 			&& list_is_empty(&cache->consumers)) {
617 			// Since VMCache::Resize() can temporarily drop the lock, we must
618 			// unlock all lower caches to prevent locking order inversion.
619 			cacheChainLocker.Unlock(cache);
620 			cache->Resize(cache->virtual_base + newSize, priority);
621 			cache->ReleaseRefAndUnlock();
622 		}
623 
624 		return B_OK;
625 	}
626 
627 	// Cut the beginning only?
628 	if (area->Base() >= address) {
629 		addr_t oldBase = area->Base();
630 		addr_t newBase = lastAddress + 1;
631 		size_t newSize = areaLast - lastAddress;
632 
633 		// unmap pages
634 		unmap_pages(area, oldBase, newBase - oldBase);
635 
636 		// resize the area
637 		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
638 			allocationFlags);
639 		if (error != B_OK)
640 			return error;
641 
642 		// TODO: If no one else uses the area's cache, we should resize it, too!
643 
644 		area->cache_offset += newBase - oldBase;
645 
646 		return B_OK;
647 	}
648 
649 	// The tough part -- cut a piece out of the middle of the area.
650 	// We do that by shrinking the area to the beginning section and creating a
651 	// new area for the end section.
652 
653 	addr_t firstNewSize = address - area->Base();
654 	addr_t secondBase = lastAddress + 1;
655 	addr_t secondSize = areaLast - lastAddress;
656 
657 	// unmap pages
658 	unmap_pages(area, address, area->Size() - firstNewSize);
659 
660 	// resize the area
661 	addr_t oldSize = area->Size();
662 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
663 		allocationFlags);
664 	if (error != B_OK)
665 		return error;
666 
667 	// TODO: If no one else uses the area's cache, we might want to create a
668 	// new cache for the second area, transfer the concerned pages from the
669 	// first cache to it and resize the first cache.
670 
671 	// map the second area
672 	VMArea* secondArea;
673 	void* secondBaseAddress = (void*)secondBase;
674 	error = map_backing_store(addressSpace, cache, &secondBaseAddress,
675 		area->cache_offset + (secondBase - area->Base()), secondSize,
676 		B_EXACT_ADDRESS, area->wiring, area->protection, REGION_NO_PRIVATE_MAP,
677 		&secondArea, area->name, 0, kernel);
678 	if (error != B_OK) {
679 		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
680 		return error;
681 	}
682 
683 	// We need a cache reference for the new area.
684 	cache->AcquireRefLocked();
685 
686 	if (_secondArea != NULL)
687 		*_secondArea = secondArea;
688 
689 	return B_OK;
690 }
691 
692 
693 /*!	Deletes all areas in the given address range.
694 	The address space must be write-locked.
695 	The caller must ensure that no part of the given range is wired.
696 */
697 static status_t
698 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
699 	bool kernel)
700 {
701 	size = PAGE_ALIGN(size);
702 	addr_t lastAddress = address + (size - 1);
703 
704 	// Check whether the caller is allowed to modify the concerned areas.
705 	if (!kernel) {
706 		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
707 				VMArea* area = it.Next();) {
708 			addr_t areaLast = area->Base() + (area->Size() - 1);
709 			if (area->Base() < lastAddress && address < areaLast) {
710 				if ((area->protection & B_KERNEL_AREA) != 0)
711 					return B_NOT_ALLOWED;
712 			}
713 		}
714 	}
715 
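	// Second pass: cut away every area intersecting with the range.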
716 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
717 			VMArea* area = it.Next();) {
718 		addr_t areaLast = area->Base() + (area->Size() - 1);
719 		if (area->Base() < lastAddress && address < areaLast) {
720 			status_t error = cut_area(addressSpace, area, address,
721 				lastAddress, NULL, kernel);
722 			if (error != B_OK)
723 				return error;
724 				// Failing after already messing with areas is ugly, but we
725 				// can't do anything about it.
726 		}
727 	}
728 
729 	return B_OK;
730 }
731 
732 
733 /*! You need to hold the lock of the cache and the write lock of the address
734 	space when calling this function.
735 	Note that in case of error your cache will be temporarily unlocked.
736 	If \a addressSpec is \c B_EXACT_ADDRESS and the
737 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
738 	that no part of the specified address range (base \c *_virtualAddress, size
739 	\a size) is wired.
740 */
741 static status_t
742 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache,
743 	void** _virtualAddress, off_t offset, addr_t size, uint32 addressSpec,
744 	int wiring, int protection, int mapping, VMArea** _area,
745 	const char* areaName, uint32 flags, bool kernel)
746 {
747 	TRACE(("map_backing_store: aspace %p, cache %p, *vaddr %p, offset 0x%Lx, "
748 		"size %lu, addressSpec %ld, wiring %d, protection %d, area %p, areaName "
749 		"'%s'\n", addressSpace, cache, *_virtualAddress, offset, size,
750 		addressSpec, wiring, protection, _area, areaName));
751 	cache->AssertLocked();
752 
753 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
754 		| HEAP_DONT_LOCK_KERNEL_SPACE;
755 	int priority;
756 	if (addressSpace != VMAddressSpace::Kernel()) {
757 		priority = VM_PRIORITY_USER;
758 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
759 		priority = VM_PRIORITY_VIP;
760 		allocationFlags |= HEAP_PRIORITY_VIP;
761 	} else
762 		priority = VM_PRIORITY_SYSTEM;
763 
764 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
765 		allocationFlags);
766 	if (area == NULL)
767 		return B_NO_MEMORY;
768 
769 	status_t status;
770 
771 	// if this is a private map, we need to create a new cache
772 	// to handle the private copies of pages as they are written to
773 	VMCache* sourceCache = cache;
774 	if (mapping == REGION_PRIVATE_MAP) {
775 		VMCache* newCache;
776 
777 		// create an anonymous cache
778 		status = VMCacheFactory::CreateAnonymousCache(newCache,
779 			(protection & B_STACK_AREA) != 0, 0, USER_STACK_GUARD_PAGES, true,
780 			VM_PRIORITY_USER);
781 		if (status != B_OK)
782 			goto err1;
783 
784 		newCache->Lock();
785 		newCache->temporary = 1;
786 		newCache->scan_skip = cache->scan_skip;
787 		newCache->virtual_base = offset;
788 		newCache->virtual_end = offset + size;
789 
790 		cache->AddConsumer(newCache);
791 
792 		cache = newCache;
793 	}
794 
795 	status = cache->SetMinimalCommitment(size, priority);
796 	if (status != B_OK)
797 		goto err2;
798 
799 	// check to see if this address space has entered DELETE state
800 	if (addressSpace->IsBeingDeleted()) {
801 		// okay, someone is trying to delete this address space now, so we can't
802 		// insert the area, so back out
803 		status = B_BAD_TEAM_ID;
804 		goto err2;
805 	}
806 
807 	if (addressSpec == B_EXACT_ADDRESS
808 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
809 		status = unmap_address_range(addressSpace, (addr_t)*_virtualAddress,
810 			size, kernel);
811 		if (status != B_OK)
812 			goto err2;
813 	}
814 
815 	status = addressSpace->InsertArea(_virtualAddress, addressSpec, size, area,
816 		allocationFlags);
817 	if (status != B_OK) {
818 		// TODO: wait and try again once this is working in the backend
819 #if 0
820 		if (status == B_NO_MEMORY && addressSpec == B_ANY_KERNEL_ADDRESS) {
821 			low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size,
822 				0, 0);
823 		}
824 #endif
825 		goto err2;
826 	}
827 
828 	// attach the cache to the area
829 	area->cache = cache;
830 	area->cache_offset = offset;
831 
832 	// point the cache back to the area
833 	cache->InsertAreaLocked(area);
834 	if (mapping == REGION_PRIVATE_MAP)
835 		cache->Unlock();
836 
837 	// insert the area in the global area hash table
838 	VMAreaHash::Insert(area);
839 
840 	// grab a ref to the address space (the area holds this)
841 	addressSpace->Get();
842 
843 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
844 //		cache, sourceCache, areaName, area);
845 
846 	*_area = area;
847 	return B_OK;
848 
849 err2:
850 	if (mapping == REGION_PRIVATE_MAP) {
851 		// We created this cache, so we must delete it again. Note that we
852 		// need to temporarily unlock the source cache or we'll otherwise
853 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
854 		sourceCache->Unlock();
855 		cache->ReleaseRefAndUnlock();
856 		sourceCache->Lock();
857 	}
858 err1:
859 	addressSpace->DeleteArea(area, allocationFlags);
860 	return status;
861 }
862 
863 
864 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
865 	  locker1, locker2).
866 */
867 template<typename LockerType1, typename LockerType2>
868 static inline bool
869 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
870 {
871 	area->cache->AssertLocked();
872 
873 	VMAreaUnwiredWaiter waiter;
874 	if (!area->AddWaiterIfWired(&waiter))
875 		return false;
876 
877 	// unlock everything and wait
878 	if (locker1 != NULL)
879 		locker1->Unlock();
880 	if (locker2 != NULL)
881 		locker2->Unlock();
882 
883 	waiter.waitEntry.Wait();
884 
885 	return true;
886 }
887 
888 
889 /*!	Checks whether the given area has any wired ranges intersecting with the
890 	specified range and waits, if so.
891 
892 	When it has to wait, the function calls \c Unlock() on both \a locker1
893 	and \a locker2, if given.
894 	The area's top cache must be locked and must be unlocked as a side effect
895 	of calling \c Unlock() on either \a locker1 or \a locker2.
896 
897 	If the function does not have to wait it does not modify or unlock any
898 	object.
899 
900 	\param area The area to be checked.
901 	\param base The base address of the range to check.
902 	\param size The size of the address range to check.
903 	\param locker1 An object to be unlocked before starting to wait (may
904 		be \c NULL).
905 	\param locker2 An object to be unlocked before starting to wait (may
906 		be \c NULL).
907 	\return \c true, if the function had to wait, \c false otherwise.
908 */
909 template<typename LockerType1, typename LockerType2>
910 static inline bool
911 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
912 	LockerType1* locker1, LockerType2* locker2)
913 {
914 	area->cache->AssertLocked();
915 
916 	VMAreaUnwiredWaiter waiter;
917 	if (!area->AddWaiterIfWired(&waiter, base, size))
918 		return false;
919 
920 	// unlock everything and wait
921 	if (locker1 != NULL)
922 		locker1->Unlock();
923 	if (locker2 != NULL)
924 		locker2->Unlock();
925 
926 	waiter.waitEntry.Wait();
927 
928 	return true;
929 }
930 
931 
932 /*!	Checks whether the given address space has any wired ranges intersecting
933 	with the specified range and waits, if so.
934 
935 	Similar to wait_if_area_range_is_wired(), with the following differences:
936 	- All areas intersecting with the range are checked (or rather, all until
937 	  one is found that contains a wired range intersecting with the given
938 	  range).
939 	- The given address space must at least be read-locked and must be unlocked
940 	  when \c Unlock() is called on \a locker.
941 	- None of the areas' caches are allowed to be locked.
942 */
943 template<typename LockerType>
944 static inline bool
945 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
946 	size_t size, LockerType* locker)
947 {
948 	addr_t end = base + size - 1;
949 	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
950 			VMArea* area = it.Next();) {
951 		// TODO: Introduce a VMAddressSpace method to get a close iterator!
952 		if (area->Base() > end)
953 			return false;
954 
955 		if (base >= area->Base() + area->Size() - 1)
956 			continue;
957 
958 		VMCache* cache = vm_area_get_locked_cache(area);
959 
960 		if (wait_if_area_range_is_wired(area, base, size, locker, cache))
961 			return true;
962 
963 		cache->Unlock();
964 	}
965 
966 	return false;
967 }
968 
969 
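// Blocks the given kernel address range by creating an inaccessible,
// already-wired area backed by an empty anonymous cache, so that nothing else
// can be mapped there.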
970 status_t
971 vm_block_address_range(const char* name, void* address, addr_t size)
972 {
973 	if (!arch_vm_supports_protection(0))
974 		return B_NOT_SUPPORTED;
975 
976 	AddressSpaceWriteLocker locker;
977 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
978 	if (status != B_OK)
979 		return status;
980 
981 	VMAddressSpace* addressSpace = locker.AddressSpace();
982 
983 	// create an anonymous cache
984 	VMCache* cache;
985 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
986 		VM_PRIORITY_SYSTEM);
987 	if (status != B_OK)
988 		return status;
989 
990 	cache->temporary = 1;
991 	cache->virtual_end = size;
992 	cache->scan_skip = 1;
993 	cache->Lock();
994 
995 	VMArea* area;
996 	void* areaAddress = address;
997 	status = map_backing_store(addressSpace, cache, &areaAddress, 0, size,
998 		B_EXACT_ADDRESS, B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, &area, name,
999 		0, true);
1000 	if (status != B_OK) {
1001 		cache->ReleaseRefAndUnlock();
1002 		return status;
1003 	}
1004 
1005 	cache->Unlock();
1006 	area->cache_type = CACHE_TYPE_RAM;
1007 	return area->id;
1008 }
1009 
1010 
1011 status_t
1012 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1013 {
1014 	AddressSpaceWriteLocker locker(team);
1015 	if (!locker.IsLocked())
1016 		return B_BAD_TEAM_ID;
1017 
1018 	VMAddressSpace* addressSpace = locker.AddressSpace();
1019 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1020 		addressSpace == VMAddressSpace::Kernel()
1021 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1022 }
1023 
1024 
1025 status_t
1026 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1027 	addr_t size, uint32 flags)
1028 {
1029 	if (size == 0)
1030 		return B_BAD_VALUE;
1031 
1032 	AddressSpaceWriteLocker locker(team);
1033 	if (!locker.IsLocked())
1034 		return B_BAD_TEAM_ID;
1035 
1036 	VMAddressSpace* addressSpace = locker.AddressSpace();
1037 	return addressSpace->ReserveAddressRange(_address, addressSpec,
1038 		size, flags,
1039 		addressSpace == VMAddressSpace::Kernel()
1040 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1041 }
1042 
1043 
1044 area_id
1045 vm_create_anonymous_area(team_id team, const char* name, void** address,
1046 	uint32 addressSpec, addr_t size, uint32 wiring, uint32 protection,
1047 	addr_t physicalAddress, uint32 flags, bool kernel)
1048 {
1049 	VMArea* area;
1050 	VMCache* cache;
1051 	vm_page* page = NULL;
1052 	bool isStack = (protection & B_STACK_AREA) != 0;
1053 	page_num_t guardPages;
1054 	bool canOvercommit = false;
1055 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1056 		? VM_PAGE_ALLOC_CLEAR : 0;
1057 
1058 	TRACE(("create_anonymous_area [%ld] %s: size 0x%lx\n", team, name, size));
1059 
1060 	size = PAGE_ALIGN(size);
1061 
1062 	if (size == 0)
1063 		return B_BAD_VALUE;
1064 	if (!arch_vm_supports_protection(protection))
1065 		return B_NOT_SUPPORTED;
1066 
1067 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1068 		canOvercommit = true;
1069 
1070 #ifdef DEBUG_KERNEL_STACKS
1071 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1072 		isStack = true;
1073 #endif
1074 
1075 	// check parameters
1076 	switch (addressSpec) {
1077 		case B_ANY_ADDRESS:
1078 		case B_EXACT_ADDRESS:
1079 		case B_BASE_ADDRESS:
1080 		case B_ANY_KERNEL_ADDRESS:
1081 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1082 			break;
1083 		case B_PHYSICAL_BASE_ADDRESS:
1084 			physicalAddress = (addr_t)*address;
1085 			addressSpec = B_ANY_KERNEL_ADDRESS;
1086 			break;
1087 
1088 		default:
1089 			return B_BAD_VALUE;
1090 	}
1091 
1092 	if (physicalAddress != 0)
1093 		wiring = B_CONTIGUOUS;
1094 
1095 	bool doReserveMemory = false;
1096 	switch (wiring) {
1097 		case B_NO_LOCK:
1098 			break;
1099 		case B_FULL_LOCK:
1100 		case B_LAZY_LOCK:
1101 		case B_CONTIGUOUS:
1102 			doReserveMemory = true;
1103 			break;
1104 		case B_ALREADY_WIRED:
1105 			break;
1106 		case B_LOMEM:
1107 		//case B_SLOWMEM:
1108 			dprintf("B_LOMEM/SLOWMEM is not yet supported!\n");
1109 			wiring = B_FULL_LOCK;
1110 			doReserveMemory = true;
1111 			break;
1112 		default:
1113 			return B_BAD_VALUE;
1114 	}
1115 
1116 	// For full lock or contiguous areas we're also going to map the pages and
1117 	// thus need to reserve pages for the mapping backend upfront.
1118 	addr_t reservedMapPages = 0;
1119 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1120 		AddressSpaceWriteLocker locker;
1121 		status_t status = locker.SetTo(team);
1122 		if (status != B_OK)
1123 			return status;
1124 
1125 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1126 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1127 	}
1128 
1129 	int priority;
1130 	if (team != VMAddressSpace::KernelID())
1131 		priority = VM_PRIORITY_USER;
1132 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1133 		priority = VM_PRIORITY_VIP;
1134 	else
1135 		priority = VM_PRIORITY_SYSTEM;
1136 
1137 	// Reserve memory before acquiring the address space lock. This reduces the
1138 	// chances of failure, since while holding the write lock to the address
1139 	// space (if it is the kernel address space that is), the low memory handler
1140 	// won't be able to free anything for us.
1141 	addr_t reservedMemory = 0;
1142 	if (doReserveMemory) {
1143 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1144 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1145 			return B_NO_MEMORY;
1146 		reservedMemory = size;
1147 		// TODO: We don't reserve the memory for the pages for the page
1148 		// directories/tables. We actually need to do so, since we currently don't
1149 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1150 		// there are actually fewer physical pages than there should be, which
1151 		// can get the VM into trouble in low memory situations.
1152 	}
1153 
1154 	AddressSpaceWriteLocker locker;
1155 	VMAddressSpace* addressSpace;
1156 	status_t status;
1157 
1158 	// For full lock areas reserve the pages before locking the address
1159 	// space. E.g. block caches can't release their memory while we hold the
1160 	// address space lock.
1161 	page_num_t reservedPages = reservedMapPages;
1162 	if (wiring == B_FULL_LOCK)
1163 		reservedPages += size / B_PAGE_SIZE;
1164 
1165 	vm_page_reservation reservation;
1166 	if (reservedPages > 0) {
1167 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1168 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1169 					priority)) {
1170 				reservedPages = 0;
1171 				status = B_WOULD_BLOCK;
1172 				goto err0;
1173 			}
1174 		} else
1175 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1176 	}
1177 
1178 	// Lock the address space and, if B_EXACT_ADDRESS and
1179 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1180 	// is not wired.
1181 	do {
1182 		status = locker.SetTo(team);
1183 		if (status != B_OK)
1184 			goto err0;
1185 
1186 		addressSpace = locker.AddressSpace();
1187 	} while (addressSpec == B_EXACT_ADDRESS
1188 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1189 		&& wait_if_address_range_is_wired(addressSpace, (addr_t)*address, size,
1190 			&locker));
1191 
1192 	if (wiring == B_CONTIGUOUS) {
1193 		// we try to allocate the page run here upfront as this may easily
1194 		// fail for obvious reasons
1195 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1196 			physicalAddress, size / B_PAGE_SIZE, priority);
1197 		if (page == NULL) {
1198 			status = B_NO_MEMORY;
1199 			goto err0;
1200 		}
1201 	}
1202 
1203 	// create an anonymous cache
1204 	// if it's a stack, make sure that at least two pages are available
1205 	guardPages = isStack ? ((protection & B_USER_PROTECTION) != 0
1206 		? USER_STACK_GUARD_PAGES : KERNEL_STACK_GUARD_PAGES) : 0;
1207 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1208 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1209 		wiring == B_NO_LOCK, priority);
1210 	if (status != B_OK)
1211 		goto err1;
1212 
1213 	cache->temporary = 1;
1214 	cache->virtual_end = size;
1215 	cache->committed_size = reservedMemory;
1216 		// TODO: This should be done via a method.
1217 	reservedMemory = 0;
1218 
1219 	switch (wiring) {
1220 		case B_LAZY_LOCK:
1221 		case B_FULL_LOCK:
1222 		case B_CONTIGUOUS:
1223 		case B_ALREADY_WIRED:
1224 			cache->scan_skip = 1;
1225 			break;
1226 		case B_NO_LOCK:
1227 			cache->scan_skip = 0;
1228 			break;
1229 	}
1230 
1231 	cache->Lock();
1232 
1233 	status = map_backing_store(addressSpace, cache, address, 0, size,
1234 		addressSpec, wiring, protection, REGION_NO_PRIVATE_MAP, &area, name,
1235 		flags, kernel);
1236 
1237 	if (status != B_OK) {
1238 		cache->ReleaseRefAndUnlock();
1239 		goto err1;
1240 	}
1241 
1242 	locker.DegradeToReadLock();
1243 
1244 	switch (wiring) {
1245 		case B_NO_LOCK:
1246 		case B_LAZY_LOCK:
1247 			// do nothing - the pages are mapped in as needed
1248 			break;
1249 
1250 		case B_FULL_LOCK:
1251 		{
1252 			// Allocate and map all pages for this area
1253 
1254 			off_t offset = 0;
1255 			for (addr_t address = area->Base();
1256 					address < area->Base() + (area->Size() - 1);
1257 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1258 #ifdef DEBUG_KERNEL_STACKS
1259 #	ifdef STACK_GROWS_DOWNWARDS
1260 				if (isStack && address < area->Base()
1261 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1262 #	else
1263 				if (isStack && address >= area->Base() + area->Size()
1264 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1265 #	endif
1266 					continue;
1267 #endif
1268 				vm_page* page = vm_page_allocate_page(&reservation,
1269 					PAGE_STATE_WIRED | pageAllocFlags);
1270 				cache->InsertPage(page, offset);
1271 				map_page(area, page, address, protection, &reservation);
1272 
1273 				DEBUG_PAGE_ACCESS_END(page);
1274 			}
1275 
1276 			break;
1277 		}
1278 
1279 		case B_ALREADY_WIRED:
1280 		{
1281 			// The pages should already be mapped. This is only really useful
1282 			// during boot time. Find the appropriate vm_page objects and stick
1283 			// them in the cache object.
1284 			VMTranslationMap* map = addressSpace->TranslationMap();
1285 			off_t offset = 0;
1286 
1287 			if (!gKernelStartup)
1288 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1289 
1290 			map->Lock();
1291 
1292 			for (addr_t virtualAddress = area->Base();
1293 					virtualAddress < area->Base() + (area->Size() - 1);
1294 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1295 				addr_t physicalAddress;
1296 				uint32 flags;
1297 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1298 				if (status < B_OK) {
1299 					panic("looking up mapping failed for va 0x%lx\n",
1300 						virtualAddress);
1301 				}
1302 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1303 				if (page == NULL) {
1304 					panic("looking up page failed for pa 0x%lx\n",
1305 						physicalAddress);
1306 				}
1307 
1308 				DEBUG_PAGE_ACCESS_START(page);
1309 
1310 				increment_page_wired_count(page);
1311 				cache->InsertPage(page, offset);
1312 				vm_page_set_state(page, PAGE_STATE_WIRED);
1313 				page->busy = false;
1314 
1315 				DEBUG_PAGE_ACCESS_END(page);
1316 			}
1317 
1318 			map->Unlock();
1319 			break;
1320 		}
1321 
1322 		case B_CONTIGUOUS:
1323 		{
1324 			// We have already allocated our contiguous page run, so we can now
1325 			// just map them in the address space
1326 			VMTranslationMap* map = addressSpace->TranslationMap();
1327 			addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE;
1328 			addr_t virtualAddress = area->Base();
1329 			off_t offset = 0;
1330 
1331 			map->Lock();
1332 
1333 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1334 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1335 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1336 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1337 				if (page == NULL)
1338 					panic("couldn't lookup physical page just allocated\n");
1339 
1340 				status = map->Map(virtualAddress, physicalAddress, protection,
1341 					&reservation);
1342 				if (status < B_OK)
1343 					panic("couldn't map physical page in page run\n");
1344 
1345 				increment_page_wired_count(page);
1346 				cache->InsertPage(page, offset);
1347 
1348 				DEBUG_PAGE_ACCESS_END(page);
1349 			}
1350 
1351 			map->Unlock();
1352 			break;
1353 		}
1354 
1355 		default:
1356 			break;
1357 	}
1358 
1359 	cache->Unlock();
1360 
1361 	if (reservedPages > 0)
1362 		vm_page_unreserve_pages(&reservation);
1363 
1364 	TRACE(("vm_create_anonymous_area: done\n"));
1365 
1366 	area->cache_type = CACHE_TYPE_RAM;
1367 	return area->id;
1368 
1369 err1:
1370 	if (wiring == B_CONTIGUOUS) {
1371 		// we had reserved the area space upfront...
1372 		addr_t pageNumber = page->physical_page_number;
1373 		int32 i;
1374 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1375 			page = vm_lookup_page(pageNumber);
1376 			if (page == NULL)
1377 				panic("couldn't lookup physical page just allocated\n");
1378 
1379 			vm_page_set_state(page, PAGE_STATE_FREE);
1380 		}
1381 	}
1382 
1383 err0:
1384 	if (reservedPages > 0)
1385 		vm_page_unreserve_pages(&reservation);
1386 	if (reservedMemory > 0)
1387 		vm_unreserve_memory(reservedMemory);
1388 
1389 	return status;
1390 }
1391 
1392 
1393 area_id
1394 vm_map_physical_memory(team_id team, const char* name, void** _address,
1395 	uint32 addressSpec, addr_t size, uint32 protection, addr_t physicalAddress,
1396 	bool alreadyWired)
1397 {
1398 	VMArea* area;
1399 	VMCache* cache;
1400 	addr_t mapOffset;
1401 
1402 	TRACE(("vm_map_physical_memory(aspace = %ld, \"%s\", virtual = %p, "
1403 		"spec = %ld, size = %lu, protection = %ld, phys = %#lx)\n", team,
1404 		name, _address, addressSpec, size, protection, physicalAddress));
1405 
1406 	if (!arch_vm_supports_protection(protection))
1407 		return B_NOT_SUPPORTED;
1408 
1409 	AddressSpaceWriteLocker locker(team);
1410 	if (!locker.IsLocked())
1411 		return B_BAD_TEAM_ID;
1412 
1413 	// if the physical address is not page-aligned,
1414 	// move the actual area down to align it on a page boundary
1415 	mapOffset = physicalAddress % B_PAGE_SIZE;
1416 	size += mapOffset;
1417 	physicalAddress -= mapOffset;
1418 
1419 	size = PAGE_ALIGN(size);
1420 
1421 	// create a device cache
1422 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1423 	if (status != B_OK)
1424 		return status;
1425 
1426 	// tell the page scanner to skip over this area, its pages are special
1427 	cache->scan_skip = 1;
1428 	cache->virtual_end = size;
1429 
1430 	cache->Lock();
1431 
1432 	status = map_backing_store(locker.AddressSpace(), cache, _address,
1433 		0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
1434 		REGION_NO_PRIVATE_MAP, &area, name, 0, true);
1435 
1436 	if (status < B_OK)
1437 		cache->ReleaseRefLocked();
1438 
1439 	cache->Unlock();
1440 
1441 	if (status == B_OK) {
1442 		// set requested memory type -- use uncached, if not given
1443 		uint32 memoryType = addressSpec & B_MTR_MASK;
1444 		if (memoryType == 0)
1445 			memoryType = B_MTR_UC;
1446 
1447 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1448 		if (status != B_OK)
1449 			delete_area(locker.AddressSpace(), area, false);
1450 	}
1451 
1452 	if (status >= B_OK && !alreadyWired) {
1453 		// make sure our area is mapped in completely
1454 
1455 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1456 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1457 			area->Base() + (size - 1));
1458 
1459 		vm_page_reservation reservation;
1460 		vm_page_reserve_pages(&reservation, reservePages,
1461 			team == VMAddressSpace::KernelID()
1462 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1463 		map->Lock();
1464 
1465 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1466 			map->Map(area->Base() + offset, physicalAddress + offset,
1467 				protection, &reservation);
1468 		}
1469 
1470 		map->Unlock();
1471 		vm_page_unreserve_pages(&reservation);
1472 	}
1473 
1474 	if (status < B_OK)
1475 		return status;
1476 
1477 	// modify the pointer returned to be offset back into the new area
1478 	// the same way the physical address in was offset
1479 	*_address = (void*)((addr_t)*_address + mapOffset);
1480 
1481 	area->cache_type = CACHE_TYPE_DEVICE;
1482 	return area->id;
1483 }
1484 
1485 
1486 /*!	Don't use!
1487 	TODO: This function was introduced to map physical page vecs to
1488 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1489 	use a device cache but does not track vm_page::wired_count!
1490 */
1491 area_id
1492 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1493 	uint32 addressSpec, addr_t* _size, uint32 protection, struct iovec* vecs,
1494 	uint32 vecCount)
1495 {
1496 	TRACE(("vm_map_physical_memory_vecs(team = %ld, \"%s\", virtual = %p, "
1497 		"spec = %ld, _size = %p, protection = %ld, vecs = %p, "
1498 		"vecCount = %ld)\n", team, name, _address, addressSpec, _size,
1499 		protection, vecs, vecCount));
1500 
1501 	if (!arch_vm_supports_protection(protection)
1502 		|| (addressSpec & B_MTR_MASK) != 0) {
1503 		return B_NOT_SUPPORTED;
1504 	}
1505 
1506 	AddressSpaceWriteLocker locker(team);
1507 	if (!locker.IsLocked())
1508 		return B_BAD_TEAM_ID;
1509 
1510 	if (vecCount == 0)
1511 		return B_BAD_VALUE;
1512 
1513 	addr_t size = 0;
1514 	for (uint32 i = 0; i < vecCount; i++) {
1515 		if ((addr_t)vecs[i].iov_base % B_PAGE_SIZE != 0
1516 			|| vecs[i].iov_len % B_PAGE_SIZE != 0) {
1517 			return B_BAD_VALUE;
1518 		}
1519 
1520 		size += vecs[i].iov_len;
1521 	}
1522 
1523 	// create a device cache
1524 	VMCache* cache;
1525 	status_t result = VMCacheFactory::CreateDeviceCache(cache,
1526 		(addr_t)vecs[0].iov_base);
1527 	if (result != B_OK)
1528 		return result;
1529 
1530 	// tell the page scanner to skip over this area, its pages are special
1531 	cache->scan_skip = 1;
1532 	cache->virtual_end = size;
1533 
1534 	cache->Lock();
1535 
1536 	VMArea* area;
1537 	result = map_backing_store(locker.AddressSpace(), cache, _address,
1538 		0, size, addressSpec & ~B_MTR_MASK, B_FULL_LOCK, protection,
1539 		REGION_NO_PRIVATE_MAP, &area, name, 0, true);
1540 
1541 	if (result != B_OK)
1542 		cache->ReleaseRefLocked();
1543 
1544 	cache->Unlock();
1545 
1546 	if (result != B_OK)
1547 		return result;
1548 
1549 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1550 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1551 		area->Base() + (size - 1));
1552 
1553 	vm_page_reservation reservation;
1554 	vm_page_reserve_pages(&reservation, reservePages,
1555 			team == VMAddressSpace::KernelID()
1556 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1557 	map->Lock();
1558 
1559 	uint32 vecIndex = 0;
1560 	size_t vecOffset = 0;
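	// Map the range page by page, walking through the vecs and advancing to
	// the next vec whenever the current one is exhausted.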
1561 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1562 		while (vecOffset >= vecs[vecIndex].iov_len && vecIndex < vecCount) {
1563 			vecOffset = 0;
1564 			vecIndex++;
1565 		}
1566 
1567 		if (vecIndex >= vecCount)
1568 			break;
1569 
1570 		map->Map(area->Base() + offset,
1571 			(addr_t)vecs[vecIndex].iov_base + vecOffset, protection,
1572 			&reservation);
1573 
1574 		vecOffset += B_PAGE_SIZE;
1575 	}
1576 
1577 	map->Unlock();
1578 	vm_page_unreserve_pages(&reservation);
1579 
1580 	if (_size != NULL)
1581 		*_size = size;
1582 
1583 	area->cache_type = CACHE_TYPE_DEVICE;
1584 	return area->id;
1585 }
1586 
1587 
1588 area_id
1589 vm_create_null_area(team_id team, const char* name, void** address,
1590 	uint32 addressSpec, addr_t size, uint32 flags)
1591 {
1592 	size = PAGE_ALIGN(size);
1593 
1594 	// Lock the address space and, if B_EXACT_ADDRESS and
1595 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1596 	// is not wired.
1597 	AddressSpaceWriteLocker locker;
1598 	do {
1599 		if (locker.SetTo(team) != B_OK)
1600 			return B_BAD_TEAM_ID;
1601 	} while (addressSpec == B_EXACT_ADDRESS
1602 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1603 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1604 			(addr_t)*address, size, &locker));
1605 
1606 	// create a null cache
1607 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1608 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1609 	VMCache* cache;
1610 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1611 	if (status != B_OK)
1612 		return status;
1613 
1614 	// tell the page scanner to skip over this area, no pages will be mapped
1615 	// here
1616 	cache->scan_skip = 1;
1617 	cache->virtual_end = size;
1618 
1619 	cache->Lock();
1620 
1621 	VMArea* area;
1622 	status = map_backing_store(locker.AddressSpace(), cache, address, 0, size,
1623 		addressSpec, B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP,
1624 		&area, name, flags, true);
1625 
1626 	if (status < B_OK) {
1627 		cache->ReleaseRefAndUnlock();
1628 		return status;
1629 	}
1630 
1631 	cache->Unlock();
1632 
1633 	area->cache_type = CACHE_TYPE_NULL;
1634 	return area->id;
1635 }
1636 
1637 
1638 /*!	Creates the vnode cache for the specified \a vnode.
1639 	The vnode has to be marked busy when calling this function.
1640 */
1641 status_t
1642 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1643 {
1644 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1645 }
1646 
1647 
1648 /*!	\a cache must be locked. The area's address space must be read-locked.
1649 */
1650 static void
1651 pre_map_area_pages(VMArea* area, VMCache* cache,
1652 	vm_page_reservation* reservation)
1653 {
1654 	addr_t baseAddress = area->Base();
1655 	addr_t cacheOffset = area->cache_offset;
1656 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1657 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1658 
1659 	for (VMCachePagesTree::Iterator it
1660 				= cache->pages.GetIterator(firstPage, true, true);
1661 			vm_page* page = it.Next();) {
1662 		if (page->cache_offset >= endPage)
1663 			break;
1664 
1665 		// skip busy and inactive pages
1666 		if (page->busy || page->usage_count == 0)
1667 			continue;
1668 
1669 		DEBUG_PAGE_ACCESS_START(page);
1670 		map_page(area, page,
1671 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1672 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1673 		DEBUG_PAGE_ACCESS_END(page);
1674 	}
1675 }
1676 
1677 
1678 /*!	Will map the file specified by \a fd to an area in memory.
1679 	The file will be mirrored beginning at the specified \a offset. The
1680 	\a offset and \a size arguments have to be page aligned.
1681 */
1682 static area_id
1683 _vm_map_file(team_id team, const char* name, void** _address,
1684 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1685 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1686 {
1687 	// TODO: for binary files, we want to make sure that they get a
1688 	//	copy of the file at a given point in time, i.e. later changes should not
1689 	//	make it into the mapped copy -- this will need quite some changes
1690 	//	to be done in a nice way
1691 	TRACE(("_vm_map_file(fd = %d, offset = %Ld, size = %lu, mapping %ld)\n",
1692 		fd, offset, size, mapping));
1693 
1694 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1695 	size = PAGE_ALIGN(size);
1696 
1697 	if (mapping == REGION_NO_PRIVATE_MAP)
1698 		protection |= B_SHARED_AREA;
1699 	if (addressSpec != B_EXACT_ADDRESS)
1700 		unmapAddressRange = false;
1701 
1702 	if (fd < 0) {
1703 		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1704 		return vm_create_anonymous_area(team, name, _address, addressSpec, size,
1705 			B_NO_LOCK, protection, 0, flags, kernel);
1706 	}
1707 
1708 	// get the open flags of the FD
1709 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1710 	if (descriptor == NULL)
1711 		return EBADF;
1712 	int32 openMode = descriptor->open_mode;
1713 	put_fd(descriptor);
1714 
1715 	// The FD must be open for reading in any case. For a shared mapping with
1716 	// write access, the FD must additionally be open for writing.
1717 	if ((openMode & O_ACCMODE) == O_WRONLY
1718 		|| (mapping == REGION_NO_PRIVATE_MAP
1719 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1720 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1721 		return EACCES;
1722 	}
1723 
1724 	// get the vnode for the object, this also grabs a ref to it
1725 	struct vnode* vnode = NULL;
1726 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1727 	if (status < B_OK)
1728 		return status;
1729 	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1730 
1731 	// If we're going to pre-map pages, we need to reserve the pages needed by
1732 	// the mapping backend upfront.
1733 	page_num_t reservedPreMapPages = 0;
1734 	vm_page_reservation reservation;
1735 	if ((protection & B_READ_AREA) != 0) {
1736 		AddressSpaceWriteLocker locker;
1737 		status = locker.SetTo(team);
1738 		if (status != B_OK)
1739 			return status;
1740 
1741 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1742 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1743 
1744 		locker.Unlock();
1745 
1746 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1747 			team == VMAddressSpace::KernelID()
1748 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1749 	}
1750 
1751 	struct PageUnreserver {
1752 		PageUnreserver(vm_page_reservation* reservation)
1753 			:
1754 			fReservation(reservation)
1755 		{
1756 		}
1757 
1758 		~PageUnreserver()
1759 		{
1760 			if (fReservation != NULL)
1761 				vm_page_unreserve_pages(fReservation);
1762 		}
1763 
1764 		vm_page_reservation* fReservation;
1765 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1766 
1767 	// Lock the address space and, if the specified address range shall be
1768 	// unmapped, ensure it is not wired.
1769 	AddressSpaceWriteLocker locker;
1770 	do {
1771 		if (locker.SetTo(team) != B_OK)
1772 			return B_BAD_TEAM_ID;
1773 	} while (unmapAddressRange
1774 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1775 			(addr_t)*_address, size, &locker));
1776 
1777 	// TODO: this only works for file systems that use the file cache
1778 	VMCache* cache;
1779 	status = vfs_get_vnode_cache(vnode, &cache, false);
1780 	if (status < B_OK)
1781 		return status;
1782 
1783 	cache->Lock();
1784 
1785 	VMArea* area;
1786 	status = map_backing_store(locker.AddressSpace(), cache, _address,
1787 		offset, size, addressSpec, 0, protection, mapping, &area, name,
1788 		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, kernel);
1789 
1790 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1791 		// map_backing_store() cannot know we no longer need the ref
1792 		cache->ReleaseRefLocked();
1793 	}
1794 
1795 	if (status == B_OK && (protection & B_READ_AREA) != 0)
1796 		pre_map_area_pages(area, cache, &reservation);
1797 
1798 	cache->Unlock();
1799 
1800 	if (status == B_OK) {
1801 		// TODO: this probably deserves a smarter solution, ie. don't always
1802 		// prefetch stuff, and also, probably don't trigger it at this place.
1803 		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
1804 			// prefetches at max 10 MB starting from "offset"
1805 	}
1806 
1807 	if (status != B_OK)
1808 		return status;
1809 
1810 	area->cache_type = CACHE_TYPE_VNODE;
1811 	return area->id;
1812 }
1813 
1814 
1815 area_id
1816 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
1817 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
1818 	int fd, off_t offset)
1819 {
1820 	if (!arch_vm_supports_protection(protection))
1821 		return B_NOT_SUPPORTED;
1822 
1823 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
1824 		mapping, unmapAddressRange, fd, offset, true);
1825 }
1826 
1827 
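// Returns the area's cache locked and with an extra reference. Since the
// area's cache may change while we wait for the cache lock, the cache is
// re-checked against area->cache under sAreaCacheLock and the lookup is
// retried if it changed or was deleted in the meantime.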
1828 VMCache*
1829 vm_area_get_locked_cache(VMArea* area)
1830 {
1831 	rw_lock_read_lock(&sAreaCacheLock);
1832 
1833 	while (true) {
1834 		VMCache* cache = area->cache;
1835 
1836 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
1837 			// cache has been deleted
1838 			rw_lock_read_lock(&sAreaCacheLock);
1839 			continue;
1840 		}
1841 
1842 		rw_lock_read_lock(&sAreaCacheLock);
1843 
1844 		if (cache == area->cache) {
1845 			cache->AcquireRefLocked();
1846 			rw_lock_read_unlock(&sAreaCacheLock);
1847 			return cache;
1848 		}
1849 
1850 		// the cache changed in the meantime
1851 		cache->Unlock();
1852 	}
1853 }
1854 
1855 
1856 void
1857 vm_area_put_locked_cache(VMCache* cache)
1858 {
1859 	cache->ReleaseRefAndUnlock();
1860 }
1861 
1862 
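/*!	Creates a new area in the given team's address space that shares the
	cache of the area \a sourceID. Both the source area and the clone are
	marked B_SHARED_AREA, so that no copy-on-write is triggered between them.
	Unless \a kernel is \c true, B_KERNEL_AREA areas cannot be cloned; areas
	backed by a null cache cannot be cloned at all. If the source area is
	fully locked (B_FULL_LOCK), the clone's pages (respectively the physical
	range, for device caches) are mapped right away.
	Returns the ID of the new area on success, an error code otherwise.
*/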
1863 area_id
1864 vm_clone_area(team_id team, const char* name, void** address,
1865 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
1866 	bool kernel)
1867 {
1868 	VMArea* newArea = NULL;
1869 	VMArea* sourceArea;
1870 
1871 	// Check whether the source area exists and is cloneable. If so, mark it
1872 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
1873 	{
1874 		AddressSpaceWriteLocker locker;
1875 		status_t status = locker.SetFromArea(sourceID, sourceArea);
1876 		if (status != B_OK)
1877 			return status;
1878 
1879 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
1880 			return B_NOT_ALLOWED;
1881 
1882 		sourceArea->protection |= B_SHARED_AREA;
1883 		protection |= B_SHARED_AREA;
1884 	}
1885 
1886 	// Now lock both address spaces and actually do the cloning.
1887 
1888 	MultiAddressSpaceLocker locker;
1889 	VMAddressSpace* sourceAddressSpace;
1890 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
1891 	if (status != B_OK)
1892 		return status;
1893 
1894 	VMAddressSpace* targetAddressSpace;
1895 	status = locker.AddTeam(team, true, &targetAddressSpace);
1896 	if (status != B_OK)
1897 		return status;
1898 
1899 	status = locker.Lock();
1900 	if (status != B_OK)
1901 		return status;
1902 
1903 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
1904 	if (sourceArea == NULL)
1905 		return B_BAD_VALUE;
1906 
1907 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
1908 		return B_NOT_ALLOWED;
1909 
1910 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
1911 
1912 	// TODO: for now, B_USER_CLONEABLE is disabled, until all drivers
1913 	//	have been adapted. Maybe it should be part of the kernel settings,
1914 	//	anyway (so that old drivers can always work).
1915 #if 0
1916 	if (sourceArea->aspace == VMAddressSpace::Kernel()
1917 		&& addressSpace != VMAddressSpace::Kernel()
1918 		&& !(sourceArea->protection & B_USER_CLONEABLE_AREA)) {
1919 		// kernel areas must not be cloned in userland, unless explicitly
1920 		// declared user-cloneable upon construction
1921 		status = B_NOT_ALLOWED;
1922 	} else
1923 #endif
1924 	if (sourceArea->cache_type == CACHE_TYPE_NULL)
1925 		status = B_NOT_ALLOWED;
1926 	else {
1927 		status = map_backing_store(targetAddressSpace, cache, address,
1928 			sourceArea->cache_offset, sourceArea->Size(), addressSpec,
1929 			sourceArea->wiring, protection, mapping, &newArea, name, 0, kernel);
1930 	}
1931 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
1932 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
1933 		// to create a new cache, and has therefore already acquired a reference
1934 		// to the source cache - but otherwise it has no idea that we need
1935 		// one.
1936 		cache->AcquireRefLocked();
1937 	}
1938 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
1939 		// we need to map in everything at this point
1940 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
1941 			// we don't have actual pages to map but a physical area
1942 			VMTranslationMap* map
1943 				= sourceArea->address_space->TranslationMap();
1944 			map->Lock();
1945 
1946 			addr_t physicalAddress;
1947 			uint32 oldProtection;
1948 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
1949 
1950 			map->Unlock();
1951 
1952 			map = targetAddressSpace->TranslationMap();
1953 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
1954 				newArea->Base() + (newArea->Size() - 1));
1955 
1956 			vm_page_reservation reservation;
1957 			vm_page_reserve_pages(&reservation, reservePages,
1958 				targetAddressSpace == VMAddressSpace::Kernel()
1959 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1960 			map->Lock();
1961 
1962 			for (addr_t offset = 0; offset < newArea->Size();
1963 					offset += B_PAGE_SIZE) {
1964 				map->Map(newArea->Base() + offset, physicalAddress + offset,
1965 					protection, &reservation);
1966 			}
1967 
1968 			map->Unlock();
1969 			vm_page_unreserve_pages(&reservation);
1970 		} else {
1971 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
1972 			size_t reservePages = map->MaxPagesNeededToMap(
1973 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
1974 			vm_page_reservation reservation;
1975 			vm_page_reserve_pages(&reservation, reservePages,
1976 				targetAddressSpace == VMAddressSpace::Kernel()
1977 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1978 
1979 			// map in all pages from source
1980 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
1981 					vm_page* page  = it.Next();) {
1982 				if (!page->busy) {
1983 					DEBUG_PAGE_ACCESS_START(page);
1984 					map_page(newArea, page,
1985 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
1986 							- newArea->cache_offset),
1987 						protection, &reservation);
1988 					DEBUG_PAGE_ACCESS_END(page);
1989 				}
1990 			}
1991 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
1992 			// ensuring that!
1993 
1994 			vm_page_unreserve_pages(&reservation);
1995 		}
1996 	}
1997 	if (status == B_OK)
1998 		newArea->cache_type = sourceArea->cache_type;
1999 
2000 	vm_area_put_locked_cache(cache);
2001 
2002 	if (status < B_OK)
2003 		return status;
2004 
2005 	return newArea->id;
2006 }
2007 
2008 
2009 /*!	Deletes the specified area of the given address space.
2010 
2011 	The address space must be write-locked.
2012 	The caller must ensure that the area does not have any wired ranges.
2013 
2014 	\param addressSpace The address space containing the area.
2015 	\param area The area to be deleted.
2016 	\param deletingAddressSpace \c true, if the address space is in the process
2017 		of being deleted.
2018 */
2019 static void
2020 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2021 	bool deletingAddressSpace)
2022 {
2023 	ASSERT(!area->IsWired());
2024 
2025 	VMAreaHash::Remove(area);
2026 
2027 	// At this point the area is removed from the global hash table, but
2028 	// still exists in the area list.
2029 
2030 	// Unmap the virtual address space the area occupied.
2031 	{
2032 		// We need to lock the complete cache chain.
2033 		VMCache* topCache = vm_area_get_locked_cache(area);
2034 		VMCacheChainLocker cacheChainLocker(topCache);
2035 		cacheChainLocker.LockAllSourceCaches();
2036 
2037 		// If the area's top cache is a temporary cache and the area is the only
2038 		// one referencing it (besides us currently holding a second reference),
2039 		// the unmapping code doesn't need to care about preserving the accessed
2040 		// and dirty flags of the top cache page mappings.
2041 		bool ignoreTopCachePageFlags
2042 			= topCache->temporary && topCache->RefCount() == 2;
2043 
2044 		area->address_space->TranslationMap()->UnmapArea(area,
2045 			deletingAddressSpace, ignoreTopCachePageFlags);
2046 	}
2047 
2048 	if (!area->cache->temporary)
2049 		area->cache->WriteModified();
2050 
2051 	arch_vm_unset_memory_type(area);
2052 	addressSpace->RemoveArea(area, 0);
2053 	addressSpace->Put();
2054 
2055 	area->cache->RemoveArea(area);
2056 	area->cache->ReleaseRef();
2057 
2058 	addressSpace->DeleteArea(area, 0);
2059 }
2060 
2061 
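/*!	Deletes the area with ID \a id in the address space of the given team.
	The function waits until the area no longer has any wired ranges before
	deleting it. Unless \a kernel is \c true, areas marked B_KERNEL_AREA may
	not be deleted.
*/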
2062 status_t
2063 vm_delete_area(team_id team, area_id id, bool kernel)
2064 {
2065 	TRACE(("vm_delete_area(team = 0x%lx, area = 0x%lx)\n", team, id));
2066 
2067 	// lock the address space and make sure the area isn't wired
2068 	AddressSpaceWriteLocker locker;
2069 	VMArea* area;
2070 	AreaCacheLocker cacheLocker;
2071 
2072 	do {
2073 		status_t status = locker.SetFromArea(team, id, area);
2074 		if (status != B_OK)
2075 			return status;
2076 
2077 		cacheLocker.SetTo(area);
2078 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2079 
2080 	cacheLocker.Unlock();
2081 
2082 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2083 		return B_NOT_ALLOWED;
2084 
2085 	delete_area(locker.AddressSpace(), area, false);
2086 	return B_OK;
2087 }
2088 
2089 
2090 /*!	Creates a new cache on top of the given cache, moves all areas from
2091 	the old cache to the new one, and changes the protection of all affected
2092 	areas' pages to read-only.
2093 	Preconditions:
2094 	- The given cache must be locked.
2095 	- All of the cache's areas' address spaces must be read locked.
2096 	- None of the cache's areas must have any wired ranges.
2097 */
2098 static status_t
2099 vm_copy_on_write_area(VMCache* lowerCache)
2100 {
2101 	VMCache* upperCache;
2102 
2103 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2104 
2105 	// We need to separate the cache from its areas. The cache goes one level
2106 	// deeper and we create a new cache in between.
2107 
2108 	// create an anonymous cache
2109 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2110 		0, true, VM_PRIORITY_USER);
2111 	if (status != B_OK)
2112 		return status;
2113 
2114 	upperCache->Lock();
2115 
2116 	upperCache->temporary = 1;
2117 	upperCache->scan_skip = lowerCache->scan_skip;
2118 	upperCache->virtual_base = lowerCache->virtual_base;
2119 	upperCache->virtual_end = lowerCache->virtual_end;
2120 
2121 	// transfer the lower cache areas to the upper cache
2122 	rw_lock_write_lock(&sAreaCacheLock);
2123 	upperCache->TransferAreas(lowerCache);
2124 	rw_lock_write_unlock(&sAreaCacheLock);
2125 
2126 	lowerCache->AddConsumer(upperCache);
2127 
2128 	// We now need to remap all pages from all of the cache's areas read-only,
2129 	// so that a copy will be created on the next write access.
2130 
2131 	for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2132 			tempArea = tempArea->cache_next) {
2133 		// The area must be readable in the same way it was previously writable
2134 		uint32 protection = B_KERNEL_READ_AREA;
2135 		if ((tempArea->protection & B_READ_AREA) != 0)
2136 			protection |= B_READ_AREA;
2137 
2138 		VMTranslationMap* map = tempArea->address_space->TranslationMap();
2139 		map->Lock();
2140 		map->ProtectArea(tempArea, protection);
2141 		map->Unlock();
2142 	}
2143 
2144 	vm_area_put_locked_cache(upperCache);
2145 
2146 	return B_OK;
2147 }
2148 
2149 
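/*!	Creates a copy of the area \a sourceID in the given team's address space.
	If the source area is shared, the copy simply reuses the source cache.
	Otherwise map_backing_store() creates a private cache for the copy and,
	if the source is writable, vm_copy_on_write_area() inserts a new cache
	above the source cache and remaps its areas read-only, so that writes on
	either side trigger copy-on-write. B_CLONE_ADDRESS is translated into an
	exact mapping at the source area's base address.
	Returns the ID of the newly created area on success.
*/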
2150 area_id
2151 vm_copy_area(team_id team, const char* name, void** _address,
2152 	uint32 addressSpec, uint32 protection, area_id sourceID)
2153 {
2154 	bool writableCopy = (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2155 
2156 	if ((protection & B_KERNEL_PROTECTION) == 0) {
2157 		// set the same protection for the kernel as for userland
2158 		protection |= B_KERNEL_READ_AREA;
2159 		if (writableCopy)
2160 			protection |= B_KERNEL_WRITE_AREA;
2161 	}
2162 
2163 	// Do the locking: target address space, all address spaces associated with
2164 	// the source cache, and the cache itself.
2165 	MultiAddressSpaceLocker locker;
2166 	VMAddressSpace* targetAddressSpace;
2167 	VMCache* cache;
2168 	VMArea* source;
2169 	AreaCacheLocker cacheLocker;
2170 	status_t status;
2171 	bool sharedArea;
2172 
2173 	bool restart;
2174 	do {
2175 		restart = false;
2176 
2177 		locker.Unset();
2178 		status = locker.AddTeam(team, true, &targetAddressSpace);
2179 		if (status == B_OK) {
2180 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2181 				&cache);
2182 		}
2183 		if (status != B_OK)
2184 			return status;
2185 
2186 		cacheLocker.SetTo(cache, true);	// already locked
2187 
2188 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2189 
2190 		// Make sure the source area (respectively, if not shared, all areas of
2191 		// the cache) doesn't have any wired ranges.
2192 		if (sharedArea) {
2193 			if (wait_if_area_is_wired(source, &locker, &cacheLocker))
2194 				restart = true;
2195 		} else {
2196 			for (VMArea* area = cache->areas; area != NULL;
2197 					area = area->cache_next) {
2198 				if (wait_if_area_is_wired(area, &locker, &cacheLocker)) {
2199 					restart = true;
2200 					break;
2201 				}
2202 			}
2203 		}
2204 	} while (restart);
2205 
2206 	if (addressSpec == B_CLONE_ADDRESS) {
2207 		addressSpec = B_EXACT_ADDRESS;
2208 		*_address = (void*)source->Base();
2209 	}
2210 
2211 	// First, create a cache on top of the source area, or reuse the existing
2212 	// one if this is a shared area.
2213 
2214 	VMArea* target;
2215 	status = map_backing_store(targetAddressSpace, cache, _address,
2216 		source->cache_offset, source->Size(), addressSpec, source->wiring,
2217 		protection, sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2218 		&target, name, 0, true);
2219 	if (status < B_OK)
2220 		return status;
2221 
2222 	if (sharedArea) {
2223 		// The new area uses the old area's cache, but map_backing_store()
2224 		// hasn't acquired a ref. So we have to do that now.
2225 		cache->AcquireRefLocked();
2226 	}
2227 
2228 	// If the source area is writable, we need to move it one layer up as well
2229 
2230 	if (!sharedArea) {
2231 		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2232 			// TODO: do something more useful if this fails!
2233 			if (vm_copy_on_write_area(cache) < B_OK)
2234 				panic("vm_copy_on_write_area() failed!\n");
2235 		}
2236 	}
2237 
2238 	// we return the ID of the newly created area
2239 	return target->id;
2240 }
2241 
2242 
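/*!	Changes the protection of the given area. Unless \a kernel is \c true,
	B_KERNEL_AREA areas and areas belonging to other teams may not be
	changed. When a read-only area becomes writable and its cache has
	consumers, vm_copy_on_write_area() inserts a copy-on-write cache; when a
	writable area becomes read-only, the cache's commitment may be reduced to
	the pages it actually contains. Existing page mappings are re-protected
	accordingly, in some cases only for the pages of the top cache.
*/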
2243 static status_t
2244 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2245 	bool kernel)
2246 {
2247 	TRACE(("vm_set_area_protection(team = %#lx, area = %#lx, protection = "
2248 		"%#lx)\n", team, areaID, newProtection));
2249 
2250 	if (!arch_vm_supports_protection(newProtection))
2251 		return B_NOT_SUPPORTED;
2252 
2253 	bool becomesWritable
2254 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2255 
2256 	// lock address spaces and cache
2257 	MultiAddressSpaceLocker locker;
2258 	VMCache* cache;
2259 	VMArea* area;
2260 	status_t status;
2261 	AreaCacheLocker cacheLocker;
2262 	bool isWritable;
2263 
2264 	bool restart;
2265 	do {
2266 		restart = false;
2267 
2268 		locker.Unset();
2269 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2270 		if (status != B_OK)
2271 			return status;
2272 
2273 		cacheLocker.SetTo(cache, true);	// already locked
2274 
2275 		if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2276 			return B_NOT_ALLOWED;
2277 
2278 		if (area->protection == newProtection)
2279 			return B_OK;
2280 
2281 		if (team != VMAddressSpace::KernelID()
2282 			&& area->address_space->ID() != team) {
2283 			// unless you're the kernel, you are only allowed to set
2284 			// the protection of your own areas
2285 			return B_NOT_ALLOWED;
2286 		}
2287 
2288 		isWritable
2289 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2290 
2291 		// Make sure the area (respectively, if we're going to call
2292 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2293 		// wired ranges.
2294 		if (!isWritable && becomesWritable && !list_is_empty(&cache->consumers)) {
2295 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2296 					otherArea = otherArea->cache_next) {
2297 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2298 					restart = true;
2299 					break;
2300 				}
2301 			}
2302 		} else {
2303 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2304 				restart = true;
2305 		}
2306 	} while (restart);
2307 
2308 	bool changePageProtection = true;
2309 	bool changeTopCachePagesOnly = false;
2310 
2311 	if (isWritable && !becomesWritable) {
2312 		// writable -> !writable
2313 
2314 		if (cache->source != NULL && cache->temporary) {
2315 			if (cache->CountWritableAreas(area) == 0) {
2316 				// Since this cache is now backed by the pages of its source
2317 				// cache, we can reduce the cache's commitment to cover only
2318 				// the pages that are actually in this cache.
2319 
2320 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2321 					team == VMAddressSpace::KernelID()
2322 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2323 
2324 				// TODO: we may be able to join with our source cache, if
2325 				// count == 0
2326 			}
2327 		}
2328 
2329 		// If only the writability changes, we can just remap the pages of the
2330 		// top cache, since the pages of lower caches are mapped read-only
2331 		// anyway. That is only advantageous, though, if the number of pages in
2332 		// the cache is significantly smaller than the number of pages in the
2333 		// area.
2334 		if (newProtection
2335 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2336 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2337 			changeTopCachePagesOnly = true;
2338 		}
2339 	} else if (!isWritable && becomesWritable) {
2340 		// !writable -> writable
2341 
2342 		if (!list_is_empty(&cache->consumers)) {
2343 			// There are consumers -- we have to insert a new cache. Fortunately
2344 			// vm_copy_on_write_area() does everything that's needed.
2345 			changePageProtection = false;
2346 			status = vm_copy_on_write_area(cache);
2347 		} else {
2348 			// No consumers, so we don't need to insert a new cache.
2349 			if (cache->source != NULL && cache->temporary) {
2350 				// the cache's commitment must contain all possible pages
2351 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2352 					team == VMAddressSpace::KernelID()
2353 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2354 			}
2355 
2356 			if (status == B_OK && cache->source != NULL) {
2357 				// There's a source cache, hence we can't just change all pages'
2358 				// protection or we might allow writing into pages belonging to
2359 				// a lower cache.
2360 				changeTopCachePagesOnly = true;
2361 			}
2362 		}
2363 	} else {
2364 		// we don't have anything special to do in all other cases
2365 	}
2366 
2367 	if (status == B_OK) {
2368 		// remap existing pages in this cache
2369 		if (changePageProtection) {
2370 			VMTranslationMap* map = area->address_space->TranslationMap();
2371 			map->Lock();
2372 
2373 			if (changeTopCachePagesOnly) {
2374 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2375 				page_num_t lastPageOffset
2376 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2377 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2378 						vm_page* page = it.Next();) {
2379 					if (page->cache_offset >= firstPageOffset
2380 						&& page->cache_offset <= lastPageOffset) {
2381 						addr_t address = virtual_page_address(area, page);
2382 						map->ProtectPage(area, address, newProtection);
2383 					}
2384 				}
2385 			} else
2386 				map->ProtectArea(area, newProtection);
2387 
2388 			map->Unlock();
2389 		}
2390 
2391 		area->protection = newProtection;
2392 	}
2393 
2394 	return status;
2395 }
2396 
2397 
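/*!	Queries the translation map of the given team for the physical address
	currently mapped at \a vaddr and stores it in \a paddr. Returns the
	status of the query, or \c B_BAD_TEAM_ID if the team doesn't exist.
*/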
2398 status_t
2399 vm_get_page_mapping(team_id team, addr_t vaddr, addr_t* paddr)
2400 {
2401 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2402 	if (addressSpace == NULL)
2403 		return B_BAD_TEAM_ID;
2404 
2405 	VMTranslationMap* map = addressSpace->TranslationMap();
2406 
2407 	map->Lock();
2408 	uint32 dummyFlags;
2409 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2410 	map->Unlock();
2411 
2412 	addressSpace->Put();
2413 	return status;
2414 }
2415 
2416 
2417 /*!	The page's cache must be locked.
2418 */
2419 bool
2420 vm_test_map_modification(vm_page* page)
2421 {
2422 	if (page->modified)
2423 		return true;
2424 
2425 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2426 	vm_page_mapping* mapping;
2427 	while ((mapping = iterator.Next()) != NULL) {
2428 		VMArea* area = mapping->area;
2429 		VMTranslationMap* map = area->address_space->TranslationMap();
2430 
2431 		addr_t physicalAddress;
2432 		uint32 flags;
2433 		map->Lock();
2434 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2435 		map->Unlock();
2436 
2437 		if ((flags & PAGE_MODIFIED) != 0)
2438 			return true;
2439 	}
2440 
2441 	return false;
2442 }
2443 
2444 
2445 /*!	The page's cache must be locked.
2446 */
2447 void
2448 vm_clear_map_flags(vm_page* page, uint32 flags)
2449 {
2450 	if ((flags & PAGE_ACCESSED) != 0)
2451 		page->accessed = false;
2452 	if ((flags & PAGE_MODIFIED) != 0)
2453 		page->modified = false;
2454 
2455 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2456 	vm_page_mapping* mapping;
2457 	while ((mapping = iterator.Next()) != NULL) {
2458 		VMArea* area = mapping->area;
2459 		VMTranslationMap* map = area->address_space->TranslationMap();
2460 
2461 		map->Lock();
2462 		map->ClearFlags(virtual_page_address(area, page), flags);
2463 		map->Unlock();
2464 	}
2465 }
2466 
2467 
2468 /*!	Removes all mappings from a page.
2469 	After you've called this function, the page is unmapped from memory and
2470 	the page's \c accessed and \c modified flags have been updated according
2471 	to the state of the mappings.
2472 	The page's cache must be locked.
2473 */
2474 void
2475 vm_remove_all_page_mappings(vm_page* page)
2476 {
2477 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2478 		VMArea* area = mapping->area;
2479 		VMTranslationMap* map = area->address_space->TranslationMap();
2480 		addr_t address = virtual_page_address(area, page);
2481 		map->UnmapPage(area, address, false);
2482 	}
2483 }
2484 
2485 
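/*!	Clears the accessed flag of all of the page's mappings as well as the
	page's own \c accessed flag, merging the modified state of the mappings
	into the page's \c modified flag.
	The page's cache must be locked.
	\return The number of accessed flags that were set, including the page's
		own one.
*/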
2486 int32
2487 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2488 {
2489 	int32 count = 0;
2490 
2491 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2492 	vm_page_mapping* mapping;
2493 	while ((mapping = iterator.Next()) != NULL) {
2494 		VMArea* area = mapping->area;
2495 		VMTranslationMap* map = area->address_space->TranslationMap();
2496 
2497 		bool modified;
2498 		if (map->ClearAccessedAndModified(area,
2499 				virtual_page_address(area, page), false, modified)) {
2500 			count++;
2501 		}
2502 
2503 		page->modified |= modified;
2504 	}
2505 
2507 	if (page->accessed) {
2508 		count++;
2509 		page->accessed = false;
2510 	}
2511 
2512 	return count;
2513 }
2514 
2515 
2516 /*!	Removes all mappings of a page and/or clears the accessed bits of the
2517 	mappings.
2518 	The function iterates through the page mappings and removes them until
2519 	encountering one that has been accessed. From then on it will continue to
2520 	iterate, but only clear the accessed flag of the mapping. The page's
2521 	\c modified bit will be updated accordingly, the \c accessed bit will be
2522 	cleared.
2523 	\return The number of mapping accessed bits encountered, including the
2524 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2525 		of the page have been removed.
2526 */
2527 int32
2528 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2529 {
2530 	ASSERT(page->wired_count == 0);
2531 
2532 	if (page->accessed)
2533 		return vm_clear_page_mapping_accessed_flags(page);
2534 
2535 	while (vm_page_mapping* mapping = page->mappings.Head()) {
2536 		VMArea* area = mapping->area;
2537 		VMTranslationMap* map = area->address_space->TranslationMap();
2538 		addr_t address = virtual_page_address(area, page);
2539 		bool modified = false;
2540 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2541 			page->accessed = true;
2542 			page->modified |= modified;
2543 			return vm_clear_page_mapping_accessed_flags(page);
2544 		}
2545 		page->modified |= modified;
2546 	}
2547 
2548 	return 0;
2549 }
2550 
2551 
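/*!	Implements the "dl", "dw", "ds", "db", and "string" KDL commands, which
	dump memory at the given virtual (or, with -p/--physical, physical)
	address in 8, 4, 2, or 1 byte units, respectively as a string.
	For example (the address is only a placeholder):
		dw 0x80001000 8
	prints eight 32-bit words starting at that address; in physical mode the
	output is limited to a single page.
*/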
2552 static int
2553 display_mem(int argc, char** argv)
2554 {
2555 	bool physical = false;
2556 	addr_t copyAddress;
2557 	int32 displayWidth;
2558 	int32 itemSize;
2559 	int32 num = -1;
2560 	addr_t address;
2561 	int i = 1, j;
2562 
2563 	if (argc > 1 && argv[1][0] == '-') {
2564 		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2565 			physical = true;
2566 			i++;
2567 		} else
2568 			i = 99;
2569 	}
2570 
2571 	if (argc < i + 1 || argc > i + 2) {
2572 		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2573 			"\tdl - 8 bytes\n"
2574 			"\tdw - 4 bytes\n"
2575 			"\tds - 2 bytes\n"
2576 			"\tdb - 1 byte\n"
2577 			"\tstring - a whole string\n"
2578 			"  -p or --physical only allows memory from a single page to be "
2579 			"displayed.\n");
2580 		return 0;
2581 	}
2582 
2583 	address = parse_expression(argv[i]);
2584 
2585 	if (argc > i + 1)
2586 		num = parse_expression(argv[i + 1]);
2587 
2588 	// build the format string
2589 	if (strcmp(argv[0], "db") == 0) {
2590 		itemSize = 1;
2591 		displayWidth = 16;
2592 	} else if (strcmp(argv[0], "ds") == 0) {
2593 		itemSize = 2;
2594 		displayWidth = 8;
2595 	} else if (strcmp(argv[0], "dw") == 0) {
2596 		itemSize = 4;
2597 		displayWidth = 4;
2598 	} else if (strcmp(argv[0], "dl") == 0) {
2599 		itemSize = 8;
2600 		displayWidth = 2;
2601 	} else if (strcmp(argv[0], "string") == 0) {
2602 		itemSize = 1;
2603 		displayWidth = -1;
2604 	} else {
2605 		kprintf("display_mem called in an invalid way!\n");
2606 		return 0;
2607 	}
2608 
2609 	if (num <= 0)
2610 		num = displayWidth;
2611 
2612 	void* physicalPageHandle = NULL;
2613 
2614 	if (physical) {
2615 		int32 offset = address & (B_PAGE_SIZE - 1);
2616 		if (num * itemSize + offset > B_PAGE_SIZE) {
2617 			num = (B_PAGE_SIZE - offset) / itemSize;
2618 			kprintf("NOTE: number of bytes has been cut to page size\n");
2619 		}
2620 
2621 		address = ROUNDDOWN(address, B_PAGE_SIZE);
2622 
2623 		if (vm_get_physical_page_debug(address, &copyAddress,
2624 				&physicalPageHandle) != B_OK) {
2625 			kprintf("getting the hardware page failed.\n");
2626 			return 0;
2627 		}
2628 
2629 		address += offset;
2630 		copyAddress += offset;
2631 	} else
2632 		copyAddress = address;
2633 
2634 	if (!strcmp(argv[0], "string")) {
2635 		kprintf("%p \"", (char*)copyAddress);
2636 
2637 		// string mode
2638 		for (i = 0; true; i++) {
2639 			char c;
2640 			if (debug_memcpy(&c, (char*)copyAddress + i, 1) != B_OK
2641 				|| c == '\0')
2642 				break;
2643 
2644 			if (c == '\n')
2645 				kprintf("\\n");
2646 			else if (c == '\t')
2647 				kprintf("\\t");
2648 			else {
2649 				if (!isprint(c))
2650 					c = '.';
2651 
2652 				kprintf("%c", c);
2653 			}
2654 		}
2655 
2656 		kprintf("\"\n");
2657 	} else {
2658 		// number mode
2659 		for (i = 0; i < num; i++) {
2660 			uint32 value;
2661 
2662 			if ((i % displayWidth) == 0) {
2663 				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2664 				if (i != 0)
2665 					kprintf("\n");
2666 
2667 				kprintf("[0x%lx]  ", address + i * itemSize);
2668 
2669 				for (j = 0; j < displayed; j++) {
2670 					char c;
2671 					if (debug_memcpy(&c, (char*)copyAddress + i * itemSize + j,
2672 							1) != B_OK) {
2673 						displayed = j;
2674 						break;
2675 					}
2676 					if (!isprint(c))
2677 						c = '.';
2678 
2679 					kprintf("%c", c);
2680 				}
2681 				if (num > displayWidth) {
2682 					// make sure the spacing in the last line is correct
2683 					for (j = displayed; j < displayWidth * itemSize; j++)
2684 						kprintf(" ");
2685 				}
2686 				kprintf("  ");
2687 			}
2688 
2689 			if (debug_memcpy(&value, (uint8*)copyAddress + i * itemSize,
2690 					itemSize) != B_OK) {
2691 				kprintf("read fault");
2692 				break;
2693 			}
2694 
2695 			switch (itemSize) {
2696 				case 1:
2697 					kprintf(" %02x", *(uint8*)&value);
2698 					break;
2699 				case 2:
2700 					kprintf(" %04x", *(uint16*)&value);
2701 					break;
2702 				case 4:
2703 					kprintf(" %08lx", *(uint32*)&value);
2704 					break;
2705 				case 8:
2706 					kprintf(" %016Lx", *(uint64*)&value);
2707 					break;
2708 			}
2709 		}
2710 
2711 		kprintf("\n");
2712 	}
2713 
2714 	if (physical) {
2715 		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
2716 		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
2717 	}
2718 	return 0;
2719 }
2720 
2721 
2722 static void
2723 dump_cache_tree_recursively(VMCache* cache, int level,
2724 	VMCache* highlightCache)
2725 {
2726 	// print this cache
2727 	for (int i = 0; i < level; i++)
2728 		kprintf("  ");
2729 	if (cache == highlightCache)
2730 		kprintf("%p <--\n", cache);
2731 	else
2732 		kprintf("%p\n", cache);
2733 
2734 	// recursively print its consumers
2735 	VMCache* consumer = NULL;
2736 	while ((consumer = (VMCache*)list_get_next_item(&cache->consumers,
2737 			consumer)) != NULL) {
2738 		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
2739 	}
2740 }
2741 
2742 
2743 static int
2744 dump_cache_tree(int argc, char** argv)
2745 {
2746 	if (argc != 2 || !strcmp(argv[1], "--help")) {
2747 		kprintf("usage: %s <address>\n", argv[0]);
2748 		return 0;
2749 	}
2750 
2751 	addr_t address = parse_expression(argv[1]);
2752 	if (address == 0)
2753 		return 0;
2754 
2755 	VMCache* cache = (VMCache*)address;
2756 	VMCache* root = cache;
2757 
2758 	// find the root cache (the transitive source)
2759 	while (root->source != NULL)
2760 		root = root->source;
2761 
2762 	dump_cache_tree_recursively(root, 0, cache);
2763 
2764 	return 0;
2765 }
2766 
2767 
2768 static const char*
2769 cache_type_to_string(int32 type)
2770 {
2771 	switch (type) {
2772 		case CACHE_TYPE_RAM:
2773 			return "RAM";
2774 		case CACHE_TYPE_DEVICE:
2775 			return "device";
2776 		case CACHE_TYPE_VNODE:
2777 			return "vnode";
2778 		case CACHE_TYPE_NULL:
2779 			return "null";
2780 
2781 		default:
2782 			return "unknown";
2783 	}
2784 }
2785 
2786 
2787 #if DEBUG_CACHE_LIST
2788 
2789 static void
2790 update_cache_info_recursively(VMCache* cache, cache_info& info)
2791 {
2792 	info.page_count += cache->page_count;
2793 	if (cache->type == CACHE_TYPE_RAM)
2794 		info.committed += cache->committed_size;
2795 
2796 	// recurse
2797 	VMCache* consumer = NULL;
2798 	while ((consumer = (VMCache*)list_get_next_item(&cache->consumers,
2799 			consumer)) != NULL) {
2800 		update_cache_info_recursively(consumer, info);
2801 	}
2802 }
2803 
2804 
2805 static int
2806 cache_info_compare_page_count(const void* _a, const void* _b)
2807 {
2808 	const cache_info* a = (const cache_info*)_a;
2809 	const cache_info* b = (const cache_info*)_b;
2810 	if (a->page_count == b->page_count)
2811 		return 0;
2812 	return a->page_count < b->page_count ? 1 : -1;
2813 }
2814 
2815 
2816 static int
2817 cache_info_compare_committed(const void* _a, const void* _b)
2818 {
2819 	const cache_info* a = (const cache_info*)_a;
2820 	const cache_info* b = (const cache_info*)_b;
2821 	if (a->committed == b->committed)
2822 		return 0;
2823 	return a->committed < b->committed ? 1 : -1;
2824 }
2825 
2826 
2827 static void
2828 dump_caches_recursively(VMCache* cache, cache_info& info, int level)
2829 {
2830 	for (int i = 0; i < level; i++)
2831 		kprintf("  ");
2832 
2833 	kprintf("%p: type: %s, base: %lld, size: %lld, pages: %lu", cache,
2834 		cache_type_to_string(cache->type), cache->virtual_base,
2835 		cache->virtual_end, cache->page_count);
2836 
2837 	if (level == 0)
2838 		kprintf("/%lu", info.page_count);
2839 
2840 	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
2841 		kprintf(", committed: %lld", cache->committed_size);
2842 
2843 		if (level == 0)
2844 			kprintf("/%lu", info.committed);
2845 	}
2846 
2847 	// areas
2848 	if (cache->areas != NULL) {
2849 		VMArea* area = cache->areas;
2850 		kprintf(", areas: %ld (%s, team: %ld)", area->id, area->name,
2851 			area->address_space->ID());
2852 
2853 		while (area->cache_next != NULL) {
2854 			area = area->cache_next;
2855 			kprintf(", %ld", area->id);
2856 		}
2857 	}
2858 
2859 	kputs("\n");
2860 
2861 	// recurse
2862 	VMCache* consumer = NULL;
2863 	while ((consumer = (VMCache*)list_get_next_item(&cache->consumers,
2864 			consumer)) != NULL) {
2865 		dump_caches_recursively(consumer, info, level + 1);
2866 	}
2867 }
2868 
2869 
2870 static int
2871 dump_caches(int argc, char** argv)
2872 {
2873 	if (sCacheInfoTable == NULL) {
2874 		kprintf("No cache info table!\n");
2875 		return 0;
2876 	}
2877 
2878 	bool sortByPageCount = true;
2879 
2880 	for (int32 i = 1; i < argc; i++) {
2881 		if (strcmp(argv[i], "-c") == 0) {
2882 			sortByPageCount = false;
2883 		} else {
2884 			print_debugger_command_usage(argv[0]);
2885 			return 0;
2886 		}
2887 	}
2888 
2889 	uint32 totalCount = 0;
2890 	uint32 rootCount = 0;
2891 	off_t totalCommitted = 0;
2892 	page_num_t totalPages = 0;
2893 
2894 	VMCache* cache = gDebugCacheList;
2895 	while (cache) {
2896 		totalCount++;
2897 		if (cache->source == NULL) {
2898 			cache_info stackInfo;
2899 			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
2900 				? sCacheInfoTable[rootCount] : stackInfo;
2901 			rootCount++;
2902 			info.cache = cache;
2903 			info.page_count = 0;
2904 			info.committed = 0;
2905 			update_cache_info_recursively(cache, info);
2906 			totalCommitted += info.committed;
2907 			totalPages += info.page_count;
2908 		}
2909 
2910 		cache = cache->debug_next;
2911 	}
2912 
2913 	if (rootCount <= (uint32)kCacheInfoTableCount) {
2914 		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
2915 			sortByPageCount
2916 				? &cache_info_compare_page_count
2917 				: &cache_info_compare_committed);
2918 	}
2919 
2920 	kprintf("total committed memory: %lld, total used pages: %lu\n",
2921 		totalCommitted, totalPages);
2922 	kprintf("%lu caches (%lu root caches), sorted by %s per cache "
2923 		"tree...\n\n", totalCount, rootCount,
2924 		sortByPageCount ? "page count" : "committed size");
2925 
2926 	if (rootCount <= (uint32)kCacheInfoTableCount) {
2927 		for (uint32 i = 0; i < rootCount; i++) {
2928 			cache_info& info = sCacheInfoTable[i];
2929 			dump_caches_recursively(info.cache, info, 0);
2930 		}
2931 	} else
2932 		kprintf("Cache info table too small! Can't sort and print caches!\n");
2933 
2934 	return 0;
2935 }
2936 
2937 #endif	// DEBUG_CACHE_LIST
2938 
2939 
2940 static int
2941 dump_cache(int argc, char** argv)
2942 {
2943 	VMCache* cache;
2944 	bool showPages = false;
2945 	int i = 1;
2946 
2947 	if (argc < 2 || !strcmp(argv[1], "--help")) {
2948 		kprintf("usage: %s [-ps] <address>\n"
2949 			"  if -p is specified, all pages are shown; if -s is used,\n"
2950 			"  only the cache info is shown.\n", argv[0]);
2951 		return 0;
2952 	}
2953 	while (argv[i][0] == '-') {
2954 		char* arg = argv[i] + 1;
2955 		while (arg[0]) {
2956 			if (arg[0] == 'p')
2957 				showPages = true;
2958 			arg++;
2959 		}
2960 		i++;
2961 	}
2962 	if (argv[i] == NULL) {
2963 		kprintf("%s: invalid argument, pass address\n", argv[0]);
2964 		return 0;
2965 	}
2966 
2967 	addr_t address = parse_expression(argv[i]);
2968 	if (address == 0)
2969 		return 0;
2970 
2971 	cache = (VMCache*)address;
2972 
2973 	kprintf("CACHE %p:\n", cache);
2974 	kprintf("  ref_count:    %ld\n", cache->RefCount());
2975 	kprintf("  source:       %p\n", cache->source);
2976 	kprintf("  type:         %s\n", cache_type_to_string(cache->type));
2977 	kprintf("  virtual_base: 0x%Lx\n", cache->virtual_base);
2978 	kprintf("  virtual_end:  0x%Lx\n", cache->virtual_end);
2979 	kprintf("  temporary:    %ld\n", cache->temporary);
2980 	kprintf("  scan_skip:    %ld\n", cache->scan_skip);
2981 	kprintf("  lock:         %p\n", cache->GetLock());
2982 #if KDEBUG
2983 	kprintf("  lock.holder:  %ld\n", cache->GetLock()->holder);
2984 #endif
2985 	kprintf("  areas:\n");
2986 
2987 	for (VMArea* area = cache->areas; area != NULL; area = area->cache_next) {
2988 		kprintf("    area 0x%lx, %s\n", area->id, area->name);
2989 		kprintf("\tbase_addr:  0x%lx, size: 0x%lx\n", area->Base(),
2990 			area->Size());
2991 		kprintf("\tprotection: 0x%lx\n", area->protection);
2992 		kprintf("\towner:      0x%lx\n", area->address_space->ID());
2993 	}
2994 
2995 	kprintf("  consumers:\n");
2996 	VMCache* consumer = NULL;
2997 	while ((consumer = (VMCache*)list_get_next_item(&cache->consumers,
2998 				consumer)) != NULL) {
2999 		kprintf("\t%p\n", consumer);
3000 	}
3001 
3002 	kprintf("  pages:\n");
3003 	if (showPages) {
3004 		for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3005 				vm_page* page = it.Next();) {
3006 			if (!vm_page_is_dummy(page)) {
3007 				kprintf("\t%p ppn 0x%lx offset 0x%lx state %u (%s) "
3008 					"wired_count %u\n", page, page->physical_page_number,
3009 					page->cache_offset, page->State(),
3010 					page_state_to_string(page->State()), page->wired_count);
3011 			} else {
3012 				kprintf("\t%p DUMMY PAGE state %u (%s)\n",
3013 					page, page->State(), page_state_to_string(page->State()));
3014 			}
3015 		}
3016 	} else
3017 		kprintf("\t%ld in cache\n", cache->page_count);
3018 
3019 	set_debug_variable("_sourceCache", (addr_t)cache->source);
3020 
3021 	return 0;
3022 }
3023 
3024 
3025 static void
3026 dump_area_struct(VMArea* area, bool mappings)
3027 {
3028 	kprintf("AREA: %p\n", area);
3029 	kprintf("name:\t\t'%s'\n", area->name);
3030 	kprintf("owner:\t\t0x%lx\n", area->address_space->ID());
3031 	kprintf("id:\t\t0x%lx\n", area->id);
3032 	kprintf("base:\t\t0x%lx\n", area->Base());
3033 	kprintf("size:\t\t0x%lx\n", area->Size());
3034 	kprintf("protection:\t0x%lx\n", area->protection);
3035 	kprintf("wiring:\t\t0x%x\n", area->wiring);
3036 	kprintf("memory_type:\t0x%x\n", area->memory_type);
3037 	kprintf("cache:\t\t%p\n", area->cache);
3038 	kprintf("cache_type:\t%s\n", cache_type_to_string(area->cache_type));
3039 	kprintf("cache_offset:\t0x%Lx\n", area->cache_offset);
3040 	kprintf("cache_next:\t%p\n", area->cache_next);
3041 	kprintf("cache_prev:\t%p\n", area->cache_prev);
3042 
3043 	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3044 	if (mappings) {
3045 		kprintf("page mappings:\n");
3046 		while (iterator.HasNext()) {
3047 			vm_page_mapping* mapping = iterator.Next();
3048 			kprintf("  %p", mapping->page);
3049 		}
3050 		kprintf("\n");
3051 	} else {
3052 		uint32 count = 0;
3053 		while (iterator.Next() != NULL) {
3054 			count++;
3055 		}
3056 		kprintf("page mappings:\t%lu\n", count);
3057 	}
3058 }
3059 
3060 
3061 static int
3062 dump_area(int argc, char** argv)
3063 {
3064 	bool mappings = false;
3065 	bool found = false;
3066 	int32 index = 1;
3067 	VMArea* area;
3068 	addr_t num;
3069 
3070 	if (argc < 2 || !strcmp(argv[1], "--help")) {
3071 		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3072 			"All areas matching either id/address/name are listed. You can\n"
3073 			"restrict the check to a specific attribute by prefixing the\n"
3074 			"specifier with one of the id/contains/address/name keywords.\n"
3075 			"-m shows the area's mappings as well.\n");
3076 		return 0;
3077 	}
3078 
3079 	if (!strcmp(argv[1], "-m")) {
3080 		mappings = true;
3081 		index++;
3082 	}
3083 
3084 	int32 mode = 0xf;
3085 	if (!strcmp(argv[index], "id"))
3086 		mode = 1;
3087 	else if (!strcmp(argv[index], "contains"))
3088 		mode = 2;
3089 	else if (!strcmp(argv[index], "name"))
3090 		mode = 4;
3091 	else if (!strcmp(argv[index], "address"))
3092 		mode = 0;
3093 	if (mode != 0xf)
3094 		index++;
3095 
3096 	if (index >= argc) {
3097 		kprintf("No area specifier given.\n");
3098 		return 0;
3099 	}
3100 
3101 	num = parse_expression(argv[index]);
3102 
3103 	if (mode == 0) {
3104 		dump_area_struct((struct VMArea*)num, mappings);
3105 	} else {
3106 		// walk through the area list, looking for the arguments as a name
3107 
3108 		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3109 		while ((area = it.Next()) != NULL) {
3110 			if (((mode & 4) != 0 && area->name != NULL
3111 					&& !strcmp(argv[index], area->name))
3112 				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3113 					|| (((mode & 2) != 0 && area->Base() <= num
3114 						&& area->Base() + area->Size() > num))))) {
3115 				dump_area_struct(area, mappings);
3116 				found = true;
3117 			}
3118 		}
3119 
3120 		if (!found)
3121 			kprintf("could not find area %s (%ld)\n", argv[index], num);
3122 	}
3123 
3124 	return 0;
3125 }
3126 
3127 
3128 static int
3129 dump_area_list(int argc, char** argv)
3130 {
3131 	VMArea* area;
3132 	const char* name = NULL;
3133 	int32 id = 0;
3134 
3135 	if (argc > 1) {
3136 		id = parse_expression(argv[1]);
3137 		if (id == 0)
3138 			name = argv[1];
3139 	}
3140 
3141 	kprintf("addr          id  base\t\tsize    protect lock  name\n");
3142 
3143 	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3144 	while ((area = it.Next()) != NULL) {
3145 		if ((id != 0 && area->address_space->ID() != id)
3146 			|| (name != NULL && strstr(area->name, name) == NULL))
3147 			continue;
3148 
3149 		kprintf("%p %5lx  %p\t%p %4lx\t%4d  %s\n", area, area->id,
3150 			(void*)area->Base(), (void*)area->Size(), area->protection,
3151 			area->wiring, area->name);
3152 	}
3153 	return 0;
3154 }
3155 
3156 
3157 static int
3158 dump_available_memory(int argc, char** argv)
3159 {
3160 	kprintf("Available memory: %Ld/%lu bytes\n",
3161 		sAvailableMemory, vm_page_num_pages() * B_PAGE_SIZE);
3162 	return 0;
3163 }
3164 
3165 
3166 /*!	Deletes all areas and reserved regions in the given address space.
3167 
3168 	The caller must ensure that none of the areas has any wired ranges.
3169 
3170 	\param addressSpace The address space.
3171 	\param deletingAddressSpace \c true, if the address space is in the process
3172 		of being deleted.
3173 */
3174 void
3175 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3176 {
3177 	TRACE(("vm_delete_areas: called on address space 0x%lx\n",
3178 		addressSpace->ID()));
3179 
3180 	addressSpace->WriteLock();
3181 
3182 	// remove all reserved areas in this address space
3183 	addressSpace->UnreserveAllAddressRanges(0);
3184 
3185 	// delete all the areas in this address space
3186 	while (VMArea* area = addressSpace->FirstArea()) {
3187 		ASSERT(!area->IsWired());
3188 		delete_area(addressSpace, area, deletingAddressSpace);
3189 	}
3190 
3191 	addressSpace->WriteUnlock();
3192 }
3193 
3194 
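/*!	Returns the ID of the area containing \a address, looked up in the
	current team's address space for userland addresses and in the kernel
	address space otherwise. Unless \a kernel is \c true, areas that are
	neither user readable nor user writable are not reported.
*/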
3195 static area_id
3196 vm_area_for(addr_t address, bool kernel)
3197 {
3198 	team_id team;
3199 	if (IS_USER_ADDRESS(address)) {
3200 		// we try the user team address space, if any
3201 		team = VMAddressSpace::CurrentID();
3202 		if (team < 0)
3203 			return team;
3204 	} else
3205 		team = VMAddressSpace::KernelID();
3206 
3207 	AddressSpaceReadLocker locker(team);
3208 	if (!locker.IsLocked())
3209 		return B_BAD_TEAM_ID;
3210 
3211 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3212 	if (area != NULL) {
3213 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3214 			return B_ERROR;
3215 
3216 		return area->id;
3217 	}
3218 
3219 	return B_ERROR;
3220 }
3221 
3222 
3223 /*!	Frees physical pages that were used during the boot process.
3224 	\a end is inclusive.
3225 */
3226 static void
3227 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3228 {
3229 	// free all physical pages in the specified range
3230 
3231 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3232 		addr_t physicalAddress;
3233 		uint32 flags;
3234 
3235 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3236 			&& (flags & PAGE_PRESENT) != 0) {
3237 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3238 			if (page != NULL && page->State() != PAGE_STATE_FREE
3239 					 && page->State() != PAGE_STATE_CLEAR
3240 					 && page->State() != PAGE_STATE_UNUSED) {
3241 				DEBUG_PAGE_ACCESS_START(page);
3242 				vm_page_set_state(page, PAGE_STATE_FREE);
3243 			}
3244 		}
3245 	}
3246 
3247 	// unmap the memory
3248 	map->Unmap(start, end);
3249 }
3250 
3251 
3252 void
3253 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3254 {
3255 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3256 	addr_t end = start + (size - 1);
3257 	addr_t lastEnd = start;
3258 
3259 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3260 		(void*)start, (void*)end));
3261 
3262 	// The areas are sorted in virtual address space order, so we just have
3263 	// to find the holes between them that fall into the range we should
3264 	// dispose of.
3265 
3266 	map->Lock();
3267 
3268 	for (VMAddressSpace::AreaIterator it
3269 				= VMAddressSpace::Kernel()->GetAreaIterator();
3270 			VMArea* area = it.Next();) {
3271 		addr_t areaStart = area->Base();
3272 		addr_t areaEnd = areaStart + (area->Size() - 1);
3273 
3274 		if (areaEnd < start)
3275 			continue;
3276 
3277 		if (areaStart > end) {
3278 			// we are done, the area is already beyond what we have to free
3279 			end = areaStart - 1;
3280 			break;
3281 		}
3282 
3283 		if (areaStart > lastEnd) {
3284 			// this is something we can free
3285 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3286 				(void*)areaStart));
3287 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3288 		}
3289 
3290 		if (areaEnd >= end) {
3291 			lastEnd = areaEnd;
3292 				// no +1 to prevent potential overflow
3293 			break;
3294 		}
3295 
3296 		lastEnd = areaEnd + 1;
3297 	}
3298 
3299 	if (lastEnd < end) {
3300 		// we can also get rid of some space at the end of the area
3301 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3302 			(void*)end));
3303 		unmap_and_free_physical_pages(map, lastEnd, end);
3304 	}
3305 
3306 	map->Unlock();
3307 }
3308 
3309 
3310 static void
3311 create_preloaded_image_areas(struct preloaded_image* image)
3312 {
3313 	char name[B_OS_NAME_LENGTH];
3314 	void* address;
3315 	int32 length;
3316 
3317 	// use file name to create a good area name
3318 	char* fileName = strrchr(image->name, '/');
3319 	if (fileName == NULL)
3320 		fileName = image->name;
3321 	else
3322 		fileName++;
3323 
3324 	length = strlen(fileName);
3325 	// make sure there is enough space for the suffix
3326 	if (length > 25)
3327 		length = 25;
3328 
3329 	memcpy(name, fileName, length);
3330 	strcpy(name + length, "_text");
3331 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3332 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3333 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3334 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3335 		// this will later be remapped read-only/executable by the
3336 		// ELF initialization code
3337 
3338 	strcpy(name + length, "_data");
3339 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3340 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3341 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3342 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3343 }
3344 
3345 
3346 /*!	Frees all areas that were previously created to hold the kernel arguments.
3347 	Any boot loader resources contained in those arguments must not be accessed
3348 	anymore past this point.
3349 */
3350 void
3351 vm_free_kernel_args(kernel_args* args)
3352 {
3353 	uint32 i;
3354 
3355 	TRACE(("vm_free_kernel_args()\n"));
3356 
3357 	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3358 		area_id area = area_for((void*)args->kernel_args_range[i].start);
3359 		if (area >= B_OK)
3360 			delete_area(area);
3361 	}
3362 }
3363 
3364 
3365 static void
3366 allocate_kernel_args(kernel_args* args)
3367 {
3368 	TRACE(("allocate_kernel_args()\n"));
3369 
3370 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3371 		void* address = (void*)args->kernel_args_range[i].start;
3372 
3373 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3374 			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3375 			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3376 	}
3377 }
3378 
3379 
3380 static void
3381 unreserve_boot_loader_ranges(kernel_args* args)
3382 {
3383 	TRACE(("unreserve_boot_loader_ranges()\n"));
3384 
3385 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3386 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3387 			(void*)args->virtual_allocated_range[i].start,
3388 			args->virtual_allocated_range[i].size);
3389 	}
3390 }
3391 
3392 
3393 static void
3394 reserve_boot_loader_ranges(kernel_args* args)
3395 {
3396 	TRACE(("reserve_boot_loader_ranges()\n"));
3397 
3398 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3399 		void* address = (void*)args->virtual_allocated_range[i].start;
3400 
3401 		// If the address is not a kernel address, we just skip it. The
3402 		// architecture specific code has to deal with it.
3403 		if (!IS_KERNEL_ADDRESS(address)) {
3404 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %lu\n",
3405 				address, args->virtual_allocated_range[i].size);
3406 			continue;
3407 		}
3408 
3409 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3410 			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3411 		if (status < B_OK)
3412 			panic("could not reserve boot loader ranges\n");
3413 	}
3414 }
3415 
3416 
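/*!	Reserves \a size bytes of kernel virtual address space using the ranges
	recorded in the kernel_args: first the gaps between the already allocated
	ranges are tried, then the space after the last range, and finally the
	space before the first one. The chosen range is added to the bookkeeping
	and its base address is returned, or 0 if nothing suitable was found.
	If \a blockAlign is \c true, the base is aligned to a multiple of the
	(page aligned) allocation size.
*/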
3417 static addr_t
3418 allocate_early_virtual(kernel_args* args, size_t size, bool blockAlign)
3419 {
3420 	size = PAGE_ALIGN(size);
3421 
3422 	// find a slot in the virtual allocation addr range
3423 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3424 		// check to see if the space between this one and the last is big enough
3425 		addr_t rangeStart = args->virtual_allocated_range[i].start;
3426 		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3427 			+ args->virtual_allocated_range[i - 1].size;
3428 
3429 		addr_t base = blockAlign
3430 			? ROUNDUP(previousRangeEnd, size) : previousRangeEnd;
3431 
3432 		if (base >= KERNEL_BASE && base < rangeStart
3433 				&& rangeStart - base >= size) {
3434 			args->virtual_allocated_range[i - 1].size
3435 				+= base + size - previousRangeEnd;
3436 			return base;
3437 		}
3438 	}
3439 
3440 	// We didn't find a gap between the allocation ranges; that's fine.
3441 	// See if there's a gap after the last one.
3442 	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3443 	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3444 		+ args->virtual_allocated_range[lastEntryIndex].size;
3445 	addr_t base = blockAlign ? ROUNDUP(lastRangeEnd, size) : lastRangeEnd;
3446 	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3447 		args->virtual_allocated_range[lastEntryIndex].size
3448 			+= base + size - lastRangeEnd;
3449 		return base;
3450 	}
3451 
3452 	// see if there's a gap before the first one
3453 	addr_t rangeStart = args->virtual_allocated_range[0].start;
3454 	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3455 		base = rangeStart - size;
3456 		if (blockAlign)
3457 			base = ROUNDDOWN(base, size);
3458 
3459 		if (base >= KERNEL_BASE) {
3460 			args->virtual_allocated_range[0].start = base;
3461 			args->virtual_allocated_range[0].size += rangeStart - base;
3462 			return base;
3463 		}
3464 	}
3465 
3466 	return 0;
3467 }
3468 
3469 
3470 static bool
3471 is_page_in_physical_memory_range(kernel_args* args, addr_t address)
3472 {
3473 	// TODO: horrible brute-force method of determining if the page can be
3474 	// allocated
3475 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3476 		if (address >= args->physical_memory_range[i].start
3477 			&& address < args->physical_memory_range[i].start
3478 				+ args->physical_memory_range[i].size)
3479 			return true;
3480 	}
3481 	return false;
3482 }
3483 
3484 
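/*!	Allocates a single physical page by extending one of the kernel_args'
	physical_allocated_range entries by B_PAGE_SIZE, provided the page lies
	within a physical memory range and doesn't collide with the following
	allocated range.
	\return The page number (the physical address divided by B_PAGE_SIZE), or
		0 if no page could be allocated.
*/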
3485 static addr_t
3486 allocate_early_physical_page(kernel_args* args)
3487 {
3488 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3489 		addr_t nextPage;
3490 
3491 		nextPage = args->physical_allocated_range[i].start
3492 			+ args->physical_allocated_range[i].size;
3493 		// see if the page after the next allocated paddr run can be allocated
3494 		if (i + 1 < args->num_physical_allocated_ranges
3495 			&& args->physical_allocated_range[i + 1].size != 0) {
3496 			// see if the next page will collide with the next allocated range
3497 			if (nextPage >= args->physical_allocated_range[i+1].start)
3498 				continue;
3499 		}
3500 		// see if the next physical page fits in the memory block
3501 		if (is_page_in_physical_memory_range(args, nextPage)) {
3502 			// we got one!
3503 			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3504 			return nextPage / B_PAGE_SIZE;
3505 		}
3506 	}
3507 
3508 	return 0;
3509 		// could not allocate a block
3510 }
3511 
3512 
3513 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3514 	allocate some pages before the VM is completely up.
3515 */
3516 addr_t
3517 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3518 	uint32 attributes, bool blockAlign)
3519 {
3520 	if (physicalSize > virtualSize)
3521 		physicalSize = virtualSize;
3522 
3523 	// find the vaddr to allocate at
3524 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, blockAlign);
3525 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualAddress);
3526 
3527 	// map the pages
3528 	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3529 		addr_t physicalAddress = allocate_early_physical_page(args);
3530 		if (physicalAddress == 0)
3531 			panic("error allocating early page!\n");
3532 
3533 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3534 
3535 		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3536 			physicalAddress * B_PAGE_SIZE, attributes,
3537 			&allocate_early_physical_page);
3538 	}
3539 
3540 	return virtualBase;
3541 }
3542 
3543 
3544 /*!	The main entrance point to initialize the VM. */
3545 status_t
3546 vm_init(kernel_args* args)
3547 {
3548 	struct preloaded_image* image;
3549 	void* address;
3550 	status_t err = 0;
3551 	uint32 i;
3552 
3553 	TRACE(("vm_init: entry\n"));
3554 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3555 	err = arch_vm_init(args);
3556 
3557 	// initialize some globals
3558 	vm_page_init_num_pages(args);
3559 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3560 
3561 	size_t heapSize = INITIAL_HEAP_SIZE;
3562 	// try to accommodate low-memory systems
3563 	while (heapSize > sAvailableMemory / 8)
3564 		heapSize /= 2;
3565 	if (heapSize < 1024 * 1024)
3566 		panic("vm_init: go buy some RAM please.");
3567 
3568 	slab_init(args);
3569 
3570 #if	!USE_SLAB_ALLOCATOR_FOR_MALLOC
3571 	// map in the new heap and initialize it
3572 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3573 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, false);
3574 	TRACE(("heap at 0x%lx\n", heapBase));
3575 	heap_init(heapBase, heapSize);
3576 #endif
3577 
3578 	// initialize the free page list and physical page mapper
3579 	vm_page_init(args);
3580 
3581 	// initialize the hash table that stores the pages mapped to caches
3582 	vm_cache_init(args);
3583 
3584 	{
3585 		status_t error = VMAreaHash::Init();
3586 		if (error != B_OK)
3587 			panic("vm_init: error initializing area hash table\n");
3588 	}
3589 
3590 	VMAddressSpace::Init();
3591 	reserve_boot_loader_ranges(args);
3592 
3593 	// Do any further initialization that the architecture dependent layers may
3594 	// need now.
3595 	arch_vm_translation_map_init_post_area(args);
3596 	arch_vm_init_post_area(args);
3597 	vm_page_init_post_area(args);
3598 	slab_init_post_area();
3599 
3600 	// allocate areas to represent stuff that already exists
3601 
3602 #if	!USE_SLAB_ALLOCATOR_FOR_MALLOC
3603 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
3604 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
3605 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3606 #endif
3607 
3608 	allocate_kernel_args(args);
3609 
3610 	create_preloaded_image_areas(&args->kernel_image);
3611 
3612 	// allocate areas for preloaded images
3613 	for (image = args->preloaded_images; image != NULL; image = image->next)
3614 		create_preloaded_image_areas(image);
3615 
3616 	// allocate kernel stacks
3617 	for (i = 0; i < args->num_cpus; i++) {
3618 		char name[64];
3619 
3620 		sprintf(name, "idle thread %lu kstack", i + 1);
3621 		address = (void*)args->cpu_kstack[i].start;
3622 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
3623 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3624 	}
3625 
3626 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
3627 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
3628 
3629 	// create the object cache for the page mappings
3630 	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
3631 		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
3632 		NULL, NULL);
3633 	if (gPageMappingsObjectCache == NULL)
3634 		panic("failed to create page mappings object cache");
3635 
3636 	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
3637 
3638 #if DEBUG_CACHE_LIST
3639 	create_area("cache info table", (void**)&sCacheInfoTable,
3640 		B_ANY_KERNEL_ADDRESS,
3641 		ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
3642 		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3643 #endif	// DEBUG_CACHE_LIST
3644 
3645 	// add some debugger commands
3646 	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
3647 	add_debugger_command("area", &dump_area,
3648 		"Dump info about a particular area");
3649 	add_debugger_command("cache", &dump_cache, "Dump VMCache");
3650 	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
3651 #if DEBUG_CACHE_LIST
3652 	add_debugger_command_etc("caches", &dump_caches,
3653 		"List all VMCache trees",
3654 		"[ \"-c\" ]\n"
3655 		"All cache trees are listed sorted in decreasing order by number of\n"
3656 		"used pages or, if \"-c\" is specified, by size of committed memory.\n",
3657 		0);
3658 #endif
3659 	add_debugger_command("avail", &dump_available_memory,
3660 		"Dump available memory");
3661 	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
3662 	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
3663 	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
3664 	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
3665 	add_debugger_command("string", &display_mem, "dump strings");
3666 
3667 	TRACE(("vm_init: exit\n"));
3668 
3669 	vm_cache_init_post_heap();
3670 
3671 	return err;
3672 }
3673 
3674 
3675 status_t
3676 vm_init_post_sem(kernel_args* args)
3677 {
3678 	// This frees all unused boot loader resources and makes their space
3679 	// available again
3680 	arch_vm_init_end(args);
3681 	unreserve_boot_loader_ranges(args);
3682 
3683 	// fill in all of the semaphores that were not allocated before;
3684 	// since we're still single-threaded and only the kernel address space
3685 	// exists, it isn't that hard to find all of the ones we need to create
3686 
3687 	arch_vm_translation_map_init_post_sem(args);
3688 	VMAddressSpace::InitPostSem();
3689 
3690 	slab_init_post_sem();
3691 
3692 #if	!USE_SLAB_ALLOCATOR_FOR_MALLOC
3693 	heap_init_post_sem();
3694 #endif
3695 
3696 	return B_OK;
3697 }
3698 
3699 
3700 status_t
3701 vm_init_post_thread(kernel_args* args)
3702 {
3703 	vm_page_init_post_thread(args);
3704 	slab_init_post_thread();
3705 	return heap_init_post_thread();
3706 }
3707 
3708 
3709 status_t
3710 vm_init_post_modules(kernel_args* args)
3711 {
3712 	return arch_vm_init_post_modules(args);
3713 }
3714 
3715 
3716 void
3717 permit_page_faults(void)
3718 {
3719 	struct thread* thread = thread_get_current_thread();
3720 	if (thread != NULL)
3721 		atomic_add(&thread->page_faults_allowed, 1);
3722 }
3723 
3724 
3725 void
3726 forbid_page_faults(void)
3727 {
3728 	struct thread* thread = thread_get_current_thread();
3729 	if (thread != NULL)
3730 		atomic_add(&thread->page_faults_allowed, -1);
3731 }
3732 
3733 
3734 status_t
3735 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isUser,
3736 	addr_t* newIP)
3737 {
3738 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
3739 		faultAddress));
3740 
3741 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
3742 
3743 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
3744 	VMAddressSpace* addressSpace = NULL;
3745 
3746 	status_t status = B_OK;
3747 	*newIP = 0;
3748 	atomic_add((int32*)&sPageFaults, 1);
3749 
3750 	if (IS_KERNEL_ADDRESS(pageAddress)) {
3751 		addressSpace = VMAddressSpace::GetKernel();
3752 	} else if (IS_USER_ADDRESS(pageAddress)) {
3753 		addressSpace = VMAddressSpace::GetCurrent();
3754 		if (addressSpace == NULL) {
3755 			if (!isUser) {
3756 				dprintf("vm_page_fault: kernel thread accessing invalid user "
3757 					"memory!\n");
3758 				status = B_BAD_ADDRESS;
3759 				TPF(PageFaultError(-1,
3760 					VMPageFaultTracing
3761 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
3762 			} else {
3763 				// XXX weird state.
3764 				panic("vm_page_fault: non kernel thread accessing user memory "
3765 					"that doesn't exist!\n");
3766 				status = B_BAD_ADDRESS;
3767 			}
3768 		}
3769 	} else {
3770 		// The hit was probably in the 64k DMZ between kernel and user space;
3771 		// this keeps a user space thread from passing a buffer that crosses
3772 		// into kernel space.
3773 		status = B_BAD_ADDRESS;
3774 		TPF(PageFaultError(-1,
3775 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
3776 	}
3777 
3778 	if (status == B_OK) {
3779 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isUser,
3780 			NULL);
3781 	}
3782 
3783 	if (status < B_OK) {
3784 		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
3785 			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%lx\n",
3786 			strerror(status), address, faultAddress, isWrite, isUser,
3787 			thread_get_current_thread_id());
3788 		if (!isUser) {
3789 			struct thread* thread = thread_get_current_thread();
3790 			if (thread != NULL && thread->fault_handler != 0) {
3791 				// this will cause the arch-dependent page fault handler to
3792 				// modify the IP on the interrupt frame or whatever to return
3793 				// to this address
3794 				*newIP = thread->fault_handler;
3795 			} else {
3796 				// unhandled page fault in the kernel
3797 				panic("vm_page_fault: unhandled page fault in kernel space at "
3798 					"0x%lx, ip 0x%lx\n", address, faultAddress);
3799 			}
3800 		} else {
3801 #if 1
3802 			addressSpace->ReadLock();
3803 
3804 			// TODO: remove me once we have proper userland debugging support
3805 			// (and tools)
3806 			VMArea* area = addressSpace->LookupArea(faultAddress);
3807 
3808 			struct thread* thread = thread_get_current_thread();
3809 			dprintf("vm_page_fault: thread \"%s\" (%ld) in team \"%s\" (%ld) "
3810 				"tried to %s address %#lx, ip %#lx (\"%s\" +%#lx)\n",
3811 				thread->name, thread->id, thread->team->name, thread->team->id,
3812 				isWrite ? "write" : "read", address, faultAddress,
3813 				area ? area->name : "???",
3814 				faultAddress - (area ? area->Base() : 0x0));
3815 
3816 			// We can print a stack trace of the userland thread here.
3817 // TODO: The user_memcpy() below can cause a deadlock, if it causes a page
3818 // fault and someone is already waiting for a write lock on the same address
3819 // space. This thread will then try to acquire the lock again and will
3820 // be queued after the writer.
3821 #	if 0
3822 			if (area) {
3823 				struct stack_frame {
3824 					#if defined(__INTEL__) || defined(__POWERPC__) || defined(__M68K__)
3825 						struct stack_frame*	previous;
3826 						void*				return_address;
3827 					#else
3828 						// ...
3829 					#warning writeme
3830 					#endif
3831 				} frame;
3832 #		ifdef __INTEL__
3833 				struct iframe* iframe = i386_get_user_iframe();
3834 				if (iframe == NULL)
3835 					panic("iframe is NULL!");
3836 
3837 				status_t status = user_memcpy(&frame, (void*)iframe->ebp,
3838 					sizeof(struct stack_frame));
3839 #		elif defined(__POWERPC__)
3840 				struct iframe* iframe = ppc_get_user_iframe();
3841 				if (iframe == NULL)
3842 					panic("iframe is NULL!");
3843 
3844 				status_t status = user_memcpy(&frame, (void*)iframe->r1,
3845 					sizeof(struct stack_frame));
3846 #		else
3847 #			warning "vm_page_fault() stack trace won't work"
3848 				status = B_ERROR;
3849 #		endif
3850 
3851 				dprintf("stack trace:\n");
3852 				int32 maxFrames = 50;
3853 				while (status == B_OK && --maxFrames >= 0
3854 						&& frame.return_address != NULL) {
3855 					dprintf("  %p", frame.return_address);
3856 					area = addressSpace->LookupArea(
3857 						(addr_t)frame.return_address);
3858 					if (area) {
3859 						dprintf(" (%s + %#lx)", area->name,
3860 							(addr_t)frame.return_address - area->Base());
3861 					}
3862 					dprintf("\n");
3863 
3864 					status = user_memcpy(&frame, frame.previous,
3865 						sizeof(struct stack_frame));
3866 				}
3867 			}
3868 #	endif	// 0 (stack trace)
3869 
3870 			addressSpace->ReadUnlock();
3871 #endif
3872 
3873 			// TODO: the fault_callback is a temporary solution for vm86
3874 			if (thread->fault_callback == NULL
3875 				|| thread->fault_callback(address, faultAddress, isWrite)) {
3876 				// If the thread has a signal handler for SIGSEGV, we simply
3877 				// send it the signal. Otherwise we notify the user debugger
3878 				// first.
3879 				struct sigaction action;
3880 				if (sigaction(SIGSEGV, NULL, &action) == 0
3881 					&& action.sa_handler != SIG_DFL
3882 					&& action.sa_handler != SIG_IGN) {
3883 					send_signal(thread->id, SIGSEGV);
3884 				} else if (user_debug_exception_occurred(B_SEGMENT_VIOLATION,
3885 						SIGSEGV)) {
3886 					send_signal(thread->id, SIGSEGV);
3887 				}
3888 			}
3889 		}
3890 	}
3891 
3892 	if (addressSpace != NULL)
3893 		addressSpace->Put();
3894 
3895 	return B_HANDLED_INTERRUPT;
3896 }
3897 
3898 
3899 struct PageFaultContext {
3900 	AddressSpaceReadLocker	addressSpaceLocker;
3901 	VMCacheChainLocker		cacheChainLocker;
3902 
3903 	VMTranslationMap*		map;
3904 	VMCache*				topCache;
3905 	off_t					cacheOffset;
3906 	vm_page_reservation		reservation;
3907 	bool					isWrite;
3908 
3909 	// return values
3910 	vm_page*				page;
3911 	bool					restart;
3912 
3913 
3914 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
3915 		:
3916 		addressSpaceLocker(addressSpace, true),
3917 		map(addressSpace->TranslationMap()),
3918 		isWrite(isWrite)
3919 	{
3920 	}
3921 
3922 	~PageFaultContext()
3923 	{
3924 		UnlockAll();
3925 		vm_page_unreserve_pages(&reservation);
3926 	}
3927 
3928 	void Prepare(VMCache* topCache, off_t cacheOffset)
3929 	{
3930 		this->topCache = topCache;
3931 		this->cacheOffset = cacheOffset;
3932 		page = NULL;
3933 		restart = false;
3934 
3935 		cacheChainLocker.SetTo(topCache);
3936 	}
3937 
3938 	void UnlockAll(VMCache* exceptCache = NULL)
3939 	{
3940 		topCache = NULL;
3941 		addressSpaceLocker.Unlock();
3942 		cacheChainLocker.Unlock(exceptCache);
3943 	}
3944 };
3945 
3946 
3947 /*!	Gets the page that should be mapped into the area.
3948 	Returns an error code other than \c B_OK if the page couldn't be found or
3949 	paged in. The locking state of the address space and the caches is undefined
3950 	in that case.
3951 	Returns \c B_OK with \c context.restart set to \c true if the function had
3952 	to unlock the address space and all caches and is supposed to be called
3953 	again.
3954 	Returns \c B_OK with \c context.restart set to \c false if the page was
3955 	found. It is returned in \c context.page. The address space will still be
3956 	locked, as will all caches starting from the top cache down to at least the
3957 	cache the page lives in.
3958 */
3959 static status_t
3960 fault_get_page(PageFaultContext& context)
3961 {
3962 	VMCache* cache = context.topCache;
3963 	VMCache* lastCache = NULL;
3964 	vm_page* page = NULL;
3965 
3966 	while (cache != NULL) {
3967 		// We already hold the lock of the cache at this point.
3968 
3969 		lastCache = cache;
3970 
3971 		for (;;) {
3972 			page = cache->LookupPage(context.cacheOffset);
3973 			if (page == NULL || !page->busy) {
3974 				// Either there is no page or there is one and it is not busy.
3975 				break;
3976 			}
3977 
3978 			// page must be busy -- wait for it to become unbusy
3979 			context.UnlockAll(cache);
3980 			cache->ReleaseRefLocked();
3981 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
3982 
3983 			// restart the whole process
3984 			context.restart = true;
3985 			return B_OK;
3986 		}
3987 
3988 		if (page != NULL)
3989 			break;
3990 
3991 		// The current cache does not contain the page we're looking for.
3992 
3993 		// see if the backing store has it
3994 		if (cache->HasPage(context.cacheOffset)) {
3995 			// insert a fresh page and mark it busy -- we're going to read it in
3996 			page = vm_page_allocate_page(&context.reservation,
3997 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
3998 			cache->InsertPage(page, context.cacheOffset);
3999 
4000 			// We need to unlock all caches and the address space while reading
4001 			// the page in. Keep a reference to the cache around.
4002 			cache->AcquireRefLocked();
4003 			context.UnlockAll();
4004 
4005 			// read the page in
4006 			iovec vec;
4007 			vec.iov_base = (void*)(page->physical_page_number * B_PAGE_SIZE);
4008 			size_t bytesRead = vec.iov_len = B_PAGE_SIZE;
4009 
4010 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4011 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4012 
4013 			cache->Lock();
4014 
4015 			if (status < B_OK) {
4016 				// on error remove and free the page
4017 				dprintf("reading page from cache %p returned: %s!\n",
4018 					cache, strerror(status));
4019 
4020 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4021 				cache->RemovePage(page);
4022 				vm_page_set_state(page, PAGE_STATE_FREE);
4023 
4024 				cache->ReleaseRefAndUnlock();
4025 				return status;
4026 			}
4027 
4028 			// mark the page unbusy again
4029 			cache->MarkPageUnbusy(page);
4030 
4031 			DEBUG_PAGE_ACCESS_END(page);
4032 
4033 			// Since we needed to unlock everything temporarily, the area
4034 			// situation might have changed. So we need to restart the whole
4035 			// process.
4036 			cache->ReleaseRefAndUnlock();
4037 			context.restart = true;
4038 			return B_OK;
4039 		}
4040 
4041 		cache = context.cacheChainLocker.LockSourceCache();
4042 	}
4043 
4044 	if (page == NULL) {
4045 		// There was no adequate page; determine the cache for a clean one.
4046 		// Read-only pages come in the deepest cache; only the topmost cache
4047 		// may have direct write access.
4048 		cache = context.isWrite ? context.topCache : lastCache;
4049 
4050 		// allocate a clean page
4051 		page = vm_page_allocate_page(&context.reservation,
4052 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4053 		FTRACE(("vm_soft_fault: just allocated page 0x%lx\n",
4054 			page->physical_page_number));
4055 
4056 		// insert the new page into our cache
4057 		cache->InsertPage(page, context.cacheOffset);
4058 	} else if (page->Cache() != context.topCache && context.isWrite) {
4059 		// We have a page that has the data we want, but in the wrong cache
4060 		// object so we need to copy it and stick it into the top cache.
4061 		vm_page* sourcePage = page;
4062 
4063 		// TODO: If memory is low, it might be a good idea to steal the page
4064 		// from our source cache -- if possible, that is.
4065 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4066 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4067 
4068 		// To not needlessly kill concurrency we unlock all caches but the top
4069 		// one while copying the page. Lacking another mechanism to ensure that
4070 		// the source page doesn't disappear, we mark it busy.
4071 		sourcePage->busy = true;
4072 		context.cacheChainLocker.UnlockKeepRefs(true);
4073 
4074 		// copy the page
4075 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4076 			sourcePage->physical_page_number * B_PAGE_SIZE);
4077 
4078 		context.cacheChainLocker.RelockCaches(true);
4079 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4080 
4081 		// insert the new page into our cache
4082 		context.topCache->InsertPage(page, context.cacheOffset);
4083 	} else
4084 		DEBUG_PAGE_ACCESS_START(page);
4085 
4086 	context.page = page;
4087 	return B_OK;
4088 }
4089 
4090 
4091 /*!	Makes sure the address in the given address space is mapped.
4092 
4093 	\param addressSpace The address space.
4094 	\param originalAddress The address. Doesn't need to be page aligned.
4095 	\param isWrite If \c true the address shall be write-accessible.
4096 	\param isUser If \c true the access is requested by a userland team.
4097 	\param wirePage On success, if non \c NULL, the wired count of the page
4098 		mapped at the given address is incremented and the page is returned
4099 		via this parameter.
4100 	\param wiredRange If given, this wiredRange is ignored when checking whether
4101 		an already mapped page at the virtual address can be unmapped.
4102 	\return \c B_OK on success, another error code otherwise.
4103 */
4104 static status_t
4105 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4106 	bool isWrite, bool isUser, vm_page** wirePage, VMAreaWiredRange* wiredRange)
4107 {
4108 	FTRACE(("vm_soft_fault: thid 0x%lx address 0x%lx, isWrite %d, isUser %d\n",
4109 		thread_get_current_thread_id(), originalAddress, isWrite, isUser));
4110 
4111 	PageFaultContext context(addressSpace, isWrite);
4112 
4113 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4114 	status_t status = B_OK;
4115 
4116 	addressSpace->IncrementFaultCount();
4117 
4118 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4119 	// the pages upfront makes sure we don't have any cache locked, so that the
4120 	// page daemon/thief can do their job without problems.
4121 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4122 		originalAddress);
4123 	context.addressSpaceLocker.Unlock();
4124 	vm_page_reserve_pages(&context.reservation, reservePages,
4125 		addressSpace == VMAddressSpace::Kernel()
4126 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4127 
4128 	while (true) {
4129 		context.addressSpaceLocker.Lock();
4130 
4131 		// get the area the fault was in
4132 		VMArea* area = addressSpace->LookupArea(address);
4133 		if (area == NULL) {
4134 			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4135 				"space\n", originalAddress);
4136 			TPF(PageFaultError(-1,
4137 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4138 			status = B_BAD_ADDRESS;
4139 			break;
4140 		}
4141 
4142 		// check permissions
4143 		uint32 protection = get_area_page_protection(area, address);
4144 		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4145 			dprintf("user access on kernel area 0x%lx at %p\n", area->id,
4146 				(void*)originalAddress);
4147 			TPF(PageFaultError(area->id,
4148 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4149 			status = B_PERMISSION_DENIED;
4150 			break;
4151 		}
4152 		if (isWrite && (protection
4153 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4154 			dprintf("write access attempted on write-protected area 0x%lx at"
4155 				" %p\n", area->id, (void*)originalAddress);
4156 			TPF(PageFaultError(area->id,
4157 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4158 			status = B_PERMISSION_DENIED;
4159 			break;
4160 		} else if (!isWrite && (protection
4161 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4162 			dprintf("read access attempted on read-protected area 0x%lx at"
4163 				" %p\n", area->id, (void*)originalAddress);
4164 			TPF(PageFaultError(area->id,
4165 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4166 			status = B_PERMISSION_DENIED;
4167 			break;
4168 		}
4169 
4170 		// We have the area, it was a valid access, so let's try to resolve the
4171 		// page fault now.
4172 		// First, the topmost cache of the area is investigated.
4173 
4174 		context.Prepare(vm_area_get_locked_cache(area),
4175 			address - area->Base() + area->cache_offset);
4176 
4177 		// See if this cache has a fault handler -- this will do all the work
4178 		// for us.
4179 		{
4180 			// Note, since the page fault is resolved with interrupts enabled,
4181 			// the fault handler could be called more than once for the same
4182 			// reason -- the store must take this into account.
4183 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4184 			if (status != B_BAD_HANDLER)
4185 				break;
4186 		}
4187 
4188 		// The topmost cache has no fault handler, so let's see if the cache or
4189 		// its sources already have the page we're searching for (we're going
4190 		// from top to bottom).
4191 		status = fault_get_page(context);
4192 		if (status != B_OK) {
4193 			TPF(PageFaultError(area->id, status));
4194 			break;
4195 		}
4196 
4197 		if (context.restart)
4198 			continue;
4199 
4200 		// All went fine, all there is left to do is to map the page into the
4201 		// address space.
4202 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4203 			context.page));
4204 
4205 		// If the page doesn't reside in the area's cache, we need to make sure
4206 		// it's mapped in read-only, so that we cannot overwrite someone else's
4207 		// data (copy-on-write)
4208 		uint32 newProtection = protection;
4209 		if (context.page->Cache() != context.topCache && !isWrite)
4210 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4211 
4212 		bool unmapPage = false;
4213 		bool mapPage = true;
4214 
4215 		// check whether there's already a page mapped at the address
4216 		context.map->Lock();
4217 
4218 		addr_t physicalAddress;
4219 		uint32 flags;
4220 		vm_page* mappedPage = NULL;
4221 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4222 			&& (flags & PAGE_PRESENT) != 0
4223 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4224 				!= NULL) {
4225 			// Yep there's already a page. If it's ours, we can simply adjust
4226 			// its protection. Otherwise we have to unmap it.
4227 			if (mappedPage == context.page) {
4228 				context.map->ProtectPage(area, address, newProtection);
4229 					// Note: We assume that ProtectPage() is atomic (i.e.
4230 					// the page isn't temporarily unmapped), otherwise we'd have
4231 					// to make sure it isn't wired.
4232 				mapPage = false;
4233 			} else
4234 				unmapPage = true;
4235 		}
4236 
4237 		context.map->Unlock();
4238 
4239 		if (unmapPage) {
4240 			// If the page is wired, we can't unmap it. Wait until it is unwired
4241 			// again and restart.
4242 			VMAreaUnwiredWaiter waiter;
4243 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4244 					wiredRange)) {
4245 				// unlock everything and wait
4246 				context.UnlockAll();
4247 				waiter.waitEntry.Wait();
4248 				continue;
4249 			}
4250 
4251 			// Note: The mapped page is a page of a lower cache. We are
4252 			// guaranteed to have that cache locked, our new page is a copy of
4253 			// that page, and the page is not busy. The logic for that guarantee
4254 			// is as follows: Since the page is mapped, it must live in the top
4255 			// cache (ruled out above) or any of its lower caches, and there is
4256 			// (was before the new page was inserted) no other page in any
4257 			// cache between the top cache and the page's cache (otherwise that
4258 			// would be mapped instead). That in turn means that our algorithm
4259 			// must have found it and therefore it cannot be busy either.
4260 			DEBUG_PAGE_ACCESS_START(mappedPage);
4261 			unmap_page(area, address);
4262 			DEBUG_PAGE_ACCESS_END(mappedPage);
4263 		}
4264 
4265 		if (mapPage) {
4266 			if (map_page(area, context.page, address, newProtection,
4267 					&context.reservation) != B_OK) {
4268 				// Mapping can only fail when the page mapping object couldn't
4269 				// be allocated. Save for the missing mapping, everything is
4270 				// fine, though. If this was a regular page fault, we'll simply
4271 				// leave and probably fault again. To make sure we'll have more
4272 				// luck then, we ensure that the minimum object reserve is
4273 				// available.
4274 				DEBUG_PAGE_ACCESS_END(context.page);
4275 
4276 				context.UnlockAll();
4277 
4278 				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4279 						!= B_OK) {
4280 					// Apparently the situation is serious. Let's get ourselves
4281 					// killed.
4282 					status = B_NO_MEMORY;
4283 				} else if (wirePage != NULL) {
4284 					// The caller expects us to wire the page. Since
4285 					// object_cache_reserve() succeeded, we should now be able
4286 					// to allocate a mapping structure. Restart.
4287 					continue;
4288 				}
4289 
4290 				break;
4291 			}
4292 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4293 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4294 
4295 		// also wire the page, if requested
4296 		if (wirePage != NULL && status == B_OK) {
4297 			increment_page_wired_count(context.page);
4298 			*wirePage = context.page;
4299 		}
4300 
4301 		DEBUG_PAGE_ACCESS_END(context.page);
4302 
4303 		break;
4304 	}
4305 
4306 	return status;
4307 }
4308 
4309 
4310 status_t
4311 vm_get_physical_page(addr_t paddr, addr_t* _vaddr, void** _handle)
4312 {
4313 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4314 }
4315 
4316 status_t
4317 vm_put_physical_page(addr_t vaddr, void* handle)
4318 {
4319 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4320 }
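

// Usage sketch for the physical page mapper interface above: map a physical
// page temporarily, access it through the returned virtual address, then
// release the mapping again. Illustrative only (hence the #if 0 guard); the
// physical address is assumed to be valid and page aligned.
#if 0
static status_t
zero_physical_page_example(addr_t physicalAddress)
{
	addr_t virtualAddress;
	void* handle;
	status_t status = vm_get_physical_page(physicalAddress, &virtualAddress,
		&handle);
	if (status != B_OK)
		return status;

	// the page is now mapped -- access it via virtualAddress
	memset((void*)virtualAddress, 0, B_PAGE_SIZE);

	// every vm_get_physical_page() must be balanced by vm_put_physical_page()
	return vm_put_physical_page(virtualAddress, handle);
}
#endif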
4321 
4322 
4323 status_t
4324 vm_get_physical_page_current_cpu(addr_t paddr, addr_t* _vaddr, void** _handle)
4325 {
4326 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4327 }
4328 
4329 status_t
4330 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4331 {
4332 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4333 }
4334 
4335 
4336 status_t
4337 vm_get_physical_page_debug(addr_t paddr, addr_t* _vaddr, void** _handle)
4338 {
4339 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4340 }
4341 
4342 status_t
4343 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4344 {
4345 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4346 }
4347 
4348 
4349 void
4350 vm_get_info(system_memory_info* info)
4351 {
4352 	swap_get_info(info);
4353 
4354 	info->max_memory = vm_page_num_pages() * B_PAGE_SIZE;
4355 	info->page_faults = sPageFaults;
4356 
4357 	MutexLocker locker(sAvailableMemoryLock);
4358 	info->free_memory = sAvailableMemory;
4359 	info->needed_memory = sNeededMemory;
4360 }
4361 
4362 
4363 uint32
4364 vm_num_page_faults(void)
4365 {
4366 	return sPageFaults;
4367 }
4368 
4369 
4370 off_t
4371 vm_available_memory(void)
4372 {
4373 	MutexLocker locker(sAvailableMemoryLock);
4374 	return sAvailableMemory;
4375 }
4376 
4377 
4378 off_t
4379 vm_available_not_needed_memory(void)
4380 {
4381 	MutexLocker locker(sAvailableMemoryLock);
4382 	return sAvailableMemory - sNeededMemory;
4383 }
4384 
4385 
4386 size_t
4387 vm_kernel_address_space_left(void)
4388 {
4389 	return VMAddressSpace::Kernel()->FreeSpace();
4390 }
4391 
4392 
4393 void
4394 vm_unreserve_memory(size_t amount)
4395 {
4396 	mutex_lock(&sAvailableMemoryLock);
4397 
4398 	sAvailableMemory += amount;
4399 
4400 	mutex_unlock(&sAvailableMemoryLock);
4401 }
4402 
4403 
4404 status_t
4405 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4406 {
4407 	size_t reserve = kMemoryReserveForPriority[priority];
4408 
4409 	MutexLocker locker(sAvailableMemoryLock);
4410 
4411 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4412 
4413 	if (sAvailableMemory >= amount + reserve) {
4414 		sAvailableMemory -= amount;
4415 		return B_OK;
4416 	}
4417 
4418 	if (timeout <= 0)
4419 		return B_NO_MEMORY;
4420 
4421 	// turn timeout into an absolute timeout
4422 	timeout += system_time();
4423 
4424 	// loop until we've got the memory or the timeout occurs
4425 	do {
4426 		sNeededMemory += amount;
4427 
4428 		// call the low resource manager
4429 		locker.Unlock();
4430 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4431 			B_ABSOLUTE_TIMEOUT, timeout);
4432 		locker.Lock();
4433 
4434 		sNeededMemory -= amount;
4435 
4436 		if (sAvailableMemory >= amount + reserve) {
4437 			sAvailableMemory -= amount;
4438 			return B_OK;
4439 		}
4440 	} while (timeout > system_time());
4441 
4442 	return B_NO_MEMORY;
4443 }
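

// Usage sketch for the reservation functions above: a consumer that wants to
// commit memory first reserves the amount (possibly waiting up to a timeout)
// and balances the reservation with vm_unreserve_memory() when the memory is
// no longer committed. Illustrative only; the amount and the 1 s timeout are
// arbitrary example values.
#if 0
static status_t
commit_memory_example(void)
{
	const size_t commitment = 16 * B_PAGE_SIZE;

	if (vm_try_reserve_memory(commitment, VM_PRIORITY_USER, 1000000) != B_OK)
		return B_NO_MEMORY;

	// ... the commitment is now accounted against the available memory ...

	// give the reservation back when the memory is decommitted
	vm_unreserve_memory(commitment);
	return B_OK;
}
#endif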
4444 
4445 
4446 status_t
4447 vm_set_area_memory_type(area_id id, addr_t physicalBase, uint32 type)
4448 {
4449 	AddressSpaceReadLocker locker;
4450 	VMArea* area;
4451 	status_t status = locker.SetFromArea(id, area);
4452 	if (status != B_OK)
4453 		return status;
4454 
4455 	return arch_vm_set_memory_type(area, physicalBase, type);
4456 }
4457 
4458 
4459 /*!	This function enforces some protection properties:
4460 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4461 	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4462 	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4463 	   and B_KERNEL_WRITE_AREA.
4464 */
4465 static void
4466 fix_protection(uint32* protection)
4467 {
4468 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4469 		if ((*protection & B_USER_PROTECTION) == 0
4470 			|| (*protection & B_WRITE_AREA) != 0)
4471 			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4472 		else
4473 			*protection |= B_KERNEL_READ_AREA;
4474 	}
4475 }
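

// For illustration, the effect of fix_protection() on purely user-specified
// protections (kept under #if 0, as it is an example only):
#if 0
static void
fix_protection_example(void)
{
	uint32 protection = B_READ_AREA | B_WRITE_AREA;
	fix_protection(&protection);
	// protection now also contains B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA

	protection = B_READ_AREA;
	fix_protection(&protection);
	// only B_KERNEL_READ_AREA has been added
}
#endif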
4476 
4477 
4478 static void
4479 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4480 {
4481 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4482 	info->area = area->id;
4483 	info->address = (void*)area->Base();
4484 	info->size = area->Size();
4485 	info->protection = area->protection;
4486 	info->lock = B_FULL_LOCK;
4487 	info->team = area->address_space->ID();
4488 	info->copy_count = 0;
4489 	info->in_count = 0;
4490 	info->out_count = 0;
4491 		// TODO: retrieve real values here!
4492 
4493 	VMCache* cache = vm_area_get_locked_cache(area);
4494 
4495 	// Note, this is a simplification; the cache could be larger than this area
4496 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4497 
4498 	vm_area_put_locked_cache(cache);
4499 }
4500 
4501 
4502 static status_t
4503 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4504 {
4505 	// is newSize a multiple of B_PAGE_SIZE?
4506 	if (newSize & (B_PAGE_SIZE - 1))
4507 		return B_BAD_VALUE;
4508 
4509 	// lock all affected address spaces and the cache
4510 	VMArea* area;
4511 	VMCache* cache;
4512 
4513 	MultiAddressSpaceLocker locker;
4514 	AreaCacheLocker cacheLocker;
4515 
4516 	status_t status;
4517 	size_t oldSize;
4518 	bool anyKernelArea;
4519 	bool restart;
4520 
4521 	do {
4522 		anyKernelArea = false;
4523 		restart = false;
4524 
4525 		locker.Unset();
4526 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4527 		if (status != B_OK)
4528 			return status;
4529 		cacheLocker.SetTo(cache, true);	// already locked
4530 
4531 		// enforce restrictions
4532 		if (!kernel) {
4533 			if ((area->protection & B_KERNEL_AREA) != 0)
4534 				return B_NOT_ALLOWED;
4535 			// TODO: Enforce all restrictions (team, etc.)!
4536 		}
4537 
4538 		oldSize = area->Size();
4539 		if (newSize == oldSize)
4540 			return B_OK;
4541 
4542 		if (cache->type != CACHE_TYPE_RAM)
4543 			return B_NOT_ALLOWED;
4544 
4545 		if (oldSize < newSize) {
4546 			// We need to check if all areas of this cache can be resized.
4547 			for (VMArea* current = cache->areas; current != NULL;
4548 					current = current->cache_next) {
4549 				if (!current->address_space->CanResizeArea(current, newSize))
4550 					return B_ERROR;
4551 				anyKernelArea
4552 					|= current->address_space == VMAddressSpace::Kernel();
4553 			}
4554 		} else {
4555 			// We're shrinking the areas, so we must make sure the affected
4556 			// ranges are not wired.
4557 			for (VMArea* current = cache->areas; current != NULL;
4558 					current = current->cache_next) {
4559 				anyKernelArea
4560 					|= current->address_space == VMAddressSpace::Kernel();
4561 
4562 				if (wait_if_area_range_is_wired(current,
4563 						current->Base() + newSize, oldSize - newSize, &locker,
4564 						&cacheLocker)) {
4565 					restart = true;
4566 					break;
4567 				}
4568 			}
4569 		}
4570 	} while (restart);
4571 
4572 	// Okay, looks good so far, so let's do it
4573 
4574 	int priority = kernel && anyKernelArea
4575 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
4576 	uint32 allocationFlags = kernel && anyKernelArea
4577 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
4578 
4579 	if (oldSize < newSize) {
4580 		// Growing the cache can fail, so we do it first.
4581 		status = cache->Resize(cache->virtual_base + newSize, priority);
4582 		if (status != B_OK)
4583 			return status;
4584 	}
4585 
4586 	for (VMArea* current = cache->areas; current != NULL;
4587 			current = current->cache_next) {
4588 		status = current->address_space->ResizeArea(current, newSize,
4589 			allocationFlags);
4590 		if (status != B_OK)
4591 			break;
4592 
4593 		// We also need to unmap all pages beyond the new size, if the area has
4594 		// shrunk
4595 		if (newSize < oldSize) {
4596 			VMCacheChainLocker cacheChainLocker(cache);
4597 			cacheChainLocker.LockAllSourceCaches();
4598 
4599 			unmap_pages(current, current->Base() + newSize,
4600 				oldSize - newSize);
4601 
4602 			cacheChainLocker.Unlock(cache);
4603 		}
4604 	}
4605 
4606 	// shrinking the cache can't fail, so we do it now
4607 	if (status == B_OK && newSize < oldSize)
4608 		status = cache->Resize(cache->virtual_base + newSize, priority);
4609 
4610 	if (status != B_OK) {
4611 		// Something failed -- resize the areas back to their original size.
4612 		// This can fail, too, in which case we're seriously screwed.
4613 		for (VMArea* current = cache->areas; current != NULL;
4614 				current = current->cache_next) {
4615 			if (current->address_space->ResizeArea(current, oldSize,
4616 					allocationFlags) != B_OK) {
4617 				panic("vm_resize_area(): Failed and not being able to restore "
4618 					"original state.");
4619 			}
4620 		}
4621 
4622 		cache->Resize(cache->virtual_base + oldSize, priority);
4623 	}
4624 
4625 	// TODO: we must honour the lock restrictions of this area
4626 	return status;
4627 }
4628 
4629 
4630 status_t
4631 vm_memset_physical(addr_t address, int value, size_t length)
4632 {
4633 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
4634 }
4635 
4636 
4637 status_t
4638 vm_memcpy_from_physical(void* to, addr_t from, size_t length, bool user)
4639 {
4640 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
4641 }
4642 
4643 
4644 status_t
4645 vm_memcpy_to_physical(addr_t to, const void* _from, size_t length, bool user)
4646 {
4647 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
4648 }
4649 
4650 
4651 void
4652 vm_memcpy_physical_page(addr_t to, addr_t from)
4653 {
4654 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
4655 }
4656 
4657 
4658 //	#pragma mark - kernel public API
4659 
4660 
4661 status_t
4662 user_memcpy(void* to, const void* from, size_t size)
4663 {
4664 	// don't allow address overflows
4665 	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
4666 		return B_BAD_ADDRESS;
4667 
4668 	if (arch_cpu_user_memcpy(to, from, size,
4669 			&thread_get_current_thread()->fault_handler) < B_OK)
4670 		return B_BAD_ADDRESS;
4671 
4672 	return B_OK;
4673 }
4674 
4675 
4676 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
4677 	the string in \a to, NULL-terminating the result.
4678 
4679 	\param to Pointer to the destination C-string.
4680 	\param from Pointer to the source C-string.
4681 	\param size Size in bytes of the string buffer pointed to by \a to.
4682 
4683 	\return strlen(\a from), or an error code if an access error occurred.
4684 */
4685 ssize_t
4686 user_strlcpy(char* to, const char* from, size_t size)
4687 {
4688 	if (to == NULL && size != 0)
4689 		return B_BAD_VALUE;
4690 	if (from == NULL)
4691 		return B_BAD_ADDRESS;
4692 
4693 	// limit size to avoid address overflows
4694 	size_t maxSize = std::min(size,
4695 		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
4696 		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
4697 		// the source address might still overflow.
4698 
4699 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize,
4700 		&thread_get_current_thread()->fault_handler);
4701 
4702 	// If we hit the address overflow boundary, fail.
4703 	if (result >= 0 && (size_t)result >= maxSize && maxSize < size)
4704 		return B_BAD_ADDRESS;
4705 
4706 	return result;
4707 }
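

// Usage sketch for user_strlcpy(): copy a user-supplied string into a fixed
// size kernel buffer, treating a negative result as an access fault and a
// result >= the buffer size as truncation. Illustrative only.
#if 0
static status_t
copy_user_name_example(const char* userName, char* buffer, size_t bufferSize)
{
	if (!IS_USER_ADDRESS(userName))
		return B_BAD_ADDRESS;

	ssize_t length = user_strlcpy(buffer, userName, bufferSize);
	if (length < 0)
		return (status_t)length;
	if ((size_t)length >= bufferSize)
		return B_NAME_TOO_LONG;
			// the source string didn't fit and was truncated

	return B_OK;
}
#endif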
4708 
4709 
4710 status_t
4711 user_memset(void* s, char c, size_t count)
4712 {
4713 	// don't allow address overflows
4714 	if ((addr_t)s + count < (addr_t)s)
4715 		return B_BAD_ADDRESS;
4716 
4717 	if (arch_cpu_user_memset(s, c, count,
4718 			&thread_get_current_thread()->fault_handler) < B_OK)
4719 		return B_BAD_ADDRESS;
4720 
4721 	return B_OK;
4722 }
4723 
4724 
4725 /*!	Wires down the given address range in the specified team's address space.
4726 
4727 	If successful the function
4728 	- acquires a reference to the specified team's address space,
4729 	- adds respective wired ranges to all areas that intersect with the given
4730 	  address range,
4731 	- makes sure all pages in the given address range are mapped with the
4732 	  requested access permissions and increments their wired count.
4733 
4734 	It fails when \a team doesn't specify a valid address space, when any part
4735 	of the specified address range is not covered by areas, when the concerned
4736 	areas don't allow mapping with the requested permissions, or when mapping
4737 	failed for another reason.
4738 
4739 	When successful, the call must be balanced by an unlock_memory_etc() call
4740 	with the exact same parameters.
4741 
4742 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
4743 		is supported.
4744 	\param address The start of the address range to be wired.
4745 	\param numBytes The size of the address range to be wired.
4746 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
4747 		requests that the range must be wired writable ("read from device
4748 		into memory").
4749 	\return \c B_OK on success, another error code otherwise.
4750 */
4751 status_t
4752 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
4753 {
4754 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
4755 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
4756 
4757 	// compute the page protection that is required
4758 	bool isUser = IS_USER_ADDRESS(address);
4759 	bool writable = (flags & B_READ_DEVICE) == 0;
4760 	uint32 requiredProtection = PAGE_PRESENT
4761 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
4762 	if (writable)
4763 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
4764 
4765 	uint32 mallocFlags = isUser
4766 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
4767 
4768 	// get and read lock the address space
4769 	VMAddressSpace* addressSpace = NULL;
4770 	if (isUser) {
4771 		if (team == B_CURRENT_TEAM)
4772 			addressSpace = VMAddressSpace::GetCurrent();
4773 		else
4774 			addressSpace = VMAddressSpace::Get(team);
4775 	} else
4776 		addressSpace = VMAddressSpace::GetKernel();
4777 	if (addressSpace == NULL)
4778 		return B_ERROR;
4779 
4780 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
4781 
4782 	VMTranslationMap* map = addressSpace->TranslationMap();
4783 	status_t error = B_OK;
4784 
4785 	// iterate through all concerned areas
4786 	addr_t nextAddress = lockBaseAddress;
4787 	while (nextAddress != lockEndAddress) {
4788 		// get the next area
4789 		VMArea* area = addressSpace->LookupArea(nextAddress);
4790 		if (area == NULL) {
4791 			error = B_BAD_ADDRESS;
4792 			break;
4793 		}
4794 
4795 		addr_t areaStart = nextAddress;
4796 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
4797 
4798 		// allocate the wired range (do that before locking the cache to avoid
4799 		// deadlocks)
4800 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
4801 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
4802 		if (range == NULL) {
4803 			error = B_NO_MEMORY;
4804 			break;
4805 		}
4806 
4807 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
4808 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
4809 
4810 		// mark the area range wired
4811 		area->Wire(range);
4812 
4813 		// Depending on the area cache type and the wiring, we may not need to
4814 		// look at the individual pages.
4815 		if (area->cache_type == CACHE_TYPE_NULL
4816 			|| area->cache_type == CACHE_TYPE_DEVICE
4817 			|| area->wiring == B_FULL_LOCK
4818 			|| area->wiring == B_CONTIGUOUS) {
4819 			nextAddress = areaEnd;
4820 			continue;
4821 		}
4822 
4823 		// Lock the area's cache chain and the translation map. Needed to look
4824 		// up pages and play with their wired count.
4825 		cacheChainLocker.LockAllSourceCaches();
4826 		map->Lock();
4827 
4828 		// iterate through the pages and wire them
4829 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
4830 			addr_t physicalAddress;
4831 			uint32 flags;
4832 
4833 			vm_page* page;
4834 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
4835 				&& (flags & requiredProtection) == requiredProtection
4836 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4837 					!= NULL) {
4838 				// Already mapped with the correct permissions -- just increment
4839 				// the page's wired count.
4840 				increment_page_wired_count(page);
4841 			} else {
4842 				// Let vm_soft_fault() map the page for us, if possible. We need
4843 				// to fully unlock to avoid deadlocks. Since we have already
4844 				// wired the area itself, nothing disturbing will happen with it
4845 				// in the meantime.
4846 				map->Unlock();
4847 				cacheChainLocker.Unlock();
4848 				addressSpaceLocker.Unlock();
4849 
4850 				error = vm_soft_fault(addressSpace, nextAddress, writable,
4851 					isUser, &page, range);
4852 
4853 				addressSpaceLocker.Lock();
4854 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
4855 				cacheChainLocker.LockAllSourceCaches();
4856 				map->Lock();
4857 			}
4858 
4859 			if (error != B_OK)
4860 				break;
4861 		}
4862 
4863 		map->Unlock();
4864 
4865 		if (error == B_OK) {
4866 			cacheChainLocker.Unlock();
4867 		} else {
4868 			// An error occurred, so abort right here. If the current address
4869 			// is the first in this area, unwire the area, since we won't get
4870 			// to it when reverting what we've done so far.
4871 			if (nextAddress == areaStart) {
4872 				area->Unwire(range);
4873 				cacheChainLocker.Unlock();
4874 				range->~VMAreaWiredRange();
4875 				free_etc(range, mallocFlags);
4876 			} else
4877 				cacheChainLocker.Unlock();
4878 
4879 			break;
4880 		}
4881 	}
4882 
4883 	if (error != B_OK) {
4884 		// An error occurred, so unwire all that we've already wired. Note that
4885 		// even if not a single page was wired, unlock_memory_etc() is called
4886 		// to put the address space reference.
4887 		addressSpaceLocker.Unlock();
4888 		unlock_memory_etc(team, (void*)address, nextAddress - lockBaseAddress,
4889 			flags);
4890 	}
4891 
4892 	return error;
4893 }
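

// Usage sketch for lock_memory_etc(): a driver-style caller wires a user
// buffer around an I/O operation and unwires it with the exact same
// parameters afterwards. Illustrative only; do_device_io() is a made-up
// placeholder for the actual transfer.
#if 0
static status_t
wired_io_example(void* userBuffer, size_t size)
{
	status_t status = lock_memory_etc(B_CURRENT_TEAM, userBuffer, size, 0);
	if (status != B_OK)
		return status;

	status = do_device_io(userBuffer, size);

	// must be balanced with the exact same parameters
	unlock_memory_etc(B_CURRENT_TEAM, userBuffer, size, 0);
	return status;
}
#endif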
4894 
4895 
4896 status_t
4897 lock_memory(void* address, size_t numBytes, uint32 flags)
4898 {
4899 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
4900 }
4901 
4902 
4903 /*!	Unwires an address range previously wired with lock_memory_etc().
4904 
4905 	Note that a call to this function must balance a previous lock_memory_etc()
4906 	call with exactly the same parameters.
4907 */
4908 status_t
4909 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
4910 {
4911 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
4912 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
4913 
4914 	// compute the page protection that is required
4915 	bool isUser = IS_USER_ADDRESS(address);
4916 	bool writable = (flags & B_READ_DEVICE) == 0;
4917 	uint32 requiredProtection = PAGE_PRESENT
4918 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
4919 	if (writable)
4920 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
4921 
4922 	uint32 mallocFlags = isUser
4923 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
4924 
4925 	// get and read lock the address space
4926 	VMAddressSpace* addressSpace = NULL;
4927 	if (isUser) {
4928 		if (team == B_CURRENT_TEAM)
4929 			addressSpace = VMAddressSpace::GetCurrent();
4930 		else
4931 			addressSpace = VMAddressSpace::Get(team);
4932 	} else
4933 		addressSpace = VMAddressSpace::GetKernel();
4934 	if (addressSpace == NULL)
4935 		return B_ERROR;
4936 
4937 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
4938 
4939 	VMTranslationMap* map = addressSpace->TranslationMap();
4940 	status_t error = B_OK;
4941 
4942 	// iterate through all concerned areas
4943 	addr_t nextAddress = lockBaseAddress;
4944 	while (nextAddress != lockEndAddress) {
4945 		// get the next area
4946 		VMArea* area = addressSpace->LookupArea(nextAddress);
4947 		if (area == NULL) {
4948 			error = B_BAD_ADDRESS;
4949 			break;
4950 		}
4951 
4952 		addr_t areaStart = nextAddress;
4953 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
4954 
4955 		// Lock the area's top cache. This is a requirement for
4956 		// VMArea::Unwire().
4957 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
4958 
4959 		// Depending on the area cache type and the wiring, we may not need to
4960 		// look at the individual pages.
4961 		if (area->cache_type == CACHE_TYPE_NULL
4962 			|| area->cache_type == CACHE_TYPE_DEVICE
4963 			|| area->wiring == B_FULL_LOCK
4964 			|| area->wiring == B_CONTIGUOUS) {
4965 			// unwire the range (to avoid deadlocks we delete the range after
4966 			// unlocking the cache)
4967 			nextAddress = areaEnd;
4968 			VMAreaWiredRange* range = area->Unwire(areaStart,
4969 				areaEnd - areaStart, writable);
4970 			cacheChainLocker.Unlock();
4971 			if (range != NULL) {
4972 				range->~VMAreaWiredRange();
4973 				free_etc(range, mallocFlags);
4974 			}
4975 			continue;
4976 		}
4977 
4978 		// Lock the area's cache chain and the translation map. Needed to look
4979 		// up pages and play with their wired count.
4980 		cacheChainLocker.LockAllSourceCaches();
4981 		map->Lock();
4982 
4983 		// iterate through the pages and unwire them
4984 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
4985 			addr_t physicalAddress;
4986 			uint32 flags;
4987 
4988 			vm_page* page;
4989 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
4990 				&& (flags & PAGE_PRESENT) != 0
4991 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4992 					!= NULL) {
4993 				// Already mapped with the correct permissions -- just decrement
4994 				// the page's wired count.
4995 				decrement_page_wired_count(page);
4996 			} else {
4997 				panic("unlock_memory_etc(): Failed to unwire page: address "
4998 					"space %p, address: %#" B_PRIxADDR, addressSpace,
4999 					nextAddress);
5000 				error = B_BAD_VALUE;
5001 				break;
5002 			}
5003 		}
5004 
5005 		map->Unlock();
5006 
5007 		// All pages are unwired. Remove the area's wired range as well (to
5008 		// avoid deadlocks we delete the range after unlocking the cache).
5009 		VMAreaWiredRange* range = area->Unwire(areaStart,
5010 			areaEnd - areaStart, writable);
5011 
5012 		cacheChainLocker.Unlock();
5013 
5014 		if (range != NULL) {
5015 			range->~VMAreaWiredRange();
5016 			free_etc(range, mallocFlags);
5017 		}
5018 
5019 		if (error != B_OK)
5020 			break;
5021 	}
5022 
5023 	// get rid of the address space reference
5024 	addressSpace->Put();
5025 
5026 	return error;
5027 }
5028 
5029 
5030 status_t
5031 unlock_memory(void* address, size_t numBytes, uint32 flags)
5032 {
5033 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5034 }
5035 
5036 
5037 /*!	Similar to get_memory_map(), but also allows specifying the address space
5038 	for the memory in question and has saner semantics.
5039 	Returns \c B_OK when the complete range could be translated, or
5040 	\c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5041 	case the actual number of entries is written to \c *_numEntries. Any other
5042 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5043 	in this case.
5044 */
5045 status_t
5046 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5047 	physical_entry* table, uint32* _numEntries)
5048 {
5049 	uint32 numEntries = *_numEntries;
5050 	*_numEntries = 0;
5051 
5052 	VMAddressSpace* addressSpace;
5053 	addr_t virtualAddress = (addr_t)address;
5054 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5055 	addr_t physicalAddress;
5056 	status_t status = B_OK;
5057 	int32 index = -1;
5058 	addr_t offset = 0;
5059 	bool interrupts = are_interrupts_enabled();
5060 
5061 	TRACE(("get_memory_map_etc(%ld, %p, %lu bytes, %ld entries)\n", team,
5062 		address, numBytes, numEntries));
5063 
5064 	if (numEntries == 0 || numBytes == 0)
5065 		return B_BAD_VALUE;
5066 
5067 	// in which address space is the address to be found?
5068 	if (IS_USER_ADDRESS(virtualAddress)) {
5069 		if (team == B_CURRENT_TEAM)
5070 			addressSpace = VMAddressSpace::GetCurrent();
5071 		else
5072 			addressSpace = VMAddressSpace::Get(team);
5073 	} else
5074 		addressSpace = VMAddressSpace::GetKernel();
5075 
5076 	if (addressSpace == NULL)
5077 		return B_ERROR;
5078 
5079 	VMTranslationMap* map = addressSpace->TranslationMap();
5080 
5081 	if (interrupts)
5082 		map->Lock();
5083 
5084 	while (offset < numBytes) {
5085 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5086 		uint32 flags;
5087 
5088 		if (interrupts) {
5089 			status = map->Query((addr_t)address + offset, &physicalAddress,
5090 				&flags);
5091 		} else {
5092 			status = map->QueryInterrupt((addr_t)address + offset,
5093 				&physicalAddress, &flags);
5094 		}
5095 		if (status < B_OK)
5096 			break;
5097 		if ((flags & PAGE_PRESENT) == 0) {
5098 			panic("get_memory_map() called on unmapped memory!");
5099 			return B_BAD_ADDRESS;
5100 		}
5101 
5102 		if (index < 0 && pageOffset > 0) {
5103 			physicalAddress += pageOffset;
5104 			if (bytes > B_PAGE_SIZE - pageOffset)
5105 				bytes = B_PAGE_SIZE - pageOffset;
5106 		}
5107 
5108 		// need to switch to the next physical_entry?
5109 		if (index < 0 || (addr_t)table[index].address
5110 				!= physicalAddress - table[index].size) {
5111 			if ((uint32)++index + 1 > numEntries) {
5112 				// table too small
5113 				status = B_BUFFER_OVERFLOW;
5114 				break;
5115 			}
5116 			table[index].address = (void*)physicalAddress;
5117 			table[index].size = bytes;
5118 		} else {
5119 			// page does fit in current entry
5120 			table[index].size += bytes;
5121 		}
5122 
5123 		offset += bytes;
5124 	}
5125 
5126 	if (interrupts)
5127 		map->Unlock();
5128 
5129 	if (status != B_OK)
5130 		return status;
5131 
5132 	if ((uint32)index + 1 > numEntries) {
5133 		*_numEntries = index;
5134 		return B_BUFFER_OVERFLOW;
5135 	}
5136 
5137 	*_numEntries = index + 1;
5138 	return B_OK;
5139 }
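

// Usage sketch for get_memory_map_etc(): translate a (wired) buffer into a
// small table of physical runs and handle the B_BUFFER_OVERFLOW case. The
// table size of 8 entries is an arbitrary example value; illustrative only.
#if 0
static status_t
dump_physical_runs_example(const void* buffer, size_t size)
{
	physical_entry table[8];
	uint32 numEntries = 8;

	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, size, table,
		&numEntries);
	if (status == B_BUFFER_OVERFLOW) {
		// only the first numEntries runs fit -- a larger table is needed
		return status;
	}
	if (status != B_OK)
		return status;

	for (uint32 i = 0; i < numEntries; i++) {
		dprintf("run %lu: %p, %lu bytes\n", i, table[i].address,
			table[i].size);
	}

	return B_OK;
}
#endif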
5140 
5141 
5142 /*!	According to the BeBook, this function should always succeed.
5143 	This is no longer the case.
5144 */
5145 long
5146 get_memory_map(const void* address, ulong numBytes, physical_entry* table,
5147 	long numEntries)
5148 {
5149 	uint32 entriesRead = numEntries;
5150 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5151 		table, &entriesRead);
5152 	if (error != B_OK)
5153 		return error;
5154 
5155 	// close the entry list
5156 
5157 	// if it's only one entry, we will silently accept the missing ending
5158 	if (numEntries == 1)
5159 		return B_OK;
5160 
5161 	if (entriesRead + 1 > (uint32)numEntries)
5162 		return B_BUFFER_OVERFLOW;
5163 
5164 	table[entriesRead].address = NULL;
5165 	table[entriesRead].size = 0;
5166 
5167 	return B_OK;
5168 }
5169 
5170 
5171 area_id
5172 area_for(void* address)
5173 {
5174 	return vm_area_for((addr_t)address, true);
5175 }
5176 
5177 
5178 area_id
5179 find_area(const char* name)
5180 {
5181 	return VMAreaHash::Find(name);
5182 }
5183 
5184 
5185 status_t
5186 _get_area_info(area_id id, area_info* info, size_t size)
5187 {
5188 	if (size != sizeof(area_info) || info == NULL)
5189 		return B_BAD_VALUE;
5190 
5191 	AddressSpaceReadLocker locker;
5192 	VMArea* area;
5193 	status_t status = locker.SetFromArea(id, area);
5194 	if (status != B_OK)
5195 		return status;
5196 
5197 	fill_area_info(area, info, size);
5198 	return B_OK;
5199 }
5200 
5201 
5202 status_t
5203 _get_next_area_info(team_id team, int32* cookie, area_info* info, size_t size)
5204 {
5205 	addr_t nextBase = *(addr_t*)cookie;
5206 
5207 	// we're already through the list
5208 	if (nextBase == (addr_t)-1)
5209 		return B_ENTRY_NOT_FOUND;
5210 
5211 	if (team == B_CURRENT_TEAM)
5212 		team = team_get_current_team_id();
5213 
5214 	AddressSpaceReadLocker locker(team);
5215 	if (!locker.IsLocked())
5216 		return B_BAD_TEAM_ID;
5217 
5218 	VMArea* area;
5219 	for (VMAddressSpace::AreaIterator it
5220 				= locker.AddressSpace()->GetAreaIterator();
5221 			(area = it.Next()) != NULL;) {
5222 		if (area->Base() > nextBase)
5223 			break;
5224 	}
5225 
5226 	if (area == NULL) {
5227 		nextBase = (addr_t)-1;
5228 		return B_ENTRY_NOT_FOUND;
5229 	}
5230 
5231 	fill_area_info(area, info, size);
5232 	*cookie = (int32)(area->Base());
5233 		// TODO: Not 64 bit safe!
5234 
5235 	return B_OK;
5236 }
5237 
5238 
5239 status_t
5240 set_area_protection(area_id area, uint32 newProtection)
5241 {
5242 	fix_protection(&newProtection);
5243 
5244 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5245 		newProtection, true);
5246 }
5247 
5248 
5249 status_t
5250 resize_area(area_id areaID, size_t newSize)
5251 {
5252 	return vm_resize_area(areaID, newSize, true);
5253 }
5254 
5255 
5256 /*!	Transfers the specified area to a new team. The caller must be the owner
5257 	of the area.
5258 */
5259 area_id
5260 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5261 	bool kernel)
5262 {
5263 	area_info info;
5264 	status_t status = get_area_info(id, &info);
5265 	if (status != B_OK)
5266 		return status;
5267 
5268 	if (info.team != thread_get_current_thread()->team->id)
5269 		return B_PERMISSION_DENIED;
5270 
5271 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5272 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5273 	if (clonedArea < 0)
5274 		return clonedArea;
5275 
5276 	status = vm_delete_area(info.team, id, kernel);
5277 	if (status != B_OK) {
5278 		vm_delete_area(target, clonedArea, kernel);
5279 		return status;
5280 	}
5281 
5282 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5283 
5284 	return clonedArea;
5285 }
5286 
5287 
5288 area_id
5289 map_physical_memory(const char* name, void* physicalAddress, size_t numBytes,
5290 	uint32 addressSpec, uint32 protection, void** _virtualAddress)
5291 {
5292 	if (!arch_vm_supports_protection(protection))
5293 		return B_NOT_SUPPORTED;
5294 
5295 	fix_protection(&protection);
5296 
5297 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
5298 		_virtualAddress, addressSpec, numBytes, protection,
5299 		(addr_t)physicalAddress, false);
5300 }
5301 
5302 
5303 area_id
5304 clone_area(const char* name, void** _address, uint32 addressSpec,
5305 	uint32 protection, area_id source)
5306 {
5307 	if ((protection & B_KERNEL_PROTECTION) == 0)
5308 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5309 
5310 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
5311 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
5312 }
5313 
5314 
5315 area_id
5316 create_area_etc(team_id team, const char* name, void** address,
5317 	uint32 addressSpec, uint32 size, uint32 lock, uint32 protection,
5318 	addr_t physicalAddress, uint32 flags)
5319 {
5320 	fix_protection(&protection);
5321 
5322 	return vm_create_anonymous_area(team, (char*)name, address, addressSpec,
5323 		size, lock, protection, physicalAddress, flags, true);
5324 }
5325 
5326 
5327 area_id
5328 create_area(const char* name, void** _address, uint32 addressSpec, size_t size,
5329 	uint32 lock, uint32 protection)
5330 {
5331 	fix_protection(&protection);
5332 
5333 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), (char*)name,
5334 		_address, addressSpec, size, lock, protection, 0, 0, true);
5335 }
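

// Usage sketch for the kernel create_area()/delete_area() pair: create one
// page of wired kernel memory, use it, and delete the area again.
// Illustrative only.
#if 0
static status_t
create_area_example(void)
{
	void* address;
	area_id area = create_area("example buffer", &address,
		B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE, B_FULL_LOCK,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
	if (area < 0)
		return area;

	memset(address, 0, B_PAGE_SIZE);
		// ... use the page at "address" ...

	return delete_area(area);
}
#endif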
5336 
5337 
5338 status_t
5339 delete_area(area_id area)
5340 {
5341 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
5342 }
5343 
5344 
5345 //	#pragma mark - Userland syscalls
5346 
5347 
5348 status_t
5349 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
5350 	addr_t size)
5351 {
5352 	// filter out some unavailable values (for userland)
5353 	switch (addressSpec) {
5354 		case B_ANY_KERNEL_ADDRESS:
5355 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5356 			return B_BAD_VALUE;
5357 	}
5358 
5359 	addr_t address;
5360 
5361 	if (!IS_USER_ADDRESS(userAddress)
5362 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
5363 		return B_BAD_ADDRESS;
5364 
5365 	status_t status = vm_reserve_address_range(
5366 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
5367 		RESERVED_AVOID_BASE);
5368 	if (status != B_OK)
5369 		return status;
5370 
5371 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
5372 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5373 			(void*)address, size);
5374 		return B_BAD_ADDRESS;
5375 	}
5376 
5377 	return B_OK;
5378 }
5379 
5380 
5381 status_t
5382 _user_unreserve_address_range(addr_t address, addr_t size)
5383 {
5384 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5385 		(void*)address, size);
5386 }
5387 
5388 
5389 area_id
5390 _user_area_for(void* address)
5391 {
5392 	return vm_area_for((addr_t)address, false);
5393 }
5394 
5395 
5396 area_id
5397 _user_find_area(const char* userName)
5398 {
5399 	char name[B_OS_NAME_LENGTH];
5400 
5401 	if (!IS_USER_ADDRESS(userName)
5402 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5403 		return B_BAD_ADDRESS;
5404 
5405 	return find_area(name);
5406 }
5407 
5408 
5409 status_t
5410 _user_get_area_info(area_id area, area_info* userInfo)
5411 {
5412 	if (!IS_USER_ADDRESS(userInfo))
5413 		return B_BAD_ADDRESS;
5414 
5415 	area_info info;
5416 	status_t status = get_area_info(area, &info);
5417 	if (status < B_OK)
5418 		return status;
5419 
5420 	// TODO: do we want to prevent userland from seeing kernel protections?
5421 	//info.protection &= B_USER_PROTECTION;
5422 
5423 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5424 		return B_BAD_ADDRESS;
5425 
5426 	return status;
5427 }
5428 
5429 
5430 status_t
5431 _user_get_next_area_info(team_id team, int32* userCookie, area_info* userInfo)
5432 {
5433 	int32 cookie;
5434 
5435 	if (!IS_USER_ADDRESS(userCookie)
5436 		|| !IS_USER_ADDRESS(userInfo)
5437 		|| user_memcpy(&cookie, userCookie, sizeof(int32)) < B_OK)
5438 		return B_BAD_ADDRESS;
5439 
5440 	area_info info;
5441 	status_t status = _get_next_area_info(team, &cookie, &info,
5442 		sizeof(area_info));
5443 	if (status != B_OK)
5444 		return status;
5445 
5446 	//info.protection &= B_USER_PROTECTION;
5447 
5448 	if (user_memcpy(userCookie, &cookie, sizeof(int32)) < B_OK
5449 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5450 		return B_BAD_ADDRESS;
5451 
5452 	return status;
5453 }
5454 
5455 
5456 status_t
5457 _user_set_area_protection(area_id area, uint32 newProtection)
5458 {
5459 	if ((newProtection & ~B_USER_PROTECTION) != 0)
5460 		return B_BAD_VALUE;
5461 
5462 	fix_protection(&newProtection);
5463 
5464 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
5465 		newProtection, false);
5466 }
5467 
5468 
5469 status_t
5470 _user_resize_area(area_id area, size_t newSize)
5471 {
5472 	// TODO: Since we restrict deleting of areas to those owned by the team,
5473 	// we should also do that for resizing (check other functions, too).
5474 	return vm_resize_area(area, newSize, false);
5475 }
5476 
5477 
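/*!	Syscall: transfers the given area to the team \a target (see
	transfer_area()). Kernel-only address specifications are rejected; on
	success the new area's base address is copied back to \a userAddress.
*/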
5478 area_id
5479 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
5480 	team_id target)
5481 {
5482 	// filter out some unavailable values (for userland)
5483 	switch (addressSpec) {
5484 		case B_ANY_KERNEL_ADDRESS:
5485 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5486 			return B_BAD_VALUE;
5487 	}
5488 
5489 	void* address;
5490 	if (!IS_USER_ADDRESS(userAddress)
5491 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5492 		return B_BAD_ADDRESS;
5493 
5494 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
5495 	if (newArea < B_OK)
5496 		return newArea;
5497 
5498 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
5499 		return B_BAD_ADDRESS;
5500 
5501 	return newArea;
5502 }
5503 
5504 
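/*!	Syscall: clones \a sourceArea into the calling team's address space.
	Kernel-only address specifications and kernel protection bits are
	rejected; on success the clone's base address is copied back to
	\a userAddress (if that copy fails, the clone is deleted again).
*/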
5505 area_id
5506 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
5507 	uint32 protection, area_id sourceArea)
5508 {
5509 	char name[B_OS_NAME_LENGTH];
5510 	void* address;
5511 
5512 	// filter out some unavailable values (for userland)
5513 	switch (addressSpec) {
5514 		case B_ANY_KERNEL_ADDRESS:
5515 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5516 			return B_BAD_VALUE;
5517 	}
5518 	if ((protection & ~B_USER_PROTECTION) != 0)
5519 		return B_BAD_VALUE;
5520 
5521 	if (!IS_USER_ADDRESS(userName)
5522 		|| !IS_USER_ADDRESS(userAddress)
5523 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
5524 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5525 		return B_BAD_ADDRESS;
5526 
5527 	fix_protection(&protection);
5528 
5529 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
5530 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
5531 		false);
5532 	if (clonedArea < B_OK)
5533 		return clonedArea;
5534 
5535 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5536 		delete_area(clonedArea);
5537 		return B_BAD_ADDRESS;
5538 	}
5539 
5540 	return clonedArea;
5541 }
5542 
5543 
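/*!	Syscall: creates an anonymous area in the calling team's address space.
	Kernel-only address specifications, kernel protection bits, and exact
	addresses within the kernel address range are rejected; on success the
	area's base address is copied back to \a userAddress.
*/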
5544 area_id
5545 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
5546 	size_t size, uint32 lock, uint32 protection)
5547 {
5548 	char name[B_OS_NAME_LENGTH];
5549 	void* address;
5550 
5551 	// filter out some unavailable values (for userland)
5552 	switch (addressSpec) {
5553 		case B_ANY_KERNEL_ADDRESS:
5554 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5555 			return B_BAD_VALUE;
5556 	}
5557 	if ((protection & ~B_USER_PROTECTION) != 0)
5558 		return B_BAD_VALUE;
5559 
5560 	if (!IS_USER_ADDRESS(userName)
5561 		|| !IS_USER_ADDRESS(userAddress)
5562 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
5563 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5564 		return B_BAD_ADDRESS;
5565 
5566 	if (addressSpec == B_EXACT_ADDRESS
5567 		&& IS_KERNEL_ADDRESS(address))
5568 		return B_BAD_VALUE;
5569 
5570 	fix_protection(&protection);
5571 
5572 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(),
5573 		(char*)name, &address, addressSpec, size, lock, protection, 0, 0,
5574 		false);
5575 
5576 	if (area >= B_OK
5577 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
5578 		delete_area(area);
5579 		return B_BAD_ADDRESS;
5580 	}
5581 
5582 	return area;
5583 }
5584 
5585 
5586 status_t
5587 _user_delete_area(area_id area)
5588 {
5589 	// Unlike the BeOS implementation, from userland you can now only delete
5590 	// areas that you have created yourself.
5591 	// The documentation for delete_area() explicitly states that this
5592 	// will be restricted in the future, and so it is now.
5593 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
5594 }
5595 
5596 
5597 // TODO: create a BeOS style call for this!
5598 
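/*!	Syscall: maps \a size bytes of the file referred to by \a fd at \a offset
	into the calling team's address space. Exact addresses must be page
	aligned and lie completely in userland; the kernel is always granted read
	access to the mapping, and also write access if B_WRITE_AREA was
	requested.
*/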
5599 area_id
5600 _user_map_file(const char* userName, void** userAddress, int addressSpec,
5601 	size_t size, int protection, int mapping, bool unmapAddressRange, int fd,
5602 	off_t offset)
5603 {
5604 	char name[B_OS_NAME_LENGTH];
5605 	void* address;
5606 	area_id area;
5607 
5608 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
5609 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
5610 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
5611 		return B_BAD_ADDRESS;
5612 
5613 	if (addressSpec == B_EXACT_ADDRESS) {
5614 		if ((addr_t)address + size < (addr_t)address
5615 				|| (addr_t)address % B_PAGE_SIZE != 0) {
5616 			return B_BAD_VALUE;
5617 		}
5618 		if (!IS_USER_ADDRESS(address)
5619 				|| !IS_USER_ADDRESS((addr_t)address + size)) {
5620 			return B_BAD_ADDRESS;
5621 		}
5622 	}
5623 
5624 	// userland-created areas can always be accessed by the kernel
5625 	protection |= B_KERNEL_READ_AREA
5626 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
5627 
5628 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
5629 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
5630 		false);
5631 	if (area < B_OK)
5632 		return area;
5633 
5634 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
5635 		return B_BAD_ADDRESS;
5636 
5637 	return area;
5638 }
5639 
5640 
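/*!	Syscall: unmaps the given, page-aligned range of the calling team's
	address space. The address space is write-locked, and the call waits
	until no part of the range is wired before actually unmapping it.
*/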
5641 status_t
5642 _user_unmap_memory(void* _address, size_t size)
5643 {
5644 	addr_t address = (addr_t)_address;
5645 
5646 	// check params
5647 	if (size == 0 || (addr_t)address + size < (addr_t)address
5648 			|| (addr_t)address % B_PAGE_SIZE != 0) {
5649 		return B_BAD_VALUE;
5650 	}
5651 
5652 	if (!IS_USER_ADDRESS(address) || !IS_USER_ADDRESS((addr_t)address + size))
5653 		return B_BAD_ADDRESS;
5654 
5655 	// Write lock the address space and ensure the address range is not wired.
5656 	AddressSpaceWriteLocker locker;
5657 	do {
5658 		status_t status = locker.SetTo(team_get_current_team_id());
5659 		if (status != B_OK)
5660 			return status;
5661 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
5662 			size, &locker));
5663 
5664 	// unmap
5665 	return unmap_address_range(locker.AddressSpace(), address, size, false);
5666 }
5667 
5668 
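/*!	Syscall: changes the memory protection of the given, page-aligned range
	of the calling team's address space (used to implement POSIX mprotect()).
	The range must be fully covered by areas the caller is allowed to modify.
	The new protection is then recorded per page, and already mapped pages
	are either re-protected or unmapped (the latter for pages living in a
	lower cache for which write access was requested).
*/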
5669 status_t
5670 _user_set_memory_protection(void* _address, size_t size, int protection)
5671 {
5672 	// check address range
5673 	addr_t address = (addr_t)_address;
5674 	size = PAGE_ALIGN(size);
5675 
5676 	if ((address % B_PAGE_SIZE) != 0)
5677 		return B_BAD_VALUE;
5678 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
5679 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
5680 		// weird error code required by POSIX
5681 		return ENOMEM;
5682 	}
5683 
5684 	// extend and check protection
5685 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
5686 	uint32 actualProtection = protection | B_KERNEL_READ_AREA
5687 		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
5688 
5689 	if (!arch_vm_supports_protection(actualProtection))
5690 		return B_NOT_SUPPORTED;
5691 
5692 	// We need to write lock the address space, since we're going to play with
5693 	// the areas. Also make sure that none of the areas is wired and that we're
5694 	// actually allowed to change the protection.
5695 	AddressSpaceWriteLocker locker;
5696 
5697 	bool restart;
5698 	do {
5699 		restart = false;
5700 
5701 		status_t status = locker.SetTo(team_get_current_team_id());
5702 		if (status != B_OK)
5703 			return status;
5704 
5705 		// First round: Check whether the whole range is covered by areas and
5706 		// whether we are allowed to modify them.
5707 		addr_t currentAddress = address;
5708 		size_t sizeLeft = size;
5709 		while (sizeLeft > 0) {
5710 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
5711 			if (area == NULL)
5712 				return B_NO_MEMORY;
5713 
5714 			if ((area->protection & B_KERNEL_AREA) != 0)
5715 				return B_NOT_ALLOWED;
5716 
5717 			AreaCacheLocker cacheLocker(area);
5718 
5719 			if (wait_if_area_is_wired(area, &locker, &cacheLocker)) {
5720 				restart = true;
5721 				break;
5722 			}
5723 
5724 			cacheLocker.Unlock();
5725 
5726 			// TODO: For (shared) mapped files we should check whether the new
5727 			// protections are compatible with the file permissions. We don't
5728 			// have a way to do that yet, though.
5729 
5730 			addr_t offset = currentAddress - area->Base();
5731 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
5732 
5733 			currentAddress += rangeSize;
5734 			sizeLeft -= rangeSize;
5735 		}
5736 	} while (restart);
5737 
5738 	// Second round: If the protections differ from those of the area, create a
5739 	// page protection array and re-map the mapped pages.
5740 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
5741 	addr_t currentAddress = address;
5742 	size_t sizeLeft = size;
5743 	while (sizeLeft > 0) {
5744 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
5745 		if (area == NULL)
5746 			return B_NO_MEMORY;
5747 
5748 		addr_t offset = currentAddress - area->Base();
5749 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
5750 
5751 		currentAddress += rangeSize;
5752 		sizeLeft -= rangeSize;
5753 
5754 		if (area->page_protections == NULL) {
5755 			if (area->protection == actualProtection)
5756 				continue;
5757 
5758 			// In the page protections we store only the three user protections,
5759 			// so we use 4 bits per page.
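			// Each byte covers two consecutive pages (one protection nibble
			// per page); see set_area_page_protection() for the exact
			// packing.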
5760 			uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
5761 			area->page_protections = (uint8*)malloc(bytes);
5762 			if (area->page_protections == NULL)
5763 				return B_NO_MEMORY;
5764 
5765 			// initialize the protection of all pages to that of the area
5766 			uint32 areaProtection = area->protection
5767 				& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5768 			memset(area->page_protections,
5769 				areaProtection | (areaProtection << 4), bytes);
5770 		}
5771 
5772 		// We need to lock the complete cache chain, since we potentially unmap
5773 		// pages of lower caches.
5774 		VMCache* topCache = vm_area_get_locked_cache(area);
5775 		VMCacheChainLocker cacheChainLocker(topCache);
5776 		cacheChainLocker.LockAllSourceCaches();
5777 
5778 		for (addr_t pageAddress = area->Base() + offset;
5779 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
5780 			map->Lock();
5781 
5782 			set_area_page_protection(area, pageAddress, protection);
5783 
5784 			addr_t physicalAddress;
5785 			uint32 flags;
5786 
5787 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
5788 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
5789 				map->Unlock();
5790 				continue;
5791 			}
5792 
5793 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
5794 			if (page == NULL) {
5795 				panic("area %p looking up page failed for pa 0x%lx\n", area,
5796 					physicalAddress);
5797 				map->Unlock();
5798 				return B_ERROR;
5799 			}
5800 
5801 			// If the page is not in the topmost cache and write access is
5802 			// requested, we have to unmap it. Otherwise we can re-map it with
5803 			// the new protection.
5804 			bool unmapPage = page->Cache() != topCache
5805 				&& (protection & B_WRITE_AREA) != 0;
5806 
5807 			if (!unmapPage)
5808 				map->ProtectPage(area, pageAddress, actualProtection);
5809 
5810 			map->Unlock();
5811 
5812 			if (unmapPage) {
5813 				DEBUG_PAGE_ACCESS_START(page);
5814 				unmap_page(area, pageAddress);
5815 				DEBUG_PAGE_ACCESS_END(page);
5816 			}
5817 		}
5818 	}
5819 
5820 	return B_OK;
5821 }
5822 
5823 
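/*!	Syscall: writes back modified pages in the given, page-aligned range of
	the calling team's address space (used to implement POSIX msync()). Only
	file (vnode) backed caches are affected: with MS_SYNC the pages are
	written synchronously, with MS_ASYNC the write-back is merely scheduled.
	MS_INVALIDATE is a no-op here (see the note at the end of the function).
*/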
5824 status_t
5825 _user_sync_memory(void* _address, size_t size, int flags)
5826 {
5827 	addr_t address = (addr_t)_address;
5828 	size = PAGE_ALIGN(size);
5829 
5830 	// check params
5831 	if ((address % B_PAGE_SIZE) != 0)
5832 		return B_BAD_VALUE;
5833 	if ((addr_t)address + size < (addr_t)address || !IS_USER_ADDRESS(address)
5834 		|| !IS_USER_ADDRESS((addr_t)address + size)) {
5835 		// weird error code required by POSIX
5836 		return ENOMEM;
5837 	}
5838 
5839 	bool writeSync = (flags & MS_SYNC) != 0;
5840 	bool writeAsync = (flags & MS_ASYNC) != 0;
5841 	if (writeSync && writeAsync)
5842 		return B_BAD_VALUE;
5843 
5844 	if (size == 0 || (!writeSync && !writeAsync))
5845 		return B_OK;
5846 
5847 	// iterate through the range and sync all concerned areas
5848 	while (size > 0) {
5849 		// read lock the address space
5850 		AddressSpaceReadLocker locker;
5851 		status_t error = locker.SetTo(team_get_current_team_id());
5852 		if (error != B_OK)
5853 			return error;
5854 
5855 		// get the first area
5856 		VMArea* area = locker.AddressSpace()->LookupArea(address);
5857 		if (area == NULL)
5858 			return B_NO_MEMORY;
5859 
5860 		uint32 offset = address - area->Base();
5861 		size_t rangeSize = min_c(area->Size() - offset, size);
5862 		offset += area->cache_offset;
5863 
5864 		// lock the cache
5865 		AreaCacheLocker cacheLocker(area);
5866 		if (!cacheLocker)
5867 			return B_BAD_VALUE;
5868 		VMCache* cache = area->cache;
5869 
5870 		locker.Unlock();
5871 
5872 		uint32 firstPage = offset >> PAGE_SHIFT;
5873 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
5874 
5875 		// write the pages
5876 		if (cache->type == CACHE_TYPE_VNODE) {
5877 			if (writeSync) {
5878 				// synchronous
5879 				error = vm_page_write_modified_page_range(cache, firstPage,
5880 					endPage);
5881 				if (error != B_OK)
5882 					return error;
5883 			} else {
5884 				// asynchronous
5885 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
5886 				// TODO: This is probably not quite what is supposed to happen.
5887 				// Especially when a lot has to be written, it might take ages
5888 				// until it really hits the disk.
5889 			}
5890 		}
5891 
5892 		address += rangeSize;
5893 		size -= rangeSize;
5894 	}
5895 
5896 	// NOTE: If I understand it correctly, the purpose of MS_INVALIDATE is to
5897 	// synchronize multiple mappings of the same file. In our VM they never get
5898 	// out of sync, though, so we don't have to do anything.
5899 
5900 	return B_OK;
5901 }
5902 
5903 
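/*!	Syscall: gives the kernel a hint about the expected usage of the given
	memory range. Not implemented yet; the advice is ignored and B_OK is
	returned (see the TODO).
*/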
5904 status_t
5905 _user_memory_advice(void* address, size_t size, int advice)
5906 {
5907 	// TODO: Implement!
5908 	return B_OK;
5909 }
5910