xref: /haiku/src/system/kernel/vm/vm.cpp (revision 4da2ed410326a2f55b733876a49fa095321b87ad)
1 /*
2  * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 #include <vm/vm.h>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18 
19 #include <algorithm>
20 
21 #include <OS.h>
22 #include <KernelExport.h>
23 
24 #include <AutoDeleterDrivers.h>
25 
26 #include <symbol_versioning.h>
27 
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/BitUtils.h>
51 #include <util/ThreadAutoLock.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_priv.h>
54 #include <vm/VMAddressSpace.h>
55 #include <vm/VMArea.h>
56 #include <vm/VMCache.h>
57 
58 #include "VMAddressSpaceLocking.h"
59 #include "VMAnonymousCache.h"
60 #include "VMAnonymousNoSwapCache.h"
61 #include "IORequest.h"
62 
63 
64 //#define TRACE_VM
65 //#define TRACE_FAULTS
66 #ifdef TRACE_VM
67 #	define TRACE(x) dprintf x
68 #else
69 #	define TRACE(x) ;
70 #endif
71 #ifdef TRACE_FAULTS
72 #	define FTRACE(x) dprintf x
73 #else
74 #	define FTRACE(x) ;
75 #endif
76 
77 
78 namespace {
79 
80 class AreaCacheLocking {
81 public:
Lock(VMCache * lockable)82 	inline bool Lock(VMCache* lockable)
83 	{
84 		return false;
85 	}
86 
Unlock(VMCache * lockable)87 	inline void Unlock(VMCache* lockable)
88 	{
89 		vm_area_put_locked_cache(lockable);
90 	}
91 };
92 
93 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
94 public:
AreaCacheLocker(VMCache * cache=NULL)95 	inline AreaCacheLocker(VMCache* cache = NULL)
96 		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
97 	{
98 	}
99 
AreaCacheLocker(VMArea * area)100 	inline AreaCacheLocker(VMArea* area)
101 		: AutoLocker<VMCache, AreaCacheLocking>()
102 	{
103 		SetTo(area);
104 	}
105 
SetTo(VMCache * cache,bool alreadyLocked)106 	inline void SetTo(VMCache* cache, bool alreadyLocked)
107 	{
108 		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
109 	}
110 
SetTo(VMArea * area)111 	inline void SetTo(VMArea* area)
112 	{
113 		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
114 			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
115 	}
116 };
117 
118 
119 class VMCacheChainLocker {
120 public:
VMCacheChainLocker()121 	VMCacheChainLocker()
122 		:
123 		fTopCache(NULL),
124 		fBottomCache(NULL)
125 	{
126 	}
127 
VMCacheChainLocker(VMCache * topCache)128 	VMCacheChainLocker(VMCache* topCache)
129 		:
130 		fTopCache(topCache),
131 		fBottomCache(topCache)
132 	{
133 	}
134 
~VMCacheChainLocker()135 	~VMCacheChainLocker()
136 	{
137 		Unlock();
138 	}
139 
SetTo(VMCache * topCache)140 	void SetTo(VMCache* topCache)
141 	{
142 		fTopCache = topCache;
143 		fBottomCache = topCache;
144 
145 		if (topCache != NULL)
146 			topCache->SetUserData(NULL);
147 	}
148 
LockSourceCache()149 	VMCache* LockSourceCache()
150 	{
151 		if (fBottomCache == NULL || fBottomCache->source == NULL)
152 			return NULL;
153 
154 		VMCache* previousCache = fBottomCache;
155 
156 		fBottomCache = fBottomCache->source;
157 		fBottomCache->Lock();
158 		fBottomCache->AcquireRefLocked();
159 		fBottomCache->SetUserData(previousCache);
160 
161 		return fBottomCache;
162 	}
163 
LockAllSourceCaches()164 	void LockAllSourceCaches()
165 	{
166 		while (LockSourceCache() != NULL) {
167 		}
168 	}
169 
Unlock(VMCache * exceptCache=NULL)170 	void Unlock(VMCache* exceptCache = NULL)
171 	{
172 		if (fTopCache == NULL)
173 			return;
174 
175 		// Unlock caches in source -> consumer direction. This is important to
176 		// avoid double-locking and a reversal of locking order in case a cache
177 		// is eligable for merging.
178 		VMCache* cache = fBottomCache;
179 		while (cache != NULL) {
180 			VMCache* nextCache = (VMCache*)cache->UserData();
181 			if (cache != exceptCache)
182 				cache->ReleaseRefAndUnlock(cache != fTopCache);
183 
184 			if (cache == fTopCache)
185 				break;
186 
187 			cache = nextCache;
188 		}
189 
190 		fTopCache = NULL;
191 		fBottomCache = NULL;
192 	}
193 
UnlockKeepRefs(bool keepTopCacheLocked)194 	void UnlockKeepRefs(bool keepTopCacheLocked)
195 	{
196 		if (fTopCache == NULL)
197 			return;
198 
199 		VMCache* nextCache = fBottomCache;
200 		VMCache* cache = NULL;
201 
202 		while (keepTopCacheLocked
203 				? nextCache != fTopCache : cache != fTopCache) {
204 			cache = nextCache;
205 			nextCache = (VMCache*)cache->UserData();
206 			cache->Unlock(cache != fTopCache);
207 		}
208 	}
209 
RelockCaches(bool topCacheLocked)210 	void RelockCaches(bool topCacheLocked)
211 	{
212 		if (fTopCache == NULL)
213 			return;
214 
215 		VMCache* nextCache = fTopCache;
216 		VMCache* cache = NULL;
217 		if (topCacheLocked) {
218 			cache = nextCache;
219 			nextCache = cache->source;
220 		}
221 
222 		while (cache != fBottomCache && nextCache != NULL) {
223 			VMCache* consumer = cache;
224 			cache = nextCache;
225 			nextCache = cache->source;
226 			cache->Lock();
227 			cache->SetUserData(consumer);
228 		}
229 	}
230 
231 private:
232 	VMCache*	fTopCache;
233 	VMCache*	fBottomCache;
234 };
235 
236 } // namespace
237 
238 
239 // The memory reserve an allocation of the certain priority must not touch.
240 static const size_t kMemoryReserveForPriority[] = {
241 	VM_MEMORY_RESERVE_USER,		// user
242 	VM_MEMORY_RESERVE_SYSTEM,	// system
243 	0							// VIP
244 };
245 
246 
247 static ObjectCache** sPageMappingsObjectCaches;
248 static uint32 sPageMappingsMask;
249 
250 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
251 
252 static off_t sAvailableMemory;
253 static off_t sNeededMemory;
254 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
255 static uint32 sPageFaults;
256 
257 static VMPhysicalPageMapper* sPhysicalPageMapper;
258 
259 
260 // function declarations
261 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
262 	bool deletingAddressSpace, bool alreadyRemoved = false);
263 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
264 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
265 static status_t map_backing_store(VMAddressSpace* addressSpace,
266 	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
267 	int protection, int protectionMax, int mapping, uint32 flags,
268 	const virtual_address_restrictions* addressRestrictions, bool kernel,
269 	VMArea** _area, void** _virtualAddress);
270 static void fix_protection(uint32* protection);
271 
272 
273 //	#pragma mark -
274 
275 
276 #if VM_PAGE_FAULT_TRACING
277 
278 namespace VMPageFaultTracing {
279 
280 class PageFaultStart : public AbstractTraceEntry {
281 public:
PageFaultStart(addr_t address,bool write,bool user,addr_t pc)282 	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
283 		:
284 		fAddress(address),
285 		fPC(pc),
286 		fWrite(write),
287 		fUser(user)
288 	{
289 		Initialized();
290 	}
291 
AddDump(TraceOutput & out)292 	virtual void AddDump(TraceOutput& out)
293 	{
294 		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
295 			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
296 	}
297 
298 private:
299 	addr_t	fAddress;
300 	addr_t	fPC;
301 	bool	fWrite;
302 	bool	fUser;
303 };
304 
305 
306 // page fault errors
307 enum {
308 	PAGE_FAULT_ERROR_NO_AREA		= 0,
309 	PAGE_FAULT_ERROR_KERNEL_ONLY,
310 	PAGE_FAULT_ERROR_WRITE_PROTECTED,
311 	PAGE_FAULT_ERROR_READ_PROTECTED,
312 	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
313 	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
314 	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
315 };
316 
317 
318 class PageFaultError : public AbstractTraceEntry {
319 public:
PageFaultError(area_id area,status_t error)320 	PageFaultError(area_id area, status_t error)
321 		:
322 		fArea(area),
323 		fError(error)
324 	{
325 		Initialized();
326 	}
327 
AddDump(TraceOutput & out)328 	virtual void AddDump(TraceOutput& out)
329 	{
330 		switch (fError) {
331 			case PAGE_FAULT_ERROR_NO_AREA:
332 				out.Print("page fault error: no area");
333 				break;
334 			case PAGE_FAULT_ERROR_KERNEL_ONLY:
335 				out.Print("page fault error: area: %ld, kernel only", fArea);
336 				break;
337 			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
338 				out.Print("page fault error: area: %ld, write protected",
339 					fArea);
340 				break;
341 			case PAGE_FAULT_ERROR_READ_PROTECTED:
342 				out.Print("page fault error: area: %ld, read protected", fArea);
343 				break;
344 			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
345 				out.Print("page fault error: area: %ld, execute protected",
346 					fArea);
347 				break;
348 			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
349 				out.Print("page fault error: kernel touching bad user memory");
350 				break;
351 			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
352 				out.Print("page fault error: no address space");
353 				break;
354 			default:
355 				out.Print("page fault error: area: %ld, error: %s", fArea,
356 					strerror(fError));
357 				break;
358 		}
359 	}
360 
361 private:
362 	area_id		fArea;
363 	status_t	fError;
364 };
365 
366 
367 class PageFaultDone : public AbstractTraceEntry {
368 public:
PageFaultDone(area_id area,VMCache * topCache,VMCache * cache,vm_page * page)369 	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
370 			vm_page* page)
371 		:
372 		fArea(area),
373 		fTopCache(topCache),
374 		fCache(cache),
375 		fPage(page)
376 	{
377 		Initialized();
378 	}
379 
AddDump(TraceOutput & out)380 	virtual void AddDump(TraceOutput& out)
381 	{
382 		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
383 			"page: %p", fArea, fTopCache, fCache, fPage);
384 	}
385 
386 private:
387 	area_id		fArea;
388 	VMCache*	fTopCache;
389 	VMCache*	fCache;
390 	vm_page*	fPage;
391 };
392 
393 }	// namespace VMPageFaultTracing
394 
395 #	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
396 #else
397 #	define TPF(x) ;
398 #endif	// VM_PAGE_FAULT_TRACING
399 
400 
401 //	#pragma mark - page mappings allocation
402 
403 
404 static void
create_page_mappings_object_caches()405 create_page_mappings_object_caches()
406 {
407 	// We want an even power of 2 smaller than the number of CPUs.
408 	const int32 numCPUs = smp_get_num_cpus();
409 	int32 count = next_power_of_2(numCPUs);
410 	if (count > numCPUs)
411 		count >>= 1;
412 	sPageMappingsMask = count - 1;
413 
414 	sPageMappingsObjectCaches = new object_cache*[count];
415 	if (sPageMappingsObjectCaches == NULL)
416 		panic("failed to allocate page mappings object_cache array");
417 
418 	for (int32 i = 0; i < count; i++) {
419 		char name[32];
420 		snprintf(name, sizeof(name), "page mappings %" B_PRId32, i);
421 
422 		object_cache* cache = create_object_cache_etc(name,
423 			sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
424 			NULL, NULL);
425 		if (cache == NULL)
426 			panic("failed to create page mappings object_cache");
427 
428 		object_cache_set_minimum_reserve(cache, 1024);
429 		sPageMappingsObjectCaches[i] = cache;
430 	}
431 }
432 
433 
434 static object_cache*
page_mapping_object_cache_for(page_num_t page)435 page_mapping_object_cache_for(page_num_t page)
436 {
437 	return sPageMappingsObjectCaches[page & sPageMappingsMask];
438 }
439 
440 
441 static vm_page_mapping*
allocate_page_mapping(page_num_t page,uint32 flags=0)442 allocate_page_mapping(page_num_t page, uint32 flags = 0)
443 {
444 	return (vm_page_mapping*)object_cache_alloc(page_mapping_object_cache_for(page),
445 		flags);
446 }
447 
448 
449 void
vm_free_page_mapping(page_num_t page,vm_page_mapping * mapping,uint32 flags)450 vm_free_page_mapping(page_num_t page, vm_page_mapping* mapping, uint32 flags)
451 {
452 	object_cache_free(page_mapping_object_cache_for(page), mapping, flags);
453 }
454 
455 
456 //	#pragma mark -
457 
458 
459 /*!	The page's cache must be locked.
460 */
461 static inline void
increment_page_wired_count(vm_page * page)462 increment_page_wired_count(vm_page* page)
463 {
464 	if (!page->IsMapped())
465 		atomic_add(&gMappedPagesCount, 1);
466 	page->IncrementWiredCount();
467 }
468 
469 
470 /*!	The page's cache must be locked.
471 */
472 static inline void
decrement_page_wired_count(vm_page * page)473 decrement_page_wired_count(vm_page* page)
474 {
475 	page->DecrementWiredCount();
476 	if (!page->IsMapped())
477 		atomic_add(&gMappedPagesCount, -1);
478 }
479 
480 
481 static inline addr_t
virtual_page_address(VMArea * area,vm_page * page)482 virtual_page_address(VMArea* area, vm_page* page)
483 {
484 	return area->Base()
485 		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
486 }
487 
488 
489 static inline bool
is_page_in_area(VMArea * area,vm_page * page)490 is_page_in_area(VMArea* area, vm_page* page)
491 {
492 	off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT);
493 	return pageCacheOffsetBytes >= area->cache_offset
494 		&& pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size();
495 }
496 
497 
498 //! You need to have the address space locked when calling this function
499 static VMArea*
lookup_area(VMAddressSpace * addressSpace,area_id id)500 lookup_area(VMAddressSpace* addressSpace, area_id id)
501 {
502 	VMAreas::ReadLock();
503 
504 	VMArea* area = VMAreas::LookupLocked(id);
505 	if (area != NULL && area->address_space != addressSpace)
506 		area = NULL;
507 
508 	VMAreas::ReadUnlock();
509 
510 	return area;
511 }
512 
513 
514 static inline size_t
area_page_protections_size(size_t areaSize)515 area_page_protections_size(size_t areaSize)
516 {
517 	// In the page protections we store only the three user protections,
518 	// so we use 4 bits per page.
519 	return (areaSize / B_PAGE_SIZE + 1) / 2;
520 }
521 
522 
523 static status_t
allocate_area_page_protections(VMArea * area)524 allocate_area_page_protections(VMArea* area)
525 {
526 	size_t bytes = area_page_protections_size(area->Size());
527 	area->page_protections = (uint8*)malloc_etc(bytes,
528 		area->address_space == VMAddressSpace::Kernel()
529 			? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
530 	if (area->page_protections == NULL)
531 		return B_NO_MEMORY;
532 
533 	// init the page protections for all pages to that of the area
534 	uint32 areaProtection = area->protection
535 		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
536 	memset(area->page_protections, areaProtection | (areaProtection << 4), bytes);
537 
538 	// clear protections from the area
539 	area->protection &= ~(B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA
540 		| B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA | B_KERNEL_EXECUTE_AREA);
541 	return B_OK;
542 }
543 
544 
545 static inline uint8*
realloc_area_page_protections(uint8 * pageProtections,size_t areaSize,uint32 allocationFlags)546 realloc_area_page_protections(uint8* pageProtections, size_t areaSize,
547 	uint32 allocationFlags)
548 {
549 	size_t bytes = area_page_protections_size(areaSize);
550 	return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags);
551 }
552 
553 
554 static inline void
set_area_page_protection(VMArea * area,addr_t pageAddress,uint32 protection)555 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
556 {
557 	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
558 	addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
559 	uint8& entry = area->page_protections[pageIndex / 2];
560 	if (pageIndex % 2 == 0)
561 		entry = (entry & 0xf0) | protection;
562 	else
563 		entry = (entry & 0x0f) | (protection << 4);
564 }
565 
566 
567 static inline uint32
get_area_page_protection(VMArea * area,addr_t pageAddress)568 get_area_page_protection(VMArea* area, addr_t pageAddress)
569 {
570 	if (area->page_protections == NULL)
571 		return area->protection;
572 
573 	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
574 	uint32 protection = area->page_protections[pageIndex / 2];
575 	if (pageIndex % 2 == 0)
576 		protection &= 0x0f;
577 	else
578 		protection >>= 4;
579 
580 	uint32 kernelProtection = 0;
581 	if ((protection & B_READ_AREA) != 0)
582 		kernelProtection |= B_KERNEL_READ_AREA;
583 	if ((protection & B_WRITE_AREA) != 0)
584 		kernelProtection |= B_KERNEL_WRITE_AREA;
585 
586 	// If this is a kernel area we return only the kernel flags.
587 	if (area->address_space == VMAddressSpace::Kernel())
588 		return kernelProtection;
589 
590 	return protection | kernelProtection;
591 }
592 
593 
594 /*! Computes the committed size an area's cache ought to have,
595 	based on the area's page_protections and any pages already present.
596 */
597 static inline uint32
compute_area_page_commitment(VMArea * area)598 compute_area_page_commitment(VMArea* area)
599 {
600 	const size_t bytes = area_page_protections_size(area->Size());
601 	const bool oddPageCount = ((area->Size() / B_PAGE_SIZE) % 2) != 0;
602 	size_t pages = 0;
603 	for (size_t i = 0; i < bytes; i++) {
604 		const uint8 protection = area->page_protections[i];
605 		const off_t pageOffset = bytes * 2 * B_PAGE_SIZE;
606 		if (area->cache->LookupPage(pageOffset) != NULL)
607 			pages++;
608 		else
609 			pages += ((protection & (B_WRITE_AREA << 0)) != 0) ? 1 : 0;
610 
611 		if (i == (bytes - 1) && oddPageCount)
612 			break;
613 
614 		if (area->cache->LookupPage(pageOffset + B_PAGE_SIZE) != NULL)
615 			pages++;
616 		else
617 			pages += ((protection & (B_WRITE_AREA << 4)) != 0) ? 1 : 0;
618 	}
619 	return pages;
620 }
621 
622 
623 /*!	The caller must have reserved enough pages the translation map
624 	implementation might need to map this page.
625 	The page's cache must be locked.
626 */
627 static status_t
map_page(VMArea * area,vm_page * page,addr_t address,uint32 protection,vm_page_reservation * reservation)628 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
629 	vm_page_reservation* reservation)
630 {
631 	VMTranslationMap* map = area->address_space->TranslationMap();
632 
633 	bool wasMapped = page->IsMapped();
634 
635 	if (area->wiring == B_NO_LOCK) {
636 		DEBUG_PAGE_ACCESS_CHECK(page);
637 
638 		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
639 		vm_page_mapping* mapping = allocate_page_mapping(page->physical_page_number,
640 			CACHE_DONT_WAIT_FOR_MEMORY
641 				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
642 		if (mapping == NULL)
643 			return B_NO_MEMORY;
644 
645 		mapping->page = page;
646 		mapping->area = area;
647 
648 		map->Lock();
649 
650 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
651 			area->MemoryType(), reservation);
652 
653 		// insert mapping into lists
654 		if (!page->IsMapped())
655 			atomic_add(&gMappedPagesCount, 1);
656 
657 		page->mappings.Add(mapping);
658 		area->mappings.Add(mapping);
659 
660 		map->Unlock();
661 	} else {
662 		DEBUG_PAGE_ACCESS_CHECK(page);
663 
664 		map->Lock();
665 		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
666 			area->MemoryType(), reservation);
667 		map->Unlock();
668 
669 		increment_page_wired_count(page);
670 	}
671 
672 	if (!wasMapped) {
673 		// The page is mapped now, so we must not remain in the cached queue.
674 		// It also makes sense to move it from the inactive to the active, since
675 		// otherwise the page daemon wouldn't come to keep track of it (in idle
676 		// mode) -- if the page isn't touched, it will be deactivated after a
677 		// full iteration through the queue at the latest.
678 		if (page->State() == PAGE_STATE_CACHED
679 				|| page->State() == PAGE_STATE_INACTIVE) {
680 			vm_page_set_state(page, PAGE_STATE_ACTIVE);
681 		}
682 	}
683 
684 	return B_OK;
685 }
686 
687 
688 /*!	If \a preserveModified is \c true, the caller must hold the lock of the
689 	page's cache.
690 */
691 static inline bool
unmap_page(VMArea * area,addr_t virtualAddress)692 unmap_page(VMArea* area, addr_t virtualAddress)
693 {
694 	return area->address_space->TranslationMap()->UnmapPage(area,
695 		virtualAddress, true);
696 }
697 
698 
699 /*!	If \a preserveModified is \c true, the caller must hold the lock of all
700 	mapped pages' caches.
701 */
702 static inline void
unmap_pages(VMArea * area,addr_t base,size_t size)703 unmap_pages(VMArea* area, addr_t base, size_t size)
704 {
705 	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
706 }
707 
708 
709 static inline bool
intersect_area(VMArea * area,addr_t & address,addr_t & size,addr_t & offset)710 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
711 {
712 	if (address < area->Base()) {
713 		offset = area->Base() - address;
714 		if (offset >= size)
715 			return false;
716 
717 		address = area->Base();
718 		size -= offset;
719 		offset = 0;
720 		if (size > area->Size())
721 			size = area->Size();
722 
723 		return true;
724 	}
725 
726 	offset = address - area->Base();
727 	if (offset >= area->Size())
728 		return false;
729 
730 	if (size >= area->Size() - offset)
731 		size = area->Size() - offset;
732 
733 	return true;
734 }
735 
736 
737 /*!	Cuts a piece out of an area. If the given cut range covers the complete
738 	area, it is deleted. If it covers the beginning or the end, the area is
739 	resized accordingly. If the range covers some part in the middle of the
740 	area, it is split in two; in this case the second area is returned via
741 	\a _secondArea (the variable is left untouched in the other cases).
742 	The address space must be write locked.
743 	The caller must ensure that no part of the given range is wired.
744 */
745 static status_t
cut_area(VMAddressSpace * addressSpace,VMArea * area,addr_t address,addr_t size,VMArea ** _secondArea,bool kernel)746 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
747 	addr_t size, VMArea** _secondArea, bool kernel)
748 {
749 	addr_t offset;
750 	if (!intersect_area(area, address, size, offset))
751 		return B_OK;
752 
753 	// Is the area fully covered?
754 	if (address == area->Base() && size == area->Size()) {
755 		delete_area(addressSpace, area, false);
756 		return B_OK;
757 	}
758 
759 	int priority;
760 	uint32 allocationFlags;
761 	if (addressSpace == VMAddressSpace::Kernel()) {
762 		priority = VM_PRIORITY_SYSTEM;
763 		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
764 			| HEAP_DONT_LOCK_KERNEL_SPACE;
765 	} else {
766 		priority = VM_PRIORITY_USER;
767 		allocationFlags = 0;
768 	}
769 
770 	VMCache* cache = vm_area_get_locked_cache(area);
771 	VMCacheChainLocker cacheChainLocker(cache);
772 	cacheChainLocker.LockAllSourceCaches();
773 
774 	// If no one else uses the area's cache and it's an anonymous cache, we can
775 	// resize or split it, too.
776 	bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
777 		&& cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
778 
779 	const addr_t oldSize = area->Size();
780 
781 	// Cut the end only?
782 	if (offset > 0 && size == area->Size() - offset) {
783 		status_t error = addressSpace->ShrinkAreaTail(area, offset,
784 			allocationFlags);
785 		if (error != B_OK)
786 			return error;
787 
788 		if (area->page_protections != NULL) {
789 			uint8* newProtections = realloc_area_page_protections(
790 				area->page_protections, area->Size(), allocationFlags);
791 
792 			if (newProtections == NULL) {
793 				addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
794 				return B_NO_MEMORY;
795 			}
796 
797 			area->page_protections = newProtections;
798 		}
799 
800 		// unmap pages
801 		unmap_pages(area, address, size);
802 
803 		if (onlyCacheUser) {
804 			// Since VMCache::Resize() can temporarily drop the lock, we must
805 			// unlock all lower caches to prevent locking order inversion.
806 			cacheChainLocker.Unlock(cache);
807 			cache->Resize(cache->virtual_base + offset, priority);
808 		}
809 
810 		if (area->page_protections != NULL) {
811 			// Resize() adjusts the commitment, so we must do this after that.
812 			const size_t newCommitmentPages = compute_area_page_commitment(area);
813 			cache->Commit(newCommitmentPages * B_PAGE_SIZE, VM_PRIORITY_USER);
814 		}
815 
816 		if (onlyCacheUser)
817 			cache->ReleaseRefAndUnlock();
818 		return B_OK;
819 	}
820 
821 	// Cut the beginning only?
822 	if (area->Base() == address) {
823 		uint8* newProtections = NULL;
824 		if (area->page_protections != NULL) {
825 			// Allocate all memory before shifting, as the shift might lose some bits.
826 			newProtections = realloc_area_page_protections(NULL, area->Size(),
827 				allocationFlags);
828 
829 			if (newProtections == NULL)
830 				return B_NO_MEMORY;
831 		}
832 
833 		// resize the area
834 		status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
835 			allocationFlags);
836 		if (error != B_OK) {
837 			free_etc(newProtections, allocationFlags);
838 			return error;
839 		}
840 
841 		if (area->page_protections != NULL) {
842 			size_t oldBytes = area_page_protections_size(oldSize);
843 			ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE;
844 			bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4));
845 
846 			size_t bytes = area_page_protections_size(area->Size());
847 			memcpy(newProtections, area->page_protections, bytes);
848 			free_etc(area->page_protections, allocationFlags);
849 			area->page_protections = newProtections;
850 		}
851 
852 		// unmap pages
853 		unmap_pages(area, address, size);
854 
855 		if (onlyCacheUser) {
856 			// Since VMCache::Rebase() can temporarily drop the lock, we must
857 			// unlock all lower caches to prevent locking order inversion.
858 			cacheChainLocker.Unlock(cache);
859 			cache->Rebase(cache->virtual_base + size, priority);
860 		}
861 
862 		if (area->page_protections != NULL) {
863 			// Rebase() adjusts the commitment, so we must do this after that.
864 			const size_t newCommitmentPages = compute_area_page_commitment(area);
865 			cache->Commit(newCommitmentPages * B_PAGE_SIZE, VM_PRIORITY_USER);
866 		}
867 
868 		if (onlyCacheUser)
869 			cache->ReleaseRefAndUnlock();
870 
871 		area->cache_offset += size;
872 		return B_OK;
873 	}
874 
875 	// The tough part -- cut a piece out of the middle of the area.
876 	// We do that by shrinking the area to the begin section and creating a
877 	// new area for the end section.
878 	addr_t firstNewSize = offset;
879 	addr_t secondBase = address + size;
880 	addr_t secondSize = area->Size() - offset - size;
881 
882 	// unmap pages
883 	unmap_pages(area, address, area->Size() - firstNewSize);
884 
885 	// resize the area
886 	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
887 		allocationFlags);
888 	if (error != B_OK)
889 		return error;
890 
891 	uint8* areaNewProtections = NULL;
892 	uint8* secondAreaNewProtections = NULL;
893 
894 	// Try to allocate the new memory before making some hard to reverse
895 	// changes.
896 	if (area->page_protections != NULL) {
897 		areaNewProtections = realloc_area_page_protections(NULL, area->Size(),
898 			allocationFlags);
899 		secondAreaNewProtections = realloc_area_page_protections(NULL, secondSize,
900 			allocationFlags);
901 
902 		if (areaNewProtections == NULL || secondAreaNewProtections == NULL) {
903 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
904 			free_etc(areaNewProtections, allocationFlags);
905 			free_etc(secondAreaNewProtections, allocationFlags);
906 			return B_NO_MEMORY;
907 		}
908 	}
909 
910 	virtual_address_restrictions addressRestrictions = {};
911 	addressRestrictions.address = (void*)secondBase;
912 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
913 	VMArea* secondArea;
914 	AutoLocker<VMCache> areaCacheLocker, secondCacheLocker;
915 
916 	if (onlyCacheUser) {
917 		// Create a new cache for the second area.
918 		VMCache* secondCache;
919 		error = VMCacheFactory::CreateAnonymousCache(secondCache,
920 			area->protection & B_OVERCOMMITTING_AREA, 0, 0,
921 			dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
922 		if (error != B_OK) {
923 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
924 			free_etc(areaNewProtections, allocationFlags);
925 			free_etc(secondAreaNewProtections, allocationFlags);
926 			return error;
927 		}
928 
929 		secondCache->Lock();
930 		secondCacheLocker.SetTo(secondCache, true);
931 		secondCache->temporary = cache->temporary;
932 		secondCache->virtual_base = area->cache_offset;
933 		secondCache->virtual_end = area->cache_offset + secondSize;
934 
935 		// Transfer the concerned pages from the first cache.
936 		off_t adoptOffset = area->cache_offset + secondBase - area->Base();
937 		error = secondCache->Adopt(cache, adoptOffset, secondSize,
938 			area->cache_offset);
939 
940 		if (error == B_OK) {
941 			// Since VMCache::Resize() can temporarily drop the lock, we must
942 			// unlock all lower caches to prevent locking order inversion.
943 			cacheChainLocker.Unlock(cache);
944 			areaCacheLocker.SetTo(cache, true);
945 			cache->Resize(cache->virtual_base + firstNewSize, priority);
946 				// Don't unlock the cache yet because we might have to resize it back.
947 				// (Or we might have to modify its commitment, if we have page_protections.)
948 
949 			// Map the second area.
950 			error = map_backing_store(addressSpace, secondCache,
951 				area->cache_offset, area->name, secondSize, area->wiring,
952 				area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
953 				&addressRestrictions, kernel, &secondArea, NULL);
954 		}
955 
956 		if (error != B_OK) {
957 			// Restore the original cache.
958 			cache->Resize(cache->virtual_base + oldSize, priority);
959 
960 			// Move the pages back.
961 			status_t readoptStatus = cache->Adopt(secondCache,
962 				area->cache_offset, secondSize, adoptOffset);
963 			if (readoptStatus != B_OK) {
964 				// Some (swap) pages have not been moved back and will be lost
965 				// once the second cache is deleted.
966 				panic("failed to restore cache range: %s",
967 					strerror(readoptStatus));
968 
969 				// TODO: Handle out of memory cases by freeing memory and
970 				// retrying.
971 			}
972 
973 			cache->ReleaseRefLocked();
974 			secondCache->ReleaseRefLocked();
975 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
976 			free_etc(areaNewProtections, allocationFlags);
977 			free_etc(secondAreaNewProtections, allocationFlags);
978 			return error;
979 		}
980 
981 		cache->ReleaseRefLocked();
982 	} else {
983 		// Reuse the existing cache.
984 		error = map_backing_store(addressSpace, cache, area->cache_offset
985 				+ (secondBase - area->Base()),
986 			area->name, secondSize, area->wiring, area->protection,
987 			area->protection_max, REGION_NO_PRIVATE_MAP, 0,
988 			&addressRestrictions, kernel, &secondArea, NULL);
989 		if (error != B_OK) {
990 			addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
991 			free_etc(areaNewProtections, allocationFlags);
992 			free_etc(secondAreaNewProtections, allocationFlags);
993 			return error;
994 		}
995 
996 		// We need a cache reference for the new area.
997 		cache->AcquireRefLocked();
998 	}
999 
1000 	if (area->page_protections != NULL) {
1001 		// Copy the protection bits of the first area.
1002 		const size_t areaBytes = area_page_protections_size(area->Size());
1003 		memcpy(areaNewProtections, area->page_protections, areaBytes);
1004 		uint8* areaOldProtections = area->page_protections;
1005 		area->page_protections = areaNewProtections;
1006 
1007 		// Shift the protection bits of the second area to the start of
1008 		// the old array.
1009 		const size_t oldBytes = area_page_protections_size(oldSize);
1010 		addr_t secondAreaOffset = secondBase - area->Base();
1011 		ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE;
1012 		bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4));
1013 
1014 		// Copy the protection bits of the second area.
1015 		const size_t secondAreaBytes = area_page_protections_size(secondSize);
1016 		memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes);
1017 		secondArea->page_protections = secondAreaNewProtections;
1018 
1019 		// We don't need this anymore.
1020 		free_etc(areaOldProtections, allocationFlags);
1021 
1022 		// Shrink commitments.
1023 		const size_t areaCommitPages = compute_area_page_commitment(area);
1024 		area->cache->Commit(areaCommitPages * B_PAGE_SIZE, VM_PRIORITY_USER);
1025 
1026 		const size_t secondCommitPages = compute_area_page_commitment(secondArea);
1027 		secondArea->cache->Commit(secondCommitPages * B_PAGE_SIZE, VM_PRIORITY_USER);
1028 
1029 		// Set the correct page protections for the second area.
1030 		VMTranslationMap* map = addressSpace->TranslationMap();
1031 		map->Lock();
1032 		for (VMCachePagesTree::Iterator it
1033 				= secondArea->cache->pages.GetIterator();
1034 				vm_page* page = it.Next();) {
1035 			if (is_page_in_area(secondArea, page)) {
1036 				addr_t address = virtual_page_address(secondArea, page);
1037 				uint32 pageProtection
1038 					= get_area_page_protection(secondArea, address);
1039 				map->ProtectPage(secondArea, address, pageProtection);
1040 			}
1041 		}
1042 		map->Unlock();
1043 	}
1044 
1045 	if (_secondArea != NULL)
1046 		*_secondArea = secondArea;
1047 
1048 	return B_OK;
1049 }
1050 
1051 
1052 /*!	Deletes or cuts all areas in the given address range.
1053 	The address space must be write-locked.
1054 	The caller must ensure that no part of the given range is wired.
1055 */
1056 static status_t
unmap_address_range(VMAddressSpace * addressSpace,addr_t address,addr_t size,bool kernel)1057 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1058 	bool kernel)
1059 {
1060 	size = PAGE_ALIGN(size);
1061 
1062 	// Check, whether the caller is allowed to modify the concerned areas.
1063 	if (!kernel) {
1064 		for (VMAddressSpace::AreaRangeIterator it
1065 				= addressSpace->GetAreaRangeIterator(address, size);
1066 			VMArea* area = it.Next();) {
1067 
1068 			if ((area->protection & B_KERNEL_AREA) != 0) {
1069 				dprintf("unmap_address_range: team %" B_PRId32 " tried to "
1070 					"unmap range of kernel area %" B_PRId32 " (%s)\n",
1071 					team_get_current_team_id(), area->id, area->name);
1072 				return B_NOT_ALLOWED;
1073 			}
1074 		}
1075 	}
1076 
1077 	for (VMAddressSpace::AreaRangeIterator it
1078 			= addressSpace->GetAreaRangeIterator(address, size);
1079 		VMArea* area = it.Next();) {
1080 
1081 		status_t error = cut_area(addressSpace, area, address, size, NULL,
1082 			kernel);
1083 		if (error != B_OK)
1084 			return error;
1085 			// Failing after already messing with areas is ugly, but we
1086 			// can't do anything about it.
1087 	}
1088 
1089 	return B_OK;
1090 }
1091 
1092 
1093 static status_t
discard_area_range(VMArea * area,addr_t address,addr_t size)1094 discard_area_range(VMArea* area, addr_t address, addr_t size)
1095 {
1096 	addr_t offset;
1097 	if (!intersect_area(area, address, size, offset))
1098 		return B_OK;
1099 
1100 	// If someone else uses the area's cache or it's not an anonymous cache, we
1101 	// can't discard.
1102 	VMCache* cache = vm_area_get_locked_cache(area);
1103 	if (cache->areas != area || area->cache_next != NULL
1104 		|| !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
1105 		return B_OK;
1106 	}
1107 
1108 	VMCacheChainLocker cacheChainLocker(cache);
1109 	cacheChainLocker.LockAllSourceCaches();
1110 
1111 	unmap_pages(area, address, size);
1112 
1113 	// Since VMCache::Discard() can temporarily drop the lock, we must
1114 	// unlock all lower caches to prevent locking order inversion.
1115 	cacheChainLocker.Unlock(cache);
1116 	cache->Discard(cache->virtual_base + offset, size);
1117 	cache->ReleaseRefAndUnlock();
1118 
1119 	return B_OK;
1120 }
1121 
1122 
1123 static status_t
discard_address_range(VMAddressSpace * addressSpace,addr_t address,addr_t size,bool kernel)1124 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1125 	bool kernel)
1126 {
1127 	for (VMAddressSpace::AreaRangeIterator it
1128 		= addressSpace->GetAreaRangeIterator(address, size);
1129 			VMArea* area = it.Next();) {
1130 		status_t error = discard_area_range(area, address, size);
1131 		if (error != B_OK)
1132 			return error;
1133 	}
1134 
1135 	return B_OK;
1136 }
1137 
1138 
1139 /*! You need to hold the lock of the cache and the write lock of the address
1140 	space when calling this function.
1141 	Note, that in case of error your cache will be temporarily unlocked.
1142 	If \a addressSpec is \c B_EXACT_ADDRESS and the
1143 	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
1144 	that no part of the specified address range (base \c *_virtualAddress, size
1145 	\a size) is wired. The cache will also be temporarily unlocked.
1146 */
1147 static status_t
map_backing_store(VMAddressSpace * addressSpace,VMCache * cache,off_t offset,const char * areaName,addr_t size,int wiring,int protection,int protectionMax,int mapping,uint32 flags,const virtual_address_restrictions * addressRestrictions,bool kernel,VMArea ** _area,void ** _virtualAddress)1148 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
1149 	const char* areaName, addr_t size, int wiring, int protection,
1150 	int protectionMax, int mapping,
1151 	uint32 flags, const virtual_address_restrictions* addressRestrictions,
1152 	bool kernel, VMArea** _area, void** _virtualAddress)
1153 {
1154 	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
1155 		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
1156 		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
1157 		addressSpace, cache, addressRestrictions->address, offset, size,
1158 		addressRestrictions->address_specification, wiring, protection,
1159 		protectionMax, _area, areaName));
1160 	cache->AssertLocked();
1161 
1162 	if (size == 0) {
1163 #if KDEBUG
1164 		panic("map_backing_store(): called with size=0 for area '%s'!",
1165 			areaName);
1166 #endif
1167 		return B_BAD_VALUE;
1168 	}
1169 	if (offset < 0)
1170 		return B_BAD_VALUE;
1171 
1172 	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
1173 		| HEAP_DONT_LOCK_KERNEL_SPACE;
1174 	int priority;
1175 	if (addressSpace != VMAddressSpace::Kernel()) {
1176 		priority = VM_PRIORITY_USER;
1177 	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
1178 		priority = VM_PRIORITY_VIP;
1179 		allocationFlags |= HEAP_PRIORITY_VIP;
1180 	} else
1181 		priority = VM_PRIORITY_SYSTEM;
1182 
1183 	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
1184 		allocationFlags);
1185 	if (mapping != REGION_PRIVATE_MAP)
1186 		area->protection_max = protectionMax & B_USER_PROTECTION;
1187 	if (area == NULL)
1188 		return B_NO_MEMORY;
1189 
1190 	status_t status;
1191 
1192 	// if this is a private map, we need to create a new cache
1193 	// to handle the private copies of pages as they are written to
1194 	VMCache* sourceCache = cache;
1195 	if (mapping == REGION_PRIVATE_MAP) {
1196 		VMCache* newCache;
1197 
1198 		// create an anonymous cache
1199 		status = VMCacheFactory::CreateAnonymousCache(newCache,
1200 			(protection & B_STACK_AREA) != 0
1201 				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
1202 			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
1203 		if (status != B_OK)
1204 			goto err1;
1205 
1206 		newCache->Lock();
1207 		newCache->temporary = 1;
1208 		newCache->virtual_base = offset;
1209 		newCache->virtual_end = offset + size;
1210 
1211 		cache->AddConsumer(newCache);
1212 
1213 		cache = newCache;
1214 	}
1215 
1216 	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
1217 		status = cache->SetMinimalCommitment(size, priority);
1218 		if (status != B_OK)
1219 			goto err2;
1220 	}
1221 
1222 	// check to see if this address space has entered DELETE state
1223 	if (addressSpace->IsBeingDeleted()) {
1224 		// okay, someone is trying to delete this address space now, so we can't
1225 		// insert the area, so back out
1226 		status = B_BAD_TEAM_ID;
1227 		goto err2;
1228 	}
1229 
1230 	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1231 			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1232 		// temporarily unlock the current cache since it might be mapped to
1233 		// some existing area, and unmap_address_range also needs to lock that
1234 		// cache to delete the area.
1235 		cache->Unlock();
1236 		status = unmap_address_range(addressSpace,
1237 			(addr_t)addressRestrictions->address, size, kernel);
1238 		cache->Lock();
1239 		if (status != B_OK)
1240 			goto err2;
1241 	}
1242 
1243 	status = addressSpace->InsertArea(area, size, addressRestrictions,
1244 		allocationFlags, _virtualAddress);
1245 	if (status == B_NO_MEMORY
1246 			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1247 		// Due to how many locks are held, we cannot wait here for space to be
1248 		// freed up, but we can at least notify the low_resource handler.
1249 		low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1250 	}
1251 	if (status != B_OK)
1252 		goto err2;
1253 
1254 	// attach the cache to the area
1255 	area->cache = cache;
1256 	area->cache_offset = offset;
1257 
1258 	// point the cache back to the area
1259 	cache->InsertAreaLocked(area);
1260 	if (mapping == REGION_PRIVATE_MAP)
1261 		cache->Unlock();
1262 
1263 	// insert the area in the global areas map
1264 	status = VMAreas::Insert(area);
1265 	if (status != B_OK)
1266 		goto err3;
1267 
1268 	// grab a ref to the address space (the area holds this)
1269 	addressSpace->Get();
1270 
1271 //	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1272 //		cache, sourceCache, areaName, area);
1273 
1274 	*_area = area;
1275 	return B_OK;
1276 
1277 err3:
1278 	cache->Lock();
1279 	cache->RemoveArea(area);
1280 	area->cache = NULL;
1281 err2:
1282 	if (mapping == REGION_PRIVATE_MAP) {
1283 		// We created this cache, so we must delete it again. Note, that we
1284 		// need to temporarily unlock the source cache or we'll otherwise
1285 		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1286 		sourceCache->Unlock();
1287 		cache->ReleaseRefAndUnlock();
1288 		sourceCache->Lock();
1289 	}
1290 err1:
1291 	addressSpace->DeleteArea(area, allocationFlags);
1292 	return status;
1293 }
1294 
1295 
1296 /*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1297 	  locker1, locker2).
1298 */
1299 template<typename LockerType1, typename LockerType2>
1300 static inline bool
wait_if_area_is_wired(VMArea * area,LockerType1 * locker1,LockerType2 * locker2)1301 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1302 {
1303 	area->cache->AssertLocked();
1304 
1305 	VMAreaUnwiredWaiter waiter;
1306 	if (!area->AddWaiterIfWired(&waiter))
1307 		return false;
1308 
1309 	// unlock everything and wait
1310 	if (locker1 != NULL)
1311 		locker1->Unlock();
1312 	if (locker2 != NULL)
1313 		locker2->Unlock();
1314 
1315 	waiter.waitEntry.Wait();
1316 
1317 	return true;
1318 }
1319 
1320 
1321 /*!	Checks whether the given area has any wired ranges intersecting with the
1322 	specified range and waits, if so.
1323 
1324 	When it has to wait, the function calls \c Unlock() on both \a locker1
1325 	and \a locker2, if given.
1326 	The area's top cache must be locked and must be unlocked as a side effect
1327 	of calling \c Unlock() on either \a locker1 or \a locker2.
1328 
1329 	If the function does not have to wait it does not modify or unlock any
1330 	object.
1331 
1332 	\param area The area to be checked.
1333 	\param base The base address of the range to check.
1334 	\param size The size of the address range to check.
1335 	\param locker1 An object to be unlocked when before starting to wait (may
1336 		be \c NULL).
1337 	\param locker2 An object to be unlocked when before starting to wait (may
1338 		be \c NULL).
1339 	\return \c true, if the function had to wait, \c false otherwise.
1340 */
1341 template<typename LockerType1, typename LockerType2>
1342 static inline bool
wait_if_area_range_is_wired(VMArea * area,addr_t base,size_t size,LockerType1 * locker1,LockerType2 * locker2)1343 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1344 	LockerType1* locker1, LockerType2* locker2)
1345 {
1346 	area->cache->AssertLocked();
1347 
1348 	VMAreaUnwiredWaiter waiter;
1349 	if (!area->AddWaiterIfWired(&waiter, base, size))
1350 		return false;
1351 
1352 	// unlock everything and wait
1353 	if (locker1 != NULL)
1354 		locker1->Unlock();
1355 	if (locker2 != NULL)
1356 		locker2->Unlock();
1357 
1358 	waiter.waitEntry.Wait();
1359 
1360 	return true;
1361 }
1362 
1363 
1364 /*!	Checks whether the given address space has any wired ranges intersecting
1365 	with the specified range and waits, if so.
1366 
1367 	Similar to wait_if_area_range_is_wired(), with the following differences:
1368 	- All areas intersecting with the range are checked (respectively all until
1369 	  one is found that contains a wired range intersecting with the given
1370 	  range).
1371 	- The given address space must at least be read-locked and must be unlocked
1372 	  when \c Unlock() is called on \a locker.
1373 	- None of the areas' caches are allowed to be locked.
1374 */
1375 template<typename LockerType>
1376 static inline bool
wait_if_address_range_is_wired(VMAddressSpace * addressSpace,addr_t base,size_t size,LockerType * locker)1377 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1378 	size_t size, LockerType* locker)
1379 {
1380 	for (VMAddressSpace::AreaRangeIterator it
1381 		= addressSpace->GetAreaRangeIterator(base, size);
1382 			VMArea* area = it.Next();) {
1383 
1384 		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1385 
1386 		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1387 			return true;
1388 	}
1389 
1390 	return false;
1391 }
1392 
1393 
1394 /*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1395 	It must be called in a situation where the kernel address space may be
1396 	locked.
1397 */
1398 status_t
vm_prepare_kernel_area_debug_protection(area_id id,void ** cookie)1399 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1400 {
1401 	AddressSpaceReadLocker locker;
1402 	VMArea* area;
1403 	status_t status = locker.SetFromArea(id, area);
1404 	if (status != B_OK)
1405 		return status;
1406 
1407 	if (area->page_protections == NULL) {
1408 		status = allocate_area_page_protections(area);
1409 		if (status != B_OK)
1410 			return status;
1411 	}
1412 
1413 	*cookie = (void*)area;
1414 	return B_OK;
1415 }
1416 
1417 
1418 /*!	This is a debug helper function that can only be used with very specific
1419 	use cases.
1420 	Sets protection for the given address range to the protection specified.
1421 	If \a protection is 0 then the involved pages will be marked non-present
1422 	in the translation map to cause a fault on access. The pages aren't
1423 	actually unmapped however so that they can be marked present again with
1424 	additional calls to this function. For this to work the area must be
1425 	fully locked in memory so that the pages aren't otherwise touched.
1426 	This function does not lock the kernel address space and needs to be
1427 	supplied with a \a cookie retrieved from a successful call to
1428 	vm_prepare_kernel_area_debug_protection().
1429 */
1430 status_t
vm_set_kernel_area_debug_protection(void * cookie,void * _address,size_t size,uint32 protection)1431 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1432 	uint32 protection)
1433 {
1434 	// check address range
1435 	addr_t address = (addr_t)_address;
1436 	size = PAGE_ALIGN(size);
1437 
1438 	if ((address % B_PAGE_SIZE) != 0
1439 		|| (addr_t)address + size < (addr_t)address
1440 		|| !IS_KERNEL_ADDRESS(address)
1441 		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1442 		return B_BAD_VALUE;
1443 	}
1444 
1445 	// Translate the kernel protection to user protection as we only store that.
1446 	if ((protection & B_KERNEL_READ_AREA) != 0)
1447 		protection |= B_READ_AREA;
1448 	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1449 		protection |= B_WRITE_AREA;
1450 
1451 	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1452 	VMTranslationMap* map = addressSpace->TranslationMap();
1453 	VMArea* area = (VMArea*)cookie;
1454 
1455 	addr_t offset = address - area->Base();
1456 	if (area->Size() - offset < size) {
1457 		panic("protect range not fully within supplied area");
1458 		return B_BAD_VALUE;
1459 	}
1460 
1461 	if (area->page_protections == NULL) {
1462 		panic("area has no page protections");
1463 		return B_BAD_VALUE;
1464 	}
1465 
1466 	// Invalidate the mapping entries so any access to them will fault or
1467 	// restore the mapping entries unchanged so that lookup will success again.
1468 	map->Lock();
1469 	map->DebugMarkRangePresent(address, address + size, protection != 0);
1470 	map->Unlock();
1471 
1472 	// And set the proper page protections so that the fault case will actually
1473 	// fail and not simply try to map a new page.
1474 	for (addr_t pageAddress = address; pageAddress < address + size;
1475 			pageAddress += B_PAGE_SIZE) {
1476 		set_area_page_protection(area, pageAddress, protection);
1477 	}
1478 
1479 	return B_OK;
1480 }
1481 
1482 
1483 status_t
vm_block_address_range(const char * name,void * address,addr_t size)1484 vm_block_address_range(const char* name, void* address, addr_t size)
1485 {
1486 	if (!arch_vm_supports_protection(0))
1487 		return B_NOT_SUPPORTED;
1488 
1489 	AddressSpaceWriteLocker locker;
1490 	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1491 	if (status != B_OK)
1492 		return status;
1493 
1494 	VMAddressSpace* addressSpace = locker.AddressSpace();
1495 
1496 	// create an anonymous cache
1497 	VMCache* cache;
1498 	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1499 		VM_PRIORITY_SYSTEM);
1500 	if (status != B_OK)
1501 		return status;
1502 
1503 	cache->temporary = 1;
1504 	cache->virtual_end = size;
1505 	cache->Lock();
1506 
1507 	VMArea* area;
1508 	virtual_address_restrictions addressRestrictions = {};
1509 	addressRestrictions.address = address;
1510 	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1511 	status = map_backing_store(addressSpace, cache, 0, name, size,
1512 		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
1513 		true, &area, NULL);
1514 	if (status != B_OK) {
1515 		cache->ReleaseRefAndUnlock();
1516 		return status;
1517 	}
1518 
1519 	cache->Unlock();
1520 	area->cache_type = CACHE_TYPE_RAM;
1521 	return area->id;
1522 }
1523 
1524 
1525 status_t
vm_unreserve_address_range(team_id team,void * address,addr_t size)1526 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1527 {
1528 	AddressSpaceWriteLocker locker(team);
1529 	if (!locker.IsLocked())
1530 		return B_BAD_TEAM_ID;
1531 
1532 	VMAddressSpace* addressSpace = locker.AddressSpace();
1533 	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1534 		addressSpace == VMAddressSpace::Kernel()
1535 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1536 }
1537 
1538 
1539 status_t
vm_reserve_address_range(team_id team,void ** _address,uint32 addressSpec,addr_t size,uint32 flags)1540 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1541 	addr_t size, uint32 flags)
1542 {
1543 	if (size == 0)
1544 		return B_BAD_VALUE;
1545 
1546 	AddressSpaceWriteLocker locker(team);
1547 	if (!locker.IsLocked())
1548 		return B_BAD_TEAM_ID;
1549 
1550 	virtual_address_restrictions addressRestrictions = {};
1551 	addressRestrictions.address = *_address;
1552 	addressRestrictions.address_specification = addressSpec;
1553 	VMAddressSpace* addressSpace = locker.AddressSpace();
1554 	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1555 		addressSpace == VMAddressSpace::Kernel()
1556 			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1557 		_address);
1558 }
1559 
1560 
1561 area_id
vm_create_anonymous_area(team_id team,const char * name,addr_t size,uint32 wiring,uint32 protection,uint32 flags,addr_t guardSize,const virtual_address_restrictions * virtualAddressRestrictions,const physical_address_restrictions * physicalAddressRestrictions,bool kernel,void ** _address)1562 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1563 	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1564 	const virtual_address_restrictions* virtualAddressRestrictions,
1565 	const physical_address_restrictions* physicalAddressRestrictions,
1566 	bool kernel, void** _address)
1567 {
1568 	VMArea* area;
1569 	VMCache* cache;
1570 	vm_page* page = NULL;
1571 	bool isStack = (protection & B_STACK_AREA) != 0;
1572 	page_num_t guardPages;
1573 	bool canOvercommit = false;
1574 	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1575 		? VM_PAGE_ALLOC_CLEAR : 0;
1576 
1577 	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1578 		team, name, size));
1579 
1580 	size = PAGE_ALIGN(size);
1581 	guardSize = PAGE_ALIGN(guardSize);
1582 	guardPages = guardSize / B_PAGE_SIZE;
1583 
1584 	if (size == 0 || size < guardSize)
1585 		return B_BAD_VALUE;
1586 	if (!arch_vm_supports_protection(protection))
1587 		return B_NOT_SUPPORTED;
1588 
1589 	if (team == B_CURRENT_TEAM)
1590 		team = VMAddressSpace::CurrentID();
1591 	if (team < 0)
1592 		return B_BAD_TEAM_ID;
1593 
1594 	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1595 		canOvercommit = true;
1596 
1597 #ifdef DEBUG_KERNEL_STACKS
1598 	if ((protection & B_KERNEL_STACK_AREA) != 0)
1599 		isStack = true;
1600 #endif
1601 
1602 	// check parameters
1603 	switch (virtualAddressRestrictions->address_specification) {
1604 		case B_ANY_ADDRESS:
1605 		case B_EXACT_ADDRESS:
1606 		case B_BASE_ADDRESS:
1607 		case B_ANY_KERNEL_ADDRESS:
1608 		case B_ANY_KERNEL_BLOCK_ADDRESS:
1609 		case B_RANDOMIZED_ANY_ADDRESS:
1610 		case B_RANDOMIZED_BASE_ADDRESS:
1611 			break;
1612 
1613 		default:
1614 			return B_BAD_VALUE;
1615 	}
1616 
1617 	// If low or high physical address restrictions are given, we force
1618 	// B_CONTIGUOUS wiring, since only then we'll use
1619 	// vm_page_allocate_page_run() which deals with those restrictions.
1620 	if (physicalAddressRestrictions->low_address != 0
1621 		|| physicalAddressRestrictions->high_address != 0) {
1622 		wiring = B_CONTIGUOUS;
1623 	}
1624 
1625 	physical_address_restrictions stackPhysicalRestrictions;
1626 	bool doReserveMemory = false;
1627 	switch (wiring) {
1628 		case B_NO_LOCK:
1629 			break;
1630 		case B_FULL_LOCK:
1631 		case B_LAZY_LOCK:
1632 		case B_CONTIGUOUS:
1633 			doReserveMemory = true;
1634 			break;
1635 		case B_ALREADY_WIRED:
1636 			break;
1637 		case B_LOMEM:
1638 			stackPhysicalRestrictions = *physicalAddressRestrictions;
1639 			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1640 			physicalAddressRestrictions = &stackPhysicalRestrictions;
1641 			wiring = B_CONTIGUOUS;
1642 			doReserveMemory = true;
1643 			break;
1644 		case B_32_BIT_FULL_LOCK:
1645 			if (B_HAIKU_PHYSICAL_BITS <= 32
1646 				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1647 				wiring = B_FULL_LOCK;
1648 				doReserveMemory = true;
1649 				break;
1650 			}
1651 			// TODO: We don't really support this mode efficiently. Just fall
1652 			// through for now ...
1653 		case B_32_BIT_CONTIGUOUS:
1654 			#if B_HAIKU_PHYSICAL_BITS > 32
1655 				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1656 					stackPhysicalRestrictions = *physicalAddressRestrictions;
1657 					stackPhysicalRestrictions.high_address
1658 						= (phys_addr_t)1 << 32;
1659 					physicalAddressRestrictions = &stackPhysicalRestrictions;
1660 				}
1661 			#endif
1662 			wiring = B_CONTIGUOUS;
1663 			doReserveMemory = true;
1664 			break;
1665 		default:
1666 			return B_BAD_VALUE;
1667 	}
1668 
1669 	// Optimization: For a single-page contiguous allocation without low/high
1670 	// memory restriction B_FULL_LOCK wiring suffices.
1671 	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1672 		&& physicalAddressRestrictions->low_address == 0
1673 		&& physicalAddressRestrictions->high_address == 0) {
1674 		wiring = B_FULL_LOCK;
1675 	}
1676 
1677 	// For full lock or contiguous areas we're also going to map the pages and
1678 	// thus need to reserve pages for the mapping backend upfront.
1679 	addr_t reservedMapPages = 0;
1680 	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1681 		AddressSpaceWriteLocker locker;
1682 		status_t status = locker.SetTo(team);
1683 		if (status != B_OK)
1684 			return status;
1685 
1686 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1687 		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1688 	}
1689 
1690 	int priority;
1691 	if (team != VMAddressSpace::KernelID())
1692 		priority = VM_PRIORITY_USER;
1693 	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1694 		priority = VM_PRIORITY_VIP;
1695 	else
1696 		priority = VM_PRIORITY_SYSTEM;
1697 
1698 	// Reserve memory before acquiring the address space lock. This reduces the
1699 	// chances of failure, since while holding the write lock to the address
1700 	// space (if it is the kernel address space that is), the low memory handler
1701 	// won't be able to free anything for us.
1702 	addr_t reservedMemory = 0;
1703 	if (doReserveMemory) {
1704 		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1705 		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1706 			return B_NO_MEMORY;
1707 		reservedMemory = size;
1708 		// TODO: We don't reserve the memory for the pages for the page
1709 		// directories/tables. We actually need to do since we currently don't
1710 		// reclaim them (and probably can't reclaim all of them anyway). Thus
1711 		// there are actually less physical pages than there should be, which
1712 		// can get the VM into trouble in low memory situations.
1713 	}
1714 
1715 	AddressSpaceWriteLocker locker;
1716 	VMAddressSpace* addressSpace;
1717 	status_t status;
1718 
1719 	// For full lock areas reserve the pages before locking the address
1720 	// space. E.g. block caches can't release their memory while we hold the
1721 	// address space lock.
1722 	page_num_t reservedPages = reservedMapPages;
1723 	if (wiring == B_FULL_LOCK)
1724 		reservedPages += size / B_PAGE_SIZE;
1725 
1726 	vm_page_reservation reservation;
1727 	if (reservedPages > 0) {
1728 		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1729 			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1730 					priority)) {
1731 				reservedPages = 0;
1732 				status = B_WOULD_BLOCK;
1733 				goto err0;
1734 			}
1735 		} else
1736 			vm_page_reserve_pages(&reservation, reservedPages, priority);
1737 	}
1738 
1739 	if (wiring == B_CONTIGUOUS) {
1740 		// we try to allocate the page run here upfront as this may easily
1741 		// fail for obvious reasons
1742 		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1743 			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1744 		if (page == NULL) {
1745 			status = B_NO_MEMORY;
1746 			goto err0;
1747 		}
1748 	}
1749 
1750 	// Lock the address space and, if B_EXACT_ADDRESS and
1751 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1752 	// is not wired.
1753 	do {
1754 		status = locker.SetTo(team);
1755 		if (status != B_OK)
1756 			goto err1;
1757 
1758 		addressSpace = locker.AddressSpace();
1759 	} while (virtualAddressRestrictions->address_specification
1760 			== B_EXACT_ADDRESS
1761 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1762 		&& wait_if_address_range_is_wired(addressSpace,
1763 			(addr_t)virtualAddressRestrictions->address, size, &locker));
1764 
1765 	// create an anonymous cache
1766 	// if it's a stack, make sure that two pages are available at least
1767 	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1768 		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1769 		wiring == B_NO_LOCK, priority);
1770 	if (status != B_OK)
1771 		goto err1;
1772 
1773 	cache->temporary = 1;
1774 	cache->virtual_end = size;
1775 	cache->committed_size = reservedMemory;
1776 		// TODO: This should be done via a method.
1777 	reservedMemory = 0;
1778 
1779 	cache->Lock();
1780 
1781 	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1782 		protection, 0, REGION_NO_PRIVATE_MAP, flags,
1783 		virtualAddressRestrictions, kernel, &area, _address);
1784 
1785 	if (status != B_OK) {
1786 		cache->ReleaseRefAndUnlock();
1787 		goto err1;
1788 	}
1789 
1790 	locker.DegradeToReadLock();
1791 
1792 	switch (wiring) {
1793 		case B_NO_LOCK:
1794 		case B_LAZY_LOCK:
1795 			// do nothing - the pages are mapped in as needed
1796 			break;
1797 
1798 		case B_FULL_LOCK:
1799 		{
1800 			// Allocate and map all pages for this area
1801 
1802 			off_t offset = 0;
1803 			for (addr_t address = area->Base();
1804 					address < area->Base() + (area->Size() - 1);
1805 					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1806 #ifdef DEBUG_KERNEL_STACKS
1807 #	ifdef STACK_GROWS_DOWNWARDS
1808 				if (isStack && address < area->Base()
1809 						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1810 #	else
1811 				if (isStack && address >= area->Base() + area->Size()
1812 						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1813 #	endif
1814 					continue;
1815 #endif
1816 				vm_page* page = vm_page_allocate_page(&reservation,
1817 					PAGE_STATE_WIRED | pageAllocFlags);
1818 				cache->InsertPage(page, offset);
1819 				map_page(area, page, address, protection, &reservation);
1820 
1821 				DEBUG_PAGE_ACCESS_END(page);
1822 			}
1823 
1824 			break;
1825 		}
1826 
1827 		case B_ALREADY_WIRED:
1828 		{
1829 			// The pages should already be mapped. This is only really useful
1830 			// during boot time. Find the appropriate vm_page objects and stick
1831 			// them in the cache object.
1832 			VMTranslationMap* map = addressSpace->TranslationMap();
1833 			off_t offset = 0;
1834 
1835 			if (!gKernelStartup)
1836 				panic("ALREADY_WIRED flag used outside kernel startup\n");
1837 
1838 			map->Lock();
1839 
1840 			for (addr_t virtualAddress = area->Base();
1841 					virtualAddress < area->Base() + (area->Size() - 1);
1842 					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1843 				phys_addr_t physicalAddress;
1844 				uint32 flags;
1845 				status = map->Query(virtualAddress, &physicalAddress, &flags);
1846 				if (status < B_OK) {
1847 					panic("looking up mapping failed for va 0x%lx\n",
1848 						virtualAddress);
1849 				}
1850 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1851 				if (page == NULL) {
1852 					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1853 						"\n", physicalAddress);
1854 				}
1855 
1856 				DEBUG_PAGE_ACCESS_START(page);
1857 
1858 				cache->InsertPage(page, offset);
1859 				increment_page_wired_count(page);
1860 				vm_page_set_state(page, PAGE_STATE_WIRED);
1861 				page->busy = false;
1862 
1863 				DEBUG_PAGE_ACCESS_END(page);
1864 			}
1865 
1866 			map->Unlock();
1867 			break;
1868 		}
1869 
1870 		case B_CONTIGUOUS:
1871 		{
1872 			// We have already allocated our continuous pages run, so we can now
1873 			// just map them in the address space
1874 			VMTranslationMap* map = addressSpace->TranslationMap();
1875 			phys_addr_t physicalAddress
1876 				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1877 			addr_t virtualAddress = area->Base();
1878 			off_t offset = 0;
1879 
1880 			map->Lock();
1881 
1882 			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1883 					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1884 					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1885 				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1886 				if (page == NULL)
1887 					panic("couldn't lookup physical page just allocated\n");
1888 
1889 				status = map->Map(virtualAddress, physicalAddress, protection,
1890 					area->MemoryType(), &reservation);
1891 				if (status < B_OK)
1892 					panic("couldn't map physical page in page run\n");
1893 
1894 				cache->InsertPage(page, offset);
1895 				increment_page_wired_count(page);
1896 
1897 				DEBUG_PAGE_ACCESS_END(page);
1898 			}
1899 
1900 			map->Unlock();
1901 			break;
1902 		}
1903 
1904 		default:
1905 			break;
1906 	}
1907 
1908 	cache->Unlock();
1909 
1910 	if (reservedPages > 0)
1911 		vm_page_unreserve_pages(&reservation);
1912 
1913 	TRACE(("vm_create_anonymous_area: done\n"));
1914 
1915 	area->cache_type = CACHE_TYPE_RAM;
1916 	return area->id;
1917 
1918 err1:
1919 	if (wiring == B_CONTIGUOUS) {
1920 		// we had reserved the area space upfront...
1921 		phys_addr_t pageNumber = page->physical_page_number;
1922 		int32 i;
1923 		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1924 			page = vm_lookup_page(pageNumber);
1925 			if (page == NULL)
1926 				panic("couldn't lookup physical page just allocated\n");
1927 
1928 			vm_page_set_state(page, PAGE_STATE_FREE);
1929 		}
1930 	}
1931 
1932 err0:
1933 	if (reservedPages > 0)
1934 		vm_page_unreserve_pages(&reservation);
1935 	if (reservedMemory > 0)
1936 		vm_unreserve_memory(reservedMemory);
1937 
1938 	return status;
1939 }
1940 
1941 
1942 area_id
vm_map_physical_memory(team_id team,const char * name,void ** _address,uint32 addressSpec,addr_t size,uint32 protection,phys_addr_t physicalAddress,bool alreadyWired)1943 vm_map_physical_memory(team_id team, const char* name, void** _address,
1944 	uint32 addressSpec, addr_t size, uint32 protection,
1945 	phys_addr_t physicalAddress, bool alreadyWired)
1946 {
1947 	VMArea* area;
1948 	VMCache* cache;
1949 	addr_t mapOffset;
1950 
1951 	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1952 		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1953 		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1954 		addressSpec, size, protection, physicalAddress));
1955 
1956 	if (!arch_vm_supports_protection(protection))
1957 		return B_NOT_SUPPORTED;
1958 
1959 	AddressSpaceWriteLocker locker(team);
1960 	if (!locker.IsLocked())
1961 		return B_BAD_TEAM_ID;
1962 
1963 	// if the physical address is somewhat inside a page,
1964 	// move the actual area down to align on a page boundary
1965 	mapOffset = physicalAddress % B_PAGE_SIZE;
1966 	size += mapOffset;
1967 	physicalAddress -= mapOffset;
1968 
1969 	size = PAGE_ALIGN(size);
1970 
1971 	// create a device cache
1972 	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1973 	if (status != B_OK)
1974 		return status;
1975 
1976 	cache->virtual_end = size;
1977 
1978 	cache->Lock();
1979 
1980 	virtual_address_restrictions addressRestrictions = {};
1981 	addressRestrictions.address = *_address;
1982 	addressRestrictions.address_specification = addressSpec & ~B_MEMORY_TYPE_MASK;
1983 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1984 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, CREATE_AREA_DONT_COMMIT_MEMORY,
1985 		&addressRestrictions, true, &area, _address);
1986 
1987 	if (status < B_OK)
1988 		cache->ReleaseRefLocked();
1989 
1990 	cache->Unlock();
1991 
1992 	if (status == B_OK) {
1993 		// Set requested memory type -- default to uncached, but allow
1994 		// that to be overridden by ranges that may already exist.
1995 		uint32 memoryType = addressSpec & B_MEMORY_TYPE_MASK;
1996 		const bool weak = (memoryType == 0);
1997 		if (weak)
1998 			memoryType = B_UNCACHED_MEMORY;
1999 
2000 		status = arch_vm_set_memory_type(area, physicalAddress, memoryType,
2001 			weak ? &memoryType : NULL);
2002 
2003 		area->SetMemoryType(memoryType);
2004 
2005 		if (status != B_OK)
2006 			delete_area(locker.AddressSpace(), area, false);
2007 	}
2008 
2009 	if (status != B_OK)
2010 		return status;
2011 
2012 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2013 
2014 	if (alreadyWired) {
2015 		// The area is already mapped, but possibly not with the right
2016 		// memory type.
2017 		map->Lock();
2018 		map->ProtectArea(area, area->protection);
2019 		map->Unlock();
2020 	} else {
2021 		// Map the area completely.
2022 
2023 		// reserve pages needed for the mapping
2024 		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
2025 			area->Base() + (size - 1));
2026 		vm_page_reservation reservation;
2027 		vm_page_reserve_pages(&reservation, reservePages,
2028 			team == VMAddressSpace::KernelID()
2029 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2030 
2031 		map->Lock();
2032 
2033 		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2034 			map->Map(area->Base() + offset, physicalAddress + offset,
2035 				protection, area->MemoryType(), &reservation);
2036 		}
2037 
2038 		map->Unlock();
2039 
2040 		vm_page_unreserve_pages(&reservation);
2041 	}
2042 
2043 	// modify the pointer returned to be offset back into the new area
2044 	// the same way the physical address in was offset
2045 	*_address = (void*)((addr_t)*_address + mapOffset);
2046 
2047 	area->cache_type = CACHE_TYPE_DEVICE;
2048 	return area->id;
2049 }
2050 
2051 
2052 /*!	Don't use!
2053 	TODO: This function was introduced to map physical page vecs to
2054 	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
2055 	use a device cache and does not track vm_page::wired_count!
2056 */
2057 area_id
vm_map_physical_memory_vecs(team_id team,const char * name,void ** _address,uint32 addressSpec,addr_t * _size,uint32 protection,struct generic_io_vec * vecs,uint32 vecCount)2058 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
2059 	uint32 addressSpec, addr_t* _size, uint32 protection,
2060 	struct generic_io_vec* vecs, uint32 vecCount)
2061 {
2062 	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
2063 		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
2064 		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
2065 		addressSpec, _size, protection, vecs, vecCount));
2066 
2067 	if (!arch_vm_supports_protection(protection)
2068 		|| (addressSpec & B_MEMORY_TYPE_MASK) != 0) {
2069 		return B_NOT_SUPPORTED;
2070 	}
2071 
2072 	AddressSpaceWriteLocker locker(team);
2073 	if (!locker.IsLocked())
2074 		return B_BAD_TEAM_ID;
2075 
2076 	if (vecCount == 0)
2077 		return B_BAD_VALUE;
2078 
2079 	addr_t size = 0;
2080 	for (uint32 i = 0; i < vecCount; i++) {
2081 		if (vecs[i].base % B_PAGE_SIZE != 0
2082 			|| vecs[i].length % B_PAGE_SIZE != 0) {
2083 			return B_BAD_VALUE;
2084 		}
2085 
2086 		size += vecs[i].length;
2087 	}
2088 
2089 	// create a device cache
2090 	VMCache* cache;
2091 	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
2092 	if (result != B_OK)
2093 		return result;
2094 
2095 	cache->virtual_end = size;
2096 
2097 	cache->Lock();
2098 
2099 	VMArea* area;
2100 	virtual_address_restrictions addressRestrictions = {};
2101 	addressRestrictions.address = *_address;
2102 	addressRestrictions.address_specification = addressSpec & ~B_MEMORY_TYPE_MASK;
2103 	result = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
2104 		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, CREATE_AREA_DONT_COMMIT_MEMORY,
2105 		&addressRestrictions, true, &area, _address);
2106 
2107 	if (result != B_OK)
2108 		cache->ReleaseRefLocked();
2109 
2110 	cache->Unlock();
2111 
2112 	if (result != B_OK)
2113 		return result;
2114 
2115 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2116 	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
2117 		area->Base() + (size - 1));
2118 
2119 	vm_page_reservation reservation;
2120 	vm_page_reserve_pages(&reservation, reservePages,
2121 			team == VMAddressSpace::KernelID()
2122 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2123 	map->Lock();
2124 
2125 	uint32 vecIndex = 0;
2126 	size_t vecOffset = 0;
2127 	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2128 		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
2129 			vecOffset = 0;
2130 			vecIndex++;
2131 		}
2132 
2133 		if (vecIndex >= vecCount)
2134 			break;
2135 
2136 		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
2137 			protection, area->MemoryType(), &reservation);
2138 
2139 		vecOffset += B_PAGE_SIZE;
2140 	}
2141 
2142 	map->Unlock();
2143 	vm_page_unreserve_pages(&reservation);
2144 
2145 	if (_size != NULL)
2146 		*_size = size;
2147 
2148 	area->cache_type = CACHE_TYPE_DEVICE;
2149 	return area->id;
2150 }
2151 
2152 
2153 area_id
vm_create_null_area(team_id team,const char * name,void ** address,uint32 addressSpec,addr_t size,uint32 flags)2154 vm_create_null_area(team_id team, const char* name, void** address,
2155 	uint32 addressSpec, addr_t size, uint32 flags)
2156 {
2157 	size = PAGE_ALIGN(size);
2158 
2159 	// Lock the address space and, if B_EXACT_ADDRESS and
2160 	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
2161 	// is not wired.
2162 	AddressSpaceWriteLocker locker;
2163 	do {
2164 		if (locker.SetTo(team) != B_OK)
2165 			return B_BAD_TEAM_ID;
2166 	} while (addressSpec == B_EXACT_ADDRESS
2167 		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
2168 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2169 			(addr_t)*address, size, &locker));
2170 
2171 	// create a null cache
2172 	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
2173 		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
2174 	VMCache* cache;
2175 	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
2176 	if (status != B_OK)
2177 		return status;
2178 
2179 	cache->temporary = 1;
2180 	cache->virtual_end = size;
2181 
2182 	cache->Lock();
2183 
2184 	VMArea* area;
2185 	virtual_address_restrictions addressRestrictions = {};
2186 	addressRestrictions.address = *address;
2187 	addressRestrictions.address_specification = addressSpec;
2188 	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
2189 		B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
2190 		REGION_NO_PRIVATE_MAP, flags | CREATE_AREA_DONT_COMMIT_MEMORY,
2191 		&addressRestrictions, true, &area, address);
2192 
2193 	if (status < B_OK) {
2194 		cache->ReleaseRefAndUnlock();
2195 		return status;
2196 	}
2197 
2198 	cache->Unlock();
2199 
2200 	area->cache_type = CACHE_TYPE_NULL;
2201 	return area->id;
2202 }
2203 
2204 
2205 /*!	Creates the vnode cache for the specified \a vnode.
2206 	The vnode has to be marked busy when calling this function.
2207 */
2208 status_t
vm_create_vnode_cache(struct vnode * vnode,struct VMCache ** cache)2209 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
2210 {
2211 	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2212 }
2213 
2214 
2215 /*!	\a cache must be locked. The area's address space must be read-locked.
2216 */
2217 static void
pre_map_area_pages(VMArea * area,VMCache * cache,vm_page_reservation * reservation,int32 maxCount)2218 pre_map_area_pages(VMArea* area, VMCache* cache,
2219 	vm_page_reservation* reservation, int32 maxCount)
2220 {
2221 	addr_t baseAddress = area->Base();
2222 	addr_t cacheOffset = area->cache_offset;
2223 	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2224 	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
2225 
2226 	VMCachePagesTree::Iterator it = cache->pages.GetIterator(firstPage, true, true);
2227 	vm_page* page;
2228 	while ((page = it.Next()) != NULL && maxCount > 0) {
2229 		if (page->cache_offset >= endPage)
2230 			break;
2231 
2232 		// skip busy and inactive pages
2233 		if (page->busy || (page->usage_count == 0 && !page->accessed))
2234 			continue;
2235 
2236 		DEBUG_PAGE_ACCESS_START(page);
2237 		map_page(area, page,
2238 			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2239 			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2240 		maxCount--;
2241 		DEBUG_PAGE_ACCESS_END(page);
2242 	}
2243 }
2244 
2245 
2246 /*!	Will map the file specified by \a fd to an area in memory.
2247 	The file will be mirrored beginning at the specified \a offset. The
2248 	\a offset and \a size arguments have to be page aligned.
2249 */
2250 static area_id
_vm_map_file(team_id team,const char * name,void ** _address,uint32 addressSpec,size_t size,uint32 protection,uint32 mapping,bool unmapAddressRange,int fd,off_t offset,bool kernel)2251 _vm_map_file(team_id team, const char* name, void** _address,
2252 	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2253 	bool unmapAddressRange, int fd, off_t offset, bool kernel)
2254 {
2255 	// TODO: for binary files, we want to make sure that they get the
2256 	//	copy of a file at a given time, ie. later changes should not
2257 	//	make it into the mapped copy -- this will need quite some changes
2258 	//	to be done in a nice way
2259 	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2260 		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
2261 
2262 	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2263 	size = PAGE_ALIGN(size);
2264 
2265 	if (mapping == REGION_NO_PRIVATE_MAP)
2266 		protection |= B_SHARED_AREA;
2267 	if (addressSpec != B_EXACT_ADDRESS)
2268 		unmapAddressRange = false;
2269 
2270 	uint32 mappingFlags = 0;
2271 	if (unmapAddressRange)
2272 		mappingFlags |= CREATE_AREA_UNMAP_ADDRESS_RANGE;
2273 
2274 	if (fd < 0) {
2275 		virtual_address_restrictions virtualRestrictions = {};
2276 		virtualRestrictions.address = *_address;
2277 		virtualRestrictions.address_specification = addressSpec;
2278 		physical_address_restrictions physicalRestrictions = {};
2279 		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2280 			mappingFlags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2281 			_address);
2282 	}
2283 
2284 	// get the open flags of the FD
2285 	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2286 	if (descriptor == NULL)
2287 		return EBADF;
2288 	int32 openMode = descriptor->open_mode;
2289 	put_fd(descriptor);
2290 
2291 	// The FD must open for reading at any rate. For shared mapping with write
2292 	// access, additionally the FD must be open for writing.
2293 	if ((openMode & O_ACCMODE) == O_WRONLY
2294 		|| (mapping == REGION_NO_PRIVATE_MAP
2295 			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2296 			&& (openMode & O_ACCMODE) == O_RDONLY)) {
2297 		return EACCES;
2298 	}
2299 
2300 	uint32 protectionMax = 0;
2301 	if (mapping == REGION_NO_PRIVATE_MAP) {
2302 		if ((openMode & O_ACCMODE) == O_RDWR)
2303 			protectionMax = protection | B_USER_PROTECTION;
2304 		else
2305 			protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA);
2306 	} else if (mapping == REGION_PRIVATE_MAP) {
2307 		// For privately mapped read-only regions, skip committing memory.
2308 		// (If protections are changed later on, memory will be committed then.)
2309 		if ((protection & B_WRITE_AREA) == 0)
2310 			mappingFlags |= CREATE_AREA_DONT_COMMIT_MEMORY;
2311 	}
2312 
2313 	// get the vnode for the object, this also grabs a ref to it
2314 	struct vnode* vnode = NULL;
2315 	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2316 	if (status < B_OK)
2317 		return status;
2318 	VnodePutter vnodePutter(vnode);
2319 
2320 	// If we're going to pre-map pages, we need to reserve the pages needed by
2321 	// the mapping backend upfront.
2322 	page_num_t reservedPreMapPages = 0;
2323 	vm_page_reservation reservation;
2324 	if ((protection & B_READ_AREA) != 0) {
2325 		AddressSpaceWriteLocker locker;
2326 		status = locker.SetTo(team);
2327 		if (status != B_OK)
2328 			return status;
2329 
2330 		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2331 		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2332 
2333 		locker.Unlock();
2334 
2335 		vm_page_reserve_pages(&reservation, reservedPreMapPages,
2336 			team == VMAddressSpace::KernelID()
2337 				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2338 	}
2339 
2340 	struct PageUnreserver {
2341 		PageUnreserver(vm_page_reservation* reservation)
2342 			:
2343 			fReservation(reservation)
2344 		{
2345 		}
2346 
2347 		~PageUnreserver()
2348 		{
2349 			if (fReservation != NULL)
2350 				vm_page_unreserve_pages(fReservation);
2351 		}
2352 
2353 		vm_page_reservation* fReservation;
2354 	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2355 
2356 	// Lock the address space and, if the specified address range shall be
2357 	// unmapped, ensure it is not wired.
2358 	AddressSpaceWriteLocker locker;
2359 	do {
2360 		if (locker.SetTo(team) != B_OK)
2361 			return B_BAD_TEAM_ID;
2362 	} while (unmapAddressRange
2363 		&& wait_if_address_range_is_wired(locker.AddressSpace(),
2364 			(addr_t)*_address, size, &locker));
2365 
2366 	// TODO: this only works for file systems that use the file cache
2367 	VMCache* cache;
2368 	status = vfs_get_vnode_cache(vnode, &cache, false);
2369 	if (status < B_OK)
2370 		return status;
2371 
2372 	cache->Lock();
2373 
2374 	VMArea* area;
2375 	virtual_address_restrictions addressRestrictions = {};
2376 	addressRestrictions.address = *_address;
2377 	addressRestrictions.address_specification = addressSpec;
2378 	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2379 		0, protection, protectionMax, mapping, mappingFlags,
2380 		&addressRestrictions, kernel, &area, _address);
2381 
2382 	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2383 		// map_backing_store() cannot know we no longer need the ref
2384 		cache->ReleaseRefLocked();
2385 	}
2386 
2387 	if (status == B_OK && (protection & B_READ_AREA) != 0) {
2388 		// Pre-map at most 10MB worth of pages.
2389 		pre_map_area_pages(area, cache, &reservation,
2390 			(10LL * 1024 * 1024) / B_PAGE_SIZE);
2391 	}
2392 
2393 	cache->Unlock();
2394 
2395 	if (status == B_OK) {
2396 		// TODO: this probably deserves a smarter solution, e.g. probably
2397 		// trigger prefetch somewhere else.
2398 
2399 		// Prefetch at most 10MB starting from "offset", but only if the cache
2400 		// doesn't already contain more pages than the prefetch size.
2401 		const size_t prefetch = min_c(size, 10LL * 1024 * 1024);
2402 		if (cache->page_count < (prefetch / B_PAGE_SIZE))
2403 			cache_prefetch_vnode(vnode, offset, prefetch);
2404 	}
2405 
2406 	if (status != B_OK)
2407 		return status;
2408 
2409 	area->cache_type = CACHE_TYPE_VNODE;
2410 	return area->id;
2411 }
2412 
2413 
2414 area_id
vm_map_file(team_id aid,const char * name,void ** address,uint32 addressSpec,addr_t size,uint32 protection,uint32 mapping,bool unmapAddressRange,int fd,off_t offset)2415 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2416 	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2417 	int fd, off_t offset)
2418 {
2419 	if (!arch_vm_supports_protection(protection))
2420 		return B_NOT_SUPPORTED;
2421 
2422 	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2423 		mapping, unmapAddressRange, fd, offset, true);
2424 }
2425 
2426 
2427 VMCache*
vm_area_get_locked_cache(VMArea * area)2428 vm_area_get_locked_cache(VMArea* area)
2429 {
2430 	rw_lock_read_lock(&sAreaCacheLock);
2431 
2432 	while (true) {
2433 		VMCache* cache = area->cache;
2434 
2435 		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2436 			// cache has been deleted
2437 			rw_lock_read_lock(&sAreaCacheLock);
2438 			continue;
2439 		}
2440 
2441 		rw_lock_read_lock(&sAreaCacheLock);
2442 
2443 		if (cache == area->cache) {
2444 			cache->AcquireRefLocked();
2445 			rw_lock_read_unlock(&sAreaCacheLock);
2446 			return cache;
2447 		}
2448 
2449 		// the cache changed in the meantime
2450 		cache->Unlock();
2451 	}
2452 }
2453 
2454 
2455 void
vm_area_put_locked_cache(VMCache * cache)2456 vm_area_put_locked_cache(VMCache* cache)
2457 {
2458 	cache->ReleaseRefAndUnlock();
2459 }
2460 
2461 
2462 area_id
vm_clone_area(team_id team,const char * name,void ** address,uint32 addressSpec,uint32 protection,uint32 mapping,area_id sourceID,bool kernel)2463 vm_clone_area(team_id team, const char* name, void** address,
2464 	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2465 	bool kernel)
2466 {
2467 	VMArea* newArea = NULL;
2468 	VMArea* sourceArea;
2469 
2470 	// Check whether the source area exists and is cloneable. If so, mark it
2471 	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2472 	{
2473 		AddressSpaceWriteLocker locker;
2474 		status_t status = locker.SetFromArea(sourceID, sourceArea);
2475 		if (status != B_OK)
2476 			return status;
2477 
2478 		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2479 			return B_NOT_ALLOWED;
2480 
2481 		sourceArea->protection |= B_SHARED_AREA;
2482 		protection |= B_SHARED_AREA;
2483 	}
2484 
2485 	// Now lock both address spaces and actually do the cloning.
2486 
2487 	MultiAddressSpaceLocker locker;
2488 	VMAddressSpace* sourceAddressSpace;
2489 	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2490 	if (status != B_OK)
2491 		return status;
2492 
2493 	VMAddressSpace* targetAddressSpace;
2494 	status = locker.AddTeam(team, true, &targetAddressSpace);
2495 	if (status != B_OK)
2496 		return status;
2497 
2498 	status = locker.Lock();
2499 	if (status != B_OK)
2500 		return status;
2501 
2502 	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2503 	if (sourceArea == NULL)
2504 		return B_BAD_VALUE;
2505 
2506 	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2507 		return B_NOT_ALLOWED;
2508 
2509 	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2510 
2511 	if (!kernel && sourceAddressSpace != targetAddressSpace
2512 		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2513 #if KDEBUG
2514 		Team* team = thread_get_current_thread()->team;
2515 		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2516 			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2517 #endif
2518 		status = B_NOT_ALLOWED;
2519 	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2520 		status = B_NOT_ALLOWED;
2521 	} else {
2522 		uint32 flags = 0;
2523 		if (mapping != REGION_PRIVATE_MAP)
2524 			flags |= CREATE_AREA_DONT_COMMIT_MEMORY;
2525 
2526 		virtual_address_restrictions addressRestrictions = {};
2527 		addressRestrictions.address = *address;
2528 		addressRestrictions.address_specification = addressSpec;
2529 		status = map_backing_store(targetAddressSpace, cache,
2530 			sourceArea->cache_offset, name, sourceArea->Size(),
2531 			sourceArea->wiring, protection, sourceArea->protection_max,
2532 			mapping, flags, &addressRestrictions,
2533 			kernel, &newArea, address);
2534 	}
2535 	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2536 		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2537 		// to create a new cache, and has therefore already acquired a reference
2538 		// to the source cache - but otherwise it has no idea that we need
2539 		// one.
2540 		cache->AcquireRefLocked();
2541 	}
2542 	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2543 		// we need to map in everything at this point
2544 		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2545 			// we don't have actual pages to map but a physical area
2546 			VMTranslationMap* map
2547 				= sourceArea->address_space->TranslationMap();
2548 			map->Lock();
2549 
2550 			phys_addr_t physicalAddress;
2551 			uint32 oldProtection;
2552 			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2553 
2554 			map->Unlock();
2555 
2556 			map = targetAddressSpace->TranslationMap();
2557 			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2558 				newArea->Base() + (newArea->Size() - 1));
2559 
2560 			vm_page_reservation reservation;
2561 			vm_page_reserve_pages(&reservation, reservePages,
2562 				targetAddressSpace == VMAddressSpace::Kernel()
2563 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2564 			map->Lock();
2565 
2566 			for (addr_t offset = 0; offset < newArea->Size();
2567 					offset += B_PAGE_SIZE) {
2568 				map->Map(newArea->Base() + offset, physicalAddress + offset,
2569 					protection, newArea->MemoryType(), &reservation);
2570 			}
2571 
2572 			map->Unlock();
2573 			vm_page_unreserve_pages(&reservation);
2574 		} else {
2575 			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2576 			size_t reservePages = map->MaxPagesNeededToMap(
2577 				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2578 			vm_page_reservation reservation;
2579 			vm_page_reserve_pages(&reservation, reservePages,
2580 				targetAddressSpace == VMAddressSpace::Kernel()
2581 					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2582 
2583 			// map in all pages from source
2584 			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2585 					vm_page* page  = it.Next();) {
2586 				if (!page->busy) {
2587 					DEBUG_PAGE_ACCESS_START(page);
2588 					map_page(newArea, page,
2589 						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2590 							- newArea->cache_offset),
2591 						protection, &reservation);
2592 					DEBUG_PAGE_ACCESS_END(page);
2593 				}
2594 			}
2595 			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2596 			// ensuring that!
2597 
2598 			vm_page_unreserve_pages(&reservation);
2599 		}
2600 	}
2601 	if (status == B_OK)
2602 		newArea->cache_type = sourceArea->cache_type;
2603 
2604 	vm_area_put_locked_cache(cache);
2605 
2606 	if (status < B_OK)
2607 		return status;
2608 
2609 	return newArea->id;
2610 }
2611 
2612 
2613 /*!	Deletes the specified area of the given address space.
2614 
2615 	The address space must be write-locked.
2616 	The caller must ensure that the area does not have any wired ranges.
2617 
2618 	\param addressSpace The address space containing the area.
2619 	\param area The area to be deleted.
2620 	\param deletingAddressSpace \c true, if the address space is in the process
2621 		of being deleted.
2622 	\param alreadyRemoved \c true, if the area was already removed from the global
2623 		areas map (and thus had its ID deallocated.)
2624 */
2625 static void
delete_area(VMAddressSpace * addressSpace,VMArea * area,bool deletingAddressSpace,bool alreadyRemoved)2626 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2627 	bool deletingAddressSpace, bool alreadyRemoved)
2628 {
2629 	ASSERT(!area->IsWired());
2630 
2631 	if (area->id >= 0 && !alreadyRemoved)
2632 		VMAreas::Remove(area);
2633 
2634 	// At this point the area is removed from the global hash table, but
2635 	// still exists in the area list.
2636 
2637 	// Unmap the virtual address space the area occupied.
2638 	{
2639 		// We need to lock the complete cache chain.
2640 		VMCache* topCache = vm_area_get_locked_cache(area);
2641 		VMCacheChainLocker cacheChainLocker(topCache);
2642 		cacheChainLocker.LockAllSourceCaches();
2643 
2644 		// If the area's top cache is a temporary cache and the area is the only
2645 		// one referencing it (besides us currently holding a second reference),
2646 		// the unmapping code doesn't need to care about preserving the accessed
2647 		// and dirty flags of the top cache page mappings.
2648 		bool ignoreTopCachePageFlags
2649 			= topCache->temporary && topCache->RefCount() == 2;
2650 
2651 		area->address_space->TranslationMap()->UnmapArea(area,
2652 			deletingAddressSpace, ignoreTopCachePageFlags);
2653 	}
2654 
2655 	if (!area->cache->temporary)
2656 		area->cache->WriteModified();
2657 
2658 	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2659 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2660 
2661 	arch_vm_unset_memory_type(area);
2662 	addressSpace->RemoveArea(area, allocationFlags);
2663 	addressSpace->Put();
2664 
2665 	area->cache->RemoveArea(area);
2666 	area->cache->ReleaseRef();
2667 
2668 	addressSpace->DeleteArea(area, allocationFlags);
2669 }
2670 
2671 
2672 status_t
vm_delete_area(team_id team,area_id id,bool kernel)2673 vm_delete_area(team_id team, area_id id, bool kernel)
2674 {
2675 	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2676 		team, id));
2677 
2678 	// lock the address space and make sure the area isn't wired
2679 	AddressSpaceWriteLocker locker;
2680 	VMArea* area;
2681 	AreaCacheLocker cacheLocker;
2682 
2683 	do {
2684 		status_t status = locker.SetFromArea(team, id, area);
2685 		if (status != B_OK)
2686 			return status;
2687 
2688 		cacheLocker.SetTo(area);
2689 	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2690 
2691 	cacheLocker.Unlock();
2692 
2693 	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2694 		return B_NOT_ALLOWED;
2695 
2696 	delete_area(locker.AddressSpace(), area, false);
2697 	return B_OK;
2698 }
2699 
2700 
2701 /*!	Creates a new cache on top of given cache, moves all areas from
2702 	the old cache to the new one, and changes the protection of all affected
2703 	areas' pages to read-only. If requested, wired pages are moved up to the
2704 	new cache and copies are added to the old cache in their place.
2705 	Preconditions:
2706 	- The given cache must be locked.
2707 	- All of the cache's areas' address spaces must be read locked.
2708 	- Either the cache must not have any wired ranges or a page reservation for
2709 	  all wired pages must be provided, so they can be copied.
2710 
2711 	\param lowerCache The cache on top of which a new cache shall be created.
2712 	\param wiredPagesReservation If \c NULL there must not be any wired pages
2713 		in \a lowerCache. Otherwise as many pages must be reserved as the cache
2714 		has wired page. The wired pages are copied in this case.
2715 */
2716 static status_t
vm_copy_on_write_area(VMCache * lowerCache,vm_page_reservation * wiredPagesReservation)2717 vm_copy_on_write_area(VMCache* lowerCache,
2718 	vm_page_reservation* wiredPagesReservation)
2719 {
2720 	VMCache* upperCache;
2721 
2722 	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2723 
2724 	// We need to separate the cache from its areas. The cache goes one level
2725 	// deeper and we create a new cache inbetween.
2726 
2727 	// create an anonymous cache
2728 	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2729 		lowerCache->GuardSize() / B_PAGE_SIZE,
2730 		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2731 		VM_PRIORITY_USER);
2732 	if (status != B_OK)
2733 		return status;
2734 
2735 	upperCache->Lock();
2736 
2737 	upperCache->temporary = 1;
2738 	upperCache->virtual_base = lowerCache->virtual_base;
2739 	upperCache->virtual_end = lowerCache->virtual_end;
2740 
2741 	// transfer the lower cache areas to the upper cache
2742 	rw_lock_write_lock(&sAreaCacheLock);
2743 	upperCache->TransferAreas(lowerCache);
2744 	rw_lock_write_unlock(&sAreaCacheLock);
2745 
2746 	lowerCache->AddConsumer(upperCache);
2747 
2748 	// We now need to remap all pages from all of the cache's areas read-only,
2749 	// so that a copy will be created on next write access. If there are wired
2750 	// pages, we keep their protection, move them to the upper cache and create
2751 	// copies for the lower cache.
2752 	if (wiredPagesReservation != NULL) {
2753 		// We need to handle wired pages -- iterate through the cache's pages.
2754 		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2755 				vm_page* page = it.Next();) {
2756 			if (page->WiredCount() > 0) {
2757 				// allocate a new page and copy the wired one
2758 				vm_page* copiedPage = vm_page_allocate_page(
2759 					wiredPagesReservation, PAGE_STATE_ACTIVE);
2760 
2761 				vm_memcpy_physical_page(
2762 					copiedPage->physical_page_number * B_PAGE_SIZE,
2763 					page->physical_page_number * B_PAGE_SIZE);
2764 
2765 				// move the wired page to the upper cache (note: removing is OK
2766 				// with the SplayTree iterator) and insert the copy
2767 				upperCache->MovePage(page);
2768 				lowerCache->InsertPage(copiedPage,
2769 					page->cache_offset * B_PAGE_SIZE);
2770 
2771 				DEBUG_PAGE_ACCESS_END(copiedPage);
2772 			} else {
2773 				// Change the protection of this page in all areas.
2774 				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2775 						tempArea = tempArea->cache_next) {
2776 					if (!is_page_in_area(tempArea, page))
2777 						continue;
2778 
2779 					// The area must be readable in the same way it was
2780 					// previously writable.
2781 					addr_t address = virtual_page_address(tempArea, page);
2782 					uint32 protection = 0;
2783 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2784 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2785 						protection |= B_KERNEL_READ_AREA;
2786 					if ((pageProtection & B_READ_AREA) != 0)
2787 						protection |= B_READ_AREA;
2788 
2789 					VMTranslationMap* map
2790 						= tempArea->address_space->TranslationMap();
2791 					map->Lock();
2792 					map->ProtectPage(tempArea, address, protection);
2793 					map->Unlock();
2794 				}
2795 			}
2796 		}
2797 	} else {
2798 		ASSERT(lowerCache->WiredPagesCount() == 0);
2799 
2800 		// just change the protection of all areas
2801 		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2802 				tempArea = tempArea->cache_next) {
2803 			if (tempArea->page_protections != NULL) {
2804 				// Change the protection of all pages in this area.
2805 				VMTranslationMap* map = tempArea->address_space->TranslationMap();
2806 				map->Lock();
2807 				for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2808 					vm_page* page = it.Next();) {
2809 					if (!is_page_in_area(tempArea, page))
2810 						continue;
2811 
2812 					// The area must be readable in the same way it was
2813 					// previously writable.
2814 					addr_t address = virtual_page_address(tempArea, page);
2815 					uint32 protection = 0;
2816 					uint32 pageProtection = get_area_page_protection(tempArea, address);
2817 					if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2818 						protection |= B_KERNEL_READ_AREA;
2819 					if ((pageProtection & B_READ_AREA) != 0)
2820 						protection |= B_READ_AREA;
2821 
2822 					map->ProtectPage(tempArea, address, protection);
2823 				}
2824 				map->Unlock();
2825 				continue;
2826 			}
2827 			// The area must be readable in the same way it was previously
2828 			// writable.
2829 			uint32 protection = 0;
2830 			if ((tempArea->protection & B_KERNEL_READ_AREA) != 0)
2831 				protection |= B_KERNEL_READ_AREA;
2832 			if ((tempArea->protection & B_READ_AREA) != 0)
2833 				protection |= B_READ_AREA;
2834 
2835 			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2836 			map->Lock();
2837 			map->ProtectArea(tempArea, protection);
2838 			map->Unlock();
2839 		}
2840 	}
2841 
2842 	vm_area_put_locked_cache(upperCache);
2843 
2844 	return B_OK;
2845 }
2846 
2847 
2848 area_id
vm_copy_area(team_id team,const char * name,void ** _address,uint32 addressSpec,area_id sourceID)2849 vm_copy_area(team_id team, const char* name, void** _address,
2850 	uint32 addressSpec, area_id sourceID)
2851 {
2852 	// Do the locking: target address space, all address spaces associated with
2853 	// the source cache, and the cache itself.
2854 	MultiAddressSpaceLocker locker;
2855 	VMAddressSpace* targetAddressSpace;
2856 	VMCache* cache;
2857 	VMArea* source;
2858 	AreaCacheLocker cacheLocker;
2859 	status_t status;
2860 	bool sharedArea;
2861 
2862 	page_num_t wiredPages = 0;
2863 	vm_page_reservation wiredPagesReservation;
2864 
2865 	bool restart;
2866 	do {
2867 		restart = false;
2868 
2869 		locker.Unset();
2870 		status = locker.AddTeam(team, true, &targetAddressSpace);
2871 		if (status == B_OK) {
2872 			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2873 				&cache);
2874 		}
2875 		if (status != B_OK)
2876 			return status;
2877 
2878 		cacheLocker.SetTo(cache, true);	// already locked
2879 
2880 		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2881 
2882 		page_num_t oldWiredPages = wiredPages;
2883 		wiredPages = 0;
2884 
2885 		// If the source area isn't shared, count the number of wired pages in
2886 		// the cache and reserve as many pages.
2887 		if (!sharedArea) {
2888 			wiredPages = cache->WiredPagesCount();
2889 
2890 			if (wiredPages > oldWiredPages) {
2891 				cacheLocker.Unlock();
2892 				locker.Unlock();
2893 
2894 				if (oldWiredPages > 0)
2895 					vm_page_unreserve_pages(&wiredPagesReservation);
2896 
2897 				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2898 					VM_PRIORITY_USER);
2899 
2900 				restart = true;
2901 			}
2902 		} else if (oldWiredPages > 0)
2903 			vm_page_unreserve_pages(&wiredPagesReservation);
2904 	} while (restart);
2905 
2906 	// unreserve pages later
2907 	struct PagesUnreserver {
2908 		PagesUnreserver(vm_page_reservation* reservation)
2909 			:
2910 			fReservation(reservation)
2911 		{
2912 		}
2913 
2914 		~PagesUnreserver()
2915 		{
2916 			if (fReservation != NULL)
2917 				vm_page_unreserve_pages(fReservation);
2918 		}
2919 
2920 	private:
2921 		vm_page_reservation*	fReservation;
2922 	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2923 
2924 	bool writableCopy
2925 		= (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2926 	uint8* targetPageProtections = NULL;
2927 
2928 	if (source->page_protections != NULL) {
2929 		const size_t bytes = area_page_protections_size(source->Size());
2930 		targetPageProtections = (uint8*)malloc_etc(bytes,
2931 			(source->address_space == VMAddressSpace::Kernel()
2932 					|| targetAddressSpace == VMAddressSpace::Kernel())
2933 				? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2934 		if (targetPageProtections == NULL)
2935 			return B_NO_MEMORY;
2936 
2937 		memcpy(targetPageProtections, source->page_protections, bytes);
2938 
2939 		for (size_t i = 0; i < bytes; i++) {
2940 			if ((targetPageProtections[i]
2941 					& (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2942 				writableCopy = true;
2943 				break;
2944 			}
2945 		}
2946 	}
2947 
2948 	if (addressSpec == B_CLONE_ADDRESS) {
2949 		addressSpec = B_EXACT_ADDRESS;
2950 		*_address = (void*)source->Base();
2951 	}
2952 
2953 	// First, create a cache on top of the source area, respectively use the
2954 	// existing one, if this is a shared area.
2955 
2956 	VMArea* target;
2957 	virtual_address_restrictions addressRestrictions = {};
2958 	addressRestrictions.address = *_address;
2959 	addressRestrictions.address_specification = addressSpec;
2960 	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2961 		name, source->Size(), source->wiring, source->protection,
2962 		source->protection_max,
2963 		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2964 		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2965 		&addressRestrictions, true, &target, _address);
2966 	if (status < B_OK) {
2967 		free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2968 		return status;
2969 	}
2970 
2971 	if (targetPageProtections != NULL) {
2972 		target->page_protections = targetPageProtections;
2973 
2974 		if (!sharedArea) {
2975 			// Shrink the commitment (this should never fail).
2976 			AreaCacheLocker locker(target);
2977 			const size_t newPageCommitment = compute_area_page_commitment(target);
2978 			target->cache->Commit(newPageCommitment * B_PAGE_SIZE, VM_PRIORITY_USER);
2979 		}
2980 	}
2981 
2982 	if (sharedArea) {
2983 		// The new area uses the old area's cache, but map_backing_store()
2984 		// hasn't acquired a ref. So we have to do that now.
2985 		cache->AcquireRefLocked();
2986 	}
2987 
2988 	// If the source area is writable, we need to move it one layer up as well
2989 	if (!sharedArea) {
2990 		if (writableCopy) {
2991 			// TODO: do something more useful if this fails!
2992 			if (vm_copy_on_write_area(cache,
2993 					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2994 				panic("vm_copy_on_write_area() failed!\n");
2995 			}
2996 		}
2997 	}
2998 
2999 	// we return the ID of the newly created area
3000 	return target->id;
3001 }
3002 
3003 
3004 status_t
vm_set_area_protection(team_id team,area_id areaID,uint32 newProtection,bool kernel)3005 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
3006 	bool kernel)
3007 {
3008 	fix_protection(&newProtection);
3009 
3010 	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
3011 		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
3012 
3013 	if (!arch_vm_supports_protection(newProtection))
3014 		return B_NOT_SUPPORTED;
3015 
3016 	bool becomesWritable
3017 		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
3018 
3019 	// lock address spaces and cache
3020 	MultiAddressSpaceLocker locker;
3021 	VMCache* cache;
3022 	VMArea* area;
3023 	status_t status;
3024 	AreaCacheLocker cacheLocker;
3025 	bool isWritable;
3026 
3027 	bool restart;
3028 	do {
3029 		restart = false;
3030 
3031 		locker.Unset();
3032 		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
3033 		if (status != B_OK)
3034 			return status;
3035 
3036 		cacheLocker.SetTo(cache, true);	// already locked
3037 
3038 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
3039 				|| (area->protection & B_KERNEL_AREA) != 0)) {
3040 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
3041 				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
3042 				" (%s)\n", team, newProtection, areaID, area->name);
3043 			return B_NOT_ALLOWED;
3044 		}
3045 		if (!kernel && area->protection_max != 0
3046 			&& (newProtection & area->protection_max)
3047 				!= (newProtection & B_USER_PROTECTION)) {
3048 			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
3049 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel "
3050 				"area %" B_PRId32 " (%s)\n", team, newProtection,
3051 				area->protection_max, areaID, area->name);
3052 			return B_NOT_ALLOWED;
3053 		}
3054 
3055 		if (team != VMAddressSpace::KernelID()
3056 			&& area->address_space->ID() != team) {
3057 			// unless you're the kernel, you are only allowed to set
3058 			// the protection of your own areas
3059 			return B_NOT_ALLOWED;
3060 		}
3061 
3062 		if (area->protection == newProtection)
3063 			return B_OK;
3064 
3065 		isWritable
3066 			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
3067 
3068 		// Make sure the area (respectively, if we're going to call
3069 		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
3070 		// wired ranges.
3071 		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
3072 			for (VMArea* otherArea = cache->areas; otherArea != NULL;
3073 					otherArea = otherArea->cache_next) {
3074 				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
3075 					restart = true;
3076 					break;
3077 				}
3078 			}
3079 		} else {
3080 			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
3081 				restart = true;
3082 		}
3083 	} while (restart);
3084 
3085 	if (area->page_protections != NULL) {
3086 		// Get rid of the per-page protections.
3087 		free_etc(area->page_protections,
3088 			area->address_space == VMAddressSpace::Kernel() ? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
3089 		area->page_protections = NULL;
3090 
3091 		// Assume the existing protections don't match the new ones.
3092 		isWritable = !becomesWritable;
3093 	}
3094 
3095 	bool changePageProtection = true;
3096 	bool changeTopCachePagesOnly = false;
3097 
3098 	if (isWritable && !becomesWritable) {
3099 		// writable -> !writable
3100 
3101 		if (cache->source != NULL && cache->temporary) {
3102 			if (cache->CountWritableAreas(area) == 0) {
3103 				// Since this cache now lives from the pages in its source cache,
3104 				// we can change the cache's commitment to take only those pages
3105 				// into account that really are in this cache.
3106 
3107 				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
3108 					team == VMAddressSpace::KernelID()
3109 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3110 
3111 				// TODO: we may be able to join with our source cache, if
3112 				// count == 0
3113 			}
3114 		}
3115 
3116 		// If only the writability changes, we can just remap the pages of the
3117 		// top cache, since the pages of lower caches are mapped read-only
3118 		// anyway. That's advantageous only, if the number of pages in the cache
3119 		// is significantly smaller than the number of pages in the area,
3120 		// though.
3121 		if (newProtection
3122 				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
3123 			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
3124 			changeTopCachePagesOnly = true;
3125 		}
3126 	} else if (!isWritable && becomesWritable) {
3127 		// !writable -> writable
3128 
3129 		if (!cache->consumers.IsEmpty()) {
3130 			// There are consumers -- we have to insert a new cache. Fortunately
3131 			// vm_copy_on_write_area() does everything that's needed.
3132 			changePageProtection = false;
3133 			status = vm_copy_on_write_area(cache, NULL);
3134 		} else {
3135 			// No consumers, so we don't need to insert a new one.
3136 			if (cache->source != NULL && cache->temporary) {
3137 				// the cache's commitment must contain all possible pages
3138 				status = cache->Commit(cache->virtual_end - cache->virtual_base,
3139 					team == VMAddressSpace::KernelID()
3140 						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3141 			}
3142 
3143 			if (status == B_OK && cache->source != NULL) {
3144 				// There's a source cache, hence we can't just change all pages'
3145 				// protection or we might allow writing into pages belonging to
3146 				// a lower cache.
3147 				changeTopCachePagesOnly = true;
3148 			}
3149 		}
3150 	} else {
3151 		// we don't have anything special to do in all other cases
3152 	}
3153 
3154 	if (status == B_OK) {
3155 		// remap existing pages in this cache
3156 		if (changePageProtection) {
3157 			VMTranslationMap* map = area->address_space->TranslationMap();
3158 			map->Lock();
3159 
3160 			if (changeTopCachePagesOnly) {
3161 				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
3162 				page_num_t lastPageOffset
3163 					= firstPageOffset + area->Size() / B_PAGE_SIZE;
3164 				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3165 						vm_page* page = it.Next();) {
3166 					if (page->cache_offset >= firstPageOffset
3167 						&& page->cache_offset <= lastPageOffset) {
3168 						addr_t address = virtual_page_address(area, page);
3169 						map->ProtectPage(area, address, newProtection);
3170 					}
3171 				}
3172 			} else
3173 				map->ProtectArea(area, newProtection);
3174 
3175 			map->Unlock();
3176 		}
3177 
3178 		area->protection = newProtection;
3179 	}
3180 
3181 	return status;
3182 }
3183 
3184 
3185 status_t
vm_get_page_mapping(team_id team,addr_t vaddr,phys_addr_t * paddr)3186 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
3187 {
3188 	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
3189 	if (addressSpace == NULL)
3190 		return B_BAD_TEAM_ID;
3191 
3192 	VMTranslationMap* map = addressSpace->TranslationMap();
3193 
3194 	map->Lock();
3195 	uint32 dummyFlags;
3196 	status_t status = map->Query(vaddr, paddr, &dummyFlags);
3197 	map->Unlock();
3198 
3199 	addressSpace->Put();
3200 	return status;
3201 }
3202 
3203 
3204 /*!	The page's cache must be locked.
3205 */
3206 bool
vm_test_map_modification(vm_page * page)3207 vm_test_map_modification(vm_page* page)
3208 {
3209 	if (page->modified)
3210 		return true;
3211 
3212 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3213 	vm_page_mapping* mapping;
3214 	while ((mapping = iterator.Next()) != NULL) {
3215 		VMArea* area = mapping->area;
3216 		VMTranslationMap* map = area->address_space->TranslationMap();
3217 
3218 		phys_addr_t physicalAddress;
3219 		uint32 flags;
3220 		map->Lock();
3221 		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
3222 		map->Unlock();
3223 
3224 		if ((flags & PAGE_MODIFIED) != 0)
3225 			return true;
3226 	}
3227 
3228 	return false;
3229 }
3230 
3231 
3232 /*!	The page's cache must be locked.
3233 */
3234 void
vm_clear_map_flags(vm_page * page,uint32 flags)3235 vm_clear_map_flags(vm_page* page, uint32 flags)
3236 {
3237 	if ((flags & PAGE_ACCESSED) != 0)
3238 		page->accessed = false;
3239 	if ((flags & PAGE_MODIFIED) != 0)
3240 		page->modified = false;
3241 
3242 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3243 	vm_page_mapping* mapping;
3244 	while ((mapping = iterator.Next()) != NULL) {
3245 		VMArea* area = mapping->area;
3246 		VMTranslationMap* map = area->address_space->TranslationMap();
3247 
3248 		map->Lock();
3249 		map->ClearFlags(virtual_page_address(area, page), flags);
3250 		map->Unlock();
3251 	}
3252 }
3253 
3254 
3255 /*!	Removes all mappings from a page.
3256 	After you've called this function, the page is unmapped from memory and
3257 	the page's \c accessed and \c modified flags have been updated according
3258 	to the state of the mappings.
3259 	The page's cache must be locked.
3260 */
3261 void
vm_remove_all_page_mappings(vm_page * page)3262 vm_remove_all_page_mappings(vm_page* page)
3263 {
3264 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3265 		VMArea* area = mapping->area;
3266 		VMTranslationMap* map = area->address_space->TranslationMap();
3267 		addr_t address = virtual_page_address(area, page);
3268 		map->UnmapPage(area, address, false);
3269 	}
3270 }
3271 
3272 
3273 int32
vm_clear_page_mapping_accessed_flags(struct vm_page * page)3274 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
3275 {
3276 	int32 count = 0;
3277 
3278 	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3279 	vm_page_mapping* mapping;
3280 	while ((mapping = iterator.Next()) != NULL) {
3281 		VMArea* area = mapping->area;
3282 		VMTranslationMap* map = area->address_space->TranslationMap();
3283 
3284 		bool modified;
3285 		if (map->ClearAccessedAndModified(area,
3286 				virtual_page_address(area, page), false, modified)) {
3287 			count++;
3288 		}
3289 
3290 		page->modified |= modified;
3291 	}
3292 
3293 
3294 	if (page->accessed) {
3295 		count++;
3296 		page->accessed = false;
3297 	}
3298 
3299 	return count;
3300 }
3301 
3302 
3303 /*!	Removes all mappings of a page and/or clears the accessed bits of the
3304 	mappings.
3305 	The function iterates through the page mappings and removes them until
3306 	encountering one that has been accessed. From then on it will continue to
3307 	iterate, but only clear the accessed flag of the mapping. The page's
3308 	\c modified bit will be updated accordingly, the \c accessed bit will be
3309 	cleared.
3310 	\return The number of mapping accessed bits encountered, including the
3311 		\c accessed bit of the page itself. If \c 0 is returned, all mappings
3312 		of the page have been removed.
3313 */
3314 int32
vm_remove_all_page_mappings_if_unaccessed(struct vm_page * page)3315 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3316 {
3317 	ASSERT(page->WiredCount() == 0);
3318 
3319 	if (page->accessed)
3320 		return vm_clear_page_mapping_accessed_flags(page);
3321 
3322 	while (vm_page_mapping* mapping = page->mappings.Head()) {
3323 		VMArea* area = mapping->area;
3324 		VMTranslationMap* map = area->address_space->TranslationMap();
3325 		addr_t address = virtual_page_address(area, page);
3326 		bool modified = false;
3327 		if (map->ClearAccessedAndModified(area, address, true, modified)) {
3328 			page->accessed = true;
3329 			page->modified |= modified;
3330 			return vm_clear_page_mapping_accessed_flags(page);
3331 		}
3332 		page->modified |= modified;
3333 	}
3334 
3335 	return 0;
3336 }
3337 
3338 
3339 /*!	Deletes all areas and reserved regions in the given address space.
3340 
3341 	The caller must ensure that none of the areas has any wired ranges.
3342 
3343 	\param addressSpace The address space.
3344 	\param deletingAddressSpace \c true, if the address space is in the process
3345 		of being deleted.
3346 */
3347 void
vm_delete_areas(struct VMAddressSpace * addressSpace,bool deletingAddressSpace)3348 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3349 {
3350 	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3351 		addressSpace->ID()));
3352 
3353 	addressSpace->WriteLock();
3354 
3355 	// remove all reserved areas in this address space
3356 	addressSpace->UnreserveAllAddressRanges(0);
3357 
3358 	// remove all areas from the areas map at once (to avoid lock contention)
3359 	VMAreas::WriteLock();
3360 	{
3361 		VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
3362 		while (VMArea* area = it.Next())
3363 			VMAreas::Remove(area);
3364 	}
3365 	VMAreas::WriteUnlock();
3366 
3367 	// delete all the areas in this address space
3368 	while (VMArea* area = addressSpace->FirstArea()) {
3369 		ASSERT(!area->IsWired());
3370 		delete_area(addressSpace, area, deletingAddressSpace, true);
3371 	}
3372 
3373 	addressSpace->WriteUnlock();
3374 }
3375 
3376 
3377 static area_id
vm_area_for(addr_t address,bool kernel)3378 vm_area_for(addr_t address, bool kernel)
3379 {
3380 	team_id team;
3381 	if (IS_USER_ADDRESS(address)) {
3382 		// we try the user team address space, if any
3383 		team = VMAddressSpace::CurrentID();
3384 		if (team < 0)
3385 			return team;
3386 	} else
3387 		team = VMAddressSpace::KernelID();
3388 
3389 	AddressSpaceReadLocker locker(team);
3390 	if (!locker.IsLocked())
3391 		return B_BAD_TEAM_ID;
3392 
3393 	VMArea* area = locker.AddressSpace()->LookupArea(address);
3394 	if (area != NULL) {
3395 		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0
3396 				&& (area->protection & B_KERNEL_AREA) != 0)
3397 			return B_ERROR;
3398 
3399 		return area->id;
3400 	}
3401 
3402 	return B_ERROR;
3403 }
3404 
3405 
3406 /*!	Frees physical pages that were used during the boot process.
3407 	\a end is inclusive.
3408 */
3409 static void
unmap_and_free_physical_pages(VMTranslationMap * map,addr_t start,addr_t end)3410 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3411 {
3412 	// free all physical pages in the specified range
3413 
3414 	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3415 		phys_addr_t physicalAddress;
3416 		uint32 flags;
3417 
3418 		if (map->Query(current, &physicalAddress, &flags) == B_OK
3419 			&& (flags & PAGE_PRESENT) != 0) {
3420 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3421 			if (page != NULL && page->State() != PAGE_STATE_FREE
3422 					&& page->State() != PAGE_STATE_CLEAR
3423 					&& page->State() != PAGE_STATE_UNUSED) {
3424 				DEBUG_PAGE_ACCESS_START(page);
3425 				vm_page_set_state(page, PAGE_STATE_FREE);
3426 			}
3427 		}
3428 	}
3429 
3430 	// unmap the memory
3431 	map->Unmap(start, end);
3432 }
3433 
3434 
3435 void
vm_free_unused_boot_loader_range(addr_t start,addr_t size)3436 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3437 {
3438 	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3439 	addr_t end = start + (size - 1);
3440 	addr_t lastEnd = start;
3441 
3442 	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3443 		(void*)start, (void*)end));
3444 
3445 	// The areas are sorted in virtual address space order, so
3446 	// we just have to find the holes between them that fall
3447 	// into the area we should dispose
3448 
3449 	map->Lock();
3450 
3451 	for (VMAddressSpace::AreaIterator it
3452 				= VMAddressSpace::Kernel()->GetAreaIterator();
3453 			VMArea* area = it.Next();) {
3454 		addr_t areaStart = area->Base();
3455 		addr_t areaEnd = areaStart + (area->Size() - 1);
3456 
3457 		if (areaEnd < start)
3458 			continue;
3459 
3460 		if (areaStart > end) {
3461 			// we are done, the area is already beyond of what we have to free
3462 			break;
3463 		}
3464 
3465 		if (areaStart > lastEnd) {
3466 			// this is something we can free
3467 			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3468 				(void*)areaStart));
3469 			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3470 		}
3471 
3472 		if (areaEnd >= end) {
3473 			lastEnd = areaEnd;
3474 				// no +1 to prevent potential overflow
3475 			break;
3476 		}
3477 
3478 		lastEnd = areaEnd + 1;
3479 	}
3480 
3481 	if (lastEnd < end) {
3482 		// we can also get rid of some space at the end of the area
3483 		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3484 			(void*)end));
3485 		unmap_and_free_physical_pages(map, lastEnd, end);
3486 	}
3487 
3488 	map->Unlock();
3489 }
3490 
3491 
3492 static void
create_preloaded_image_areas(struct preloaded_image * _image)3493 create_preloaded_image_areas(struct preloaded_image* _image)
3494 {
3495 	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3496 	char name[B_OS_NAME_LENGTH];
3497 	void* address;
3498 	int32 length;
3499 
3500 	// use file name to create a good area name
3501 	char* fileName = strrchr(image->name, '/');
3502 	if (fileName == NULL)
3503 		fileName = image->name;
3504 	else
3505 		fileName++;
3506 
3507 	length = strlen(fileName);
3508 	// make sure there is enough space for the suffix
3509 	if (length > 25)
3510 		length = 25;
3511 
3512 	memcpy(name, fileName, length);
3513 	strcpy(name + length, "_text");
3514 	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3515 	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3516 		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3517 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3518 		// this will later be remapped read-only/executable by the
3519 		// ELF initialization code
3520 
3521 	strcpy(name + length, "_data");
3522 	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3523 	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3524 		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3525 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3526 }
3527 
3528 
3529 /*!	Frees all previously kernel arguments areas from the kernel_args structure.
3530 	Any boot loader resources contained in that arguments must not be accessed
3531 	anymore past this point.
3532 */
3533 void
vm_free_kernel_args(kernel_args * args)3534 vm_free_kernel_args(kernel_args* args)
3535 {
3536 	TRACE(("vm_free_kernel_args()\n"));
3537 
3538 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3539 		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3540 		if (area >= B_OK)
3541 			delete_area(area);
3542 	}
3543 }
3544 
3545 
3546 static void
allocate_kernel_args(kernel_args * args)3547 allocate_kernel_args(kernel_args* args)
3548 {
3549 	TRACE(("allocate_kernel_args()\n"));
3550 
3551 	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3552 		const addr_range& range = args->kernel_args_range[i];
3553 		void* address = (void*)(addr_t)range.start;
3554 
3555 		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3556 			range.size, B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3557 	}
3558 }
3559 
3560 
3561 static void
unreserve_boot_loader_ranges(kernel_args * args)3562 unreserve_boot_loader_ranges(kernel_args* args)
3563 {
3564 	TRACE(("unreserve_boot_loader_ranges()\n"));
3565 
3566 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3567 		const addr_range& range = args->virtual_allocated_range[i];
3568 		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3569 			(void*)(addr_t)range.start, range.size);
3570 	}
3571 }
3572 
3573 
3574 static void
reserve_boot_loader_ranges(kernel_args * args)3575 reserve_boot_loader_ranges(kernel_args* args)
3576 {
3577 	TRACE(("reserve_boot_loader_ranges()\n"));
3578 
3579 	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3580 		const addr_range& range = args->virtual_allocated_range[i];
3581 		void* address = (void*)(addr_t)range.start;
3582 
3583 		// If the address is no kernel address, we just skip it. The
3584 		// architecture specific code has to deal with it.
3585 		if (!IS_KERNEL_ADDRESS(address)) {
3586 			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3587 				B_PRIu64 "\n", address, range.size);
3588 			continue;
3589 		}
3590 
3591 		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3592 			&address, B_EXACT_ADDRESS, range.size, 0);
3593 		if (status < B_OK)
3594 			panic("could not reserve boot loader ranges\n");
3595 	}
3596 }
3597 
3598 
3599 static addr_t
allocate_early_virtual(kernel_args * args,size_t size,addr_t alignment)3600 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3601 {
3602 	size = PAGE_ALIGN(size);
3603 	if (alignment <= B_PAGE_SIZE) {
3604 		// All allocations are naturally page-aligned.
3605 		alignment = 0;
3606 	} else {
3607 		ASSERT((alignment % B_PAGE_SIZE) == 0);
3608 	}
3609 
3610 	// Find a slot in the virtual allocation ranges.
3611 	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3612 		// Check if the space between this one and the previous is big enough.
3613 		const addr_range& range = args->virtual_allocated_range[i];
3614 		addr_range& previousRange = args->virtual_allocated_range[i - 1];
3615 		const addr_t previousRangeEnd = previousRange.start + previousRange.size;
3616 
3617 		addr_t base = alignment > 0
3618 			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3619 
3620 		if (base >= KERNEL_BASE && base < range.start && (range.start - base) >= size) {
3621 			previousRange.size += base + size - previousRangeEnd;
3622 			return base;
3623 		}
3624 	}
3625 
3626 	// We didn't find one between allocation ranges. This is OK.
3627 	// See if there's a gap after the last one.
3628 	addr_range& lastRange
3629 		= args->virtual_allocated_range[args->num_virtual_allocated_ranges - 1];
3630 	const addr_t lastRangeEnd = lastRange.start + lastRange.size;
3631 	addr_t base = alignment > 0
3632 		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3633 	if ((KERNEL_BASE + (KERNEL_SIZE - 1) - base) >= size) {
3634 		lastRange.size += base + size - lastRangeEnd;
3635 		return base;
3636 	}
3637 
3638 	// See if there's a gap before the first one.
3639 	addr_range& firstRange = args->virtual_allocated_range[0];
3640 	if (firstRange.start > KERNEL_BASE && (firstRange.start - KERNEL_BASE) >= size) {
3641 		base = firstRange.start - size;
3642 		if (alignment > 0)
3643 			base = ROUNDDOWN(base, alignment);
3644 
3645 		if (base >= KERNEL_BASE) {
3646 			firstRange.size += firstRange.start - base;
3647 			firstRange.start = base;
3648 			return base;
3649 		}
3650 	}
3651 
3652 	return 0;
3653 }
3654 
3655 
3656 static bool
is_page_in_physical_memory_range(kernel_args * args,phys_addr_t address)3657 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3658 {
3659 	// TODO: horrible brute-force method of determining if the page can be
3660 	// allocated
3661 	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3662 		const addr_range& range = args->physical_memory_range[i];
3663 		if (address >= range.start && address < (range.start + range.size))
3664 			return true;
3665 	}
3666 	return false;
3667 }
3668 
3669 
3670 page_num_t
vm_allocate_early_physical_page(kernel_args * args)3671 vm_allocate_early_physical_page(kernel_args* args)
3672 {
3673 	return vm_allocate_early_physical_page_etc(args);
3674 }
3675 
3676 
3677 page_num_t
vm_allocate_early_physical_page_etc(kernel_args * args,phys_addr_t maxAddress)3678 vm_allocate_early_physical_page_etc(kernel_args* args, phys_addr_t maxAddress)
3679 {
3680 	if (args->num_physical_allocated_ranges == 0) {
3681 		panic("early physical page allocations no longer possible!");
3682 		return 0;
3683 	}
3684 	if (maxAddress == 0)
3685 		maxAddress = __HAIKU_PHYS_ADDR_MAX;
3686 
3687 #if defined(B_HAIKU_PHYSICAL_64_BIT)
3688 	// Check if the last physical range is above the 32-bit maximum.
3689 	const addr_range& lastMemoryRange =
3690 		args->physical_memory_range[args->num_physical_memory_ranges - 1];
3691 	const uint64 post32bitAddr = 0x100000000LL;
3692 	if ((lastMemoryRange.start + lastMemoryRange.size) > post32bitAddr
3693 			&& args->num_physical_allocated_ranges < MAX_PHYSICAL_ALLOCATED_RANGE) {
3694 		// To avoid consuming physical memory in the 32-bit range (which drivers may need),
3695 		// ensure the last allocated range at least ends past the 32-bit boundary.
3696 		const addr_range& lastAllocatedRange =
3697 			args->physical_allocated_range[args->num_physical_allocated_ranges - 1];
3698 		const phys_addr_t lastAllocatedPage = lastAllocatedRange.start + lastAllocatedRange.size;
3699 		if (lastAllocatedPage < post32bitAddr) {
3700 			// Create ranges until we have one at least starting at the first point past 4GB.
3701 			// (Some of the logic here is similar to the new-range code at the end of the method.)
3702 			for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3703 				addr_range& memoryRange = args->physical_memory_range[i];
3704 				if ((memoryRange.start + memoryRange.size) < lastAllocatedPage)
3705 					continue;
3706 				if (memoryRange.size < (B_PAGE_SIZE * 128))
3707 					continue;
3708 
3709 				uint64 rangeStart = memoryRange.start;
3710 				if ((memoryRange.start + memoryRange.size) <= post32bitAddr) {
3711 					if (memoryRange.start < lastAllocatedPage)
3712 						continue;
3713 
3714 					// Range has no pages allocated and ends before the 32-bit boundary.
3715 				} else {
3716 					// Range ends past the 32-bit boundary. It could have some pages allocated,
3717 					// but if we're here, we know that nothing is allocated above the boundary,
3718 					// so we want to create a new range with it regardless.
3719 					if (rangeStart < post32bitAddr)
3720 						rangeStart = post32bitAddr;
3721 				}
3722 
3723 				addr_range& allocatedRange =
3724 					args->physical_allocated_range[args->num_physical_allocated_ranges++];
3725 				allocatedRange.start = rangeStart;
3726 				allocatedRange.size = 0;
3727 
3728 				if (rangeStart >= post32bitAddr)
3729 					break;
3730 				if (args->num_physical_allocated_ranges == MAX_PHYSICAL_ALLOCATED_RANGE)
3731 					break;
3732 			}
3733 		}
3734 	}
3735 #endif
3736 
3737 	// Try expanding the existing physical ranges upwards.
3738 	for (int32 i = args->num_physical_allocated_ranges - 1; i >= 0; i--) {
3739 		addr_range& range = args->physical_allocated_range[i];
3740 		phys_addr_t nextPage = range.start + range.size;
3741 
3742 		// check constraints
3743 		if (nextPage > maxAddress)
3744 			continue;
3745 
3746 		// make sure the page does not collide with the next allocated range
3747 		if ((i + 1) < (int32)args->num_physical_allocated_ranges) {
3748 			addr_range& nextRange = args->physical_allocated_range[i + 1];
3749 			if (nextRange.size != 0 && nextPage >= nextRange.start)
3750 				continue;
3751 		}
3752 		// see if the next page fits in the memory block
3753 		if (is_page_in_physical_memory_range(args, nextPage)) {
3754 			// we got one!
3755 			range.size += B_PAGE_SIZE;
3756 			return nextPage / B_PAGE_SIZE;
3757 		}
3758 	}
3759 
3760 	// Expanding upwards didn't work, try going downwards.
3761 	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3762 		addr_range& range = args->physical_allocated_range[i];
3763 		phys_addr_t nextPage = range.start - B_PAGE_SIZE;
3764 
3765 		// check constraints
3766 		if (nextPage > maxAddress)
3767 			continue;
3768 
3769 		// make sure the page does not collide with the previous allocated range
3770 		if (i > 0) {
3771 			addr_range& previousRange = args->physical_allocated_range[i - 1];
3772 			if (previousRange.size != 0 && nextPage < (previousRange.start + previousRange.size))
3773 				continue;
3774 		}
3775 		// see if the next physical page fits in the memory block
3776 		if (is_page_in_physical_memory_range(args, nextPage)) {
3777 			// we got one!
3778 			range.start -= B_PAGE_SIZE;
3779 			range.size += B_PAGE_SIZE;
3780 			return nextPage / B_PAGE_SIZE;
3781 		}
3782 	}
3783 
3784 	// Try starting a new range.
3785 	if (args->num_physical_allocated_ranges < MAX_PHYSICAL_ALLOCATED_RANGE) {
3786 		const addr_range& lastAllocatedRange =
3787 			args->physical_allocated_range[args->num_physical_allocated_ranges - 1];
3788 		const phys_addr_t lastAllocatedPage = lastAllocatedRange.start + lastAllocatedRange.size;
3789 
3790 		phys_addr_t nextPage = 0;
3791 		for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3792 			const addr_range& range = args->physical_memory_range[i];
3793 			// Ignore everything before the last-allocated page, as well as small ranges.
3794 			if (range.start < lastAllocatedPage || range.size < (B_PAGE_SIZE * 128))
3795 				continue;
3796 			if (range.start > maxAddress)
3797 				break;
3798 
3799 			nextPage = range.start;
3800 			break;
3801 		}
3802 
3803 		if (nextPage != 0) {
3804 			// we got one!
3805 			addr_range& range =
3806 				args->physical_allocated_range[args->num_physical_allocated_ranges++];
3807 			range.start = nextPage;
3808 			range.size = B_PAGE_SIZE;
3809 			return nextPage / B_PAGE_SIZE;
3810 		}
3811 	}
3812 
3813 	return 0;
3814 		// could not allocate a block
3815 }
3816 
3817 
3818 /*!	This one uses the kernel_args' physical and virtual memory ranges to
3819 	allocate some pages before the VM is completely up.
3820 */
3821 addr_t
vm_allocate_early(kernel_args * args,size_t virtualSize,size_t physicalSize,uint32 attributes,addr_t alignment)3822 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3823 	uint32 attributes, addr_t alignment)
3824 {
3825 	if (physicalSize > virtualSize)
3826 		physicalSize = virtualSize;
3827 
3828 	// find the vaddr to allocate at
3829 	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3830 	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3831 	if (virtualBase == 0) {
3832 		panic("vm_allocate_early: could not allocate virtual address\n");
3833 		return 0;
3834 	}
3835 
3836 	// map the pages
3837 	for (uint32 i = 0; i < HOWMANY(physicalSize, B_PAGE_SIZE); i++) {
3838 		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3839 		if (physicalAddress == 0)
3840 			panic("error allocating early page!\n");
3841 
3842 		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3843 
3844 		status_t status = arch_vm_translation_map_early_map(args,
3845 			virtualBase + i * B_PAGE_SIZE,
3846 			physicalAddress * B_PAGE_SIZE, attributes,
3847 			&vm_allocate_early_physical_page);
3848 		if (status != B_OK)
3849 			panic("error mapping early page!");
3850 	}
3851 
3852 	return virtualBase;
3853 }
3854 
3855 
3856 /*!	The main entrance point to initialize the VM. */
3857 status_t
vm_init(kernel_args * args)3858 vm_init(kernel_args* args)
3859 {
3860 	struct preloaded_image* image;
3861 	void* address;
3862 	status_t err = 0;
3863 	uint32 i;
3864 
3865 	TRACE(("vm_init: entry\n"));
3866 	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3867 	err = arch_vm_init(args);
3868 
3869 	// initialize some globals
3870 	vm_page_init_num_pages(args);
3871 	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3872 
3873 	slab_init(args);
3874 
3875 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3876 	off_t heapSize = INITIAL_HEAP_SIZE;
3877 	// try to accomodate low memory systems
3878 	while (heapSize > sAvailableMemory / 8)
3879 		heapSize /= 2;
3880 	if (heapSize < 1024 * 1024)
3881 		panic("vm_init: go buy some RAM please.");
3882 
3883 	// map in the new heap and initialize it
3884 	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3885 		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3886 	TRACE(("heap at 0x%lx\n", heapBase));
3887 	heap_init(heapBase, heapSize);
3888 #endif
3889 
3890 	// initialize the free page list and physical page mapper
3891 	vm_page_init(args);
3892 
3893 	// initialize the cache allocators
3894 	vm_cache_init(args);
3895 
3896 	{
3897 		status_t error = VMAreas::Init();
3898 		if (error != B_OK)
3899 			panic("vm_init: error initializing areas map\n");
3900 	}
3901 
3902 	VMAddressSpace::Init();
3903 	reserve_boot_loader_ranges(args);
3904 
3905 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3906 	heap_init_post_area();
3907 #endif
3908 
3909 	// Do any further initialization that the architecture dependant layers may
3910 	// need now
3911 	arch_vm_translation_map_init_post_area(args);
3912 	arch_vm_init_post_area(args);
3913 	vm_page_init_post_area(args);
3914 	slab_init_post_area();
3915 
3916 	// allocate areas to represent stuff that already exists
3917 
3918 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3919 	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
3920 	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
3921 		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3922 #endif
3923 
3924 	allocate_kernel_args(args);
3925 
3926 	create_preloaded_image_areas(args->kernel_image);
3927 
3928 	// allocate areas for preloaded images
3929 	for (image = args->preloaded_images; image != NULL; image = image->next)
3930 		create_preloaded_image_areas(image);
3931 
3932 	// allocate kernel stacks
3933 	for (i = 0; i < args->num_cpus; i++) {
3934 		char name[64];
3935 
3936 		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
3937 		address = (void*)args->cpu_kstack[i].start;
3938 		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
3939 			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3940 	}
3941 
3942 	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
3943 	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
3944 
3945 #if PARANOID_KERNEL_MALLOC
3946 	vm_block_address_range("uninitialized heap memory",
3947 		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3948 #endif
3949 #if PARANOID_KERNEL_FREE
3950 	vm_block_address_range("freed heap memory",
3951 		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3952 #endif
3953 
3954 	create_page_mappings_object_caches();
3955 
3956 	vm_debug_init();
3957 
3958 	TRACE(("vm_init: exit\n"));
3959 
3960 	vm_cache_init_post_heap();
3961 
3962 	return err;
3963 }
3964 
3965 
3966 status_t
vm_init_post_sem(kernel_args * args)3967 vm_init_post_sem(kernel_args* args)
3968 {
3969 	// This frees all unused boot loader resources and makes its space available
3970 	// again
3971 	arch_vm_init_end(args);
3972 	unreserve_boot_loader_ranges(args);
3973 
3974 	// fill in all of the semaphores that were not allocated before
3975 	// since we're still single threaded and only the kernel address space
3976 	// exists, it isn't that hard to find all of the ones we need to create
3977 
3978 	arch_vm_translation_map_init_post_sem(args);
3979 
3980 	slab_init_post_sem();
3981 
3982 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3983 	heap_init_post_sem();
3984 #endif
3985 
3986 	return B_OK;
3987 }
3988 
3989 
3990 status_t
vm_init_post_thread(kernel_args * args)3991 vm_init_post_thread(kernel_args* args)
3992 {
3993 	vm_page_init_post_thread(args);
3994 	slab_init_post_thread();
3995 	return heap_init_post_thread();
3996 }
3997 
3998 
3999 status_t
vm_init_post_modules(kernel_args * args)4000 vm_init_post_modules(kernel_args* args)
4001 {
4002 	return arch_vm_init_post_modules(args);
4003 }
4004 
4005 
4006 void
permit_page_faults(void)4007 permit_page_faults(void)
4008 {
4009 	Thread* thread = thread_get_current_thread();
4010 	if (thread != NULL)
4011 		atomic_add(&thread->page_faults_allowed, 1);
4012 }
4013 
4014 
4015 void
forbid_page_faults(void)4016 forbid_page_faults(void)
4017 {
4018 	Thread* thread = thread_get_current_thread();
4019 	if (thread != NULL)
4020 		atomic_add(&thread->page_faults_allowed, -1);
4021 }
4022 
4023 
4024 status_t
vm_page_fault(addr_t address,addr_t faultAddress,bool isWrite,bool isExecute,bool isUser,addr_t * newIP)4025 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4026 	bool isUser, addr_t* newIP)
4027 {
4028 	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4029 		faultAddress));
4030 
4031 	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4032 
4033 	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4034 	VMAddressSpace* addressSpace = NULL;
4035 
4036 	status_t status = B_OK;
4037 	*newIP = 0;
4038 	atomic_add((int32*)&sPageFaults, 1);
4039 
4040 	if (IS_KERNEL_ADDRESS(pageAddress)) {
4041 		addressSpace = VMAddressSpace::GetKernel();
4042 	} else if (IS_USER_ADDRESS(pageAddress)) {
4043 		addressSpace = VMAddressSpace::GetCurrent();
4044 		if (addressSpace == NULL) {
4045 			if (!isUser) {
4046 				dprintf("vm_page_fault: kernel thread accessing invalid user "
4047 					"memory!\n");
4048 				status = B_BAD_ADDRESS;
4049 				TPF(PageFaultError(-1,
4050 					VMPageFaultTracing
4051 						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4052 			} else {
4053 				// XXX weird state.
4054 				panic("vm_page_fault: non kernel thread accessing user memory "
4055 					"that doesn't exist!\n");
4056 				status = B_BAD_ADDRESS;
4057 			}
4058 		}
4059 	} else {
4060 		// the hit was probably in the 64k DMZ between kernel and user space
4061 		// this keeps a user space thread from passing a buffer that crosses
4062 		// into kernel space
4063 		status = B_BAD_ADDRESS;
4064 		TPF(PageFaultError(-1,
4065 			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4066 	}
4067 
4068 	if (status == B_OK) {
4069 		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4070 			isUser, NULL);
4071 	}
4072 
4073 	if (status < B_OK) {
4074 		if (!isUser) {
4075 			dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4076 				"0x%lx, ip 0x%lx, write %d, kernel, exec %d, thread 0x%" B_PRIx32 "\n",
4077 				strerror(status), address, faultAddress, isWrite, isExecute,
4078 				thread_get_current_thread_id());
4079 
4080 			Thread* thread = thread_get_current_thread();
4081 			if (thread != NULL && thread->fault_handler != 0) {
4082 				// this will cause the arch dependant page fault handler to
4083 				// modify the IP on the interrupt frame or whatever to return
4084 				// to this address
4085 				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4086 			} else {
4087 				// unhandled page fault in the kernel
4088 				panic("vm_page_fault: unhandled page fault in kernel space at "
4089 					"0x%lx, ip 0x%lx\n", address, faultAddress);
4090 			}
4091 		} else {
4092 			Thread* thread = thread_get_current_thread();
4093 
4094 #ifdef TRACE_FAULTS
4095 			VMArea* area = NULL;
4096 			if (addressSpace != NULL) {
4097 				addressSpace->ReadLock();
4098 				area = addressSpace->LookupArea(faultAddress);
4099 			}
4100 
4101 			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4102 				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4103 				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4104 				thread->team->Name(), thread->team->id,
4105 				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4106 				faultAddress, area ? area->name : "???", faultAddress - (area ?
4107 					area->Base() : 0x0));
4108 
4109 			if (addressSpace != NULL)
4110 				addressSpace->ReadUnlock();
4111 #endif
4112 
4113 			// If the thread has a signal handler for SIGSEGV, we simply
4114 			// send it the signal. Otherwise we notify the user debugger
4115 			// first.
4116 			struct sigaction action;
4117 			if ((sigaction(SIGSEGV, NULL, &action) == 0
4118 					&& action.sa_handler != SIG_DFL
4119 					&& action.sa_handler != SIG_IGN)
4120 				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4121 					SIGSEGV)) {
4122 				Signal signal(SIGSEGV,
4123 					status == B_PERMISSION_DENIED
4124 						? SEGV_ACCERR : SEGV_MAPERR,
4125 					EFAULT, thread->team->id);
4126 				signal.SetAddress((void*)address);
4127 				send_signal_to_thread(thread, signal, 0);
4128 			}
4129 		}
4130 	}
4131 
4132 	if (addressSpace != NULL)
4133 		addressSpace->Put();
4134 
4135 	return B_HANDLED_INTERRUPT;
4136 }
4137 
4138 
4139 struct PageFaultContext {
4140 	AddressSpaceReadLocker	addressSpaceLocker;
4141 	VMCacheChainLocker		cacheChainLocker;
4142 
4143 	VMTranslationMap*		map;
4144 	VMCache*				topCache;
4145 	off_t					cacheOffset;
4146 	vm_page_reservation		reservation;
4147 	bool					isWrite;
4148 
4149 	// return values
4150 	vm_page*				page;
4151 	bool					restart;
4152 	bool					pageAllocated;
4153 
4154 
PageFaultContextPageFaultContext4155 	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4156 		:
4157 		addressSpaceLocker(addressSpace, true),
4158 		map(addressSpace->TranslationMap()),
4159 		isWrite(isWrite)
4160 	{
4161 	}
4162 
~PageFaultContextPageFaultContext4163 	~PageFaultContext()
4164 	{
4165 		UnlockAll();
4166 		vm_page_unreserve_pages(&reservation);
4167 	}
4168 
PreparePageFaultContext4169 	void Prepare(VMCache* topCache, off_t cacheOffset)
4170 	{
4171 		this->topCache = topCache;
4172 		this->cacheOffset = cacheOffset;
4173 		page = NULL;
4174 		restart = false;
4175 		pageAllocated = false;
4176 
4177 		cacheChainLocker.SetTo(topCache);
4178 	}
4179 
UnlockAllPageFaultContext4180 	void UnlockAll(VMCache* exceptCache = NULL)
4181 	{
4182 		topCache = NULL;
4183 		addressSpaceLocker.Unlock();
4184 		cacheChainLocker.Unlock(exceptCache);
4185 	}
4186 };
4187 
4188 
4189 /*!	Gets the page that should be mapped into the area.
4190 	Returns an error code other than \c B_OK, if the page couldn't be found or
4191 	paged in. The locking state of the address space and the caches is undefined
4192 	in that case.
4193 	Returns \c B_OK with \c context.restart set to \c true, if the functions
4194 	had to unlock the address space and all caches and is supposed to be called
4195 	again.
4196 	Returns \c B_OK with \c context.restart set to \c false, if the page was
4197 	found. It is returned in \c context.page. The address space will still be
4198 	locked as well as all caches starting from the top cache to at least the
4199 	cache the page lives in.
4200 */
4201 static status_t
fault_get_page(PageFaultContext & context)4202 fault_get_page(PageFaultContext& context)
4203 {
4204 	VMCache* cache = context.topCache;
4205 	VMCache* lastCache = NULL;
4206 	vm_page* page = NULL;
4207 
4208 	while (cache != NULL) {
4209 		// We already hold the lock of the cache at this point.
4210 
4211 		lastCache = cache;
4212 
4213 		page = cache->LookupPage(context.cacheOffset);
4214 		if (page != NULL && page->busy) {
4215 			// page must be busy -- wait for it to become unbusy
4216 			context.UnlockAll(cache);
4217 			cache->ReleaseRefLocked();
4218 			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4219 
4220 			// restart the whole process
4221 			context.restart = true;
4222 			return B_OK;
4223 		}
4224 
4225 		if (page != NULL)
4226 			break;
4227 
4228 		// The current cache does not contain the page we're looking for.
4229 
4230 		// see if the backing store has it
4231 		if (cache->HasPage(context.cacheOffset)) {
4232 			// insert a fresh page and mark it busy -- we're going to read it in
4233 			page = vm_page_allocate_page(&context.reservation,
4234 				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4235 			cache->InsertPage(page, context.cacheOffset);
4236 
4237 			// We need to unlock all caches and the address space while reading
4238 			// the page in. Keep a reference to the cache around.
4239 			cache->AcquireRefLocked();
4240 			context.UnlockAll();
4241 
4242 			// read the page in
4243 			generic_io_vec vec;
4244 			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4245 			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4246 
4247 			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4248 				B_PHYSICAL_IO_REQUEST, &bytesRead);
4249 
4250 			cache->Lock();
4251 
4252 			if (status < B_OK) {
4253 				// on error remove and free the page
4254 				dprintf("reading page from cache %p returned: %s!\n",
4255 					cache, strerror(status));
4256 
4257 				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4258 				cache->RemovePage(page);
4259 				vm_page_set_state(page, PAGE_STATE_FREE);
4260 
4261 				cache->ReleaseRefAndUnlock();
4262 				return status;
4263 			}
4264 
4265 			// mark the page unbusy again
4266 			cache->MarkPageUnbusy(page);
4267 
4268 			DEBUG_PAGE_ACCESS_END(page);
4269 
4270 			// Since we needed to unlock everything temporarily, the area
4271 			// situation might have changed. So we need to restart the whole
4272 			// process.
4273 			cache->ReleaseRefAndUnlock();
4274 			context.restart = true;
4275 			return B_OK;
4276 		}
4277 
4278 		cache = context.cacheChainLocker.LockSourceCache();
4279 	}
4280 
4281 	if (page == NULL) {
4282 		// There was no adequate page, determine the cache for a clean one.
4283 		// Read-only pages come in the deepest cache, only the top most cache
4284 		// may have direct write access.
4285 		cache = context.isWrite ? context.topCache : lastCache;
4286 
4287 		// allocate a clean page
4288 		page = vm_page_allocate_page(&context.reservation,
4289 			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4290 		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4291 			page->physical_page_number));
4292 
4293 		// insert the new page into our cache
4294 		cache->InsertPage(page, context.cacheOffset);
4295 		context.pageAllocated = true;
4296 	} else if (page->Cache() != context.topCache && context.isWrite) {
4297 		// We have a page that has the data we want, but in the wrong cache
4298 		// object so we need to copy it and stick it into the top cache.
4299 		vm_page* sourcePage = page;
4300 
4301 		// TODO: If memory is low, it might be a good idea to steal the page
4302 		// from our source cache -- if possible, that is.
4303 		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4304 		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4305 
4306 		// To not needlessly kill concurrency we unlock all caches but the top
4307 		// one while copying the page. Lacking another mechanism to ensure that
4308 		// the source page doesn't disappear, we mark it busy.
4309 		sourcePage->busy = true;
4310 		context.cacheChainLocker.UnlockKeepRefs(true);
4311 
4312 		// copy the page
4313 		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4314 			sourcePage->physical_page_number * B_PAGE_SIZE);
4315 
4316 		context.cacheChainLocker.RelockCaches(true);
4317 		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4318 
4319 		// insert the new page into our cache
4320 		context.topCache->InsertPage(page, context.cacheOffset);
4321 		context.pageAllocated = true;
4322 	} else
4323 		DEBUG_PAGE_ACCESS_START(page);
4324 
4325 	context.page = page;
4326 	return B_OK;
4327 }
4328 
4329 
4330 /*!	Makes sure the address in the given address space is mapped.
4331 
4332 	\param addressSpace The address space.
4333 	\param originalAddress The address. Doesn't need to be page aligned.
4334 	\param isWrite If \c true the address shall be write-accessible.
4335 	\param isUser If \c true the access is requested by a userland team.
4336 	\param wirePage On success, if non \c NULL, the wired count of the page
4337 		mapped at the given address is incremented and the page is returned
4338 		via this parameter.
4339 	\return \c B_OK on success, another error code otherwise.
4340 */
4341 static status_t
vm_soft_fault(VMAddressSpace * addressSpace,addr_t originalAddress,bool isWrite,bool isExecute,bool isUser,vm_page ** wirePage)4342 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4343 	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4344 {
4345 	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4346 		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4347 		originalAddress, isWrite, isUser));
4348 
4349 	PageFaultContext context(addressSpace, isWrite);
4350 
4351 	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4352 	status_t status = B_OK;
4353 
4354 	addressSpace->IncrementFaultCount();
4355 
4356 	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4357 	// the pages upfront makes sure we don't have any cache locked, so that the
4358 	// page daemon/thief can do their job without problems.
4359 	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4360 		originalAddress);
4361 	context.addressSpaceLocker.Unlock();
4362 	vm_page_reserve_pages(&context.reservation, reservePages,
4363 		addressSpace == VMAddressSpace::Kernel()
4364 			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4365 
4366 #ifdef TRACE_FAULTS
4367 	const bool logFaults = true;
4368 #else
4369 	const bool logFaults = !isUser;
4370 #endif
4371 	while (true) {
4372 		context.addressSpaceLocker.Lock();
4373 
4374 		// get the area the fault was in
4375 		VMArea* area = addressSpace->LookupArea(address);
4376 		if (area == NULL) {
4377 			if (logFaults) {
4378 				dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4379 					"space\n", originalAddress);
4380 			}
4381 			TPF(PageFaultError(-1,
4382 				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4383 			status = B_BAD_ADDRESS;
4384 			break;
4385 		}
4386 
4387 		// check permissions
4388 		uint32 protection = get_area_page_protection(area, address);
4389 		if (isUser && (protection & B_USER_PROTECTION) == 0
4390 				&& (area->protection & B_KERNEL_AREA) != 0) {
4391 			if (logFaults) {
4392 				dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4393 					area->id, (void*)originalAddress);
4394 			}
4395 			TPF(PageFaultError(area->id,
4396 				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4397 			status = B_PERMISSION_DENIED;
4398 			break;
4399 		}
4400 		if (isWrite && (protection
4401 				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4402 			if (logFaults) {
4403 				dprintf("write access attempted on write-protected area 0x%"
4404 					B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4405 			}
4406 			TPF(PageFaultError(area->id,
4407 				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4408 			status = B_PERMISSION_DENIED;
4409 			break;
4410 		} else if (isExecute && (protection
4411 				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4412 			if (logFaults) {
4413 				dprintf("instruction fetch attempted on execute-protected area 0x%"
4414 					B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4415 			}
4416 			TPF(PageFaultError(area->id,
4417 				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4418 			status = B_PERMISSION_DENIED;
4419 			break;
4420 		} else if (!isWrite && !isExecute && (protection
4421 				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4422 			if (logFaults) {
4423 				dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4424 					" at %p\n", area->id, (void*)originalAddress);
4425 			}
4426 			TPF(PageFaultError(area->id,
4427 				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4428 			status = B_PERMISSION_DENIED;
4429 			break;
4430 		}
4431 
4432 		// We have the area, it was a valid access, so let's try to resolve the
4433 		// page fault now.
4434 		// At first, the top most cache from the area is investigated.
4435 
4436 		context.Prepare(vm_area_get_locked_cache(area),
4437 			address - area->Base() + area->cache_offset);
4438 
4439 		// See if this cache has a fault handler -- this will do all the work
4440 		// for us.
4441 		{
4442 			// Note, since the page fault is resolved with interrupts enabled,
4443 			// the fault handler could be called more than once for the same
4444 			// reason -- the store must take this into account.
4445 			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4446 			if (status != B_BAD_HANDLER)
4447 				break;
4448 		}
4449 
4450 		// The top most cache has no fault handler, so let's see if the cache or
4451 		// its sources already have the page we're searching for (we're going
4452 		// from top to bottom).
4453 		status = fault_get_page(context);
4454 		if (status != B_OK) {
4455 			TPF(PageFaultError(area->id, status));
4456 			break;
4457 		}
4458 
4459 		if (context.restart)
4460 			continue;
4461 
4462 		// All went fine, all there is left to do is to map the page into the
4463 		// address space.
4464 		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4465 			context.page));
4466 
4467 		// If the page doesn't reside in the area's cache, we need to make sure
4468 		// it's mapped in read-only, so that we cannot overwrite someone else's
4469 		// data (copy-on-write)
4470 		uint32 newProtection = protection;
4471 		if (context.page->Cache() != context.topCache && !isWrite)
4472 			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4473 
4474 		bool unmapPage = false;
4475 		bool mapPage = true;
4476 
4477 		// check whether there's already a page mapped at the address
4478 		context.map->Lock();
4479 
4480 		phys_addr_t physicalAddress;
4481 		uint32 flags;
4482 		vm_page* mappedPage = NULL;
4483 		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4484 			&& (flags & PAGE_PRESENT) != 0
4485 			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4486 				!= NULL) {
4487 			// Yep there's already a page. If it's ours, we can simply adjust
4488 			// its protection. Otherwise we have to unmap it.
4489 			if (mappedPage == context.page) {
4490 				context.map->ProtectPage(area, address, newProtection);
4491 					// Note: We assume that ProtectPage() is atomic (i.e.
4492 					// the page isn't temporarily unmapped), otherwise we'd have
4493 					// to make sure it isn't wired.
4494 				mapPage = false;
4495 			} else
4496 				unmapPage = true;
4497 		}
4498 
4499 		context.map->Unlock();
4500 
4501 		if (unmapPage) {
4502 			// If the page is wired, we can't unmap it. Wait until it is unwired
4503 			// again and restart. Note that the page cannot be wired for
4504 			// writing, since it it isn't in the topmost cache. So we can safely
4505 			// ignore ranges wired for writing (our own and other concurrent
4506 			// wiring attempts in progress) and in fact have to do that to avoid
4507 			// a deadlock.
4508 			VMAreaUnwiredWaiter waiter;
4509 			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4510 					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4511 				// unlock everything and wait
4512 				if (context.pageAllocated) {
4513 					// ... but since we allocated a page and inserted it into
4514 					// the top cache, remove and free it first. Otherwise we'd
4515 					// have a page from a lower cache mapped while an upper
4516 					// cache has a page that would shadow it.
4517 					context.topCache->RemovePage(context.page);
4518 					vm_page_free_etc(context.topCache, context.page,
4519 						&context.reservation);
4520 				} else
4521 					DEBUG_PAGE_ACCESS_END(context.page);
4522 
4523 				context.UnlockAll();
4524 				waiter.waitEntry.Wait();
4525 				continue;
4526 			}
4527 
4528 			// Note: The mapped page is a page of a lower cache. We are
4529 			// guaranteed to have that cached locked, our new page is a copy of
4530 			// that page, and the page is not busy. The logic for that guarantee
4531 			// is as follows: Since the page is mapped, it must live in the top
4532 			// cache (ruled out above) or any of its lower caches, and there is
4533 			// (was before the new page was inserted) no other page in any
4534 			// cache between the top cache and the page's cache (otherwise that
4535 			// would be mapped instead). That in turn means that our algorithm
4536 			// must have found it and therefore it cannot be busy either.
4537 			DEBUG_PAGE_ACCESS_START(mappedPage);
4538 			unmap_page(area, address);
4539 			DEBUG_PAGE_ACCESS_END(mappedPage);
4540 		}
4541 
4542 		if (mapPage) {
4543 			if (map_page(area, context.page, address, newProtection,
4544 					&context.reservation) != B_OK) {
4545 				// Mapping can only fail, when the page mapping object couldn't
4546 				// be allocated. Save for the missing mapping everything is
4547 				// fine, though. If this was a regular page fault, we'll simply
4548 				// leave and probably fault again. To make sure we'll have more
4549 				// luck then, we ensure that the minimum object reserve is
4550 				// available.
4551 				DEBUG_PAGE_ACCESS_END(context.page);
4552 
4553 				context.UnlockAll();
4554 
4555 				if (object_cache_reserve(page_mapping_object_cache_for(
4556 							context.page->physical_page_number), 1, 0)
4557 						!= B_OK) {
4558 					// Apparently the situation is serious. Let's get ourselves
4559 					// killed.
4560 					status = B_NO_MEMORY;
4561 				} else if (wirePage != NULL) {
4562 					// The caller expects us to wire the page. Since
4563 					// object_cache_reserve() succeeded, we should now be able
4564 					// to allocate a mapping structure. Restart.
4565 					continue;
4566 				}
4567 
4568 				break;
4569 			}
4570 		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4571 			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4572 
4573 		// also wire the page, if requested
4574 		if (wirePage != NULL && status == B_OK) {
4575 			increment_page_wired_count(context.page);
4576 			*wirePage = context.page;
4577 		}
4578 
4579 		DEBUG_PAGE_ACCESS_END(context.page);
4580 
4581 		break;
4582 	}
4583 
4584 	return status;
4585 }
4586 
4587 
4588 status_t
vm_get_physical_page(phys_addr_t paddr,addr_t * _vaddr,void ** _handle)4589 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4590 {
4591 	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4592 }
4593 
4594 status_t
vm_put_physical_page(addr_t vaddr,void * handle)4595 vm_put_physical_page(addr_t vaddr, void* handle)
4596 {
4597 	return sPhysicalPageMapper->PutPage(vaddr, handle);
4598 }
4599 
4600 
4601 status_t
vm_get_physical_page_current_cpu(phys_addr_t paddr,addr_t * _vaddr,void ** _handle)4602 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4603 	void** _handle)
4604 {
4605 	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4606 }
4607 
4608 status_t
vm_put_physical_page_current_cpu(addr_t vaddr,void * handle)4609 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4610 {
4611 	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4612 }
4613 
4614 
4615 status_t
vm_get_physical_page_debug(phys_addr_t paddr,addr_t * _vaddr,void ** _handle)4616 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4617 {
4618 	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4619 }
4620 
4621 status_t
vm_put_physical_page_debug(addr_t vaddr,void * handle)4622 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4623 {
4624 	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4625 }
4626 
4627 
4628 void
vm_get_info(system_info * info)4629 vm_get_info(system_info* info)
4630 {
4631 	swap_get_info(info);
4632 
4633 	MutexLocker locker(sAvailableMemoryLock);
4634 	info->needed_memory = sNeededMemory;
4635 	info->free_memory = sAvailableMemory;
4636 }
4637 
4638 
4639 uint32
vm_num_page_faults(void)4640 vm_num_page_faults(void)
4641 {
4642 	return sPageFaults;
4643 }
4644 
4645 
4646 off_t
vm_available_memory(void)4647 vm_available_memory(void)
4648 {
4649 	MutexLocker locker(sAvailableMemoryLock);
4650 	return sAvailableMemory;
4651 }
4652 
4653 
4654 /*!	Like vm_available_memory(), but only for use in the kernel
4655 	debugger.
4656 */
4657 off_t
vm_available_memory_debug(void)4658 vm_available_memory_debug(void)
4659 {
4660 	return sAvailableMemory;
4661 }
4662 
4663 
4664 off_t
vm_available_not_needed_memory(void)4665 vm_available_not_needed_memory(void)
4666 {
4667 	MutexLocker locker(sAvailableMemoryLock);
4668 	return sAvailableMemory - sNeededMemory;
4669 }
4670 
4671 
4672 /*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4673 	debugger.
4674 */
4675 off_t
vm_available_not_needed_memory_debug(void)4676 vm_available_not_needed_memory_debug(void)
4677 {
4678 	return sAvailableMemory - sNeededMemory;
4679 }
4680 
4681 
4682 size_t
vm_kernel_address_space_left(void)4683 vm_kernel_address_space_left(void)
4684 {
4685 	return VMAddressSpace::Kernel()->FreeSpace();
4686 }
4687 
4688 
4689 void
vm_unreserve_memory(size_t amount)4690 vm_unreserve_memory(size_t amount)
4691 {
4692 	mutex_lock(&sAvailableMemoryLock);
4693 
4694 	sAvailableMemory += amount;
4695 
4696 	mutex_unlock(&sAvailableMemoryLock);
4697 }
4698 
4699 
4700 status_t
vm_try_reserve_memory(size_t amount,int priority,bigtime_t timeout)4701 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4702 {
4703 	size_t reserve = kMemoryReserveForPriority[priority];
4704 
4705 	MutexLocker locker(sAvailableMemoryLock);
4706 
4707 	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4708 
4709 	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4710 		sAvailableMemory -= amount;
4711 		return B_OK;
4712 	}
4713 
4714 	if (amount >= (vm_page_num_pages() * B_PAGE_SIZE)) {
4715 		// Do not wait for something that will never happen.
4716 		return B_NO_MEMORY;
4717 	}
4718 
4719 	if (timeout <= 0)
4720 		return B_NO_MEMORY;
4721 
4722 	// turn timeout into an absolute timeout
4723 	timeout += system_time();
4724 
4725 	// loop until we've got the memory or the timeout occurs
4726 	do {
4727 		sNeededMemory += amount;
4728 
4729 		// call the low resource manager
4730 		locker.Unlock();
4731 		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4732 			B_ABSOLUTE_TIMEOUT, timeout);
4733 		locker.Lock();
4734 
4735 		sNeededMemory -= amount;
4736 
4737 		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4738 			sAvailableMemory -= amount;
4739 			return B_OK;
4740 		}
4741 	} while (timeout > system_time());
4742 
4743 	return B_NO_MEMORY;
4744 }
4745 
4746 
4747 status_t
vm_set_area_memory_type(area_id id,phys_addr_t physicalBase,uint32 type)4748 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4749 {
4750 	// NOTE: The caller is responsible for synchronizing calls to this function!
4751 
4752 	AddressSpaceReadLocker locker;
4753 	VMArea* area;
4754 	status_t status = locker.SetFromArea(id, area);
4755 	if (status != B_OK)
4756 		return status;
4757 
4758 	// nothing to do, if the type doesn't change
4759 	uint32 oldType = area->MemoryType();
4760 	if (type == oldType)
4761 		return B_OK;
4762 
4763 	// set the memory type of the area and the mapped pages
4764 	VMTranslationMap* map = area->address_space->TranslationMap();
4765 	map->Lock();
4766 	area->SetMemoryType(type);
4767 	map->ProtectArea(area, area->protection);
4768 	map->Unlock();
4769 
4770 	// set the physical memory type
4771 	status_t error = arch_vm_set_memory_type(area, physicalBase, type, NULL);
4772 	if (error != B_OK) {
4773 		// reset the memory type of the area and the mapped pages
4774 		map->Lock();
4775 		area->SetMemoryType(oldType);
4776 		map->ProtectArea(area, area->protection);
4777 		map->Unlock();
4778 		return error;
4779 	}
4780 
4781 	return B_OK;
4782 
4783 }
4784 
4785 
4786 /*!	This function enforces some protection properties:
4787 	 - kernel areas must be W^X (after kernel startup)
4788 	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4789 	 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4790 */
4791 static void
fix_protection(uint32 * protection)4792 fix_protection(uint32* protection)
4793 {
4794 	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
4795 		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
4796 			|| (*protection & B_WRITE_AREA) != 0)
4797 		&& !gKernelStartup)
4798 		panic("kernel areas cannot be both writable and executable!");
4799 
4800 	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4801 		if ((*protection & B_WRITE_AREA) != 0)
4802 			*protection |= B_KERNEL_WRITE_AREA;
4803 		if ((*protection & B_READ_AREA) != 0)
4804 			*protection |= B_KERNEL_READ_AREA;
4805 	}
4806 }
4807 
4808 
4809 static void
fill_area_info(struct VMArea * area,area_info * info,size_t size)4810 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4811 {
4812 	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4813 	info->area = area->id;
4814 	info->address = (void*)area->Base();
4815 	info->size = area->Size();
4816 	info->protection = area->protection;
4817 	info->lock = area->wiring;
4818 	info->team = area->address_space->ID();
4819 	info->copy_count = 0;
4820 	info->in_count = 0;
4821 	info->out_count = 0;
4822 		// TODO: retrieve real values here!
4823 
4824 	VMCache* cache = vm_area_get_locked_cache(area);
4825 
4826 	// Note, this is a simplification; the cache could be larger than this area
4827 	info->ram_size = cache->page_count * B_PAGE_SIZE;
4828 
4829 	vm_area_put_locked_cache(cache);
4830 }
4831 
4832 
4833 static status_t
vm_resize_area(area_id areaID,size_t newSize,bool kernel)4834 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4835 {
4836 	// is newSize a multiple of B_PAGE_SIZE?
4837 	if (newSize & (B_PAGE_SIZE - 1))
4838 		return B_BAD_VALUE;
4839 
4840 	// lock all affected address spaces and the cache
4841 	VMArea* area;
4842 	VMCache* cache;
4843 
4844 	MultiAddressSpaceLocker locker;
4845 	AreaCacheLocker cacheLocker;
4846 
4847 	status_t status;
4848 	size_t oldSize;
4849 	bool anyKernelArea;
4850 	bool restart;
4851 
4852 	do {
4853 		anyKernelArea = false;
4854 		restart = false;
4855 
4856 		locker.Unset();
4857 		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4858 		if (status != B_OK)
4859 			return status;
4860 		cacheLocker.SetTo(cache, true);	// already locked
4861 
4862 		// enforce restrictions
4863 		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
4864 				|| (area->protection & B_KERNEL_AREA) != 0)) {
4865 			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
4866 				"resize kernel area %" B_PRId32 " (%s)\n",
4867 				team_get_current_team_id(), areaID, area->name);
4868 			return B_NOT_ALLOWED;
4869 		}
4870 		// TODO: Enforce all restrictions (team, etc.)!
4871 
4872 		oldSize = area->Size();
4873 		if (newSize == oldSize)
4874 			return B_OK;
4875 
4876 		if (cache->type != CACHE_TYPE_RAM)
4877 			return B_NOT_ALLOWED;
4878 
4879 		if (oldSize < newSize) {
4880 			// We need to check if all areas of this cache can be resized.
4881 			for (VMArea* current = cache->areas; current != NULL;
4882 					current = current->cache_next) {
4883 				if (!current->address_space->CanResizeArea(current, newSize))
4884 					return B_ERROR;
4885 				anyKernelArea
4886 					|= current->address_space == VMAddressSpace::Kernel();
4887 			}
4888 		} else {
4889 			// We're shrinking the areas, so we must make sure the affected
4890 			// ranges are not wired.
4891 			for (VMArea* current = cache->areas; current != NULL;
4892 					current = current->cache_next) {
4893 				anyKernelArea
4894 					|= current->address_space == VMAddressSpace::Kernel();
4895 
4896 				if (wait_if_area_range_is_wired(current,
4897 						current->Base() + newSize, oldSize - newSize, &locker,
4898 						&cacheLocker)) {
4899 					restart = true;
4900 					break;
4901 				}
4902 			}
4903 		}
4904 	} while (restart);
4905 
4906 	// Okay, looks good so far, so let's do it
4907 
4908 	int priority = kernel && anyKernelArea
4909 		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
4910 	uint32 allocationFlags = kernel && anyKernelArea
4911 		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
4912 
4913 	if (oldSize < newSize) {
4914 		// Growing the cache can fail, so we do it first.
4915 		status = cache->Resize(cache->virtual_base + newSize, priority);
4916 		if (status != B_OK)
4917 			return status;
4918 	}
4919 
4920 	for (VMArea* current = cache->areas; current != NULL;
4921 			current = current->cache_next) {
4922 		status = current->address_space->ResizeArea(current, newSize,
4923 			allocationFlags);
4924 		if (status != B_OK)
4925 			break;
4926 
4927 		// We also need to unmap all pages beyond the new size, if the area has
4928 		// shrunk
4929 		if (newSize < oldSize) {
4930 			VMCacheChainLocker cacheChainLocker(cache);
4931 			cacheChainLocker.LockAllSourceCaches();
4932 
4933 			unmap_pages(current, current->Base() + newSize,
4934 				oldSize - newSize);
4935 
4936 			cacheChainLocker.Unlock(cache);
4937 		}
4938 	}
4939 
4940 	if (status == B_OK) {
4941 		// Shrink or grow individual page protections if in use.
4942 		if (area->page_protections != NULL) {
4943 			size_t bytes = area_page_protections_size(newSize);
4944 			uint8* newProtections
4945 				= (uint8*)realloc(area->page_protections, bytes);
4946 			if (newProtections == NULL)
4947 				status = B_NO_MEMORY;
4948 			else {
4949 				area->page_protections = newProtections;
4950 
4951 				if (oldSize < newSize) {
4952 					// init the additional page protections to that of the area
4953 					uint32 offset = area_page_protections_size(oldSize);
4954 					uint32 areaProtection = area->protection
4955 						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
4956 					memset(area->page_protections + offset,
4957 						areaProtection | (areaProtection << 4), bytes - offset);
4958 					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
4959 						uint8& entry = area->page_protections[offset - 1];
4960 						entry = (entry & 0x0f) | (areaProtection << 4);
4961 					}
4962 				}
4963 			}
4964 		}
4965 	}
4966 
4967 	// shrinking the cache can't fail, so we do it now
4968 	if (status == B_OK && newSize < oldSize)
4969 		status = cache->Resize(cache->virtual_base + newSize, priority);
4970 
4971 	if (status != B_OK) {
4972 		// Something failed -- resize the areas back to their original size.
4973 		// This can fail, too, in which case we're seriously screwed.
4974 		for (VMArea* current = cache->areas; current != NULL;
4975 				current = current->cache_next) {
4976 			if (current->address_space->ResizeArea(current, oldSize,
4977 					allocationFlags) != B_OK) {
4978 				panic("vm_resize_area(): Failed and not being able to restore "
4979 					"original state.");
4980 			}
4981 		}
4982 
4983 		cache->Resize(cache->virtual_base + oldSize, priority);
4984 	}
4985 
4986 	// TODO: we must honour the lock restrictions of this area
4987 	return status;
4988 }
4989 
4990 
4991 status_t
vm_memset_physical(phys_addr_t address,int value,phys_size_t length)4992 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
4993 {
4994 	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
4995 }
4996 
4997 
4998 status_t
vm_memcpy_from_physical(void * to,phys_addr_t from,size_t length,bool user)4999 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5000 {
5001 	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5002 }
5003 
5004 
5005 status_t
vm_memcpy_to_physical(phys_addr_t to,const void * _from,size_t length,bool user)5006 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5007 	bool user)
5008 {
5009 	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5010 }
5011 
5012 
5013 void
vm_memcpy_physical_page(phys_addr_t to,phys_addr_t from)5014 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5015 {
5016 	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5017 }
5018 
5019 
5020 /** Validate that a memory range is either fully in kernel space, or fully in
5021  *  userspace */
5022 static inline bool
validate_memory_range(const void * addr,size_t size)5023 validate_memory_range(const void* addr, size_t size)
5024 {
5025 	addr_t address = (addr_t)addr;
5026 
5027 	// Check for overflows on all addresses.
5028 	if ((address + size) < address)
5029 		return false;
5030 
5031 	// Validate that the address range does not cross the kernel/user boundary.
5032 	return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5033 }
5034 
5035 
5036 //	#pragma mark - kernel public API
5037 
5038 
5039 status_t
user_memcpy(void * to,const void * from,size_t size)5040 user_memcpy(void* to, const void* from, size_t size)
5041 {
5042 	if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5043 		return B_BAD_ADDRESS;
5044 
5045 	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5046 		return B_BAD_ADDRESS;
5047 
5048 	return B_OK;
5049 }
5050 
5051 
5052 /*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5053 	the string in \a to, NULL-terminating the result.
5054 
5055 	\param to Pointer to the destination C-string.
5056 	\param from Pointer to the source C-string.
5057 	\param size Size in bytes of the string buffer pointed to by \a to.
5058 
5059 	\return strlen(\a from).
5060 */
5061 ssize_t
user_strlcpy(char * to,const char * from,size_t size)5062 user_strlcpy(char* to, const char* from, size_t size)
5063 {
5064 	if (to == NULL && size != 0)
5065 		return B_BAD_VALUE;
5066 	if (from == NULL)
5067 		return B_BAD_ADDRESS;
5068 
5069 	// Protect the source address from overflows.
5070 	size_t maxSize = size;
5071 	if ((addr_t)from + maxSize < (addr_t)from)
5072 		maxSize -= (addr_t)from + maxSize;
5073 	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5074 		maxSize = USER_TOP - (addr_t)from;
5075 
5076 	if (!validate_memory_range(to, maxSize))
5077 		return B_BAD_ADDRESS;
5078 
5079 	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5080 	if (result < 0)
5081 		return result;
5082 
5083 	// If we hit the address overflow boundary, fail.
5084 	if ((size_t)result >= maxSize && maxSize < size)
5085 		return B_BAD_ADDRESS;
5086 
5087 	return result;
5088 }
5089 
5090 
5091 status_t
user_memset(void * s,char c,size_t count)5092 user_memset(void* s, char c, size_t count)
5093 {
5094 	if (!validate_memory_range(s, count))
5095 		return B_BAD_ADDRESS;
5096 
5097 	if (arch_cpu_user_memset(s, c, count) < B_OK)
5098 		return B_BAD_ADDRESS;
5099 
5100 	return B_OK;
5101 }
5102 
5103 
5104 /*!	Wires a single page at the given address.
5105 
5106 	\param team The team whose address space the address belongs to. Supports
5107 		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5108 		parameter is ignored.
5109 	\param address address The virtual address to wire down. Does not need to
5110 		be page aligned.
5111 	\param writable If \c true the page shall be writable.
5112 	\param info On success the info is filled in, among other things
5113 		containing the physical address the given virtual one translates to.
5114 	\return \c B_OK, when the page could be wired, another error code otherwise.
5115 */
5116 status_t
vm_wire_page(team_id team,addr_t address,bool writable,VMPageWiringInfo * info)5117 vm_wire_page(team_id team, addr_t address, bool writable,
5118 	VMPageWiringInfo* info)
5119 {
5120 	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5121 	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5122 
5123 	// compute the page protection that is required
5124 	bool isUser = IS_USER_ADDRESS(address);
5125 	uint32 requiredProtection = PAGE_PRESENT
5126 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5127 	if (writable)
5128 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5129 
5130 	// get and read lock the address space
5131 	VMAddressSpace* addressSpace = NULL;
5132 	if (isUser) {
5133 		if (team == B_CURRENT_TEAM)
5134 			addressSpace = VMAddressSpace::GetCurrent();
5135 		else
5136 			addressSpace = VMAddressSpace::Get(team);
5137 	} else
5138 		addressSpace = VMAddressSpace::GetKernel();
5139 	if (addressSpace == NULL)
5140 		return B_ERROR;
5141 
5142 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5143 
5144 	VMTranslationMap* map = addressSpace->TranslationMap();
5145 	status_t error = B_OK;
5146 
5147 	// get the area
5148 	VMArea* area = addressSpace->LookupArea(pageAddress);
5149 	if (area == NULL) {
5150 		addressSpace->Put();
5151 		return B_BAD_ADDRESS;
5152 	}
5153 
5154 	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5155 	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5156 
5157 	// mark the area range wired
5158 	area->Wire(&info->range);
5159 
5160 	// Lock the area's cache chain and the translation map. Needed to look
5161 	// up the page and play with its wired count.
5162 	cacheChainLocker.LockAllSourceCaches();
5163 	map->Lock();
5164 
5165 	phys_addr_t physicalAddress;
5166 	uint32 flags;
5167 	vm_page* page;
5168 	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5169 		&& (flags & requiredProtection) == requiredProtection
5170 		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5171 			!= NULL) {
5172 		// Already mapped with the correct permissions -- just increment
5173 		// the page's wired count.
5174 		increment_page_wired_count(page);
5175 
5176 		map->Unlock();
5177 		cacheChainLocker.Unlock();
5178 		addressSpaceLocker.Unlock();
5179 	} else {
5180 		// Let vm_soft_fault() map the page for us, if possible. We need
5181 		// to fully unlock to avoid deadlocks. Since we have already
5182 		// wired the area itself, nothing disturbing will happen with it
5183 		// in the meantime.
5184 		map->Unlock();
5185 		cacheChainLocker.Unlock();
5186 		addressSpaceLocker.Unlock();
5187 
5188 		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5189 			isUser, &page);
5190 
5191 		if (error != B_OK) {
5192 			// The page could not be mapped -- clean up.
5193 			VMCache* cache = vm_area_get_locked_cache(area);
5194 			area->Unwire(&info->range);
5195 			cache->ReleaseRefAndUnlock();
5196 			addressSpace->Put();
5197 			return error;
5198 		}
5199 	}
5200 
5201 	info->physicalAddress
5202 		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5203 			+ address % B_PAGE_SIZE;
5204 	info->page = page;
5205 
5206 	return B_OK;
5207 }
5208 
5209 
5210 /*!	Unwires a single page previously wired via vm_wire_page().
5211 
5212 	\param info The same object passed to vm_wire_page() before.
5213 */
5214 void
vm_unwire_page(VMPageWiringInfo * info)5215 vm_unwire_page(VMPageWiringInfo* info)
5216 {
5217 	// lock the address space
5218 	VMArea* area = info->range.area;
5219 	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5220 		// takes over our reference
5221 
5222 	// lock the top cache
5223 	VMCache* cache = vm_area_get_locked_cache(area);
5224 	VMCacheChainLocker cacheChainLocker(cache);
5225 
5226 	if (info->page->Cache() != cache) {
5227 		// The page is not in the top cache, so we lock the whole cache chain
5228 		// before touching the page's wired count.
5229 		cacheChainLocker.LockAllSourceCaches();
5230 	}
5231 
5232 	decrement_page_wired_count(info->page);
5233 
5234 	// remove the wired range from the range
5235 	area->Unwire(&info->range);
5236 
5237 	cacheChainLocker.Unlock();
5238 }
5239 
5240 
5241 /*!	Wires down the given address range in the specified team's address space.
5242 
5243 	If successful the function
5244 	- acquires a reference to the specified team's address space,
5245 	- adds respective wired ranges to all areas that intersect with the given
5246 	  address range,
5247 	- makes sure all pages in the given address range are mapped with the
5248 	  requested access permissions and increments their wired count.
5249 
5250 	It fails, when \a team doesn't specify a valid address space, when any part
5251 	of the specified address range is not covered by areas, when the concerned
5252 	areas don't allow mapping with the requested permissions, or when mapping
5253 	failed for another reason.
5254 
5255 	When successful the call must be balanced by a unlock_memory_etc() call with
5256 	the exact same parameters.
5257 
5258 	\param team Identifies the address (via team ID). \c B_CURRENT_TEAM is
5259 		supported.
5260 	\param address The start of the address range to be wired.
5261 	\param numBytes The size of the address range to be wired.
5262 	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5263 		requests that the range must be wired writable ("read from device
5264 		into memory").
5265 	\return \c B_OK on success, another error code otherwise.
5266 */
5267 status_t
lock_memory_etc(team_id team,void * address,size_t numBytes,uint32 flags)5268 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5269 {
5270 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5271 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5272 
5273 	// compute the page protection that is required
5274 	bool isUser = IS_USER_ADDRESS(address);
5275 	bool writable = (flags & B_READ_DEVICE) == 0;
5276 	uint32 requiredProtection = PAGE_PRESENT
5277 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5278 	if (writable)
5279 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5280 
5281 	uint32 mallocFlags = isUser
5282 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5283 
5284 	// get and read lock the address space
5285 	VMAddressSpace* addressSpace = NULL;
5286 	if (isUser) {
5287 		if (team == B_CURRENT_TEAM)
5288 			addressSpace = VMAddressSpace::GetCurrent();
5289 		else
5290 			addressSpace = VMAddressSpace::Get(team);
5291 	} else
5292 		addressSpace = VMAddressSpace::GetKernel();
5293 	if (addressSpace == NULL)
5294 		return B_ERROR;
5295 
5296 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5297 		// We get a new address space reference here. The one we got above will
5298 		// be freed by unlock_memory_etc().
5299 
5300 	VMTranslationMap* map = addressSpace->TranslationMap();
5301 	status_t error = B_OK;
5302 
5303 	// iterate through all concerned areas
5304 	addr_t nextAddress = lockBaseAddress;
5305 	while (nextAddress != lockEndAddress) {
5306 		// get the next area
5307 		VMArea* area = addressSpace->LookupArea(nextAddress);
5308 		if (area == NULL) {
5309 			error = B_BAD_ADDRESS;
5310 			break;
5311 		}
5312 
5313 		addr_t areaStart = nextAddress;
5314 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5315 
5316 		// allocate the wired range (do that before locking the cache to avoid
5317 		// deadlocks)
5318 		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5319 			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5320 		if (range == NULL) {
5321 			error = B_NO_MEMORY;
5322 			break;
5323 		}
5324 
5325 		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5326 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5327 
5328 		// mark the area range wired
5329 		area->Wire(range);
5330 
5331 		// Depending on the area cache type and the wiring, we may not need to
5332 		// look at the individual pages.
5333 		if (area->cache_type == CACHE_TYPE_NULL
5334 			|| area->cache_type == CACHE_TYPE_DEVICE
5335 			|| area->wiring == B_FULL_LOCK
5336 			|| area->wiring == B_CONTIGUOUS) {
5337 			nextAddress = areaEnd;
5338 			continue;
5339 		}
5340 
5341 		// Lock the area's cache chain and the translation map. Needed to look
5342 		// up pages and play with their wired count.
5343 		cacheChainLocker.LockAllSourceCaches();
5344 		map->Lock();
5345 
5346 		// iterate through the pages and wire them
5347 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5348 			phys_addr_t physicalAddress;
5349 			uint32 flags;
5350 
5351 			vm_page* page;
5352 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5353 				&& (flags & requiredProtection) == requiredProtection
5354 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5355 					!= NULL) {
5356 				// Already mapped with the correct permissions -- just increment
5357 				// the page's wired count.
5358 				increment_page_wired_count(page);
5359 			} else {
5360 				// Let vm_soft_fault() map the page for us, if possible. We need
5361 				// to fully unlock to avoid deadlocks. Since we have already
5362 				// wired the area itself, nothing disturbing will happen with it
5363 				// in the meantime.
5364 				map->Unlock();
5365 				cacheChainLocker.Unlock();
5366 				addressSpaceLocker.Unlock();
5367 
5368 				error = vm_soft_fault(addressSpace, nextAddress, writable,
5369 					false, isUser, &page);
5370 
5371 				addressSpaceLocker.Lock();
5372 				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5373 				cacheChainLocker.LockAllSourceCaches();
5374 				map->Lock();
5375 			}
5376 
5377 			if (error != B_OK)
5378 				break;
5379 		}
5380 
5381 		map->Unlock();
5382 
5383 		if (error == B_OK) {
5384 			cacheChainLocker.Unlock();
5385 		} else {
5386 			// An error occurred, so abort right here. If the current address
5387 			// is the first in this area, unwire the area, since we won't get
5388 			// to it when reverting what we've done so far.
5389 			if (nextAddress == areaStart) {
5390 				area->Unwire(range);
5391 				cacheChainLocker.Unlock();
5392 				range->~VMAreaWiredRange();
5393 				free_etc(range, mallocFlags);
5394 			} else
5395 				cacheChainLocker.Unlock();
5396 
5397 			break;
5398 		}
5399 	}
5400 
5401 	if (error != B_OK) {
5402 		// An error occurred, so unwire all that we've already wired. Note that
5403 		// even if not a single page was wired, unlock_memory_etc() is called
5404 		// to put the address space reference.
5405 		addressSpaceLocker.Unlock();
5406 		unlock_memory_etc(team, (void*)lockBaseAddress,
5407 			nextAddress - lockBaseAddress, flags);
5408 	}
5409 
5410 	return error;
5411 }
5412 
5413 
5414 status_t
lock_memory(void * address,size_t numBytes,uint32 flags)5415 lock_memory(void* address, size_t numBytes, uint32 flags)
5416 {
5417 	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5418 }
5419 
5420 
5421 /*!	Unwires an address range previously wired with lock_memory_etc().
5422 
5423 	Note that a call to this function must balance a previous lock_memory_etc()
5424 	call with exactly the same parameters.
5425 */
5426 status_t
unlock_memory_etc(team_id team,void * address,size_t numBytes,uint32 flags)5427 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5428 {
5429 	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5430 	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5431 
5432 	// compute the page protection that is required
5433 	bool isUser = IS_USER_ADDRESS(address);
5434 	bool writable = (flags & B_READ_DEVICE) == 0;
5435 	uint32 requiredProtection = PAGE_PRESENT
5436 		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5437 	if (writable)
5438 		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5439 
5440 	uint32 mallocFlags = isUser
5441 		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5442 
5443 	// get and read lock the address space
5444 	VMAddressSpace* addressSpace = NULL;
5445 	if (isUser) {
5446 		if (team == B_CURRENT_TEAM)
5447 			addressSpace = VMAddressSpace::GetCurrent();
5448 		else
5449 			addressSpace = VMAddressSpace::Get(team);
5450 	} else
5451 		addressSpace = VMAddressSpace::GetKernel();
5452 	if (addressSpace == NULL)
5453 		return B_ERROR;
5454 
5455 	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5456 		// Take over the address space reference. We don't unlock until we're
5457 		// done.
5458 
5459 	VMTranslationMap* map = addressSpace->TranslationMap();
5460 	status_t error = B_OK;
5461 
5462 	// iterate through all concerned areas
5463 	addr_t nextAddress = lockBaseAddress;
5464 	while (nextAddress != lockEndAddress) {
5465 		// get the next area
5466 		VMArea* area = addressSpace->LookupArea(nextAddress);
5467 		if (area == NULL) {
5468 			error = B_BAD_ADDRESS;
5469 			break;
5470 		}
5471 
5472 		addr_t areaStart = nextAddress;
5473 		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5474 
5475 		// Lock the area's top cache. This is a requirement for
5476 		// VMArea::Unwire().
5477 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5478 
5479 		// Depending on the area cache type and the wiring, we may not need to
5480 		// look at the individual pages.
5481 		if (area->cache_type == CACHE_TYPE_NULL
5482 			|| area->cache_type == CACHE_TYPE_DEVICE
5483 			|| area->wiring == B_FULL_LOCK
5484 			|| area->wiring == B_CONTIGUOUS) {
5485 			// unwire the range (to avoid deadlocks we delete the range after
5486 			// unlocking the cache)
5487 			nextAddress = areaEnd;
5488 			VMAreaWiredRange* range = area->Unwire(areaStart,
5489 				areaEnd - areaStart, writable);
5490 			cacheChainLocker.Unlock();
5491 			if (range != NULL) {
5492 				range->~VMAreaWiredRange();
5493 				free_etc(range, mallocFlags);
5494 			}
5495 			continue;
5496 		}
5497 
5498 		// Lock the area's cache chain and the translation map. Needed to look
5499 		// up pages and play with their wired count.
5500 		cacheChainLocker.LockAllSourceCaches();
5501 		map->Lock();
5502 
5503 		// iterate through the pages and unwire them
5504 		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5505 			phys_addr_t physicalAddress;
5506 			uint32 flags;
5507 
5508 			vm_page* page;
5509 			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5510 				&& (flags & PAGE_PRESENT) != 0
5511 				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5512 					!= NULL) {
5513 				// Already mapped with the correct permissions -- just increment
5514 				// the page's wired count.
5515 				decrement_page_wired_count(page);
5516 			} else {
5517 				panic("unlock_memory_etc(): Failed to unwire page: address "
5518 					"space %p, address: %#" B_PRIxADDR, addressSpace,
5519 					nextAddress);
5520 				error = B_BAD_VALUE;
5521 				break;
5522 			}
5523 		}
5524 
5525 		map->Unlock();
5526 
5527 		// All pages are unwired. Remove the area's wired range as well (to
5528 		// avoid deadlocks we delete the range after unlocking the cache).
5529 		VMAreaWiredRange* range = area->Unwire(areaStart,
5530 			areaEnd - areaStart, writable);
5531 
5532 		cacheChainLocker.Unlock();
5533 
5534 		if (range != NULL) {
5535 			range->~VMAreaWiredRange();
5536 			free_etc(range, mallocFlags);
5537 		}
5538 
5539 		if (error != B_OK)
5540 			break;
5541 	}
5542 
5543 	// get rid of the address space reference lock_memory_etc() acquired
5544 	addressSpace->Put();
5545 
5546 	return error;
5547 }
5548 
5549 
5550 status_t
unlock_memory(void * address,size_t numBytes,uint32 flags)5551 unlock_memory(void* address, size_t numBytes, uint32 flags)
5552 {
5553 	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5554 }
5555 
5556 
5557 /*!	Similar to get_memory_map(), but also allows to specify the address space
5558 	for the memory in question and has a saner semantics.
5559 	Returns \c B_OK when the complete range could be translated or
5560 	\c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5561 	case the actual number of entries is written to \c *_numEntries. Any other
5562 	error case indicates complete failure; \c *_numEntries will be set to \c 0
5563 	in this case.
5564 */
5565 status_t
get_memory_map_etc(team_id team,const void * address,size_t numBytes,physical_entry * table,uint32 * _numEntries)5566 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5567 	physical_entry* table, uint32* _numEntries)
5568 {
5569 	uint32 numEntries = *_numEntries;
5570 	*_numEntries = 0;
5571 
5572 	VMAddressSpace* addressSpace;
5573 	addr_t virtualAddress = (addr_t)address;
5574 	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5575 	phys_addr_t physicalAddress;
5576 	status_t status = B_OK;
5577 	int32 index = -1;
5578 	addr_t offset = 0;
5579 	bool interrupts = are_interrupts_enabled();
5580 
5581 	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5582 		"entries)\n", team, address, numBytes, numEntries));
5583 
5584 	if (numEntries == 0 || numBytes == 0)
5585 		return B_BAD_VALUE;
5586 
5587 	// in which address space is the address to be found?
5588 	if (IS_USER_ADDRESS(virtualAddress)) {
5589 		if (team == B_CURRENT_TEAM)
5590 			addressSpace = VMAddressSpace::GetCurrent();
5591 		else
5592 			addressSpace = VMAddressSpace::Get(team);
5593 	} else
5594 		addressSpace = VMAddressSpace::GetKernel();
5595 
5596 	if (addressSpace == NULL)
5597 		return B_ERROR;
5598 
5599 	VMTranslationMap* map = addressSpace->TranslationMap();
5600 
5601 	if (interrupts)
5602 		map->Lock();
5603 
5604 	while (offset < numBytes) {
5605 		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5606 		uint32 flags;
5607 
5608 		if (interrupts) {
5609 			status = map->Query((addr_t)address + offset, &physicalAddress,
5610 				&flags);
5611 		} else {
5612 			status = map->QueryInterrupt((addr_t)address + offset,
5613 				&physicalAddress, &flags);
5614 		}
5615 		if (status < B_OK)
5616 			break;
5617 		if ((flags & PAGE_PRESENT) == 0) {
5618 			panic("get_memory_map() called on unmapped memory!");
5619 			return B_BAD_ADDRESS;
5620 		}
5621 
5622 		if (index < 0 && pageOffset > 0) {
5623 			physicalAddress += pageOffset;
5624 			if (bytes > B_PAGE_SIZE - pageOffset)
5625 				bytes = B_PAGE_SIZE - pageOffset;
5626 		}
5627 
5628 		// need to switch to the next physical_entry?
5629 		if (index < 0 || table[index].address
5630 				!= physicalAddress - table[index].size) {
5631 			if ((uint32)++index + 1 > numEntries) {
5632 				// table to small
5633 				break;
5634 			}
5635 			table[index].address = physicalAddress;
5636 			table[index].size = bytes;
5637 		} else {
5638 			// page does fit in current entry
5639 			table[index].size += bytes;
5640 		}
5641 
5642 		offset += bytes;
5643 	}
5644 
5645 	if (interrupts)
5646 		map->Unlock();
5647 
5648 	if (status != B_OK)
5649 		return status;
5650 
5651 	if ((uint32)index + 1 > numEntries) {
5652 		*_numEntries = index;
5653 		return B_BUFFER_OVERFLOW;
5654 	}
5655 
5656 	*_numEntries = index + 1;
5657 	return B_OK;
5658 }
5659 
5660 
5661 /*!	According to the BeBook, this function should always succeed.
5662 	This is no longer the case.
5663 */
5664 extern "C" int32
__get_memory_map_haiku(const void * address,size_t numBytes,physical_entry * table,int32 numEntries)5665 __get_memory_map_haiku(const void* address, size_t numBytes,
5666 	physical_entry* table, int32 numEntries)
5667 {
5668 	uint32 entriesRead = numEntries;
5669 	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5670 		table, &entriesRead);
5671 	if (error != B_OK)
5672 		return error;
5673 
5674 	// close the entry list
5675 
5676 	// if it's only one entry, we will silently accept the missing ending
5677 	if (numEntries == 1)
5678 		return B_OK;
5679 
5680 	if (entriesRead + 1 > (uint32)numEntries)
5681 		return B_BUFFER_OVERFLOW;
5682 
5683 	table[entriesRead].address = 0;
5684 	table[entriesRead].size = 0;
5685 
5686 	return B_OK;
5687 }
5688 
5689 
5690 area_id
area_for(void * address)5691 area_for(void* address)
5692 {
5693 	return vm_area_for((addr_t)address, true);
5694 }
5695 
5696 
5697 area_id
find_area(const char * name)5698 find_area(const char* name)
5699 {
5700 	return VMAreas::Find(name);
5701 }
5702 
5703 
5704 status_t
_get_area_info(area_id id,area_info * info,size_t size)5705 _get_area_info(area_id id, area_info* info, size_t size)
5706 {
5707 	if (size != sizeof(area_info) || info == NULL)
5708 		return B_BAD_VALUE;
5709 
5710 	AddressSpaceReadLocker locker;
5711 	VMArea* area;
5712 	status_t status = locker.SetFromArea(id, area);
5713 	if (status != B_OK)
5714 		return status;
5715 
5716 	fill_area_info(area, info, size);
5717 	return B_OK;
5718 }
5719 
5720 
5721 status_t
_get_next_area_info(team_id team,ssize_t * cookie,area_info * info,size_t size)5722 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5723 {
5724 	addr_t nextBase = *(addr_t*)cookie;
5725 
5726 	// we're already through the list
5727 	if (nextBase == (addr_t)-1)
5728 		return B_ENTRY_NOT_FOUND;
5729 
5730 	if (team == B_CURRENT_TEAM)
5731 		team = team_get_current_team_id();
5732 
5733 	AddressSpaceReadLocker locker(team);
5734 	if (!locker.IsLocked())
5735 		return B_BAD_TEAM_ID;
5736 
5737 	VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
5738 	if (area == NULL) {
5739 		nextBase = (addr_t)-1;
5740 		return B_ENTRY_NOT_FOUND;
5741 	}
5742 
5743 	fill_area_info(area, info, size);
5744 	*cookie = (ssize_t)(area->Base() + 1);
5745 
5746 	return B_OK;
5747 }
5748 
5749 
5750 status_t
set_area_protection(area_id area,uint32 newProtection)5751 set_area_protection(area_id area, uint32 newProtection)
5752 {
5753 	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5754 		newProtection, true);
5755 }
5756 
5757 
5758 status_t
resize_area(area_id areaID,size_t newSize)5759 resize_area(area_id areaID, size_t newSize)
5760 {
5761 	return vm_resize_area(areaID, newSize, true);
5762 }
5763 
5764 
5765 /*!	Transfers the specified area to a new team. The caller must be the owner
5766 	of the area.
5767 */
5768 area_id
transfer_area(area_id id,void ** _address,uint32 addressSpec,team_id target,bool kernel)5769 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5770 	bool kernel)
5771 {
5772 	area_info info;
5773 	status_t status = get_area_info(id, &info);
5774 	if (status != B_OK)
5775 		return status;
5776 
5777 	if (!kernel && info.team != thread_get_current_thread()->team->id)
5778 		return B_PERMISSION_DENIED;
5779 
5780 	// We need to mark the area cloneable so the following operations work.
5781 	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
5782 	if (status != B_OK)
5783 		return status;
5784 
5785 	area_id clonedArea = vm_clone_area(target, info.name, _address,
5786 		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5787 	if (clonedArea < 0)
5788 		return clonedArea;
5789 
5790 	status = vm_delete_area(info.team, id, kernel);
5791 	if (status != B_OK) {
5792 		vm_delete_area(target, clonedArea, kernel);
5793 		return status;
5794 	}
5795 
5796 	// Now we can reset the protection to whatever it was before.
5797 	set_area_protection(clonedArea, info.protection);
5798 
5799 	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5800 
5801 	return clonedArea;
5802 }
5803 
5804 
5805 extern "C" area_id
__map_physical_memory_haiku(const char * name,phys_addr_t physicalAddress,size_t numBytes,uint32 addressSpec,uint32 protection,void ** _virtualAddress)5806 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
5807 	size_t numBytes, uint32 addressSpec, uint32 protection,
5808 	void** _virtualAddress)
5809 {
5810 	if (!arch_vm_supports_protection(protection))
5811 		return B_NOT_SUPPORTED;
5812 
5813 	fix_protection(&protection);
5814 
5815 	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
5816 		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
5817 		false);
5818 }
5819 
5820 
5821 area_id
clone_area(const char * name,void ** _address,uint32 addressSpec,uint32 protection,area_id source)5822 clone_area(const char* name, void** _address, uint32 addressSpec,
5823 	uint32 protection, area_id source)
5824 {
5825 	if ((protection & B_KERNEL_PROTECTION) == 0)
5826 		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5827 
5828 	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
5829 		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
5830 }
5831 
5832 
5833 area_id
create_area_etc(team_id team,const char * name,size_t size,uint32 lock,uint32 protection,uint32 flags,uint32 guardSize,const virtual_address_restrictions * virtualAddressRestrictions,const physical_address_restrictions * physicalAddressRestrictions,void ** _address)5834 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
5835 	uint32 protection, uint32 flags, uint32 guardSize,
5836 	const virtual_address_restrictions* virtualAddressRestrictions,
5837 	const physical_address_restrictions* physicalAddressRestrictions,
5838 	void** _address)
5839 {
5840 	fix_protection(&protection);
5841 
5842 	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
5843 		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
5844 		true, _address);
5845 }
5846 
5847 
5848 extern "C" area_id
__create_area_haiku(const char * name,void ** _address,uint32 addressSpec,size_t size,uint32 lock,uint32 protection)5849 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
5850 	size_t size, uint32 lock, uint32 protection)
5851 {
5852 	fix_protection(&protection);
5853 
5854 	virtual_address_restrictions virtualRestrictions = {};
5855 	virtualRestrictions.address = *_address;
5856 	virtualRestrictions.address_specification = addressSpec;
5857 	physical_address_restrictions physicalRestrictions = {};
5858 	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
5859 		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
5860 		true, _address);
5861 }
5862 
5863 
5864 status_t
delete_area(area_id area)5865 delete_area(area_id area)
5866 {
5867 	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
5868 }
5869 
5870 
5871 //	#pragma mark - Userland syscalls
5872 
5873 
5874 status_t
_user_reserve_address_range(addr_t * userAddress,uint32 addressSpec,addr_t size)5875 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
5876 	addr_t size)
5877 {
5878 	// filter out some unavailable values (for userland)
5879 	switch (addressSpec) {
5880 		case B_ANY_KERNEL_ADDRESS:
5881 		case B_ANY_KERNEL_BLOCK_ADDRESS:
5882 			return B_BAD_VALUE;
5883 	}
5884 
5885 	addr_t address;
5886 
5887 	if (!IS_USER_ADDRESS(userAddress)
5888 		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
5889 		return B_BAD_ADDRESS;
5890 
5891 	status_t status = vm_reserve_address_range(
5892 		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
5893 		RESERVED_AVOID_BASE);
5894 	if (status != B_OK)
5895 		return status;
5896 
5897 	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
5898 		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5899 			(void*)address, size);
5900 		return B_BAD_ADDRESS;
5901 	}
5902 
5903 	return B_OK;
5904 }
5905 
5906 
5907 status_t
_user_unreserve_address_range(addr_t address,addr_t size)5908 _user_unreserve_address_range(addr_t address, addr_t size)
5909 {
5910 	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5911 		(void*)address, size);
5912 }
5913 
5914 
5915 area_id
_user_area_for(void * address)5916 _user_area_for(void* address)
5917 {
5918 	return vm_area_for((addr_t)address, false);
5919 }
5920 
5921 
5922 area_id
_user_find_area(const char * userName)5923 _user_find_area(const char* userName)
5924 {
5925 	char name[B_OS_NAME_LENGTH];
5926 
5927 	if (!IS_USER_ADDRESS(userName)
5928 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5929 		return B_BAD_ADDRESS;
5930 
5931 	return find_area(name);
5932 }
5933 
5934 
5935 status_t
_user_get_area_info(area_id area,area_info * userInfo)5936 _user_get_area_info(area_id area, area_info* userInfo)
5937 {
5938 	if (!IS_USER_ADDRESS(userInfo))
5939 		return B_BAD_ADDRESS;
5940 
5941 	area_info info;
5942 	status_t status = get_area_info(area, &info);
5943 	if (status < B_OK)
5944 		return status;
5945 
5946 	// TODO: do we want to prevent userland from seeing kernel protections?
5947 	//info.protection &= B_USER_PROTECTION;
5948 
5949 	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5950 		return B_BAD_ADDRESS;
5951 
5952 	return status;
5953 }
5954 
5955 
5956 status_t
_user_get_next_area_info(team_id team,ssize_t * userCookie,area_info * userInfo)5957 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
5958 {
5959 	ssize_t cookie;
5960 
5961 	if (!IS_USER_ADDRESS(userCookie)
5962 		|| !IS_USER_ADDRESS(userInfo)
5963 		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
5964 		return B_BAD_ADDRESS;
5965 
5966 	area_info info;
5967 	status_t status = _get_next_area_info(team, &cookie, &info,
5968 		sizeof(area_info));
5969 	if (status != B_OK)
5970 		return status;
5971 
5972 	//info.protection &= B_USER_PROTECTION;
5973 
5974 	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
5975 		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5976 		return B_BAD_ADDRESS;
5977 
5978 	return status;
5979 }
5980 
5981 
5982 status_t
_user_set_area_protection(area_id area,uint32 newProtection)5983 _user_set_area_protection(area_id area, uint32 newProtection)
5984 {
5985 	if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0)
5986 		return B_BAD_VALUE;
5987 
5988 	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
5989 		newProtection, false);
5990 }
5991 
5992 
5993 status_t
_user_resize_area(area_id area,size_t newSize)5994 _user_resize_area(area_id area, size_t newSize)
5995 {
5996 	// TODO: Since we restrict deleting of areas to those owned by the team,
5997 	// we should also do that for resizing (check other functions, too).
5998 	return vm_resize_area(area, newSize, false);
5999 }
6000 
6001 
6002 area_id
_user_transfer_area(area_id area,void ** userAddress,uint32 addressSpec,team_id target)6003 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6004 	team_id target)
6005 {
6006 	// filter out some unavailable values (for userland)
6007 	switch (addressSpec) {
6008 		case B_ANY_KERNEL_ADDRESS:
6009 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6010 			return B_BAD_VALUE;
6011 	}
6012 
6013 	void* address;
6014 	if (!IS_USER_ADDRESS(userAddress)
6015 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6016 		return B_BAD_ADDRESS;
6017 
6018 	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6019 	if (newArea < B_OK)
6020 		return newArea;
6021 
6022 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6023 		return B_BAD_ADDRESS;
6024 
6025 	return newArea;
6026 }
6027 
6028 
6029 area_id
_user_clone_area(const char * userName,void ** userAddress,uint32 addressSpec,uint32 protection,area_id sourceArea)6030 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6031 	uint32 protection, area_id sourceArea)
6032 {
6033 	char name[B_OS_NAME_LENGTH];
6034 	void* address;
6035 
6036 	// filter out some unavailable values (for userland)
6037 	switch (addressSpec) {
6038 		case B_ANY_KERNEL_ADDRESS:
6039 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6040 			return B_BAD_VALUE;
6041 	}
6042 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6043 		return B_BAD_VALUE;
6044 
6045 	if (!IS_USER_ADDRESS(userName)
6046 		|| !IS_USER_ADDRESS(userAddress)
6047 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6048 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6049 		return B_BAD_ADDRESS;
6050 
6051 	fix_protection(&protection);
6052 
6053 	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6054 		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6055 		false);
6056 	if (clonedArea < B_OK)
6057 		return clonedArea;
6058 
6059 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6060 		delete_area(clonedArea);
6061 		return B_BAD_ADDRESS;
6062 	}
6063 
6064 	return clonedArea;
6065 }
6066 
6067 
6068 area_id
_user_create_area(const char * userName,void ** userAddress,uint32 addressSpec,size_t size,uint32 lock,uint32 protection)6069 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6070 	size_t size, uint32 lock, uint32 protection)
6071 {
6072 	char name[B_OS_NAME_LENGTH];
6073 	void* address;
6074 
6075 	// filter out some unavailable values (for userland)
6076 	switch (addressSpec) {
6077 		case B_ANY_KERNEL_ADDRESS:
6078 		case B_ANY_KERNEL_BLOCK_ADDRESS:
6079 			return B_BAD_VALUE;
6080 	}
6081 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6082 		return B_BAD_VALUE;
6083 
6084 	if (!IS_USER_ADDRESS(userName)
6085 		|| !IS_USER_ADDRESS(userAddress)
6086 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6087 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6088 		return B_BAD_ADDRESS;
6089 
6090 	if (addressSpec == B_EXACT_ADDRESS
6091 		&& IS_KERNEL_ADDRESS(address))
6092 		return B_BAD_VALUE;
6093 
6094 	if (addressSpec == B_ANY_ADDRESS)
6095 		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6096 	if (addressSpec == B_BASE_ADDRESS)
6097 		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6098 
6099 	fix_protection(&protection);
6100 
6101 	virtual_address_restrictions virtualRestrictions = {};
6102 	virtualRestrictions.address = address;
6103 	virtualRestrictions.address_specification = addressSpec;
6104 	physical_address_restrictions physicalRestrictions = {};
6105 	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6106 		size, lock, protection, 0, 0, &virtualRestrictions,
6107 		&physicalRestrictions, false, &address);
6108 
6109 	if (area >= B_OK
6110 		&& user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6111 		delete_area(area);
6112 		return B_BAD_ADDRESS;
6113 	}
6114 
6115 	return area;
6116 }
6117 
6118 
6119 status_t
_user_delete_area(area_id area)6120 _user_delete_area(area_id area)
6121 {
6122 	// Unlike the BeOS implementation, you can now only delete areas
6123 	// that you have created yourself from userland.
6124 	// The documentation to delete_area() explicitly states that this
6125 	// will be restricted in the future, and so it will.
6126 	return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6127 }
6128 
6129 
6130 // TODO: create a BeOS style call for this!
6131 
6132 area_id
_user_map_file(const char * userName,void ** userAddress,uint32 addressSpec,size_t size,uint32 protection,uint32 mapping,bool unmapAddressRange,int fd,off_t offset)6133 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6134 	size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6135 	int fd, off_t offset)
6136 {
6137 	char name[B_OS_NAME_LENGTH];
6138 	void* address;
6139 	area_id area;
6140 
6141 	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6142 		return B_BAD_VALUE;
6143 
6144 	fix_protection(&protection);
6145 
6146 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6147 		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6148 		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6149 		return B_BAD_ADDRESS;
6150 
6151 	if (addressSpec == B_EXACT_ADDRESS) {
6152 		if ((addr_t)address + size < (addr_t)address
6153 				|| (addr_t)address % B_PAGE_SIZE != 0) {
6154 			return B_BAD_VALUE;
6155 		}
6156 		if (!IS_USER_ADDRESS(address)
6157 				|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6158 			return B_BAD_ADDRESS;
6159 		}
6160 	}
6161 
6162 	area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6163 		addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6164 		false);
6165 	if (area < B_OK)
6166 		return area;
6167 
6168 	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6169 		return B_BAD_ADDRESS;
6170 
6171 	return area;
6172 }
6173 
6174 
6175 status_t
_user_unmap_memory(void * _address,size_t size)6176 _user_unmap_memory(void* _address, size_t size)
6177 {
6178 	addr_t address = (addr_t)_address;
6179 
6180 	// check params
6181 	if (size == 0 || (addr_t)address + size < (addr_t)address
6182 		|| (addr_t)address % B_PAGE_SIZE != 0) {
6183 		return B_BAD_VALUE;
6184 	}
6185 
6186 	if (!IS_USER_ADDRESS(address)
6187 		|| !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6188 		return B_BAD_ADDRESS;
6189 	}
6190 
6191 	// Write lock the address space and ensure the address range is not wired.
6192 	AddressSpaceWriteLocker locker;
6193 	do {
6194 		status_t status = locker.SetTo(team_get_current_team_id());
6195 		if (status != B_OK)
6196 			return status;
6197 	} while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6198 			size, &locker));
6199 
6200 	// unmap
6201 	return unmap_address_range(locker.AddressSpace(), address, size, false);
6202 }
6203 
6204 
6205 status_t
_user_set_memory_protection(void * _address,size_t size,uint32 protection)6206 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6207 {
6208 	// check address range
6209 	addr_t address = (addr_t)_address;
6210 	size = PAGE_ALIGN(size);
6211 
6212 	if ((address % B_PAGE_SIZE) != 0)
6213 		return B_BAD_VALUE;
6214 	if (!is_user_address_range(_address, size)) {
6215 		// weird error code required by POSIX
6216 		return ENOMEM;
6217 	}
6218 
6219 	// extend and check protection
6220 	if ((protection & ~B_USER_PROTECTION) != 0)
6221 		return B_BAD_VALUE;
6222 
6223 	fix_protection(&protection);
6224 
6225 	// We need to write lock the address space, since we're going to play with
6226 	// the areas. Also make sure that none of the areas is wired and that we're
6227 	// actually allowed to change the protection.
6228 	AddressSpaceWriteLocker locker;
6229 
6230 	bool restart;
6231 	do {
6232 		restart = false;
6233 
6234 		status_t status = locker.SetTo(team_get_current_team_id());
6235 		if (status != B_OK)
6236 			return status;
6237 
6238 		// First round: Check whether the whole range is covered by areas and we
6239 		// are allowed to modify them.
6240 		addr_t currentAddress = address;
6241 		size_t sizeLeft = size;
6242 		while (sizeLeft > 0) {
6243 			VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6244 			if (area == NULL)
6245 				return B_NO_MEMORY;
6246 
6247 			if ((area->protection & B_KERNEL_AREA) != 0)
6248 				return B_NOT_ALLOWED;
6249 			if (area->protection_max != 0
6250 				&& (protection & area->protection_max) != (protection & B_USER_PROTECTION)) {
6251 				return B_NOT_ALLOWED;
6252 			}
6253 
6254 			addr_t offset = currentAddress - area->Base();
6255 			size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6256 
6257 			AreaCacheLocker cacheLocker(area);
6258 
6259 			if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6260 					&locker, &cacheLocker)) {
6261 				restart = true;
6262 				break;
6263 			}
6264 
6265 			cacheLocker.Unlock();
6266 
6267 			currentAddress += rangeSize;
6268 			sizeLeft -= rangeSize;
6269 		}
6270 	} while (restart);
6271 
6272 	// Second round: If the protections differ from that of the area, create a
6273 	// page protection array and re-map mapped pages.
6274 	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6275 	addr_t currentAddress = address;
6276 	size_t sizeLeft = size;
6277 	while (sizeLeft > 0) {
6278 		VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6279 		if (area == NULL)
6280 			return B_NO_MEMORY;
6281 
6282 		addr_t offset = currentAddress - area->Base();
6283 		size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6284 
6285 		currentAddress += rangeSize;
6286 		sizeLeft -= rangeSize;
6287 
6288 		if (area->page_protections == NULL) {
6289 			if (area->protection == protection)
6290 				continue;
6291 			if (offset == 0 && rangeSize == area->Size()) {
6292 				// The whole area is covered: let set_area_protection handle it.
6293 				status_t status = vm_set_area_protection(area->address_space->ID(),
6294 					area->id, protection, false);
6295 				if (status != B_OK)
6296 					return status;
6297 				continue;
6298 			}
6299 
6300 			status_t status = allocate_area_page_protections(area);
6301 			if (status != B_OK)
6302 				return status;
6303 		}
6304 
6305 		// We need to lock the complete cache chain, since we potentially unmap
6306 		// pages of lower caches.
6307 		VMCache* topCache = vm_area_get_locked_cache(area);
6308 		VMCacheChainLocker cacheChainLocker(topCache);
6309 		cacheChainLocker.LockAllSourceCaches();
6310 
6311 		// Adjust the committed size, if necessary.
6312 		if (topCache->source != NULL && topCache->temporary) {
6313 			const bool becomesWritable = (protection & B_WRITE_AREA) != 0;
6314 			ssize_t commitmentChange = 0;
6315 			const off_t areaCacheBase = area->Base() - area->cache_offset;
6316 			for (addr_t pageAddress = area->Base() + offset;
6317 					pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6318 				if (topCache->LookupPage(pageAddress - areaCacheBase) != NULL) {
6319 					// This page should already be accounted for in the commitment.
6320 					continue;
6321 				}
6322 
6323 				const bool isWritable
6324 					= (get_area_page_protection(area, pageAddress) & B_WRITE_AREA) != 0;
6325 
6326 				if (becomesWritable && !isWritable)
6327 					commitmentChange += B_PAGE_SIZE;
6328 				else if (!becomesWritable && isWritable)
6329 					commitmentChange -= B_PAGE_SIZE;
6330 			}
6331 
6332 			if (commitmentChange != 0) {
6333 				const off_t newCommitment = topCache->committed_size + commitmentChange;
6334 				ASSERT(newCommitment <= (topCache->virtual_end - topCache->virtual_base));
6335 				status_t status = topCache->Commit(newCommitment, VM_PRIORITY_USER);
6336 				if (status != B_OK)
6337 					return status;
6338 			}
6339 		}
6340 
6341 		for (addr_t pageAddress = area->Base() + offset;
6342 				pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6343 			map->Lock();
6344 
6345 			set_area_page_protection(area, pageAddress, protection);
6346 
6347 			phys_addr_t physicalAddress;
6348 			uint32 flags;
6349 
6350 			status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6351 			if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6352 				map->Unlock();
6353 				continue;
6354 			}
6355 
6356 			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6357 			if (page == NULL) {
6358 				panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6359 					"\n", area, physicalAddress);
6360 				map->Unlock();
6361 				return B_ERROR;
6362 			}
6363 
6364 			// If the page is not in the topmost cache and write access is
6365 			// requested, we have to unmap it. Otherwise we can re-map it with
6366 			// the new protection.
6367 			bool unmapPage = page->Cache() != topCache
6368 				&& (protection & B_WRITE_AREA) != 0;
6369 
6370 			if (!unmapPage)
6371 				map->ProtectPage(area, pageAddress, protection);
6372 
6373 			map->Unlock();
6374 
6375 			if (unmapPage) {
6376 				DEBUG_PAGE_ACCESS_START(page);
6377 				unmap_page(area, pageAddress);
6378 				DEBUG_PAGE_ACCESS_END(page);
6379 			}
6380 		}
6381 	}
6382 
6383 	return B_OK;
6384 }
6385 
6386 
6387 status_t
_user_sync_memory(void * _address,size_t size,uint32 flags)6388 _user_sync_memory(void* _address, size_t size, uint32 flags)
6389 {
6390 	addr_t address = (addr_t)_address;
6391 	size = PAGE_ALIGN(size);
6392 
6393 	// check params
6394 	if ((address % B_PAGE_SIZE) != 0)
6395 		return B_BAD_VALUE;
6396 	if (!is_user_address_range(_address, size)) {
6397 		// weird error code required by POSIX
6398 		return ENOMEM;
6399 	}
6400 
6401 	bool writeSync = (flags & MS_SYNC) != 0;
6402 	bool writeAsync = (flags & MS_ASYNC) != 0;
6403 	if (writeSync && writeAsync)
6404 		return B_BAD_VALUE;
6405 
6406 	if (size == 0 || (!writeSync && !writeAsync))
6407 		return B_OK;
6408 
6409 	// iterate through the range and sync all concerned areas
6410 	while (size > 0) {
6411 		// read lock the address space
6412 		AddressSpaceReadLocker locker;
6413 		status_t error = locker.SetTo(team_get_current_team_id());
6414 		if (error != B_OK)
6415 			return error;
6416 
6417 		// get the first area
6418 		VMArea* area = locker.AddressSpace()->LookupArea(address);
6419 		if (area == NULL)
6420 			return B_NO_MEMORY;
6421 
6422 		uint32 offset = address - area->Base();
6423 		size_t rangeSize = min_c(area->Size() - offset, size);
6424 		offset += area->cache_offset;
6425 
6426 		// lock the cache
6427 		AreaCacheLocker cacheLocker(area);
6428 		if (!cacheLocker)
6429 			return B_BAD_VALUE;
6430 		VMCache* cache = area->cache;
6431 
6432 		locker.Unlock();
6433 
6434 		uint32 firstPage = offset >> PAGE_SHIFT;
6435 		uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6436 
6437 		// write the pages
6438 		if (cache->type == CACHE_TYPE_VNODE) {
6439 			if (writeSync) {
6440 				// synchronous
6441 				error = vm_page_write_modified_page_range(cache, firstPage,
6442 					endPage);
6443 				if (error != B_OK)
6444 					return error;
6445 			} else {
6446 				// asynchronous
6447 				vm_page_schedule_write_page_range(cache, firstPage, endPage);
6448 				// TODO: This is probably not quite what is supposed to happen.
6449 				// Especially when a lot has to be written, it might take ages
6450 				// until it really hits the disk.
6451 			}
6452 		}
6453 
6454 		address += rangeSize;
6455 		size -= rangeSize;
6456 	}
6457 
6458 	// NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6459 	// synchronize multiple mappings of the same file. In our VM they never get
6460 	// out of sync, though, so we don't have to do anything.
6461 
6462 	return B_OK;
6463 }
6464 
6465 
6466 status_t
_user_memory_advice(void * _address,size_t size,uint32 advice)6467 _user_memory_advice(void* _address, size_t size, uint32 advice)
6468 {
6469 	addr_t address = (addr_t)_address;
6470 	if ((address % B_PAGE_SIZE) != 0)
6471 		return B_BAD_VALUE;
6472 
6473 	size = PAGE_ALIGN(size);
6474 	if (!is_user_address_range(_address, size)) {
6475 		// weird error code required by POSIX
6476 		return B_NO_MEMORY;
6477 	}
6478 
6479 	switch (advice) {
6480 		case MADV_NORMAL:
6481 		case MADV_SEQUENTIAL:
6482 		case MADV_RANDOM:
6483 		case MADV_WILLNEED:
6484 		case MADV_DONTNEED:
6485 			// TODO: Implement!
6486 			break;
6487 
6488 		case MADV_FREE:
6489 		{
6490 			AddressSpaceWriteLocker locker;
6491 			do {
6492 				status_t status = locker.SetTo(team_get_current_team_id());
6493 				if (status != B_OK)
6494 					return status;
6495 			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
6496 					address, size, &locker));
6497 
6498 			discard_address_range(locker.AddressSpace(), address, size, false);
6499 			break;
6500 		}
6501 
6502 		default:
6503 			return B_BAD_VALUE;
6504 	}
6505 
6506 	return B_OK;
6507 }
6508 
6509 
6510 status_t
_user_get_memory_properties(team_id teamID,const void * address,uint32 * _protected,uint32 * _lock)6511 _user_get_memory_properties(team_id teamID, const void* address,
6512 	uint32* _protected, uint32* _lock)
6513 {
6514 	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6515 		return B_BAD_ADDRESS;
6516 
6517 	AddressSpaceReadLocker locker;
6518 	status_t error = locker.SetTo(teamID);
6519 	if (error != B_OK)
6520 		return error;
6521 
6522 	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6523 	if (area == NULL)
6524 		return B_NO_MEMORY;
6525 
6526 	uint32 protection = get_area_page_protection(area, (addr_t)address);
6527 	uint32 wiring = area->wiring;
6528 
6529 	locker.Unlock();
6530 
6531 	error = user_memcpy(_protected, &protection, sizeof(protection));
6532 	if (error != B_OK)
6533 		return error;
6534 
6535 	error = user_memcpy(_lock, &wiring, sizeof(wiring));
6536 
6537 	return error;
6538 }
6539 
6540 
6541 static status_t
user_set_memory_swappable(const void * _address,size_t size,bool swappable)6542 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
6543 {
6544 #if ENABLE_SWAP_SUPPORT
6545 	// check address range
6546 	addr_t address = (addr_t)_address;
6547 	size = PAGE_ALIGN(size);
6548 
6549 	if ((address % B_PAGE_SIZE) != 0)
6550 		return EINVAL;
6551 	if (!is_user_address_range(_address, size))
6552 		return EINVAL;
6553 
6554 	const addr_t endAddress = address + size;
6555 
6556 	AddressSpaceReadLocker addressSpaceLocker;
6557 	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
6558 	if (error != B_OK)
6559 		return error;
6560 	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
6561 
6562 	// iterate through all concerned areas
6563 	addr_t nextAddress = address;
6564 	while (nextAddress != endAddress) {
6565 		// get the next area
6566 		VMArea* area = addressSpace->LookupArea(nextAddress);
6567 		if (area == NULL) {
6568 			error = B_BAD_ADDRESS;
6569 			break;
6570 		}
6571 
6572 		const addr_t areaStart = nextAddress;
6573 		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
6574 		nextAddress = areaEnd;
6575 
6576 		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6577 		if (error != B_OK) {
6578 			// We don't need to unset or reset things on failure.
6579 			break;
6580 		}
6581 
6582 		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6583 		VMAnonymousCache* anonCache = NULL;
6584 		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
6585 			// This memory will aready never be swapped. Nothing to do.
6586 		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
6587 			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
6588 				areaEnd - areaStart, swappable);
6589 		} else {
6590 			// Some other cache type? We cannot affect anything here.
6591 			error = EINVAL;
6592 		}
6593 
6594 		cacheChainLocker.Unlock();
6595 
6596 		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6597 		if (error != B_OK)
6598 			break;
6599 	}
6600 
6601 	return error;
6602 #else
6603 	// No swap support? Nothing to do.
6604 	return B_OK;
6605 #endif
6606 }
6607 
6608 
6609 status_t
_user_mlock(const void * _address,size_t size)6610 _user_mlock(const void* _address, size_t size)
6611 {
6612 	return user_set_memory_swappable(_address, size, false);
6613 }
6614 
6615 
6616 status_t
_user_munlock(const void * _address,size_t size)6617 _user_munlock(const void* _address, size_t size)
6618 {
6619 	// TODO: B_SHARED_AREAs need to be handled a bit differently:
6620 	// if multiple clones of an area had mlock() called on them,
6621 	// munlock() must also be called on all of them to actually unlock.
6622 	// (At present, the first munlock() will unlock all.)
6623 	// TODO: fork() should automatically unlock memory in the child.
6624 	return user_set_memory_swappable(_address, size, true);
6625 }
6626 
6627 
6628 // #pragma mark -- compatibility
6629 
6630 
6631 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6632 
6633 
6634 struct physical_entry_beos {
6635 	uint32	address;
6636 	uint32	size;
6637 };
6638 
6639 
6640 /*!	The physical_entry structure has changed. We need to translate it to the
6641 	old one.
6642 */
6643 extern "C" int32
__get_memory_map_beos(const void * _address,size_t numBytes,physical_entry_beos * table,int32 numEntries)6644 __get_memory_map_beos(const void* _address, size_t numBytes,
6645 	physical_entry_beos* table, int32 numEntries)
6646 {
6647 	if (numEntries <= 0)
6648 		return B_BAD_VALUE;
6649 
6650 	const uint8* address = (const uint8*)_address;
6651 
6652 	int32 count = 0;
6653 	while (numBytes > 0 && count < numEntries) {
6654 		physical_entry entry;
6655 		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6656 		if (result < 0) {
6657 			if (result != B_BUFFER_OVERFLOW)
6658 				return result;
6659 		}
6660 
6661 		if (entry.address >= (phys_addr_t)1 << 32) {
6662 			panic("get_memory_map(): Address is greater 4 GB!");
6663 			return B_ERROR;
6664 		}
6665 
6666 		table[count].address = entry.address;
6667 		table[count++].size = entry.size;
6668 
6669 		address += entry.size;
6670 		numBytes -= entry.size;
6671 	}
6672 
6673 	// null-terminate the table, if possible
6674 	if (count < numEntries) {
6675 		table[count].address = 0;
6676 		table[count].size = 0;
6677 	}
6678 
6679 	return B_OK;
6680 }
6681 
6682 
6683 /*!	The type of the \a physicalAddress parameter has changed from void* to
6684 	phys_addr_t.
6685 */
6686 extern "C" area_id
__map_physical_memory_beos(const char * name,void * physicalAddress,size_t numBytes,uint32 addressSpec,uint32 protection,void ** _virtualAddress)6687 __map_physical_memory_beos(const char* name, void* physicalAddress,
6688 	size_t numBytes, uint32 addressSpec, uint32 protection,
6689 	void** _virtualAddress)
6690 {
6691 	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6692 		addressSpec, protection, _virtualAddress);
6693 }
6694 
6695 
6696 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6697 	we meddle with the \a lock parameter to force 32 bit.
6698 */
6699 extern "C" area_id
__create_area_beos(const char * name,void ** _address,uint32 addressSpec,size_t size,uint32 lock,uint32 protection)6700 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6701 	size_t size, uint32 lock, uint32 protection)
6702 {
6703 	switch (lock) {
6704 		case B_NO_LOCK:
6705 			break;
6706 		case B_FULL_LOCK:
6707 		case B_LAZY_LOCK:
6708 			lock = B_32_BIT_FULL_LOCK;
6709 			break;
6710 		case B_CONTIGUOUS:
6711 			lock = B_32_BIT_CONTIGUOUS;
6712 			break;
6713 	}
6714 
6715 	return __create_area_haiku(name, _address, addressSpec, size, lock,
6716 		protection);
6717 }
6718 
6719 
6720 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6721 	"BASE");
6722 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6723 	"map_physical_memory@", "BASE");
6724 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6725 	"BASE");
6726 
6727 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6728 	"get_memory_map@@", "1_ALPHA3");
6729 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6730 	"map_physical_memory@@", "1_ALPHA3");
6731 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6732 	"1_ALPHA3");
6733 
6734 
6735 #else
6736 
6737 
6738 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6739 	"get_memory_map@@", "BASE");
6740 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6741 	"map_physical_memory@@", "BASE");
6742 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6743 	"BASE");
6744 
6745 
6746 #endif	// defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6747