1 /*
2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4 * Distributed under the terms of the MIT License.
5 *
6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7 * Distributed under the terms of the NewOS License.
8 */
9
10
11 #include <vm/vm.h>
12
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <stdio.h>
16 #include <string.h>
17 #include <sys/mman.h>
18
19 #include <algorithm>
20
21 #include <OS.h>
22 #include <KernelExport.h>
23
24 #include <AutoDeleterDrivers.h>
25
26 #include <symbol_versioning.h>
27
28 #include <arch/cpu.h>
29 #include <arch/vm.h>
30 #include <arch/user_memory.h>
31 #include <boot/elf.h>
32 #include <boot/stage2.h>
33 #include <condition_variable.h>
34 #include <console.h>
35 #include <debug.h>
36 #include <file_cache.h>
37 #include <fs/fd.h>
38 #include <heap.h>
39 #include <kernel.h>
40 #include <int.h>
41 #include <lock.h>
42 #include <low_resource_manager.h>
43 #include <slab/Slab.h>
44 #include <smp.h>
45 #include <system_info.h>
46 #include <thread.h>
47 #include <team.h>
48 #include <tracing.h>
49 #include <util/AutoLock.h>
50 #include <util/BitUtils.h>
51 #include <util/ThreadAutoLock.h>
52 #include <vm/vm_page.h>
53 #include <vm/vm_priv.h>
54 #include <vm/VMAddressSpace.h>
55 #include <vm/VMArea.h>
56 #include <vm/VMCache.h>
57
58 #include "VMAddressSpaceLocking.h"
59 #include "VMAnonymousCache.h"
60 #include "VMAnonymousNoSwapCache.h"
61 #include "IORequest.h"
62
63
64 //#define TRACE_VM
65 //#define TRACE_FAULTS
66 #ifdef TRACE_VM
67 # define TRACE(x) dprintf x
68 #else
69 # define TRACE(x) ;
70 #endif
71 #ifdef TRACE_FAULTS
72 # define FTRACE(x) dprintf x
73 #else
74 # define FTRACE(x) ;
75 #endif
76
77
78 namespace {
79
80 class AreaCacheLocking {
81 public:
82 inline bool Lock(VMCache* lockable)
83 {
84 return false;
85 }
86
87 inline void Unlock(VMCache* lockable)
88 {
89 vm_area_put_locked_cache(lockable);
90 }
91 };
92
93 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
94 public:
95 inline AreaCacheLocker(VMCache* cache = NULL)
96 : AutoLocker<VMCache, AreaCacheLocking>(cache, true)
97 {
98 }
99
100 inline AreaCacheLocker(VMArea* area)
101 : AutoLocker<VMCache, AreaCacheLocking>()
102 {
103 SetTo(area);
104 }
105
106 inline void SetTo(VMCache* cache, bool alreadyLocked)
107 {
108 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
109 }
110
111 inline void SetTo(VMArea* area)
112 {
113 return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
114 area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
115 }
116 };
117
118
119 class VMCacheChainLocker {
120 public:
121 VMCacheChainLocker()
122 :
123 fTopCache(NULL),
124 fBottomCache(NULL)
125 {
126 }
127
128 VMCacheChainLocker(VMCache* topCache)
129 :
130 fTopCache(topCache),
131 fBottomCache(topCache)
132 {
133 }
134
135 ~VMCacheChainLocker()
136 {
137 Unlock();
138 }
139
140 void SetTo(VMCache* topCache)
141 {
142 fTopCache = topCache;
143 fBottomCache = topCache;
144
145 if (topCache != NULL)
146 topCache->SetUserData(NULL);
147 }
148
149 VMCache* LockSourceCache()
150 {
151 if (fBottomCache == NULL || fBottomCache->source == NULL)
152 return NULL;
153
154 VMCache* previousCache = fBottomCache;
155
156 fBottomCache = fBottomCache->source;
157 fBottomCache->Lock();
158 fBottomCache->AcquireRefLocked();
159 fBottomCache->SetUserData(previousCache);
160
161 return fBottomCache;
162 }
163
164 void LockAllSourceCaches()
165 {
166 while (LockSourceCache() != NULL) {
167 }
168 }
169
170 void Unlock(VMCache* exceptCache = NULL)
171 {
172 if (fTopCache == NULL)
173 return;
174
175 // Unlock caches in source -> consumer direction. This is important to
176 // avoid double-locking and a reversal of locking order in case a cache
177 // is eligible for merging.
178 VMCache* cache = fBottomCache;
179 while (cache != NULL) {
180 VMCache* nextCache = (VMCache*)cache->UserData();
181 if (cache != exceptCache)
182 cache->ReleaseRefAndUnlock(cache != fTopCache);
183
184 if (cache == fTopCache)
185 break;
186
187 cache = nextCache;
188 }
189
190 fTopCache = NULL;
191 fBottomCache = NULL;
192 }
193
194 void UnlockKeepRefs(bool keepTopCacheLocked)
195 {
196 if (fTopCache == NULL)
197 return;
198
199 VMCache* nextCache = fBottomCache;
200 VMCache* cache = NULL;
201
202 while (keepTopCacheLocked
203 ? nextCache != fTopCache : cache != fTopCache) {
204 cache = nextCache;
205 nextCache = (VMCache*)cache->UserData();
206 cache->Unlock(cache != fTopCache);
207 }
208 }
209
210 void RelockCaches(bool topCacheLocked)
211 {
212 if (fTopCache == NULL)
213 return;
214
215 VMCache* nextCache = fTopCache;
216 VMCache* cache = NULL;
217 if (topCacheLocked) {
218 cache = nextCache;
219 nextCache = cache->source;
220 }
221
222 while (cache != fBottomCache && nextCache != NULL) {
223 VMCache* consumer = cache;
224 cache = nextCache;
225 nextCache = cache->source;
226 cache->Lock();
227 cache->SetUserData(consumer);
228 }
229 }
230
231 private:
232 VMCache* fTopCache;
233 VMCache* fBottomCache;
234 };
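
// A typical usage sketch (illustrative only, mirroring how the locker is used
// further below, e.g. in cut_area() and discard_area_range()):
//
//	VMCache* cache = vm_area_get_locked_cache(area);
//	VMCacheChainLocker cacheChainLocker(cache);
//	cacheChainLocker.LockAllSourceCaches();
//	// ... work on the locked chain ...
//	cacheChainLocker.Unlock();
//		// also happens implicitly in the destructor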
235
236 } // namespace
237
238
239 // The memory reserve an allocation of a given priority must not touch.
240 static const size_t kMemoryReserveForPriority[] = {
241 VM_MEMORY_RESERVE_USER, // user
242 VM_MEMORY_RESERVE_SYSTEM, // system
243 0 // VIP
244 };
245
246
247 static object_cache** sPageMappingsObjectCaches;
248 static uint32 sPageMappingsMask;
249
250 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
251
252 static off_t sAvailableMemory;
253 static off_t sNeededMemory;
254 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
255 static uint32 sPageFaults;
256
257 static VMPhysicalPageMapper* sPhysicalPageMapper;
258
259
260 // function declarations
261 static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
262 bool deletingAddressSpace, bool alreadyRemoved = false);
263 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
264 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
265 static status_t map_backing_store(VMAddressSpace* addressSpace,
266 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
267 int protection, int protectionMax, int mapping, uint32 flags,
268 const virtual_address_restrictions* addressRestrictions, bool kernel,
269 VMArea** _area, void** _virtualAddress);
270 static void fix_protection(uint32* protection);
271
272
273 // #pragma mark -
274
275
276 #if VM_PAGE_FAULT_TRACING
277
278 namespace VMPageFaultTracing {
279
280 class PageFaultStart : public AbstractTraceEntry {
281 public:
282 PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
283 :
284 fAddress(address),
285 fPC(pc),
286 fWrite(write),
287 fUser(user)
288 {
289 Initialized();
290 }
291
292 virtual void AddDump(TraceOutput& out)
293 {
294 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
295 fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
296 }
297
298 private:
299 addr_t fAddress;
300 addr_t fPC;
301 bool fWrite;
302 bool fUser;
303 };
304
305
306 // page fault errors
307 enum {
308 PAGE_FAULT_ERROR_NO_AREA = 0,
309 PAGE_FAULT_ERROR_KERNEL_ONLY,
310 PAGE_FAULT_ERROR_WRITE_PROTECTED,
311 PAGE_FAULT_ERROR_READ_PROTECTED,
312 PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
313 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
314 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
315 };
316
317
318 class PageFaultError : public AbstractTraceEntry {
319 public:
320 PageFaultError(area_id area, status_t error)
321 :
322 fArea(area),
323 fError(error)
324 {
325 Initialized();
326 }
327
328 virtual void AddDump(TraceOutput& out)
329 {
330 switch (fError) {
331 case PAGE_FAULT_ERROR_NO_AREA:
332 out.Print("page fault error: no area");
333 break;
334 case PAGE_FAULT_ERROR_KERNEL_ONLY:
335 out.Print("page fault error: area: %ld, kernel only", fArea);
336 break;
337 case PAGE_FAULT_ERROR_WRITE_PROTECTED:
338 out.Print("page fault error: area: %ld, write protected",
339 fArea);
340 break;
341 case PAGE_FAULT_ERROR_READ_PROTECTED:
342 out.Print("page fault error: area: %ld, read protected", fArea);
343 break;
344 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
345 out.Print("page fault error: area: %ld, execute protected",
346 fArea);
347 break;
348 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
349 out.Print("page fault error: kernel touching bad user memory");
350 break;
351 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
352 out.Print("page fault error: no address space");
353 break;
354 default:
355 out.Print("page fault error: area: %ld, error: %s", fArea,
356 strerror(fError));
357 break;
358 }
359 }
360
361 private:
362 area_id fArea;
363 status_t fError;
364 };
365
366
367 class PageFaultDone : public AbstractTraceEntry {
368 public:
369 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
370 vm_page* page)
371 :
372 fArea(area),
373 fTopCache(topCache),
374 fCache(cache),
375 fPage(page)
376 {
377 Initialized();
378 }
379
380 virtual void AddDump(TraceOutput& out)
381 {
382 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
383 "page: %p", fArea, fTopCache, fCache, fPage);
384 }
385
386 private:
387 area_id fArea;
388 VMCache* fTopCache;
389 VMCache* fCache;
390 vm_page* fPage;
391 };
392
393 } // namespace VMPageFaultTracing
394
395 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
396 #else
397 # define TPF(x) ;
398 #endif // VM_PAGE_FAULT_TRACING
399
400
401 // #pragma mark - page mappings allocation
402
403
404 static void
405 create_page_mappings_object_caches()
406 {
407 // We want the largest power of 2 that does not exceed the number of CPUs.
408 const int32 numCPUs = smp_get_num_cpus();
409 int32 count = next_power_of_2(numCPUs);
410 if (count > numCPUs)
411 count >>= 1;
412 sPageMappingsMask = count - 1;
413
414 sPageMappingsObjectCaches = new object_cache*[count];
415 if (sPageMappingsObjectCaches == NULL)
416 panic("failed to allocate page mappings object_cache array");
417
418 for (int32 i = 0; i < count; i++) {
419 char name[32];
420 snprintf(name, sizeof(name), "page mappings %" B_PRId32, i);
421
422 object_cache* cache = create_object_cache_etc(name,
423 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
424 NULL, NULL);
425 if (cache == NULL)
426 panic("failed to create page mappings object_cache");
427
428 object_cache_set_minimum_reserve(cache, 1024);
429 sPageMappingsObjectCaches[i] = cache;
430 }
431 }
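
// The mappings are spread over several object caches, selected by the low bits
// of the physical page number, to reduce lock contention between CPUs. For
// example (illustrative only): with 8 CPUs the code above creates 8 caches and
// sPageMappingsMask is 7, so page N uses cache (N & 7); with 6 CPUs the count
// is rounded down to 4 and the mask becomes 3.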
432
433
434 static object_cache*
435 page_mapping_object_cache_for(page_num_t page)
436 {
437 return sPageMappingsObjectCaches[page & sPageMappingsMask];
438 }
439
440
441 static vm_page_mapping*
442 allocate_page_mapping(page_num_t page, uint32 flags = 0)
443 {
444 return (vm_page_mapping*)object_cache_alloc(page_mapping_object_cache_for(page),
445 flags);
446 }
447
448
449 void
450 vm_free_page_mapping(page_num_t page, vm_page_mapping* mapping, uint32 flags)
451 {
452 object_cache_free(page_mapping_object_cache_for(page), mapping, flags);
453 }
454
455
456 // #pragma mark -
457
458
459 /*! The page's cache must be locked.
460 */
461 static inline void
462 increment_page_wired_count(vm_page* page)
463 {
464 if (!page->IsMapped())
465 atomic_add(&gMappedPagesCount, 1);
466 page->IncrementWiredCount();
467 }
468
469
470 /*! The page's cache must be locked.
471 */
472 static inline void
473 decrement_page_wired_count(vm_page* page)
474 {
475 page->DecrementWiredCount();
476 if (!page->IsMapped())
477 atomic_add(&gMappedPagesCount, -1);
478 }
479
480
481 static inline addr_t
482 virtual_page_address(VMArea* area, vm_page* page)
483 {
484 return area->Base()
485 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
486 }
487
488
489 static inline bool
490 is_page_in_area(VMArea* area, vm_page* page)
491 {
492 off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT);
493 return pageCacheOffsetBytes >= area->cache_offset
494 && pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size();
495 }
496
497
498 //! You need to have the address space locked when calling this function
499 static VMArea*
500 lookup_area(VMAddressSpace* addressSpace, area_id id)
501 {
502 VMAreas::ReadLock();
503
504 VMArea* area = VMAreas::LookupLocked(id);
505 if (area != NULL && area->address_space != addressSpace)
506 area = NULL;
507
508 VMAreas::ReadUnlock();
509
510 return area;
511 }
512
513
514 static inline size_t
515 area_page_protections_size(size_t areaSize)
516 {
517 // In the page protections we store only the three user protections,
518 // so we use 4 bits per page.
519 return (areaSize / B_PAGE_SIZE + 1) / 2;
520 }
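
// For example (illustrative, assuming 4 KiB pages): a 4-page area needs
// (4 + 1) / 2 = 2 protection bytes, a 5-page area needs (5 + 1) / 2 = 3 bytes,
// with the high nibble of the last byte unused in the odd case.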
521
522
523 static status_t
524 allocate_area_page_protections(VMArea* area)
525 {
526 size_t bytes = area_page_protections_size(area->Size());
527 area->page_protections = (uint8*)malloc_etc(bytes,
528 area->address_space == VMAddressSpace::Kernel()
529 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
530 if (area->page_protections == NULL)
531 return B_NO_MEMORY;
532
533 // init the page protections for all pages to that of the area
534 uint32 areaProtection = area->protection
535 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
536 memset(area->page_protections, areaProtection | (areaProtection << 4), bytes);
537
538 // clear protections from the area
539 area->protection &= ~(B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA
540 | B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA | B_KERNEL_EXECUTE_AREA);
541 return B_OK;
542 }
543
544
545 static inline uint8*
546 realloc_area_page_protections(uint8* pageProtections, size_t areaSize,
547 uint32 allocationFlags)
548 {
549 size_t bytes = area_page_protections_size(areaSize);
550 return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags);
551 }
552
553
554 static inline void
555 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
556 {
557 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
558 addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
559 uint8& entry = area->page_protections[pageIndex / 2];
560 if (pageIndex % 2 == 0)
561 entry = (entry & 0xf0) | protection;
562 else
563 entry = (entry & 0x0f) | (protection << 4);
564 }
565
566
567 static inline uint32
568 get_area_page_protection(VMArea* area, addr_t pageAddress)
569 {
570 if (area->page_protections == NULL)
571 return area->protection;
572
573 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
574 uint32 protection = area->page_protections[pageIndex / 2];
575 if (pageIndex % 2 == 0)
576 protection &= 0x0f;
577 else
578 protection >>= 4;
579
580 uint32 kernelProtection = 0;
581 if ((protection & B_READ_AREA) != 0)
582 kernelProtection |= B_KERNEL_READ_AREA;
583 if ((protection & B_WRITE_AREA) != 0)
584 kernelProtection |= B_KERNEL_WRITE_AREA;
585
586 // If this is a kernel area we return only the kernel flags.
587 if (area->address_space == VMAddressSpace::Kernel())
588 return kernelProtection;
589
590 return protection | kernelProtection;
591 }
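
// Layout sketch (illustrative): page_protections[] stores one nibble per page,
// with even-numbered pages in the low nibble and odd-numbered pages in the
// high nibble of each byte. E.g. a byte value of 0x31 means page 2i is
// B_READ_AREA only, while page 2i + 1 is B_READ_AREA | B_WRITE_AREA.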
592
593
594 /*! Computes the committed size an area's cache ought to have,
595 based on the area's page_protections and any pages already present.
596 */
597 static inline uint32
598 compute_area_page_commitment(VMArea* area)
599 {
600 const size_t bytes = area_page_protections_size(area->Size());
601 const bool oddPageCount = ((area->Size() / B_PAGE_SIZE) % 2) != 0;
602 size_t pages = 0;
603 for (size_t i = 0; i < bytes; i++) {
604 const uint8 protection = area->page_protections[i];
605 const off_t pageOffset = area->cache_offset + (off_t)(i * 2) * B_PAGE_SIZE;
606 if (area->cache->LookupPage(pageOffset) != NULL)
607 pages++;
608 else
609 pages += ((protection & (B_WRITE_AREA << 0)) != 0) ? 1 : 0;
610
611 if (i == (bytes - 1) && oddPageCount)
612 break;
613
614 if (area->cache->LookupPage(pageOffset + B_PAGE_SIZE) != NULL)
615 pages++;
616 else
617 pages += ((protection & (B_WRITE_AREA << 4)) != 0) ? 1 : 0;
618 }
619 return pages;
620 }
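
// In short (summary, not normative): a page counts towards the commitment if
// it is already present in the cache, or if its protection nibble allows
// writing and a private copy may therefore have to be allocated on fault;
// not-yet-present read-only pages need no commitment.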
621
622
623 /*! The caller must have reserved enough pages for the translation map
624 implementation to map this page.
625 The page's cache must be locked.
626 */
627 static status_t
628 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
629 vm_page_reservation* reservation)
630 {
631 VMTranslationMap* map = area->address_space->TranslationMap();
632
633 bool wasMapped = page->IsMapped();
634
635 if (area->wiring == B_NO_LOCK) {
636 DEBUG_PAGE_ACCESS_CHECK(page);
637
638 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
639 vm_page_mapping* mapping = allocate_page_mapping(page->physical_page_number,
640 CACHE_DONT_WAIT_FOR_MEMORY
641 | (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
642 if (mapping == NULL)
643 return B_NO_MEMORY;
644
645 mapping->page = page;
646 mapping->area = area;
647
648 map->Lock();
649
650 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
651 area->MemoryType(), reservation);
652
653 // insert mapping into lists
654 if (!page->IsMapped())
655 atomic_add(&gMappedPagesCount, 1);
656
657 page->mappings.Add(mapping);
658 area->mappings.Add(mapping);
659
660 map->Unlock();
661 } else {
662 DEBUG_PAGE_ACCESS_CHECK(page);
663
664 map->Lock();
665 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
666 area->MemoryType(), reservation);
667 map->Unlock();
668
669 increment_page_wired_count(page);
670 }
671
672 if (!wasMapped) {
673 // The page is mapped now, so we must not remain in the cached queue.
674 // It also makes sense to move it from the inactive to the active, since
675 // otherwise the page daemon wouldn't come to keep track of it (in idle
676 // mode) -- if the page isn't touched, it will be deactivated after a
677 // full iteration through the queue at the latest.
678 if (page->State() == PAGE_STATE_CACHED
679 || page->State() == PAGE_STATE_INACTIVE) {
680 vm_page_set_state(page, PAGE_STATE_ACTIVE);
681 }
682 }
683
684 return B_OK;
685 }
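
// Note on the two paths above: for B_NO_LOCK areas each mapping is tracked
// with a vm_page_mapping object so it can be found and torn down later, while
// wired areas only bump the page's wired count. In both cases
// gMappedPagesCount only reflects whether the page is mapped at all, not how
// many times.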
686
687
688 /*! The caller must hold the lock of the page's cache.
689 */
691 static inline bool
692 unmap_page(VMArea* area, addr_t virtualAddress)
693 {
694 return area->address_space->TranslationMap()->UnmapPage(area,
695 virtualAddress, true);
696 }
697
698
699 /*! The caller must hold the locks of all mapped pages' caches.
700 */
702 static inline void
703 unmap_pages(VMArea* area, addr_t base, size_t size)
704 {
705 area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
706 }
707
708
709 static inline bool
710 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset)
711 {
712 if (address < area->Base()) {
713 offset = area->Base() - address;
714 if (offset >= size)
715 return false;
716
717 address = area->Base();
718 size -= offset;
719 offset = 0;
720 if (size > area->Size())
721 size = area->Size();
722
723 return true;
724 }
725
726 offset = address - area->Base();
727 if (offset >= area->Size())
728 return false;
729
730 if (size >= area->Size() - offset)
731 size = area->Size() - offset;
732
733 return true;
734 }
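
// A worked example (illustrative only): for an area at base 0x10000 with size
// 0x4000, a request of address 0xf000 / size 0x3000 is clipped to address
// 0x10000, size 0x2000, offset 0; a request of address 0x11000 / size 0x10000
// yields offset 0x1000 with the size clamped to 0x3000.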
735
736
737 /*! Cuts a piece out of an area. If the given cut range covers the complete
738 area, it is deleted. If it covers the beginning or the end, the area is
739 resized accordingly. If the range covers some part in the middle of the
740 area, it is split in two; in this case the second area is returned via
741 \a _secondArea (the variable is left untouched in the other cases).
742 The address space must be write locked.
743 The caller must ensure that no part of the given range is wired.
744 */
745 static status_t
746 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
747 addr_t size, VMArea** _secondArea, bool kernel)
748 {
749 addr_t offset;
750 if (!intersect_area(area, address, size, offset))
751 return B_OK;
752
753 // Is the area fully covered?
754 if (address == area->Base() && size == area->Size()) {
755 delete_area(addressSpace, area, false);
756 return B_OK;
757 }
758
759 int priority;
760 uint32 allocationFlags;
761 if (addressSpace == VMAddressSpace::Kernel()) {
762 priority = VM_PRIORITY_SYSTEM;
763 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
764 | HEAP_DONT_LOCK_KERNEL_SPACE;
765 } else {
766 priority = VM_PRIORITY_USER;
767 allocationFlags = 0;
768 }
769
770 VMCache* cache = vm_area_get_locked_cache(area);
771 VMCacheChainLocker cacheChainLocker(cache);
772 cacheChainLocker.LockAllSourceCaches();
773
774 // If no one else uses the area's cache and it's an anonymous cache, we can
775 // resize or split it, too.
776 bool onlyCacheUser = cache->areas == area && area->cache_next == NULL
777 && cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM;
778
779 const addr_t oldSize = area->Size();
780
781 // Cut the end only?
782 if (offset > 0 && size == area->Size() - offset) {
783 status_t error = addressSpace->ShrinkAreaTail(area, offset,
784 allocationFlags);
785 if (error != B_OK)
786 return error;
787
788 if (area->page_protections != NULL) {
789 uint8* newProtections = realloc_area_page_protections(
790 area->page_protections, area->Size(), allocationFlags);
791
792 if (newProtections == NULL) {
793 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
794 return B_NO_MEMORY;
795 }
796
797 area->page_protections = newProtections;
798 }
799
800 // unmap pages
801 unmap_pages(area, address, size);
802
803 if (onlyCacheUser) {
804 // Since VMCache::Resize() can temporarily drop the lock, we must
805 // unlock all lower caches to prevent locking order inversion.
806 cacheChainLocker.Unlock(cache);
807 cache->Resize(cache->virtual_base + offset, priority);
808 }
809
810 if (area->page_protections != NULL) {
811 // Resize() adjusts the commitment, so we must do this after that.
812 const size_t newCommitmentPages = compute_area_page_commitment(area);
813 cache->Commit(newCommitmentPages * B_PAGE_SIZE, VM_PRIORITY_USER);
814 }
815
816 if (onlyCacheUser)
817 cache->ReleaseRefAndUnlock();
818 return B_OK;
819 }
820
821 // Cut the beginning only?
822 if (area->Base() == address) {
823 uint8* newProtections = NULL;
824 if (area->page_protections != NULL) {
825 // Allocate all memory before shifting, as the shift might lose some bits.
826 newProtections = realloc_area_page_protections(NULL, area->Size(),
827 allocationFlags);
828
829 if (newProtections == NULL)
830 return B_NO_MEMORY;
831 }
832
833 // resize the area
834 status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size,
835 allocationFlags);
836 if (error != B_OK) {
837 free_etc(newProtections, allocationFlags);
838 return error;
839 }
840
841 if (area->page_protections != NULL) {
842 size_t oldBytes = area_page_protections_size(oldSize);
843 ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE;
844 bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4));
845
846 size_t bytes = area_page_protections_size(area->Size());
847 memcpy(newProtections, area->page_protections, bytes);
848 free_etc(area->page_protections, allocationFlags);
849 area->page_protections = newProtections;
850 }
851
852 // unmap pages
853 unmap_pages(area, address, size);
854
855 if (onlyCacheUser) {
856 // Since VMCache::Rebase() can temporarily drop the lock, we must
857 // unlock all lower caches to prevent locking order inversion.
858 cacheChainLocker.Unlock(cache);
859 cache->Rebase(cache->virtual_base + size, priority);
860 }
861
862 if (area->page_protections != NULL) {
863 // Rebase() adjusts the commitment, so we must do this after that.
864 const size_t newCommitmentPages = compute_area_page_commitment(area);
865 cache->Commit(newCommitmentPages * B_PAGE_SIZE, VM_PRIORITY_USER);
866 }
867
868 if (onlyCacheUser)
869 cache->ReleaseRefAndUnlock();
870
871 area->cache_offset += size;
872 return B_OK;
873 }
874
875 // The tough part -- cut a piece out of the middle of the area.
876 // We do that by shrinking the area to the begin section and creating a
877 // new area for the end section.
878 addr_t firstNewSize = offset;
879 addr_t secondBase = address + size;
880 addr_t secondSize = area->Size() - offset - size;
881
882 // unmap pages
883 unmap_pages(area, address, area->Size() - firstNewSize);
884
885 // resize the area
886 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
887 allocationFlags);
888 if (error != B_OK)
889 return error;
890
891 uint8* areaNewProtections = NULL;
892 uint8* secondAreaNewProtections = NULL;
893
894 // Try to allocate the new memory before making some hard to reverse
895 // changes.
896 if (area->page_protections != NULL) {
897 areaNewProtections = realloc_area_page_protections(NULL, area->Size(),
898 allocationFlags);
899 secondAreaNewProtections = realloc_area_page_protections(NULL, secondSize,
900 allocationFlags);
901
902 if (areaNewProtections == NULL || secondAreaNewProtections == NULL) {
903 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
904 free_etc(areaNewProtections, allocationFlags);
905 free_etc(secondAreaNewProtections, allocationFlags);
906 return B_NO_MEMORY;
907 }
908 }
909
910 virtual_address_restrictions addressRestrictions = {};
911 addressRestrictions.address = (void*)secondBase;
912 addressRestrictions.address_specification = B_EXACT_ADDRESS;
913 VMArea* secondArea;
914 AutoLocker<VMCache> areaCacheLocker, secondCacheLocker;
915
916 if (onlyCacheUser) {
917 // Create a new cache for the second area.
918 VMCache* secondCache;
919 error = VMCacheFactory::CreateAnonymousCache(secondCache,
920 area->protection & B_OVERCOMMITTING_AREA, 0, 0,
921 dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority);
922 if (error != B_OK) {
923 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
924 free_etc(areaNewProtections, allocationFlags);
925 free_etc(secondAreaNewProtections, allocationFlags);
926 return error;
927 }
928
929 secondCache->Lock();
930 secondCacheLocker.SetTo(secondCache, true);
931 secondCache->temporary = cache->temporary;
932 secondCache->virtual_base = area->cache_offset;
933 secondCache->virtual_end = area->cache_offset + secondSize;
934
935 // Transfer the concerned pages from the first cache.
936 off_t adoptOffset = area->cache_offset + secondBase - area->Base();
937 error = secondCache->Adopt(cache, adoptOffset, secondSize,
938 area->cache_offset);
939
940 if (error == B_OK) {
941 // Since VMCache::Resize() can temporarily drop the lock, we must
942 // unlock all lower caches to prevent locking order inversion.
943 cacheChainLocker.Unlock(cache);
944 areaCacheLocker.SetTo(cache, true);
945 cache->Resize(cache->virtual_base + firstNewSize, priority);
946 // Don't unlock the cache yet because we might have to resize it back.
947 // (Or we might have to modify its commitment, if we have page_protections.)
948
949 // Map the second area.
950 error = map_backing_store(addressSpace, secondCache,
951 area->cache_offset, area->name, secondSize, area->wiring,
952 area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0,
953 &addressRestrictions, kernel, &secondArea, NULL);
954 }
955
956 if (error != B_OK) {
957 // Restore the original cache.
958 cache->Resize(cache->virtual_base + oldSize, priority);
959
960 // Move the pages back.
961 status_t readoptStatus = cache->Adopt(secondCache,
962 area->cache_offset, secondSize, adoptOffset);
963 if (readoptStatus != B_OK) {
964 // Some (swap) pages have not been moved back and will be lost
965 // once the second cache is deleted.
966 panic("failed to restore cache range: %s",
967 strerror(readoptStatus));
968
969 // TODO: Handle out of memory cases by freeing memory and
970 // retrying.
971 }
972
973 cache->ReleaseRefLocked();
974 secondCache->ReleaseRefLocked();
975 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
976 free_etc(areaNewProtections, allocationFlags);
977 free_etc(secondAreaNewProtections, allocationFlags);
978 return error;
979 }
980
981 cache->ReleaseRefLocked();
982 } else {
983 // Reuse the existing cache.
984 error = map_backing_store(addressSpace, cache, area->cache_offset
985 + (secondBase - area->Base()),
986 area->name, secondSize, area->wiring, area->protection,
987 area->protection_max, REGION_NO_PRIVATE_MAP, 0,
988 &addressRestrictions, kernel, &secondArea, NULL);
989 if (error != B_OK) {
990 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
991 free_etc(areaNewProtections, allocationFlags);
992 free_etc(secondAreaNewProtections, allocationFlags);
993 return error;
994 }
995
996 // We need a cache reference for the new area.
997 cache->AcquireRefLocked();
998 }
999
1000 if (area->page_protections != NULL) {
1001 // Copy the protection bits of the first area.
1002 const size_t areaBytes = area_page_protections_size(area->Size());
1003 memcpy(areaNewProtections, area->page_protections, areaBytes);
1004 uint8* areaOldProtections = area->page_protections;
1005 area->page_protections = areaNewProtections;
1006
1007 // Shift the protection bits of the second area to the start of
1008 // the old array.
1009 const size_t oldBytes = area_page_protections_size(oldSize);
1010 addr_t secondAreaOffset = secondBase - area->Base();
1011 ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE;
1012 bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4));
1013
1014 // Copy the protection bits of the second area.
1015 const size_t secondAreaBytes = area_page_protections_size(secondSize);
1016 memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes);
1017 secondArea->page_protections = secondAreaNewProtections;
1018
1019 // We don't need this anymore.
1020 free_etc(areaOldProtections, allocationFlags);
1021
1022 // Shrink commitments.
1023 const size_t areaCommitPages = compute_area_page_commitment(area);
1024 area->cache->Commit(areaCommitPages * B_PAGE_SIZE, VM_PRIORITY_USER);
1025
1026 const size_t secondCommitPages = compute_area_page_commitment(secondArea);
1027 secondArea->cache->Commit(secondCommitPages * B_PAGE_SIZE, VM_PRIORITY_USER);
1028
1029 // Set the correct page protections for the second area.
1030 VMTranslationMap* map = addressSpace->TranslationMap();
1031 map->Lock();
1032 for (VMCachePagesTree::Iterator it
1033 = secondArea->cache->pages.GetIterator();
1034 vm_page* page = it.Next();) {
1035 if (is_page_in_area(secondArea, page)) {
1036 addr_t address = virtual_page_address(secondArea, page);
1037 uint32 pageProtection
1038 = get_area_page_protection(secondArea, address);
1039 map->ProtectPage(secondArea, address, pageProtection);
1040 }
1041 }
1042 map->Unlock();
1043 }
1044
1045 if (_secondArea != NULL)
1046 *_secondArea = secondArea;
1047
1048 return B_OK;
1049 }
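
// Example of the middle-cut case above (illustrative only): cutting
// [base + 0x2000, base + 0x3000) out of a 0x5000 byte area shrinks the
// original area to 0x2000 bytes and creates a second 0x2000 byte area at
// base + 0x3000, with the corresponding cache pages transferred (or shared)
// as described above.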
1050
1051
1052 /*! Deletes or cuts all areas in the given address range.
1053 The address space must be write-locked.
1054 The caller must ensure that no part of the given range is wired.
1055 */
1056 static status_t
1057 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1058 bool kernel)
1059 {
1060 size = PAGE_ALIGN(size);
1061
1062 // Check whether the caller is allowed to modify the concerned areas.
1063 if (!kernel) {
1064 for (VMAddressSpace::AreaRangeIterator it
1065 = addressSpace->GetAreaRangeIterator(address, size);
1066 VMArea* area = it.Next();) {
1067
1068 if ((area->protection & B_KERNEL_AREA) != 0) {
1069 dprintf("unmap_address_range: team %" B_PRId32 " tried to "
1070 "unmap range of kernel area %" B_PRId32 " (%s)\n",
1071 team_get_current_team_id(), area->id, area->name);
1072 return B_NOT_ALLOWED;
1073 }
1074 }
1075 }
1076
1077 for (VMAddressSpace::AreaRangeIterator it
1078 = addressSpace->GetAreaRangeIterator(address, size);
1079 VMArea* area = it.Next();) {
1080
1081 status_t error = cut_area(addressSpace, area, address, size, NULL,
1082 kernel);
1083 if (error != B_OK)
1084 return error;
1085 // Failing after already messing with areas is ugly, but we
1086 // can't do anything about it.
1087 }
1088
1089 return B_OK;
1090 }
1091
1092
1093 static status_t
1094 discard_area_range(VMArea* area, addr_t address, addr_t size)
1095 {
1096 addr_t offset;
1097 if (!intersect_area(area, address, size, offset))
1098 return B_OK;
1099
1100 // If someone else uses the area's cache or it's not an anonymous cache, we
1101 // can't discard.
1102 VMCache* cache = vm_area_get_locked_cache(area);
1103 if (cache->areas != area || area->cache_next != NULL
1104 || !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) {
1105 return B_OK;
1106 }
1107
1108 VMCacheChainLocker cacheChainLocker(cache);
1109 cacheChainLocker.LockAllSourceCaches();
1110
1111 unmap_pages(area, address, size);
1112
1113 // Since VMCache::Discard() can temporarily drop the lock, we must
1114 // unlock all lower caches to prevent locking order inversion.
1115 cacheChainLocker.Unlock(cache);
1116 cache->Discard(cache->virtual_base + offset, size);
1117 cache->ReleaseRefAndUnlock();
1118
1119 return B_OK;
1120 }
1121
1122
1123 static status_t
1124 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
1125 bool kernel)
1126 {
1127 for (VMAddressSpace::AreaRangeIterator it
1128 = addressSpace->GetAreaRangeIterator(address, size);
1129 VMArea* area = it.Next();) {
1130 status_t error = discard_area_range(area, address, size);
1131 if (error != B_OK)
1132 return error;
1133 }
1134
1135 return B_OK;
1136 }
1137
1138
1139 /*! You need to hold the lock of the cache and the write lock of the address
1140 space when calling this function.
1141 Note that in case of error the cache will be temporarily unlocked.
1142 If \a addressSpec is \c B_EXACT_ADDRESS and the
1143 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
1144 that no part of the specified address range (base \c *_virtualAddress, size
1145 \a size) is wired. The cache will also be temporarily unlocked.
1146 */
1147 static status_t
1148 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
1149 const char* areaName, addr_t size, int wiring, int protection,
1150 int protectionMax, int mapping,
1151 uint32 flags, const virtual_address_restrictions* addressRestrictions,
1152 bool kernel, VMArea** _area, void** _virtualAddress)
1153 {
1154 TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
1155 B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
1156 ", protection %d, protectionMax %d, area %p, areaName '%s'\n",
1157 addressSpace, cache, addressRestrictions->address, offset, size,
1158 addressRestrictions->address_specification, wiring, protection,
1159 protectionMax, _area, areaName));
1160 cache->AssertLocked();
1161
1162 if (size == 0) {
1163 #if KDEBUG
1164 panic("map_backing_store(): called with size=0 for area '%s'!",
1165 areaName);
1166 #endif
1167 return B_BAD_VALUE;
1168 }
1169 if (offset < 0)
1170 return B_BAD_VALUE;
1171
1172 uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
1173 | HEAP_DONT_LOCK_KERNEL_SPACE;
1174 int priority;
1175 if (addressSpace != VMAddressSpace::Kernel()) {
1176 priority = VM_PRIORITY_USER;
1177 } else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
1178 priority = VM_PRIORITY_VIP;
1179 allocationFlags |= HEAP_PRIORITY_VIP;
1180 } else
1181 priority = VM_PRIORITY_SYSTEM;
1182
1183 VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
1184 allocationFlags);
1185 if (area == NULL)
1186 return B_NO_MEMORY;
1187 if (mapping != REGION_PRIVATE_MAP)
1188 area->protection_max = protectionMax & B_USER_PROTECTION;
1189
1190 status_t status;
1191
1192 // if this is a private map, we need to create a new cache
1193 // to handle the private copies of pages as they are written to
1194 VMCache* sourceCache = cache;
1195 if (mapping == REGION_PRIVATE_MAP) {
1196 VMCache* newCache;
1197
1198 // create an anonymous cache
1199 status = VMCacheFactory::CreateAnonymousCache(newCache,
1200 (protection & B_STACK_AREA) != 0
1201 || (protection & B_OVERCOMMITTING_AREA) != 0, 0,
1202 cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
1203 if (status != B_OK)
1204 goto err1;
1205
1206 newCache->Lock();
1207 newCache->temporary = 1;
1208 newCache->virtual_base = offset;
1209 newCache->virtual_end = offset + size;
1210
1211 cache->AddConsumer(newCache);
1212
1213 cache = newCache;
1214 }
1215
1216 if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
1217 status = cache->SetMinimalCommitment(size, priority);
1218 if (status != B_OK)
1219 goto err2;
1220 }
1221
1222 // check to see if this address space has entered DELETE state
1223 if (addressSpace->IsBeingDeleted()) {
1224 // okay, someone is trying to delete this address space now, so we can't
1225 // insert the area, so back out
1226 status = B_BAD_TEAM_ID;
1227 goto err2;
1228 }
1229
1230 if (addressRestrictions->address_specification == B_EXACT_ADDRESS
1231 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
1232 // temporarily unlock the current cache since it might be mapped to
1233 // some existing area, and unmap_address_range also needs to lock that
1234 // cache to delete the area.
1235 cache->Unlock();
1236 status = unmap_address_range(addressSpace,
1237 (addr_t)addressRestrictions->address, size, kernel);
1238 cache->Lock();
1239 if (status != B_OK)
1240 goto err2;
1241 }
1242
1243 status = addressSpace->InsertArea(area, size, addressRestrictions,
1244 allocationFlags, _virtualAddress);
1245 if (status == B_NO_MEMORY
1246 && addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
1247 // Due to how many locks are held, we cannot wait here for space to be
1248 // freed up, but we can at least notify the low_resource handler.
1249 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0);
1250 }
1251 if (status != B_OK)
1252 goto err2;
1253
1254 // attach the cache to the area
1255 area->cache = cache;
1256 area->cache_offset = offset;
1257
1258 // point the cache back to the area
1259 cache->InsertAreaLocked(area);
1260 if (mapping == REGION_PRIVATE_MAP)
1261 cache->Unlock();
1262
1263 // insert the area in the global areas map
1264 status = VMAreas::Insert(area);
1265 if (status != B_OK)
1266 goto err3;
1267
1268 // grab a ref to the address space (the area holds this)
1269 addressSpace->Get();
1270
1271 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
1272 // cache, sourceCache, areaName, area);
1273
1274 *_area = area;
1275 return B_OK;
1276
1277 err3:
1278 cache->Lock();
1279 cache->RemoveArea(area);
1280 area->cache = NULL;
1281 err2:
1282 if (mapping == REGION_PRIVATE_MAP) {
1283 // We created this cache, so we must delete it again. Note, that we
1284 // need to temporarily unlock the source cache or we'll otherwise
1285 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
1286 sourceCache->Unlock();
1287 cache->ReleaseRefAndUnlock();
1288 sourceCache->Lock();
1289 }
1290 err1:
1291 addressSpace->DeleteArea(area, allocationFlags);
1292 return status;
1293 }
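
// Note (summary): with REGION_PRIVATE_MAP the new area ends up attached to a
// fresh anonymous cache whose source is the passed-in cache, so writes are
// served from private copies while reads still fall through to the source;
// with REGION_NO_PRIVATE_MAP the area is attached to the passed-in cache
// directly.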
1294
1295
1296 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
1297 locker1, locker2).
1298 */
1299 template<typename LockerType1, typename LockerType2>
1300 static inline bool
1301 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
1302 {
1303 area->cache->AssertLocked();
1304
1305 VMAreaUnwiredWaiter waiter;
1306 if (!area->AddWaiterIfWired(&waiter))
1307 return false;
1308
1309 // unlock everything and wait
1310 if (locker1 != NULL)
1311 locker1->Unlock();
1312 if (locker2 != NULL)
1313 locker2->Unlock();
1314
1315 waiter.waitEntry.Wait();
1316
1317 return true;
1318 }
1319
1320
1321 /*! Checks whether the given area has any wired ranges intersecting with the
1322 specified range and waits, if so.
1323
1324 When it has to wait, the function calls \c Unlock() on both \a locker1
1325 and \a locker2, if given.
1326 The area's top cache must be locked and must be unlocked as a side effect
1327 of calling \c Unlock() on either \a locker1 or \a locker2.
1328
1329 If the function does not have to wait it does not modify or unlock any
1330 object.
1331
1332 \param area The area to be checked.
1333 \param base The base address of the range to check.
1334 \param size The size of the address range to check.
1335 \param locker1 An object to be unlocked before starting to wait (may
1336 be \c NULL).
1337 \param locker2 An object to be unlocked before starting to wait (may
1338 be \c NULL).
1339 \return \c true, if the function had to wait, \c false otherwise.
1340 */
1341 template<typename LockerType1, typename LockerType2>
1342 static inline bool
1343 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
1344 LockerType1* locker1, LockerType2* locker2)
1345 {
1346 area->cache->AssertLocked();
1347
1348 VMAreaUnwiredWaiter waiter;
1349 if (!area->AddWaiterIfWired(&waiter, base, size))
1350 return false;
1351
1352 // unlock everything and wait
1353 if (locker1 != NULL)
1354 locker1->Unlock();
1355 if (locker2 != NULL)
1356 locker2->Unlock();
1357
1358 waiter.waitEntry.Wait();
1359
1360 return true;
1361 }
1362
1363
1364 /*! Checks whether the given address space has any wired ranges intersecting
1365 with the specified range and waits, if so.
1366
1367 Similar to wait_if_area_range_is_wired(), with the following differences:
1368 - All areas intersecting with the range are checked (respectively all until
1369 one is found that contains a wired range intersecting with the given
1370 range).
1371 - The given address space must at least be read-locked and must be unlocked
1372 when \c Unlock() is called on \a locker.
1373 - None of the areas' caches are allowed to be locked.
1374 */
1375 template<typename LockerType>
1376 static inline bool
1377 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1378 size_t size, LockerType* locker)
1379 {
1380 for (VMAddressSpace::AreaRangeIterator it
1381 = addressSpace->GetAreaRangeIterator(base, size);
1382 VMArea* area = it.Next();) {
1383
1384 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1385
1386 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1387 return true;
1388 }
1389
1390 return false;
1391 }
1392
1393
1394 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection().
1395 It must be called in a situation where the kernel address space may be
1396 locked.
1397 */
1398 status_t
1399 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1400 {
1401 AddressSpaceReadLocker locker;
1402 VMArea* area;
1403 status_t status = locker.SetFromArea(id, area);
1404 if (status != B_OK)
1405 return status;
1406
1407 if (area->page_protections == NULL) {
1408 status = allocate_area_page_protections(area);
1409 if (status != B_OK)
1410 return status;
1411 }
1412
1413 *cookie = (void*)area;
1414 return B_OK;
1415 }
1416
1417
1418 /*! This is a debug helper function that can only be used with very specific
1419 use cases.
1420 Sets protection for the given address range to the protection specified.
1421 If \a protection is 0 then the involved pages will be marked non-present
1422 in the translation map to cause a fault on access. The pages aren't
1423 actually unmapped however so that they can be marked present again with
1424 additional calls to this function. For this to work the area must be
1425 fully locked in memory so that the pages aren't otherwise touched.
1426 This function does not lock the kernel address space and needs to be
1427 supplied with a \a cookie retrieved from a successful call to
1428 vm_prepare_kernel_area_debug_protection().
1429 */
1430 status_t
1431 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1432 uint32 protection)
1433 {
1434 // check address range
1435 addr_t address = (addr_t)_address;
1436 size = PAGE_ALIGN(size);
1437
1438 if ((address % B_PAGE_SIZE) != 0
1439 || (addr_t)address + size < (addr_t)address
1440 || !IS_KERNEL_ADDRESS(address)
1441 || !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1442 return B_BAD_VALUE;
1443 }
1444
1445 // Translate the kernel protection to user protection as we only store that.
1446 if ((protection & B_KERNEL_READ_AREA) != 0)
1447 protection |= B_READ_AREA;
1448 if ((protection & B_KERNEL_WRITE_AREA) != 0)
1449 protection |= B_WRITE_AREA;
1450
1451 VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1452 VMTranslationMap* map = addressSpace->TranslationMap();
1453 VMArea* area = (VMArea*)cookie;
1454
1455 addr_t offset = address - area->Base();
1456 if (area->Size() - offset < size) {
1457 panic("protect range not fully within supplied area");
1458 return B_BAD_VALUE;
1459 }
1460
1461 if (area->page_protections == NULL) {
1462 panic("area has no page protections");
1463 return B_BAD_VALUE;
1464 }
1465
1466 // Invalidate the mapping entries so any access to them will fault or
1467 // restore the mapping entries unchanged so that lookups will succeed again.
1468 map->Lock();
1469 map->DebugMarkRangePresent(address, address + size, protection != 0);
1470 map->Unlock();
1471
1472 // And set the proper page protections so that the fault case will actually
1473 // fail and not simply try to map a new page.
1474 for (addr_t pageAddress = address; pageAddress < address + size;
1475 pageAddress += B_PAGE_SIZE) {
1476 set_area_page_protection(area, pageAddress, protection);
1477 }
1478
1479 return B_OK;
1480 }
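
// A minimal usage sketch (illustrative only; "areaID" and "address" are
// hypothetical, error handling omitted):
//
//	void* cookie;
//	if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
//		// make the page at "address" fault on any access ...
//		vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE, 0);
//		// ... and later make it accessible again
//		vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE,
//			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	}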
1481
1482
1483 status_t
1484 vm_block_address_range(const char* name, void* address, addr_t size)
1485 {
1486 if (!arch_vm_supports_protection(0))
1487 return B_NOT_SUPPORTED;
1488
1489 AddressSpaceWriteLocker locker;
1490 status_t status = locker.SetTo(VMAddressSpace::KernelID());
1491 if (status != B_OK)
1492 return status;
1493
1494 VMAddressSpace* addressSpace = locker.AddressSpace();
1495
1496 // create an anonymous cache
1497 VMCache* cache;
1498 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1499 VM_PRIORITY_SYSTEM);
1500 if (status != B_OK)
1501 return status;
1502
1503 cache->temporary = 1;
1504 cache->virtual_end = size;
1505 cache->Lock();
1506
1507 VMArea* area;
1508 virtual_address_restrictions addressRestrictions = {};
1509 addressRestrictions.address = address;
1510 addressRestrictions.address_specification = B_EXACT_ADDRESS;
1511 status = map_backing_store(addressSpace, cache, 0, name, size,
1512 B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions,
1513 true, &area, NULL);
1514 if (status != B_OK) {
1515 cache->ReleaseRefAndUnlock();
1516 return status;
1517 }
1518
1519 cache->Unlock();
1520 area->cache_type = CACHE_TYPE_RAM;
1521 return area->id;
1522 }
1523
1524
1525 status_t
1526 vm_unreserve_address_range(team_id team, void* address, addr_t size)
1527 {
1528 AddressSpaceWriteLocker locker(team);
1529 if (!locker.IsLocked())
1530 return B_BAD_TEAM_ID;
1531
1532 VMAddressSpace* addressSpace = locker.AddressSpace();
1533 return addressSpace->UnreserveAddressRange((addr_t)address, size,
1534 addressSpace == VMAddressSpace::Kernel()
1535 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1536 }
1537
1538
1539 status_t
1540 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1541 addr_t size, uint32 flags)
1542 {
1543 if (size == 0)
1544 return B_BAD_VALUE;
1545
1546 AddressSpaceWriteLocker locker(team);
1547 if (!locker.IsLocked())
1548 return B_BAD_TEAM_ID;
1549
1550 virtual_address_restrictions addressRestrictions = {};
1551 addressRestrictions.address = *_address;
1552 addressRestrictions.address_specification = addressSpec;
1553 VMAddressSpace* addressSpace = locker.AddressSpace();
1554 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1555 addressSpace == VMAddressSpace::Kernel()
1556 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1557 _address);
1558 }
1559
1560
1561 area_id
1562 vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1563 uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1564 const virtual_address_restrictions* virtualAddressRestrictions,
1565 const physical_address_restrictions* physicalAddressRestrictions,
1566 bool kernel, void** _address)
1567 {
1568 VMArea* area;
1569 VMCache* cache;
1570 vm_page* page = NULL;
1571 bool isStack = (protection & B_STACK_AREA) != 0;
1572 page_num_t guardPages;
1573 bool canOvercommit = false;
1574 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1575 ? VM_PAGE_ALLOC_CLEAR : 0;
1576
1577 TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1578 team, name, size));
1579
1580 size = PAGE_ALIGN(size);
1581 guardSize = PAGE_ALIGN(guardSize);
1582 guardPages = guardSize / B_PAGE_SIZE;
1583
1584 if (size == 0 || size < guardSize)
1585 return B_BAD_VALUE;
1586 if (!arch_vm_supports_protection(protection))
1587 return B_NOT_SUPPORTED;
1588
1589 if (team == B_CURRENT_TEAM)
1590 team = VMAddressSpace::CurrentID();
1591 if (team < 0)
1592 return B_BAD_TEAM_ID;
1593
1594 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1595 canOvercommit = true;
1596
1597 #ifdef DEBUG_KERNEL_STACKS
1598 if ((protection & B_KERNEL_STACK_AREA) != 0)
1599 isStack = true;
1600 #endif
1601
1602 // check parameters
1603 switch (virtualAddressRestrictions->address_specification) {
1604 case B_ANY_ADDRESS:
1605 case B_EXACT_ADDRESS:
1606 case B_BASE_ADDRESS:
1607 case B_ANY_KERNEL_ADDRESS:
1608 case B_ANY_KERNEL_BLOCK_ADDRESS:
1609 case B_RANDOMIZED_ANY_ADDRESS:
1610 case B_RANDOMIZED_BASE_ADDRESS:
1611 break;
1612
1613 default:
1614 return B_BAD_VALUE;
1615 }
1616
1617 // If low or high physical address restrictions are given, we force
1618 // B_CONTIGUOUS wiring, since only then we'll use
1619 // vm_page_allocate_page_run() which deals with those restrictions.
1620 if (physicalAddressRestrictions->low_address != 0
1621 || physicalAddressRestrictions->high_address != 0) {
1622 wiring = B_CONTIGUOUS;
1623 }
1624
1625 physical_address_restrictions stackPhysicalRestrictions;
1626 bool doReserveMemory = false;
1627 switch (wiring) {
1628 case B_NO_LOCK:
1629 break;
1630 case B_FULL_LOCK:
1631 case B_LAZY_LOCK:
1632 case B_CONTIGUOUS:
1633 doReserveMemory = true;
1634 break;
1635 case B_ALREADY_WIRED:
1636 break;
1637 case B_LOMEM:
1638 stackPhysicalRestrictions = *physicalAddressRestrictions;
1639 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1640 physicalAddressRestrictions = &stackPhysicalRestrictions;
1641 wiring = B_CONTIGUOUS;
1642 doReserveMemory = true;
1643 break;
1644 case B_32_BIT_FULL_LOCK:
1645 if (B_HAIKU_PHYSICAL_BITS <= 32
1646 || (uint64)vm_page_max_address() < (uint64)1 << 32) {
1647 wiring = B_FULL_LOCK;
1648 doReserveMemory = true;
1649 break;
1650 }
1651 // TODO: We don't really support this mode efficiently. Just fall
1652 // through for now ...
1653 case B_32_BIT_CONTIGUOUS:
1654 #if B_HAIKU_PHYSICAL_BITS > 32
1655 if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1656 stackPhysicalRestrictions = *physicalAddressRestrictions;
1657 stackPhysicalRestrictions.high_address
1658 = (phys_addr_t)1 << 32;
1659 physicalAddressRestrictions = &stackPhysicalRestrictions;
1660 }
1661 #endif
1662 wiring = B_CONTIGUOUS;
1663 doReserveMemory = true;
1664 break;
1665 default:
1666 return B_BAD_VALUE;
1667 }
1668
1669 // Optimization: For a single-page contiguous allocation without low/high
1670 // memory restriction B_FULL_LOCK wiring suffices.
1671 if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1672 && physicalAddressRestrictions->low_address == 0
1673 && physicalAddressRestrictions->high_address == 0) {
1674 wiring = B_FULL_LOCK;
1675 }
1676
1677 // For full lock or contiguous areas we're also going to map the pages and
1678 // thus need to reserve pages for the mapping backend upfront.
1679 addr_t reservedMapPages = 0;
1680 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1681 AddressSpaceWriteLocker locker;
1682 status_t status = locker.SetTo(team);
1683 if (status != B_OK)
1684 return status;
1685
1686 VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1687 reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1688 }
1689
1690 int priority;
1691 if (team != VMAddressSpace::KernelID())
1692 priority = VM_PRIORITY_USER;
1693 else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1694 priority = VM_PRIORITY_VIP;
1695 else
1696 priority = VM_PRIORITY_SYSTEM;
1697
1698 // Reserve memory before acquiring the address space lock. This reduces the
1699 // chances of failure, since while holding the write lock to the address
1700 // space (if it is the kernel address space that is), the low memory handler
1701 // won't be able to free anything for us.
1702 addr_t reservedMemory = 0;
1703 if (doReserveMemory) {
1704 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1705 if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1706 return B_NO_MEMORY;
1707 reservedMemory = size;
1708 // TODO: We don't reserve the memory for the pages for the page
1709 // directories/tables. We actually need to do so, since we currently don't
1710 // reclaim them (and probably can't reclaim all of them anyway). Thus
1711 // there are actually less physical pages than there should be, which
1712 // can get the VM into trouble in low memory situations.
1713 }
1714
1715 AddressSpaceWriteLocker locker;
1716 VMAddressSpace* addressSpace;
1717 status_t status;
1718
1719 // For full lock areas reserve the pages before locking the address
1720 // space. E.g. block caches can't release their memory while we hold the
1721 // address space lock.
1722 page_num_t reservedPages = reservedMapPages;
1723 if (wiring == B_FULL_LOCK)
1724 reservedPages += size / B_PAGE_SIZE;
1725
1726 vm_page_reservation reservation;
1727 if (reservedPages > 0) {
1728 if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1729 if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1730 priority)) {
1731 reservedPages = 0;
1732 status = B_WOULD_BLOCK;
1733 goto err0;
1734 }
1735 } else
1736 vm_page_reserve_pages(&reservation, reservedPages, priority);
1737 }
1738
1739 if (wiring == B_CONTIGUOUS) {
1740 // we try to allocate the page run here upfront as this may easily
1741 // fail for obvious reasons
1742 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1743 size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1744 if (page == NULL) {
1745 status = B_NO_MEMORY;
1746 goto err0;
1747 }
1748 }
1749
1750 // Lock the address space and, if B_EXACT_ADDRESS and
1751 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1752 // is not wired.
1753 do {
1754 status = locker.SetTo(team);
1755 if (status != B_OK)
1756 goto err1;
1757
1758 addressSpace = locker.AddressSpace();
1759 } while (virtualAddressRestrictions->address_specification
1760 == B_EXACT_ADDRESS
1761 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1762 && wait_if_address_range_is_wired(addressSpace,
1763 (addr_t)virtualAddressRestrictions->address, size, &locker));
1764
1765 // create an anonymous cache
1766 	// if it's a stack, make sure that at least two pages are available
1767 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1768 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1769 wiring == B_NO_LOCK, priority);
1770 if (status != B_OK)
1771 goto err1;
1772
1773 cache->temporary = 1;
1774 cache->virtual_end = size;
1775 cache->committed_size = reservedMemory;
1776 // TODO: This should be done via a method.
1777 reservedMemory = 0;
1778
1779 cache->Lock();
1780
1781 status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1782 protection, 0, REGION_NO_PRIVATE_MAP, flags,
1783 virtualAddressRestrictions, kernel, &area, _address);
1784
1785 if (status != B_OK) {
1786 cache->ReleaseRefAndUnlock();
1787 goto err1;
1788 }
1789
1790 locker.DegradeToReadLock();
1791
1792 switch (wiring) {
1793 case B_NO_LOCK:
1794 case B_LAZY_LOCK:
1795 // do nothing - the pages are mapped in as needed
1796 break;
1797
1798 case B_FULL_LOCK:
1799 {
1800 // Allocate and map all pages for this area
1801
1802 off_t offset = 0;
1803 for (addr_t address = area->Base();
1804 address < area->Base() + (area->Size() - 1);
1805 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1806 #ifdef DEBUG_KERNEL_STACKS
1807 # ifdef STACK_GROWS_DOWNWARDS
1808 if (isStack && address < area->Base()
1809 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1810 # else
1811 if (isStack && address >= area->Base() + area->Size()
1812 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1813 # endif
1814 continue;
1815 #endif
1816 vm_page* page = vm_page_allocate_page(&reservation,
1817 PAGE_STATE_WIRED | pageAllocFlags);
1818 cache->InsertPage(page, offset);
1819 map_page(area, page, address, protection, &reservation);
1820
1821 DEBUG_PAGE_ACCESS_END(page);
1822 }
1823
1824 break;
1825 }
1826
1827 case B_ALREADY_WIRED:
1828 {
1829 // The pages should already be mapped. This is only really useful
1830 // during boot time. Find the appropriate vm_page objects and stick
1831 // them in the cache object.
1832 VMTranslationMap* map = addressSpace->TranslationMap();
1833 off_t offset = 0;
1834
1835 if (!gKernelStartup)
1836 panic("ALREADY_WIRED flag used outside kernel startup\n");
1837
1838 map->Lock();
1839
1840 for (addr_t virtualAddress = area->Base();
1841 virtualAddress < area->Base() + (area->Size() - 1);
1842 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1843 phys_addr_t physicalAddress;
1844 uint32 flags;
1845 status = map->Query(virtualAddress, &physicalAddress, &flags);
1846 if (status < B_OK) {
1847 panic("looking up mapping failed for va 0x%lx\n",
1848 virtualAddress);
1849 }
1850 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1851 if (page == NULL) {
1852 panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1853 "\n", physicalAddress);
1854 }
1855
1856 DEBUG_PAGE_ACCESS_START(page);
1857
1858 cache->InsertPage(page, offset);
1859 increment_page_wired_count(page);
1860 vm_page_set_state(page, PAGE_STATE_WIRED);
1861 page->busy = false;
1862
1863 DEBUG_PAGE_ACCESS_END(page);
1864 }
1865
1866 map->Unlock();
1867 break;
1868 }
1869
1870 case B_CONTIGUOUS:
1871 {
1872 		// We have already allocated our contiguous page run, so we can now
1873 		// just map the pages into the address space
1874 VMTranslationMap* map = addressSpace->TranslationMap();
1875 phys_addr_t physicalAddress
1876 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1877 addr_t virtualAddress = area->Base();
1878 off_t offset = 0;
1879
1880 map->Lock();
1881
1882 for (virtualAddress = area->Base(); virtualAddress < area->Base()
1883 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1884 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1885 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1886 if (page == NULL)
1887 panic("couldn't lookup physical page just allocated\n");
1888
1889 status = map->Map(virtualAddress, physicalAddress, protection,
1890 area->MemoryType(), &reservation);
1891 if (status < B_OK)
1892 panic("couldn't map physical page in page run\n");
1893
1894 cache->InsertPage(page, offset);
1895 increment_page_wired_count(page);
1896
1897 DEBUG_PAGE_ACCESS_END(page);
1898 }
1899
1900 map->Unlock();
1901 break;
1902 }
1903
1904 default:
1905 break;
1906 }
1907
1908 cache->Unlock();
1909
1910 if (reservedPages > 0)
1911 vm_page_unreserve_pages(&reservation);
1912
1913 TRACE(("vm_create_anonymous_area: done\n"));
1914
1915 area->cache_type = CACHE_TYPE_RAM;
1916 return area->id;
1917
1918 err1:
1919 if (wiring == B_CONTIGUOUS) {
1920 		// we had allocated the contiguous page run upfront; free it again
1921 phys_addr_t pageNumber = page->physical_page_number;
1922 int32 i;
1923 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1924 page = vm_lookup_page(pageNumber);
1925 if (page == NULL)
1926 panic("couldn't lookup physical page just allocated\n");
1927
1928 vm_page_set_state(page, PAGE_STATE_FREE);
1929 }
1930 }
1931
1932 err0:
1933 if (reservedPages > 0)
1934 vm_page_unreserve_pages(&reservation);
1935 if (reservedMemory > 0)
1936 vm_unreserve_memory(reservedMemory);
1937
1938 return status;
1939 }
1940
1941
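/*! Maps the given range of physical memory into the team's address space,
	backed by a device cache. If \a physicalAddress is not page aligned, the
	mapping is extended downwards and the returned \c *_address is offset
	into the area accordingly. A memory type can be passed in the top bits of
	\a addressSpec (\c B_MEMORY_TYPE_MASK); if none is given, uncached memory
	is assumed. Returns the ID of the new area or an error code.
*/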
1942 area_id
1943 vm_map_physical_memory(team_id team, const char* name, void** _address,
1944 uint32 addressSpec, addr_t size, uint32 protection,
1945 phys_addr_t physicalAddress, bool alreadyWired)
1946 {
1947 VMArea* area;
1948 VMCache* cache;
1949 addr_t mapOffset;
1950
1951 TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1952 ", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1953 B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1954 addressSpec, size, protection, physicalAddress));
1955
1956 if (!arch_vm_supports_protection(protection))
1957 return B_NOT_SUPPORTED;
1958
1959 AddressSpaceWriteLocker locker(team);
1960 if (!locker.IsLocked())
1961 return B_BAD_TEAM_ID;
1962
1963 	// if the physical address is not page aligned, start the mapping at the
1964 	// beginning of its page and grow the size by the offset
1965 mapOffset = physicalAddress % B_PAGE_SIZE;
1966 size += mapOffset;
1967 physicalAddress -= mapOffset;
1968
1969 size = PAGE_ALIGN(size);
1970
1971 // create a device cache
1972 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1973 if (status != B_OK)
1974 return status;
1975
1976 cache->virtual_end = size;
1977
1978 cache->Lock();
1979
1980 virtual_address_restrictions addressRestrictions = {};
1981 addressRestrictions.address = *_address;
1982 addressRestrictions.address_specification = addressSpec & ~B_MEMORY_TYPE_MASK;
1983 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1984 B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, CREATE_AREA_DONT_COMMIT_MEMORY,
1985 &addressRestrictions, true, &area, _address);
1986
1987 if (status < B_OK)
1988 cache->ReleaseRefLocked();
1989
1990 cache->Unlock();
1991
1992 if (status == B_OK) {
1993 // Set requested memory type -- default to uncached, but allow
1994 // that to be overridden by ranges that may already exist.
1995 uint32 memoryType = addressSpec & B_MEMORY_TYPE_MASK;
1996 const bool weak = (memoryType == 0);
1997 if (weak)
1998 memoryType = B_UNCACHED_MEMORY;
1999
2000 status = arch_vm_set_memory_type(area, physicalAddress, memoryType,
2001 weak ? &memoryType : NULL);
2002
2003 area->SetMemoryType(memoryType);
2004
2005 if (status != B_OK)
2006 delete_area(locker.AddressSpace(), area, false);
2007 }
2008
2009 if (status != B_OK)
2010 return status;
2011
2012 VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2013
2014 if (alreadyWired) {
2015 // The area is already mapped, but possibly not with the right
2016 // memory type.
2017 map->Lock();
2018 map->ProtectArea(area, area->protection);
2019 map->Unlock();
2020 } else {
2021 // Map the area completely.
2022
2023 // reserve pages needed for the mapping
2024 size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
2025 area->Base() + (size - 1));
2026 vm_page_reservation reservation;
2027 vm_page_reserve_pages(&reservation, reservePages,
2028 team == VMAddressSpace::KernelID()
2029 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2030
2031 map->Lock();
2032
2033 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2034 map->Map(area->Base() + offset, physicalAddress + offset,
2035 protection, area->MemoryType(), &reservation);
2036 }
2037
2038 map->Unlock();
2039
2040 vm_page_unreserve_pages(&reservation);
2041 }
2042
2043 	// offset the returned pointer back into the new area by the same amount
2044 	// the physical address was offset within its page
2045 *_address = (void*)((addr_t)*_address + mapOffset);
2046
2047 area->cache_type = CACHE_TYPE_DEVICE;
2048 return area->id;
2049 }
2050
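// A typical call, from hypothetical driver glue code (the physical base
// address below is just a placeholder), might look like this:
//
//	void* registers = NULL;
//	area_id area = vm_map_physical_memory(VMAddressSpace::KernelID(),
//		"device registers", &registers, B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0xfe000000, false);
//	if (area < 0)
//		return area;	// the mapping failed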
2051
2052 /*! Don't use!
2053 TODO: This function was introduced to map physical page vecs to
2054 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
2055 use a device cache and does not track vm_page::wired_count!
2056 */
2057 area_id
2058 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
2059 uint32 addressSpec, addr_t* _size, uint32 protection,
2060 struct generic_io_vec* vecs, uint32 vecCount)
2061 {
2062 TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
2063 "= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
2064 "vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
2065 addressSpec, _size, protection, vecs, vecCount));
2066
2067 if (!arch_vm_supports_protection(protection)
2068 || (addressSpec & B_MEMORY_TYPE_MASK) != 0) {
2069 return B_NOT_SUPPORTED;
2070 }
2071
2072 AddressSpaceWriteLocker locker(team);
2073 if (!locker.IsLocked())
2074 return B_BAD_TEAM_ID;
2075
2076 if (vecCount == 0)
2077 return B_BAD_VALUE;
2078
2079 addr_t size = 0;
2080 for (uint32 i = 0; i < vecCount; i++) {
2081 if (vecs[i].base % B_PAGE_SIZE != 0
2082 || vecs[i].length % B_PAGE_SIZE != 0) {
2083 return B_BAD_VALUE;
2084 }
2085
2086 size += vecs[i].length;
2087 }
2088
2089 // create a device cache
2090 VMCache* cache;
2091 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
2092 if (result != B_OK)
2093 return result;
2094
2095 cache->virtual_end = size;
2096
2097 cache->Lock();
2098
2099 VMArea* area;
2100 virtual_address_restrictions addressRestrictions = {};
2101 addressRestrictions.address = *_address;
2102 addressRestrictions.address_specification = addressSpec & ~B_MEMORY_TYPE_MASK;
2103 result = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
2104 B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, CREATE_AREA_DONT_COMMIT_MEMORY,
2105 &addressRestrictions, true, &area, _address);
2106
2107 if (result != B_OK)
2108 cache->ReleaseRefLocked();
2109
2110 cache->Unlock();
2111
2112 if (result != B_OK)
2113 return result;
2114
2115 VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2116 size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
2117 area->Base() + (size - 1));
2118
2119 vm_page_reservation reservation;
2120 vm_page_reserve_pages(&reservation, reservePages,
2121 team == VMAddressSpace::KernelID()
2122 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2123 map->Lock();
2124
2125 uint32 vecIndex = 0;
2126 size_t vecOffset = 0;
2127 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
2128 		while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) {
2129 vecOffset = 0;
2130 vecIndex++;
2131 }
2132
2133 if (vecIndex >= vecCount)
2134 break;
2135
2136 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
2137 protection, area->MemoryType(), &reservation);
2138
2139 vecOffset += B_PAGE_SIZE;
2140 }
2141
2142 map->Unlock();
2143 vm_page_unreserve_pages(&reservation);
2144
2145 if (_size != NULL)
2146 *_size = size;
2147
2148 area->cache_type = CACHE_TYPE_DEVICE;
2149 return area->id;
2150 }
2151
2152
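/*! Creates an area backed by a null cache in the given team's address
	space. No memory is committed for the area; it is created with
	\c B_LAZY_LOCK wiring and kernel read-only protection.
*/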
2153 area_id
2154 vm_create_null_area(team_id team, const char* name, void** address,
2155 uint32 addressSpec, addr_t size, uint32 flags)
2156 {
2157 size = PAGE_ALIGN(size);
2158
2159 // Lock the address space and, if B_EXACT_ADDRESS and
2160 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
2161 // is not wired.
2162 AddressSpaceWriteLocker locker;
2163 do {
2164 if (locker.SetTo(team) != B_OK)
2165 return B_BAD_TEAM_ID;
2166 } while (addressSpec == B_EXACT_ADDRESS
2167 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
2168 && wait_if_address_range_is_wired(locker.AddressSpace(),
2169 (addr_t)*address, size, &locker));
2170
2171 // create a null cache
2172 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
2173 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
2174 VMCache* cache;
2175 status_t status = VMCacheFactory::CreateNullCache(priority, cache);
2176 if (status != B_OK)
2177 return status;
2178
2179 cache->temporary = 1;
2180 cache->virtual_end = size;
2181
2182 cache->Lock();
2183
2184 VMArea* area;
2185 virtual_address_restrictions addressRestrictions = {};
2186 addressRestrictions.address = *address;
2187 addressRestrictions.address_specification = addressSpec;
2188 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
2189 B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA,
2190 REGION_NO_PRIVATE_MAP, flags | CREATE_AREA_DONT_COMMIT_MEMORY,
2191 &addressRestrictions, true, &area, address);
2192
2193 if (status < B_OK) {
2194 cache->ReleaseRefAndUnlock();
2195 return status;
2196 }
2197
2198 cache->Unlock();
2199
2200 area->cache_type = CACHE_TYPE_NULL;
2201 return area->id;
2202 }
2203
2204
2205 /*! Creates the vnode cache for the specified \a vnode.
2206 The vnode has to be marked busy when calling this function.
2207 */
2208 status_t
2209 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
2210 {
2211 return VMCacheFactory::CreateVnodeCache(*cache, vnode);
2212 }
2213
2214
2215 /*! \a cache must be locked. The area's address space must be read-locked.
2216 */
2217 static void
2218 pre_map_area_pages(VMArea* area, VMCache* cache,
2219 vm_page_reservation* reservation, int32 maxCount)
2220 {
2221 addr_t baseAddress = area->Base();
2222 addr_t cacheOffset = area->cache_offset;
2223 page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
2224 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
2225
2226 VMCachePagesTree::Iterator it = cache->pages.GetIterator(firstPage, true, true);
2227 vm_page* page;
2228 while ((page = it.Next()) != NULL && maxCount > 0) {
2229 if (page->cache_offset >= endPage)
2230 break;
2231
2232 // skip busy and inactive pages
2233 if (page->busy || (page->usage_count == 0 && !page->accessed))
2234 continue;
2235
2236 DEBUG_PAGE_ACCESS_START(page);
2237 map_page(area, page,
2238 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
2239 B_READ_AREA | B_KERNEL_READ_AREA, reservation);
2240 maxCount--;
2241 DEBUG_PAGE_ACCESS_END(page);
2242 }
2243 }
2244
2245
2246 /*! Will map the file specified by \a fd to an area in memory.
2247 The file will be mirrored beginning at the specified \a offset. The
2248 \a offset and \a size arguments have to be page aligned.
2249 */
2250 static area_id
2251 _vm_map_file(team_id team, const char* name, void** _address,
2252 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
2253 bool unmapAddressRange, int fd, off_t offset, bool kernel)
2254 {
2255 	// TODO: for binary files, we want to make sure that they get a
2256 	// copy of the file as it is at a given point in time, i.e. later changes
2257 	// should not make it into the mapped copy -- this will need quite some
2258 	// changes to be done in a nice way
2259 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
2260 "%" B_PRIu32 ")\n", fd, offset, size, mapping));
2261
2262 offset = ROUNDDOWN(offset, B_PAGE_SIZE);
2263 size = PAGE_ALIGN(size);
2264
2265 if (mapping == REGION_NO_PRIVATE_MAP)
2266 protection |= B_SHARED_AREA;
2267 if (addressSpec != B_EXACT_ADDRESS)
2268 unmapAddressRange = false;
2269
2270 uint32 mappingFlags = 0;
2271 if (unmapAddressRange)
2272 mappingFlags |= CREATE_AREA_UNMAP_ADDRESS_RANGE;
2273
2274 if (fd < 0) {
2275 virtual_address_restrictions virtualRestrictions = {};
2276 virtualRestrictions.address = *_address;
2277 virtualRestrictions.address_specification = addressSpec;
2278 physical_address_restrictions physicalRestrictions = {};
2279 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
2280 mappingFlags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
2281 _address);
2282 }
2283
2284 // get the open flags of the FD
2285 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2286 if (descriptor == NULL)
2287 return EBADF;
2288 int32 openMode = descriptor->open_mode;
2289 put_fd(descriptor);
2290
2291 	// The FD must be open for reading in any case. For a shared mapping with
2292 	// write access, the FD must additionally be open for writing.
2293 if ((openMode & O_ACCMODE) == O_WRONLY
2294 || (mapping == REGION_NO_PRIVATE_MAP
2295 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
2296 && (openMode & O_ACCMODE) == O_RDONLY)) {
2297 return EACCES;
2298 }
2299
2300 uint32 protectionMax = 0;
2301 if (mapping == REGION_NO_PRIVATE_MAP) {
2302 if ((openMode & O_ACCMODE) == O_RDWR)
2303 protectionMax = protection | B_USER_PROTECTION;
2304 else
2305 protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA);
2306 } else if (mapping == REGION_PRIVATE_MAP) {
2307 // For privately mapped read-only regions, skip committing memory.
2308 // (If protections are changed later on, memory will be committed then.)
2309 if ((protection & B_WRITE_AREA) == 0)
2310 mappingFlags |= CREATE_AREA_DONT_COMMIT_MEMORY;
2311 }
2312
2313 	// get the vnode for the object; this also grabs a ref to it
2314 struct vnode* vnode = NULL;
2315 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
2316 if (status < B_OK)
2317 return status;
2318 VnodePutter vnodePutter(vnode);
2319
2320 // If we're going to pre-map pages, we need to reserve the pages needed by
2321 // the mapping backend upfront.
2322 page_num_t reservedPreMapPages = 0;
2323 vm_page_reservation reservation;
2324 if ((protection & B_READ_AREA) != 0) {
2325 AddressSpaceWriteLocker locker;
2326 status = locker.SetTo(team);
2327 if (status != B_OK)
2328 return status;
2329
2330 VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
2331 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
2332
2333 locker.Unlock();
2334
2335 vm_page_reserve_pages(&reservation, reservedPreMapPages,
2336 team == VMAddressSpace::KernelID()
2337 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2338 }
2339
2340 struct PageUnreserver {
2341 PageUnreserver(vm_page_reservation* reservation)
2342 :
2343 fReservation(reservation)
2344 {
2345 }
2346
2347 ~PageUnreserver()
2348 {
2349 if (fReservation != NULL)
2350 vm_page_unreserve_pages(fReservation);
2351 }
2352
2353 vm_page_reservation* fReservation;
2354 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
2355
2356 // Lock the address space and, if the specified address range shall be
2357 // unmapped, ensure it is not wired.
2358 AddressSpaceWriteLocker locker;
2359 do {
2360 if (locker.SetTo(team) != B_OK)
2361 return B_BAD_TEAM_ID;
2362 } while (unmapAddressRange
2363 && wait_if_address_range_is_wired(locker.AddressSpace(),
2364 (addr_t)*_address, size, &locker));
2365
2366 // TODO: this only works for file systems that use the file cache
2367 VMCache* cache;
2368 status = vfs_get_vnode_cache(vnode, &cache, false);
2369 if (status < B_OK)
2370 return status;
2371
2372 cache->Lock();
2373
2374 VMArea* area;
2375 virtual_address_restrictions addressRestrictions = {};
2376 addressRestrictions.address = *_address;
2377 addressRestrictions.address_specification = addressSpec;
2378 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
2379 0, protection, protectionMax, mapping, mappingFlags,
2380 &addressRestrictions, kernel, &area, _address);
2381
2382 if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
2383 // map_backing_store() cannot know we no longer need the ref
2384 cache->ReleaseRefLocked();
2385 }
2386
2387 if (status == B_OK && (protection & B_READ_AREA) != 0) {
2388 // Pre-map at most 10MB worth of pages.
2389 pre_map_area_pages(area, cache, &reservation,
2390 (10LL * 1024 * 1024) / B_PAGE_SIZE);
2391 }
2392
2393 cache->Unlock();
2394
2395 if (status == B_OK) {
2396 		// TODO: this probably deserves a smarter solution, e.g. triggering
2397 		// the prefetch somewhere else.
2398
2399 // Prefetch at most 10MB starting from "offset", but only if the cache
2400 // doesn't already contain more pages than the prefetch size.
2401 const size_t prefetch = min_c(size, 10LL * 1024 * 1024);
2402 if (cache->page_count < (prefetch / B_PAGE_SIZE))
2403 cache_prefetch_vnode(vnode, offset, prefetch);
2404 }
2405
2406 if (status != B_OK)
2407 return status;
2408
2409 area->cache_type = CACHE_TYPE_VNODE;
2410 return area->id;
2411 }
2412
2413
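/*! Kernel-internal entry point for file mappings: checks that the requested
	\a protection is supported and forwards to _vm_map_file() with \c kernel
	set to \c true.
*/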
2414 area_id
2415 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2416 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2417 int fd, off_t offset)
2418 {
2419 if (!arch_vm_supports_protection(protection))
2420 return B_NOT_SUPPORTED;
2421
2422 return _vm_map_file(aid, name, address, addressSpec, size, protection,
2423 mapping, unmapAddressRange, fd, offset, true);
2424 }
2425
2426
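/*! Returns the given area's cache, locked and with a reference acquired.
	Since the cache can be exchanged while we wait for its lock, the lookup
	is retried until the locked cache is still the area's cache.
	The counterpart is vm_area_put_locked_cache().
*/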
2427 VMCache*
2428 vm_area_get_locked_cache(VMArea* area)
2429 {
2430 rw_lock_read_lock(&sAreaCacheLock);
2431
2432 while (true) {
2433 VMCache* cache = area->cache;
2434
2435 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2436 // cache has been deleted
2437 rw_lock_read_lock(&sAreaCacheLock);
2438 continue;
2439 }
2440
2441 rw_lock_read_lock(&sAreaCacheLock);
2442
2443 if (cache == area->cache) {
2444 cache->AcquireRefLocked();
2445 rw_lock_read_unlock(&sAreaCacheLock);
2446 return cache;
2447 }
2448
2449 // the cache changed in the meantime
2450 cache->Unlock();
2451 }
2452 }
2453
2454
2455 void
2456 vm_area_put_locked_cache(VMCache* cache)
2457 {
2458 cache->ReleaseRefAndUnlock();
2459 }
2460
2461
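/*! Creates a new area in \a team that is a clone of the given source area,
	i.e. it is backed by the same cache. Both the source and the new area are
	marked \c B_SHARED_AREA. For \c B_FULL_LOCK source areas all pages are
	mapped in right away. Returns the ID of the new area or an error code.
*/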
2462 area_id
2463 vm_clone_area(team_id team, const char* name, void** address,
2464 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2465 bool kernel)
2466 {
2467 VMArea* newArea = NULL;
2468 VMArea* sourceArea;
2469
2470 // Check whether the source area exists and is cloneable. If so, mark it
2471 // B_SHARED_AREA, so that we don't get problems with copy-on-write.
2472 {
2473 AddressSpaceWriteLocker locker;
2474 status_t status = locker.SetFromArea(sourceID, sourceArea);
2475 if (status != B_OK)
2476 return status;
2477
2478 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2479 return B_NOT_ALLOWED;
2480
2481 sourceArea->protection |= B_SHARED_AREA;
2482 protection |= B_SHARED_AREA;
2483 }
2484
2485 // Now lock both address spaces and actually do the cloning.
2486
2487 MultiAddressSpaceLocker locker;
2488 VMAddressSpace* sourceAddressSpace;
2489 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2490 if (status != B_OK)
2491 return status;
2492
2493 VMAddressSpace* targetAddressSpace;
2494 status = locker.AddTeam(team, true, &targetAddressSpace);
2495 if (status != B_OK)
2496 return status;
2497
2498 status = locker.Lock();
2499 if (status != B_OK)
2500 return status;
2501
2502 sourceArea = lookup_area(sourceAddressSpace, sourceID);
2503 if (sourceArea == NULL)
2504 return B_BAD_VALUE;
2505
2506 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
2507 return B_NOT_ALLOWED;
2508
2509 VMCache* cache = vm_area_get_locked_cache(sourceArea);
2510
2511 if (!kernel && sourceAddressSpace != targetAddressSpace
2512 && (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2513 #if KDEBUG
2514 Team* team = thread_get_current_thread()->team;
2515 dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2516 B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2517 #endif
2518 status = B_NOT_ALLOWED;
2519 } else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2520 status = B_NOT_ALLOWED;
2521 } else {
2522 uint32 flags = 0;
2523 if (mapping != REGION_PRIVATE_MAP)
2524 flags |= CREATE_AREA_DONT_COMMIT_MEMORY;
2525
2526 virtual_address_restrictions addressRestrictions = {};
2527 addressRestrictions.address = *address;
2528 addressRestrictions.address_specification = addressSpec;
2529 status = map_backing_store(targetAddressSpace, cache,
2530 sourceArea->cache_offset, name, sourceArea->Size(),
2531 sourceArea->wiring, protection, sourceArea->protection_max,
2532 mapping, flags, &addressRestrictions,
2533 kernel, &newArea, address);
2534 }
2535 if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2536 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2537 // to create a new cache, and has therefore already acquired a reference
2538 // to the source cache - but otherwise it has no idea that we need
2539 // one.
2540 cache->AcquireRefLocked();
2541 }
2542 if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2543 // we need to map in everything at this point
2544 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2545 // we don't have actual pages to map but a physical area
2546 VMTranslationMap* map
2547 = sourceArea->address_space->TranslationMap();
2548 map->Lock();
2549
2550 phys_addr_t physicalAddress;
2551 uint32 oldProtection;
2552 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2553
2554 map->Unlock();
2555
2556 map = targetAddressSpace->TranslationMap();
2557 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2558 newArea->Base() + (newArea->Size() - 1));
2559
2560 vm_page_reservation reservation;
2561 vm_page_reserve_pages(&reservation, reservePages,
2562 targetAddressSpace == VMAddressSpace::Kernel()
2563 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2564 map->Lock();
2565
2566 for (addr_t offset = 0; offset < newArea->Size();
2567 offset += B_PAGE_SIZE) {
2568 map->Map(newArea->Base() + offset, physicalAddress + offset,
2569 protection, newArea->MemoryType(), &reservation);
2570 }
2571
2572 map->Unlock();
2573 vm_page_unreserve_pages(&reservation);
2574 } else {
2575 VMTranslationMap* map = targetAddressSpace->TranslationMap();
2576 size_t reservePages = map->MaxPagesNeededToMap(
2577 newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2578 vm_page_reservation reservation;
2579 vm_page_reserve_pages(&reservation, reservePages,
2580 targetAddressSpace == VMAddressSpace::Kernel()
2581 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2582
2583 // map in all pages from source
2584 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2585 vm_page* page = it.Next();) {
2586 if (!page->busy) {
2587 DEBUG_PAGE_ACCESS_START(page);
2588 map_page(newArea, page,
2589 newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2590 - newArea->cache_offset),
2591 protection, &reservation);
2592 DEBUG_PAGE_ACCESS_END(page);
2593 }
2594 }
2595 // TODO: B_FULL_LOCK means that all pages are locked. We are not
2596 // ensuring that!
2597
2598 vm_page_unreserve_pages(&reservation);
2599 }
2600 }
2601 if (status == B_OK)
2602 newArea->cache_type = sourceArea->cache_type;
2603
2604 vm_area_put_locked_cache(cache);
2605
2606 if (status < B_OK)
2607 return status;
2608
2609 return newArea->id;
2610 }
2611
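// A sketch of a kernel-side caller cloning an area into another team's
// address space ("targetTeam" and "sourceArea" are placeholders):
//
//	void* address = NULL;
//	area_id clone = vm_clone_area(targetTeam, "cloned area", &address,
//		B_ANY_ADDRESS, B_READ_AREA | B_WRITE_AREA, REGION_NO_PRIVATE_MAP,
//		sourceArea, true);
//	if (clone < 0)
//		dprintf("cloning failed: %" B_PRId32 "\n", clone);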
2612
2613 /*! Deletes the specified area of the given address space.
2614
2615 The address space must be write-locked.
2616 The caller must ensure that the area does not have any wired ranges.
2617
2618 \param addressSpace The address space containing the area.
2619 \param area The area to be deleted.
2620 \param deletingAddressSpace \c true, if the address space is in the process
2621 of being deleted.
2622 \param alreadyRemoved \c true, if the area was already removed from the global
2623 areas map (and thus had its ID deallocated.)
2624 */
2625 static void
2626 delete_area(VMAddressSpace* addressSpace, VMArea* area,
2627 bool deletingAddressSpace, bool alreadyRemoved)
2628 {
2629 ASSERT(!area->IsWired());
2630
2631 if (area->id >= 0 && !alreadyRemoved)
2632 VMAreas::Remove(area);
2633
2634 // At this point the area is removed from the global hash table, but
2635 // still exists in the area list.
2636
2637 // Unmap the virtual address space the area occupied.
2638 {
2639 // We need to lock the complete cache chain.
2640 VMCache* topCache = vm_area_get_locked_cache(area);
2641 VMCacheChainLocker cacheChainLocker(topCache);
2642 cacheChainLocker.LockAllSourceCaches();
2643
2644 // If the area's top cache is a temporary cache and the area is the only
2645 // one referencing it (besides us currently holding a second reference),
2646 // the unmapping code doesn't need to care about preserving the accessed
2647 // and dirty flags of the top cache page mappings.
2648 bool ignoreTopCachePageFlags
2649 = topCache->temporary && topCache->RefCount() == 2;
2650
2651 area->address_space->TranslationMap()->UnmapArea(area,
2652 deletingAddressSpace, ignoreTopCachePageFlags);
2653 }
2654
2655 if (!area->cache->temporary)
2656 area->cache->WriteModified();
2657
2658 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2659 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2660
2661 arch_vm_unset_memory_type(area);
2662 addressSpace->RemoveArea(area, allocationFlags);
2663 addressSpace->Put();
2664
2665 area->cache->RemoveArea(area);
2666 area->cache->ReleaseRef();
2667
2668 addressSpace->DeleteArea(area, allocationFlags);
2669 }
2670
2671
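/*! Deletes the area with the given ID from the team's address space, after
	waiting for any wired ranges within it to be unwired. Userland callers
	may not delete areas that are protected with \c B_KERNEL_AREA.
*/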
2672 status_t
2673 vm_delete_area(team_id team, area_id id, bool kernel)
2674 {
2675 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2676 team, id));
2677
2678 // lock the address space and make sure the area isn't wired
2679 AddressSpaceWriteLocker locker;
2680 VMArea* area;
2681 AreaCacheLocker cacheLocker;
2682
2683 do {
2684 status_t status = locker.SetFromArea(team, id, area);
2685 if (status != B_OK)
2686 return status;
2687
2688 cacheLocker.SetTo(area);
2689 } while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2690
2691 cacheLocker.Unlock();
2692
2693 if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
2694 return B_NOT_ALLOWED;
2695
2696 delete_area(locker.AddressSpace(), area, false);
2697 return B_OK;
2698 }
2699
2700
2701 /*! Creates a new cache on top of the given cache, moves all areas from
2702 the old cache to the new one, and changes the protection of all affected
2703 areas' pages to read-only. If requested, wired pages are moved up to the
2704 new cache and copies are added to the old cache in their place.
2705 Preconditions:
2706 - The given cache must be locked.
2707 - All of the cache's areas' address spaces must be read locked.
2708 - Either the cache must not have any wired ranges or a page reservation for
2709 all wired pages must be provided, so they can be copied.
2710
2711 \param lowerCache The cache on top of which a new cache shall be created.
2712 \param wiredPagesReservation If \c NULL there must not be any wired pages
2713 in \a lowerCache. Otherwise as many pages must be reserved as the cache
2714 		has wired pages. The wired pages are copied in this case.
2715 */
2716 static status_t
2717 vm_copy_on_write_area(VMCache* lowerCache,
2718 vm_page_reservation* wiredPagesReservation)
2719 {
2720 VMCache* upperCache;
2721
2722 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2723
2724 // We need to separate the cache from its areas. The cache goes one level
2725 	// deeper and we create a new cache in between.
2726
2727 // create an anonymous cache
2728 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2729 lowerCache->GuardSize() / B_PAGE_SIZE,
2730 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2731 VM_PRIORITY_USER);
2732 if (status != B_OK)
2733 return status;
2734
2735 upperCache->Lock();
2736
2737 upperCache->temporary = 1;
2738 upperCache->virtual_base = lowerCache->virtual_base;
2739 upperCache->virtual_end = lowerCache->virtual_end;
2740
2741 // transfer the lower cache areas to the upper cache
2742 rw_lock_write_lock(&sAreaCacheLock);
2743 upperCache->TransferAreas(lowerCache);
2744 rw_lock_write_unlock(&sAreaCacheLock);
2745
2746 lowerCache->AddConsumer(upperCache);
2747
2748 // We now need to remap all pages from all of the cache's areas read-only,
2749 // so that a copy will be created on next write access. If there are wired
2750 // pages, we keep their protection, move them to the upper cache and create
2751 // copies for the lower cache.
2752 if (wiredPagesReservation != NULL) {
2753 // We need to handle wired pages -- iterate through the cache's pages.
2754 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2755 vm_page* page = it.Next();) {
2756 if (page->WiredCount() > 0) {
2757 // allocate a new page and copy the wired one
2758 vm_page* copiedPage = vm_page_allocate_page(
2759 wiredPagesReservation, PAGE_STATE_ACTIVE);
2760
2761 vm_memcpy_physical_page(
2762 copiedPage->physical_page_number * B_PAGE_SIZE,
2763 page->physical_page_number * B_PAGE_SIZE);
2764
2765 // move the wired page to the upper cache (note: removing is OK
2766 // with the SplayTree iterator) and insert the copy
2767 upperCache->MovePage(page);
2768 lowerCache->InsertPage(copiedPage,
2769 page->cache_offset * B_PAGE_SIZE);
2770
2771 DEBUG_PAGE_ACCESS_END(copiedPage);
2772 } else {
2773 // Change the protection of this page in all areas.
2774 for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2775 tempArea = tempArea->cache_next) {
2776 if (!is_page_in_area(tempArea, page))
2777 continue;
2778
2779 // The area must be readable in the same way it was
2780 // previously writable.
2781 addr_t address = virtual_page_address(tempArea, page);
2782 uint32 protection = 0;
2783 uint32 pageProtection = get_area_page_protection(tempArea, address);
2784 if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2785 protection |= B_KERNEL_READ_AREA;
2786 if ((pageProtection & B_READ_AREA) != 0)
2787 protection |= B_READ_AREA;
2788
2789 VMTranslationMap* map
2790 = tempArea->address_space->TranslationMap();
2791 map->Lock();
2792 map->ProtectPage(tempArea, address, protection);
2793 map->Unlock();
2794 }
2795 }
2796 }
2797 } else {
2798 ASSERT(lowerCache->WiredPagesCount() == 0);
2799
2800 // just change the protection of all areas
2801 for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2802 tempArea = tempArea->cache_next) {
2803 if (tempArea->page_protections != NULL) {
2804 // Change the protection of all pages in this area.
2805 VMTranslationMap* map = tempArea->address_space->TranslationMap();
2806 map->Lock();
2807 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2808 vm_page* page = it.Next();) {
2809 if (!is_page_in_area(tempArea, page))
2810 continue;
2811
2812 // The area must be readable in the same way it was
2813 // previously writable.
2814 addr_t address = virtual_page_address(tempArea, page);
2815 uint32 protection = 0;
2816 uint32 pageProtection = get_area_page_protection(tempArea, address);
2817 if ((pageProtection & B_KERNEL_READ_AREA) != 0)
2818 protection |= B_KERNEL_READ_AREA;
2819 if ((pageProtection & B_READ_AREA) != 0)
2820 protection |= B_READ_AREA;
2821
2822 map->ProtectPage(tempArea, address, protection);
2823 }
2824 map->Unlock();
2825 continue;
2826 }
2827 // The area must be readable in the same way it was previously
2828 // writable.
2829 uint32 protection = 0;
2830 if ((tempArea->protection & B_KERNEL_READ_AREA) != 0)
2831 protection |= B_KERNEL_READ_AREA;
2832 if ((tempArea->protection & B_READ_AREA) != 0)
2833 protection |= B_READ_AREA;
2834
2835 VMTranslationMap* map = tempArea->address_space->TranslationMap();
2836 map->Lock();
2837 map->ProtectArea(tempArea, protection);
2838 map->Unlock();
2839 }
2840 }
2841
2842 vm_area_put_locked_cache(upperCache);
2843
2844 return B_OK;
2845 }
2846
2847
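/*! Creates a copy of the given source area in \a team's address space.
	A shared source area keeps being used directly as backing store; for a
	writable private copy the source is put behind a copy-on-write layer via
	vm_copy_on_write_area(). Per-page protections are copied over as well.
	Returns the ID of the new area or an error code.
*/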
2848 area_id
2849 vm_copy_area(team_id team, const char* name, void** _address,
2850 uint32 addressSpec, area_id sourceID)
2851 {
2852 // Do the locking: target address space, all address spaces associated with
2853 // the source cache, and the cache itself.
2854 MultiAddressSpaceLocker locker;
2855 VMAddressSpace* targetAddressSpace;
2856 VMCache* cache;
2857 VMArea* source;
2858 AreaCacheLocker cacheLocker;
2859 status_t status;
2860 bool sharedArea;
2861
2862 page_num_t wiredPages = 0;
2863 vm_page_reservation wiredPagesReservation;
2864
2865 bool restart;
2866 do {
2867 restart = false;
2868
2869 locker.Unset();
2870 status = locker.AddTeam(team, true, &targetAddressSpace);
2871 if (status == B_OK) {
2872 status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2873 &cache);
2874 }
2875 if (status != B_OK)
2876 return status;
2877
2878 cacheLocker.SetTo(cache, true); // already locked
2879
2880 sharedArea = (source->protection & B_SHARED_AREA) != 0;
2881
2882 page_num_t oldWiredPages = wiredPages;
2883 wiredPages = 0;
2884
2885 // If the source area isn't shared, count the number of wired pages in
2886 // the cache and reserve as many pages.
2887 if (!sharedArea) {
2888 wiredPages = cache->WiredPagesCount();
2889
2890 if (wiredPages > oldWiredPages) {
2891 cacheLocker.Unlock();
2892 locker.Unlock();
2893
2894 if (oldWiredPages > 0)
2895 vm_page_unreserve_pages(&wiredPagesReservation);
2896
2897 vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2898 VM_PRIORITY_USER);
2899
2900 restart = true;
2901 }
2902 } else if (oldWiredPages > 0)
2903 vm_page_unreserve_pages(&wiredPagesReservation);
2904 } while (restart);
2905
2906 // unreserve pages later
2907 struct PagesUnreserver {
2908 PagesUnreserver(vm_page_reservation* reservation)
2909 :
2910 fReservation(reservation)
2911 {
2912 }
2913
2914 ~PagesUnreserver()
2915 {
2916 if (fReservation != NULL)
2917 vm_page_unreserve_pages(fReservation);
2918 }
2919
2920 private:
2921 vm_page_reservation* fReservation;
2922 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2923
2924 bool writableCopy
2925 = (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2926 uint8* targetPageProtections = NULL;
2927
2928 if (source->page_protections != NULL) {
2929 const size_t bytes = area_page_protections_size(source->Size());
2930 targetPageProtections = (uint8*)malloc_etc(bytes,
2931 (source->address_space == VMAddressSpace::Kernel()
2932 || targetAddressSpace == VMAddressSpace::Kernel())
2933 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
2934 if (targetPageProtections == NULL)
2935 return B_NO_MEMORY;
2936
2937 memcpy(targetPageProtections, source->page_protections, bytes);
2938
2939 for (size_t i = 0; i < bytes; i++) {
2940 if ((targetPageProtections[i]
2941 & (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) {
2942 writableCopy = true;
2943 break;
2944 }
2945 }
2946 }
2947
2948 if (addressSpec == B_CLONE_ADDRESS) {
2949 addressSpec = B_EXACT_ADDRESS;
2950 *_address = (void*)source->Base();
2951 }
2952
2953 	// First, create a cache on top of the source area, or use the existing
2954 	// one if this is a shared area.
2955
2956 VMArea* target;
2957 virtual_address_restrictions addressRestrictions = {};
2958 addressRestrictions.address = *_address;
2959 addressRestrictions.address_specification = addressSpec;
2960 status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2961 name, source->Size(), source->wiring, source->protection,
2962 source->protection_max,
2963 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2964 writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2965 &addressRestrictions, true, &target, _address);
2966 if (status < B_OK) {
2967 free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE);
2968 return status;
2969 }
2970
2971 if (targetPageProtections != NULL) {
2972 target->page_protections = targetPageProtections;
2973
2974 if (!sharedArea) {
2975 // Shrink the commitment (this should never fail).
2976 AreaCacheLocker locker(target);
2977 const size_t newPageCommitment = compute_area_page_commitment(target);
2978 target->cache->Commit(newPageCommitment * B_PAGE_SIZE, VM_PRIORITY_USER);
2979 }
2980 }
2981
2982 if (sharedArea) {
2983 // The new area uses the old area's cache, but map_backing_store()
2984 // hasn't acquired a ref. So we have to do that now.
2985 cache->AcquireRefLocked();
2986 }
2987
2988 // If the source area is writable, we need to move it one layer up as well
2989 if (!sharedArea) {
2990 if (writableCopy) {
2991 // TODO: do something more useful if this fails!
2992 if (vm_copy_on_write_area(cache,
2993 wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2994 panic("vm_copy_on_write_area() failed!\n");
2995 }
2996 }
2997 }
2998
2999 // we return the ID of the newly created area
3000 return target->id;
3001 }
3002
3003
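/*! Changes the protection of the given area to \a newProtection.
	Any per-page protections of the area are discarded. If the area becomes
	writable and its cache has consumers, the cache is first put behind a new
	copy-on-write layer. Userland callers may only change their own,
	non-kernel areas and must stay within the area's \c protection_max.
*/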
3004 status_t
3005 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
3006 bool kernel)
3007 {
3008 fix_protection(&newProtection);
3009
3010 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
3011 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
3012
3013 if (!arch_vm_supports_protection(newProtection))
3014 return B_NOT_SUPPORTED;
3015
3016 bool becomesWritable
3017 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
3018
3019 // lock address spaces and cache
3020 MultiAddressSpaceLocker locker;
3021 VMCache* cache;
3022 VMArea* area;
3023 status_t status;
3024 AreaCacheLocker cacheLocker;
3025 bool isWritable;
3026
3027 bool restart;
3028 do {
3029 restart = false;
3030
3031 locker.Unset();
3032 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
3033 if (status != B_OK)
3034 return status;
3035
3036 cacheLocker.SetTo(cache, true); // already locked
3037
3038 if (!kernel && (area->address_space == VMAddressSpace::Kernel()
3039 || (area->protection & B_KERNEL_AREA) != 0)) {
3040 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
3041 "set protection %#" B_PRIx32 " on kernel area %" B_PRId32
3042 " (%s)\n", team, newProtection, areaID, area->name);
3043 return B_NOT_ALLOWED;
3044 }
3045 if (!kernel && area->protection_max != 0
3046 && (newProtection & area->protection_max)
3047 != (newProtection & B_USER_PROTECTION)) {
3048 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
3049 				"set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on "
3050 				"area %" B_PRId32 " (%s)\n", team, newProtection,
3051 area->protection_max, areaID, area->name);
3052 return B_NOT_ALLOWED;
3053 }
3054
3055 if (team != VMAddressSpace::KernelID()
3056 && area->address_space->ID() != team) {
3057 // unless you're the kernel, you are only allowed to set
3058 // the protection of your own areas
3059 return B_NOT_ALLOWED;
3060 }
3061
3062 if (area->protection == newProtection)
3063 return B_OK;
3064
3065 isWritable
3066 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
3067
3068 // Make sure the area (respectively, if we're going to call
3069 // vm_copy_on_write_area(), all areas of the cache) doesn't have any
3070 // wired ranges.
3071 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
3072 for (VMArea* otherArea = cache->areas; otherArea != NULL;
3073 otherArea = otherArea->cache_next) {
3074 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
3075 restart = true;
3076 break;
3077 }
3078 }
3079 } else {
3080 if (wait_if_area_is_wired(area, &locker, &cacheLocker))
3081 restart = true;
3082 }
3083 } while (restart);
3084
3085 if (area->page_protections != NULL) {
3086 // Get rid of the per-page protections.
3087 free_etc(area->page_protections,
3088 area->address_space == VMAddressSpace::Kernel() ? HEAP_DONT_LOCK_KERNEL_SPACE : 0);
3089 area->page_protections = NULL;
3090
3091 // Assume the existing protections don't match the new ones.
3092 isWritable = !becomesWritable;
3093 }
3094
3095 bool changePageProtection = true;
3096 bool changeTopCachePagesOnly = false;
3097
3098 if (isWritable && !becomesWritable) {
3099 // writable -> !writable
3100
3101 if (cache->source != NULL && cache->temporary) {
3102 if (cache->CountWritableAreas(area) == 0) {
3103 // Since this cache now lives from the pages in its source cache,
3104 // we can change the cache's commitment to take only those pages
3105 // into account that really are in this cache.
3106
3107 status = cache->Commit(cache->page_count * B_PAGE_SIZE,
3108 team == VMAddressSpace::KernelID()
3109 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3110
3111 // TODO: we may be able to join with our source cache, if
3112 // count == 0
3113 }
3114 }
3115
3116 // If only the writability changes, we can just remap the pages of the
3117 // top cache, since the pages of lower caches are mapped read-only
3118 		// anyway. That's only advantageous if the number of pages in the cache
3119 // is significantly smaller than the number of pages in the area,
3120 // though.
3121 if (newProtection
3122 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
3123 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
3124 changeTopCachePagesOnly = true;
3125 }
3126 } else if (!isWritable && becomesWritable) {
3127 // !writable -> writable
3128
3129 if (!cache->consumers.IsEmpty()) {
3130 // There are consumers -- we have to insert a new cache. Fortunately
3131 // vm_copy_on_write_area() does everything that's needed.
3132 changePageProtection = false;
3133 status = vm_copy_on_write_area(cache, NULL);
3134 } else {
3135 // No consumers, so we don't need to insert a new one.
3136 if (cache->source != NULL && cache->temporary) {
3137 // the cache's commitment must contain all possible pages
3138 status = cache->Commit(cache->virtual_end - cache->virtual_base,
3139 team == VMAddressSpace::KernelID()
3140 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
3141 }
3142
3143 if (status == B_OK && cache->source != NULL) {
3144 // There's a source cache, hence we can't just change all pages'
3145 // protection or we might allow writing into pages belonging to
3146 // a lower cache.
3147 changeTopCachePagesOnly = true;
3148 }
3149 }
3150 } else {
3151 // we don't have anything special to do in all other cases
3152 }
3153
3154 if (status == B_OK) {
3155 // remap existing pages in this cache
3156 if (changePageProtection) {
3157 VMTranslationMap* map = area->address_space->TranslationMap();
3158 map->Lock();
3159
3160 if (changeTopCachePagesOnly) {
3161 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
3162 page_num_t lastPageOffset
3163 = firstPageOffset + area->Size() / B_PAGE_SIZE;
3164 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
3165 vm_page* page = it.Next();) {
3166 if (page->cache_offset >= firstPageOffset
3167 && page->cache_offset <= lastPageOffset) {
3168 addr_t address = virtual_page_address(area, page);
3169 map->ProtectPage(area, address, newProtection);
3170 }
3171 }
3172 } else
3173 map->ProtectArea(area, newProtection);
3174
3175 map->Unlock();
3176 }
3177
3178 area->protection = newProtection;
3179 }
3180
3181 return status;
3182 }
3183
3184
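/*! Looks up the physical address that \a vaddr is currently mapped to in
	the given team's address space and stores it in \a paddr.
*/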
3185 status_t
3186 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
3187 {
3188 VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
3189 if (addressSpace == NULL)
3190 return B_BAD_TEAM_ID;
3191
3192 VMTranslationMap* map = addressSpace->TranslationMap();
3193
3194 map->Lock();
3195 uint32 dummyFlags;
3196 status_t status = map->Query(vaddr, paddr, &dummyFlags);
3197 map->Unlock();
3198
3199 addressSpace->Put();
3200 return status;
3201 }
3202
3203
3204 /*! The page's cache must be locked.
3205 */
3206 bool
3207 vm_test_map_modification(vm_page* page)
3208 {
3209 if (page->modified)
3210 return true;
3211
3212 vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3213 vm_page_mapping* mapping;
3214 while ((mapping = iterator.Next()) != NULL) {
3215 VMArea* area = mapping->area;
3216 VMTranslationMap* map = area->address_space->TranslationMap();
3217
3218 phys_addr_t physicalAddress;
3219 uint32 flags;
3220 map->Lock();
3221 map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
3222 map->Unlock();
3223
3224 if ((flags & PAGE_MODIFIED) != 0)
3225 return true;
3226 }
3227
3228 return false;
3229 }
3230
3231
3232 /*! The page's cache must be locked.
3233 */
3234 void
3235 vm_clear_map_flags(vm_page* page, uint32 flags)
3236 {
3237 if ((flags & PAGE_ACCESSED) != 0)
3238 page->accessed = false;
3239 if ((flags & PAGE_MODIFIED) != 0)
3240 page->modified = false;
3241
3242 vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3243 vm_page_mapping* mapping;
3244 while ((mapping = iterator.Next()) != NULL) {
3245 VMArea* area = mapping->area;
3246 VMTranslationMap* map = area->address_space->TranslationMap();
3247
3248 map->Lock();
3249 map->ClearFlags(virtual_page_address(area, page), flags);
3250 map->Unlock();
3251 }
3252 }
3253
3254
3255 /*! Removes all mappings from a page.
3256 After you've called this function, the page is unmapped from memory and
3257 the page's \c accessed and \c modified flags have been updated according
3258 to the state of the mappings.
3259 The page's cache must be locked.
3260 */
3261 void
3262 vm_remove_all_page_mappings(vm_page* page)
3263 {
3264 while (vm_page_mapping* mapping = page->mappings.Head()) {
3265 VMArea* area = mapping->area;
3266 VMTranslationMap* map = area->address_space->TranslationMap();
3267 addr_t address = virtual_page_address(area, page);
3268 map->UnmapPage(area, address, false);
3269 }
3270 }
3271
3272
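/*! Clears the accessed flag of the page and of all of its mappings, ORing
	any encountered modified flags into the page's \c modified flag.
	\return The number of accessed flags that were set, including the
		page's own.
*/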
3273 int32
3274 vm_clear_page_mapping_accessed_flags(struct vm_page *page)
3275 {
3276 int32 count = 0;
3277
3278 vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
3279 vm_page_mapping* mapping;
3280 while ((mapping = iterator.Next()) != NULL) {
3281 VMArea* area = mapping->area;
3282 VMTranslationMap* map = area->address_space->TranslationMap();
3283
3284 bool modified;
3285 if (map->ClearAccessedAndModified(area,
3286 virtual_page_address(area, page), false, modified)) {
3287 count++;
3288 }
3289
3290 page->modified |= modified;
3291 }
3292
3293
3294 if (page->accessed) {
3295 count++;
3296 page->accessed = false;
3297 }
3298
3299 return count;
3300 }
3301
3302
3303 /*! Removes all mappings of a page and/or clears the accessed bits of the
3304 mappings.
3305 The function iterates through the page mappings and removes them until
3306 encountering one that has been accessed. From then on it will continue to
3307 iterate, but only clear the accessed flag of the mapping. The page's
3308 \c modified bit will be updated accordingly, the \c accessed bit will be
3309 cleared.
3310 \return The number of mapping accessed bits encountered, including the
3311 \c accessed bit of the page itself. If \c 0 is returned, all mappings
3312 of the page have been removed.
3313 */
3314 int32
3315 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
3316 {
3317 ASSERT(page->WiredCount() == 0);
3318
3319 if (page->accessed)
3320 return vm_clear_page_mapping_accessed_flags(page);
3321
3322 while (vm_page_mapping* mapping = page->mappings.Head()) {
3323 VMArea* area = mapping->area;
3324 VMTranslationMap* map = area->address_space->TranslationMap();
3325 addr_t address = virtual_page_address(area, page);
3326 bool modified = false;
3327 if (map->ClearAccessedAndModified(area, address, true, modified)) {
3328 page->accessed = true;
3329 page->modified |= modified;
3330 return vm_clear_page_mapping_accessed_flags(page);
3331 }
3332 page->modified |= modified;
3333 }
3334
3335 return 0;
3336 }
3337
3338
3339 /*! Deletes all areas and reserved regions in the given address space.
3340
3341 The caller must ensure that none of the areas has any wired ranges.
3342
3343 \param addressSpace The address space.
3344 \param deletingAddressSpace \c true, if the address space is in the process
3345 of being deleted.
3346 */
3347 void
3348 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3349 {
3350 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3351 addressSpace->ID()));
3352
3353 addressSpace->WriteLock();
3354
3355 // remove all reserved areas in this address space
3356 addressSpace->UnreserveAllAddressRanges(0);
3357
3358 // remove all areas from the areas map at once (to avoid lock contention)
3359 VMAreas::WriteLock();
3360 {
3361 VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
3362 while (VMArea* area = it.Next())
3363 VMAreas::Remove(area);
3364 }
3365 VMAreas::WriteUnlock();
3366
3367 // delete all the areas in this address space
3368 while (VMArea* area = addressSpace->FirstArea()) {
3369 ASSERT(!area->IsWired());
3370 delete_area(addressSpace, area, deletingAddressSpace, true);
3371 }
3372
3373 addressSpace->WriteUnlock();
3374 }
3375
3376
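/*! Returns the ID of the area that contains the given address, looking it
	up in the current team's address space for userland addresses and in the
	kernel's otherwise. For non-kernel callers, kernel-only areas without any
	user protection are not reported.
*/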
3377 static area_id
3378 vm_area_for(addr_t address, bool kernel)
3379 {
3380 team_id team;
3381 if (IS_USER_ADDRESS(address)) {
3382 // we try the user team address space, if any
3383 team = VMAddressSpace::CurrentID();
3384 if (team < 0)
3385 return team;
3386 } else
3387 team = VMAddressSpace::KernelID();
3388
3389 AddressSpaceReadLocker locker(team);
3390 if (!locker.IsLocked())
3391 return B_BAD_TEAM_ID;
3392
3393 VMArea* area = locker.AddressSpace()->LookupArea(address);
3394 if (area != NULL) {
3395 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0
3396 && (area->protection & B_KERNEL_AREA) != 0)
3397 return B_ERROR;
3398
3399 return area->id;
3400 }
3401
3402 return B_ERROR;
3403 }
3404
3405
3406 /*! Frees physical pages that were used during the boot process.
3407 \a end is inclusive.
3408 */
3409 static void
3410 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3411 {
3412 // free all physical pages in the specified range
3413
3414 for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3415 phys_addr_t physicalAddress;
3416 uint32 flags;
3417
3418 if (map->Query(current, &physicalAddress, &flags) == B_OK
3419 && (flags & PAGE_PRESENT) != 0) {
3420 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3421 if (page != NULL && page->State() != PAGE_STATE_FREE
3422 && page->State() != PAGE_STATE_CLEAR
3423 && page->State() != PAGE_STATE_UNUSED) {
3424 DEBUG_PAGE_ACCESS_START(page);
3425 vm_page_set_state(page, PAGE_STATE_FREE);
3426 }
3427 }
3428 }
3429
3430 // unmap the memory
3431 map->Unmap(start, end);
3432 }
3433
3434
3435 void
3436 vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3437 {
3438 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3439 addr_t end = start + (size - 1);
3440 addr_t lastEnd = start;
3441
3442 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3443 (void*)start, (void*)end));
3444
3445 // The areas are sorted in virtual address space order, so
3446 // we just have to find the holes between them that fall
3447 // into the range we should dispose of
3448
3449 map->Lock();
3450
3451 for (VMAddressSpace::AreaIterator it
3452 = VMAddressSpace::Kernel()->GetAreaIterator();
3453 VMArea* area = it.Next();) {
3454 addr_t areaStart = area->Base();
3455 addr_t areaEnd = areaStart + (area->Size() - 1);
3456
3457 if (areaEnd < start)
3458 continue;
3459
3460 if (areaStart > end) {
3461 // we are done, the area is already beyond what we have to free
3462 break;
3463 }
3464
3465 if (areaStart > lastEnd) {
3466 // this is something we can free
3467 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3468 (void*)areaStart));
3469 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3470 }
3471
3472 if (areaEnd >= end) {
3473 lastEnd = areaEnd;
3474 // no +1 to prevent potential overflow
3475 break;
3476 }
3477
3478 lastEnd = areaEnd + 1;
3479 }
3480
3481 if (lastEnd < end) {
3482 // we can also get rid of some space at the end of the area
3483 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3484 (void*)end));
3485 unmap_and_free_physical_pages(map, lastEnd, end);
3486 }
3487
3488 map->Unlock();
3489 }
3490
3491
3492 static void
3493 create_preloaded_image_areas(struct preloaded_image* _image)
3494 {
3495 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3496 char name[B_OS_NAME_LENGTH];
3497 void* address;
3498 int32 length;
3499
3500 // use file name to create a good area name
3501 char* fileName = strrchr(image->name, '/');
3502 if (fileName == NULL)
3503 fileName = image->name;
3504 else
3505 fileName++;
3506
3507 length = strlen(fileName);
3508 // make sure there is enough space for the suffix
3509 if (length > 25)
3510 length = 25;
3511
3512 memcpy(name, fileName, length);
3513 strcpy(name + length, "_text");
3514 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3515 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3516 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3517 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3518 // this will later be remapped read-only/executable by the
3519 // ELF initialization code
3520
3521 strcpy(name + length, "_data");
3522 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3523 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3524 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3525 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3526 }
3527
3528
3529 /*! Frees all previously allocated kernel arguments areas from the kernel_args structure.
3530 Any boot loader resources contained in those arguments must not be accessed
3531 anymore past this point.
3532 */
3533 void
3534 vm_free_kernel_args(kernel_args* args)
3535 {
3536 TRACE(("vm_free_kernel_args()\n"));
3537
3538 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3539 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3540 if (area >= B_OK)
3541 delete_area(area);
3542 }
3543 }
3544
3545
3546 static void
3547 allocate_kernel_args(kernel_args* args)
3548 {
3549 TRACE(("allocate_kernel_args()\n"));
3550
3551 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3552 const addr_range& range = args->kernel_args_range[i];
3553 void* address = (void*)(addr_t)range.start;
3554
3555 create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3556 range.size, B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3557 }
3558 }
3559
3560
3561 static void
3562 unreserve_boot_loader_ranges(kernel_args* args)
3563 {
3564 TRACE(("unreserve_boot_loader_ranges()\n"));
3565
3566 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3567 const addr_range& range = args->virtual_allocated_range[i];
3568 vm_unreserve_address_range(VMAddressSpace::KernelID(),
3569 (void*)(addr_t)range.start, range.size);
3570 }
3571 }
3572
3573
3574 static void
3575 reserve_boot_loader_ranges(kernel_args* args)
3576 {
3577 TRACE(("reserve_boot_loader_ranges()\n"));
3578
3579 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3580 const addr_range& range = args->virtual_allocated_range[i];
3581 void* address = (void*)(addr_t)range.start;
3582
3583 // If the address is not a kernel address, we just skip it. The
3584 // architecture-specific code has to deal with it.
3585 if (!IS_KERNEL_ADDRESS(address)) {
3586 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3587 B_PRIu64 "\n", address, range.size);
3588 continue;
3589 }
3590
3591 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3592 &address, B_EXACT_ADDRESS, range.size, 0);
3593 if (status < B_OK)
3594 panic("could not reserve boot loader ranges\n");
3595 }
3596 }
3597
3598
3599 static addr_t
3600 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3601 {
3602 size = PAGE_ALIGN(size);
3603 if (alignment <= B_PAGE_SIZE) {
3604 // All allocations are naturally page-aligned.
3605 alignment = 0;
3606 } else {
3607 ASSERT((alignment % B_PAGE_SIZE) == 0);
3608 }
3609
3610 // Find a slot in the virtual allocation ranges.
3611 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3612 // Check if the space between this one and the previous is big enough.
3613 const addr_range& range = args->virtual_allocated_range[i];
3614 addr_range& previousRange = args->virtual_allocated_range[i - 1];
3615 const addr_t previousRangeEnd = previousRange.start + previousRange.size;
3616
3617 addr_t base = alignment > 0
3618 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3619
3620 if (base >= KERNEL_BASE && base < range.start && (range.start - base) >= size) {
3621 previousRange.size += base + size - previousRangeEnd;
3622 return base;
3623 }
3624 }
3625
3626 // We didn't find one between allocation ranges. This is OK.
3627 // See if there's a gap after the last one.
3628 addr_range& lastRange
3629 = args->virtual_allocated_range[args->num_virtual_allocated_ranges - 1];
3630 const addr_t lastRangeEnd = lastRange.start + lastRange.size;
3631 addr_t base = alignment > 0
3632 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3633 if ((KERNEL_BASE + (KERNEL_SIZE - 1) - base) >= size) {
3634 lastRange.size += base + size - lastRangeEnd;
3635 return base;
3636 }
3637
3638 // See if there's a gap before the first one.
3639 addr_range& firstRange = args->virtual_allocated_range[0];
3640 if (firstRange.start > KERNEL_BASE && (firstRange.start - KERNEL_BASE) >= size) {
3641 base = firstRange.start - size;
3642 if (alignment > 0)
3643 base = ROUNDDOWN(base, alignment);
3644
3645 if (base >= KERNEL_BASE) {
3646 firstRange.size += firstRange.start - base;
3647 firstRange.start = base;
3648 return base;
3649 }
3650 }
3651
3652 return 0;
3653 }
3654
3655
3656 static bool
3657 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3658 {
3659 // TODO: horrible brute-force method of determining if the page can be
3660 // allocated
3661 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3662 const addr_range& range = args->physical_memory_range[i];
3663 if (address >= range.start && address < (range.start + range.size))
3664 return true;
3665 }
3666 return false;
3667 }
3668
3669
3670 page_num_t
3671 vm_allocate_early_physical_page(kernel_args* args)
3672 {
3673 return vm_allocate_early_physical_page_etc(args);
3674 }
3675
3676
3677 page_num_t
3678 vm_allocate_early_physical_page_etc(kernel_args* args, phys_addr_t maxAddress)
3679 {
3680 if (args->num_physical_allocated_ranges == 0) {
3681 panic("early physical page allocations no longer possible!");
3682 return 0;
3683 }
3684 if (maxAddress == 0)
3685 maxAddress = __HAIKU_PHYS_ADDR_MAX;
3686
3687 #if defined(B_HAIKU_PHYSICAL_64_BIT)
3688 // Check if the last physical range is above the 32-bit maximum.
3689 const addr_range& lastMemoryRange =
3690 args->physical_memory_range[args->num_physical_memory_ranges - 1];
3691 const uint64 post32bitAddr = 0x100000000LL;
3692 if ((lastMemoryRange.start + lastMemoryRange.size) > post32bitAddr
3693 && args->num_physical_allocated_ranges < MAX_PHYSICAL_ALLOCATED_RANGE) {
3694 // To avoid consuming physical memory in the 32-bit range (which drivers may need),
3695 // ensure the last allocated range at least ends past the 32-bit boundary.
3696 const addr_range& lastAllocatedRange =
3697 args->physical_allocated_range[args->num_physical_allocated_ranges - 1];
3698 const phys_addr_t lastAllocatedPage = lastAllocatedRange.start + lastAllocatedRange.size;
3699 if (lastAllocatedPage < post32bitAddr) {
3700 // Create ranges until we have one at least starting at the first point past 4GB.
3701 // (Some of the logic here is similar to the new-range code at the end of the method.)
3702 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3703 addr_range& memoryRange = args->physical_memory_range[i];
3704 if ((memoryRange.start + memoryRange.size) < lastAllocatedPage)
3705 continue;
3706 if (memoryRange.size < (B_PAGE_SIZE * 128))
3707 continue;
3708
3709 uint64 rangeStart = memoryRange.start;
3710 if ((memoryRange.start + memoryRange.size) <= post32bitAddr) {
3711 if (memoryRange.start < lastAllocatedPage)
3712 continue;
3713
3714 // Range has no pages allocated and ends before the 32-bit boundary.
3715 } else {
3716 // Range ends past the 32-bit boundary. It could have some pages allocated,
3717 // but if we're here, we know that nothing is allocated above the boundary,
3718 // so we want to create a new range with it regardless.
3719 if (rangeStart < post32bitAddr)
3720 rangeStart = post32bitAddr;
3721 }
3722
3723 addr_range& allocatedRange =
3724 args->physical_allocated_range[args->num_physical_allocated_ranges++];
3725 allocatedRange.start = rangeStart;
3726 allocatedRange.size = 0;
3727
3728 if (rangeStart >= post32bitAddr)
3729 break;
3730 if (args->num_physical_allocated_ranges == MAX_PHYSICAL_ALLOCATED_RANGE)
3731 break;
3732 }
3733 }
3734 }
3735 #endif
3736
3737 // Try expanding the existing physical ranges upwards.
3738 for (int32 i = args->num_physical_allocated_ranges - 1; i >= 0; i--) {
3739 addr_range& range = args->physical_allocated_range[i];
3740 phys_addr_t nextPage = range.start + range.size;
3741
3742 // check constraints
3743 if (nextPage > maxAddress)
3744 continue;
3745
3746 // make sure the page does not collide with the next allocated range
3747 if ((i + 1) < (int32)args->num_physical_allocated_ranges) {
3748 addr_range& nextRange = args->physical_allocated_range[i + 1];
3749 if (nextRange.size != 0 && nextPage >= nextRange.start)
3750 continue;
3751 }
3752 // see if the next page fits in the memory block
3753 if (is_page_in_physical_memory_range(args, nextPage)) {
3754 // we got one!
3755 range.size += B_PAGE_SIZE;
3756 return nextPage / B_PAGE_SIZE;
3757 }
3758 }
3759
3760 // Expanding upwards didn't work, try going downwards.
3761 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3762 addr_range& range = args->physical_allocated_range[i];
3763 phys_addr_t nextPage = range.start - B_PAGE_SIZE;
3764
3765 // check constraints
3766 if (nextPage > maxAddress)
3767 continue;
3768
3769 // make sure the page does not collide with the previous allocated range
3770 if (i > 0) {
3771 addr_range& previousRange = args->physical_allocated_range[i - 1];
3772 if (previousRange.size != 0 && nextPage < (previousRange.start + previousRange.size))
3773 continue;
3774 }
3775 // see if the next physical page fits in the memory block
3776 if (is_page_in_physical_memory_range(args, nextPage)) {
3777 // we got one!
3778 range.start -= B_PAGE_SIZE;
3779 range.size += B_PAGE_SIZE;
3780 return nextPage / B_PAGE_SIZE;
3781 }
3782 }
3783
3784 // Try starting a new range.
3785 if (args->num_physical_allocated_ranges < MAX_PHYSICAL_ALLOCATED_RANGE) {
3786 const addr_range& lastAllocatedRange =
3787 args->physical_allocated_range[args->num_physical_allocated_ranges - 1];
3788 const phys_addr_t lastAllocatedPage = lastAllocatedRange.start + lastAllocatedRange.size;
3789
3790 phys_addr_t nextPage = 0;
3791 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3792 const addr_range& range = args->physical_memory_range[i];
3793 // Ignore everything before the last-allocated page, as well as small ranges.
3794 if (range.start < lastAllocatedPage || range.size < (B_PAGE_SIZE * 128))
3795 continue;
3796 if (range.start > maxAddress)
3797 break;
3798
3799 nextPage = range.start;
3800 break;
3801 }
3802
3803 if (nextPage != 0) {
3804 // we got one!
3805 addr_range& range =
3806 args->physical_allocated_range[args->num_physical_allocated_ranges++];
3807 range.start = nextPage;
3808 range.size = B_PAGE_SIZE;
3809 return nextPage / B_PAGE_SIZE;
3810 }
3811 }
3812
3813 return 0;
3814 // could not allocate a block
3815 }
3816
3817
3818 /*! This one uses the kernel_args' physical and virtual memory ranges to
3819 allocate some pages before the VM is completely up.
3820 */
3821 addr_t
3822 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3823 uint32 attributes, addr_t alignment)
3824 {
3825 if (physicalSize > virtualSize)
3826 physicalSize = virtualSize;
3827
3828 // find the vaddr to allocate at
3829 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3830 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3831 if (virtualBase == 0) {
3832 panic("vm_allocate_early: could not allocate virtual address\n");
3833 return 0;
3834 }
3835
3836 // map the pages
3837 for (uint32 i = 0; i < HOWMANY(physicalSize, B_PAGE_SIZE); i++) {
3838 page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3839 if (physicalAddress == 0)
3840 panic("error allocating early page!\n");
3841
3842 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3843
3844 status_t status = arch_vm_translation_map_early_map(args,
3845 virtualBase + i * B_PAGE_SIZE,
3846 physicalAddress * B_PAGE_SIZE, attributes,
3847 &vm_allocate_early_physical_page);
3848 if (status != B_OK)
3849 panic("error mapping early page!");
3850 }
3851
3852 return virtualBase;
3853 }
3854
3855
3856 /*! The main entrance point to initialize the VM. */
3857 status_t
3858 vm_init(kernel_args* args)
3859 {
3860 struct preloaded_image* image;
3861 void* address;
3862 status_t err = 0;
3863 uint32 i;
3864
3865 TRACE(("vm_init: entry\n"));
3866 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3867 err = arch_vm_init(args);
3868
3869 // initialize some globals
3870 vm_page_init_num_pages(args);
3871 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3872
3873 slab_init(args);
3874
3875 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3876 off_t heapSize = INITIAL_HEAP_SIZE;
3877 // try to accommodate low-memory systems
3878 while (heapSize > sAvailableMemory / 8)
3879 heapSize /= 2;
3880 if (heapSize < 1024 * 1024)
3881 panic("vm_init: go buy some RAM please.");
3882
3883 // map in the new heap and initialize it
3884 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3885 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3886 TRACE(("heap at 0x%lx\n", heapBase));
3887 heap_init(heapBase, heapSize);
3888 #endif
3889
3890 // initialize the free page list and physical page mapper
3891 vm_page_init(args);
3892
3893 // initialize the cache allocators
3894 vm_cache_init(args);
3895
3896 {
3897 status_t error = VMAreas::Init();
3898 if (error != B_OK)
3899 panic("vm_init: error initializing areas map\n");
3900 }
3901
3902 VMAddressSpace::Init();
3903 reserve_boot_loader_ranges(args);
3904
3905 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3906 heap_init_post_area();
3907 #endif
3908
3909 // Do any further initialization that the architecture-dependent layers may
3910 // need now
3911 arch_vm_translation_map_init_post_area(args);
3912 arch_vm_init_post_area(args);
3913 vm_page_init_post_area(args);
3914 slab_init_post_area();
3915
3916 // allocate areas to represent stuff that already exists
3917
3918 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3919 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
3920 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
3921 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3922 #endif
3923
3924 allocate_kernel_args(args);
3925
3926 create_preloaded_image_areas(args->kernel_image);
3927
3928 // allocate areas for preloaded images
3929 for (image = args->preloaded_images; image != NULL; image = image->next)
3930 create_preloaded_image_areas(image);
3931
3932 // allocate kernel stacks
3933 for (i = 0; i < args->num_cpus; i++) {
3934 char name[64];
3935
3936 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
3937 address = (void*)args->cpu_kstack[i].start;
3938 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
3939 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3940 }
3941
3942 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
3943 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
3944
3945 #if PARANOID_KERNEL_MALLOC
3946 vm_block_address_range("uninitialized heap memory",
3947 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3948 #endif
3949 #if PARANOID_KERNEL_FREE
3950 vm_block_address_range("freed heap memory",
3951 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
3952 #endif
3953
3954 create_page_mappings_object_caches();
3955
3956 vm_debug_init();
3957
3958 TRACE(("vm_init: exit\n"));
3959
3960 vm_cache_init_post_heap();
3961
3962 return err;
3963 }
3964
3965
3966 status_t
3967 vm_init_post_sem(kernel_args* args)
3968 {
3969 // This frees all unused boot loader resources and makes their space available
3970 // again
3971 arch_vm_init_end(args);
3972 unreserve_boot_loader_ranges(args);
3973
3974 // Fill in all of the semaphores that were not allocated before.
3975 // Since we're still single-threaded and only the kernel address space
3976 // exists, it isn't that hard to find all of the ones we need to create.
3977
3978 arch_vm_translation_map_init_post_sem(args);
3979
3980 slab_init_post_sem();
3981
3982 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3983 heap_init_post_sem();
3984 #endif
3985
3986 return B_OK;
3987 }
3988
3989
3990 status_t
3991 vm_init_post_thread(kernel_args* args)
3992 {
3993 vm_page_init_post_thread(args);
3994 slab_init_post_thread();
3995 return heap_init_post_thread();
3996 }
3997
3998
3999 status_t
4000 vm_init_post_modules(kernel_args* args)
4001 {
4002 return arch_vm_init_post_modules(args);
4003 }
4004
4005
4006 void
4007 permit_page_faults(void)
4008 {
4009 Thread* thread = thread_get_current_thread();
4010 if (thread != NULL)
4011 atomic_add(&thread->page_faults_allowed, 1);
4012 }
4013
4014
4015 void
4016 forbid_page_faults(void)
4017 {
4018 Thread* thread = thread_get_current_thread();
4019 if (thread != NULL)
4020 atomic_add(&thread->page_faults_allowed, -1);
4021 }
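// Illustrative sketch (assumption, not taken from this file): these two
// functions adjust the current thread's page_faults_allowed counter and are
// intended to be used as a pair around code that may legitimately fault.
// do_work_that_may_fault() is a hypothetical placeholder.
//
// \code
//	permit_page_faults();
//	status_t status = do_work_that_may_fault();	// hypothetical
//	forbid_page_faults();
// \endcode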
4022
4023
4024 status_t
4025 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4026 bool isUser, addr_t* newIP)
4027 {
4028 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4029 faultAddress));
4030
4031 TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4032
4033 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4034 VMAddressSpace* addressSpace = NULL;
4035
4036 status_t status = B_OK;
4037 *newIP = 0;
4038 atomic_add((int32*)&sPageFaults, 1);
4039
4040 if (IS_KERNEL_ADDRESS(pageAddress)) {
4041 addressSpace = VMAddressSpace::GetKernel();
4042 } else if (IS_USER_ADDRESS(pageAddress)) {
4043 addressSpace = VMAddressSpace::GetCurrent();
4044 if (addressSpace == NULL) {
4045 if (!isUser) {
4046 dprintf("vm_page_fault: kernel thread accessing invalid user "
4047 "memory!\n");
4048 status = B_BAD_ADDRESS;
4049 TPF(PageFaultError(-1,
4050 VMPageFaultTracing
4051 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4052 } else {
4053 // XXX weird state.
4054 panic("vm_page_fault: non kernel thread accessing user memory "
4055 "that doesn't exist!\n");
4056 status = B_BAD_ADDRESS;
4057 }
4058 }
4059 } else {
4060 // The hit was probably in the 64k DMZ between kernel and user space;
4061 // this keeps a user space thread from passing a buffer that crosses
4062 // into kernel space.
4063 status = B_BAD_ADDRESS;
4064 TPF(PageFaultError(-1,
4065 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4066 }
4067
4068 if (status == B_OK) {
4069 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4070 isUser, NULL);
4071 }
4072
4073 if (status < B_OK) {
4074 if (!isUser) {
4075 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4076 "0x%lx, ip 0x%lx, write %d, kernel, exec %d, thread 0x%" B_PRIx32 "\n",
4077 strerror(status), address, faultAddress, isWrite, isExecute,
4078 thread_get_current_thread_id());
4079
4080 Thread* thread = thread_get_current_thread();
4081 if (thread != NULL && thread->fault_handler != 0) {
4082 // this will cause the arch-dependent page fault handler to
4083 // modify the IP on the interrupt frame (or equivalent) to return
4084 // to this address
4085 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4086 } else {
4087 // unhandled page fault in the kernel
4088 panic("vm_page_fault: unhandled page fault in kernel space at "
4089 "0x%lx, ip 0x%lx\n", address, faultAddress);
4090 }
4091 } else {
4092 Thread* thread = thread_get_current_thread();
4093
4094 #ifdef TRACE_FAULTS
4095 VMArea* area = NULL;
4096 if (addressSpace != NULL) {
4097 addressSpace->ReadLock();
4098 area = addressSpace->LookupArea(faultAddress);
4099 }
4100
4101 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4102 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4103 "(\"%s\" +%#lx)\n", thread->name, thread->id,
4104 thread->team->Name(), thread->team->id,
4105 isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4106 faultAddress, area ? area->name : "???", faultAddress - (area ?
4107 area->Base() : 0x0));
4108
4109 if (addressSpace != NULL)
4110 addressSpace->ReadUnlock();
4111 #endif
4112
4113 // If the thread has a signal handler for SIGSEGV, we simply
4114 // send it the signal. Otherwise we notify the user debugger
4115 // first.
4116 struct sigaction action;
4117 if ((sigaction(SIGSEGV, NULL, &action) == 0
4118 && action.sa_handler != SIG_DFL
4119 && action.sa_handler != SIG_IGN)
4120 || user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4121 SIGSEGV)) {
4122 Signal signal(SIGSEGV,
4123 status == B_PERMISSION_DENIED
4124 ? SEGV_ACCERR : SEGV_MAPERR,
4125 EFAULT, thread->team->id);
4126 signal.SetAddress((void*)address);
4127 send_signal_to_thread(thread, signal, 0);
4128 }
4129 }
4130 }
4131
4132 if (addressSpace != NULL)
4133 addressSpace->Put();
4134
4135 return B_HANDLED_INTERRUPT;
4136 }
4137
4138
4139 struct PageFaultContext {
4140 AddressSpaceReadLocker addressSpaceLocker;
4141 VMCacheChainLocker cacheChainLocker;
4142
4143 VMTranslationMap* map;
4144 VMCache* topCache;
4145 off_t cacheOffset;
4146 vm_page_reservation reservation;
4147 bool isWrite;
4148
4149 // return values
4150 vm_page* page;
4151 bool restart;
4152 bool pageAllocated;
4153
4154
4155 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4156 :
4157 addressSpaceLocker(addressSpace, true),
4158 map(addressSpace->TranslationMap()),
4159 isWrite(isWrite)
4160 {
4161 }
4162
4163 ~PageFaultContext()
4164 {
4165 UnlockAll();
4166 vm_page_unreserve_pages(&reservation);
4167 }
4168
4169 void Prepare(VMCache* topCache, off_t cacheOffset)
4170 {
4171 this->topCache = topCache;
4172 this->cacheOffset = cacheOffset;
4173 page = NULL;
4174 restart = false;
4175 pageAllocated = false;
4176
4177 cacheChainLocker.SetTo(topCache);
4178 }
4179
4180 void UnlockAll(VMCache* exceptCache = NULL)
4181 {
4182 topCache = NULL;
4183 addressSpaceLocker.Unlock();
4184 cacheChainLocker.Unlock(exceptCache);
4185 }
4186 };
4187
4188
4189 /*! Gets the page that should be mapped into the area.
4190 Returns an error code other than \c B_OK, if the page couldn't be found or
4191 paged in. The locking state of the address space and the caches is undefined
4192 in that case.
4193 Returns \c B_OK with \c context.restart set to \c true, if the function
4194 had to unlock the address space and all caches; it is then supposed to be
4195 called again.
4196 Returns \c B_OK with \c context.restart set to \c false, if the page was
4197 found. It is returned in \c context.page. The address space will still be
4198 locked as well as all caches starting from the top cache to at least the
4199 cache the page lives in.
4200 */
4201 static status_t
4202 fault_get_page(PageFaultContext& context)
4203 {
4204 VMCache* cache = context.topCache;
4205 VMCache* lastCache = NULL;
4206 vm_page* page = NULL;
4207
4208 while (cache != NULL) {
4209 // We already hold the lock of the cache at this point.
4210
4211 lastCache = cache;
4212
4213 page = cache->LookupPage(context.cacheOffset);
4214 if (page != NULL && page->busy) {
4215 // page must be busy -- wait for it to become unbusy
4216 context.UnlockAll(cache);
4217 cache->ReleaseRefLocked();
4218 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4219
4220 // restart the whole process
4221 context.restart = true;
4222 return B_OK;
4223 }
4224
4225 if (page != NULL)
4226 break;
4227
4228 // The current cache does not contain the page we're looking for.
4229
4230 // see if the backing store has it
4231 if (cache->HasPage(context.cacheOffset)) {
4232 // insert a fresh page and mark it busy -- we're going to read it in
4233 page = vm_page_allocate_page(&context.reservation,
4234 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4235 cache->InsertPage(page, context.cacheOffset);
4236
4237 // We need to unlock all caches and the address space while reading
4238 // the page in. Keep a reference to the cache around.
4239 cache->AcquireRefLocked();
4240 context.UnlockAll();
4241
4242 // read the page in
4243 generic_io_vec vec;
4244 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4245 generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4246
4247 status_t status = cache->Read(context.cacheOffset, &vec, 1,
4248 B_PHYSICAL_IO_REQUEST, &bytesRead);
4249
4250 cache->Lock();
4251
4252 if (status < B_OK) {
4253 // on error remove and free the page
4254 dprintf("reading page from cache %p returned: %s!\n",
4255 cache, strerror(status));
4256
4257 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4258 cache->RemovePage(page);
4259 vm_page_set_state(page, PAGE_STATE_FREE);
4260
4261 cache->ReleaseRefAndUnlock();
4262 return status;
4263 }
4264
4265 // mark the page unbusy again
4266 cache->MarkPageUnbusy(page);
4267
4268 DEBUG_PAGE_ACCESS_END(page);
4269
4270 // Since we needed to unlock everything temporarily, the area
4271 // situation might have changed. So we need to restart the whole
4272 // process.
4273 cache->ReleaseRefAndUnlock();
4274 context.restart = true;
4275 return B_OK;
4276 }
4277
4278 cache = context.cacheChainLocker.LockSourceCache();
4279 }
4280
4281 if (page == NULL) {
4282 // There was no adequate page, determine the cache for a clean one.
4283 // Read-only pages come in the deepest cache, only the top most cache
4284 // may have direct write access.
4285 cache = context.isWrite ? context.topCache : lastCache;
4286
4287 // allocate a clean page
4288 page = vm_page_allocate_page(&context.reservation,
4289 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4290 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4291 page->physical_page_number));
4292
4293 // insert the new page into our cache
4294 cache->InsertPage(page, context.cacheOffset);
4295 context.pageAllocated = true;
4296 } else if (page->Cache() != context.topCache && context.isWrite) {
4297 // We have a page that has the data we want, but in the wrong cache
4298 // object so we need to copy it and stick it into the top cache.
4299 vm_page* sourcePage = page;
4300
4301 // TODO: If memory is low, it might be a good idea to steal the page
4302 // from our source cache -- if possible, that is.
4303 FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4304 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4305
4306 // To not needlessly kill concurrency we unlock all caches but the top
4307 // one while copying the page. Lacking another mechanism to ensure that
4308 // the source page doesn't disappear, we mark it busy.
4309 sourcePage->busy = true;
4310 context.cacheChainLocker.UnlockKeepRefs(true);
4311
4312 // copy the page
4313 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4314 sourcePage->physical_page_number * B_PAGE_SIZE);
4315
4316 context.cacheChainLocker.RelockCaches(true);
4317 sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4318
4319 // insert the new page into our cache
4320 context.topCache->InsertPage(page, context.cacheOffset);
4321 context.pageAllocated = true;
4322 } else
4323 DEBUG_PAGE_ACCESS_START(page);
4324
4325 context.page = page;
4326 return B_OK;
4327 }
4328
4329
4330 /*! Makes sure the address in the given address space is mapped.
4331
4332 \param addressSpace The address space.
4333 \param originalAddress The address. Doesn't need to be page aligned.
4334 \param isWrite If \c true the address shall be write-accessible.
4335 \param isUser If \c true the access is requested by a userland team.
4336 \param wirePage On success, if non \c NULL, the wired count of the page
4337 mapped at the given address is incremented and the page is returned
4338 via this parameter.
4339 \return \c B_OK on success, another error code otherwise.
4340 */
4341 static status_t
4342 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4343 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4344 {
4345 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4346 "isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4347 originalAddress, isWrite, isUser));
4348
4349 PageFaultContext context(addressSpace, isWrite);
4350
4351 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4352 status_t status = B_OK;
4353
4354 addressSpace->IncrementFaultCount();
4355
4356 // We may need up to 2 pages plus pages needed for mapping them -- reserving
4357 // the pages upfront makes sure we don't have any cache locked, so that the
4358 // page daemon/thief can do their job without problems.
4359 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4360 originalAddress);
4361 context.addressSpaceLocker.Unlock();
4362 vm_page_reserve_pages(&context.reservation, reservePages,
4363 addressSpace == VMAddressSpace::Kernel()
4364 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4365
4366 #ifdef TRACE_FAULTS
4367 const bool logFaults = true;
4368 #else
4369 const bool logFaults = !isUser;
4370 #endif
4371 while (true) {
4372 context.addressSpaceLocker.Lock();
4373
4374 // get the area the fault was in
4375 VMArea* area = addressSpace->LookupArea(address);
4376 if (area == NULL) {
4377 if (logFaults) {
4378 dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4379 "space\n", originalAddress);
4380 }
4381 TPF(PageFaultError(-1,
4382 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4383 status = B_BAD_ADDRESS;
4384 break;
4385 }
4386
4387 // check permissions
4388 uint32 protection = get_area_page_protection(area, address);
4389 if (isUser && (protection & B_USER_PROTECTION) == 0
4390 && (area->protection & B_KERNEL_AREA) != 0) {
4391 if (logFaults) {
4392 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4393 area->id, (void*)originalAddress);
4394 }
4395 TPF(PageFaultError(area->id,
4396 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4397 status = B_PERMISSION_DENIED;
4398 break;
4399 }
4400 if (isWrite && (protection
4401 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4402 if (logFaults) {
4403 dprintf("write access attempted on write-protected area 0x%"
4404 B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4405 }
4406 TPF(PageFaultError(area->id,
4407 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4408 status = B_PERMISSION_DENIED;
4409 break;
4410 } else if (isExecute && (protection
4411 & (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4412 if (logFaults) {
4413 dprintf("instruction fetch attempted on execute-protected area 0x%"
4414 B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4415 }
4416 TPF(PageFaultError(area->id,
4417 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4418 status = B_PERMISSION_DENIED;
4419 break;
4420 } else if (!isWrite && !isExecute && (protection
4421 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4422 if (logFaults) {
4423 dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4424 " at %p\n", area->id, (void*)originalAddress);
4425 }
4426 TPF(PageFaultError(area->id,
4427 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4428 status = B_PERMISSION_DENIED;
4429 break;
4430 }
4431
4432 // We have the area, it was a valid access, so let's try to resolve the
4433 // page fault now.
4434 // At first, the top most cache from the area is investigated.
4435
4436 context.Prepare(vm_area_get_locked_cache(area),
4437 address - area->Base() + area->cache_offset);
4438
4439 // See if this cache has a fault handler -- this will do all the work
4440 // for us.
4441 {
4442 // Note, since the page fault is resolved with interrupts enabled,
4443 // the fault handler could be called more than once for the same
4444 // reason -- the store must take this into account.
4445 status = context.topCache->Fault(addressSpace, context.cacheOffset);
4446 if (status != B_BAD_HANDLER)
4447 break;
4448 }
4449
4450 // The top most cache has no fault handler, so let's see if the cache or
4451 // its sources already have the page we're searching for (we're going
4452 // from top to bottom).
4453 status = fault_get_page(context);
4454 if (status != B_OK) {
4455 TPF(PageFaultError(area->id, status));
4456 break;
4457 }
4458
4459 if (context.restart)
4460 continue;
4461
4462 // All went fine, all there is left to do is to map the page into the
4463 // address space.
4464 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4465 context.page));
4466
4467 // If the page doesn't reside in the area's cache, we need to make sure
4468 // it's mapped in read-only, so that we cannot overwrite someone else's
4469 // data (copy-on-write)
4470 uint32 newProtection = protection;
4471 if (context.page->Cache() != context.topCache && !isWrite)
4472 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4473
4474 bool unmapPage = false;
4475 bool mapPage = true;
4476
4477 // check whether there's already a page mapped at the address
4478 context.map->Lock();
4479
4480 phys_addr_t physicalAddress;
4481 uint32 flags;
4482 vm_page* mappedPage = NULL;
4483 if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4484 && (flags & PAGE_PRESENT) != 0
4485 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4486 != NULL) {
4487 // Yep there's already a page. If it's ours, we can simply adjust
4488 // its protection. Otherwise we have to unmap it.
4489 if (mappedPage == context.page) {
4490 context.map->ProtectPage(area, address, newProtection);
4491 // Note: We assume that ProtectPage() is atomic (i.e.
4492 // the page isn't temporarily unmapped), otherwise we'd have
4493 // to make sure it isn't wired.
4494 mapPage = false;
4495 } else
4496 unmapPage = true;
4497 }
4498
4499 context.map->Unlock();
4500
4501 if (unmapPage) {
4502 // If the page is wired, we can't unmap it. Wait until it is unwired
4503 // again and restart. Note that the page cannot be wired for
4504 // writing, since it isn't in the topmost cache. So we can safely
4505 // ignore ranges wired for writing (our own and other concurrent
4506 // wiring attempts in progress) and in fact have to do that to avoid
4507 // a deadlock.
4508 VMAreaUnwiredWaiter waiter;
4509 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4510 VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4511 // unlock everything and wait
4512 if (context.pageAllocated) {
4513 // ... but since we allocated a page and inserted it into
4514 // the top cache, remove and free it first. Otherwise we'd
4515 // have a page from a lower cache mapped while an upper
4516 // cache has a page that would shadow it.
4517 context.topCache->RemovePage(context.page);
4518 vm_page_free_etc(context.topCache, context.page,
4519 &context.reservation);
4520 } else
4521 DEBUG_PAGE_ACCESS_END(context.page);
4522
4523 context.UnlockAll();
4524 waiter.waitEntry.Wait();
4525 continue;
4526 }
4527
4528 // Note: The mapped page is a page of a lower cache. We are
4529 // guaranteed to have that cache locked, our new page is a copy of
4530 // that page, and the page is not busy. The logic for that guarantee
4531 // is as follows: Since the page is mapped, it must live in the top
4532 // cache (ruled out above) or any of its lower caches, and there is
4533 // (was before the new page was inserted) no other page in any
4534 // cache between the top cache and the page's cache (otherwise that
4535 // would be mapped instead). That in turn means that our algorithm
4536 // must have found it and therefore it cannot be busy either.
4537 DEBUG_PAGE_ACCESS_START(mappedPage);
4538 unmap_page(area, address);
4539 DEBUG_PAGE_ACCESS_END(mappedPage);
4540 }
4541
4542 if (mapPage) {
4543 if (map_page(area, context.page, address, newProtection,
4544 &context.reservation) != B_OK) {
4545 // Mapping can only fail, when the page mapping object couldn't
4546 // be allocated. Save for the missing mapping everything is
4547 // fine, though. If this was a regular page fault, we'll simply
4548 // leave and probably fault again. To make sure we'll have more
4549 // luck then, we ensure that the minimum object reserve is
4550 // available.
4551 DEBUG_PAGE_ACCESS_END(context.page);
4552
4553 context.UnlockAll();
4554
4555 if (object_cache_reserve(page_mapping_object_cache_for(
4556 context.page->physical_page_number), 1, 0)
4557 != B_OK) {
4558 // Apparently the situation is serious. Let's get ourselves
4559 // killed.
4560 status = B_NO_MEMORY;
4561 } else if (wirePage != NULL) {
4562 // The caller expects us to wire the page. Since
4563 // object_cache_reserve() succeeded, we should now be able
4564 // to allocate a mapping structure. Restart.
4565 continue;
4566 }
4567
4568 break;
4569 }
4570 } else if (context.page->State() == PAGE_STATE_INACTIVE)
4571 vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4572
4573 // also wire the page, if requested
4574 if (wirePage != NULL && status == B_OK) {
4575 increment_page_wired_count(context.page);
4576 *wirePage = context.page;
4577 }
4578
4579 DEBUG_PAGE_ACCESS_END(context.page);
4580
4581 break;
4582 }
4583
4584 return status;
4585 }
4586
4587
4588 status_t
4589 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4590 {
4591 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4592 }
4593
4594 status_t
4595 vm_put_physical_page(addr_t vaddr, void* handle)
4596 {
4597 return sPhysicalPageMapper->PutPage(vaddr, handle);
4598 }
4599
4600
4601 status_t
4602 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4603 void** _handle)
4604 {
4605 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4606 }
4607
4608 status_t
4609 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4610 {
4611 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4612 }
4613
4614
4615 status_t
4616 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4617 {
4618 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4619 }
4620
4621 status_t
4622 vm_put_physical_page_debug(addr_t vaddr, void* handle)
4623 {
4624 return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4625 }
4626
4627
4628 void
4629 vm_get_info(system_info* info)
4630 {
4631 swap_get_info(info);
4632
4633 MutexLocker locker(sAvailableMemoryLock);
4634 info->needed_memory = sNeededMemory;
4635 info->free_memory = sAvailableMemory;
4636 }
4637
4638
4639 uint32
4640 vm_num_page_faults(void)
4641 {
4642 return sPageFaults;
4643 }
4644
4645
4646 off_t
4647 vm_available_memory(void)
4648 {
4649 MutexLocker locker(sAvailableMemoryLock);
4650 return sAvailableMemory;
4651 }
4652
4653
4654 /*! Like vm_available_memory(), but only for use in the kernel
4655 debugger.
4656 */
4657 off_t
4658 vm_available_memory_debug(void)
4659 {
4660 return sAvailableMemory;
4661 }
4662
4663
4664 off_t
4665 vm_available_not_needed_memory(void)
4666 {
4667 MutexLocker locker(sAvailableMemoryLock);
4668 return sAvailableMemory - sNeededMemory;
4669 }
4670
4671
4672 /*! Like vm_available_not_needed_memory(), but only for use in the kernel
4673 debugger.
4674 */
4675 off_t
4676 vm_available_not_needed_memory_debug(void)
4677 {
4678 return sAvailableMemory - sNeededMemory;
4679 }
4680
4681
4682 size_t
4683 vm_kernel_address_space_left(void)
4684 {
4685 return VMAddressSpace::Kernel()->FreeSpace();
4686 }
4687
4688
4689 void
4690 vm_unreserve_memory(size_t amount)
4691 {
4692 mutex_lock(&sAvailableMemoryLock);
4693
4694 sAvailableMemory += amount;
4695
4696 mutex_unlock(&sAvailableMemoryLock);
4697 }
4698
4699
4700 status_t
4701 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4702 {
4703 size_t reserve = kMemoryReserveForPriority[priority];
4704
4705 MutexLocker locker(sAvailableMemoryLock);
4706
4707 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4708
4709 if (sAvailableMemory >= (off_t)(amount + reserve)) {
4710 sAvailableMemory -= amount;
4711 return B_OK;
4712 }
4713
4714 if (amount >= (vm_page_num_pages() * B_PAGE_SIZE)) {
4715 // Do not wait for something that will never happen.
4716 return B_NO_MEMORY;
4717 }
4718
4719 if (timeout <= 0)
4720 return B_NO_MEMORY;
4721
4722 // turn timeout into an absolute timeout
4723 timeout += system_time();
4724
4725 // loop until we've got the memory or the timeout occurs
4726 do {
4727 sNeededMemory += amount;
4728
4729 // call the low resource manager
4730 locker.Unlock();
4731 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4732 B_ABSOLUTE_TIMEOUT, timeout);
4733 locker.Lock();
4734
4735 sNeededMemory -= amount;
4736
4737 if (sAvailableMemory >= (off_t)(amount + reserve)) {
4738 sAvailableMemory -= amount;
4739 return B_OK;
4740 }
4741 } while (timeout > system_time());
4742
4743 return B_NO_MEMORY;
4744 }
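// Illustrative sketch: a typical pairing of vm_try_reserve_memory() with
// vm_unreserve_memory(). The size and the 1 s timeout are arbitrary example
// values; the timeout gives the low resource manager a chance to free memory.
//
// \code
//	const size_t size = 16 * B_PAGE_SIZE;
//	if (vm_try_reserve_memory(size, VM_PRIORITY_USER, 1000000) == B_OK) {
//		// ... commit and use the memory ...
//		vm_unreserve_memory(size);
//	}
// \endcode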
4745
4746
4747 status_t
4748 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4749 {
4750 // NOTE: The caller is responsible for synchronizing calls to this function!
4751
4752 AddressSpaceReadLocker locker;
4753 VMArea* area;
4754 status_t status = locker.SetFromArea(id, area);
4755 if (status != B_OK)
4756 return status;
4757
4758 // nothing to do, if the type doesn't change
4759 uint32 oldType = area->MemoryType();
4760 if (type == oldType)
4761 return B_OK;
4762
4763 // set the memory type of the area and the mapped pages
4764 VMTranslationMap* map = area->address_space->TranslationMap();
4765 map->Lock();
4766 area->SetMemoryType(type);
4767 map->ProtectArea(area, area->protection);
4768 map->Unlock();
4769
4770 // set the physical memory type
4771 status_t error = arch_vm_set_memory_type(area, physicalBase, type, NULL);
4772 if (error != B_OK) {
4773 // reset the memory type of the area and the mapped pages
4774 map->Lock();
4775 area->SetMemoryType(oldType);
4776 map->ProtectArea(area, area->protection);
4777 map->Unlock();
4778 return error;
4779 }
4780
4781 return B_OK;
4782
4783 }
4784
4785
4786 /*! This function enforces some protection properties:
4787 - kernel areas must be W^X (after kernel startup)
4788 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4789 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4790 */
4791 static void
4792 fix_protection(uint32* protection)
4793 {
4794 if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
4795 && ((*protection & B_KERNEL_WRITE_AREA) != 0
4796 || (*protection & B_WRITE_AREA) != 0)
4797 && !gKernelStartup)
4798 panic("kernel areas cannot be both writable and executable!");
4799
4800 if ((*protection & B_KERNEL_PROTECTION) == 0) {
4801 if ((*protection & B_WRITE_AREA) != 0)
4802 *protection |= B_KERNEL_WRITE_AREA;
4803 if ((*protection & B_READ_AREA) != 0)
4804 *protection |= B_KERNEL_READ_AREA;
4805 }
4806 }
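// Example of the rules above: a plain userland read/write request also
// receives the corresponding kernel flags, so the kernel may access the area
// on the team's behalf.
//
// \code
//	uint32 protection = B_READ_AREA | B_WRITE_AREA;
//	fix_protection(&protection);
//	// protection now additionally contains
//	// B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA
// \endcode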
4807
4808
4809 static void
4810 fill_area_info(struct VMArea* area, area_info* info, size_t size)
4811 {
4812 strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4813 info->area = area->id;
4814 info->address = (void*)area->Base();
4815 info->size = area->Size();
4816 info->protection = area->protection;
4817 info->lock = area->wiring;
4818 info->team = area->address_space->ID();
4819 info->copy_count = 0;
4820 info->in_count = 0;
4821 info->out_count = 0;
4822 // TODO: retrieve real values here!
4823
4824 VMCache* cache = vm_area_get_locked_cache(area);
4825
4826 // Note, this is a simplification; the cache could be larger than this area
4827 info->ram_size = cache->page_count * B_PAGE_SIZE;
4828
4829 vm_area_put_locked_cache(cache);
4830 }
4831
4832
4833 static status_t
4834 vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4835 {
4836 // is newSize a multiple of B_PAGE_SIZE?
4837 if (newSize & (B_PAGE_SIZE - 1))
4838 return B_BAD_VALUE;
4839
4840 // lock all affected address spaces and the cache
4841 VMArea* area;
4842 VMCache* cache;
4843
4844 MultiAddressSpaceLocker locker;
4845 AreaCacheLocker cacheLocker;
4846
4847 status_t status;
4848 size_t oldSize;
4849 bool anyKernelArea;
4850 bool restart;
4851
4852 do {
4853 anyKernelArea = false;
4854 restart = false;
4855
4856 locker.Unset();
4857 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4858 if (status != B_OK)
4859 return status;
4860 cacheLocker.SetTo(cache, true); // already locked
4861
4862 // enforce restrictions
4863 if (!kernel && (area->address_space == VMAddressSpace::Kernel()
4864 || (area->protection & B_KERNEL_AREA) != 0)) {
4865 dprintf("vm_resize_area: team %" B_PRId32 " tried to "
4866 "resize kernel area %" B_PRId32 " (%s)\n",
4867 team_get_current_team_id(), areaID, area->name);
4868 return B_NOT_ALLOWED;
4869 }
4870 // TODO: Enforce all restrictions (team, etc.)!
4871
4872 oldSize = area->Size();
4873 if (newSize == oldSize)
4874 return B_OK;
4875
4876 if (cache->type != CACHE_TYPE_RAM)
4877 return B_NOT_ALLOWED;
4878
4879 if (oldSize < newSize) {
4880 // We need to check if all areas of this cache can be resized.
4881 for (VMArea* current = cache->areas; current != NULL;
4882 current = current->cache_next) {
4883 if (!current->address_space->CanResizeArea(current, newSize))
4884 return B_ERROR;
4885 anyKernelArea
4886 |= current->address_space == VMAddressSpace::Kernel();
4887 }
4888 } else {
4889 // We're shrinking the areas, so we must make sure the affected
4890 // ranges are not wired.
4891 for (VMArea* current = cache->areas; current != NULL;
4892 current = current->cache_next) {
4893 anyKernelArea
4894 |= current->address_space == VMAddressSpace::Kernel();
4895
4896 if (wait_if_area_range_is_wired(current,
4897 current->Base() + newSize, oldSize - newSize, &locker,
4898 &cacheLocker)) {
4899 restart = true;
4900 break;
4901 }
4902 }
4903 }
4904 } while (restart);
4905
4906 // Okay, looks good so far, so let's do it
4907
4908 int priority = kernel && anyKernelArea
4909 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
4910 uint32 allocationFlags = kernel && anyKernelArea
4911 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
4912
4913 if (oldSize < newSize) {
4914 // Growing the cache can fail, so we do it first.
4915 status = cache->Resize(cache->virtual_base + newSize, priority);
4916 if (status != B_OK)
4917 return status;
4918 }
4919
4920 for (VMArea* current = cache->areas; current != NULL;
4921 current = current->cache_next) {
4922 status = current->address_space->ResizeArea(current, newSize,
4923 allocationFlags);
4924 if (status != B_OK)
4925 break;
4926
4927 // We also need to unmap all pages beyond the new size, if the area has
4928 // shrunk
4929 if (newSize < oldSize) {
4930 VMCacheChainLocker cacheChainLocker(cache);
4931 cacheChainLocker.LockAllSourceCaches();
4932
4933 unmap_pages(current, current->Base() + newSize,
4934 oldSize - newSize);
4935
4936 cacheChainLocker.Unlock(cache);
4937 }
4938 }
4939
4940 if (status == B_OK) {
4941 // Shrink or grow individual page protections if in use.
4942 if (area->page_protections != NULL) {
4943 size_t bytes = area_page_protections_size(newSize);
4944 uint8* newProtections
4945 = (uint8*)realloc(area->page_protections, bytes);
4946 if (newProtections == NULL)
4947 status = B_NO_MEMORY;
4948 else {
4949 area->page_protections = newProtections;
4950
4951 if (oldSize < newSize) {
4952 // init the additional page protections to that of the area
4953 uint32 offset = area_page_protections_size(oldSize);
4954 uint32 areaProtection = area->protection
4955 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
4956 memset(area->page_protections + offset,
4957 areaProtection | (areaProtection << 4), bytes - offset);
4958 if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
4959 uint8& entry = area->page_protections[offset - 1];
4960 entry = (entry & 0x0f) | (areaProtection << 4);
4961 }
4962 }
4963 }
4964 }
4965 }
4966
4967 // shrinking the cache can't fail, so we do it now
4968 if (status == B_OK && newSize < oldSize)
4969 status = cache->Resize(cache->virtual_base + newSize, priority);
4970
4971 if (status != B_OK) {
4972 // Something failed -- resize the areas back to their original size.
4973 // This can fail, too, in which case we're seriously screwed.
4974 for (VMArea* current = cache->areas; current != NULL;
4975 current = current->cache_next) {
4976 if (current->address_space->ResizeArea(current, oldSize,
4977 allocationFlags) != B_OK) {
4978 panic("vm_resize_area(): Failed and unable to restore the "
4979 "original state.");
4980 }
4981 }
4982
4983 cache->Resize(cache->virtual_base + oldSize, priority);
4984 }
4985
4986 // TODO: we must honour the lock restrictions of this area
4987 return status;
4988 }
4989
4990
4991 status_t
4992 vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
4993 {
4994 return sPhysicalPageMapper->MemsetPhysical(address, value, length);
4995 }
4996
4997
4998 status_t
4999 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5000 {
5001 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5002 }
5003
5004
5005 status_t
5006 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5007 bool user)
5008 {
5009 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5010 }
5011
5012
5013 void
5014 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5015 {
5016 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5017 }
5018
5019
5020 /** Validate that a memory range is either fully in kernel space, or fully in
5021 * userspace */
5022 static inline bool
5023 validate_memory_range(const void* addr, size_t size)
5024 {
5025 addr_t address = (addr_t)addr;
5026
5027 // Check for overflows on all addresses.
5028 if ((address + size) < address)
5029 return false;
5030
5031 // Validate that the address range does not cross the kernel/user boundary.
5032 return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1);
5033 }
5034
5035
5036 // #pragma mark - kernel public API
5037
5038
5039 status_t
5040 user_memcpy(void* to, const void* from, size_t size)
5041 {
5042 if (!validate_memory_range(to, size) || !validate_memory_range(from, size))
5043 return B_BAD_ADDRESS;
5044
5045 if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5046 return B_BAD_ADDRESS;
5047
5048 return B_OK;
5049 }
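// Illustrative sketch: the usual copy-in pattern for a syscall handler. The
// IS_USER_ADDRESS() check plus user_memcpy() guards against kernel pointers
// and unmapped user memory alike. "userArgs" and "example_args" are
// hypothetical names used only for this sketch.
//
// \code
//	struct example_args args;
//	if (!IS_USER_ADDRESS(userArgs)
//		|| user_memcpy(&args, userArgs, sizeof(args)) != B_OK)
//		return B_BAD_ADDRESS;
// \endcode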
5050
5051
5052 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to
5053 the string in \a to, NULL-terminating the result.
5054
5055 \param to Pointer to the destination C-string.
5056 \param from Pointer to the source C-string.
5057 \param size Size in bytes of the string buffer pointed to by \a to.
5058
5059 \return strlen(\a from).
5060 */
5061 ssize_t
5062 user_strlcpy(char* to, const char* from, size_t size)
5063 {
5064 if (to == NULL && size != 0)
5065 return B_BAD_VALUE;
5066 if (from == NULL)
5067 return B_BAD_ADDRESS;
5068
5069 // Protect the source address from overflows.
5070 size_t maxSize = size;
5071 if ((addr_t)from + maxSize < (addr_t)from)
5072 maxSize -= (addr_t)from + maxSize;
5073 if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
5074 maxSize = USER_TOP - (addr_t)from;
5075
5076 if (!validate_memory_range(to, maxSize))
5077 return B_BAD_ADDRESS;
5078
5079 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5080 if (result < 0)
5081 return result;
5082
5083 // If we hit the address overflow boundary, fail.
5084 if ((size_t)result >= maxSize && maxSize < size)
5085 return B_BAD_ADDRESS;
5086
5087 return result;
5088 }
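// Illustrative sketch: since user_strlcpy() returns strlen(\a from) (or a
// negative error), a caller can detect truncation by comparing the result
// against the destination buffer size, analogous to strlcpy(). "userName" is
// a hypothetical userland pointer.
//
// \code
//	char name[B_OS_NAME_LENGTH];
//	ssize_t length = user_strlcpy(name, userName, sizeof(name));
//	if (length < 0)
//		return B_BAD_ADDRESS;
//	if ((size_t)length >= sizeof(name))
//		return B_NAME_TOO_LONG;	// the source string was truncated
// \endcode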
5089
5090
5091 status_t
5092 user_memset(void* s, char c, size_t count)
5093 {
5094 if (!validate_memory_range(s, count))
5095 return B_BAD_ADDRESS;
5096
5097 if (arch_cpu_user_memset(s, c, count) < B_OK)
5098 return B_BAD_ADDRESS;
5099
5100 return B_OK;
5101 }
5102
5103
5104 /*! Wires a single page at the given address.
5105
5106 	\param team The team whose address space the address belongs to. Also
5107 		supports \c B_CURRENT_TEAM. If the given address is a kernel address, the
5108 parameter is ignored.
5109 	\param address The virtual address to wire down. Does not need to
5110 be page aligned.
5111 \param writable If \c true the page shall be writable.
5112 \param info On success the info is filled in, among other things
5113 containing the physical address the given virtual one translates to.
5114 	\return \c B_OK if the page could be wired, another error code otherwise.
5115 */
5116 status_t
5117 vm_wire_page(team_id team, addr_t address, bool writable,
5118 VMPageWiringInfo* info)
5119 {
5120 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5121 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5122
5123 // compute the page protection that is required
5124 bool isUser = IS_USER_ADDRESS(address);
5125 uint32 requiredProtection = PAGE_PRESENT
5126 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5127 if (writable)
5128 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5129
5130 // get and read lock the address space
5131 VMAddressSpace* addressSpace = NULL;
5132 if (isUser) {
5133 if (team == B_CURRENT_TEAM)
5134 addressSpace = VMAddressSpace::GetCurrent();
5135 else
5136 addressSpace = VMAddressSpace::Get(team);
5137 } else
5138 addressSpace = VMAddressSpace::GetKernel();
5139 if (addressSpace == NULL)
5140 return B_ERROR;
5141
5142 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5143
5144 VMTranslationMap* map = addressSpace->TranslationMap();
5145 status_t error = B_OK;
5146
5147 // get the area
5148 VMArea* area = addressSpace->LookupArea(pageAddress);
5149 if (area == NULL) {
5150 addressSpace->Put();
5151 return B_BAD_ADDRESS;
5152 }
5153
5154 // Lock the area's top cache. This is a requirement for VMArea::Wire().
5155 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5156
5157 // mark the area range wired
5158 area->Wire(&info->range);
5159
5160 // Lock the area's cache chain and the translation map. Needed to look
5161 // up the page and play with its wired count.
5162 cacheChainLocker.LockAllSourceCaches();
5163 map->Lock();
5164
5165 phys_addr_t physicalAddress;
5166 uint32 flags;
5167 vm_page* page;
5168 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5169 && (flags & requiredProtection) == requiredProtection
5170 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5171 != NULL) {
5172 // Already mapped with the correct permissions -- just increment
5173 // the page's wired count.
5174 increment_page_wired_count(page);
5175
5176 map->Unlock();
5177 cacheChainLocker.Unlock();
5178 addressSpaceLocker.Unlock();
5179 } else {
5180 // Let vm_soft_fault() map the page for us, if possible. We need
5181 // to fully unlock to avoid deadlocks. Since we have already
5182 // wired the area itself, nothing disturbing will happen with it
5183 // in the meantime.
5184 map->Unlock();
5185 cacheChainLocker.Unlock();
5186 addressSpaceLocker.Unlock();
5187
5188 error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5189 isUser, &page);
5190
5191 if (error != B_OK) {
5192 // The page could not be mapped -- clean up.
5193 VMCache* cache = vm_area_get_locked_cache(area);
5194 area->Unwire(&info->range);
5195 cache->ReleaseRefAndUnlock();
5196 addressSpace->Put();
5197 return error;
5198 }
5199 }
5200
5201 info->physicalAddress
5202 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5203 + address % B_PAGE_SIZE;
5204 info->page = page;
5205
5206 return B_OK;
5207 }
5208
5209
5210 /*! Unwires a single page previously wired via vm_wire_page().
5211
5212 \param info The same object passed to vm_wire_page() before.
5213 */
5214 void
5215 vm_unwire_page(VMPageWiringInfo* info)
5216 {
5217 // lock the address space
5218 VMArea* area = info->range.area;
5219 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5220 // takes over our reference
5221
5222 // lock the top cache
5223 VMCache* cache = vm_area_get_locked_cache(area);
5224 VMCacheChainLocker cacheChainLocker(cache);
5225
5226 if (info->page->Cache() != cache) {
5227 // The page is not in the top cache, so we lock the whole cache chain
5228 // before touching the page's wired count.
5229 cacheChainLocker.LockAllSourceCaches();
5230 }
5231
5232 decrement_page_wired_count(info->page);
5233
5234 	// remove the wired range from the area
5235 area->Unwire(&info->range);
5236
5237 cacheChainLocker.Unlock();
5238 }
5239
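// Illustrative sketch (editor's addition, not compiled): wiring a single page,
// looking at its physical address, and unwiring it again.
#if 0
static status_t
example_wire_one_page(addr_t virtualAddress)
{
	VMPageWiringInfo info;
	status_t error = vm_wire_page(B_CURRENT_TEAM, virtualAddress, false, &info);
	if (error != B_OK)
		return error;

	dprintf("virtual %#" B_PRIxADDR " -> physical %#" B_PRIxPHYSADDR "\n",
		virtualAddress, info.physicalAddress);

	vm_unwire_page(&info);
	return B_OK;
}
#endif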
5240
5241 /*! Wires down the given address range in the specified team's address space.
5242
5243 If successful the function
5244 - acquires a reference to the specified team's address space,
5245 - adds respective wired ranges to all areas that intersect with the given
5246 address range,
5247 - makes sure all pages in the given address range are mapped with the
5248 requested access permissions and increments their wired count.
5249
5250 	It fails when \a team doesn't specify a valid address space, when any part
5251 of the specified address range is not covered by areas, when the concerned
5252 areas don't allow mapping with the requested permissions, or when mapping
5253 failed for another reason.
5254
5255 	When successful the call must be balanced by an unlock_memory_etc() call with
5256 the exact same parameters.
5257
5258 	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5259 supported.
5260 \param address The start of the address range to be wired.
5261 \param numBytes The size of the address range to be wired.
5262 \param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5263 requests that the range must be wired writable ("read from device
5264 into memory").
5265 \return \c B_OK on success, another error code otherwise.
5266 */
5267 status_t
5268 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5269 {
5270 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5271 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5272
5273 // compute the page protection that is required
5274 bool isUser = IS_USER_ADDRESS(address);
5275 bool writable = (flags & B_READ_DEVICE) == 0;
5276 uint32 requiredProtection = PAGE_PRESENT
5277 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5278 if (writable)
5279 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5280
5281 uint32 mallocFlags = isUser
5282 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5283
5284 // get and read lock the address space
5285 VMAddressSpace* addressSpace = NULL;
5286 if (isUser) {
5287 if (team == B_CURRENT_TEAM)
5288 addressSpace = VMAddressSpace::GetCurrent();
5289 else
5290 addressSpace = VMAddressSpace::Get(team);
5291 } else
5292 addressSpace = VMAddressSpace::GetKernel();
5293 if (addressSpace == NULL)
5294 return B_ERROR;
5295
5296 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5297 // We get a new address space reference here. The one we got above will
5298 // be freed by unlock_memory_etc().
5299
5300 VMTranslationMap* map = addressSpace->TranslationMap();
5301 status_t error = B_OK;
5302
5303 // iterate through all concerned areas
5304 addr_t nextAddress = lockBaseAddress;
5305 while (nextAddress != lockEndAddress) {
5306 // get the next area
5307 VMArea* area = addressSpace->LookupArea(nextAddress);
5308 if (area == NULL) {
5309 error = B_BAD_ADDRESS;
5310 break;
5311 }
5312
5313 addr_t areaStart = nextAddress;
5314 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5315
5316 // allocate the wired range (do that before locking the cache to avoid
5317 // deadlocks)
5318 VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5319 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5320 if (range == NULL) {
5321 error = B_NO_MEMORY;
5322 break;
5323 }
5324
5325 // Lock the area's top cache. This is a requirement for VMArea::Wire().
5326 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5327
5328 // mark the area range wired
5329 area->Wire(range);
5330
5331 // Depending on the area cache type and the wiring, we may not need to
5332 // look at the individual pages.
5333 if (area->cache_type == CACHE_TYPE_NULL
5334 || area->cache_type == CACHE_TYPE_DEVICE
5335 || area->wiring == B_FULL_LOCK
5336 || area->wiring == B_CONTIGUOUS) {
5337 nextAddress = areaEnd;
5338 continue;
5339 }
5340
5341 // Lock the area's cache chain and the translation map. Needed to look
5342 // up pages and play with their wired count.
5343 cacheChainLocker.LockAllSourceCaches();
5344 map->Lock();
5345
5346 // iterate through the pages and wire them
5347 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5348 phys_addr_t physicalAddress;
5349 uint32 flags;
5350
5351 vm_page* page;
5352 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5353 && (flags & requiredProtection) == requiredProtection
5354 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5355 != NULL) {
5356 // Already mapped with the correct permissions -- just increment
5357 // the page's wired count.
5358 increment_page_wired_count(page);
5359 } else {
5360 // Let vm_soft_fault() map the page for us, if possible. We need
5361 // to fully unlock to avoid deadlocks. Since we have already
5362 // wired the area itself, nothing disturbing will happen with it
5363 // in the meantime.
5364 map->Unlock();
5365 cacheChainLocker.Unlock();
5366 addressSpaceLocker.Unlock();
5367
5368 error = vm_soft_fault(addressSpace, nextAddress, writable,
5369 false, isUser, &page);
5370
5371 addressSpaceLocker.Lock();
5372 cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5373 cacheChainLocker.LockAllSourceCaches();
5374 map->Lock();
5375 }
5376
5377 if (error != B_OK)
5378 break;
5379 }
5380
5381 map->Unlock();
5382
5383 if (error == B_OK) {
5384 cacheChainLocker.Unlock();
5385 } else {
5386 // An error occurred, so abort right here. If the current address
5387 // is the first in this area, unwire the area, since we won't get
5388 // to it when reverting what we've done so far.
5389 if (nextAddress == areaStart) {
5390 area->Unwire(range);
5391 cacheChainLocker.Unlock();
5392 range->~VMAreaWiredRange();
5393 free_etc(range, mallocFlags);
5394 } else
5395 cacheChainLocker.Unlock();
5396
5397 break;
5398 }
5399 }
5400
5401 if (error != B_OK) {
5402 // An error occurred, so unwire all that we've already wired. Note that
5403 // even if not a single page was wired, unlock_memory_etc() is called
5404 // to put the address space reference.
5405 addressSpaceLocker.Unlock();
5406 unlock_memory_etc(team, (void*)lockBaseAddress,
5407 nextAddress - lockBaseAddress, flags);
5408 }
5409
5410 return error;
5411 }
5412
5413
5414 status_t
5415 lock_memory(void* address, size_t numBytes, uint32 flags)
5416 {
5417 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5418 }
5419
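// Illustrative sketch (editor's addition, not compiled): wiring a buffer for a
// transfer and releasing it with the exact same parameters, as required by the
// documentation of lock_memory_etc() above. Buffer and length are hypothetical.
#if 0
static status_t
example_wire_buffer(void* buffer, size_t length)
{
	// flags == 0 wires the range writable here; the identical flags must be
	// passed to unlock_memory_etc() again.
	status_t error = lock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
	if (error != B_OK)
		return error;

	// ... perform the actual transfer into the wired range ...

	return unlock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
}
#endif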
5420
5421 /*! Unwires an address range previously wired with lock_memory_etc().
5422
5423 Note that a call to this function must balance a previous lock_memory_etc()
5424 call with exactly the same parameters.
5425 */
5426 status_t
5427 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5428 {
5429 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5430 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5431
5432 // compute the page protection that is required
5433 bool isUser = IS_USER_ADDRESS(address);
5434 bool writable = (flags & B_READ_DEVICE) == 0;
5435 uint32 requiredProtection = PAGE_PRESENT
5436 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5437 if (writable)
5438 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5439
5440 uint32 mallocFlags = isUser
5441 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5442
5443 // get and read lock the address space
5444 VMAddressSpace* addressSpace = NULL;
5445 if (isUser) {
5446 if (team == B_CURRENT_TEAM)
5447 addressSpace = VMAddressSpace::GetCurrent();
5448 else
5449 addressSpace = VMAddressSpace::Get(team);
5450 } else
5451 addressSpace = VMAddressSpace::GetKernel();
5452 if (addressSpace == NULL)
5453 return B_ERROR;
5454
5455 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5456 // Take over the address space reference. We don't unlock until we're
5457 // done.
5458
5459 VMTranslationMap* map = addressSpace->TranslationMap();
5460 status_t error = B_OK;
5461
5462 // iterate through all concerned areas
5463 addr_t nextAddress = lockBaseAddress;
5464 while (nextAddress != lockEndAddress) {
5465 // get the next area
5466 VMArea* area = addressSpace->LookupArea(nextAddress);
5467 if (area == NULL) {
5468 error = B_BAD_ADDRESS;
5469 break;
5470 }
5471
5472 addr_t areaStart = nextAddress;
5473 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5474
5475 // Lock the area's top cache. This is a requirement for
5476 // VMArea::Unwire().
5477 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5478
5479 // Depending on the area cache type and the wiring, we may not need to
5480 // look at the individual pages.
5481 if (area->cache_type == CACHE_TYPE_NULL
5482 || area->cache_type == CACHE_TYPE_DEVICE
5483 || area->wiring == B_FULL_LOCK
5484 || area->wiring == B_CONTIGUOUS) {
5485 // unwire the range (to avoid deadlocks we delete the range after
5486 // unlocking the cache)
5487 nextAddress = areaEnd;
5488 VMAreaWiredRange* range = area->Unwire(areaStart,
5489 areaEnd - areaStart, writable);
5490 cacheChainLocker.Unlock();
5491 if (range != NULL) {
5492 range->~VMAreaWiredRange();
5493 free_etc(range, mallocFlags);
5494 }
5495 continue;
5496 }
5497
5498 // Lock the area's cache chain and the translation map. Needed to look
5499 // up pages and play with their wired count.
5500 cacheChainLocker.LockAllSourceCaches();
5501 map->Lock();
5502
5503 // iterate through the pages and unwire them
5504 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5505 phys_addr_t physicalAddress;
5506 uint32 flags;
5507
5508 vm_page* page;
5509 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5510 && (flags & PAGE_PRESENT) != 0
5511 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5512 != NULL) {
5513 				// The page is still mapped -- just decrement
5514 				// its wired count.
5515 decrement_page_wired_count(page);
5516 } else {
5517 panic("unlock_memory_etc(): Failed to unwire page: address "
5518 "space %p, address: %#" B_PRIxADDR, addressSpace,
5519 nextAddress);
5520 error = B_BAD_VALUE;
5521 break;
5522 }
5523 }
5524
5525 map->Unlock();
5526
5527 // All pages are unwired. Remove the area's wired range as well (to
5528 // avoid deadlocks we delete the range after unlocking the cache).
5529 VMAreaWiredRange* range = area->Unwire(areaStart,
5530 areaEnd - areaStart, writable);
5531
5532 cacheChainLocker.Unlock();
5533
5534 if (range != NULL) {
5535 range->~VMAreaWiredRange();
5536 free_etc(range, mallocFlags);
5537 }
5538
5539 if (error != B_OK)
5540 break;
5541 }
5542
5543 // get rid of the address space reference lock_memory_etc() acquired
5544 addressSpace->Put();
5545
5546 return error;
5547 }
5548
5549
5550 status_t
5551 unlock_memory(void* address, size_t numBytes, uint32 flags)
5552 {
5553 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5554 }
5555
5556
5557 /*! Similar to get_memory_map(), but also allows specifying the address space
5558 	for the memory in question and has saner semantics.
5559 Returns \c B_OK when the complete range could be translated or
5560 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either
5561 case the actual number of entries is written to \c *_numEntries. Any other
5562 error case indicates complete failure; \c *_numEntries will be set to \c 0
5563 in this case.
5564 */
5565 status_t
5566 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5567 physical_entry* table, uint32* _numEntries)
5568 {
5569 uint32 numEntries = *_numEntries;
5570 *_numEntries = 0;
5571
5572 VMAddressSpace* addressSpace;
5573 addr_t virtualAddress = (addr_t)address;
5574 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5575 phys_addr_t physicalAddress;
5576 status_t status = B_OK;
5577 int32 index = -1;
5578 addr_t offset = 0;
5579 bool interrupts = are_interrupts_enabled();
5580
5581 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5582 "entries)\n", team, address, numBytes, numEntries));
5583
5584 if (numEntries == 0 || numBytes == 0)
5585 return B_BAD_VALUE;
5586
5587 // in which address space is the address to be found?
5588 if (IS_USER_ADDRESS(virtualAddress)) {
5589 if (team == B_CURRENT_TEAM)
5590 addressSpace = VMAddressSpace::GetCurrent();
5591 else
5592 addressSpace = VMAddressSpace::Get(team);
5593 } else
5594 addressSpace = VMAddressSpace::GetKernel();
5595
5596 if (addressSpace == NULL)
5597 return B_ERROR;
5598
5599 VMTranslationMap* map = addressSpace->TranslationMap();
5600
5601 if (interrupts)
5602 map->Lock();
5603
5604 while (offset < numBytes) {
5605 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5606 uint32 flags;
5607
5608 if (interrupts) {
5609 status = map->Query((addr_t)address + offset, &physicalAddress,
5610 &flags);
5611 } else {
5612 status = map->QueryInterrupt((addr_t)address + offset,
5613 &physicalAddress, &flags);
5614 }
5615 if (status < B_OK)
5616 break;
5617 if ((flags & PAGE_PRESENT) == 0) {
5618 panic("get_memory_map() called on unmapped memory!");
5619 return B_BAD_ADDRESS;
5620 }
5621
5622 if (index < 0 && pageOffset > 0) {
5623 physicalAddress += pageOffset;
5624 if (bytes > B_PAGE_SIZE - pageOffset)
5625 bytes = B_PAGE_SIZE - pageOffset;
5626 }
5627
5628 // need to switch to the next physical_entry?
5629 if (index < 0 || table[index].address
5630 != physicalAddress - table[index].size) {
5631 if ((uint32)++index + 1 > numEntries) {
5632 				// table too small
5633 break;
5634 }
5635 table[index].address = physicalAddress;
5636 table[index].size = bytes;
5637 } else {
5638 			// page fits into the current entry
5639 table[index].size += bytes;
5640 }
5641
5642 offset += bytes;
5643 }
5644
5645 if (interrupts)
5646 map->Unlock();
5647
5648 if (status != B_OK)
5649 return status;
5650
5651 if ((uint32)index + 1 > numEntries) {
5652 *_numEntries = index;
5653 return B_BUFFER_OVERFLOW;
5654 }
5655
5656 *_numEntries = index + 1;
5657 return B_OK;
5658 }
5659
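// Illustrative sketch (editor's addition, not compiled): translating a (wired)
// buffer into a list of physical runs. The table size of 8 is an arbitrary
// assumption.
#if 0
static status_t
example_print_physical_runs(const void* buffer, size_t length)
{
	physical_entry table[8];
	uint32 entryCount = 8;
	status_t error = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
		table, &entryCount);
	if (error != B_OK && error != B_BUFFER_OVERFLOW)
		return error;

	for (uint32 i = 0; i < entryCount; i++) {
		dprintf("  run %" B_PRIu32 ": %#" B_PRIxPHYSADDR ", %" B_PRIu64
			" bytes\n", i, table[i].address, (uint64)table[i].size);
	}
	return B_OK;
}
#endif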
5660
5661 /*! According to the BeBook, this function should always succeed.
5662 This is no longer the case.
5663 */
5664 extern "C" int32
5665 __get_memory_map_haiku(const void* address, size_t numBytes,
5666 physical_entry* table, int32 numEntries)
5667 {
5668 uint32 entriesRead = numEntries;
5669 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5670 table, &entriesRead);
5671 if (error != B_OK)
5672 return error;
5673
5674 // close the entry list
5675
5676 // if it's only one entry, we will silently accept the missing ending
5677 if (numEntries == 1)
5678 return B_OK;
5679
5680 if (entriesRead + 1 > (uint32)numEntries)
5681 return B_BUFFER_OVERFLOW;
5682
5683 table[entriesRead].address = 0;
5684 table[entriesRead].size = 0;
5685
5686 return B_OK;
5687 }
5688
5689
5690 area_id
5691 area_for(void* address)
5692 {
5693 return vm_area_for((addr_t)address, true);
5694 }
5695
5696
5697 area_id
5698 find_area(const char* name)
5699 {
5700 return VMAreas::Find(name);
5701 }
5702
5703
5704 status_t
5705 _get_area_info(area_id id, area_info* info, size_t size)
5706 {
5707 if (size != sizeof(area_info) || info == NULL)
5708 return B_BAD_VALUE;
5709
5710 AddressSpaceReadLocker locker;
5711 VMArea* area;
5712 status_t status = locker.SetFromArea(id, area);
5713 if (status != B_OK)
5714 return status;
5715
5716 fill_area_info(area, info, size);
5717 return B_OK;
5718 }
5719
5720
5721 status_t
5722 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5723 {
5724 addr_t nextBase = *(addr_t*)cookie;
5725
5726 // we're already through the list
5727 if (nextBase == (addr_t)-1)
5728 return B_ENTRY_NOT_FOUND;
5729
5730 if (team == B_CURRENT_TEAM)
5731 team = team_get_current_team_id();
5732
5733 AddressSpaceReadLocker locker(team);
5734 if (!locker.IsLocked())
5735 return B_BAD_TEAM_ID;
5736
5737 VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false);
5738 if (area == NULL) {
5739 nextBase = (addr_t)-1;
5740 return B_ENTRY_NOT_FOUND;
5741 }
5742
5743 fill_area_info(area, info, size);
5744 *cookie = (ssize_t)(area->Base() + 1);
5745
5746 return B_OK;
5747 }
5748
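// Illustrative sketch (editor's addition, not compiled): walking all areas of
// a team with the cookie protocol implemented above.
#if 0
static void
example_dump_team_areas(team_id team)
{
	ssize_t cookie = 0;
	area_info info;
	while (_get_next_area_info(team, &cookie, &info, sizeof(info)) == B_OK) {
		dprintf("area %" B_PRId32 " '%s': %p, %" B_PRIuSIZE " bytes\n",
			info.area, info.name, info.address, info.size);
	}
}
#endif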
5749
5750 status_t
5751 set_area_protection(area_id area, uint32 newProtection)
5752 {
5753 return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5754 newProtection, true);
5755 }
5756
5757
5758 status_t
5759 resize_area(area_id areaID, size_t newSize)
5760 {
5761 return vm_resize_area(areaID, newSize, true);
5762 }
5763
5764
5765 /*! Transfers the specified area to a new team. The caller must be the owner
5766 of the area.
5767 */
5768 area_id
5769 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5770 bool kernel)
5771 {
5772 area_info info;
5773 status_t status = get_area_info(id, &info);
5774 if (status != B_OK)
5775 return status;
5776
5777 if (!kernel && info.team != thread_get_current_thread()->team->id)
5778 return B_PERMISSION_DENIED;
5779
5780 // We need to mark the area cloneable so the following operations work.
5781 status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
5782 if (status != B_OK)
5783 return status;
5784
5785 area_id clonedArea = vm_clone_area(target, info.name, _address,
5786 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
5787 if (clonedArea < 0)
5788 return clonedArea;
5789
5790 status = vm_delete_area(info.team, id, kernel);
5791 if (status != B_OK) {
5792 vm_delete_area(target, clonedArea, kernel);
5793 return status;
5794 }
5795
5796 // Now we can reset the protection to whatever it was before.
5797 set_area_protection(clonedArea, info.protection);
5798
5799 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
5800
5801 return clonedArea;
5802 }
5803
5804
5805 extern "C" area_id
5806 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
5807 size_t numBytes, uint32 addressSpec, uint32 protection,
5808 void** _virtualAddress)
5809 {
5810 if (!arch_vm_supports_protection(protection))
5811 return B_NOT_SUPPORTED;
5812
5813 fix_protection(&protection);
5814
5815 return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
5816 _virtualAddress, addressSpec, numBytes, protection, physicalAddress,
5817 false);
5818 }
5819
5820
5821 area_id
5822 clone_area(const char* name, void** _address, uint32 addressSpec,
5823 uint32 protection, area_id source)
5824 {
5825 if ((protection & B_KERNEL_PROTECTION) == 0)
5826 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
5827
5828 return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
5829 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
5830 }
5831
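// Illustrative sketch (editor's addition, not compiled): cloning an existing
// area into the kernel address space with full kernel access.
#if 0
static area_id
example_clone_into_kernel(area_id source)
{
	void* address = NULL;
	return clone_area("cloned area", &address, B_ANY_KERNEL_ADDRESS,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, source);
}
#endif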
5832
5833 area_id
5834 create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
5835 uint32 protection, uint32 flags, uint32 guardSize,
5836 const virtual_address_restrictions* virtualAddressRestrictions,
5837 const physical_address_restrictions* physicalAddressRestrictions,
5838 void** _address)
5839 {
5840 fix_protection(&protection);
5841
5842 return vm_create_anonymous_area(team, name, size, lock, protection, flags,
5843 guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
5844 true, _address);
5845 }
5846
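// Illustrative sketch (editor's addition, not compiled): using the address
// restrictions accepted by create_area_etc() to request a fully locked buffer
// whose physical pages lie below 4 GB. Name and constraints are hypothetical.
#if 0
static area_id
example_create_low_memory_buffer(size_t size, void** _address)
{
	virtual_address_restrictions virtualRestrictions = {};
	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;

	physical_address_restrictions physicalRestrictions = {};
	physicalRestrictions.high_address = (phys_addr_t)1 << 32;

	return create_area_etc(B_SYSTEM_TEAM, "low memory buffer", size,
		B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0,
		&virtualRestrictions, &physicalRestrictions, _address);
}
#endif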
5847
5848 extern "C" area_id
5849 __create_area_haiku(const char* name, void** _address, uint32 addressSpec,
5850 size_t size, uint32 lock, uint32 protection)
5851 {
5852 fix_protection(&protection);
5853
5854 virtual_address_restrictions virtualRestrictions = {};
5855 virtualRestrictions.address = *_address;
5856 virtualRestrictions.address_specification = addressSpec;
5857 physical_address_restrictions physicalRestrictions = {};
5858 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
5859 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
5860 true, _address);
5861 }
5862
5863
5864 status_t
5865 delete_area(area_id area)
5866 {
5867 return vm_delete_area(VMAddressSpace::KernelID(), area, true);
5868 }
5869
5870
5871 // #pragma mark - Userland syscalls
5872
5873
5874 status_t
5875 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
5876 addr_t size)
5877 {
5878 // filter out some unavailable values (for userland)
5879 switch (addressSpec) {
5880 case B_ANY_KERNEL_ADDRESS:
5881 case B_ANY_KERNEL_BLOCK_ADDRESS:
5882 return B_BAD_VALUE;
5883 }
5884
5885 addr_t address;
5886
5887 if (!IS_USER_ADDRESS(userAddress)
5888 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
5889 return B_BAD_ADDRESS;
5890
5891 status_t status = vm_reserve_address_range(
5892 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
5893 RESERVED_AVOID_BASE);
5894 if (status != B_OK)
5895 return status;
5896
5897 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
5898 vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5899 (void*)address, size);
5900 return B_BAD_ADDRESS;
5901 }
5902
5903 return B_OK;
5904 }
5905
5906
5907 status_t
5908 _user_unreserve_address_range(addr_t address, addr_t size)
5909 {
5910 return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
5911 (void*)address, size);
5912 }
5913
5914
5915 area_id
5916 _user_area_for(void* address)
5917 {
5918 return vm_area_for((addr_t)address, false);
5919 }
5920
5921
5922 area_id
5923 _user_find_area(const char* userName)
5924 {
5925 char name[B_OS_NAME_LENGTH];
5926
5927 if (!IS_USER_ADDRESS(userName)
5928 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
5929 return B_BAD_ADDRESS;
5930
5931 return find_area(name);
5932 }
5933
5934
5935 status_t
5936 _user_get_area_info(area_id area, area_info* userInfo)
5937 {
5938 if (!IS_USER_ADDRESS(userInfo))
5939 return B_BAD_ADDRESS;
5940
5941 area_info info;
5942 status_t status = get_area_info(area, &info);
5943 if (status < B_OK)
5944 return status;
5945
5946 // TODO: do we want to prevent userland from seeing kernel protections?
5947 //info.protection &= B_USER_PROTECTION;
5948
5949 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5950 return B_BAD_ADDRESS;
5951
5952 return status;
5953 }
5954
5955
5956 status_t
5957 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
5958 {
5959 ssize_t cookie;
5960
5961 if (!IS_USER_ADDRESS(userCookie)
5962 || !IS_USER_ADDRESS(userInfo)
5963 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
5964 return B_BAD_ADDRESS;
5965
5966 area_info info;
5967 status_t status = _get_next_area_info(team, &cookie, &info,
5968 sizeof(area_info));
5969 if (status != B_OK)
5970 return status;
5971
5972 //info.protection &= B_USER_PROTECTION;
5973
5974 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
5975 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
5976 return B_BAD_ADDRESS;
5977
5978 return status;
5979 }
5980
5981
5982 status_t
5983 _user_set_area_protection(area_id area, uint32 newProtection)
5984 {
5985 if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0)
5986 return B_BAD_VALUE;
5987
5988 return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
5989 newProtection, false);
5990 }
5991
5992
5993 status_t
5994 _user_resize_area(area_id area, size_t newSize)
5995 {
5996 // TODO: Since we restrict deleting of areas to those owned by the team,
5997 // we should also do that for resizing (check other functions, too).
5998 return vm_resize_area(area, newSize, false);
5999 }
6000
6001
6002 area_id
6003 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6004 team_id target)
6005 {
6006 // filter out some unavailable values (for userland)
6007 switch (addressSpec) {
6008 case B_ANY_KERNEL_ADDRESS:
6009 case B_ANY_KERNEL_BLOCK_ADDRESS:
6010 return B_BAD_VALUE;
6011 }
6012
6013 void* address;
6014 if (!IS_USER_ADDRESS(userAddress)
6015 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6016 return B_BAD_ADDRESS;
6017
6018 area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6019 if (newArea < B_OK)
6020 return newArea;
6021
6022 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6023 return B_BAD_ADDRESS;
6024
6025 return newArea;
6026 }
6027
6028
6029 area_id
6030 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6031 uint32 protection, area_id sourceArea)
6032 {
6033 char name[B_OS_NAME_LENGTH];
6034 void* address;
6035
6036 // filter out some unavailable values (for userland)
6037 switch (addressSpec) {
6038 case B_ANY_KERNEL_ADDRESS:
6039 case B_ANY_KERNEL_BLOCK_ADDRESS:
6040 return B_BAD_VALUE;
6041 }
6042 if ((protection & ~B_USER_AREA_FLAGS) != 0)
6043 return B_BAD_VALUE;
6044
6045 if (!IS_USER_ADDRESS(userName)
6046 || !IS_USER_ADDRESS(userAddress)
6047 || user_strlcpy(name, userName, sizeof(name)) < B_OK
6048 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6049 return B_BAD_ADDRESS;
6050
6051 fix_protection(&protection);
6052
6053 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6054 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6055 false);
6056 if (clonedArea < B_OK)
6057 return clonedArea;
6058
6059 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6060 delete_area(clonedArea);
6061 return B_BAD_ADDRESS;
6062 }
6063
6064 return clonedArea;
6065 }
6066
6067
6068 area_id
6069 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6070 size_t size, uint32 lock, uint32 protection)
6071 {
6072 char name[B_OS_NAME_LENGTH];
6073 void* address;
6074
6075 // filter out some unavailable values (for userland)
6076 switch (addressSpec) {
6077 case B_ANY_KERNEL_ADDRESS:
6078 case B_ANY_KERNEL_BLOCK_ADDRESS:
6079 return B_BAD_VALUE;
6080 }
6081 if ((protection & ~B_USER_AREA_FLAGS) != 0)
6082 return B_BAD_VALUE;
6083
6084 if (!IS_USER_ADDRESS(userName)
6085 || !IS_USER_ADDRESS(userAddress)
6086 || user_strlcpy(name, userName, sizeof(name)) < B_OK
6087 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6088 return B_BAD_ADDRESS;
6089
6090 if (addressSpec == B_EXACT_ADDRESS
6091 && IS_KERNEL_ADDRESS(address))
6092 return B_BAD_VALUE;
6093
6094 if (addressSpec == B_ANY_ADDRESS)
6095 addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6096 if (addressSpec == B_BASE_ADDRESS)
6097 addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6098
6099 fix_protection(&protection);
6100
6101 virtual_address_restrictions virtualRestrictions = {};
6102 virtualRestrictions.address = address;
6103 virtualRestrictions.address_specification = addressSpec;
6104 physical_address_restrictions physicalRestrictions = {};
6105 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6106 size, lock, protection, 0, 0, &virtualRestrictions,
6107 &physicalRestrictions, false, &address);
6108
6109 if (area >= B_OK
6110 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6111 delete_area(area);
6112 return B_BAD_ADDRESS;
6113 }
6114
6115 return area;
6116 }
6117
6118
6119 status_t
6120 _user_delete_area(area_id area)
6121 {
6122 // Unlike the BeOS implementation, you can now only delete areas
6123 // that you have created yourself from userland.
6124 // The documentation to delete_area() explicitly states that this
6125 // will be restricted in the future, and so it will.
6126 return vm_delete_area(VMAddressSpace::CurrentID(), area, false);
6127 }
6128
6129
6130 // TODO: create a BeOS style call for this!
6131
6132 area_id
6133 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec,
6134 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
6135 int fd, off_t offset)
6136 {
6137 char name[B_OS_NAME_LENGTH];
6138 void* address;
6139 area_id area;
6140
6141 if ((protection & ~B_USER_AREA_FLAGS) != 0)
6142 return B_BAD_VALUE;
6143
6144 fix_protection(&protection);
6145
6146 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress)
6147 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK
6148 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6149 return B_BAD_ADDRESS;
6150
6151 if (addressSpec == B_EXACT_ADDRESS) {
6152 if ((addr_t)address + size < (addr_t)address
6153 || (addr_t)address % B_PAGE_SIZE != 0) {
6154 return B_BAD_VALUE;
6155 }
6156 if (!IS_USER_ADDRESS(address)
6157 || !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6158 return B_BAD_ADDRESS;
6159 }
6160 }
6161
6162 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address,
6163 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset,
6164 false);
6165 if (area < B_OK)
6166 return area;
6167
6168 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6169 return B_BAD_ADDRESS;
6170
6171 return area;
6172 }
6173
6174
6175 status_t
6176 _user_unmap_memory(void* _address, size_t size)
6177 {
6178 addr_t address = (addr_t)_address;
6179
6180 // check params
6181 if (size == 0 || (addr_t)address + size < (addr_t)address
6182 || (addr_t)address % B_PAGE_SIZE != 0) {
6183 return B_BAD_VALUE;
6184 }
6185
6186 if (!IS_USER_ADDRESS(address)
6187 || !IS_USER_ADDRESS((addr_t)address + size - 1)) {
6188 return B_BAD_ADDRESS;
6189 }
6190
6191 // Write lock the address space and ensure the address range is not wired.
6192 AddressSpaceWriteLocker locker;
6193 do {
6194 status_t status = locker.SetTo(team_get_current_team_id());
6195 if (status != B_OK)
6196 return status;
6197 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address,
6198 size, &locker));
6199
6200 // unmap
6201 return unmap_address_range(locker.AddressSpace(), address, size, false);
6202 }
6203
6204
6205 status_t
6206 _user_set_memory_protection(void* _address, size_t size, uint32 protection)
6207 {
6208 // check address range
6209 addr_t address = (addr_t)_address;
6210 size = PAGE_ALIGN(size);
6211
6212 if ((address % B_PAGE_SIZE) != 0)
6213 return B_BAD_VALUE;
6214 if (!is_user_address_range(_address, size)) {
6215 // weird error code required by POSIX
6216 return ENOMEM;
6217 }
6218
6219 // extend and check protection
6220 if ((protection & ~B_USER_PROTECTION) != 0)
6221 return B_BAD_VALUE;
6222
6223 fix_protection(&protection);
6224
6225 // We need to write lock the address space, since we're going to play with
6226 // the areas. Also make sure that none of the areas is wired and that we're
6227 // actually allowed to change the protection.
6228 AddressSpaceWriteLocker locker;
6229
6230 bool restart;
6231 do {
6232 restart = false;
6233
6234 status_t status = locker.SetTo(team_get_current_team_id());
6235 if (status != B_OK)
6236 return status;
6237
6238 // First round: Check whether the whole range is covered by areas and we
6239 // are allowed to modify them.
6240 addr_t currentAddress = address;
6241 size_t sizeLeft = size;
6242 while (sizeLeft > 0) {
6243 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6244 if (area == NULL)
6245 return B_NO_MEMORY;
6246
6247 if ((area->protection & B_KERNEL_AREA) != 0)
6248 return B_NOT_ALLOWED;
6249 if (area->protection_max != 0
6250 && (protection & area->protection_max) != (protection & B_USER_PROTECTION)) {
6251 return B_NOT_ALLOWED;
6252 }
6253
6254 addr_t offset = currentAddress - area->Base();
6255 size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6256
6257 AreaCacheLocker cacheLocker(area);
6258
6259 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize,
6260 &locker, &cacheLocker)) {
6261 restart = true;
6262 break;
6263 }
6264
6265 cacheLocker.Unlock();
6266
6267 currentAddress += rangeSize;
6268 sizeLeft -= rangeSize;
6269 }
6270 } while (restart);
6271
6272 // Second round: If the protections differ from that of the area, create a
6273 // page protection array and re-map mapped pages.
6274 VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
6275 addr_t currentAddress = address;
6276 size_t sizeLeft = size;
6277 while (sizeLeft > 0) {
6278 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress);
6279 if (area == NULL)
6280 return B_NO_MEMORY;
6281
6282 addr_t offset = currentAddress - area->Base();
6283 size_t rangeSize = min_c(area->Size() - offset, sizeLeft);
6284
6285 currentAddress += rangeSize;
6286 sizeLeft -= rangeSize;
6287
6288 if (area->page_protections == NULL) {
6289 if (area->protection == protection)
6290 continue;
6291 if (offset == 0 && rangeSize == area->Size()) {
6292 				// The whole area is covered: let vm_set_area_protection() handle it.
6293 status_t status = vm_set_area_protection(area->address_space->ID(),
6294 area->id, protection, false);
6295 if (status != B_OK)
6296 return status;
6297 continue;
6298 }
6299
6300 status_t status = allocate_area_page_protections(area);
6301 if (status != B_OK)
6302 return status;
6303 }
6304
6305 // We need to lock the complete cache chain, since we potentially unmap
6306 // pages of lower caches.
6307 VMCache* topCache = vm_area_get_locked_cache(area);
6308 VMCacheChainLocker cacheChainLocker(topCache);
6309 cacheChainLocker.LockAllSourceCaches();
6310
6311 // Adjust the committed size, if necessary.
6312 if (topCache->source != NULL && topCache->temporary) {
6313 const bool becomesWritable = (protection & B_WRITE_AREA) != 0;
6314 ssize_t commitmentChange = 0;
6315 const off_t areaCacheBase = area->Base() - area->cache_offset;
6316 for (addr_t pageAddress = area->Base() + offset;
6317 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6318 if (topCache->LookupPage(pageAddress - areaCacheBase) != NULL) {
6319 // This page should already be accounted for in the commitment.
6320 continue;
6321 }
6322
6323 const bool isWritable
6324 = (get_area_page_protection(area, pageAddress) & B_WRITE_AREA) != 0;
6325
6326 if (becomesWritable && !isWritable)
6327 commitmentChange += B_PAGE_SIZE;
6328 else if (!becomesWritable && isWritable)
6329 commitmentChange -= B_PAGE_SIZE;
6330 }
6331
6332 if (commitmentChange != 0) {
6333 const off_t newCommitment = topCache->committed_size + commitmentChange;
6334 ASSERT(newCommitment <= (topCache->virtual_end - topCache->virtual_base));
6335 status_t status = topCache->Commit(newCommitment, VM_PRIORITY_USER);
6336 if (status != B_OK)
6337 return status;
6338 }
6339 }
6340
6341 for (addr_t pageAddress = area->Base() + offset;
6342 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) {
6343 map->Lock();
6344
6345 set_area_page_protection(area, pageAddress, protection);
6346
6347 phys_addr_t physicalAddress;
6348 uint32 flags;
6349
6350 status_t error = map->Query(pageAddress, &physicalAddress, &flags);
6351 if (error != B_OK || (flags & PAGE_PRESENT) == 0) {
6352 map->Unlock();
6353 continue;
6354 }
6355
6356 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
6357 if (page == NULL) {
6358 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR
6359 "\n", area, physicalAddress);
6360 map->Unlock();
6361 return B_ERROR;
6362 }
6363
6364 // If the page is not in the topmost cache and write access is
6365 // requested, we have to unmap it. Otherwise we can re-map it with
6366 // the new protection.
6367 bool unmapPage = page->Cache() != topCache
6368 && (protection & B_WRITE_AREA) != 0;
6369
6370 if (!unmapPage)
6371 map->ProtectPage(area, pageAddress, protection);
6372
6373 map->Unlock();
6374
6375 if (unmapPage) {
6376 DEBUG_PAGE_ACCESS_START(page);
6377 unmap_page(area, pageAddress);
6378 DEBUG_PAGE_ACCESS_END(page);
6379 }
6380 }
6381 }
6382
6383 return B_OK;
6384 }
6385
6386
6387 status_t
6388 _user_sync_memory(void* _address, size_t size, uint32 flags)
6389 {
6390 addr_t address = (addr_t)_address;
6391 size = PAGE_ALIGN(size);
6392
6393 // check params
6394 if ((address % B_PAGE_SIZE) != 0)
6395 return B_BAD_VALUE;
6396 if (!is_user_address_range(_address, size)) {
6397 // weird error code required by POSIX
6398 return ENOMEM;
6399 }
6400
6401 bool writeSync = (flags & MS_SYNC) != 0;
6402 bool writeAsync = (flags & MS_ASYNC) != 0;
6403 if (writeSync && writeAsync)
6404 return B_BAD_VALUE;
6405
6406 if (size == 0 || (!writeSync && !writeAsync))
6407 return B_OK;
6408
6409 // iterate through the range and sync all concerned areas
6410 while (size > 0) {
6411 // read lock the address space
6412 AddressSpaceReadLocker locker;
6413 status_t error = locker.SetTo(team_get_current_team_id());
6414 if (error != B_OK)
6415 return error;
6416
6417 // get the first area
6418 VMArea* area = locker.AddressSpace()->LookupArea(address);
6419 if (area == NULL)
6420 return B_NO_MEMORY;
6421
6422 uint32 offset = address - area->Base();
6423 size_t rangeSize = min_c(area->Size() - offset, size);
6424 offset += area->cache_offset;
6425
6426 // lock the cache
6427 AreaCacheLocker cacheLocker(area);
6428 if (!cacheLocker)
6429 return B_BAD_VALUE;
6430 VMCache* cache = area->cache;
6431
6432 locker.Unlock();
6433
6434 uint32 firstPage = offset >> PAGE_SHIFT;
6435 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT);
6436
6437 // write the pages
6438 if (cache->type == CACHE_TYPE_VNODE) {
6439 if (writeSync) {
6440 // synchronous
6441 error = vm_page_write_modified_page_range(cache, firstPage,
6442 endPage);
6443 if (error != B_OK)
6444 return error;
6445 } else {
6446 // asynchronous
6447 vm_page_schedule_write_page_range(cache, firstPage, endPage);
6448 // TODO: This is probably not quite what is supposed to happen.
6449 // Especially when a lot has to be written, it might take ages
6450 // until it really hits the disk.
6451 }
6452 }
6453
6454 address += rangeSize;
6455 size -= rangeSize;
6456 }
6457
6458 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to
6459 // synchronize multiple mappings of the same file. In our VM they never get
6460 // out of sync, though, so we don't have to do anything.
6461
6462 return B_OK;
6463 }
6464
6465
6466 status_t
6467 _user_memory_advice(void* _address, size_t size, uint32 advice)
6468 {
6469 addr_t address = (addr_t)_address;
6470 if ((address % B_PAGE_SIZE) != 0)
6471 return B_BAD_VALUE;
6472
6473 size = PAGE_ALIGN(size);
6474 if (!is_user_address_range(_address, size)) {
6475 // weird error code required by POSIX
6476 return B_NO_MEMORY;
6477 }
6478
6479 switch (advice) {
6480 case MADV_NORMAL:
6481 case MADV_SEQUENTIAL:
6482 case MADV_RANDOM:
6483 case MADV_WILLNEED:
6484 case MADV_DONTNEED:
6485 // TODO: Implement!
6486 break;
6487
6488 case MADV_FREE:
6489 {
6490 AddressSpaceWriteLocker locker;
6491 do {
6492 status_t status = locker.SetTo(team_get_current_team_id());
6493 if (status != B_OK)
6494 return status;
6495 } while (wait_if_address_range_is_wired(locker.AddressSpace(),
6496 address, size, &locker));
6497
6498 discard_address_range(locker.AddressSpace(), address, size, false);
6499 break;
6500 }
6501
6502 default:
6503 return B_BAD_VALUE;
6504 }
6505
6506 return B_OK;
6507 }
6508
6509
6510 status_t
6511 _user_get_memory_properties(team_id teamID, const void* address,
6512 uint32* _protected, uint32* _lock)
6513 {
6514 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
6515 return B_BAD_ADDRESS;
6516
6517 AddressSpaceReadLocker locker;
6518 status_t error = locker.SetTo(teamID);
6519 if (error != B_OK)
6520 return error;
6521
6522 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
6523 if (area == NULL)
6524 return B_NO_MEMORY;
6525
6526 uint32 protection = get_area_page_protection(area, (addr_t)address);
6527 uint32 wiring = area->wiring;
6528
6529 locker.Unlock();
6530
6531 error = user_memcpy(_protected, &protection, sizeof(protection));
6532 if (error != B_OK)
6533 return error;
6534
6535 error = user_memcpy(_lock, &wiring, sizeof(wiring));
6536
6537 return error;
6538 }
6539
6540
6541 static status_t
6542 user_set_memory_swappable(const void* _address, size_t size, bool swappable)
6543 {
6544 #if ENABLE_SWAP_SUPPORT
6545 // check address range
6546 addr_t address = (addr_t)_address;
6547 size = PAGE_ALIGN(size);
6548
6549 if ((address % B_PAGE_SIZE) != 0)
6550 return EINVAL;
6551 if (!is_user_address_range(_address, size))
6552 return EINVAL;
6553
6554 const addr_t endAddress = address + size;
6555
6556 AddressSpaceReadLocker addressSpaceLocker;
6557 status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
6558 if (error != B_OK)
6559 return error;
6560 VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();
6561
6562 // iterate through all concerned areas
6563 addr_t nextAddress = address;
6564 while (nextAddress != endAddress) {
6565 // get the next area
6566 VMArea* area = addressSpace->LookupArea(nextAddress);
6567 if (area == NULL) {
6568 error = B_BAD_ADDRESS;
6569 break;
6570 }
6571
6572 const addr_t areaStart = nextAddress;
6573 const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
6574 nextAddress = areaEnd;
6575
6576 error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6577 if (error != B_OK) {
6578 // We don't need to unset or reset things on failure.
6579 break;
6580 }
6581
6582 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
6583 VMAnonymousCache* anonCache = NULL;
6584 if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
6585 			// This memory can already never be swapped. Nothing to do.
6586 } else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
6587 error = anonCache->SetCanSwapPages(areaStart - area->Base(),
6588 areaEnd - areaStart, swappable);
6589 } else {
6590 // Some other cache type? We cannot affect anything here.
6591 error = EINVAL;
6592 }
6593
6594 cacheChainLocker.Unlock();
6595
6596 unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
6597 if (error != B_OK)
6598 break;
6599 }
6600
6601 return error;
6602 #else
6603 // No swap support? Nothing to do.
6604 return B_OK;
6605 #endif
6606 }
6607
6608
6609 status_t
6610 _user_mlock(const void* _address, size_t size)
6611 {
6612 return user_set_memory_swappable(_address, size, false);
6613 }
6614
6615
6616 status_t
6617 _user_munlock(const void* _address, size_t size)
6618 {
6619 // TODO: B_SHARED_AREAs need to be handled a bit differently:
6620 // if multiple clones of an area had mlock() called on them,
6621 // munlock() must also be called on all of them to actually unlock.
6622 // (At present, the first munlock() will unlock all.)
6623 // TODO: fork() should automatically unlock memory in the child.
6624 return user_set_memory_swappable(_address, size, true);
6625 }
6626
6627
6628 // #pragma mark -- compatibility
6629
6630
6631 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6632
6633
6634 struct physical_entry_beos {
6635 uint32 address;
6636 uint32 size;
6637 };
6638
6639
6640 /*! The physical_entry structure has changed. We need to translate it to the
6641 old one.
6642 */
6643 extern "C" int32
6644 __get_memory_map_beos(const void* _address, size_t numBytes,
6645 physical_entry_beos* table, int32 numEntries)
6646 {
6647 if (numEntries <= 0)
6648 return B_BAD_VALUE;
6649
6650 const uint8* address = (const uint8*)_address;
6651
6652 int32 count = 0;
6653 while (numBytes > 0 && count < numEntries) {
6654 physical_entry entry;
6655 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
6656 if (result < 0) {
6657 if (result != B_BUFFER_OVERFLOW)
6658 return result;
6659 }
6660
6661 if (entry.address >= (phys_addr_t)1 << 32) {
6662 			panic("get_memory_map(): Address is greater than 4 GB!");
6663 return B_ERROR;
6664 }
6665
6666 table[count].address = entry.address;
6667 table[count++].size = entry.size;
6668
6669 address += entry.size;
6670 numBytes -= entry.size;
6671 }
6672
6673 // null-terminate the table, if possible
6674 if (count < numEntries) {
6675 table[count].address = 0;
6676 table[count].size = 0;
6677 }
6678
6679 return B_OK;
6680 }
6681
6682
6683 /*! The type of the \a physicalAddress parameter has changed from void* to
6684 phys_addr_t.
6685 */
6686 extern "C" area_id
6687 __map_physical_memory_beos(const char* name, void* physicalAddress,
6688 size_t numBytes, uint32 addressSpec, uint32 protection,
6689 void** _virtualAddress)
6690 {
6691 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
6692 addressSpec, protection, _virtualAddress);
6693 }
6694
6695
6696 /*! The caller might not be able to deal with physical addresses >= 4 GB, so
6697 we meddle with the \a lock parameter to force 32 bit.
6698 */
6699 extern "C" area_id
6700 __create_area_beos(const char* name, void** _address, uint32 addressSpec,
6701 size_t size, uint32 lock, uint32 protection)
6702 {
6703 switch (lock) {
6704 case B_NO_LOCK:
6705 break;
6706 case B_FULL_LOCK:
6707 case B_LAZY_LOCK:
6708 lock = B_32_BIT_FULL_LOCK;
6709 break;
6710 case B_CONTIGUOUS:
6711 lock = B_32_BIT_CONTIGUOUS;
6712 break;
6713 }
6714
6715 return __create_area_haiku(name, _address, addressSpec, size, lock,
6716 protection);
6717 }
6718
6719
6720 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@",
6721 "BASE");
6722 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos",
6723 "map_physical_memory@", "BASE");
6724 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@",
6725 "BASE");
6726
6727 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6728 "get_memory_map@@", "1_ALPHA3");
6729 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6730 "map_physical_memory@@", "1_ALPHA3");
6731 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6732 "1_ALPHA3");
6733
6734
6735 #else
6736
6737
6738 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku",
6739 "get_memory_map@@", "BASE");
6740 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku",
6741 "map_physical_memory@@", "BASE");
6742 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@",
6743 "BASE");
6744
6745
6746 #endif // defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32
6747