1 /* 2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleterDrivers.h> 25 26 #include <symbol_versioning.h> 27 28 #include <arch/cpu.h> 29 #include <arch/vm.h> 30 #include <arch/user_memory.h> 31 #include <boot/elf.h> 32 #include <boot/stage2.h> 33 #include <condition_variable.h> 34 #include <console.h> 35 #include <debug.h> 36 #include <file_cache.h> 37 #include <fs/fd.h> 38 #include <heap.h> 39 #include <kernel.h> 40 #include <int.h> 41 #include <lock.h> 42 #include <low_resource_manager.h> 43 #include <slab/Slab.h> 44 #include <smp.h> 45 #include <system_info.h> 46 #include <thread.h> 47 #include <team.h> 48 #include <tracing.h> 49 #include <util/AutoLock.h> 50 #include <util/BitUtils.h> 51 #include <util/ThreadAutoLock.h> 52 #include <vm/vm_page.h> 53 #include <vm/vm_priv.h> 54 #include <vm/VMAddressSpace.h> 55 #include <vm/VMArea.h> 56 #include <vm/VMCache.h> 57 58 #include "VMAddressSpaceLocking.h" 59 #include "VMAnonymousCache.h" 60 #include "VMAnonymousNoSwapCache.h" 61 #include "IORequest.h" 62 63 64 //#define TRACE_VM 65 //#define TRACE_FAULTS 66 #ifdef TRACE_VM 67 # define TRACE(x) dprintf x 68 #else 69 # define TRACE(x) ; 70 #endif 71 #ifdef TRACE_FAULTS 72 # define FTRACE(x) dprintf x 73 #else 74 # define FTRACE(x) ; 75 #endif 76 77 78 namespace { 79 80 class AreaCacheLocking { 81 public: 82 inline bool Lock(VMCache* lockable) 83 { 84 return false; 85 } 86 87 inline void Unlock(VMCache* lockable) 88 { 89 vm_area_put_locked_cache(lockable); 90 } 91 }; 92 93 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 94 public: 95 inline AreaCacheLocker(VMCache* cache = NULL) 96 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 97 { 98 } 99 100 inline AreaCacheLocker(VMArea* area) 101 : AutoLocker<VMCache, AreaCacheLocking>() 102 { 103 SetTo(area); 104 } 105 106 inline void SetTo(VMCache* cache, bool alreadyLocked) 107 { 108 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 109 } 110 111 inline void SetTo(VMArea* area) 112 { 113 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 114 area != NULL ? 
				vm_area_get_locked_cache(area) : NULL, true, true);
	}
};


class VMCacheChainLocker {
public:
	VMCacheChainLocker()
		:
		fTopCache(NULL),
		fBottomCache(NULL)
	{
	}

	VMCacheChainLocker(VMCache* topCache)
		:
		fTopCache(topCache),
		fBottomCache(topCache)
	{
	}

	~VMCacheChainLocker()
	{
		Unlock();
	}

	void SetTo(VMCache* topCache)
	{
		fTopCache = topCache;
		fBottomCache = topCache;

		if (topCache != NULL)
			topCache->SetUserData(NULL);
	}

	VMCache* LockSourceCache()
	{
		if (fBottomCache == NULL || fBottomCache->source == NULL)
			return NULL;

		VMCache* previousCache = fBottomCache;

		fBottomCache = fBottomCache->source;
		fBottomCache->Lock();
		fBottomCache->AcquireRefLocked();
		fBottomCache->SetUserData(previousCache);

		return fBottomCache;
	}

	void LockAllSourceCaches()
	{
		while (LockSourceCache() != NULL) {
		}
	}

	void Unlock(VMCache* exceptCache = NULL)
	{
		if (fTopCache == NULL)
			return;

		// Unlock caches in source -> consumer direction. This is important to
		// avoid double-locking and a reversal of locking order in case a cache
		// is eligible for merging.
		VMCache* cache = fBottomCache;
		while (cache != NULL) {
			VMCache* nextCache = (VMCache*)cache->UserData();
			if (cache != exceptCache)
				cache->ReleaseRefAndUnlock(cache != fTopCache);

			if (cache == fTopCache)
				break;

			cache = nextCache;
		}

		fTopCache = NULL;
		fBottomCache = NULL;
	}

	void UnlockKeepRefs(bool keepTopCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fBottomCache;
		VMCache* cache = NULL;

		while (keepTopCacheLocked
				? nextCache != fTopCache : cache != fTopCache) {
			cache = nextCache;
			nextCache = (VMCache*)cache->UserData();
			cache->Unlock(cache != fTopCache);
		}
	}

	void RelockCaches(bool topCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fTopCache;
		VMCache* cache = NULL;
		if (topCacheLocked) {
			cache = nextCache;
			nextCache = cache->source;
		}

		while (cache != fBottomCache && nextCache != NULL) {
			VMCache* consumer = cache;
			cache = nextCache;
			nextCache = cache->source;
			cache->Lock();
			cache->SetUserData(consumer);
		}
	}

private:
	VMCache*	fTopCache;
	VMCache*	fBottomCache;
};

}	// namespace


// The memory reserve an allocation of a given priority must not touch.
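// (Indexed by the allocation's VM_PRIORITY_* value; the reserve shrinks with
// increasing priority, so VIP allocations may use up the last of the memory.)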
240 static const size_t kMemoryReserveForPriority[] = { 241 VM_MEMORY_RESERVE_USER, // user 242 VM_MEMORY_RESERVE_SYSTEM, // system 243 0 // VIP 244 }; 245 246 247 static ObjectCache** sPageMappingsObjectCaches; 248 static uint32 sPageMappingsMask; 249 250 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 251 252 static off_t sAvailableMemory; 253 static off_t sNeededMemory; 254 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 255 static uint32 sPageFaults; 256 257 static VMPhysicalPageMapper* sPhysicalPageMapper; 258 259 260 // function declarations 261 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 262 bool deletingAddressSpace, bool alreadyRemoved = false); 263 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 264 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage); 265 static status_t map_backing_store(VMAddressSpace* addressSpace, 266 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring, 267 int protection, int protectionMax, int mapping, uint32 flags, 268 const virtual_address_restrictions* addressRestrictions, bool kernel, 269 VMArea** _area, void** _virtualAddress); 270 static void fix_protection(uint32* protection); 271 272 273 // #pragma mark - 274 275 276 #if VM_PAGE_FAULT_TRACING 277 278 namespace VMPageFaultTracing { 279 280 class PageFaultStart : public AbstractTraceEntry { 281 public: 282 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 283 : 284 fAddress(address), 285 fPC(pc), 286 fWrite(write), 287 fUser(user) 288 { 289 Initialized(); 290 } 291 292 virtual void AddDump(TraceOutput& out) 293 { 294 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 295 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 296 } 297 298 private: 299 addr_t fAddress; 300 addr_t fPC; 301 bool fWrite; 302 bool fUser; 303 }; 304 305 306 // page fault errors 307 enum { 308 PAGE_FAULT_ERROR_NO_AREA = 0, 309 PAGE_FAULT_ERROR_KERNEL_ONLY, 310 PAGE_FAULT_ERROR_WRITE_PROTECTED, 311 PAGE_FAULT_ERROR_READ_PROTECTED, 312 PAGE_FAULT_ERROR_EXECUTE_PROTECTED, 313 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 314 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 315 }; 316 317 318 class PageFaultError : public AbstractTraceEntry { 319 public: 320 PageFaultError(area_id area, status_t error) 321 : 322 fArea(area), 323 fError(error) 324 { 325 Initialized(); 326 } 327 328 virtual void AddDump(TraceOutput& out) 329 { 330 switch (fError) { 331 case PAGE_FAULT_ERROR_NO_AREA: 332 out.Print("page fault error: no area"); 333 break; 334 case PAGE_FAULT_ERROR_KERNEL_ONLY: 335 out.Print("page fault error: area: %ld, kernel only", fArea); 336 break; 337 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 338 out.Print("page fault error: area: %ld, write protected", 339 fArea); 340 break; 341 case PAGE_FAULT_ERROR_READ_PROTECTED: 342 out.Print("page fault error: area: %ld, read protected", fArea); 343 break; 344 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED: 345 out.Print("page fault error: area: %ld, execute protected", 346 fArea); 347 break; 348 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 349 out.Print("page fault error: kernel touching bad user memory"); 350 break; 351 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 352 out.Print("page fault error: no address space"); 353 break; 354 default: 355 out.Print("page fault error: area: %ld, error: %s", fArea, 356 strerror(fError)); 357 break; 358 } 359 } 360 361 private: 362 area_id fArea; 363 status_t fError; 364 }; 365 366 367 class PageFaultDone : public AbstractTraceEntry { 368 public: 369 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 370 vm_page* page) 371 : 372 fArea(area), 373 fTopCache(topCache), 374 fCache(cache), 375 fPage(page) 376 { 377 Initialized(); 378 } 379 380 virtual void AddDump(TraceOutput& out) 381 { 382 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 383 "page: %p", fArea, fTopCache, fCache, fPage); 384 } 385 386 private: 387 area_id fArea; 388 VMCache* fTopCache; 389 VMCache* fCache; 390 vm_page* fPage; 391 }; 392 393 } // namespace VMPageFaultTracing 394 395 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 396 #else 397 # define TPF(x) ; 398 #endif // VM_PAGE_FAULT_TRACING 399 400 401 // #pragma mark - page mappings allocation 402 403 404 static void 405 create_page_mappings_object_caches() 406 { 407 // We want an even power of 2 smaller than the number of CPUs. 
408 const int32 numCPUs = smp_get_num_cpus(); 409 int32 count = next_power_of_2(numCPUs); 410 if (count > numCPUs) 411 count >>= 1; 412 sPageMappingsMask = count - 1; 413 414 sPageMappingsObjectCaches = new object_cache*[count]; 415 if (sPageMappingsObjectCaches == NULL) 416 panic("failed to allocate page mappings object_cache array"); 417 418 for (int32 i = 0; i < count; i++) { 419 char name[32]; 420 snprintf(name, sizeof(name), "page mappings %" B_PRId32, i); 421 422 object_cache* cache = create_object_cache_etc(name, 423 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 424 NULL, NULL); 425 if (cache == NULL) 426 panic("failed to create page mappings object_cache"); 427 428 object_cache_set_minimum_reserve(cache, 1024); 429 sPageMappingsObjectCaches[i] = cache; 430 } 431 } 432 433 434 static object_cache* 435 page_mapping_object_cache_for(page_num_t page) 436 { 437 return sPageMappingsObjectCaches[page & sPageMappingsMask]; 438 } 439 440 441 static vm_page_mapping* 442 allocate_page_mapping(page_num_t page, uint32 flags = 0) 443 { 444 return (vm_page_mapping*)object_cache_alloc(page_mapping_object_cache_for(page), 445 flags); 446 } 447 448 449 void 450 vm_free_page_mapping(page_num_t page, vm_page_mapping* mapping, uint32 flags) 451 { 452 object_cache_free(page_mapping_object_cache_for(page), mapping, flags); 453 } 454 455 456 // #pragma mark - 457 458 459 /*! The page's cache must be locked. 460 */ 461 static inline void 462 increment_page_wired_count(vm_page* page) 463 { 464 if (!page->IsMapped()) 465 atomic_add(&gMappedPagesCount, 1); 466 page->IncrementWiredCount(); 467 } 468 469 470 /*! The page's cache must be locked. 471 */ 472 static inline void 473 decrement_page_wired_count(vm_page* page) 474 { 475 page->DecrementWiredCount(); 476 if (!page->IsMapped()) 477 atomic_add(&gMappedPagesCount, -1); 478 } 479 480 481 static inline addr_t 482 virtual_page_address(VMArea* area, vm_page* page) 483 { 484 return area->Base() 485 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 486 } 487 488 489 static inline bool 490 is_page_in_area(VMArea* area, vm_page* page) 491 { 492 off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT); 493 return pageCacheOffsetBytes >= area->cache_offset 494 && pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size(); 495 } 496 497 498 //! You need to have the address space locked when calling this function 499 static VMArea* 500 lookup_area(VMAddressSpace* addressSpace, area_id id) 501 { 502 VMAreas::ReadLock(); 503 504 VMArea* area = VMAreas::LookupLocked(id); 505 if (area != NULL && area->address_space != addressSpace) 506 area = NULL; 507 508 VMAreas::ReadUnlock(); 509 510 return area; 511 } 512 513 514 static inline size_t 515 area_page_protections_size(size_t areaSize) 516 { 517 // In the page protections we store only the three user protections, 518 // so we use 4 bits per page. 519 return (areaSize / B_PAGE_SIZE + 1) / 2; 520 } 521 522 523 static status_t 524 allocate_area_page_protections(VMArea* area) 525 { 526 size_t bytes = area_page_protections_size(area->Size()); 527 area->page_protections = (uint8*)malloc_etc(bytes, 528 area->address_space == VMAddressSpace::Kernel() 529 ? 
HEAP_DONT_LOCK_KERNEL_SPACE : 0); 530 if (area->page_protections == NULL) 531 return B_NO_MEMORY; 532 533 // init the page protections for all pages to that of the area 534 uint32 areaProtection = area->protection 535 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 536 memset(area->page_protections, areaProtection | (areaProtection << 4), bytes); 537 538 // clear protections from the area 539 area->protection &= ~(B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA 540 | B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA | B_KERNEL_EXECUTE_AREA); 541 return B_OK; 542 } 543 544 545 static inline void 546 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 547 { 548 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 549 addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 550 uint8& entry = area->page_protections[pageIndex / 2]; 551 if (pageIndex % 2 == 0) 552 entry = (entry & 0xf0) | protection; 553 else 554 entry = (entry & 0x0f) | (protection << 4); 555 } 556 557 558 static inline uint32 559 get_area_page_protection(VMArea* area, addr_t pageAddress) 560 { 561 if (area->page_protections == NULL) 562 return area->protection; 563 564 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 565 uint32 protection = area->page_protections[pageIndex / 2]; 566 if (pageIndex % 2 == 0) 567 protection &= 0x0f; 568 else 569 protection >>= 4; 570 571 uint32 kernelProtection = 0; 572 if ((protection & B_READ_AREA) != 0) 573 kernelProtection |= B_KERNEL_READ_AREA; 574 if ((protection & B_WRITE_AREA) != 0) 575 kernelProtection |= B_KERNEL_WRITE_AREA; 576 577 // If this is a kernel area we return only the kernel flags. 578 if (area->address_space == VMAddressSpace::Kernel()) 579 return kernelProtection; 580 581 return protection | kernelProtection; 582 } 583 584 585 static inline uint8* 586 realloc_page_protections(uint8* pageProtections, size_t areaSize, 587 uint32 allocationFlags) 588 { 589 size_t bytes = area_page_protections_size(areaSize); 590 return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags); 591 } 592 593 594 /*! The caller must have reserved enough pages the translation map 595 implementation might need to map this page. 596 The page's cache must be locked. 597 */ 598 static status_t 599 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 600 vm_page_reservation* reservation) 601 { 602 VMTranslationMap* map = area->address_space->TranslationMap(); 603 604 bool wasMapped = page->IsMapped(); 605 606 if (area->wiring == B_NO_LOCK) { 607 DEBUG_PAGE_ACCESS_CHECK(page); 608 609 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 610 vm_page_mapping* mapping = allocate_page_mapping(page->physical_page_number, 611 CACHE_DONT_WAIT_FOR_MEMORY 612 | (isKernelSpace ? 
CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 613 if (mapping == NULL) 614 return B_NO_MEMORY; 615 616 mapping->page = page; 617 mapping->area = area; 618 619 map->Lock(); 620 621 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 622 area->MemoryType(), reservation); 623 624 // insert mapping into lists 625 if (!page->IsMapped()) 626 atomic_add(&gMappedPagesCount, 1); 627 628 page->mappings.Add(mapping); 629 area->mappings.Add(mapping); 630 631 map->Unlock(); 632 } else { 633 DEBUG_PAGE_ACCESS_CHECK(page); 634 635 map->Lock(); 636 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 637 area->MemoryType(), reservation); 638 map->Unlock(); 639 640 increment_page_wired_count(page); 641 } 642 643 if (!wasMapped) { 644 // The page is mapped now, so we must not remain in the cached queue. 645 // It also makes sense to move it from the inactive to the active, since 646 // otherwise the page daemon wouldn't come to keep track of it (in idle 647 // mode) -- if the page isn't touched, it will be deactivated after a 648 // full iteration through the queue at the latest. 649 if (page->State() == PAGE_STATE_CACHED 650 || page->State() == PAGE_STATE_INACTIVE) { 651 vm_page_set_state(page, PAGE_STATE_ACTIVE); 652 } 653 } 654 655 return B_OK; 656 } 657 658 659 /*! If \a preserveModified is \c true, the caller must hold the lock of the 660 page's cache. 661 */ 662 static inline bool 663 unmap_page(VMArea* area, addr_t virtualAddress) 664 { 665 return area->address_space->TranslationMap()->UnmapPage(area, 666 virtualAddress, true); 667 } 668 669 670 /*! If \a preserveModified is \c true, the caller must hold the lock of all 671 mapped pages' caches. 672 */ 673 static inline void 674 unmap_pages(VMArea* area, addr_t base, size_t size) 675 { 676 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 677 } 678 679 680 static inline bool 681 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset) 682 { 683 if (address < area->Base()) { 684 offset = area->Base() - address; 685 if (offset >= size) 686 return false; 687 688 address = area->Base(); 689 size -= offset; 690 offset = 0; 691 if (size > area->Size()) 692 size = area->Size(); 693 694 return true; 695 } 696 697 offset = address - area->Base(); 698 if (offset >= area->Size()) 699 return false; 700 701 if (size >= area->Size() - offset) 702 size = area->Size() - offset; 703 704 return true; 705 } 706 707 708 /*! Cuts a piece out of an area. If the given cut range covers the complete 709 area, it is deleted. If it covers the beginning or the end, the area is 710 resized accordingly. If the range covers some part in the middle of the 711 area, it is split in two; in this case the second area is returned via 712 \a _secondArea (the variable is left untouched in the other cases). 713 The address space must be write locked. 714 The caller must ensure that no part of the given range is wired. 715 */ 716 static status_t 717 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 718 addr_t size, VMArea** _secondArea, bool kernel) 719 { 720 addr_t offset; 721 if (!intersect_area(area, address, size, offset)) 722 return B_OK; 723 724 // Is the area fully covered? 
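	// (intersect_area() has already clipped address and size to the area, so
	// an exact match here means the whole area goes away.)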
725 if (address == area->Base() && size == area->Size()) { 726 delete_area(addressSpace, area, false); 727 return B_OK; 728 } 729 730 int priority; 731 uint32 allocationFlags; 732 if (addressSpace == VMAddressSpace::Kernel()) { 733 priority = VM_PRIORITY_SYSTEM; 734 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 735 | HEAP_DONT_LOCK_KERNEL_SPACE; 736 } else { 737 priority = VM_PRIORITY_USER; 738 allocationFlags = 0; 739 } 740 741 VMCache* cache = vm_area_get_locked_cache(area); 742 VMCacheChainLocker cacheChainLocker(cache); 743 cacheChainLocker.LockAllSourceCaches(); 744 745 // If no one else uses the area's cache and it's an anonymous cache, we can 746 // resize or split it, too. 747 bool onlyCacheUser = cache->areas == area && area->cache_next == NULL 748 && cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM; 749 750 const addr_t oldSize = area->Size(); 751 752 // Cut the end only? 753 if (offset > 0 && size == area->Size() - offset) { 754 status_t error = addressSpace->ShrinkAreaTail(area, offset, 755 allocationFlags); 756 if (error != B_OK) 757 return error; 758 759 if (area->page_protections != NULL) { 760 uint8* newProtections = realloc_page_protections( 761 area->page_protections, area->Size(), allocationFlags); 762 763 if (newProtections == NULL) { 764 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 765 return B_NO_MEMORY; 766 } 767 768 area->page_protections = newProtections; 769 } 770 771 // unmap pages 772 unmap_pages(area, address, size); 773 774 if (onlyCacheUser) { 775 // Since VMCache::Resize() can temporarily drop the lock, we must 776 // unlock all lower caches to prevent locking order inversion. 777 cacheChainLocker.Unlock(cache); 778 cache->Resize(cache->virtual_base + offset, priority); 779 cache->ReleaseRefAndUnlock(); 780 } 781 782 return B_OK; 783 } 784 785 // Cut the beginning only? 786 if (area->Base() == address) { 787 uint8* newProtections = NULL; 788 if (area->page_protections != NULL) { 789 // Allocate all memory before shifting as the shift might lose some 790 // bits. 791 newProtections = realloc_page_protections(NULL, area->Size(), 792 allocationFlags); 793 794 if (newProtections == NULL) 795 return B_NO_MEMORY; 796 } 797 798 // resize the area 799 status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size, 800 allocationFlags); 801 if (error != B_OK) { 802 if (newProtections != NULL) 803 free_etc(newProtections, allocationFlags); 804 return error; 805 } 806 807 if (area->page_protections != NULL) { 808 size_t oldBytes = area_page_protections_size(oldSize); 809 ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE; 810 bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4)); 811 812 size_t bytes = area_page_protections_size(area->Size()); 813 memcpy(newProtections, area->page_protections, bytes); 814 free_etc(area->page_protections, allocationFlags); 815 area->page_protections = newProtections; 816 } 817 818 // unmap pages 819 unmap_pages(area, address, size); 820 821 if (onlyCacheUser) { 822 // Since VMCache::Rebase() can temporarily drop the lock, we must 823 // unlock all lower caches to prevent locking order inversion. 824 cacheChainLocker.Unlock(cache); 825 cache->Rebase(cache->virtual_base + size, priority); 826 cache->ReleaseRefAndUnlock(); 827 } 828 area->cache_offset += size; 829 830 return B_OK; 831 } 832 833 // The tough part -- cut a piece out of the middle of the area. 
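	// (Roughly:   before:  [ first part | cut range | second part ]
	//             after:   [ first part ]             [ second part ])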
834 // We do that by shrinking the area to the begin section and creating a 835 // new area for the end section. 836 addr_t firstNewSize = offset; 837 addr_t secondBase = address + size; 838 addr_t secondSize = area->Size() - offset - size; 839 840 // unmap pages 841 unmap_pages(area, address, area->Size() - firstNewSize); 842 843 // resize the area 844 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 845 allocationFlags); 846 if (error != B_OK) 847 return error; 848 849 uint8* areaNewProtections = NULL; 850 uint8* secondAreaNewProtections = NULL; 851 852 // Try to allocate the new memory before making some hard to reverse 853 // changes. 854 if (area->page_protections != NULL) { 855 areaNewProtections = realloc_page_protections(NULL, area->Size(), 856 allocationFlags); 857 secondAreaNewProtections = realloc_page_protections(NULL, secondSize, 858 allocationFlags); 859 860 if (areaNewProtections == NULL || secondAreaNewProtections == NULL) { 861 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 862 free_etc(areaNewProtections, allocationFlags); 863 free_etc(secondAreaNewProtections, allocationFlags); 864 return B_NO_MEMORY; 865 } 866 } 867 868 virtual_address_restrictions addressRestrictions = {}; 869 addressRestrictions.address = (void*)secondBase; 870 addressRestrictions.address_specification = B_EXACT_ADDRESS; 871 VMArea* secondArea; 872 873 if (onlyCacheUser) { 874 // Create a new cache for the second area. 875 VMCache* secondCache; 876 error = VMCacheFactory::CreateAnonymousCache(secondCache, 877 area->protection & B_OVERCOMMITTING_AREA, 0, 0, 878 dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority); 879 if (error != B_OK) { 880 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 881 free_etc(areaNewProtections, allocationFlags); 882 free_etc(secondAreaNewProtections, allocationFlags); 883 return error; 884 } 885 886 secondCache->Lock(); 887 secondCache->temporary = cache->temporary; 888 secondCache->virtual_base = area->cache_offset; 889 secondCache->virtual_end = area->cache_offset + secondSize; 890 891 // Transfer the concerned pages from the first cache. 892 off_t adoptOffset = area->cache_offset + secondBase - area->Base(); 893 error = secondCache->Adopt(cache, adoptOffset, secondSize, 894 area->cache_offset); 895 896 if (error == B_OK) { 897 // Since VMCache::Resize() can temporarily drop the lock, we must 898 // unlock all lower caches to prevent locking order inversion. 899 cacheChainLocker.Unlock(cache); 900 cache->Resize(cache->virtual_base + firstNewSize, priority); 901 // Don't unlock the cache yet because we might have to resize it 902 // back. 903 904 // Map the second area. 905 error = map_backing_store(addressSpace, secondCache, 906 area->cache_offset, area->name, secondSize, area->wiring, 907 area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0, 908 &addressRestrictions, kernel, &secondArea, NULL); 909 } 910 911 if (error != B_OK) { 912 // Restore the original cache. 913 cache->Resize(cache->virtual_base + oldSize, priority); 914 915 // Move the pages back. 916 status_t readoptStatus = cache->Adopt(secondCache, 917 area->cache_offset, secondSize, adoptOffset); 918 if (readoptStatus != B_OK) { 919 // Some (swap) pages have not been moved back and will be lost 920 // once the second cache is deleted. 921 panic("failed to restore cache range: %s", 922 strerror(readoptStatus)); 923 924 // TODO: Handle out of memory cases by freeing memory and 925 // retrying. 
926 } 927 928 cache->ReleaseRefAndUnlock(); 929 secondCache->ReleaseRefAndUnlock(); 930 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 931 free_etc(areaNewProtections, allocationFlags); 932 free_etc(secondAreaNewProtections, allocationFlags); 933 return error; 934 } 935 936 // Now we can unlock it. 937 cache->ReleaseRefAndUnlock(); 938 secondCache->Unlock(); 939 } else { 940 error = map_backing_store(addressSpace, cache, area->cache_offset 941 + (secondBase - area->Base()), 942 area->name, secondSize, area->wiring, area->protection, 943 area->protection_max, REGION_NO_PRIVATE_MAP, 0, 944 &addressRestrictions, kernel, &secondArea, NULL); 945 if (error != B_OK) { 946 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 947 free_etc(areaNewProtections, allocationFlags); 948 free_etc(secondAreaNewProtections, allocationFlags); 949 return error; 950 } 951 // We need a cache reference for the new area. 952 cache->AcquireRefLocked(); 953 } 954 955 if (area->page_protections != NULL) { 956 // Copy the protection bits of the first area. 957 size_t areaBytes = area_page_protections_size(area->Size()); 958 memcpy(areaNewProtections, area->page_protections, areaBytes); 959 uint8* areaOldProtections = area->page_protections; 960 area->page_protections = areaNewProtections; 961 962 // Shift the protection bits of the second area to the start of 963 // the old array. 964 size_t oldBytes = area_page_protections_size(oldSize); 965 addr_t secondAreaOffset = secondBase - area->Base(); 966 ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE; 967 bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4)); 968 969 // Copy the protection bits of the second area. 970 size_t secondAreaBytes = area_page_protections_size(secondSize); 971 memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes); 972 secondArea->page_protections = secondAreaNewProtections; 973 974 // We don't need this anymore. 975 free_etc(areaOldProtections, allocationFlags); 976 977 // Set the correct page protections for the second area. 978 VMTranslationMap* map = addressSpace->TranslationMap(); 979 map->Lock(); 980 for (VMCachePagesTree::Iterator it 981 = secondArea->cache->pages.GetIterator(); 982 vm_page* page = it.Next();) { 983 if (is_page_in_area(secondArea, page)) { 984 addr_t address = virtual_page_address(secondArea, page); 985 uint32 pageProtection 986 = get_area_page_protection(secondArea, address); 987 map->ProtectPage(secondArea, address, pageProtection); 988 } 989 } 990 map->Unlock(); 991 } 992 993 if (_secondArea != NULL) 994 *_secondArea = secondArea; 995 996 return B_OK; 997 } 998 999 1000 /*! Deletes or cuts all areas in the given address range. 1001 The address space must be write-locked. 1002 The caller must ensure that no part of the given range is wired. 1003 */ 1004 static status_t 1005 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 1006 bool kernel) 1007 { 1008 size = PAGE_ALIGN(size); 1009 1010 // Check, whether the caller is allowed to modify the concerned areas. 
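	// (Userland callers may not touch areas marked B_KERNEL_AREA; this is
	// checked up front so we don't start cutting areas and then fail halfway
	// through the range.)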
1011 if (!kernel) { 1012 for (VMAddressSpace::AreaRangeIterator it 1013 = addressSpace->GetAreaRangeIterator(address, size); 1014 VMArea* area = it.Next();) { 1015 1016 if ((area->protection & B_KERNEL_AREA) != 0) { 1017 dprintf("unmap_address_range: team %" B_PRId32 " tried to " 1018 "unmap range of kernel area %" B_PRId32 " (%s)\n", 1019 team_get_current_team_id(), area->id, area->name); 1020 return B_NOT_ALLOWED; 1021 } 1022 } 1023 } 1024 1025 for (VMAddressSpace::AreaRangeIterator it 1026 = addressSpace->GetAreaRangeIterator(address, size); 1027 VMArea* area = it.Next();) { 1028 1029 status_t error = cut_area(addressSpace, area, address, size, NULL, 1030 kernel); 1031 if (error != B_OK) 1032 return error; 1033 // Failing after already messing with areas is ugly, but we 1034 // can't do anything about it. 1035 } 1036 1037 return B_OK; 1038 } 1039 1040 1041 static status_t 1042 discard_area_range(VMArea* area, addr_t address, addr_t size) 1043 { 1044 addr_t offset; 1045 if (!intersect_area(area, address, size, offset)) 1046 return B_OK; 1047 1048 // If someone else uses the area's cache or it's not an anonymous cache, we 1049 // can't discard. 1050 VMCache* cache = vm_area_get_locked_cache(area); 1051 if (cache->areas != area || area->cache_next != NULL 1052 || !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) { 1053 return B_OK; 1054 } 1055 1056 VMCacheChainLocker cacheChainLocker(cache); 1057 cacheChainLocker.LockAllSourceCaches(); 1058 1059 unmap_pages(area, address, size); 1060 1061 // Since VMCache::Discard() can temporarily drop the lock, we must 1062 // unlock all lower caches to prevent locking order inversion. 1063 cacheChainLocker.Unlock(cache); 1064 cache->Discard(cache->virtual_base + offset, size); 1065 cache->ReleaseRefAndUnlock(); 1066 1067 return B_OK; 1068 } 1069 1070 1071 static status_t 1072 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 1073 bool kernel) 1074 { 1075 for (VMAddressSpace::AreaRangeIterator it 1076 = addressSpace->GetAreaRangeIterator(address, size); 1077 VMArea* area = it.Next();) { 1078 status_t error = discard_area_range(area, address, size); 1079 if (error != B_OK) 1080 return error; 1081 } 1082 1083 return B_OK; 1084 } 1085 1086 1087 /*! You need to hold the lock of the cache and the write lock of the address 1088 space when calling this function. 1089 Note, that in case of error your cache will be temporarily unlocked. 1090 If \a addressSpec is \c B_EXACT_ADDRESS and the 1091 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 1092 that no part of the specified address range (base \c *_virtualAddress, size 1093 \a size) is wired. The cache will also be temporarily unlocked. 
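	In that case the function itself unmaps the existing range (via
	unmap_address_range()) before inserting the new area.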
*/
static status_t
map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
	const char* areaName, addr_t size, int wiring, int protection,
	int protectionMax, int mapping,
	uint32 flags, const virtual_address_restrictions* addressRestrictions,
	bool kernel, VMArea** _area, void** _virtualAddress)
{
	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
		addressSpace, cache, addressRestrictions->address, offset, size,
		addressRestrictions->address_specification, wiring, protection,
		protectionMax, _area, areaName));
	cache->AssertLocked();

	if (size == 0) {
#if KDEBUG
		panic("map_backing_store(): called with size=0 for area '%s'!",
			areaName);
#endif
		return B_BAD_VALUE;
	}
	if (offset < 0)
		return B_BAD_VALUE;

	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
	int priority;
	if (addressSpace != VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_USER;
	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
		priority = VM_PRIORITY_VIP;
		allocationFlags |= HEAP_PRIORITY_VIP;
	} else
		priority = VM_PRIORITY_SYSTEM;

	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
		allocationFlags);
	if (area == NULL)
		return B_NO_MEMORY;
	if (mapping != REGION_PRIVATE_MAP)
		area->protection_max = protectionMax & B_USER_PROTECTION;

	status_t status;

	// if this is a private map, we need to create a new cache
	// to handle the private copies of pages as they are written to
	VMCache* sourceCache = cache;
	if (mapping == REGION_PRIVATE_MAP) {
		VMCache* newCache;

		// create an anonymous cache
		status = VMCacheFactory::CreateAnonymousCache(newCache,
			(protection & B_STACK_AREA) != 0
				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
		if (status != B_OK)
			goto err1;

		newCache->Lock();
		newCache->temporary = 1;
		newCache->virtual_base = offset;
		newCache->virtual_end = offset + size;

		cache->AddConsumer(newCache);

		cache = newCache;
	}

	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
		status = cache->SetMinimalCommitment(size, priority);
		if (status != B_OK)
			goto err2;
	}

	// check to see if this address space has entered DELETE state
	if (addressSpace->IsBeingDeleted()) {
		// okay, someone is trying to delete this address space now, so we
		// can't insert the area, and have to back out
		status = B_BAD_TEAM_ID;
		goto err2;
	}

	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
		// temporarily unlock the current cache since it might be mapped to
		// some existing area, and unmap_address_range also needs to lock that
		// cache to delete the area.
1183 cache->Unlock(); 1184 status = unmap_address_range(addressSpace, 1185 (addr_t)addressRestrictions->address, size, kernel); 1186 cache->Lock(); 1187 if (status != B_OK) 1188 goto err2; 1189 } 1190 1191 status = addressSpace->InsertArea(area, size, addressRestrictions, 1192 allocationFlags, _virtualAddress); 1193 if (status == B_NO_MEMORY 1194 && addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) { 1195 // Due to how many locks are held, we cannot wait here for space to be 1196 // freed up, but we can at least notify the low_resource handler. 1197 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0); 1198 } 1199 if (status != B_OK) 1200 goto err2; 1201 1202 // attach the cache to the area 1203 area->cache = cache; 1204 area->cache_offset = offset; 1205 1206 // point the cache back to the area 1207 cache->InsertAreaLocked(area); 1208 if (mapping == REGION_PRIVATE_MAP) 1209 cache->Unlock(); 1210 1211 // insert the area in the global areas map 1212 status = VMAreas::Insert(area); 1213 if (status != B_OK) 1214 goto err3; 1215 1216 // grab a ref to the address space (the area holds this) 1217 addressSpace->Get(); 1218 1219 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1220 // cache, sourceCache, areaName, area); 1221 1222 *_area = area; 1223 return B_OK; 1224 1225 err3: 1226 cache->Lock(); 1227 cache->RemoveArea(area); 1228 area->cache = NULL; 1229 err2: 1230 if (mapping == REGION_PRIVATE_MAP) { 1231 // We created this cache, so we must delete it again. Note, that we 1232 // need to temporarily unlock the source cache or we'll otherwise 1233 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1234 sourceCache->Unlock(); 1235 cache->ReleaseRefAndUnlock(); 1236 sourceCache->Lock(); 1237 } 1238 err1: 1239 addressSpace->DeleteArea(area, allocationFlags); 1240 return status; 1241 } 1242 1243 1244 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 1245 locker1, locker2). 1246 */ 1247 template<typename LockerType1, typename LockerType2> 1248 static inline bool 1249 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 1250 { 1251 area->cache->AssertLocked(); 1252 1253 VMAreaUnwiredWaiter waiter; 1254 if (!area->AddWaiterIfWired(&waiter)) 1255 return false; 1256 1257 // unlock everything and wait 1258 if (locker1 != NULL) 1259 locker1->Unlock(); 1260 if (locker2 != NULL) 1261 locker2->Unlock(); 1262 1263 waiter.waitEntry.Wait(); 1264 1265 return true; 1266 } 1267 1268 1269 /*! Checks whether the given area has any wired ranges intersecting with the 1270 specified range and waits, if so. 1271 1272 When it has to wait, the function calls \c Unlock() on both \a locker1 1273 and \a locker2, if given. 1274 The area's top cache must be locked and must be unlocked as a side effect 1275 of calling \c Unlock() on either \a locker1 or \a locker2. 1276 1277 If the function does not have to wait it does not modify or unlock any 1278 object. 1279 1280 \param area The area to be checked. 1281 \param base The base address of the range to check. 1282 \param size The size of the address range to check. 1283 \param locker1 An object to be unlocked when before starting to wait (may 1284 be \c NULL). 1285 \param locker2 An object to be unlocked when before starting to wait (may 1286 be \c NULL). 1287 \return \c true, if the function had to wait, \c false otherwise. 
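	Callers typically retry in a loop when this function returns \c true,
	re-acquiring their locks and re-validating state, since everything was
	unlocked while waiting.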
1288 */ 1289 template<typename LockerType1, typename LockerType2> 1290 static inline bool 1291 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 1292 LockerType1* locker1, LockerType2* locker2) 1293 { 1294 area->cache->AssertLocked(); 1295 1296 VMAreaUnwiredWaiter waiter; 1297 if (!area->AddWaiterIfWired(&waiter, base, size)) 1298 return false; 1299 1300 // unlock everything and wait 1301 if (locker1 != NULL) 1302 locker1->Unlock(); 1303 if (locker2 != NULL) 1304 locker2->Unlock(); 1305 1306 waiter.waitEntry.Wait(); 1307 1308 return true; 1309 } 1310 1311 1312 /*! Checks whether the given address space has any wired ranges intersecting 1313 with the specified range and waits, if so. 1314 1315 Similar to wait_if_area_range_is_wired(), with the following differences: 1316 - All areas intersecting with the range are checked (respectively all until 1317 one is found that contains a wired range intersecting with the given 1318 range). 1319 - The given address space must at least be read-locked and must be unlocked 1320 when \c Unlock() is called on \a locker. 1321 - None of the areas' caches are allowed to be locked. 1322 */ 1323 template<typename LockerType> 1324 static inline bool 1325 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 1326 size_t size, LockerType* locker) 1327 { 1328 for (VMAddressSpace::AreaRangeIterator it 1329 = addressSpace->GetAreaRangeIterator(base, size); 1330 VMArea* area = it.Next();) { 1331 1332 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area)); 1333 1334 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker)) 1335 return true; 1336 } 1337 1338 return false; 1339 } 1340 1341 1342 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection(). 1343 It must be called in a situation where the kernel address space may be 1344 locked. 1345 */ 1346 status_t 1347 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie) 1348 { 1349 AddressSpaceReadLocker locker; 1350 VMArea* area; 1351 status_t status = locker.SetFromArea(id, area); 1352 if (status != B_OK) 1353 return status; 1354 1355 if (area->page_protections == NULL) { 1356 status = allocate_area_page_protections(area); 1357 if (status != B_OK) 1358 return status; 1359 } 1360 1361 *cookie = (void*)area; 1362 return B_OK; 1363 } 1364 1365 1366 /*! This is a debug helper function that can only be used with very specific 1367 use cases. 1368 Sets protection for the given address range to the protection specified. 1369 If \a protection is 0 then the involved pages will be marked non-present 1370 in the translation map to cause a fault on access. The pages aren't 1371 actually unmapped however so that they can be marked present again with 1372 additional calls to this function. For this to work the area must be 1373 fully locked in memory so that the pages aren't otherwise touched. 1374 This function does not lock the kernel address space and needs to be 1375 supplied with a \a cookie retrieved from a successful call to 1376 vm_prepare_kernel_area_debug_protection(). 
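	For illustration only, a typical sequence (placeholder variables, no error
	handling) might look like:

		void* cookie;
		vm_prepare_kernel_area_debug_protection(areaID, &cookie);
		// make the pages fault on any access
		vm_set_kernel_area_debug_protection(cookie, address, size, 0);
		// ... later: make them accessible again
		vm_set_kernel_area_debug_protection(cookie, address, size,
			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);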
1377 */ 1378 status_t 1379 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size, 1380 uint32 protection) 1381 { 1382 // check address range 1383 addr_t address = (addr_t)_address; 1384 size = PAGE_ALIGN(size); 1385 1386 if ((address % B_PAGE_SIZE) != 0 1387 || (addr_t)address + size < (addr_t)address 1388 || !IS_KERNEL_ADDRESS(address) 1389 || !IS_KERNEL_ADDRESS((addr_t)address + size)) { 1390 return B_BAD_VALUE; 1391 } 1392 1393 // Translate the kernel protection to user protection as we only store that. 1394 if ((protection & B_KERNEL_READ_AREA) != 0) 1395 protection |= B_READ_AREA; 1396 if ((protection & B_KERNEL_WRITE_AREA) != 0) 1397 protection |= B_WRITE_AREA; 1398 1399 VMAddressSpace* addressSpace = VMAddressSpace::GetKernel(); 1400 VMTranslationMap* map = addressSpace->TranslationMap(); 1401 VMArea* area = (VMArea*)cookie; 1402 1403 addr_t offset = address - area->Base(); 1404 if (area->Size() - offset < size) { 1405 panic("protect range not fully within supplied area"); 1406 return B_BAD_VALUE; 1407 } 1408 1409 if (area->page_protections == NULL) { 1410 panic("area has no page protections"); 1411 return B_BAD_VALUE; 1412 } 1413 1414 // Invalidate the mapping entries so any access to them will fault or 1415 // restore the mapping entries unchanged so that lookup will success again. 1416 map->Lock(); 1417 map->DebugMarkRangePresent(address, address + size, protection != 0); 1418 map->Unlock(); 1419 1420 // And set the proper page protections so that the fault case will actually 1421 // fail and not simply try to map a new page. 1422 for (addr_t pageAddress = address; pageAddress < address + size; 1423 pageAddress += B_PAGE_SIZE) { 1424 set_area_page_protection(area, pageAddress, protection); 1425 } 1426 1427 return B_OK; 1428 } 1429 1430 1431 status_t 1432 vm_block_address_range(const char* name, void* address, addr_t size) 1433 { 1434 if (!arch_vm_supports_protection(0)) 1435 return B_NOT_SUPPORTED; 1436 1437 AddressSpaceWriteLocker locker; 1438 status_t status = locker.SetTo(VMAddressSpace::KernelID()); 1439 if (status != B_OK) 1440 return status; 1441 1442 VMAddressSpace* addressSpace = locker.AddressSpace(); 1443 1444 // create an anonymous cache 1445 VMCache* cache; 1446 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false, 1447 VM_PRIORITY_SYSTEM); 1448 if (status != B_OK) 1449 return status; 1450 1451 cache->temporary = 1; 1452 cache->virtual_end = size; 1453 cache->Lock(); 1454 1455 VMArea* area; 1456 virtual_address_restrictions addressRestrictions = {}; 1457 addressRestrictions.address = address; 1458 addressRestrictions.address_specification = B_EXACT_ADDRESS; 1459 status = map_backing_store(addressSpace, cache, 0, name, size, 1460 B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions, 1461 true, &area, NULL); 1462 if (status != B_OK) { 1463 cache->ReleaseRefAndUnlock(); 1464 return status; 1465 } 1466 1467 cache->Unlock(); 1468 area->cache_type = CACHE_TYPE_RAM; 1469 return area->id; 1470 } 1471 1472 1473 status_t 1474 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1475 { 1476 AddressSpaceWriteLocker locker(team); 1477 if (!locker.IsLocked()) 1478 return B_BAD_TEAM_ID; 1479 1480 VMAddressSpace* addressSpace = locker.AddressSpace(); 1481 return addressSpace->UnreserveAddressRange((addr_t)address, size, 1482 addressSpace == VMAddressSpace::Kernel() 1483 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1484 } 1485 1486 1487 status_t 1488 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1489 addr_t size, uint32 flags) 1490 { 1491 if (size == 0) 1492 return B_BAD_VALUE; 1493 1494 AddressSpaceWriteLocker locker(team); 1495 if (!locker.IsLocked()) 1496 return B_BAD_TEAM_ID; 1497 1498 virtual_address_restrictions addressRestrictions = {}; 1499 addressRestrictions.address = *_address; 1500 addressRestrictions.address_specification = addressSpec; 1501 VMAddressSpace* addressSpace = locker.AddressSpace(); 1502 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags, 1503 addressSpace == VMAddressSpace::Kernel() 1504 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0, 1505 _address); 1506 } 1507 1508 1509 area_id 1510 vm_create_anonymous_area(team_id team, const char *name, addr_t size, 1511 uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize, 1512 const virtual_address_restrictions* virtualAddressRestrictions, 1513 const physical_address_restrictions* physicalAddressRestrictions, 1514 bool kernel, void** _address) 1515 { 1516 VMArea* area; 1517 VMCache* cache; 1518 vm_page* page = NULL; 1519 bool isStack = (protection & B_STACK_AREA) != 0; 1520 page_num_t guardPages; 1521 bool canOvercommit = false; 1522 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1523 ? VM_PAGE_ALLOC_CLEAR : 0; 1524 1525 TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n", 1526 team, name, size)); 1527 1528 size = PAGE_ALIGN(size); 1529 guardSize = PAGE_ALIGN(guardSize); 1530 guardPages = guardSize / B_PAGE_SIZE; 1531 1532 if (size == 0 || size < guardSize) 1533 return B_BAD_VALUE; 1534 if (!arch_vm_supports_protection(protection)) 1535 return B_NOT_SUPPORTED; 1536 1537 if (team == B_CURRENT_TEAM) 1538 team = VMAddressSpace::CurrentID(); 1539 if (team < 0) 1540 return B_BAD_TEAM_ID; 1541 1542 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1543 canOvercommit = true; 1544 1545 #ifdef DEBUG_KERNEL_STACKS 1546 if ((protection & B_KERNEL_STACK_AREA) != 0) 1547 isStack = true; 1548 #endif 1549 1550 // check parameters 1551 switch (virtualAddressRestrictions->address_specification) { 1552 case B_ANY_ADDRESS: 1553 case B_EXACT_ADDRESS: 1554 case B_BASE_ADDRESS: 1555 case B_ANY_KERNEL_ADDRESS: 1556 case B_ANY_KERNEL_BLOCK_ADDRESS: 1557 case B_RANDOMIZED_ANY_ADDRESS: 1558 case B_RANDOMIZED_BASE_ADDRESS: 1559 break; 1560 1561 default: 1562 return B_BAD_VALUE; 1563 } 1564 1565 // If low or high physical address restrictions are given, we force 1566 // B_CONTIGUOUS wiring, since only then we'll use 1567 // vm_page_allocate_page_run() which deals with those restrictions. 
1568 if (physicalAddressRestrictions->low_address != 0 1569 || physicalAddressRestrictions->high_address != 0) { 1570 wiring = B_CONTIGUOUS; 1571 } 1572 1573 physical_address_restrictions stackPhysicalRestrictions; 1574 bool doReserveMemory = false; 1575 switch (wiring) { 1576 case B_NO_LOCK: 1577 break; 1578 case B_FULL_LOCK: 1579 case B_LAZY_LOCK: 1580 case B_CONTIGUOUS: 1581 doReserveMemory = true; 1582 break; 1583 case B_ALREADY_WIRED: 1584 break; 1585 case B_LOMEM: 1586 stackPhysicalRestrictions = *physicalAddressRestrictions; 1587 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024; 1588 physicalAddressRestrictions = &stackPhysicalRestrictions; 1589 wiring = B_CONTIGUOUS; 1590 doReserveMemory = true; 1591 break; 1592 case B_32_BIT_FULL_LOCK: 1593 if (B_HAIKU_PHYSICAL_BITS <= 32 1594 || (uint64)vm_page_max_address() < (uint64)1 << 32) { 1595 wiring = B_FULL_LOCK; 1596 doReserveMemory = true; 1597 break; 1598 } 1599 // TODO: We don't really support this mode efficiently. Just fall 1600 // through for now ... 1601 case B_32_BIT_CONTIGUOUS: 1602 #if B_HAIKU_PHYSICAL_BITS > 32 1603 if (vm_page_max_address() >= (phys_addr_t)1 << 32) { 1604 stackPhysicalRestrictions = *physicalAddressRestrictions; 1605 stackPhysicalRestrictions.high_address 1606 = (phys_addr_t)1 << 32; 1607 physicalAddressRestrictions = &stackPhysicalRestrictions; 1608 } 1609 #endif 1610 wiring = B_CONTIGUOUS; 1611 doReserveMemory = true; 1612 break; 1613 default: 1614 return B_BAD_VALUE; 1615 } 1616 1617 // Optimization: For a single-page contiguous allocation without low/high 1618 // memory restriction B_FULL_LOCK wiring suffices. 1619 if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE 1620 && physicalAddressRestrictions->low_address == 0 1621 && physicalAddressRestrictions->high_address == 0) { 1622 wiring = B_FULL_LOCK; 1623 } 1624 1625 // For full lock or contiguous areas we're also going to map the pages and 1626 // thus need to reserve pages for the mapping backend upfront. 1627 addr_t reservedMapPages = 0; 1628 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1629 AddressSpaceWriteLocker locker; 1630 status_t status = locker.SetTo(team); 1631 if (status != B_OK) 1632 return status; 1633 1634 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1635 reservedMapPages = map->MaxPagesNeededToMap(0, size - 1); 1636 } 1637 1638 int priority; 1639 if (team != VMAddressSpace::KernelID()) 1640 priority = VM_PRIORITY_USER; 1641 else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) 1642 priority = VM_PRIORITY_VIP; 1643 else 1644 priority = VM_PRIORITY_SYSTEM; 1645 1646 // Reserve memory before acquiring the address space lock. This reduces the 1647 // chances of failure, since while holding the write lock to the address 1648 // space (if it is the kernel address space that is), the low memory handler 1649 // won't be able to free anything for us. 1650 addr_t reservedMemory = 0; 1651 if (doReserveMemory) { 1652 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1653 if (vm_try_reserve_memory(size, priority, timeout) != B_OK) 1654 return B_NO_MEMORY; 1655 reservedMemory = size; 1656 // TODO: We don't reserve the memory for the pages for the page 1657 // directories/tables. We actually need to do since we currently don't 1658 // reclaim them (and probably can't reclaim all of them anyway). Thus 1659 // there are actually less physical pages than there should be, which 1660 // can get the VM into trouble in low memory situations. 
1661 } 1662 1663 AddressSpaceWriteLocker locker; 1664 VMAddressSpace* addressSpace; 1665 status_t status; 1666 1667 // For full lock areas reserve the pages before locking the address 1668 // space. E.g. block caches can't release their memory while we hold the 1669 // address space lock. 1670 page_num_t reservedPages = reservedMapPages; 1671 if (wiring == B_FULL_LOCK) 1672 reservedPages += size / B_PAGE_SIZE; 1673 1674 vm_page_reservation reservation; 1675 if (reservedPages > 0) { 1676 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1677 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1678 priority)) { 1679 reservedPages = 0; 1680 status = B_WOULD_BLOCK; 1681 goto err0; 1682 } 1683 } else 1684 vm_page_reserve_pages(&reservation, reservedPages, priority); 1685 } 1686 1687 if (wiring == B_CONTIGUOUS) { 1688 // we try to allocate the page run here upfront as this may easily 1689 // fail for obvious reasons 1690 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1691 size / B_PAGE_SIZE, physicalAddressRestrictions, priority); 1692 if (page == NULL) { 1693 status = B_NO_MEMORY; 1694 goto err0; 1695 } 1696 } 1697 1698 // Lock the address space and, if B_EXACT_ADDRESS and 1699 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1700 // is not wired. 1701 do { 1702 status = locker.SetTo(team); 1703 if (status != B_OK) 1704 goto err1; 1705 1706 addressSpace = locker.AddressSpace(); 1707 } while (virtualAddressRestrictions->address_specification 1708 == B_EXACT_ADDRESS 1709 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1710 && wait_if_address_range_is_wired(addressSpace, 1711 (addr_t)virtualAddressRestrictions->address, size, &locker)); 1712 1713 // create an anonymous cache 1714 // if it's a stack, make sure that two pages are available at least 1715 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1716 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1717 wiring == B_NO_LOCK, priority); 1718 if (status != B_OK) 1719 goto err1; 1720 1721 cache->temporary = 1; 1722 cache->virtual_end = size; 1723 cache->committed_size = reservedMemory; 1724 // TODO: This should be done via a method. 
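	// The cache has taken over the reservation at this point; zero the local
	// count so the error paths below don't unreserve that memory again.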
1725 reservedMemory = 0; 1726 1727 cache->Lock(); 1728 1729 status = map_backing_store(addressSpace, cache, 0, name, size, wiring, 1730 protection, 0, REGION_NO_PRIVATE_MAP, flags, 1731 virtualAddressRestrictions, kernel, &area, _address); 1732 1733 if (status != B_OK) { 1734 cache->ReleaseRefAndUnlock(); 1735 goto err1; 1736 } 1737 1738 locker.DegradeToReadLock(); 1739 1740 switch (wiring) { 1741 case B_NO_LOCK: 1742 case B_LAZY_LOCK: 1743 // do nothing - the pages are mapped in as needed 1744 break; 1745 1746 case B_FULL_LOCK: 1747 { 1748 // Allocate and map all pages for this area 1749 1750 off_t offset = 0; 1751 for (addr_t address = area->Base(); 1752 address < area->Base() + (area->Size() - 1); 1753 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1754 #ifdef DEBUG_KERNEL_STACKS 1755 # ifdef STACK_GROWS_DOWNWARDS 1756 if (isStack && address < area->Base() 1757 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1758 # else 1759 if (isStack && address >= area->Base() + area->Size() 1760 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1761 # endif 1762 continue; 1763 #endif 1764 vm_page* page = vm_page_allocate_page(&reservation, 1765 PAGE_STATE_WIRED | pageAllocFlags); 1766 cache->InsertPage(page, offset); 1767 map_page(area, page, address, protection, &reservation); 1768 1769 DEBUG_PAGE_ACCESS_END(page); 1770 } 1771 1772 break; 1773 } 1774 1775 case B_ALREADY_WIRED: 1776 { 1777 // The pages should already be mapped. This is only really useful 1778 // during boot time. Find the appropriate vm_page objects and stick 1779 // them in the cache object. 1780 VMTranslationMap* map = addressSpace->TranslationMap(); 1781 off_t offset = 0; 1782 1783 if (!gKernelStartup) 1784 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1785 1786 map->Lock(); 1787 1788 for (addr_t virtualAddress = area->Base(); 1789 virtualAddress < area->Base() + (area->Size() - 1); 1790 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1791 phys_addr_t physicalAddress; 1792 uint32 flags; 1793 status = map->Query(virtualAddress, &physicalAddress, &flags); 1794 if (status < B_OK) { 1795 panic("looking up mapping failed for va 0x%lx\n", 1796 virtualAddress); 1797 } 1798 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1799 if (page == NULL) { 1800 panic("looking up page failed for pa %#" B_PRIxPHYSADDR 1801 "\n", physicalAddress); 1802 } 1803 1804 DEBUG_PAGE_ACCESS_START(page); 1805 1806 cache->InsertPage(page, offset); 1807 increment_page_wired_count(page); 1808 vm_page_set_state(page, PAGE_STATE_WIRED); 1809 page->busy = false; 1810 1811 DEBUG_PAGE_ACCESS_END(page); 1812 } 1813 1814 map->Unlock(); 1815 break; 1816 } 1817 1818 case B_CONTIGUOUS: 1819 { 1820 // We have already allocated our continuous pages run, so we can now 1821 // just map them in the address space 1822 VMTranslationMap* map = addressSpace->TranslationMap(); 1823 phys_addr_t physicalAddress 1824 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 1825 addr_t virtualAddress = area->Base(); 1826 off_t offset = 0; 1827 1828 map->Lock(); 1829 1830 for (virtualAddress = area->Base(); virtualAddress < area->Base() 1831 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE, 1832 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 1833 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1834 if (page == NULL) 1835 panic("couldn't lookup physical page just allocated\n"); 1836 1837 status = map->Map(virtualAddress, physicalAddress, protection, 1838 area->MemoryType(), &reservation); 1839 if (status < B_OK) 1840 panic("couldn't map physical page in 
page run\n"); 1841 1842 cache->InsertPage(page, offset); 1843 increment_page_wired_count(page); 1844 1845 DEBUG_PAGE_ACCESS_END(page); 1846 } 1847 1848 map->Unlock(); 1849 break; 1850 } 1851 1852 default: 1853 break; 1854 } 1855 1856 cache->Unlock(); 1857 1858 if (reservedPages > 0) 1859 vm_page_unreserve_pages(&reservation); 1860 1861 TRACE(("vm_create_anonymous_area: done\n")); 1862 1863 area->cache_type = CACHE_TYPE_RAM; 1864 return area->id; 1865 1866 err1: 1867 if (wiring == B_CONTIGUOUS) { 1868 // we had reserved the area space upfront... 1869 phys_addr_t pageNumber = page->physical_page_number; 1870 int32 i; 1871 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1872 page = vm_lookup_page(pageNumber); 1873 if (page == NULL) 1874 panic("couldn't lookup physical page just allocated\n"); 1875 1876 vm_page_set_state(page, PAGE_STATE_FREE); 1877 } 1878 } 1879 1880 err0: 1881 if (reservedPages > 0) 1882 vm_page_unreserve_pages(&reservation); 1883 if (reservedMemory > 0) 1884 vm_unreserve_memory(reservedMemory); 1885 1886 return status; 1887 } 1888 1889 1890 area_id 1891 vm_map_physical_memory(team_id team, const char* name, void** _address, 1892 uint32 addressSpec, addr_t size, uint32 protection, 1893 phys_addr_t physicalAddress, bool alreadyWired) 1894 { 1895 VMArea* area; 1896 VMCache* cache; 1897 addr_t mapOffset; 1898 1899 TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p" 1900 ", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %" 1901 B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address, 1902 addressSpec, size, protection, physicalAddress)); 1903 1904 if (!arch_vm_supports_protection(protection)) 1905 return B_NOT_SUPPORTED; 1906 1907 AddressSpaceWriteLocker locker(team); 1908 if (!locker.IsLocked()) 1909 return B_BAD_TEAM_ID; 1910 1911 // if the physical address is somewhat inside a page, 1912 // move the actual area down to align on a page boundary 1913 mapOffset = physicalAddress % B_PAGE_SIZE; 1914 size += mapOffset; 1915 physicalAddress -= mapOffset; 1916 1917 size = PAGE_ALIGN(size); 1918 1919 // create a device cache 1920 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1921 if (status != B_OK) 1922 return status; 1923 1924 cache->virtual_end = size; 1925 1926 cache->Lock(); 1927 1928 virtual_address_restrictions addressRestrictions = {}; 1929 addressRestrictions.address = *_address; 1930 addressRestrictions.address_specification = addressSpec & ~B_MEMORY_TYPE_MASK; 1931 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1932 B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, CREATE_AREA_DONT_COMMIT_MEMORY, 1933 &addressRestrictions, true, &area, _address); 1934 1935 if (status < B_OK) 1936 cache->ReleaseRefLocked(); 1937 1938 cache->Unlock(); 1939 1940 if (status == B_OK) { 1941 // Set requested memory type -- default to uncached, but allow 1942 // that to be overridden by ranges that may already exist. 1943 uint32 memoryType = addressSpec & B_MEMORY_TYPE_MASK; 1944 const bool weak = (memoryType == 0); 1945 if (weak) 1946 memoryType = B_UNCACHED_MEMORY; 1947 1948 status = arch_vm_set_memory_type(area, physicalAddress, memoryType, 1949 weak ? 
&memoryType : NULL); 1950 1951 area->SetMemoryType(memoryType); 1952 1953 if (status != B_OK) 1954 delete_area(locker.AddressSpace(), area, false); 1955 } 1956 1957 if (status != B_OK) 1958 return status; 1959 1960 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1961 1962 if (alreadyWired) { 1963 // The area is already mapped, but possibly not with the right 1964 // memory type. 1965 map->Lock(); 1966 map->ProtectArea(area, area->protection); 1967 map->Unlock(); 1968 } else { 1969 // Map the area completely. 1970 1971 // reserve pages needed for the mapping 1972 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1973 area->Base() + (size - 1)); 1974 vm_page_reservation reservation; 1975 vm_page_reserve_pages(&reservation, reservePages, 1976 team == VMAddressSpace::KernelID() 1977 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1978 1979 map->Lock(); 1980 1981 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1982 map->Map(area->Base() + offset, physicalAddress + offset, 1983 protection, area->MemoryType(), &reservation); 1984 } 1985 1986 map->Unlock(); 1987 1988 vm_page_unreserve_pages(&reservation); 1989 } 1990 1991 // modify the pointer returned to be offset back into the new area 1992 // the same way the physical address in was offset 1993 *_address = (void*)((addr_t)*_address + mapOffset); 1994 1995 area->cache_type = CACHE_TYPE_DEVICE; 1996 return area->id; 1997 } 1998 1999 2000 /*! Don't use! 2001 TODO: This function was introduced to map physical page vecs to 2002 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 2003 use a device cache and does not track vm_page::wired_count! 2004 */ 2005 area_id 2006 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 2007 uint32 addressSpec, addr_t* _size, uint32 protection, 2008 struct generic_io_vec* vecs, uint32 vecCount) 2009 { 2010 TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual " 2011 "= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", " 2012 "vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address, 2013 addressSpec, _size, protection, vecs, vecCount)); 2014 2015 if (!arch_vm_supports_protection(protection) 2016 || (addressSpec & B_MEMORY_TYPE_MASK) != 0) { 2017 return B_NOT_SUPPORTED; 2018 } 2019 2020 AddressSpaceWriteLocker locker(team); 2021 if (!locker.IsLocked()) 2022 return B_BAD_TEAM_ID; 2023 2024 if (vecCount == 0) 2025 return B_BAD_VALUE; 2026 2027 addr_t size = 0; 2028 for (uint32 i = 0; i < vecCount; i++) { 2029 if (vecs[i].base % B_PAGE_SIZE != 0 2030 || vecs[i].length % B_PAGE_SIZE != 0) { 2031 return B_BAD_VALUE; 2032 } 2033 2034 size += vecs[i].length; 2035 } 2036 2037 // create a device cache 2038 VMCache* cache; 2039 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base); 2040 if (result != B_OK) 2041 return result; 2042 2043 cache->virtual_end = size; 2044 2045 cache->Lock(); 2046 2047 VMArea* area; 2048 virtual_address_restrictions addressRestrictions = {}; 2049 addressRestrictions.address = *_address; 2050 addressRestrictions.address_specification = addressSpec & ~B_MEMORY_TYPE_MASK; 2051 result = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 2052 B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, CREATE_AREA_DONT_COMMIT_MEMORY, 2053 &addressRestrictions, true, &area, _address); 2054 2055 if (result != B_OK) 2056 cache->ReleaseRefLocked(); 2057 2058 cache->Unlock(); 2059 2060 if (result != B_OK) 2061 return result; 2062 2063 VMTranslationMap* map = 
locker.AddressSpace()->TranslationMap(); 2064 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 2065 area->Base() + (size - 1)); 2066 2067 vm_page_reservation reservation; 2068 vm_page_reserve_pages(&reservation, reservePages, 2069 team == VMAddressSpace::KernelID() 2070 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2071 map->Lock(); 2072 2073 uint32 vecIndex = 0; 2074 size_t vecOffset = 0; 2075 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 2076 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) { 2077 vecOffset = 0; 2078 vecIndex++; 2079 } 2080 2081 if (vecIndex >= vecCount) 2082 break; 2083 2084 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 2085 protection, area->MemoryType(), &reservation); 2086 2087 vecOffset += B_PAGE_SIZE; 2088 } 2089 2090 map->Unlock(); 2091 vm_page_unreserve_pages(&reservation); 2092 2093 if (_size != NULL) 2094 *_size = size; 2095 2096 area->cache_type = CACHE_TYPE_DEVICE; 2097 return area->id; 2098 } 2099 2100 2101 area_id 2102 vm_create_null_area(team_id team, const char* name, void** address, 2103 uint32 addressSpec, addr_t size, uint32 flags) 2104 { 2105 size = PAGE_ALIGN(size); 2106 2107 // Lock the address space and, if B_EXACT_ADDRESS and 2108 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 2109 // is not wired. 2110 AddressSpaceWriteLocker locker; 2111 do { 2112 if (locker.SetTo(team) != B_OK) 2113 return B_BAD_TEAM_ID; 2114 } while (addressSpec == B_EXACT_ADDRESS 2115 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 2116 && wait_if_address_range_is_wired(locker.AddressSpace(), 2117 (addr_t)*address, size, &locker)); 2118 2119 // create a null cache 2120 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 2121 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 2122 VMCache* cache; 2123 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 2124 if (status != B_OK) 2125 return status; 2126 2127 cache->temporary = 1; 2128 cache->virtual_end = size; 2129 2130 cache->Lock(); 2131 2132 VMArea* area; 2133 virtual_address_restrictions addressRestrictions = {}; 2134 addressRestrictions.address = *address; 2135 addressRestrictions.address_specification = addressSpec; 2136 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 2137 B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA, 2138 REGION_NO_PRIVATE_MAP, flags | CREATE_AREA_DONT_COMMIT_MEMORY, 2139 &addressRestrictions, true, &area, address); 2140 2141 if (status < B_OK) { 2142 cache->ReleaseRefAndUnlock(); 2143 return status; 2144 } 2145 2146 cache->Unlock(); 2147 2148 area->cache_type = CACHE_TYPE_NULL; 2149 return area->id; 2150 } 2151 2152 2153 /*! Creates the vnode cache for the specified \a vnode. 2154 The vnode has to be marked busy when calling this function. 2155 */ 2156 status_t 2157 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 2158 { 2159 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 2160 } 2161 2162 2163 /*! \a cache must be locked. The area's address space must be read-locked. 
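	Maps those pages of \a cache that are already resident (and neither busy
	nor inactive) into \a area read-only, mapping at most \a maxCount pages.
	Only pages already in the cache are touched; nothing is read from disk.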
2164 */ 2165 static void 2166 pre_map_area_pages(VMArea* area, VMCache* cache, 2167 vm_page_reservation* reservation, int32 maxCount) 2168 { 2169 addr_t baseAddress = area->Base(); 2170 addr_t cacheOffset = area->cache_offset; 2171 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 2172 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE; 2173 2174 VMCachePagesTree::Iterator it = cache->pages.GetIterator(firstPage, true, true); 2175 vm_page* page; 2176 while ((page = it.Next()) != NULL && maxCount > 0) { 2177 if (page->cache_offset >= endPage) 2178 break; 2179 2180 // skip busy and inactive pages 2181 if (page->busy || (page->usage_count == 0 && !page->accessed)) 2182 continue; 2183 2184 DEBUG_PAGE_ACCESS_START(page); 2185 map_page(area, page, 2186 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 2187 B_READ_AREA | B_KERNEL_READ_AREA, reservation); 2188 maxCount--; 2189 DEBUG_PAGE_ACCESS_END(page); 2190 } 2191 } 2192 2193 2194 /*! Will map the file specified by \a fd to an area in memory. 2195 The file will be mirrored beginning at the specified \a offset. The 2196 \a offset and \a size arguments have to be page aligned. 2197 */ 2198 static area_id 2199 _vm_map_file(team_id team, const char* name, void** _address, 2200 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 2201 bool unmapAddressRange, int fd, off_t offset, bool kernel) 2202 { 2203 // TODO: for binary files, we want to make sure that they get the 2204 // copy of a file at a given time, ie. later changes should not 2205 // make it into the mapped copy -- this will need quite some changes 2206 // to be done in a nice way 2207 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping " 2208 "%" B_PRIu32 ")\n", fd, offset, size, mapping)); 2209 2210 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 2211 size = PAGE_ALIGN(size); 2212 2213 if (mapping == REGION_NO_PRIVATE_MAP) 2214 protection |= B_SHARED_AREA; 2215 if (addressSpec != B_EXACT_ADDRESS) 2216 unmapAddressRange = false; 2217 2218 uint32 mappingFlags = 0; 2219 if (unmapAddressRange) 2220 mappingFlags |= CREATE_AREA_UNMAP_ADDRESS_RANGE; 2221 2222 if (fd < 0) { 2223 virtual_address_restrictions virtualRestrictions = {}; 2224 virtualRestrictions.address = *_address; 2225 virtualRestrictions.address_specification = addressSpec; 2226 physical_address_restrictions physicalRestrictions = {}; 2227 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection, 2228 mappingFlags, 0, &virtualRestrictions, &physicalRestrictions, kernel, 2229 _address); 2230 } 2231 2232 // get the open flags of the FD 2233 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 2234 if (descriptor == NULL) 2235 return EBADF; 2236 int32 openMode = descriptor->open_mode; 2237 put_fd(descriptor); 2238 2239 // The FD must open for reading at any rate. For shared mapping with write 2240 // access, additionally the FD must be open for writing. 
2241 if ((openMode & O_ACCMODE) == O_WRONLY 2242 || (mapping == REGION_NO_PRIVATE_MAP 2243 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2244 && (openMode & O_ACCMODE) == O_RDONLY)) { 2245 return EACCES; 2246 } 2247 2248 uint32 protectionMax = 0; 2249 if (mapping == REGION_NO_PRIVATE_MAP) { 2250 if ((openMode & O_ACCMODE) == O_RDWR) 2251 protectionMax = protection | B_USER_PROTECTION; 2252 else 2253 protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA); 2254 } else if (mapping == REGION_PRIVATE_MAP) { 2255 // For privately mapped read-only regions, skip committing memory. 2256 // (If protections are changed later on, memory will be committed then.) 2257 if ((protection & B_WRITE_AREA) == 0) 2258 mappingFlags |= CREATE_AREA_DONT_COMMIT_MEMORY; 2259 } 2260 2261 // get the vnode for the object, this also grabs a ref to it 2262 struct vnode* vnode = NULL; 2263 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2264 if (status < B_OK) 2265 return status; 2266 VnodePutter vnodePutter(vnode); 2267 2268 // If we're going to pre-map pages, we need to reserve the pages needed by 2269 // the mapping backend upfront. 2270 page_num_t reservedPreMapPages = 0; 2271 vm_page_reservation reservation; 2272 if ((protection & B_READ_AREA) != 0) { 2273 AddressSpaceWriteLocker locker; 2274 status = locker.SetTo(team); 2275 if (status != B_OK) 2276 return status; 2277 2278 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2279 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 2280 2281 locker.Unlock(); 2282 2283 vm_page_reserve_pages(&reservation, reservedPreMapPages, 2284 team == VMAddressSpace::KernelID() 2285 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2286 } 2287 2288 struct PageUnreserver { 2289 PageUnreserver(vm_page_reservation* reservation) 2290 : 2291 fReservation(reservation) 2292 { 2293 } 2294 2295 ~PageUnreserver() 2296 { 2297 if (fReservation != NULL) 2298 vm_page_unreserve_pages(fReservation); 2299 } 2300 2301 vm_page_reservation* fReservation; 2302 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 2303 2304 // Lock the address space and, if the specified address range shall be 2305 // unmapped, ensure it is not wired. 2306 AddressSpaceWriteLocker locker; 2307 do { 2308 if (locker.SetTo(team) != B_OK) 2309 return B_BAD_TEAM_ID; 2310 } while (unmapAddressRange 2311 && wait_if_address_range_is_wired(locker.AddressSpace(), 2312 (addr_t)*_address, size, &locker)); 2313 2314 // TODO: this only works for file systems that use the file cache 2315 VMCache* cache; 2316 status = vfs_get_vnode_cache(vnode, &cache, false); 2317 if (status < B_OK) 2318 return status; 2319 2320 cache->Lock(); 2321 2322 VMArea* area; 2323 virtual_address_restrictions addressRestrictions = {}; 2324 addressRestrictions.address = *_address; 2325 addressRestrictions.address_specification = addressSpec; 2326 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 2327 0, protection, protectionMax, mapping, mappingFlags, 2328 &addressRestrictions, kernel, &area, _address); 2329 2330 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2331 // map_backing_store() cannot know we no longer need the ref 2332 cache->ReleaseRefLocked(); 2333 } 2334 2335 if (status == B_OK && (protection & B_READ_AREA) != 0) { 2336 // Pre-map at most 10MB worth of pages. 
2337 pre_map_area_pages(area, cache, &reservation, 2338 (10LL * 1024 * 1024) / B_PAGE_SIZE); 2339 } 2340 2341 cache->Unlock(); 2342 2343 if (status == B_OK) { 2344 // TODO: this probably deserves a smarter solution, e.g. probably 2345 // trigger prefetch somewhere else. 2346 2347 // Prefetch at most 10MB starting from "offset", but only if the cache 2348 // doesn't already contain more pages than the prefetch size. 2349 const size_t prefetch = min_c(size, 10LL * 1024 * 1024); 2350 if (cache->page_count < (prefetch / B_PAGE_SIZE)) 2351 cache_prefetch_vnode(vnode, offset, prefetch); 2352 } 2353 2354 if (status != B_OK) 2355 return status; 2356 2357 area->cache_type = CACHE_TYPE_VNODE; 2358 return area->id; 2359 } 2360 2361 2362 area_id 2363 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2364 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2365 int fd, off_t offset) 2366 { 2367 if (!arch_vm_supports_protection(protection)) 2368 return B_NOT_SUPPORTED; 2369 2370 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2371 mapping, unmapAddressRange, fd, offset, true); 2372 } 2373 2374 2375 VMCache* 2376 vm_area_get_locked_cache(VMArea* area) 2377 { 2378 rw_lock_read_lock(&sAreaCacheLock); 2379 2380 while (true) { 2381 VMCache* cache = area->cache; 2382 2383 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2384 // cache has been deleted 2385 rw_lock_read_lock(&sAreaCacheLock); 2386 continue; 2387 } 2388 2389 rw_lock_read_lock(&sAreaCacheLock); 2390 2391 if (cache == area->cache) { 2392 cache->AcquireRefLocked(); 2393 rw_lock_read_unlock(&sAreaCacheLock); 2394 return cache; 2395 } 2396 2397 // the cache changed in the meantime 2398 cache->Unlock(); 2399 } 2400 } 2401 2402 2403 void 2404 vm_area_put_locked_cache(VMCache* cache) 2405 { 2406 cache->ReleaseRefAndUnlock(); 2407 } 2408 2409 2410 area_id 2411 vm_clone_area(team_id team, const char* name, void** address, 2412 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2413 bool kernel) 2414 { 2415 VMArea* newArea = NULL; 2416 VMArea* sourceArea; 2417 2418 // Check whether the source area exists and is cloneable. If so, mark it 2419 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2420 { 2421 AddressSpaceWriteLocker locker; 2422 status_t status = locker.SetFromArea(sourceID, sourceArea); 2423 if (status != B_OK) 2424 return status; 2425 2426 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2427 return B_NOT_ALLOWED; 2428 2429 sourceArea->protection |= B_SHARED_AREA; 2430 protection |= B_SHARED_AREA; 2431 } 2432 2433 // Now lock both address spaces and actually do the cloning. 
2434 2435 MultiAddressSpaceLocker locker; 2436 VMAddressSpace* sourceAddressSpace; 2437 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2438 if (status != B_OK) 2439 return status; 2440 2441 VMAddressSpace* targetAddressSpace; 2442 status = locker.AddTeam(team, true, &targetAddressSpace); 2443 if (status != B_OK) 2444 return status; 2445 2446 status = locker.Lock(); 2447 if (status != B_OK) 2448 return status; 2449 2450 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2451 if (sourceArea == NULL) 2452 return B_BAD_VALUE; 2453 2454 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2455 return B_NOT_ALLOWED; 2456 2457 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2458 2459 if (!kernel && sourceAddressSpace != targetAddressSpace 2460 && (sourceArea->protection & B_CLONEABLE_AREA) == 0) { 2461 #if KDEBUG 2462 Team* team = thread_get_current_thread()->team; 2463 dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%" 2464 B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID); 2465 #endif 2466 status = B_NOT_ALLOWED; 2467 } else if (sourceArea->cache_type == CACHE_TYPE_NULL) { 2468 status = B_NOT_ALLOWED; 2469 } else { 2470 uint32 flags = 0; 2471 if (mapping != REGION_PRIVATE_MAP) 2472 flags |= CREATE_AREA_DONT_COMMIT_MEMORY; 2473 2474 virtual_address_restrictions addressRestrictions = {}; 2475 addressRestrictions.address = *address; 2476 addressRestrictions.address_specification = addressSpec; 2477 status = map_backing_store(targetAddressSpace, cache, 2478 sourceArea->cache_offset, name, sourceArea->Size(), 2479 sourceArea->wiring, protection, sourceArea->protection_max, 2480 mapping, flags, &addressRestrictions, 2481 kernel, &newArea, address); 2482 } 2483 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2484 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2485 // to create a new cache, and has therefore already acquired a reference 2486 // to the source cache - but otherwise it has no idea that we need 2487 // one. 2488 cache->AcquireRefLocked(); 2489 } 2490 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2491 // we need to map in everything at this point 2492 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2493 // we don't have actual pages to map but a physical area 2494 VMTranslationMap* map 2495 = sourceArea->address_space->TranslationMap(); 2496 map->Lock(); 2497 2498 phys_addr_t physicalAddress; 2499 uint32 oldProtection; 2500 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2501 2502 map->Unlock(); 2503 2504 map = targetAddressSpace->TranslationMap(); 2505 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2506 newArea->Base() + (newArea->Size() - 1)); 2507 2508 vm_page_reservation reservation; 2509 vm_page_reserve_pages(&reservation, reservePages, 2510 targetAddressSpace == VMAddressSpace::Kernel() 2511 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2512 map->Lock(); 2513 2514 for (addr_t offset = 0; offset < newArea->Size(); 2515 offset += B_PAGE_SIZE) { 2516 map->Map(newArea->Base() + offset, physicalAddress + offset, 2517 protection, newArea->MemoryType(), &reservation); 2518 } 2519 2520 map->Unlock(); 2521 vm_page_unreserve_pages(&reservation); 2522 } else { 2523 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2524 size_t reservePages = map->MaxPagesNeededToMap( 2525 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2526 vm_page_reservation reservation; 2527 vm_page_reserve_pages(&reservation, reservePages, 2528 targetAddressSpace == VMAddressSpace::Kernel() 2529 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2530 2531 // map in all pages from source 2532 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2533 vm_page* page = it.Next();) { 2534 if (!page->busy) { 2535 DEBUG_PAGE_ACCESS_START(page); 2536 map_page(newArea, page, 2537 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2538 - newArea->cache_offset), 2539 protection, &reservation); 2540 DEBUG_PAGE_ACCESS_END(page); 2541 } 2542 } 2543 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2544 // ensuring that! 2545 2546 vm_page_unreserve_pages(&reservation); 2547 } 2548 } 2549 if (status == B_OK) 2550 newArea->cache_type = sourceArea->cache_type; 2551 2552 vm_area_put_locked_cache(cache); 2553 2554 if (status < B_OK) 2555 return status; 2556 2557 return newArea->id; 2558 } 2559 2560 2561 /*! Deletes the specified area of the given address space. 2562 2563 The address space must be write-locked. 2564 The caller must ensure that the area does not have any wired ranges. 2565 2566 \param addressSpace The address space containing the area. 2567 \param area The area to be deleted. 2568 \param deletingAddressSpace \c true, if the address space is in the process 2569 of being deleted. 2570 \param alreadyRemoved \c true, if the area was already removed from the global 2571 areas map (and thus had its ID deallocated.) 2572 */ 2573 static void 2574 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2575 bool deletingAddressSpace, bool alreadyRemoved) 2576 { 2577 ASSERT(!area->IsWired()); 2578 2579 if (area->id >= 0 && !alreadyRemoved) 2580 VMAreas::Remove(area); 2581 2582 // At this point the area is removed from the global hash table, but 2583 // still exists in the area list. 2584 2585 // Unmap the virtual address space the area occupied. 2586 { 2587 // We need to lock the complete cache chain. 2588 VMCache* topCache = vm_area_get_locked_cache(area); 2589 VMCacheChainLocker cacheChainLocker(topCache); 2590 cacheChainLocker.LockAllSourceCaches(); 2591 2592 // If the area's top cache is a temporary cache and the area is the only 2593 // one referencing it (besides us currently holding a second reference), 2594 // the unmapping code doesn't need to care about preserving the accessed 2595 // and dirty flags of the top cache page mappings. 2596 bool ignoreTopCachePageFlags 2597 = topCache->temporary && topCache->RefCount() == 2; 2598 2599 area->address_space->TranslationMap()->UnmapArea(area, 2600 deletingAddressSpace, ignoreTopCachePageFlags); 2601 } 2602 2603 if (!area->cache->temporary) 2604 area->cache->WriteModified(); 2605 2606 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2607 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2608 2609 arch_vm_unset_memory_type(area); 2610 addressSpace->RemoveArea(area, allocationFlags); 2611 addressSpace->Put(); 2612 2613 area->cache->RemoveArea(area); 2614 area->cache->ReleaseRef(); 2615 2616 addressSpace->DeleteArea(area, allocationFlags); 2617 } 2618 2619 2620 status_t 2621 vm_delete_area(team_id team, area_id id, bool kernel) 2622 { 2623 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n", 2624 team, id)); 2625 2626 // lock the address space and make sure the area isn't wired 2627 AddressSpaceWriteLocker locker; 2628 VMArea* area; 2629 AreaCacheLocker cacheLocker; 2630 2631 do { 2632 status_t status = locker.SetFromArea(team, id, area); 2633 if (status != B_OK) 2634 return status; 2635 2636 cacheLocker.SetTo(area); 2637 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2638 2639 cacheLocker.Unlock(); 2640 2641 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2642 return B_NOT_ALLOWED; 2643 2644 delete_area(locker.AddressSpace(), area, false); 2645 return B_OK; 2646 } 2647 2648 2649 /*! Creates a new cache on top of given cache, moves all areas from 2650 the old cache to the new one, and changes the protection of all affected 2651 areas' pages to read-only. If requested, wired pages are moved up to the 2652 new cache and copies are added to the old cache in their place. 2653 Preconditions: 2654 - The given cache must be locked. 2655 - All of the cache's areas' address spaces must be read locked. 2656 - Either the cache must not have any wired ranges or a page reservation for 2657 all wired pages must be provided, so they can be copied. 2658 2659 \param lowerCache The cache on top of which a new cache shall be created. 2660 \param wiredPagesReservation If \c NULL there must not be any wired pages 2661 in \a lowerCache. Otherwise as many pages must be reserved as the cache 2662 has wired page. The wired pages are copied in this case. 2663 */ 2664 static status_t 2665 vm_copy_on_write_area(VMCache* lowerCache, 2666 vm_page_reservation* wiredPagesReservation) 2667 { 2668 VMCache* upperCache; 2669 2670 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2671 2672 // We need to separate the cache from its areas. The cache goes one level 2673 // deeper and we create a new cache inbetween. 2674 2675 // create an anonymous cache 2676 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2677 lowerCache->GuardSize() / B_PAGE_SIZE, 2678 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL, 2679 VM_PRIORITY_USER); 2680 if (status != B_OK) 2681 return status; 2682 2683 upperCache->Lock(); 2684 2685 upperCache->temporary = 1; 2686 upperCache->virtual_base = lowerCache->virtual_base; 2687 upperCache->virtual_end = lowerCache->virtual_end; 2688 2689 // transfer the lower cache areas to the upper cache 2690 rw_lock_write_lock(&sAreaCacheLock); 2691 upperCache->TransferAreas(lowerCache); 2692 rw_lock_write_unlock(&sAreaCacheLock); 2693 2694 lowerCache->AddConsumer(upperCache); 2695 2696 // We now need to remap all pages from all of the cache's areas read-only, 2697 // so that a copy will be created on next write access. If there are wired 2698 // pages, we keep their protection, move them to the upper cache and create 2699 // copies for the lower cache. 2700 if (wiredPagesReservation != NULL) { 2701 // We need to handle wired pages -- iterate through the cache's pages. 
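		// Wired pages keep their protection and their physical page: the
		// original page is moved up into the new (upper) cache, and a
		// physical copy is inserted into the lower cache in its place.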
2702 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2703 vm_page* page = it.Next();) { 2704 if (page->WiredCount() > 0) { 2705 // allocate a new page and copy the wired one 2706 vm_page* copiedPage = vm_page_allocate_page( 2707 wiredPagesReservation, PAGE_STATE_ACTIVE); 2708 2709 vm_memcpy_physical_page( 2710 copiedPage->physical_page_number * B_PAGE_SIZE, 2711 page->physical_page_number * B_PAGE_SIZE); 2712 2713 // move the wired page to the upper cache (note: removing is OK 2714 // with the SplayTree iterator) and insert the copy 2715 upperCache->MovePage(page); 2716 lowerCache->InsertPage(copiedPage, 2717 page->cache_offset * B_PAGE_SIZE); 2718 2719 DEBUG_PAGE_ACCESS_END(copiedPage); 2720 } else { 2721 // Change the protection of this page in all areas. 2722 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2723 tempArea = tempArea->cache_next) { 2724 if (!is_page_in_area(tempArea, page)) 2725 continue; 2726 2727 // The area must be readable in the same way it was 2728 // previously writable. 2729 addr_t address = virtual_page_address(tempArea, page); 2730 uint32 protection = 0; 2731 uint32 pageProtection = get_area_page_protection(tempArea, address); 2732 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2733 protection |= B_KERNEL_READ_AREA; 2734 if ((pageProtection & B_READ_AREA) != 0) 2735 protection |= B_READ_AREA; 2736 2737 VMTranslationMap* map 2738 = tempArea->address_space->TranslationMap(); 2739 map->Lock(); 2740 map->ProtectPage(tempArea, address, protection); 2741 map->Unlock(); 2742 } 2743 } 2744 } 2745 } else { 2746 ASSERT(lowerCache->WiredPagesCount() == 0); 2747 2748 // just change the protection of all areas 2749 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2750 tempArea = tempArea->cache_next) { 2751 if (tempArea->page_protections != NULL) { 2752 // Change the protection of all pages in this area. 2753 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2754 map->Lock(); 2755 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2756 vm_page* page = it.Next();) { 2757 if (!is_page_in_area(tempArea, page)) 2758 continue; 2759 2760 // The area must be readable in the same way it was 2761 // previously writable. 2762 addr_t address = virtual_page_address(tempArea, page); 2763 uint32 protection = 0; 2764 uint32 pageProtection = get_area_page_protection(tempArea, address); 2765 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2766 protection |= B_KERNEL_READ_AREA; 2767 if ((pageProtection & B_READ_AREA) != 0) 2768 protection |= B_READ_AREA; 2769 2770 map->ProtectPage(tempArea, address, protection); 2771 } 2772 map->Unlock(); 2773 continue; 2774 } 2775 // The area must be readable in the same way it was previously 2776 // writable. 2777 uint32 protection = 0; 2778 if ((tempArea->protection & B_KERNEL_READ_AREA) != 0) 2779 protection |= B_KERNEL_READ_AREA; 2780 if ((tempArea->protection & B_READ_AREA) != 0) 2781 protection |= B_READ_AREA; 2782 2783 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2784 map->Lock(); 2785 map->ProtectArea(tempArea, protection); 2786 map->Unlock(); 2787 } 2788 } 2789 2790 vm_area_put_locked_cache(upperCache); 2791 2792 return B_OK; 2793 } 2794 2795 2796 area_id 2797 vm_copy_area(team_id team, const char* name, void** _address, 2798 uint32 addressSpec, area_id sourceID) 2799 { 2800 // Do the locking: target address space, all address spaces associated with 2801 // the source cache, and the cache itself. 
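	// Reserving pages for the wired-page copies may block, and the number of
	// wired pages can change while the locks are dropped -- hence the retry
	// loop below.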
2802 MultiAddressSpaceLocker locker; 2803 VMAddressSpace* targetAddressSpace; 2804 VMCache* cache; 2805 VMArea* source; 2806 AreaCacheLocker cacheLocker; 2807 status_t status; 2808 bool sharedArea; 2809 2810 page_num_t wiredPages = 0; 2811 vm_page_reservation wiredPagesReservation; 2812 2813 bool restart; 2814 do { 2815 restart = false; 2816 2817 locker.Unset(); 2818 status = locker.AddTeam(team, true, &targetAddressSpace); 2819 if (status == B_OK) { 2820 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2821 &cache); 2822 } 2823 if (status != B_OK) 2824 return status; 2825 2826 cacheLocker.SetTo(cache, true); // already locked 2827 2828 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2829 2830 page_num_t oldWiredPages = wiredPages; 2831 wiredPages = 0; 2832 2833 // If the source area isn't shared, count the number of wired pages in 2834 // the cache and reserve as many pages. 2835 if (!sharedArea) { 2836 wiredPages = cache->WiredPagesCount(); 2837 2838 if (wiredPages > oldWiredPages) { 2839 cacheLocker.Unlock(); 2840 locker.Unlock(); 2841 2842 if (oldWiredPages > 0) 2843 vm_page_unreserve_pages(&wiredPagesReservation); 2844 2845 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2846 VM_PRIORITY_USER); 2847 2848 restart = true; 2849 } 2850 } else if (oldWiredPages > 0) 2851 vm_page_unreserve_pages(&wiredPagesReservation); 2852 } while (restart); 2853 2854 // unreserve pages later 2855 struct PagesUnreserver { 2856 PagesUnreserver(vm_page_reservation* reservation) 2857 : 2858 fReservation(reservation) 2859 { 2860 } 2861 2862 ~PagesUnreserver() 2863 { 2864 if (fReservation != NULL) 2865 vm_page_unreserve_pages(fReservation); 2866 } 2867 2868 private: 2869 vm_page_reservation* fReservation; 2870 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2871 2872 bool writableCopy 2873 = (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2874 uint8* targetPageProtections = NULL; 2875 2876 if (source->page_protections != NULL) { 2877 size_t bytes = area_page_protections_size(source->Size()); 2878 targetPageProtections = (uint8*)malloc_etc(bytes, 2879 (source->address_space == VMAddressSpace::Kernel() 2880 || targetAddressSpace == VMAddressSpace::Kernel()) 2881 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 2882 if (targetPageProtections == NULL) 2883 return B_NO_MEMORY; 2884 2885 memcpy(targetPageProtections, source->page_protections, bytes); 2886 2887 if (!writableCopy) { 2888 for (size_t i = 0; i < bytes; i++) { 2889 if ((targetPageProtections[i] 2890 & (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) { 2891 writableCopy = true; 2892 break; 2893 } 2894 } 2895 } 2896 } 2897 2898 if (addressSpec == B_CLONE_ADDRESS) { 2899 addressSpec = B_EXACT_ADDRESS; 2900 *_address = (void*)source->Base(); 2901 } 2902 2903 // First, create a cache on top of the source area, respectively use the 2904 // existing one, if this is a shared area. 2905 2906 VMArea* target; 2907 virtual_address_restrictions addressRestrictions = {}; 2908 addressRestrictions.address = *_address; 2909 addressRestrictions.address_specification = addressSpec; 2910 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2911 name, source->Size(), source->wiring, source->protection, 2912 source->protection_max, 2913 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2914 writableCopy ? 
0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2915 &addressRestrictions, true, &target, _address); 2916 if (status < B_OK) { 2917 free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE); 2918 return status; 2919 } 2920 2921 if (targetPageProtections != NULL) 2922 target->page_protections = targetPageProtections; 2923 2924 if (sharedArea) { 2925 // The new area uses the old area's cache, but map_backing_store() 2926 // hasn't acquired a ref. So we have to do that now. 2927 cache->AcquireRefLocked(); 2928 } 2929 2930 // If the source area is writable, we need to move it one layer up as well 2931 2932 if (!sharedArea) { 2933 if (writableCopy) { 2934 // TODO: do something more useful if this fails! 2935 if (vm_copy_on_write_area(cache, 2936 wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) { 2937 panic("vm_copy_on_write_area() failed!\n"); 2938 } 2939 } 2940 } 2941 2942 // we return the ID of the newly created area 2943 return target->id; 2944 } 2945 2946 2947 status_t 2948 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2949 bool kernel) 2950 { 2951 fix_protection(&newProtection); 2952 2953 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2954 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2955 2956 if (!arch_vm_supports_protection(newProtection)) 2957 return B_NOT_SUPPORTED; 2958 2959 bool becomesWritable 2960 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2961 2962 // lock address spaces and cache 2963 MultiAddressSpaceLocker locker; 2964 VMCache* cache; 2965 VMArea* area; 2966 status_t status; 2967 AreaCacheLocker cacheLocker; 2968 bool isWritable; 2969 2970 bool restart; 2971 do { 2972 restart = false; 2973 2974 locker.Unset(); 2975 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2976 if (status != B_OK) 2977 return status; 2978 2979 cacheLocker.SetTo(cache, true); // already locked 2980 2981 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 2982 || (area->protection & B_KERNEL_AREA) != 0)) { 2983 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2984 "set protection %#" B_PRIx32 " on kernel area %" B_PRId32 2985 " (%s)\n", team, newProtection, areaID, area->name); 2986 return B_NOT_ALLOWED; 2987 } 2988 if (!kernel && area->protection_max != 0 2989 && (newProtection & area->protection_max) 2990 != (newProtection & B_USER_PROTECTION)) { 2991 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2992 "set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel " 2993 "area %" B_PRId32 " (%s)\n", team, newProtection, 2994 area->protection_max, areaID, area->name); 2995 return B_NOT_ALLOWED; 2996 } 2997 2998 if (team != VMAddressSpace::KernelID() 2999 && area->address_space->ID() != team) { 3000 // unless you're the kernel, you are only allowed to set 3001 // the protection of your own areas 3002 return B_NOT_ALLOWED; 3003 } 3004 3005 if (area->protection == newProtection) 3006 return B_OK; 3007 3008 isWritable 3009 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 3010 3011 // Make sure the area (respectively, if we're going to call 3012 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 3013 // wired ranges. 
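		// Waiting for wired ranges drops the locks, so the checks above have
		// to be redone afterwards (the restart loop).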
3014 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 3015 for (VMArea* otherArea = cache->areas; otherArea != NULL; 3016 otherArea = otherArea->cache_next) { 3017 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 3018 restart = true; 3019 break; 3020 } 3021 } 3022 } else { 3023 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 3024 restart = true; 3025 } 3026 } while (restart); 3027 3028 if (area->page_protections != NULL) { 3029 // Get rid of the per-page protections. 3030 free_etc(area->page_protections, 3031 area->address_space == VMAddressSpace::Kernel() ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 3032 area->page_protections = NULL; 3033 3034 // Assume the existing protections don't match the new ones. 3035 isWritable = !becomesWritable; 3036 } 3037 3038 bool changePageProtection = true; 3039 bool changeTopCachePagesOnly = false; 3040 3041 if (isWritable && !becomesWritable) { 3042 // writable -> !writable 3043 3044 if (cache->source != NULL && cache->temporary) { 3045 if (cache->CountWritableAreas(area) == 0) { 3046 // Since this cache now lives from the pages in its source cache, 3047 // we can change the cache's commitment to take only those pages 3048 // into account that really are in this cache. 3049 3050 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 3051 team == VMAddressSpace::KernelID() 3052 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 3053 3054 // TODO: we may be able to join with our source cache, if 3055 // count == 0 3056 } 3057 } 3058 3059 // If only the writability changes, we can just remap the pages of the 3060 // top cache, since the pages of lower caches are mapped read-only 3061 // anyway. That's advantageous only, if the number of pages in the cache 3062 // is significantly smaller than the number of pages in the area, 3063 // though. 3064 if (newProtection 3065 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 3066 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 3067 changeTopCachePagesOnly = true; 3068 } 3069 } else if (!isWritable && becomesWritable) { 3070 // !writable -> writable 3071 3072 if (!cache->consumers.IsEmpty()) { 3073 // There are consumers -- we have to insert a new cache. Fortunately 3074 // vm_copy_on_write_area() does everything that's needed. 3075 changePageProtection = false; 3076 status = vm_copy_on_write_area(cache, NULL); 3077 } else { 3078 // No consumers, so we don't need to insert a new one. 3079 if (cache->source != NULL && cache->temporary) { 3080 // the cache's commitment must contain all possible pages 3081 status = cache->Commit(cache->virtual_end - cache->virtual_base, 3082 team == VMAddressSpace::KernelID() 3083 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 3084 } 3085 3086 if (status == B_OK && cache->source != NULL) { 3087 // There's a source cache, hence we can't just change all pages' 3088 // protection or we might allow writing into pages belonging to 3089 // a lower cache. 
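				// Pages of lower caches stay mapped read-only; the next write
				// fault copies them up into this cache.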
3090 changeTopCachePagesOnly = true; 3091 } 3092 } 3093 } else { 3094 // we don't have anything special to do in all other cases 3095 } 3096 3097 if (status == B_OK) { 3098 // remap existing pages in this cache 3099 if (changePageProtection) { 3100 VMTranslationMap* map = area->address_space->TranslationMap(); 3101 map->Lock(); 3102 3103 if (changeTopCachePagesOnly) { 3104 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 3105 page_num_t lastPageOffset 3106 = firstPageOffset + area->Size() / B_PAGE_SIZE; 3107 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 3108 vm_page* page = it.Next();) { 3109 if (page->cache_offset >= firstPageOffset 3110 && page->cache_offset <= lastPageOffset) { 3111 addr_t address = virtual_page_address(area, page); 3112 map->ProtectPage(area, address, newProtection); 3113 } 3114 } 3115 } else 3116 map->ProtectArea(area, newProtection); 3117 3118 map->Unlock(); 3119 } 3120 3121 area->protection = newProtection; 3122 } 3123 3124 return status; 3125 } 3126 3127 3128 status_t 3129 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 3130 { 3131 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 3132 if (addressSpace == NULL) 3133 return B_BAD_TEAM_ID; 3134 3135 VMTranslationMap* map = addressSpace->TranslationMap(); 3136 3137 map->Lock(); 3138 uint32 dummyFlags; 3139 status_t status = map->Query(vaddr, paddr, &dummyFlags); 3140 map->Unlock(); 3141 3142 addressSpace->Put(); 3143 return status; 3144 } 3145 3146 3147 /*! The page's cache must be locked. 3148 */ 3149 bool 3150 vm_test_map_modification(vm_page* page) 3151 { 3152 if (page->modified) 3153 return true; 3154 3155 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3156 vm_page_mapping* mapping; 3157 while ((mapping = iterator.Next()) != NULL) { 3158 VMArea* area = mapping->area; 3159 VMTranslationMap* map = area->address_space->TranslationMap(); 3160 3161 phys_addr_t physicalAddress; 3162 uint32 flags; 3163 map->Lock(); 3164 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 3165 map->Unlock(); 3166 3167 if ((flags & PAGE_MODIFIED) != 0) 3168 return true; 3169 } 3170 3171 return false; 3172 } 3173 3174 3175 /*! The page's cache must be locked. 3176 */ 3177 void 3178 vm_clear_map_flags(vm_page* page, uint32 flags) 3179 { 3180 if ((flags & PAGE_ACCESSED) != 0) 3181 page->accessed = false; 3182 if ((flags & PAGE_MODIFIED) != 0) 3183 page->modified = false; 3184 3185 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3186 vm_page_mapping* mapping; 3187 while ((mapping = iterator.Next()) != NULL) { 3188 VMArea* area = mapping->area; 3189 VMTranslationMap* map = area->address_space->TranslationMap(); 3190 3191 map->Lock(); 3192 map->ClearFlags(virtual_page_address(area, page), flags); 3193 map->Unlock(); 3194 } 3195 } 3196 3197 3198 /*! Removes all mappings from a page. 3199 After you've called this function, the page is unmapped from memory and 3200 the page's \c accessed and \c modified flags have been updated according 3201 to the state of the mappings. 3202 The page's cache must be locked. 
3203 */ 3204 void 3205 vm_remove_all_page_mappings(vm_page* page) 3206 { 3207 while (vm_page_mapping* mapping = page->mappings.Head()) { 3208 VMArea* area = mapping->area; 3209 VMTranslationMap* map = area->address_space->TranslationMap(); 3210 addr_t address = virtual_page_address(area, page); 3211 map->UnmapPage(area, address, false); 3212 } 3213 } 3214 3215 3216 int32 3217 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 3218 { 3219 int32 count = 0; 3220 3221 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3222 vm_page_mapping* mapping; 3223 while ((mapping = iterator.Next()) != NULL) { 3224 VMArea* area = mapping->area; 3225 VMTranslationMap* map = area->address_space->TranslationMap(); 3226 3227 bool modified; 3228 if (map->ClearAccessedAndModified(area, 3229 virtual_page_address(area, page), false, modified)) { 3230 count++; 3231 } 3232 3233 page->modified |= modified; 3234 } 3235 3236 3237 if (page->accessed) { 3238 count++; 3239 page->accessed = false; 3240 } 3241 3242 return count; 3243 } 3244 3245 3246 /*! Removes all mappings of a page and/or clears the accessed bits of the 3247 mappings. 3248 The function iterates through the page mappings and removes them until 3249 encountering one that has been accessed. From then on it will continue to 3250 iterate, but only clear the accessed flag of the mapping. The page's 3251 \c modified bit will be updated accordingly, the \c accessed bit will be 3252 cleared. 3253 \return The number of mapping accessed bits encountered, including the 3254 \c accessed bit of the page itself. If \c 0 is returned, all mappings 3255 of the page have been removed. 3256 */ 3257 int32 3258 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 3259 { 3260 ASSERT(page->WiredCount() == 0); 3261 3262 if (page->accessed) 3263 return vm_clear_page_mapping_accessed_flags(page); 3264 3265 while (vm_page_mapping* mapping = page->mappings.Head()) { 3266 VMArea* area = mapping->area; 3267 VMTranslationMap* map = area->address_space->TranslationMap(); 3268 addr_t address = virtual_page_address(area, page); 3269 bool modified = false; 3270 if (map->ClearAccessedAndModified(area, address, true, modified)) { 3271 page->accessed = true; 3272 page->modified |= modified; 3273 return vm_clear_page_mapping_accessed_flags(page); 3274 } 3275 page->modified |= modified; 3276 } 3277 3278 return 0; 3279 } 3280 3281 3282 /*! Deletes all areas and reserved regions in the given address space. 3283 3284 The caller must ensure that none of the areas has any wired ranges. 3285 3286 \param addressSpace The address space. 3287 \param deletingAddressSpace \c true, if the address space is in the process 3288 of being deleted. 
3289 */ 3290 void 3291 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3292 { 3293 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3294 addressSpace->ID())); 3295 3296 addressSpace->WriteLock(); 3297 3298 // remove all reserved areas in this address space 3299 addressSpace->UnreserveAllAddressRanges(0); 3300 3301 // remove all areas from the areas map at once (to avoid lock contention) 3302 VMAreas::WriteLock(); 3303 { 3304 VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator(); 3305 while (VMArea* area = it.Next()) 3306 VMAreas::Remove(area); 3307 } 3308 VMAreas::WriteUnlock(); 3309 3310 // delete all the areas in this address space 3311 while (VMArea* area = addressSpace->FirstArea()) { 3312 ASSERT(!area->IsWired()); 3313 delete_area(addressSpace, area, deletingAddressSpace, true); 3314 } 3315 3316 addressSpace->WriteUnlock(); 3317 } 3318 3319 3320 static area_id 3321 vm_area_for(addr_t address, bool kernel) 3322 { 3323 team_id team; 3324 if (IS_USER_ADDRESS(address)) { 3325 // we try the user team address space, if any 3326 team = VMAddressSpace::CurrentID(); 3327 if (team < 0) 3328 return team; 3329 } else 3330 team = VMAddressSpace::KernelID(); 3331 3332 AddressSpaceReadLocker locker(team); 3333 if (!locker.IsLocked()) 3334 return B_BAD_TEAM_ID; 3335 3336 VMArea* area = locker.AddressSpace()->LookupArea(address); 3337 if (area != NULL) { 3338 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0 3339 && (area->protection & B_KERNEL_AREA) != 0) 3340 return B_ERROR; 3341 3342 return area->id; 3343 } 3344 3345 return B_ERROR; 3346 } 3347 3348 3349 /*! Frees physical pages that were used during the boot process. 3350 \a end is inclusive. 3351 */ 3352 static void 3353 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3354 { 3355 // free all physical pages in the specified range 3356 3357 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3358 phys_addr_t physicalAddress; 3359 uint32 flags; 3360 3361 if (map->Query(current, &physicalAddress, &flags) == B_OK 3362 && (flags & PAGE_PRESENT) != 0) { 3363 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3364 if (page != NULL && page->State() != PAGE_STATE_FREE 3365 && page->State() != PAGE_STATE_CLEAR 3366 && page->State() != PAGE_STATE_UNUSED) { 3367 DEBUG_PAGE_ACCESS_START(page); 3368 vm_page_set_state(page, PAGE_STATE_FREE); 3369 } 3370 } 3371 } 3372 3373 // unmap the memory 3374 map->Unmap(start, end); 3375 } 3376 3377 3378 void 3379 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3380 { 3381 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3382 addr_t end = start + (size - 1); 3383 addr_t lastEnd = start; 3384 3385 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3386 (void*)start, (void*)end)); 3387 3388 // The areas are sorted in virtual address space order, so 3389 // we just have to find the holes between them that fall 3390 // into the area we should dispose 3391 3392 map->Lock(); 3393 3394 for (VMAddressSpace::AreaIterator it 3395 = VMAddressSpace::Kernel()->GetAreaIterator(); 3396 VMArea* area = it.Next();) { 3397 addr_t areaStart = area->Base(); 3398 addr_t areaEnd = areaStart + (area->Size() - 1); 3399 3400 if (areaEnd < start) 3401 continue; 3402 3403 if (areaStart > end) { 3404 // we are done, the area is already beyond of what we have to free 3405 break; 3406 } 3407 3408 if (areaStart > lastEnd) { 3409 // this is something we can free 
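			// (unmap_and_free_physical_pages() takes an inclusive end
			// address, hence the "- 1" below.)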
3410 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3411 (void*)areaStart)); 3412 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3413 } 3414 3415 if (areaEnd >= end) { 3416 lastEnd = areaEnd; 3417 // no +1 to prevent potential overflow 3418 break; 3419 } 3420 3421 lastEnd = areaEnd + 1; 3422 } 3423 3424 if (lastEnd < end) { 3425 // we can also get rid of some space at the end of the area 3426 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 3427 (void*)end)); 3428 unmap_and_free_physical_pages(map, lastEnd, end); 3429 } 3430 3431 map->Unlock(); 3432 } 3433 3434 3435 static void 3436 create_preloaded_image_areas(struct preloaded_image* _image) 3437 { 3438 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image); 3439 char name[B_OS_NAME_LENGTH]; 3440 void* address; 3441 int32 length; 3442 3443 // use file name to create a good area name 3444 char* fileName = strrchr(image->name, '/'); 3445 if (fileName == NULL) 3446 fileName = image->name; 3447 else 3448 fileName++; 3449 3450 length = strlen(fileName); 3451 // make sure there is enough space for the suffix 3452 if (length > 25) 3453 length = 25; 3454 3455 memcpy(name, fileName, length); 3456 strcpy(name + length, "_text"); 3457 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 3458 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3459 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 3460 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3461 // this will later be remapped read-only/executable by the 3462 // ELF initialization code 3463 3464 strcpy(name + length, "_data"); 3465 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 3466 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3467 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 3468 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3469 } 3470 3471 3472 /*! Frees all previously kernel arguments areas from the kernel_args structure. 3473 Any boot loader resources contained in that arguments must not be accessed 3474 anymore past this point. 
3475 */ 3476 void 3477 vm_free_kernel_args(kernel_args* args) 3478 { 3479 TRACE(("vm_free_kernel_args()\n")); 3480 3481 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 3482 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start); 3483 if (area >= B_OK) 3484 delete_area(area); 3485 } 3486 } 3487 3488 3489 static void 3490 allocate_kernel_args(kernel_args* args) 3491 { 3492 TRACE(("allocate_kernel_args()\n")); 3493 3494 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 3495 const addr_range& range = args->kernel_args_range[i]; 3496 void* address = (void*)(addr_t)range.start; 3497 3498 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 3499 range.size, B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3500 } 3501 } 3502 3503 3504 static void 3505 unreserve_boot_loader_ranges(kernel_args* args) 3506 { 3507 TRACE(("unreserve_boot_loader_ranges()\n")); 3508 3509 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3510 const addr_range& range = args->virtual_allocated_range[i]; 3511 vm_unreserve_address_range(VMAddressSpace::KernelID(), 3512 (void*)(addr_t)range.start, range.size); 3513 } 3514 } 3515 3516 3517 static void 3518 reserve_boot_loader_ranges(kernel_args* args) 3519 { 3520 TRACE(("reserve_boot_loader_ranges()\n")); 3521 3522 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3523 const addr_range& range = args->virtual_allocated_range[i]; 3524 void* address = (void*)(addr_t)range.start; 3525 3526 // If the address is no kernel address, we just skip it. The 3527 // architecture specific code has to deal with it. 3528 if (!IS_KERNEL_ADDRESS(address)) { 3529 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 3530 B_PRIu64 "\n", address, range.size); 3531 continue; 3532 } 3533 3534 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 3535 &address, B_EXACT_ADDRESS, range.size, 0); 3536 if (status < B_OK) 3537 panic("could not reserve boot loader ranges\n"); 3538 } 3539 } 3540 3541 3542 static addr_t 3543 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 3544 { 3545 size = PAGE_ALIGN(size); 3546 if (alignment <= B_PAGE_SIZE) { 3547 // All allocations are naturally page-aligned. 3548 alignment = 0; 3549 } else { 3550 ASSERT((alignment % B_PAGE_SIZE) == 0); 3551 } 3552 3553 // Find a slot in the virtual allocation ranges. 3554 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 3555 // Check if the space between this one and the previous is big enough. 3556 const addr_range& range = args->virtual_allocated_range[i]; 3557 addr_range& previousRange = args->virtual_allocated_range[i - 1]; 3558 const addr_t previousRangeEnd = previousRange.start + previousRange.size; 3559 3560 addr_t base = alignment > 0 3561 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 3562 3563 if (base >= KERNEL_BASE && base < range.start && (range.start - base) >= size) { 3564 previousRange.size += base + size - previousRangeEnd; 3565 return base; 3566 } 3567 } 3568 3569 // We didn't find one between allocation ranges. This is OK. 3570 // See if there's a gap after the last one. 3571 addr_range& lastRange 3572 = args->virtual_allocated_range[args->num_virtual_allocated_ranges - 1]; 3573 const addr_t lastRangeEnd = lastRange.start + lastRange.size; 3574 addr_t base = alignment > 0 3575 ? 
ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 3576 if ((KERNEL_BASE + (KERNEL_SIZE - 1) - base) >= size) { 3577 lastRange.size += base + size - lastRangeEnd; 3578 return base; 3579 } 3580 3581 // See if there's a gap before the first one. 3582 addr_range& firstRange = args->virtual_allocated_range[0]; 3583 if (firstRange.start > KERNEL_BASE && (firstRange.start - KERNEL_BASE) >= size) { 3584 base = firstRange.start - size; 3585 if (alignment > 0) 3586 base = ROUNDDOWN(base, alignment); 3587 3588 if (base >= KERNEL_BASE) { 3589 firstRange.size += firstRange.start - base; 3590 firstRange.start = base; 3591 return base; 3592 } 3593 } 3594 3595 return 0; 3596 } 3597 3598 3599 static bool 3600 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 3601 { 3602 // TODO: horrible brute-force method of determining if the page can be 3603 // allocated 3604 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3605 const addr_range& range = args->physical_memory_range[i]; 3606 if (address >= range.start && address < (range.start + range.size)) 3607 return true; 3608 } 3609 return false; 3610 } 3611 3612 3613 page_num_t 3614 vm_allocate_early_physical_page(kernel_args* args) 3615 { 3616 return vm_allocate_early_physical_page_etc(args); 3617 } 3618 3619 3620 page_num_t 3621 vm_allocate_early_physical_page_etc(kernel_args* args, phys_addr_t maxAddress) 3622 { 3623 if (args->num_physical_allocated_ranges == 0) { 3624 panic("early physical page allocations no longer possible!"); 3625 return 0; 3626 } 3627 if (maxAddress == 0) 3628 maxAddress = __HAIKU_PHYS_ADDR_MAX; 3629 3630 #if defined(B_HAIKU_PHYSICAL_64_BIT) 3631 // Check if the last physical range is above the 32-bit maximum. 3632 const addr_range& lastMemoryRange = 3633 args->physical_memory_range[args->num_physical_memory_ranges - 1]; 3634 const uint64 post32bitAddr = 0x100000000LL; 3635 if ((lastMemoryRange.start + lastMemoryRange.size) > post32bitAddr 3636 && args->num_physical_allocated_ranges < MAX_PHYSICAL_ALLOCATED_RANGE) { 3637 // To avoid consuming physical memory in the 32-bit range (which drivers may need), 3638 // ensure the last allocated range at least ends past the 32-bit boundary. 3639 const addr_range& lastAllocatedRange = 3640 args->physical_allocated_range[args->num_physical_allocated_ranges - 1]; 3641 const phys_addr_t lastAllocatedPage = lastAllocatedRange.start + lastAllocatedRange.size; 3642 if (lastAllocatedPage < post32bitAddr) { 3643 // Create ranges until we have one at least starting at the first point past 4GB. 3644 // (Some of the logic here is similar to the new-range code at the end of the method.) 3645 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3646 addr_range& memoryRange = args->physical_memory_range[i]; 3647 if ((memoryRange.start + memoryRange.size) < lastAllocatedPage) 3648 continue; 3649 if (memoryRange.size < (B_PAGE_SIZE * 128)) 3650 continue; 3651 3652 uint64 rangeStart = memoryRange.start; 3653 if ((memoryRange.start + memoryRange.size) <= post32bitAddr) { 3654 if (memoryRange.start < lastAllocatedPage) 3655 continue; 3656 3657 // Range has no pages allocated and ends before the 32-bit boundary. 3658 } else { 3659 // Range ends past the 32-bit boundary. It could have some pages allocated, 3660 // but if we're here, we know that nothing is allocated above the boundary, 3661 // so we want to create a new range with it regardless. 
3662 if (rangeStart < post32bitAddr) 3663 rangeStart = post32bitAddr; 3664 } 3665 3666 addr_range& allocatedRange = 3667 args->physical_allocated_range[args->num_physical_allocated_ranges++]; 3668 allocatedRange.start = rangeStart; 3669 allocatedRange.size = 0; 3670 3671 if (rangeStart >= post32bitAddr) 3672 break; 3673 if (args->num_physical_allocated_ranges == MAX_PHYSICAL_ALLOCATED_RANGE) 3674 break; 3675 } 3676 } 3677 } 3678 #endif 3679 3680 // Try expanding the existing physical ranges upwards. 3681 for (int32 i = args->num_physical_allocated_ranges - 1; i >= 0; i--) { 3682 addr_range& range = args->physical_allocated_range[i]; 3683 phys_addr_t nextPage = range.start + range.size; 3684 3685 // check constraints 3686 if (nextPage > maxAddress) 3687 continue; 3688 3689 // make sure the page does not collide with the next allocated range 3690 if ((i + 1) < (int32)args->num_physical_allocated_ranges) { 3691 addr_range& nextRange = args->physical_allocated_range[i + 1]; 3692 if (nextRange.size != 0 && nextPage >= nextRange.start) 3693 continue; 3694 } 3695 // see if the next page fits in the memory block 3696 if (is_page_in_physical_memory_range(args, nextPage)) { 3697 // we got one! 3698 range.size += B_PAGE_SIZE; 3699 return nextPage / B_PAGE_SIZE; 3700 } 3701 } 3702 3703 // Expanding upwards didn't work, try going downwards. 3704 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 3705 addr_range& range = args->physical_allocated_range[i]; 3706 phys_addr_t nextPage = range.start - B_PAGE_SIZE; 3707 3708 // check constraints 3709 if (nextPage > maxAddress) 3710 continue; 3711 3712 // make sure the page does not collide with the previous allocated range 3713 if (i > 0) { 3714 addr_range& previousRange = args->physical_allocated_range[i - 1]; 3715 if (previousRange.size != 0 && nextPage < (previousRange.start + previousRange.size)) 3716 continue; 3717 } 3718 // see if the next physical page fits in the memory block 3719 if (is_page_in_physical_memory_range(args, nextPage)) { 3720 // we got one! 3721 range.start -= B_PAGE_SIZE; 3722 range.size += B_PAGE_SIZE; 3723 return nextPage / B_PAGE_SIZE; 3724 } 3725 } 3726 3727 // Try starting a new range. 3728 if (args->num_physical_allocated_ranges < MAX_PHYSICAL_ALLOCATED_RANGE) { 3729 const addr_range& lastAllocatedRange = 3730 args->physical_allocated_range[args->num_physical_allocated_ranges - 1]; 3731 const phys_addr_t lastAllocatedPage = lastAllocatedRange.start + lastAllocatedRange.size; 3732 3733 phys_addr_t nextPage = 0; 3734 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 3735 const addr_range& range = args->physical_memory_range[i]; 3736 // Ignore everything before the last-allocated page, as well as small ranges. 3737 if (range.start < lastAllocatedPage || range.size < (B_PAGE_SIZE * 128)) 3738 continue; 3739 if (range.start > maxAddress) 3740 break; 3741 3742 nextPage = range.start; 3743 break; 3744 } 3745 3746 if (nextPage != 0) { 3747 // we got one! 3748 addr_range& range = 3749 args->physical_allocated_range[args->num_physical_allocated_ranges++]; 3750 range.start = nextPage; 3751 range.size = B_PAGE_SIZE; 3752 return nextPage / B_PAGE_SIZE; 3753 } 3754 } 3755 3756 return 0; 3757 // could not allocate a block 3758 } 3759 3760 3761 /*! This one uses the kernel_args' physical and virtual memory ranges to 3762 allocate some pages before the VM is completely up. 
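	For instance, the initial kernel heap in vm_init() below is created with
	a call along the lines of vm_allocate_early(args, heapSize, heapSize,
	B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0).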
3763 */ 3764 addr_t 3765 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 3766 uint32 attributes, addr_t alignment) 3767 { 3768 if (physicalSize > virtualSize) 3769 physicalSize = virtualSize; 3770 3771 // find the vaddr to allocate at 3772 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 3773 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 3774 if (virtualBase == 0) { 3775 panic("vm_allocate_early: could not allocate virtual address\n"); 3776 return 0; 3777 } 3778 3779 // map the pages 3780 for (uint32 i = 0; i < HOWMANY(physicalSize, B_PAGE_SIZE); i++) { 3781 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 3782 if (physicalAddress == 0) 3783 panic("error allocating early page!\n"); 3784 3785 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 3786 3787 status_t status = arch_vm_translation_map_early_map(args, 3788 virtualBase + i * B_PAGE_SIZE, 3789 physicalAddress * B_PAGE_SIZE, attributes, 3790 &vm_allocate_early_physical_page); 3791 if (status != B_OK) 3792 panic("error mapping early page!"); 3793 } 3794 3795 return virtualBase; 3796 } 3797 3798 3799 /*! The main entrance point to initialize the VM. */ 3800 status_t 3801 vm_init(kernel_args* args) 3802 { 3803 struct preloaded_image* image; 3804 void* address; 3805 status_t err = 0; 3806 uint32 i; 3807 3808 TRACE(("vm_init: entry\n")); 3809 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 3810 err = arch_vm_init(args); 3811 3812 // initialize some globals 3813 vm_page_init_num_pages(args); 3814 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 3815 3816 slab_init(args); 3817 3818 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3819 off_t heapSize = INITIAL_HEAP_SIZE; 3820 // try to accomodate low memory systems 3821 while (heapSize > sAvailableMemory / 8) 3822 heapSize /= 2; 3823 if (heapSize < 1024 * 1024) 3824 panic("vm_init: go buy some RAM please."); 3825 3826 // map in the new heap and initialize it 3827 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 3828 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 3829 TRACE(("heap at 0x%lx\n", heapBase)); 3830 heap_init(heapBase, heapSize); 3831 #endif 3832 3833 // initialize the free page list and physical page mapper 3834 vm_page_init(args); 3835 3836 // initialize the cache allocators 3837 vm_cache_init(args); 3838 3839 { 3840 status_t error = VMAreas::Init(); 3841 if (error != B_OK) 3842 panic("vm_init: error initializing areas map\n"); 3843 } 3844 3845 VMAddressSpace::Init(); 3846 reserve_boot_loader_ranges(args); 3847 3848 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3849 heap_init_post_area(); 3850 #endif 3851 3852 // Do any further initialization that the architecture dependant layers may 3853 // need now 3854 arch_vm_translation_map_init_post_area(args); 3855 arch_vm_init_post_area(args); 3856 vm_page_init_post_area(args); 3857 slab_init_post_area(); 3858 3859 // allocate areas to represent stuff that already exists 3860 3861 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3862 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 3863 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 3864 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3865 #endif 3866 3867 allocate_kernel_args(args); 3868 3869 create_preloaded_image_areas(args->kernel_image); 3870 3871 // allocate areas for preloaded images 3872 for (image = args->preloaded_images; image != NULL; image = image->next) 3873 
create_preloaded_image_areas(image); 3874 3875 // allocate kernel stacks 3876 for (i = 0; i < args->num_cpus; i++) { 3877 char name[64]; 3878 3879 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 3880 address = (void*)args->cpu_kstack[i].start; 3881 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 3882 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3883 } 3884 3885 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 3886 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 3887 3888 #if PARANOID_KERNEL_MALLOC 3889 vm_block_address_range("uninitialized heap memory", 3890 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 3891 #endif 3892 #if PARANOID_KERNEL_FREE 3893 vm_block_address_range("freed heap memory", 3894 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 3895 #endif 3896 3897 create_page_mappings_object_caches(); 3898 3899 vm_debug_init(); 3900 3901 TRACE(("vm_init: exit\n")); 3902 3903 vm_cache_init_post_heap(); 3904 3905 return err; 3906 } 3907 3908 3909 status_t 3910 vm_init_post_sem(kernel_args* args) 3911 { 3912 // This frees all unused boot loader resources and makes its space available 3913 // again 3914 arch_vm_init_end(args); 3915 unreserve_boot_loader_ranges(args); 3916 3917 // fill in all of the semaphores that were not allocated before 3918 // since we're still single threaded and only the kernel address space 3919 // exists, it isn't that hard to find all of the ones we need to create 3920 3921 arch_vm_translation_map_init_post_sem(args); 3922 3923 slab_init_post_sem(); 3924 3925 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 3926 heap_init_post_sem(); 3927 #endif 3928 3929 return B_OK; 3930 } 3931 3932 3933 status_t 3934 vm_init_post_thread(kernel_args* args) 3935 { 3936 vm_page_init_post_thread(args); 3937 slab_init_post_thread(); 3938 return heap_init_post_thread(); 3939 } 3940 3941 3942 status_t 3943 vm_init_post_modules(kernel_args* args) 3944 { 3945 return arch_vm_init_post_modules(args); 3946 } 3947 3948 3949 void 3950 permit_page_faults(void) 3951 { 3952 Thread* thread = thread_get_current_thread(); 3953 if (thread != NULL) 3954 atomic_add(&thread->page_faults_allowed, 1); 3955 } 3956 3957 3958 void 3959 forbid_page_faults(void) 3960 { 3961 Thread* thread = thread_get_current_thread(); 3962 if (thread != NULL) 3963 atomic_add(&thread->page_faults_allowed, -1); 3964 } 3965 3966 3967 status_t 3968 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute, 3969 bool isUser, addr_t* newIP) 3970 { 3971 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 3972 faultAddress)); 3973 3974 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 3975 3976 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 3977 VMAddressSpace* addressSpace = NULL; 3978 3979 status_t status = B_OK; 3980 *newIP = 0; 3981 atomic_add((int32*)&sPageFaults, 1); 3982 3983 if (IS_KERNEL_ADDRESS(pageAddress)) { 3984 addressSpace = VMAddressSpace::GetKernel(); 3985 } else if (IS_USER_ADDRESS(pageAddress)) { 3986 addressSpace = VMAddressSpace::GetCurrent(); 3987 if (addressSpace == NULL) { 3988 if (!isUser) { 3989 dprintf("vm_page_fault: kernel thread accessing invalid user " 3990 "memory!\n"); 3991 status = B_BAD_ADDRESS; 3992 TPF(PageFaultError(-1, 3993 VMPageFaultTracing 3994 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 3995 } else { 3996 // XXX weird state. 
3997 panic("vm_page_fault: non kernel thread accessing user memory " 3998 "that doesn't exist!\n"); 3999 status = B_BAD_ADDRESS; 4000 } 4001 } 4002 } else { 4003 // the hit was probably in the 64k DMZ between kernel and user space 4004 // this keeps a user space thread from passing a buffer that crosses 4005 // into kernel space 4006 status = B_BAD_ADDRESS; 4007 TPF(PageFaultError(-1, 4008 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4009 } 4010 4011 if (status == B_OK) { 4012 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute, 4013 isUser, NULL); 4014 } 4015 4016 if (status < B_OK) { 4017 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4018 "0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n", 4019 strerror(status), address, faultAddress, isWrite, isUser, isExecute, 4020 thread_get_current_thread_id()); 4021 if (!isUser) { 4022 Thread* thread = thread_get_current_thread(); 4023 if (thread != NULL && thread->fault_handler != 0) { 4024 // this will cause the arch dependant page fault handler to 4025 // modify the IP on the interrupt frame or whatever to return 4026 // to this address 4027 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler); 4028 } else { 4029 // unhandled page fault in the kernel 4030 panic("vm_page_fault: unhandled page fault in kernel space at " 4031 "0x%lx, ip 0x%lx\n", address, faultAddress); 4032 } 4033 } else { 4034 Thread* thread = thread_get_current_thread(); 4035 4036 #ifdef TRACE_FAULTS 4037 VMArea* area = NULL; 4038 if (addressSpace != NULL) { 4039 addressSpace->ReadLock(); 4040 area = addressSpace->LookupArea(faultAddress); 4041 } 4042 4043 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4044 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4045 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4046 thread->team->Name(), thread->team->id, 4047 isWrite ? "write" : (isExecute ? "execute" : "read"), address, 4048 faultAddress, area ? area->name : "???", faultAddress - (area ? 4049 area->Base() : 0x0)); 4050 4051 if (addressSpace != NULL) 4052 addressSpace->ReadUnlock(); 4053 #endif 4054 4055 // If the thread has a signal handler for SIGSEGV, we simply 4056 // send it the signal. Otherwise we notify the user debugger 4057 // first. 4058 struct sigaction action; 4059 if ((sigaction(SIGSEGV, NULL, &action) == 0 4060 && action.sa_handler != SIG_DFL 4061 && action.sa_handler != SIG_IGN) 4062 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4063 SIGSEGV)) { 4064 Signal signal(SIGSEGV, 4065 status == B_PERMISSION_DENIED 4066 ? 
SEGV_ACCERR : SEGV_MAPERR, 4067 EFAULT, thread->team->id); 4068 signal.SetAddress((void*)address); 4069 send_signal_to_thread(thread, signal, 0); 4070 } 4071 } 4072 } 4073 4074 if (addressSpace != NULL) 4075 addressSpace->Put(); 4076 4077 return B_HANDLED_INTERRUPT; 4078 } 4079 4080 4081 struct PageFaultContext { 4082 AddressSpaceReadLocker addressSpaceLocker; 4083 VMCacheChainLocker cacheChainLocker; 4084 4085 VMTranslationMap* map; 4086 VMCache* topCache; 4087 off_t cacheOffset; 4088 vm_page_reservation reservation; 4089 bool isWrite; 4090 4091 // return values 4092 vm_page* page; 4093 bool restart; 4094 bool pageAllocated; 4095 4096 4097 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4098 : 4099 addressSpaceLocker(addressSpace, true), 4100 map(addressSpace->TranslationMap()), 4101 isWrite(isWrite) 4102 { 4103 } 4104 4105 ~PageFaultContext() 4106 { 4107 UnlockAll(); 4108 vm_page_unreserve_pages(&reservation); 4109 } 4110 4111 void Prepare(VMCache* topCache, off_t cacheOffset) 4112 { 4113 this->topCache = topCache; 4114 this->cacheOffset = cacheOffset; 4115 page = NULL; 4116 restart = false; 4117 pageAllocated = false; 4118 4119 cacheChainLocker.SetTo(topCache); 4120 } 4121 4122 void UnlockAll(VMCache* exceptCache = NULL) 4123 { 4124 topCache = NULL; 4125 addressSpaceLocker.Unlock(); 4126 cacheChainLocker.Unlock(exceptCache); 4127 } 4128 }; 4129 4130 4131 /*! Gets the page that should be mapped into the area. 4132 Returns an error code other than \c B_OK, if the page couldn't be found or 4133 paged in. The locking state of the address space and the caches is undefined 4134 in that case. 4135 Returns \c B_OK with \c context.restart set to \c true, if the functions 4136 had to unlock the address space and all caches and is supposed to be called 4137 again. 4138 Returns \c B_OK with \c context.restart set to \c false, if the page was 4139 found. It is returned in \c context.page. The address space will still be 4140 locked as well as all caches starting from the top cache to at least the 4141 cache the page lives in. 4142 */ 4143 static status_t 4144 fault_get_page(PageFaultContext& context) 4145 { 4146 VMCache* cache = context.topCache; 4147 VMCache* lastCache = NULL; 4148 vm_page* page = NULL; 4149 4150 while (cache != NULL) { 4151 // We already hold the lock of the cache at this point. 4152 4153 lastCache = cache; 4154 4155 page = cache->LookupPage(context.cacheOffset); 4156 if (page != NULL && page->busy) { 4157 // page must be busy -- wait for it to become unbusy 4158 context.UnlockAll(cache); 4159 cache->ReleaseRefLocked(); 4160 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4161 4162 // restart the whole process 4163 context.restart = true; 4164 return B_OK; 4165 } 4166 4167 if (page != NULL) 4168 break; 4169 4170 // The current cache does not contain the page we're looking for. 4171 4172 // see if the backing store has it 4173 if (cache->HasPage(context.cacheOffset)) { 4174 // insert a fresh page and mark it busy -- we're going to read it in 4175 page = vm_page_allocate_page(&context.reservation, 4176 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4177 cache->InsertPage(page, context.cacheOffset); 4178 4179 // We need to unlock all caches and the address space while reading 4180 // the page in. Keep a reference to the cache around. 
4181 cache->AcquireRefLocked(); 4182 context.UnlockAll(); 4183 4184 // read the page in 4185 generic_io_vec vec; 4186 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4187 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4188 4189 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4190 B_PHYSICAL_IO_REQUEST, &bytesRead); 4191 4192 cache->Lock(); 4193 4194 if (status < B_OK) { 4195 // on error remove and free the page 4196 dprintf("reading page from cache %p returned: %s!\n", 4197 cache, strerror(status)); 4198 4199 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4200 cache->RemovePage(page); 4201 vm_page_set_state(page, PAGE_STATE_FREE); 4202 4203 cache->ReleaseRefAndUnlock(); 4204 return status; 4205 } 4206 4207 // mark the page unbusy again 4208 cache->MarkPageUnbusy(page); 4209 4210 DEBUG_PAGE_ACCESS_END(page); 4211 4212 // Since we needed to unlock everything temporarily, the area 4213 // situation might have changed. So we need to restart the whole 4214 // process. 4215 cache->ReleaseRefAndUnlock(); 4216 context.restart = true; 4217 return B_OK; 4218 } 4219 4220 cache = context.cacheChainLocker.LockSourceCache(); 4221 } 4222 4223 if (page == NULL) { 4224 // There was no adequate page, determine the cache for a clean one. 4225 // Read-only pages come in the deepest cache, only the top most cache 4226 // may have direct write access. 4227 cache = context.isWrite ? context.topCache : lastCache; 4228 4229 // allocate a clean page 4230 page = vm_page_allocate_page(&context.reservation, 4231 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4232 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4233 page->physical_page_number)); 4234 4235 // insert the new page into our cache 4236 cache->InsertPage(page, context.cacheOffset); 4237 context.pageAllocated = true; 4238 } else if (page->Cache() != context.topCache && context.isWrite) { 4239 // We have a page that has the data we want, but in the wrong cache 4240 // object so we need to copy it and stick it into the top cache. 4241 vm_page* sourcePage = page; 4242 4243 // TODO: If memory is low, it might be a good idea to steal the page 4244 // from our source cache -- if possible, that is. 4245 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4246 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4247 4248 // To not needlessly kill concurrency we unlock all caches but the top 4249 // one while copying the page. Lacking another mechanism to ensure that 4250 // the source page doesn't disappear, we mark it busy. 4251 sourcePage->busy = true; 4252 context.cacheChainLocker.UnlockKeepRefs(true); 4253 4254 // copy the page 4255 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4256 sourcePage->physical_page_number * B_PAGE_SIZE); 4257 4258 context.cacheChainLocker.RelockCaches(true); 4259 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4260 4261 // insert the new page into our cache 4262 context.topCache->InsertPage(page, context.cacheOffset); 4263 context.pageAllocated = true; 4264 } else 4265 DEBUG_PAGE_ACCESS_START(page); 4266 4267 context.page = page; 4268 return B_OK; 4269 } 4270 4271 4272 /*! Makes sure the address in the given address space is mapped. 4273 4274 \param addressSpace The address space. 4275 \param originalAddress The address. Doesn't need to be page aligned. 4276 \param isWrite If \c true the address shall be write-accessible. 4277 \param isUser If \c true the access is requested by a userland team. 
4278 \param wirePage On success, if non \c NULL, the wired count of the page 4279 mapped at the given address is incremented and the page is returned 4280 via this parameter. 4281 \return \c B_OK on success, another error code otherwise. 4282 */ 4283 static status_t 4284 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4285 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage) 4286 { 4287 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4288 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4289 originalAddress, isWrite, isUser)); 4290 4291 PageFaultContext context(addressSpace, isWrite); 4292 4293 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4294 status_t status = B_OK; 4295 4296 addressSpace->IncrementFaultCount(); 4297 4298 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4299 // the pages upfront makes sure we don't have any cache locked, so that the 4300 // page daemon/thief can do their job without problems. 4301 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4302 originalAddress); 4303 context.addressSpaceLocker.Unlock(); 4304 vm_page_reserve_pages(&context.reservation, reservePages, 4305 addressSpace == VMAddressSpace::Kernel() 4306 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4307 4308 while (true) { 4309 context.addressSpaceLocker.Lock(); 4310 4311 // get the area the fault was in 4312 VMArea* area = addressSpace->LookupArea(address); 4313 if (area == NULL) { 4314 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4315 "space\n", originalAddress); 4316 TPF(PageFaultError(-1, 4317 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4318 status = B_BAD_ADDRESS; 4319 break; 4320 } 4321 4322 // check permissions 4323 uint32 protection = get_area_page_protection(area, address); 4324 if (isUser && (protection & B_USER_PROTECTION) == 0 4325 && (area->protection & B_KERNEL_AREA) != 0) { 4326 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4327 area->id, (void*)originalAddress); 4328 TPF(PageFaultError(area->id, 4329 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4330 status = B_PERMISSION_DENIED; 4331 break; 4332 } 4333 if (isWrite && (protection 4334 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4335 dprintf("write access attempted on write-protected area 0x%" 4336 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4337 TPF(PageFaultError(area->id, 4338 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4339 status = B_PERMISSION_DENIED; 4340 break; 4341 } else if (isExecute && (protection 4342 & (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) { 4343 dprintf("instruction fetch attempted on execute-protected area 0x%" 4344 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4345 TPF(PageFaultError(area->id, 4346 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED)); 4347 status = B_PERMISSION_DENIED; 4348 break; 4349 } else if (!isWrite && !isExecute && (protection 4350 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4351 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4352 " at %p\n", area->id, (void*)originalAddress); 4353 TPF(PageFaultError(area->id, 4354 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4355 status = B_PERMISSION_DENIED; 4356 break; 4357 } 4358 4359 // We have the area, it was a valid access, so let's try to resolve the 4360 // page fault now. 4361 // At first, the top most cache from the area is investigated. 
4362 4363 context.Prepare(vm_area_get_locked_cache(area), 4364 address - area->Base() + area->cache_offset); 4365 4366 // See if this cache has a fault handler -- this will do all the work 4367 // for us. 4368 { 4369 // Note, since the page fault is resolved with interrupts enabled, 4370 // the fault handler could be called more than once for the same 4371 // reason -- the store must take this into account. 4372 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4373 if (status != B_BAD_HANDLER) 4374 break; 4375 } 4376 4377 // The topmost cache has no fault handler, so let's see if the cache or 4378 // its sources already have the page we're searching for (we're going 4379 // from top to bottom). 4380 status = fault_get_page(context); 4381 if (status != B_OK) { 4382 TPF(PageFaultError(area->id, status)); 4383 break; 4384 } 4385 4386 if (context.restart) 4387 continue; 4388 4389 // All went fine, all there is left to do is to map the page into the 4390 // address space. 4391 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4392 context.page)); 4393 4394 // If the page doesn't reside in the area's cache, we need to make sure 4395 // it's mapped in read-only, so that we cannot overwrite someone else's 4396 // data (copy-on-write) 4397 uint32 newProtection = protection; 4398 if (context.page->Cache() != context.topCache && !isWrite) 4399 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4400 4401 bool unmapPage = false; 4402 bool mapPage = true; 4403 4404 // check whether there's already a page mapped at the address 4405 context.map->Lock(); 4406 4407 phys_addr_t physicalAddress; 4408 uint32 flags; 4409 vm_page* mappedPage = NULL; 4410 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4411 && (flags & PAGE_PRESENT) != 0 4412 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4413 != NULL) { 4414 // Yep there's already a page. If it's ours, we can simply adjust 4415 // its protection. Otherwise we have to unmap it. 4416 if (mappedPage == context.page) { 4417 context.map->ProtectPage(area, address, newProtection); 4418 // Note: We assume that ProtectPage() is atomic (i.e. 4419 // the page isn't temporarily unmapped), otherwise we'd have 4420 // to make sure it isn't wired. 4421 mapPage = false; 4422 } else 4423 unmapPage = true; 4424 } 4425 4426 context.map->Unlock(); 4427 4428 if (unmapPage) { 4429 // If the page is wired, we can't unmap it. Wait until it is unwired 4430 // again and restart. Note that the page cannot be wired for 4431 // writing, since it isn't in the topmost cache. So we can safely 4432 // ignore ranges wired for writing (our own and other concurrent 4433 // wiring attempts in progress) and in fact have to do that to avoid 4434 // a deadlock. 4435 VMAreaUnwiredWaiter waiter; 4436 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 4437 VMArea::IGNORE_WRITE_WIRED_RANGES)) { 4438 // unlock everything and wait 4439 if (context.pageAllocated) { 4440 // ... but since we allocated a page and inserted it into 4441 // the top cache, remove and free it first. Otherwise we'd 4442 // have a page from a lower cache mapped while an upper 4443 // cache has a page that would shadow it.
context.topCache->RemovePage(context.page); 4445 vm_page_free_etc(context.topCache, context.page, 4446 &context.reservation); 4447 } else 4448 DEBUG_PAGE_ACCESS_END(context.page); 4449 4450 context.UnlockAll(); 4451 waiter.waitEntry.Wait(); 4452 continue; 4453 } 4454 4455 // Note: The mapped page is a page of a lower cache. We are 4456 // guaranteed to have that cache locked, our new page is a copy of 4457 // that page, and the page is not busy. The logic for that guarantee 4458 // is as follows: Since the page is mapped, it must live in the top 4459 // cache (ruled out above) or any of its lower caches, and there is 4460 // (was before the new page was inserted) no other page in any 4461 // cache between the top cache and the page's cache (otherwise that 4462 // would be mapped instead). That in turn means that our algorithm 4463 // must have found it and therefore it cannot be busy either. 4464 DEBUG_PAGE_ACCESS_START(mappedPage); 4465 unmap_page(area, address); 4466 DEBUG_PAGE_ACCESS_END(mappedPage); 4467 } 4468 4469 if (mapPage) { 4470 if (map_page(area, context.page, address, newProtection, 4471 &context.reservation) != B_OK) { 4472 // Mapping can only fail when the page mapping object couldn't 4473 // be allocated. Save for the missing mapping everything is 4474 // fine, though. If this was a regular page fault, we'll simply 4475 // leave and probably fault again. To make sure we'll have more 4476 // luck then, we ensure that the minimum object reserve is 4477 // available. 4478 DEBUG_PAGE_ACCESS_END(context.page); 4479 4480 context.UnlockAll(); 4481 4482 if (object_cache_reserve(page_mapping_object_cache_for( 4483 context.page->physical_page_number), 1, 0) 4484 != B_OK) { 4485 // Apparently the situation is serious. Let's get ourselves 4486 // killed. 4487 status = B_NO_MEMORY; 4488 } else if (wirePage != NULL) { 4489 // The caller expects us to wire the page. Since 4490 // object_cache_reserve() succeeded, we should now be able 4491 // to allocate a mapping structure. Restart.
4492 continue; 4493 } 4494 4495 break; 4496 } 4497 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4498 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4499 4500 // also wire the page, if requested 4501 if (wirePage != NULL && status == B_OK) { 4502 increment_page_wired_count(context.page); 4503 *wirePage = context.page; 4504 } 4505 4506 DEBUG_PAGE_ACCESS_END(context.page); 4507 4508 break; 4509 } 4510 4511 return status; 4512 } 4513 4514 4515 status_t 4516 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4517 { 4518 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4519 } 4520 4521 status_t 4522 vm_put_physical_page(addr_t vaddr, void* handle) 4523 { 4524 return sPhysicalPageMapper->PutPage(vaddr, handle); 4525 } 4526 4527 4528 status_t 4529 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 4530 void** _handle) 4531 { 4532 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4533 } 4534 4535 status_t 4536 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4537 { 4538 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4539 } 4540 4541 4542 status_t 4543 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4544 { 4545 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4546 } 4547 4548 status_t 4549 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4550 { 4551 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4552 } 4553 4554 4555 void 4556 vm_get_info(system_info* info) 4557 { 4558 swap_get_info(info); 4559 4560 MutexLocker locker(sAvailableMemoryLock); 4561 info->needed_memory = sNeededMemory; 4562 info->free_memory = sAvailableMemory; 4563 } 4564 4565 4566 uint32 4567 vm_num_page_faults(void) 4568 { 4569 return sPageFaults; 4570 } 4571 4572 4573 off_t 4574 vm_available_memory(void) 4575 { 4576 MutexLocker locker(sAvailableMemoryLock); 4577 return sAvailableMemory; 4578 } 4579 4580 4581 /*! Like vm_available_memory(), but only for use in the kernel 4582 debugger. 4583 */ 4584 off_t 4585 vm_available_memory_debug(void) 4586 { 4587 return sAvailableMemory; 4588 } 4589 4590 4591 off_t 4592 vm_available_not_needed_memory(void) 4593 { 4594 MutexLocker locker(sAvailableMemoryLock); 4595 return sAvailableMemory - sNeededMemory; 4596 } 4597 4598 4599 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 4600 debugger. 4601 */ 4602 off_t 4603 vm_available_not_needed_memory_debug(void) 4604 { 4605 return sAvailableMemory - sNeededMemory; 4606 } 4607 4608 4609 size_t 4610 vm_kernel_address_space_left(void) 4611 { 4612 return VMAddressSpace::Kernel()->FreeSpace(); 4613 } 4614 4615 4616 void 4617 vm_unreserve_memory(size_t amount) 4618 { 4619 mutex_lock(&sAvailableMemoryLock); 4620 4621 sAvailableMemory += amount; 4622 4623 mutex_unlock(&sAvailableMemoryLock); 4624 } 4625 4626 4627 status_t 4628 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 4629 { 4630 size_t reserve = kMemoryReserveForPriority[priority]; 4631 4632 MutexLocker locker(sAvailableMemoryLock); 4633 4634 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 4635 4636 if (sAvailableMemory >= (off_t)(amount + reserve)) { 4637 sAvailableMemory -= amount; 4638 return B_OK; 4639 } 4640 4641 if (amount >= (vm_page_num_pages() * B_PAGE_SIZE)) { 4642 // Do not wait for something that will never happen. 
4643 return B_NO_MEMORY; 4644 } 4645 4646 if (timeout <= 0) 4647 return B_NO_MEMORY; 4648 4649 // turn timeout into an absolute timeout 4650 timeout += system_time(); 4651 4652 // loop until we've got the memory or the timeout occurs 4653 do { 4654 sNeededMemory += amount; 4655 4656 // call the low resource manager 4657 locker.Unlock(); 4658 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 4659 B_ABSOLUTE_TIMEOUT, timeout); 4660 locker.Lock(); 4661 4662 sNeededMemory -= amount; 4663 4664 if (sAvailableMemory >= (off_t)(amount + reserve)) { 4665 sAvailableMemory -= amount; 4666 return B_OK; 4667 } 4668 } while (timeout > system_time()); 4669 4670 return B_NO_MEMORY; 4671 } 4672 4673 4674 status_t 4675 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 4676 { 4677 // NOTE: The caller is responsible for synchronizing calls to this function! 4678 4679 AddressSpaceReadLocker locker; 4680 VMArea* area; 4681 status_t status = locker.SetFromArea(id, area); 4682 if (status != B_OK) 4683 return status; 4684 4685 // nothing to do, if the type doesn't change 4686 uint32 oldType = area->MemoryType(); 4687 if (type == oldType) 4688 return B_OK; 4689 4690 // set the memory type of the area and the mapped pages 4691 VMTranslationMap* map = area->address_space->TranslationMap(); 4692 map->Lock(); 4693 area->SetMemoryType(type); 4694 map->ProtectArea(area, area->protection); 4695 map->Unlock(); 4696 4697 // set the physical memory type 4698 status_t error = arch_vm_set_memory_type(area, physicalBase, type, NULL); 4699 if (error != B_OK) { 4700 // reset the memory type of the area and the mapped pages 4701 map->Lock(); 4702 area->SetMemoryType(oldType); 4703 map->ProtectArea(area, area->protection); 4704 map->Unlock(); 4705 return error; 4706 } 4707 4708 return B_OK; 4709 4710 } 4711 4712 4713 /*! This function enforces some protection properties: 4714 - kernel areas must be W^X (after kernel startup) 4715 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 4716 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 4717 */ 4718 static void 4719 fix_protection(uint32* protection) 4720 { 4721 if ((*protection & B_KERNEL_EXECUTE_AREA) != 0 4722 && ((*protection & B_KERNEL_WRITE_AREA) != 0 4723 || (*protection & B_WRITE_AREA) != 0) 4724 && !gKernelStartup) 4725 panic("kernel areas cannot be both writable and executable!"); 4726 4727 if ((*protection & B_KERNEL_PROTECTION) == 0) { 4728 if ((*protection & B_WRITE_AREA) != 0) 4729 *protection |= B_KERNEL_WRITE_AREA; 4730 if ((*protection & B_READ_AREA) != 0) 4731 *protection |= B_KERNEL_READ_AREA; 4732 } 4733 } 4734 4735 4736 static void 4737 fill_area_info(struct VMArea* area, area_info* info, size_t size) 4738 { 4739 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 4740 info->area = area->id; 4741 info->address = (void*)area->Base(); 4742 info->size = area->Size(); 4743 info->protection = area->protection; 4744 info->lock = area->wiring; 4745 info->team = area->address_space->ID(); 4746 info->copy_count = 0; 4747 info->in_count = 0; 4748 info->out_count = 0; 4749 // TODO: retrieve real values here! 4750 4751 VMCache* cache = vm_area_get_locked_cache(area); 4752 4753 // Note, this is a simplification; the cache could be larger than this area 4754 info->ram_size = cache->page_count * B_PAGE_SIZE; 4755 4756 vm_area_put_locked_cache(cache); 4757 } 4758 4759 4760 static status_t 4761 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 4762 { 4763 // is newSize a multiple of B_PAGE_SIZE? 
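	// (For example, assuming the usual 4 KiB page size, a newSize of 0x3000
	// passes this check, while 0x3001 does not: any set low-order bits mean
	// the size is not page aligned.)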
4764 if (newSize & (B_PAGE_SIZE - 1)) 4765 return B_BAD_VALUE; 4766 4767 // lock all affected address spaces and the cache 4768 VMArea* area; 4769 VMCache* cache; 4770 4771 MultiAddressSpaceLocker locker; 4772 AreaCacheLocker cacheLocker; 4773 4774 status_t status; 4775 size_t oldSize; 4776 bool anyKernelArea; 4777 bool restart; 4778 4779 do { 4780 anyKernelArea = false; 4781 restart = false; 4782 4783 locker.Unset(); 4784 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 4785 if (status != B_OK) 4786 return status; 4787 cacheLocker.SetTo(cache, true); // already locked 4788 4789 // enforce restrictions 4790 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 4791 || (area->protection & B_KERNEL_AREA) != 0)) { 4792 dprintf("vm_resize_area: team %" B_PRId32 " tried to " 4793 "resize kernel area %" B_PRId32 " (%s)\n", 4794 team_get_current_team_id(), areaID, area->name); 4795 return B_NOT_ALLOWED; 4796 } 4797 // TODO: Enforce all restrictions (team, etc.)! 4798 4799 oldSize = area->Size(); 4800 if (newSize == oldSize) 4801 return B_OK; 4802 4803 if (cache->type != CACHE_TYPE_RAM) 4804 return B_NOT_ALLOWED; 4805 4806 if (oldSize < newSize) { 4807 // We need to check if all areas of this cache can be resized. 4808 for (VMArea* current = cache->areas; current != NULL; 4809 current = current->cache_next) { 4810 if (!current->address_space->CanResizeArea(current, newSize)) 4811 return B_ERROR; 4812 anyKernelArea 4813 |= current->address_space == VMAddressSpace::Kernel(); 4814 } 4815 } else { 4816 // We're shrinking the areas, so we must make sure the affected 4817 // ranges are not wired. 4818 for (VMArea* current = cache->areas; current != NULL; 4819 current = current->cache_next) { 4820 anyKernelArea 4821 |= current->address_space == VMAddressSpace::Kernel(); 4822 4823 if (wait_if_area_range_is_wired(current, 4824 current->Base() + newSize, oldSize - newSize, &locker, 4825 &cacheLocker)) { 4826 restart = true; 4827 break; 4828 } 4829 } 4830 } 4831 } while (restart); 4832 4833 // Okay, looks good so far, so let's do it 4834 4835 int priority = kernel && anyKernelArea 4836 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 4837 uint32 allocationFlags = kernel && anyKernelArea 4838 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 4839 4840 if (oldSize < newSize) { 4841 // Growing the cache can fail, so we do it first. 4842 status = cache->Resize(cache->virtual_base + newSize, priority); 4843 if (status != B_OK) 4844 return status; 4845 } 4846 4847 for (VMArea* current = cache->areas; current != NULL; 4848 current = current->cache_next) { 4849 status = current->address_space->ResizeArea(current, newSize, 4850 allocationFlags); 4851 if (status != B_OK) 4852 break; 4853 4854 // We also need to unmap all pages beyond the new size, if the area has 4855 // shrunk 4856 if (newSize < oldSize) { 4857 VMCacheChainLocker cacheChainLocker(cache); 4858 cacheChainLocker.LockAllSourceCaches(); 4859 4860 unmap_pages(current, current->Base() + newSize, 4861 oldSize - newSize); 4862 4863 cacheChainLocker.Unlock(cache); 4864 } 4865 } 4866 4867 if (status == B_OK) { 4868 // Shrink or grow individual page protections if in use. 
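		// (page_protections packs one four-bit protection nibble per page --
		// two pages per byte, the even-indexed page in the low nibble and the
		// odd-indexed page in the high nibble -- which is why a partial last
		// byte needs the fixup below when the old page count was odd.)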
4869 if (area->page_protections != NULL) { 4870 size_t bytes = area_page_protections_size(newSize); 4871 uint8* newProtections 4872 = (uint8*)realloc(area->page_protections, bytes); 4873 if (newProtections == NULL) 4874 status = B_NO_MEMORY; 4875 else { 4876 area->page_protections = newProtections; 4877 4878 if (oldSize < newSize) { 4879 // init the additional page protections to that of the area 4880 uint32 offset = area_page_protections_size(oldSize); 4881 uint32 areaProtection = area->protection 4882 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 4883 memset(area->page_protections + offset, 4884 areaProtection | (areaProtection << 4), bytes - offset); 4885 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 4886 uint8& entry = area->page_protections[offset - 1]; 4887 entry = (entry & 0x0f) | (areaProtection << 4); 4888 } 4889 } 4890 } 4891 } 4892 } 4893 4894 // shrinking the cache can't fail, so we do it now 4895 if (status == B_OK && newSize < oldSize) 4896 status = cache->Resize(cache->virtual_base + newSize, priority); 4897 4898 if (status != B_OK) { 4899 // Something failed -- resize the areas back to their original size. 4900 // This can fail, too, in which case we're seriously screwed. 4901 for (VMArea* current = cache->areas; current != NULL; 4902 current = current->cache_next) { 4903 if (current->address_space->ResizeArea(current, oldSize, 4904 allocationFlags) != B_OK) { 4905 panic("vm_resize_area(): Failed and not being able to restore " 4906 "original state."); 4907 } 4908 } 4909 4910 cache->Resize(cache->virtual_base + oldSize, priority); 4911 } 4912 4913 // TODO: we must honour the lock restrictions of this area 4914 return status; 4915 } 4916 4917 4918 status_t 4919 vm_memset_physical(phys_addr_t address, int value, phys_size_t length) 4920 { 4921 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 4922 } 4923 4924 4925 status_t 4926 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 4927 { 4928 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 4929 } 4930 4931 4932 status_t 4933 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 4934 bool user) 4935 { 4936 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 4937 } 4938 4939 4940 void 4941 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 4942 { 4943 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 4944 } 4945 4946 4947 /** Validate that a memory range is either fully in kernel space, or fully in 4948 * userspace */ 4949 static inline bool 4950 validate_memory_range(const void* addr, size_t size) 4951 { 4952 addr_t address = (addr_t)addr; 4953 4954 // Check for overflows on all addresses. 4955 if ((address + size) < address) 4956 return false; 4957 4958 // Validate that the address range does not cross the kernel/user boundary. 4959 return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1); 4960 } 4961 4962 4963 // #pragma mark - kernel public API 4964 4965 4966 status_t 4967 user_memcpy(void* to, const void* from, size_t size) 4968 { 4969 if (!validate_memory_range(to, size) || !validate_memory_range(from, size)) 4970 return B_BAD_ADDRESS; 4971 4972 if (arch_cpu_user_memcpy(to, from, size) < B_OK) 4973 return B_BAD_ADDRESS; 4974 4975 return B_OK; 4976 } 4977 4978 4979 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 4980 the string in \a to, NULL-terminating the result. 4981 4982 \param to Pointer to the destination C-string. 
4983 \param from Pointer to the source C-string. 4984 \param size Size in bytes of the string buffer pointed to by \a to. 4985 4986 \return strlen(\a from). 4987 */ 4988 ssize_t 4989 user_strlcpy(char* to, const char* from, size_t size) 4990 { 4991 if (to == NULL && size != 0) 4992 return B_BAD_VALUE; 4993 if (from == NULL) 4994 return B_BAD_ADDRESS; 4995 4996 // Protect the source address from overflows. 4997 size_t maxSize = size; 4998 if ((addr_t)from + maxSize < (addr_t)from) 4999 maxSize -= (addr_t)from + maxSize; 5000 if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize)) 5001 maxSize = USER_TOP - (addr_t)from; 5002 5003 if (!validate_memory_range(to, maxSize)) 5004 return B_BAD_ADDRESS; 5005 5006 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize); 5007 if (result < 0) 5008 return result; 5009 5010 // If we hit the address overflow boundary, fail. 5011 if ((size_t)result >= maxSize && maxSize < size) 5012 return B_BAD_ADDRESS; 5013 5014 return result; 5015 } 5016 5017 5018 status_t 5019 user_memset(void* s, char c, size_t count) 5020 { 5021 if (!validate_memory_range(s, count)) 5022 return B_BAD_ADDRESS; 5023 5024 if (arch_cpu_user_memset(s, c, count) < B_OK) 5025 return B_BAD_ADDRESS; 5026 5027 return B_OK; 5028 } 5029 5030 5031 /*! Wires a single page at the given address. 5032 5033 \param team The team whose address space the address belongs to. Supports 5034 also \c B_CURRENT_TEAM. If the given address is a kernel address, the 5035 parameter is ignored. 5036 \param address address The virtual address to wire down. Does not need to 5037 be page aligned. 5038 \param writable If \c true the page shall be writable. 5039 \param info On success the info is filled in, among other things 5040 containing the physical address the given virtual one translates to. 5041 \return \c B_OK, when the page could be wired, another error code otherwise. 5042 */ 5043 status_t 5044 vm_wire_page(team_id team, addr_t address, bool writable, 5045 VMPageWiringInfo* info) 5046 { 5047 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5048 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5049 5050 // compute the page protection that is required 5051 bool isUser = IS_USER_ADDRESS(address); 5052 uint32 requiredProtection = PAGE_PRESENT 5053 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5054 if (writable) 5055 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5056 5057 // get and read lock the address space 5058 VMAddressSpace* addressSpace = NULL; 5059 if (isUser) { 5060 if (team == B_CURRENT_TEAM) 5061 addressSpace = VMAddressSpace::GetCurrent(); 5062 else 5063 addressSpace = VMAddressSpace::Get(team); 5064 } else 5065 addressSpace = VMAddressSpace::GetKernel(); 5066 if (addressSpace == NULL) 5067 return B_ERROR; 5068 5069 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5070 5071 VMTranslationMap* map = addressSpace->TranslationMap(); 5072 status_t error = B_OK; 5073 5074 // get the area 5075 VMArea* area = addressSpace->LookupArea(pageAddress); 5076 if (area == NULL) { 5077 addressSpace->Put(); 5078 return B_BAD_ADDRESS; 5079 } 5080 5081 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5082 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5083 5084 // mark the area range wired 5085 area->Wire(&info->range); 5086 5087 // Lock the area's cache chain and the translation map. Needed to look 5088 // up the page and play with its wired count. 
5089 cacheChainLocker.LockAllSourceCaches(); 5090 map->Lock(); 5091 5092 phys_addr_t physicalAddress; 5093 uint32 flags; 5094 vm_page* page; 5095 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5096 && (flags & requiredProtection) == requiredProtection 5097 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5098 != NULL) { 5099 // Already mapped with the correct permissions -- just increment 5100 // the page's wired count. 5101 increment_page_wired_count(page); 5102 5103 map->Unlock(); 5104 cacheChainLocker.Unlock(); 5105 addressSpaceLocker.Unlock(); 5106 } else { 5107 // Let vm_soft_fault() map the page for us, if possible. We need 5108 // to fully unlock to avoid deadlocks. Since we have already 5109 // wired the area itself, nothing disturbing will happen with it 5110 // in the meantime. 5111 map->Unlock(); 5112 cacheChainLocker.Unlock(); 5113 addressSpaceLocker.Unlock(); 5114 5115 error = vm_soft_fault(addressSpace, pageAddress, writable, false, 5116 isUser, &page); 5117 5118 if (error != B_OK) { 5119 // The page could not be mapped -- clean up. 5120 VMCache* cache = vm_area_get_locked_cache(area); 5121 area->Unwire(&info->range); 5122 cache->ReleaseRefAndUnlock(); 5123 addressSpace->Put(); 5124 return error; 5125 } 5126 } 5127 5128 info->physicalAddress 5129 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 5130 + address % B_PAGE_SIZE; 5131 info->page = page; 5132 5133 return B_OK; 5134 } 5135 5136 5137 /*! Unwires a single page previously wired via vm_wire_page(). 5138 5139 \param info The same object passed to vm_wire_page() before. 5140 */ 5141 void 5142 vm_unwire_page(VMPageWiringInfo* info) 5143 { 5144 // lock the address space 5145 VMArea* area = info->range.area; 5146 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false); 5147 // takes over our reference 5148 5149 // lock the top cache 5150 VMCache* cache = vm_area_get_locked_cache(area); 5151 VMCacheChainLocker cacheChainLocker(cache); 5152 5153 if (info->page->Cache() != cache) { 5154 // The page is not in the top cache, so we lock the whole cache chain 5155 // before touching the page's wired count. 5156 cacheChainLocker.LockAllSourceCaches(); 5157 } 5158 5159 decrement_page_wired_count(info->page); 5160 5161 // remove the wired range from the range 5162 area->Unwire(&info->range); 5163 5164 cacheChainLocker.Unlock(); 5165 } 5166 5167 5168 /*! Wires down the given address range in the specified team's address space. 5169 5170 If successful the function 5171 - acquires a reference to the specified team's address space, 5172 - adds respective wired ranges to all areas that intersect with the given 5173 address range, 5174 - makes sure all pages in the given address range are mapped with the 5175 requested access permissions and increments their wired count. 5176 5177 It fails, when \a team doesn't specify a valid address space, when any part 5178 of the specified address range is not covered by areas, when the concerned 5179 areas don't allow mapping with the requested permissions, or when mapping 5180 failed for another reason. 5181 5182 When successful the call must be balanced by a unlock_memory_etc() call with 5183 the exact same parameters. 5184 5185 \param team Identifies the address (via team ID). \c B_CURRENT_TEAM is 5186 supported. 5187 \param address The start of the address range to be wired. 5188 \param numBytes The size of the address range to be wired. 5189 \param flags Flags. 
Currently only \c B_READ_DEVICE is defined, which 5190 requests that the range must be wired writable ("read from device 5191 into memory"). 5192 \return \c B_OK on success, another error code otherwise. 5193 */ 5194 status_t 5195 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5196 { 5197 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5198 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5199 5200 // compute the page protection that is required 5201 bool isUser = IS_USER_ADDRESS(address); 5202 bool writable = (flags & B_READ_DEVICE) == 0; 5203 uint32 requiredProtection = PAGE_PRESENT 5204 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5205 if (writable) 5206 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5207 5208 uint32 mallocFlags = isUser 5209 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5210 5211 // get and read lock the address space 5212 VMAddressSpace* addressSpace = NULL; 5213 if (isUser) { 5214 if (team == B_CURRENT_TEAM) 5215 addressSpace = VMAddressSpace::GetCurrent(); 5216 else 5217 addressSpace = VMAddressSpace::Get(team); 5218 } else 5219 addressSpace = VMAddressSpace::GetKernel(); 5220 if (addressSpace == NULL) 5221 return B_ERROR; 5222 5223 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5224 // We get a new address space reference here. The one we got above will 5225 // be freed by unlock_memory_etc(). 5226 5227 VMTranslationMap* map = addressSpace->TranslationMap(); 5228 status_t error = B_OK; 5229 5230 // iterate through all concerned areas 5231 addr_t nextAddress = lockBaseAddress; 5232 while (nextAddress != lockEndAddress) { 5233 // get the next area 5234 VMArea* area = addressSpace->LookupArea(nextAddress); 5235 if (area == NULL) { 5236 error = B_BAD_ADDRESS; 5237 break; 5238 } 5239 5240 addr_t areaStart = nextAddress; 5241 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5242 5243 // allocate the wired range (do that before locking the cache to avoid 5244 // deadlocks) 5245 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5246 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5247 if (range == NULL) { 5248 error = B_NO_MEMORY; 5249 break; 5250 } 5251 5252 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5253 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5254 5255 // mark the area range wired 5256 area->Wire(range); 5257 5258 // Depending on the area cache type and the wiring, we may not need to 5259 // look at the individual pages. 5260 if (area->cache_type == CACHE_TYPE_NULL 5261 || area->cache_type == CACHE_TYPE_DEVICE 5262 || area->wiring == B_FULL_LOCK 5263 || area->wiring == B_CONTIGUOUS) { 5264 nextAddress = areaEnd; 5265 continue; 5266 } 5267 5268 // Lock the area's cache chain and the translation map. Needed to look 5269 // up pages and play with their wired count. 5270 cacheChainLocker.LockAllSourceCaches(); 5271 map->Lock(); 5272 5273 // iterate through the pages and wire them 5274 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5275 phys_addr_t physicalAddress; 5276 uint32 flags; 5277 5278 vm_page* page; 5279 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5280 && (flags & requiredProtection) == requiredProtection 5281 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5282 != NULL) { 5283 // Already mapped with the correct permissions -- just increment 5284 // the page's wired count. 
5285 increment_page_wired_count(page); 5286 } else { 5287 // Let vm_soft_fault() map the page for us, if possible. We need 5288 // to fully unlock to avoid deadlocks. Since we have already 5289 // wired the area itself, nothing disturbing will happen with it 5290 // in the meantime. 5291 map->Unlock(); 5292 cacheChainLocker.Unlock(); 5293 addressSpaceLocker.Unlock(); 5294 5295 error = vm_soft_fault(addressSpace, nextAddress, writable, 5296 false, isUser, &page); 5297 5298 addressSpaceLocker.Lock(); 5299 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5300 cacheChainLocker.LockAllSourceCaches(); 5301 map->Lock(); 5302 } 5303 5304 if (error != B_OK) 5305 break; 5306 } 5307 5308 map->Unlock(); 5309 5310 if (error == B_OK) { 5311 cacheChainLocker.Unlock(); 5312 } else { 5313 // An error occurred, so abort right here. If the current address 5314 // is the first in this area, unwire the area, since we won't get 5315 // to it when reverting what we've done so far. 5316 if (nextAddress == areaStart) { 5317 area->Unwire(range); 5318 cacheChainLocker.Unlock(); 5319 range->~VMAreaWiredRange(); 5320 free_etc(range, mallocFlags); 5321 } else 5322 cacheChainLocker.Unlock(); 5323 5324 break; 5325 } 5326 } 5327 5328 if (error != B_OK) { 5329 // An error occurred, so unwire all that we've already wired. Note that 5330 // even if not a single page was wired, unlock_memory_etc() is called 5331 // to put the address space reference. 5332 addressSpaceLocker.Unlock(); 5333 unlock_memory_etc(team, (void*)lockBaseAddress, 5334 nextAddress - lockBaseAddress, flags); 5335 } 5336 5337 return error; 5338 } 5339 5340 5341 status_t 5342 lock_memory(void* address, size_t numBytes, uint32 flags) 5343 { 5344 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5345 } 5346 5347 5348 /*! Unwires an address range previously wired with lock_memory_etc(). 5349 5350 Note that a call to this function must balance a previous lock_memory_etc() 5351 call with exactly the same parameters. 5352 */ 5353 status_t 5354 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5355 { 5356 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5357 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5358 5359 // compute the page protection that is required 5360 bool isUser = IS_USER_ADDRESS(address); 5361 bool writable = (flags & B_READ_DEVICE) == 0; 5362 uint32 requiredProtection = PAGE_PRESENT 5363 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5364 if (writable) 5365 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5366 5367 uint32 mallocFlags = isUser 5368 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5369 5370 // get and read lock the address space 5371 VMAddressSpace* addressSpace = NULL; 5372 if (isUser) { 5373 if (team == B_CURRENT_TEAM) 5374 addressSpace = VMAddressSpace::GetCurrent(); 5375 else 5376 addressSpace = VMAddressSpace::Get(team); 5377 } else 5378 addressSpace = VMAddressSpace::GetKernel(); 5379 if (addressSpace == NULL) 5380 return B_ERROR; 5381 5382 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false); 5383 // Take over the address space reference. We don't unlock until we're 5384 // done. 
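	// What follows mirrors lock_memory_etc(): for areas that are wired as a
	// whole (B_FULL_LOCK, B_CONTIGUOUS) or whose caches have no pages to
	// unwire (null or device caches) only the wired range is removed; for
	// everything else the wired count of each individual page is decremented.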
5385 5386 VMTranslationMap* map = addressSpace->TranslationMap(); 5387 status_t error = B_OK; 5388 5389 // iterate through all concerned areas 5390 addr_t nextAddress = lockBaseAddress; 5391 while (nextAddress != lockEndAddress) { 5392 // get the next area 5393 VMArea* area = addressSpace->LookupArea(nextAddress); 5394 if (area == NULL) { 5395 error = B_BAD_ADDRESS; 5396 break; 5397 } 5398 5399 addr_t areaStart = nextAddress; 5400 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5401 5402 // Lock the area's top cache. This is a requirement for 5403 // VMArea::Unwire(). 5404 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5405 5406 // Depending on the area cache type and the wiring, we may not need to 5407 // look at the individual pages. 5408 if (area->cache_type == CACHE_TYPE_NULL 5409 || area->cache_type == CACHE_TYPE_DEVICE 5410 || area->wiring == B_FULL_LOCK 5411 || area->wiring == B_CONTIGUOUS) { 5412 // unwire the range (to avoid deadlocks we delete the range after 5413 // unlocking the cache) 5414 nextAddress = areaEnd; 5415 VMAreaWiredRange* range = area->Unwire(areaStart, 5416 areaEnd - areaStart, writable); 5417 cacheChainLocker.Unlock(); 5418 if (range != NULL) { 5419 range->~VMAreaWiredRange(); 5420 free_etc(range, mallocFlags); 5421 } 5422 continue; 5423 } 5424 5425 // Lock the area's cache chain and the translation map. Needed to look 5426 // up pages and play with their wired count. 5427 cacheChainLocker.LockAllSourceCaches(); 5428 map->Lock(); 5429 5430 // iterate through the pages and unwire them 5431 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5432 phys_addr_t physicalAddress; 5433 uint32 flags; 5434 5435 vm_page* page; 5436 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5437 && (flags & PAGE_PRESENT) != 0 5438 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5439 != NULL) { 5440 // Already mapped with the correct permissions -- just increment 5441 // the page's wired count. 5442 decrement_page_wired_count(page); 5443 } else { 5444 panic("unlock_memory_etc(): Failed to unwire page: address " 5445 "space %p, address: %#" B_PRIxADDR, addressSpace, 5446 nextAddress); 5447 error = B_BAD_VALUE; 5448 break; 5449 } 5450 } 5451 5452 map->Unlock(); 5453 5454 // All pages are unwired. Remove the area's wired range as well (to 5455 // avoid deadlocks we delete the range after unlocking the cache). 5456 VMAreaWiredRange* range = area->Unwire(areaStart, 5457 areaEnd - areaStart, writable); 5458 5459 cacheChainLocker.Unlock(); 5460 5461 if (range != NULL) { 5462 range->~VMAreaWiredRange(); 5463 free_etc(range, mallocFlags); 5464 } 5465 5466 if (error != B_OK) 5467 break; 5468 } 5469 5470 // get rid of the address space reference lock_memory_etc() acquired 5471 addressSpace->Put(); 5472 5473 return error; 5474 } 5475 5476 5477 status_t 5478 unlock_memory(void* address, size_t numBytes, uint32 flags) 5479 { 5480 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5481 } 5482 5483 5484 /*! Similar to get_memory_map(), but also allows to specify the address space 5485 for the memory in question and has a saner semantics. 5486 Returns \c B_OK when the complete range could be translated or 5487 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either 5488 case the actual number of entries is written to \c *_numEntries. Any other 5489 error case indicates complete failure; \c *_numEntries will be set to \c 0 5490 in this case. 
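	A usage sketch (illustrative only; the buffer and the table size are
	hypothetical):

		physical_entry entries[8];
		uint32 count = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, bufferSize,
			entries, &count);
		// on B_OK or B_BUFFER_OVERFLOW, "count" holds the number of entries
		// that were actually filled in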
5491 */ 5492 status_t 5493 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 5494 physical_entry* table, uint32* _numEntries) 5495 { 5496 uint32 numEntries = *_numEntries; 5497 *_numEntries = 0; 5498 5499 VMAddressSpace* addressSpace; 5500 addr_t virtualAddress = (addr_t)address; 5501 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 5502 phys_addr_t physicalAddress; 5503 status_t status = B_OK; 5504 int32 index = -1; 5505 addr_t offset = 0; 5506 bool interrupts = are_interrupts_enabled(); 5507 5508 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " " 5509 "entries)\n", team, address, numBytes, numEntries)); 5510 5511 if (numEntries == 0 || numBytes == 0) 5512 return B_BAD_VALUE; 5513 5514 // in which address space is the address to be found? 5515 if (IS_USER_ADDRESS(virtualAddress)) { 5516 if (team == B_CURRENT_TEAM) 5517 addressSpace = VMAddressSpace::GetCurrent(); 5518 else 5519 addressSpace = VMAddressSpace::Get(team); 5520 } else 5521 addressSpace = VMAddressSpace::GetKernel(); 5522 5523 if (addressSpace == NULL) 5524 return B_ERROR; 5525 5526 VMTranslationMap* map = addressSpace->TranslationMap(); 5527 5528 if (interrupts) 5529 map->Lock(); 5530 5531 while (offset < numBytes) { 5532 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 5533 uint32 flags; 5534 5535 if (interrupts) { 5536 status = map->Query((addr_t)address + offset, &physicalAddress, 5537 &flags); 5538 } else { 5539 status = map->QueryInterrupt((addr_t)address + offset, 5540 &physicalAddress, &flags); 5541 } 5542 if (status < B_OK) 5543 break; 5544 if ((flags & PAGE_PRESENT) == 0) { 5545 panic("get_memory_map() called on unmapped memory!"); 5546 return B_BAD_ADDRESS; 5547 } 5548 5549 if (index < 0 && pageOffset > 0) { 5550 physicalAddress += pageOffset; 5551 if (bytes > B_PAGE_SIZE - pageOffset) 5552 bytes = B_PAGE_SIZE - pageOffset; 5553 } 5554 5555 // need to switch to the next physical_entry? 5556 if (index < 0 || table[index].address 5557 != physicalAddress - table[index].size) { 5558 if ((uint32)++index + 1 > numEntries) { 5559 // table to small 5560 break; 5561 } 5562 table[index].address = physicalAddress; 5563 table[index].size = bytes; 5564 } else { 5565 // page does fit in current entry 5566 table[index].size += bytes; 5567 } 5568 5569 offset += bytes; 5570 } 5571 5572 if (interrupts) 5573 map->Unlock(); 5574 5575 if (status != B_OK) 5576 return status; 5577 5578 if ((uint32)index + 1 > numEntries) { 5579 *_numEntries = index; 5580 return B_BUFFER_OVERFLOW; 5581 } 5582 5583 *_numEntries = index + 1; 5584 return B_OK; 5585 } 5586 5587 5588 /*! According to the BeBook, this function should always succeed. 5589 This is no longer the case. 
5590 */ 5591 extern "C" int32 5592 __get_memory_map_haiku(const void* address, size_t numBytes, 5593 physical_entry* table, int32 numEntries) 5594 { 5595 uint32 entriesRead = numEntries; 5596 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 5597 table, &entriesRead); 5598 if (error != B_OK) 5599 return error; 5600 5601 // close the entry list 5602 5603 // if it's only one entry, we will silently accept the missing ending 5604 if (numEntries == 1) 5605 return B_OK; 5606 5607 if (entriesRead + 1 > (uint32)numEntries) 5608 return B_BUFFER_OVERFLOW; 5609 5610 table[entriesRead].address = 0; 5611 table[entriesRead].size = 0; 5612 5613 return B_OK; 5614 } 5615 5616 5617 area_id 5618 area_for(void* address) 5619 { 5620 return vm_area_for((addr_t)address, true); 5621 } 5622 5623 5624 area_id 5625 find_area(const char* name) 5626 { 5627 return VMAreas::Find(name); 5628 } 5629 5630 5631 status_t 5632 _get_area_info(area_id id, area_info* info, size_t size) 5633 { 5634 if (size != sizeof(area_info) || info == NULL) 5635 return B_BAD_VALUE; 5636 5637 AddressSpaceReadLocker locker; 5638 VMArea* area; 5639 status_t status = locker.SetFromArea(id, area); 5640 if (status != B_OK) 5641 return status; 5642 5643 fill_area_info(area, info, size); 5644 return B_OK; 5645 } 5646 5647 5648 status_t 5649 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 5650 { 5651 addr_t nextBase = *(addr_t*)cookie; 5652 5653 // we're already through the list 5654 if (nextBase == (addr_t)-1) 5655 return B_ENTRY_NOT_FOUND; 5656 5657 if (team == B_CURRENT_TEAM) 5658 team = team_get_current_team_id(); 5659 5660 AddressSpaceReadLocker locker(team); 5661 if (!locker.IsLocked()) 5662 return B_BAD_TEAM_ID; 5663 5664 VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false); 5665 if (area == NULL) { 5666 nextBase = (addr_t)-1; 5667 return B_ENTRY_NOT_FOUND; 5668 } 5669 5670 fill_area_info(area, info, size); 5671 *cookie = (ssize_t)(area->Base() + 1); 5672 5673 return B_OK; 5674 } 5675 5676 5677 status_t 5678 set_area_protection(area_id area, uint32 newProtection) 5679 { 5680 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 5681 newProtection, true); 5682 } 5683 5684 5685 status_t 5686 resize_area(area_id areaID, size_t newSize) 5687 { 5688 return vm_resize_area(areaID, newSize, true); 5689 } 5690 5691 5692 /*! Transfers the specified area to a new team. The caller must be the owner 5693 of the area. 5694 */ 5695 area_id 5696 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 5697 bool kernel) 5698 { 5699 area_info info; 5700 status_t status = get_area_info(id, &info); 5701 if (status != B_OK) 5702 return status; 5703 5704 if (!kernel && info.team != thread_get_current_thread()->team->id) 5705 return B_PERMISSION_DENIED; 5706 5707 // We need to mark the area cloneable so the following operations work. 5708 status = set_area_protection(id, info.protection | B_CLONEABLE_AREA); 5709 if (status != B_OK) 5710 return status; 5711 5712 area_id clonedArea = vm_clone_area(target, info.name, _address, 5713 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 5714 if (clonedArea < 0) 5715 return clonedArea; 5716 5717 status = vm_delete_area(info.team, id, kernel); 5718 if (status != B_OK) { 5719 vm_delete_area(target, clonedArea, kernel); 5720 return status; 5721 } 5722 5723 // Now we can reset the protection to whatever it was before. 
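	// (info.protection still holds the protection captured before the
	// temporary B_CLONEABLE_AREA flag was added above.)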
5724 set_area_protection(clonedArea, info.protection); 5725 5726 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 5727 5728 return clonedArea; 5729 } 5730 5731 5732 extern "C" area_id 5733 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 5734 size_t numBytes, uint32 addressSpec, uint32 protection, 5735 void** _virtualAddress) 5736 { 5737 if (!arch_vm_supports_protection(protection)) 5738 return B_NOT_SUPPORTED; 5739 5740 fix_protection(&protection); 5741 5742 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 5743 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 5744 false); 5745 } 5746 5747 5748 area_id 5749 clone_area(const char* name, void** _address, uint32 addressSpec, 5750 uint32 protection, area_id source) 5751 { 5752 if ((protection & B_KERNEL_PROTECTION) == 0) 5753 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 5754 5755 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 5756 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 5757 } 5758 5759 5760 area_id 5761 create_area_etc(team_id team, const char* name, size_t size, uint32 lock, 5762 uint32 protection, uint32 flags, uint32 guardSize, 5763 const virtual_address_restrictions* virtualAddressRestrictions, 5764 const physical_address_restrictions* physicalAddressRestrictions, 5765 void** _address) 5766 { 5767 fix_protection(&protection); 5768 5769 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 5770 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 5771 true, _address); 5772 } 5773 5774 5775 extern "C" area_id 5776 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 5777 size_t size, uint32 lock, uint32 protection) 5778 { 5779 fix_protection(&protection); 5780 5781 virtual_address_restrictions virtualRestrictions = {}; 5782 virtualRestrictions.address = *_address; 5783 virtualRestrictions.address_specification = addressSpec; 5784 physical_address_restrictions physicalRestrictions = {}; 5785 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 5786 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 5787 true, _address); 5788 } 5789 5790 5791 status_t 5792 delete_area(area_id area) 5793 { 5794 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 5795 } 5796 5797 5798 // #pragma mark - Userland syscalls 5799 5800 5801 status_t 5802 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 5803 addr_t size) 5804 { 5805 // filter out some unavailable values (for userland) 5806 switch (addressSpec) { 5807 case B_ANY_KERNEL_ADDRESS: 5808 case B_ANY_KERNEL_BLOCK_ADDRESS: 5809 return B_BAD_VALUE; 5810 } 5811 5812 addr_t address; 5813 5814 if (!IS_USER_ADDRESS(userAddress) 5815 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 5816 return B_BAD_ADDRESS; 5817 5818 status_t status = vm_reserve_address_range( 5819 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 5820 RESERVED_AVOID_BASE); 5821 if (status != B_OK) 5822 return status; 5823 5824 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 5825 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 5826 (void*)address, size); 5827 return B_BAD_ADDRESS; 5828 } 5829 5830 return B_OK; 5831 } 5832 5833 5834 status_t 5835 _user_unreserve_address_range(addr_t address, addr_t size) 5836 { 5837 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 5838 (void*)address, size); 5839 } 5840 5841 5842 area_id 
5843 _user_area_for(void* address) 5844 { 5845 return vm_area_for((addr_t)address, false); 5846 } 5847 5848 5849 area_id 5850 _user_find_area(const char* userName) 5851 { 5852 char name[B_OS_NAME_LENGTH]; 5853 5854 if (!IS_USER_ADDRESS(userName) 5855 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 5856 return B_BAD_ADDRESS; 5857 5858 return find_area(name); 5859 } 5860 5861 5862 status_t 5863 _user_get_area_info(area_id area, area_info* userInfo) 5864 { 5865 if (!IS_USER_ADDRESS(userInfo)) 5866 return B_BAD_ADDRESS; 5867 5868 area_info info; 5869 status_t status = get_area_info(area, &info); 5870 if (status < B_OK) 5871 return status; 5872 5873 // TODO: do we want to prevent userland from seeing kernel protections? 5874 //info.protection &= B_USER_PROTECTION; 5875 5876 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5877 return B_BAD_ADDRESS; 5878 5879 return status; 5880 } 5881 5882 5883 status_t 5884 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 5885 { 5886 ssize_t cookie; 5887 5888 if (!IS_USER_ADDRESS(userCookie) 5889 || !IS_USER_ADDRESS(userInfo) 5890 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 5891 return B_BAD_ADDRESS; 5892 5893 area_info info; 5894 status_t status = _get_next_area_info(team, &cookie, &info, 5895 sizeof(area_info)); 5896 if (status != B_OK) 5897 return status; 5898 5899 //info.protection &= B_USER_PROTECTION; 5900 5901 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 5902 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 5903 return B_BAD_ADDRESS; 5904 5905 return status; 5906 } 5907 5908 5909 status_t 5910 _user_set_area_protection(area_id area, uint32 newProtection) 5911 { 5912 if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0) 5913 return B_BAD_VALUE; 5914 5915 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 5916 newProtection, false); 5917 } 5918 5919 5920 status_t 5921 _user_resize_area(area_id area, size_t newSize) 5922 { 5923 // TODO: Since we restrict deleting of areas to those owned by the team, 5924 // we should also do that for resizing (check other functions, too). 
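	// For now this forwards straight to vm_resize_area() with kernel == false,
	// i.e. without an ownership check of its own.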
5925 return vm_resize_area(area, newSize, false); 5926 } 5927 5928 5929 area_id 5930 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 5931 team_id target) 5932 { 5933 // filter out some unavailable values (for userland) 5934 switch (addressSpec) { 5935 case B_ANY_KERNEL_ADDRESS: 5936 case B_ANY_KERNEL_BLOCK_ADDRESS: 5937 return B_BAD_VALUE; 5938 } 5939 5940 void* address; 5941 if (!IS_USER_ADDRESS(userAddress) 5942 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 5943 return B_BAD_ADDRESS; 5944 5945 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 5946 if (newArea < B_OK) 5947 return newArea; 5948 5949 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 5950 return B_BAD_ADDRESS; 5951 5952 return newArea; 5953 } 5954 5955 5956 area_id 5957 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 5958 uint32 protection, area_id sourceArea) 5959 { 5960 char name[B_OS_NAME_LENGTH]; 5961 void* address; 5962 5963 // filter out some unavailable values (for userland) 5964 switch (addressSpec) { 5965 case B_ANY_KERNEL_ADDRESS: 5966 case B_ANY_KERNEL_BLOCK_ADDRESS: 5967 return B_BAD_VALUE; 5968 } 5969 if ((protection & ~B_USER_AREA_FLAGS) != 0) 5970 return B_BAD_VALUE; 5971 5972 if (!IS_USER_ADDRESS(userName) 5973 || !IS_USER_ADDRESS(userAddress) 5974 || user_strlcpy(name, userName, sizeof(name)) < B_OK 5975 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 5976 return B_BAD_ADDRESS; 5977 5978 fix_protection(&protection); 5979 5980 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 5981 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 5982 false); 5983 if (clonedArea < B_OK) 5984 return clonedArea; 5985 5986 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 5987 delete_area(clonedArea); 5988 return B_BAD_ADDRESS; 5989 } 5990 5991 return clonedArea; 5992 } 5993 5994 5995 area_id 5996 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 5997 size_t size, uint32 lock, uint32 protection) 5998 { 5999 char name[B_OS_NAME_LENGTH]; 6000 void* address; 6001 6002 // filter out some unavailable values (for userland) 6003 switch (addressSpec) { 6004 case B_ANY_KERNEL_ADDRESS: 6005 case B_ANY_KERNEL_BLOCK_ADDRESS: 6006 return B_BAD_VALUE; 6007 } 6008 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6009 return B_BAD_VALUE; 6010 6011 if (!IS_USER_ADDRESS(userName) 6012 || !IS_USER_ADDRESS(userAddress) 6013 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6014 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6015 return B_BAD_ADDRESS; 6016 6017 if (addressSpec == B_EXACT_ADDRESS 6018 && IS_KERNEL_ADDRESS(address)) 6019 return B_BAD_VALUE; 6020 6021 if (addressSpec == B_ANY_ADDRESS) 6022 addressSpec = B_RANDOMIZED_ANY_ADDRESS; 6023 if (addressSpec == B_BASE_ADDRESS) 6024 addressSpec = B_RANDOMIZED_BASE_ADDRESS; 6025 6026 fix_protection(&protection); 6027 6028 virtual_address_restrictions virtualRestrictions = {}; 6029 virtualRestrictions.address = address; 6030 virtualRestrictions.address_specification = addressSpec; 6031 physical_address_restrictions physicalRestrictions = {}; 6032 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6033 size, lock, protection, 0, 0, &virtualRestrictions, 6034 &physicalRestrictions, false, &address); 6035 6036 if (area >= B_OK 6037 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6038 delete_area(area); 6039 return B_BAD_ADDRESS; 6040 
} 6041 6042 return area; 6043 } 6044 6045 6046 status_t 6047 _user_delete_area(area_id area) 6048 { 6049 // Unlike the BeOS implementation, you can now only delete areas 6050 // that you have created yourself from userland. 6051 // The documentation for delete_area() explicitly states that this 6052 // will be restricted in the future, and so it will. 6053 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6054 } 6055 6056 6057 // TODO: create a BeOS style call for this! 6058 6059 area_id 6060 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6061 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6062 int fd, off_t offset) 6063 { 6064 char name[B_OS_NAME_LENGTH]; 6065 void* address; 6066 area_id area; 6067 6068 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6069 return B_BAD_VALUE; 6070 6071 fix_protection(&protection); 6072 6073 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6074 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6075 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6076 return B_BAD_ADDRESS; 6077 6078 if (addressSpec == B_EXACT_ADDRESS) { 6079 if ((addr_t)address + size < (addr_t)address 6080 || (addr_t)address % B_PAGE_SIZE != 0) { 6081 return B_BAD_VALUE; 6082 } 6083 if (!IS_USER_ADDRESS(address) 6084 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6085 return B_BAD_ADDRESS; 6086 } 6087 } 6088 6089 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6090 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6091 false); 6092 if (area < B_OK) 6093 return area; 6094 6095 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6096 return B_BAD_ADDRESS; 6097 6098 return area; 6099 } 6100 6101 6102 status_t 6103 _user_unmap_memory(void* _address, size_t size) 6104 { 6105 addr_t address = (addr_t)_address; 6106 6107 // check params 6108 if (size == 0 || (addr_t)address + size < (addr_t)address 6109 || (addr_t)address % B_PAGE_SIZE != 0) { 6110 return B_BAD_VALUE; 6111 } 6112 6113 if (!IS_USER_ADDRESS(address) 6114 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6115 return B_BAD_ADDRESS; 6116 } 6117 6118 // Write lock the address space and ensure the address range is not wired. 6119 AddressSpaceWriteLocker locker; 6120 do { 6121 status_t status = locker.SetTo(team_get_current_team_id()); 6122 if (status != B_OK) 6123 return status; 6124 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6125 size, &locker)); 6126 6127 // unmap 6128 return unmap_address_range(locker.AddressSpace(), address, size, false); 6129 } 6130 6131 6132 status_t 6133 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6134 { 6135 // check address range 6136 addr_t address = (addr_t)_address; 6137 size = PAGE_ALIGN(size); 6138 6139 if ((address % B_PAGE_SIZE) != 0) 6140 return B_BAD_VALUE; 6141 if (!is_user_address_range(_address, size)) { 6142 // weird error code required by POSIX 6143 return ENOMEM; 6144 } 6145 6146 // extend and check protection 6147 if ((protection & ~B_USER_PROTECTION) != 0) 6148 return B_BAD_VALUE; 6149 6150 fix_protection(&protection); 6151 6152 // We need to write lock the address space, since we're going to play with 6153 // the areas. Also make sure that none of the areas is wired and that we're 6154 // actually allowed to change the protection.
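	// The work is done in two rounds: the first loop below only validates
	// that the whole range may be modified (restarting whenever it had to
	// wait for a wired range), the second one then adjusts the page
	// protections and mappings.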
6155 AddressSpaceWriteLocker locker; 6156 6157 bool restart; 6158 do { 6159 restart = false; 6160 6161 status_t status = locker.SetTo(team_get_current_team_id()); 6162 if (status != B_OK) 6163 return status; 6164 6165 // First round: Check whether the whole range is covered by areas and we 6166 // are allowed to modify them. 6167 addr_t currentAddress = address; 6168 size_t sizeLeft = size; 6169 while (sizeLeft > 0) { 6170 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6171 if (area == NULL) 6172 return B_NO_MEMORY; 6173 6174 if ((area->protection & B_KERNEL_AREA) != 0) 6175 return B_NOT_ALLOWED; 6176 if (area->protection_max != 0 6177 && (protection & area->protection_max) != (protection & B_USER_PROTECTION)) { 6178 return B_NOT_ALLOWED; 6179 } 6180 6181 addr_t offset = currentAddress - area->Base(); 6182 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6183 6184 AreaCacheLocker cacheLocker(area); 6185 6186 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6187 &locker, &cacheLocker)) { 6188 restart = true; 6189 break; 6190 } 6191 6192 cacheLocker.Unlock(); 6193 6194 currentAddress += rangeSize; 6195 sizeLeft -= rangeSize; 6196 } 6197 } while (restart); 6198 6199 // Second round: If the protections differ from that of the area, create a 6200 // page protection array and re-map mapped pages. 6201 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6202 addr_t currentAddress = address; 6203 size_t sizeLeft = size; 6204 while (sizeLeft > 0) { 6205 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6206 if (area == NULL) 6207 return B_NO_MEMORY; 6208 6209 addr_t offset = currentAddress - area->Base(); 6210 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6211 6212 currentAddress += rangeSize; 6213 sizeLeft -= rangeSize; 6214 6215 if (area->page_protections == NULL) { 6216 if (area->protection == protection) 6217 continue; 6218 if (offset == 0 && rangeSize == area->Size()) { 6219 // The whole area is covered: let set_area_protection handle it. 6220 status_t status = vm_set_area_protection(area->address_space->ID(), 6221 area->id, protection, false); 6222 if (status != B_OK) 6223 return status; 6224 continue; 6225 } 6226 6227 status_t status = allocate_area_page_protections(area); 6228 if (status != B_OK) 6229 return status; 6230 } 6231 6232 // We need to lock the complete cache chain, since we potentially unmap 6233 // pages of lower caches. 6234 VMCache* topCache = vm_area_get_locked_cache(area); 6235 VMCacheChainLocker cacheChainLocker(topCache); 6236 cacheChainLocker.LockAllSourceCaches(); 6237 6238 // Adjust the committed size, if necessary. 6239 if (topCache->source != NULL && topCache->temporary) { 6240 const bool becomesWritable = (protection & B_WRITE_AREA) != 0; 6241 ssize_t commitmentChange = 0; 6242 for (addr_t pageAddress = area->Base() + offset; 6243 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6244 if (topCache->LookupPage(pageAddress) != NULL) { 6245 // This page should already be accounted for in the commitment. 
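					// Whether it is writable or not, its commitment charge
					// does not change with the new protection.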
6246 continue; 6247 } 6248 6249 const bool isWritable 6250 = (get_area_page_protection(area, pageAddress) & B_WRITE_AREA) != 0; 6251 6252 if (becomesWritable && !isWritable) 6253 commitmentChange += B_PAGE_SIZE; 6254 else if (!becomesWritable && isWritable) 6255 commitmentChange -= B_PAGE_SIZE; 6256 } 6257 6258 if (commitmentChange != 0) { 6259 const off_t newCommitment = topCache->committed_size + commitmentChange; 6260 ASSERT(newCommitment <= (topCache->virtual_end - topCache->virtual_base)); 6261 status_t status = topCache->Commit(newCommitment, VM_PRIORITY_USER); 6262 if (status != B_OK) 6263 return status; 6264 } 6265 } 6266 6267 for (addr_t pageAddress = area->Base() + offset; 6268 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6269 map->Lock(); 6270 6271 set_area_page_protection(area, pageAddress, protection); 6272 6273 phys_addr_t physicalAddress; 6274 uint32 flags; 6275 6276 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6277 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6278 map->Unlock(); 6279 continue; 6280 } 6281 6282 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6283 if (page == NULL) { 6284 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6285 "\n", area, physicalAddress); 6286 map->Unlock(); 6287 return B_ERROR; 6288 } 6289 6290 // If the page is not in the topmost cache and write access is 6291 // requested, we have to unmap it. Otherwise we can re-map it with 6292 // the new protection. 6293 bool unmapPage = page->Cache() != topCache 6294 && (protection & B_WRITE_AREA) != 0; 6295 6296 if (!unmapPage) 6297 map->ProtectPage(area, pageAddress, protection); 6298 6299 map->Unlock(); 6300 6301 if (unmapPage) { 6302 DEBUG_PAGE_ACCESS_START(page); 6303 unmap_page(area, pageAddress); 6304 DEBUG_PAGE_ACCESS_END(page); 6305 } 6306 } 6307 } 6308 6309 return B_OK; 6310 } 6311 6312 6313 status_t 6314 _user_sync_memory(void* _address, size_t size, uint32 flags) 6315 { 6316 addr_t address = (addr_t)_address; 6317 size = PAGE_ALIGN(size); 6318 6319 // check params 6320 if ((address % B_PAGE_SIZE) != 0) 6321 return B_BAD_VALUE; 6322 if (!is_user_address_range(_address, size)) { 6323 // weird error code required by POSIX 6324 return ENOMEM; 6325 } 6326 6327 bool writeSync = (flags & MS_SYNC) != 0; 6328 bool writeAsync = (flags & MS_ASYNC) != 0; 6329 if (writeSync && writeAsync) 6330 return B_BAD_VALUE; 6331 6332 if (size == 0 || (!writeSync && !writeAsync)) 6333 return B_OK; 6334 6335 // iterate through the range and sync all concerned areas 6336 while (size > 0) { 6337 // read lock the address space 6338 AddressSpaceReadLocker locker; 6339 status_t error = locker.SetTo(team_get_current_team_id()); 6340 if (error != B_OK) 6341 return error; 6342 6343 // get the first area 6344 VMArea* area = locker.AddressSpace()->LookupArea(address); 6345 if (area == NULL) 6346 return B_NO_MEMORY; 6347 6348 uint32 offset = address - area->Base(); 6349 size_t rangeSize = min_c(area->Size() - offset, size); 6350 offset += area->cache_offset; 6351 6352 // lock the cache 6353 AreaCacheLocker cacheLocker(area); 6354 if (!cacheLocker) 6355 return B_BAD_VALUE; 6356 VMCache* cache = area->cache; 6357 6358 locker.Unlock(); 6359 6360 uint32 firstPage = offset >> PAGE_SHIFT; 6361 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6362 6363 // write the pages 6364 if (cache->type == CACHE_TYPE_VNODE) { 6365 if (writeSync) { 6366 // synchronous 6367 error = vm_page_write_modified_page_range(cache, firstPage, 6368 endPage); 6369 if (error != 
B_OK) 6370 return error; 6371 } else { 6372 // asynchronous 6373 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6374 // TODO: This is probably not quite what is supposed to happen. 6375 // Especially when a lot has to be written, it might take ages 6376 // until it really hits the disk. 6377 } 6378 } 6379 6380 address += rangeSize; 6381 size -= rangeSize; 6382 } 6383 6384 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6385 // synchronize multiple mappings of the same file. In our VM they never get 6386 // out of sync, though, so we don't have to do anything. 6387 6388 return B_OK; 6389 } 6390 6391 6392 status_t 6393 _user_memory_advice(void* _address, size_t size, uint32 advice) 6394 { 6395 addr_t address = (addr_t)_address; 6396 if ((address % B_PAGE_SIZE) != 0) 6397 return B_BAD_VALUE; 6398 6399 size = PAGE_ALIGN(size); 6400 if (!is_user_address_range(_address, size)) { 6401 // weird error code required by POSIX 6402 return B_NO_MEMORY; 6403 } 6404 6405 switch (advice) { 6406 case MADV_NORMAL: 6407 case MADV_SEQUENTIAL: 6408 case MADV_RANDOM: 6409 case MADV_WILLNEED: 6410 case MADV_DONTNEED: 6411 // TODO: Implement! 6412 break; 6413 6414 case MADV_FREE: 6415 { 6416 AddressSpaceWriteLocker locker; 6417 do { 6418 status_t status = locker.SetTo(team_get_current_team_id()); 6419 if (status != B_OK) 6420 return status; 6421 } while (wait_if_address_range_is_wired(locker.AddressSpace(), 6422 address, size, &locker)); 6423 6424 discard_address_range(locker.AddressSpace(), address, size, false); 6425 break; 6426 } 6427 6428 default: 6429 return B_BAD_VALUE; 6430 } 6431 6432 return B_OK; 6433 } 6434 6435 6436 status_t 6437 _user_get_memory_properties(team_id teamID, const void* address, 6438 uint32* _protected, uint32* _lock) 6439 { 6440 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 6441 return B_BAD_ADDRESS; 6442 6443 AddressSpaceReadLocker locker; 6444 status_t error = locker.SetTo(teamID); 6445 if (error != B_OK) 6446 return error; 6447 6448 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 6449 if (area == NULL) 6450 return B_NO_MEMORY; 6451 6452 uint32 protection = get_area_page_protection(area, (addr_t)address); 6453 uint32 wiring = area->wiring; 6454 6455 locker.Unlock(); 6456 6457 error = user_memcpy(_protected, &protection, sizeof(protection)); 6458 if (error != B_OK) 6459 return error; 6460 6461 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 6462 6463 return error; 6464 } 6465 6466 6467 static status_t 6468 user_set_memory_swappable(const void* _address, size_t size, bool swappable) 6469 { 6470 #if ENABLE_SWAP_SUPPORT 6471 // check address range 6472 addr_t address = (addr_t)_address; 6473 size = PAGE_ALIGN(size); 6474 6475 if ((address % B_PAGE_SIZE) != 0) 6476 return EINVAL; 6477 if (!is_user_address_range(_address, size)) 6478 return EINVAL; 6479 6480 const addr_t endAddress = address + size; 6481 6482 AddressSpaceReadLocker addressSpaceLocker; 6483 status_t error = addressSpaceLocker.SetTo(team_get_current_team_id()); 6484 if (error != B_OK) 6485 return error; 6486 VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace(); 6487 6488 // iterate through all concerned areas 6489 addr_t nextAddress = address; 6490 while (nextAddress != endAddress) { 6491 // get the next area 6492 VMArea* area = addressSpace->LookupArea(nextAddress); 6493 if (area == NULL) { 6494 error = B_BAD_ADDRESS; 6495 break; 6496 } 6497 6498 const addr_t areaStart = nextAddress; 6499 const addr_t areaEnd = std::min(endAddress, 
area->Base() + area->Size()); 6500 nextAddress = areaEnd; 6501 6502 error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 6503 if (error != B_OK) { 6504 // We don't need to unset or reset things on failure. 6505 break; 6506 } 6507 6508 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 6509 VMAnonymousCache* anonCache = NULL; 6510 if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) { 6511 // This memory already can never be swapped. Nothing to do. 6512 } else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) { 6513 error = anonCache->SetCanSwapPages(areaStart - area->Base(), 6514 areaEnd - areaStart, swappable); 6515 } else { 6516 // Some other cache type? We cannot affect anything here. 6517 error = EINVAL; 6518 } 6519 6520 cacheChainLocker.Unlock(); 6521 6522 unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 6523 if (error != B_OK) 6524 break; 6525 } 6526 6527 return error; 6528 #else 6529 // No swap support? Nothing to do. 6530 return B_OK; 6531 #endif 6532 } 6533 6534 6535 status_t 6536 _user_mlock(const void* _address, size_t size) 6537 { 6538 return user_set_memory_swappable(_address, size, false); 6539 } 6540 6541 6542 status_t 6543 _user_munlock(const void* _address, size_t size) 6544 { 6545 // TODO: B_SHARED_AREAs need to be handled a bit differently: 6546 // if multiple clones of an area had mlock() called on them, 6547 // munlock() must also be called on all of them to actually unlock. 6548 // (At present, the first munlock() will unlock all.) 6549 // TODO: fork() should automatically unlock memory in the child. 6550 return user_set_memory_swappable(_address, size, true); 6551 } 6552 6553 6554 // #pragma mark -- compatibility 6555 6556 6557 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 6558 6559 6560 struct physical_entry_beos { 6561 uint32 address; 6562 uint32 size; 6563 }; 6564 6565 6566 /*! The physical_entry structure has changed. We need to translate it to the 6567 old one. 6568 */ 6569 extern "C" int32 6570 __get_memory_map_beos(const void* _address, size_t numBytes, 6571 physical_entry_beos* table, int32 numEntries) 6572 { 6573 if (numEntries <= 0) 6574 return B_BAD_VALUE; 6575 6576 const uint8* address = (const uint8*)_address; 6577 6578 int32 count = 0; 6579 while (numBytes > 0 && count < numEntries) { 6580 physical_entry entry; 6581 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 6582 if (result < 0) { 6583 if (result != B_BUFFER_OVERFLOW) 6584 return result; 6585 } 6586 6587 if (entry.address >= (phys_addr_t)1 << 32) { 6588 panic("get_memory_map(): Address is greater than 4 GB!"); 6589 return B_ERROR; 6590 } 6591 6592 table[count].address = entry.address; 6593 table[count++].size = entry.size; 6594 6595 address += entry.size; 6596 numBytes -= entry.size; 6597 } 6598 6599 // null-terminate the table, if possible 6600 if (count < numEntries) { 6601 table[count].address = 0; 6602 table[count].size = 0; 6603 } 6604 6605 return B_OK; 6606 } 6607 6608 6609 /*! The type of the \a physicalAddress parameter has changed from void* to 6610 phys_addr_t. 6611 */ 6612 extern "C" area_id 6613 __map_physical_memory_beos(const char* name, void* physicalAddress, 6614 size_t numBytes, uint32 addressSpec, uint32 protection, 6615 void** _virtualAddress) 6616 { 6617 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 6618 addressSpec, protection, _virtualAddress); 6619 } 6620 6621 6622 /*!
The caller might not be able to deal with physical addresses >= 4 GB, so 6623 we meddle with the \a lock parameter to force 32 bit. 6624 */ 6625 extern "C" area_id 6626 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 6627 size_t size, uint32 lock, uint32 protection) 6628 { 6629 switch (lock) { 6630 case B_NO_LOCK: 6631 break; 6632 case B_FULL_LOCK: 6633 case B_LAZY_LOCK: 6634 lock = B_32_BIT_FULL_LOCK; 6635 break; 6636 case B_CONTIGUOUS: 6637 lock = B_32_BIT_CONTIGUOUS; 6638 break; 6639 } 6640 6641 return __create_area_haiku(name, _address, addressSpec, size, lock, 6642 protection); 6643 } 6644 6645 6646 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 6647 "BASE"); 6648 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 6649 "map_physical_memory@", "BASE"); 6650 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 6651 "BASE"); 6652 6653 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 6654 "get_memory_map@@", "1_ALPHA3"); 6655 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 6656 "map_physical_memory@@", "1_ALPHA3"); 6657 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 6658 "1_ALPHA3"); 6659 6660 6661 #else 6662 6663 6664 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 6665 "get_memory_map@@", "BASE"); 6666 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 6667 "map_physical_memory@@", "BASE"); 6668 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 6669 "BASE"); 6670 6671 6672 #endif // defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 6673
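// Illustrative note (not part of the original source): the symbol versioning
// above is what keeps old binaries working. On 32 bit x86 with more than
// 32 physical address bits, a binary that imports "get_memory_map",
// "map_physical_memory" or "create_area" through the old "BASE" version is
// routed to the __*_beos compatibility wrappers defined in this section,
// while code built against current headers binds to the "@@" default
// versions and thus to the __*_haiku implementations with the wider
// phys_addr_t based types. On all other targets only the Haiku variants are
// exported.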