1 /* 2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleterDrivers.h> 25 26 #include <symbol_versioning.h> 27 28 #include <arch/cpu.h> 29 #include <arch/vm.h> 30 #include <arch/user_memory.h> 31 #include <boot/elf.h> 32 #include <boot/stage2.h> 33 #include <condition_variable.h> 34 #include <console.h> 35 #include <debug.h> 36 #include <file_cache.h> 37 #include <fs/fd.h> 38 #include <heap.h> 39 #include <kernel.h> 40 #include <int.h> 41 #include <lock.h> 42 #include <low_resource_manager.h> 43 #include <slab/Slab.h> 44 #include <smp.h> 45 #include <system_info.h> 46 #include <thread.h> 47 #include <team.h> 48 #include <tracing.h> 49 #include <util/AutoLock.h> 50 #include <util/BitUtils.h> 51 #include <util/ThreadAutoLock.h> 52 #include <vm/vm_page.h> 53 #include <vm/vm_priv.h> 54 #include <vm/VMAddressSpace.h> 55 #include <vm/VMArea.h> 56 #include <vm/VMCache.h> 57 58 #include "VMAddressSpaceLocking.h" 59 #include "VMAnonymousCache.h" 60 #include "VMAnonymousNoSwapCache.h" 61 #include "IORequest.h" 62 63 64 //#define TRACE_VM 65 //#define TRACE_FAULTS 66 #ifdef TRACE_VM 67 # define TRACE(x) dprintf x 68 #else 69 # define TRACE(x) ; 70 #endif 71 #ifdef TRACE_FAULTS 72 # define FTRACE(x) dprintf x 73 #else 74 # define FTRACE(x) ; 75 #endif 76 77 78 namespace { 79 80 class AreaCacheLocking { 81 public: 82 inline bool Lock(VMCache* lockable) 83 { 84 return false; 85 } 86 87 inline void Unlock(VMCache* lockable) 88 { 89 vm_area_put_locked_cache(lockable); 90 } 91 }; 92 93 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 94 public: 95 inline AreaCacheLocker(VMCache* cache = NULL) 96 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 97 { 98 } 99 100 inline AreaCacheLocker(VMArea* area) 101 : AutoLocker<VMCache, AreaCacheLocking>() 102 { 103 SetTo(area); 104 } 105 106 inline void SetTo(VMCache* cache, bool alreadyLocked) 107 { 108 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 109 } 110 111 inline void SetTo(VMArea* area) 112 { 113 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 114 area != NULL ? 
			vm_area_get_locked_cache(area) : NULL, true, true);
	}
};


class VMCacheChainLocker {
public:
	VMCacheChainLocker()
		:
		fTopCache(NULL),
		fBottomCache(NULL)
	{
	}

	VMCacheChainLocker(VMCache* topCache)
		:
		fTopCache(topCache),
		fBottomCache(topCache)
	{
	}

	~VMCacheChainLocker()
	{
		Unlock();
	}

	void SetTo(VMCache* topCache)
	{
		fTopCache = topCache;
		fBottomCache = topCache;

		if (topCache != NULL)
			topCache->SetUserData(NULL);
	}

	VMCache* LockSourceCache()
	{
		if (fBottomCache == NULL || fBottomCache->source == NULL)
			return NULL;

		VMCache* previousCache = fBottomCache;

		fBottomCache = fBottomCache->source;
		fBottomCache->Lock();
		fBottomCache->AcquireRefLocked();
		fBottomCache->SetUserData(previousCache);

		return fBottomCache;
	}

	void LockAllSourceCaches()
	{
		while (LockSourceCache() != NULL) {
		}
	}

	void Unlock(VMCache* exceptCache = NULL)
	{
		if (fTopCache == NULL)
			return;

		// Unlock caches in source -> consumer direction. This is important to
		// avoid double-locking and a reversal of locking order in case a cache
		// is eligible for merging.
		VMCache* cache = fBottomCache;
		while (cache != NULL) {
			VMCache* nextCache = (VMCache*)cache->UserData();
			if (cache != exceptCache)
				cache->ReleaseRefAndUnlock(cache != fTopCache);

			if (cache == fTopCache)
				break;

			cache = nextCache;
		}

		fTopCache = NULL;
		fBottomCache = NULL;
	}

	void UnlockKeepRefs(bool keepTopCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fBottomCache;
		VMCache* cache = NULL;

		while (keepTopCacheLocked
				? nextCache != fTopCache : cache != fTopCache) {
			cache = nextCache;
			nextCache = (VMCache*)cache->UserData();
			cache->Unlock(cache != fTopCache);
		}
	}

	void RelockCaches(bool topCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fTopCache;
		VMCache* cache = NULL;
		if (topCacheLocked) {
			cache = nextCache;
			nextCache = cache->source;
		}

		while (cache != fBottomCache && nextCache != NULL) {
			VMCache* consumer = cache;
			cache = nextCache;
			nextCache = cache->source;
			cache->Lock();
			cache->SetUserData(consumer);
		}
	}

private:
	VMCache*	fTopCache;
	VMCache*	fBottomCache;
};

} // namespace


// The memory reserve that an allocation of a given priority must not touch.
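// (Presumably indexed by the allocation priority constants used elsewhere in
// this file: VM_PRIORITY_USER, VM_PRIORITY_SYSTEM and VM_PRIORITY_VIP, in that
// order.)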
240 static const size_t kMemoryReserveForPriority[] = { 241 VM_MEMORY_RESERVE_USER, // user 242 VM_MEMORY_RESERVE_SYSTEM, // system 243 0 // VIP 244 }; 245 246 247 ObjectCache* gPageMappingsObjectCache; 248 249 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 250 251 static off_t sAvailableMemory; 252 static off_t sNeededMemory; 253 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 254 static uint32 sPageFaults; 255 256 static VMPhysicalPageMapper* sPhysicalPageMapper; 257 258 #if DEBUG_CACHE_LIST 259 260 struct cache_info { 261 VMCache* cache; 262 addr_t page_count; 263 addr_t committed; 264 }; 265 266 static const int kCacheInfoTableCount = 100 * 1024; 267 static cache_info* sCacheInfoTable; 268 269 #endif // DEBUG_CACHE_LIST 270 271 272 // function declarations 273 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 274 bool addressSpaceCleanup); 275 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 276 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage); 277 static status_t map_backing_store(VMAddressSpace* addressSpace, 278 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring, 279 int protection, int protectionMax, int mapping, uint32 flags, 280 const virtual_address_restrictions* addressRestrictions, bool kernel, 281 VMArea** _area, void** _virtualAddress); 282 static void fix_protection(uint32* protection); 283 284 285 // #pragma mark - 286 287 288 #if VM_PAGE_FAULT_TRACING 289 290 namespace VMPageFaultTracing { 291 292 class PageFaultStart : public AbstractTraceEntry { 293 public: 294 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 295 : 296 fAddress(address), 297 fPC(pc), 298 fWrite(write), 299 fUser(user) 300 { 301 Initialized(); 302 } 303 304 virtual void AddDump(TraceOutput& out) 305 { 306 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 307 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 308 } 309 310 private: 311 addr_t fAddress; 312 addr_t fPC; 313 bool fWrite; 314 bool fUser; 315 }; 316 317 318 // page fault errors 319 enum { 320 PAGE_FAULT_ERROR_NO_AREA = 0, 321 PAGE_FAULT_ERROR_KERNEL_ONLY, 322 PAGE_FAULT_ERROR_WRITE_PROTECTED, 323 PAGE_FAULT_ERROR_READ_PROTECTED, 324 PAGE_FAULT_ERROR_EXECUTE_PROTECTED, 325 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 326 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 327 }; 328 329 330 class PageFaultError : public AbstractTraceEntry { 331 public: 332 PageFaultError(area_id area, status_t error) 333 : 334 fArea(area), 335 fError(error) 336 { 337 Initialized(); 338 } 339 340 virtual void AddDump(TraceOutput& out) 341 { 342 switch (fError) { 343 case PAGE_FAULT_ERROR_NO_AREA: 344 out.Print("page fault error: no area"); 345 break; 346 case PAGE_FAULT_ERROR_KERNEL_ONLY: 347 out.Print("page fault error: area: %ld, kernel only", fArea); 348 break; 349 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 350 out.Print("page fault error: area: %ld, write protected", 351 fArea); 352 break; 353 case PAGE_FAULT_ERROR_READ_PROTECTED: 354 out.Print("page fault error: area: %ld, read protected", fArea); 355 break; 356 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED: 357 out.Print("page fault error: area: %ld, execute protected", 358 fArea); 359 break; 360 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 361 out.Print("page fault error: kernel touching bad user memory"); 362 break; 363 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 364 out.Print("page fault error: no address space"); 365 break; 366 default: 367 out.Print("page fault error: area: %ld, error: %s", fArea, 368 strerror(fError)); 369 break; 370 } 371 } 372 373 private: 374 area_id fArea; 375 status_t fError; 376 }; 377 378 379 class PageFaultDone : public AbstractTraceEntry { 380 public: 381 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 382 vm_page* page) 383 : 384 fArea(area), 385 fTopCache(topCache), 386 fCache(cache), 387 fPage(page) 388 { 389 Initialized(); 390 } 391 392 virtual void AddDump(TraceOutput& out) 393 { 394 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 395 "page: %p", fArea, fTopCache, fCache, fPage); 396 } 397 398 private: 399 area_id fArea; 400 VMCache* fTopCache; 401 VMCache* fCache; 402 vm_page* fPage; 403 }; 404 405 } // namespace VMPageFaultTracing 406 407 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 408 #else 409 # define TPF(x) ; 410 #endif // VM_PAGE_FAULT_TRACING 411 412 413 // #pragma mark - 414 415 416 /*! The page's cache must be locked. 417 */ 418 static inline void 419 increment_page_wired_count(vm_page* page) 420 { 421 if (!page->IsMapped()) 422 atomic_add(&gMappedPagesCount, 1); 423 page->IncrementWiredCount(); 424 } 425 426 427 /*! The page's cache must be locked. 428 */ 429 static inline void 430 decrement_page_wired_count(vm_page* page) 431 { 432 page->DecrementWiredCount(); 433 if (!page->IsMapped()) 434 atomic_add(&gMappedPagesCount, -1); 435 } 436 437 438 static inline addr_t 439 virtual_page_address(VMArea* area, vm_page* page) 440 { 441 return area->Base() 442 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 443 } 444 445 446 static inline bool 447 is_page_in_area(VMArea* area, vm_page* page) 448 { 449 off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT); 450 return pageCacheOffsetBytes >= area->cache_offset 451 && pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size(); 452 } 453 454 455 //! 
You need to have the address space locked when calling this function 456 static VMArea* 457 lookup_area(VMAddressSpace* addressSpace, area_id id) 458 { 459 VMAreas::ReadLock(); 460 461 VMArea* area = VMAreas::LookupLocked(id); 462 if (area != NULL && area->address_space != addressSpace) 463 area = NULL; 464 465 VMAreas::ReadUnlock(); 466 467 return area; 468 } 469 470 471 static inline size_t 472 area_page_protections_size(size_t areaSize) 473 { 474 // In the page protections we store only the three user protections, 475 // so we use 4 bits per page. 476 return (areaSize / B_PAGE_SIZE + 1) / 2; 477 } 478 479 480 static status_t 481 allocate_area_page_protections(VMArea* area) 482 { 483 size_t bytes = area_page_protections_size(area->Size()); 484 area->page_protections = (uint8*)malloc_etc(bytes, 485 area->address_space == VMAddressSpace::Kernel() 486 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 487 if (area->page_protections == NULL) 488 return B_NO_MEMORY; 489 490 // init the page protections for all pages to that of the area 491 uint32 areaProtection = area->protection 492 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 493 memset(area->page_protections, areaProtection | (areaProtection << 4), 494 bytes); 495 return B_OK; 496 } 497 498 499 static inline void 500 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 501 { 502 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 503 addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 504 uint8& entry = area->page_protections[pageIndex / 2]; 505 if (pageIndex % 2 == 0) 506 entry = (entry & 0xf0) | protection; 507 else 508 entry = (entry & 0x0f) | (protection << 4); 509 } 510 511 512 static inline uint32 513 get_area_page_protection(VMArea* area, addr_t pageAddress) 514 { 515 if (area->page_protections == NULL) 516 return area->protection; 517 518 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 519 uint32 protection = area->page_protections[pageIndex / 2]; 520 if (pageIndex % 2 == 0) 521 protection &= 0x0f; 522 else 523 protection >>= 4; 524 525 uint32 kernelProtection = 0; 526 if ((protection & B_READ_AREA) != 0) 527 kernelProtection |= B_KERNEL_READ_AREA; 528 if ((protection & B_WRITE_AREA) != 0) 529 kernelProtection |= B_KERNEL_WRITE_AREA; 530 531 // If this is a kernel area we return only the kernel flags. 532 if (area->address_space == VMAddressSpace::Kernel()) 533 return kernelProtection; 534 535 return protection | kernelProtection; 536 } 537 538 539 static inline uint8* 540 realloc_page_protections(uint8* pageProtections, size_t areaSize, 541 uint32 allocationFlags) 542 { 543 size_t bytes = area_page_protections_size(areaSize); 544 return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags); 545 } 546 547 548 /*! The caller must have reserved enough pages the translation map 549 implementation might need to map this page. 550 The page's cache must be locked. 551 */ 552 static status_t 553 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 554 vm_page_reservation* reservation) 555 { 556 VMTranslationMap* map = area->address_space->TranslationMap(); 557 558 bool wasMapped = page->IsMapped(); 559 560 if (area->wiring == B_NO_LOCK) { 561 DEBUG_PAGE_ACCESS_CHECK(page); 562 563 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 564 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 565 gPageMappingsObjectCache, 566 CACHE_DONT_WAIT_FOR_MEMORY 567 | (isKernelSpace ? 
CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 568 if (mapping == NULL) 569 return B_NO_MEMORY; 570 571 mapping->page = page; 572 mapping->area = area; 573 574 map->Lock(); 575 576 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 577 area->MemoryType(), reservation); 578 579 // insert mapping into lists 580 if (!page->IsMapped()) 581 atomic_add(&gMappedPagesCount, 1); 582 583 page->mappings.Add(mapping); 584 area->mappings.Add(mapping); 585 586 map->Unlock(); 587 } else { 588 DEBUG_PAGE_ACCESS_CHECK(page); 589 590 map->Lock(); 591 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 592 area->MemoryType(), reservation); 593 map->Unlock(); 594 595 increment_page_wired_count(page); 596 } 597 598 if (!wasMapped) { 599 // The page is mapped now, so we must not remain in the cached queue. 600 // It also makes sense to move it from the inactive to the active, since 601 // otherwise the page daemon wouldn't come to keep track of it (in idle 602 // mode) -- if the page isn't touched, it will be deactivated after a 603 // full iteration through the queue at the latest. 604 if (page->State() == PAGE_STATE_CACHED 605 || page->State() == PAGE_STATE_INACTIVE) { 606 vm_page_set_state(page, PAGE_STATE_ACTIVE); 607 } 608 } 609 610 return B_OK; 611 } 612 613 614 /*! If \a preserveModified is \c true, the caller must hold the lock of the 615 page's cache. 616 */ 617 static inline bool 618 unmap_page(VMArea* area, addr_t virtualAddress) 619 { 620 return area->address_space->TranslationMap()->UnmapPage(area, 621 virtualAddress, true); 622 } 623 624 625 /*! If \a preserveModified is \c true, the caller must hold the lock of all 626 mapped pages' caches. 627 */ 628 static inline void 629 unmap_pages(VMArea* area, addr_t base, size_t size) 630 { 631 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 632 } 633 634 635 static inline bool 636 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset) 637 { 638 if (address < area->Base()) { 639 offset = area->Base() - address; 640 if (offset >= size) 641 return false; 642 643 address = area->Base(); 644 size -= offset; 645 offset = 0; 646 if (size > area->Size()) 647 size = area->Size(); 648 649 return true; 650 } 651 652 offset = address - area->Base(); 653 if (offset >= area->Size()) 654 return false; 655 656 if (size >= area->Size() - offset) 657 size = area->Size() - offset; 658 659 return true; 660 } 661 662 663 /*! Cuts a piece out of an area. If the given cut range covers the complete 664 area, it is deleted. If it covers the beginning or the end, the area is 665 resized accordingly. If the range covers some part in the middle of the 666 area, it is split in two; in this case the second area is returned via 667 \a _secondArea (the variable is left untouched in the other cases). 668 The address space must be write locked. 669 The caller must ensure that no part of the given range is wired. 670 */ 671 static status_t 672 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 673 addr_t size, VMArea** _secondArea, bool kernel) 674 { 675 addr_t offset; 676 if (!intersect_area(area, address, size, offset)) 677 return B_OK; 678 679 // Is the area fully covered? 
680 if (address == area->Base() && size == area->Size()) { 681 delete_area(addressSpace, area, false); 682 return B_OK; 683 } 684 685 int priority; 686 uint32 allocationFlags; 687 if (addressSpace == VMAddressSpace::Kernel()) { 688 priority = VM_PRIORITY_SYSTEM; 689 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 690 | HEAP_DONT_LOCK_KERNEL_SPACE; 691 } else { 692 priority = VM_PRIORITY_USER; 693 allocationFlags = 0; 694 } 695 696 VMCache* cache = vm_area_get_locked_cache(area); 697 VMCacheChainLocker cacheChainLocker(cache); 698 cacheChainLocker.LockAllSourceCaches(); 699 700 // If no one else uses the area's cache and it's an anonymous cache, we can 701 // resize or split it, too. 702 bool onlyCacheUser = cache->areas == area && area->cache_next == NULL 703 && cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM; 704 705 const addr_t oldSize = area->Size(); 706 707 // Cut the end only? 708 if (offset > 0 && size == area->Size() - offset) { 709 status_t error = addressSpace->ShrinkAreaTail(area, offset, 710 allocationFlags); 711 if (error != B_OK) 712 return error; 713 714 if (area->page_protections != NULL) { 715 uint8* newProtections = realloc_page_protections( 716 area->page_protections, area->Size(), allocationFlags); 717 718 if (newProtections == NULL) { 719 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 720 return B_NO_MEMORY; 721 } 722 723 area->page_protections = newProtections; 724 } 725 726 // unmap pages 727 unmap_pages(area, address, size); 728 729 if (onlyCacheUser) { 730 // Since VMCache::Resize() can temporarily drop the lock, we must 731 // unlock all lower caches to prevent locking order inversion. 732 cacheChainLocker.Unlock(cache); 733 cache->Resize(cache->virtual_base + offset, priority); 734 cache->ReleaseRefAndUnlock(); 735 } 736 737 return B_OK; 738 } 739 740 // Cut the beginning only? 741 if (area->Base() == address) { 742 uint8* newProtections = NULL; 743 if (area->page_protections != NULL) { 744 // Allocate all memory before shifting as the shift might lose some 745 // bits. 746 newProtections = realloc_page_protections(NULL, area->Size(), 747 allocationFlags); 748 749 if (newProtections == NULL) 750 return B_NO_MEMORY; 751 } 752 753 // resize the area 754 status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size, 755 allocationFlags); 756 if (error != B_OK) { 757 if (newProtections != NULL) 758 free_etc(newProtections, allocationFlags); 759 return error; 760 } 761 762 if (area->page_protections != NULL) { 763 size_t oldBytes = area_page_protections_size(oldSize); 764 ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE; 765 bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4)); 766 767 size_t bytes = area_page_protections_size(area->Size()); 768 memcpy(newProtections, area->page_protections, bytes); 769 free_etc(area->page_protections, allocationFlags); 770 area->page_protections = newProtections; 771 } 772 773 // unmap pages 774 unmap_pages(area, address, size); 775 776 if (onlyCacheUser) { 777 // Since VMCache::Rebase() can temporarily drop the lock, we must 778 // unlock all lower caches to prevent locking order inversion. 779 cacheChainLocker.Unlock(cache); 780 cache->Rebase(cache->virtual_base + size, priority); 781 cache->ReleaseRefAndUnlock(); 782 } 783 area->cache_offset += size; 784 785 return B_OK; 786 } 787 788 // The tough part -- cut a piece out of the middle of the area. 
789 // We do that by shrinking the area to the begin section and creating a 790 // new area for the end section. 791 addr_t firstNewSize = offset; 792 addr_t secondBase = address + size; 793 addr_t secondSize = area->Size() - offset - size; 794 795 // unmap pages 796 unmap_pages(area, address, area->Size() - firstNewSize); 797 798 // resize the area 799 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 800 allocationFlags); 801 if (error != B_OK) 802 return error; 803 804 uint8* areaNewProtections = NULL; 805 uint8* secondAreaNewProtections = NULL; 806 807 // Try to allocate the new memory before making some hard to reverse 808 // changes. 809 if (area->page_protections != NULL) { 810 areaNewProtections = realloc_page_protections(NULL, area->Size(), 811 allocationFlags); 812 secondAreaNewProtections = realloc_page_protections(NULL, secondSize, 813 allocationFlags); 814 815 if (areaNewProtections == NULL || secondAreaNewProtections == NULL) { 816 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 817 free_etc(areaNewProtections, allocationFlags); 818 free_etc(secondAreaNewProtections, allocationFlags); 819 return B_NO_MEMORY; 820 } 821 } 822 823 virtual_address_restrictions addressRestrictions = {}; 824 addressRestrictions.address = (void*)secondBase; 825 addressRestrictions.address_specification = B_EXACT_ADDRESS; 826 VMArea* secondArea; 827 828 if (onlyCacheUser) { 829 // Create a new cache for the second area. 830 VMCache* secondCache; 831 error = VMCacheFactory::CreateAnonymousCache(secondCache, 832 area->protection & B_OVERCOMMITTING_AREA, 0, 0, 833 dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority); 834 if (error != B_OK) { 835 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 836 free_etc(areaNewProtections, allocationFlags); 837 free_etc(secondAreaNewProtections, allocationFlags); 838 return error; 839 } 840 841 secondCache->Lock(); 842 secondCache->temporary = cache->temporary; 843 secondCache->virtual_base = area->cache_offset; 844 secondCache->virtual_end = area->cache_offset + secondSize; 845 846 // Transfer the concerned pages from the first cache. 847 off_t adoptOffset = area->cache_offset + secondBase - area->Base(); 848 error = secondCache->Adopt(cache, adoptOffset, secondSize, 849 area->cache_offset); 850 851 if (error == B_OK) { 852 // Since VMCache::Resize() can temporarily drop the lock, we must 853 // unlock all lower caches to prevent locking order inversion. 854 cacheChainLocker.Unlock(cache); 855 cache->Resize(cache->virtual_base + firstNewSize, priority); 856 // Don't unlock the cache yet because we might have to resize it 857 // back. 858 859 // Map the second area. 860 error = map_backing_store(addressSpace, secondCache, 861 area->cache_offset, area->name, secondSize, area->wiring, 862 area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0, 863 &addressRestrictions, kernel, &secondArea, NULL); 864 } 865 866 if (error != B_OK) { 867 // Restore the original cache. 868 cache->Resize(cache->virtual_base + oldSize, priority); 869 870 // Move the pages back. 871 status_t readoptStatus = cache->Adopt(secondCache, 872 area->cache_offset, secondSize, adoptOffset); 873 if (readoptStatus != B_OK) { 874 // Some (swap) pages have not been moved back and will be lost 875 // once the second cache is deleted. 876 panic("failed to restore cache range: %s", 877 strerror(readoptStatus)); 878 879 // TODO: Handle out of memory cases by freeing memory and 880 // retrying. 
881 } 882 883 cache->ReleaseRefAndUnlock(); 884 secondCache->ReleaseRefAndUnlock(); 885 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 886 free_etc(areaNewProtections, allocationFlags); 887 free_etc(secondAreaNewProtections, allocationFlags); 888 return error; 889 } 890 891 // Now we can unlock it. 892 cache->ReleaseRefAndUnlock(); 893 secondCache->Unlock(); 894 } else { 895 error = map_backing_store(addressSpace, cache, area->cache_offset 896 + (secondBase - area->Base()), 897 area->name, secondSize, area->wiring, area->protection, 898 area->protection_max, REGION_NO_PRIVATE_MAP, 0, 899 &addressRestrictions, kernel, &secondArea, NULL); 900 if (error != B_OK) { 901 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 902 free_etc(areaNewProtections, allocationFlags); 903 free_etc(secondAreaNewProtections, allocationFlags); 904 return error; 905 } 906 // We need a cache reference for the new area. 907 cache->AcquireRefLocked(); 908 } 909 910 if (area->page_protections != NULL) { 911 // Copy the protection bits of the first area. 912 size_t areaBytes = area_page_protections_size(area->Size()); 913 memcpy(areaNewProtections, area->page_protections, areaBytes); 914 uint8* areaOldProtections = area->page_protections; 915 area->page_protections = areaNewProtections; 916 917 // Shift the protection bits of the second area to the start of 918 // the old array. 919 size_t oldBytes = area_page_protections_size(oldSize); 920 addr_t secondAreaOffset = secondBase - area->Base(); 921 ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE; 922 bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4)); 923 924 // Copy the protection bits of the second area. 925 size_t secondAreaBytes = area_page_protections_size(secondSize); 926 memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes); 927 secondArea->page_protections = secondAreaNewProtections; 928 929 // We don't need this anymore. 930 free_etc(areaOldProtections, allocationFlags); 931 932 // Set the correct page protections for the second area. 933 VMTranslationMap* map = addressSpace->TranslationMap(); 934 map->Lock(); 935 for (VMCachePagesTree::Iterator it 936 = secondArea->cache->pages.GetIterator(); 937 vm_page* page = it.Next();) { 938 if (is_page_in_area(secondArea, page)) { 939 addr_t address = virtual_page_address(secondArea, page); 940 uint32 pageProtection 941 = get_area_page_protection(secondArea, address); 942 map->ProtectPage(secondArea, address, pageProtection); 943 } 944 } 945 map->Unlock(); 946 } 947 948 if (_secondArea != NULL) 949 *_secondArea = secondArea; 950 951 return B_OK; 952 } 953 954 955 /*! Deletes or cuts all areas in the given address range. 956 The address space must be write-locked. 957 The caller must ensure that no part of the given range is wired. 958 */ 959 static status_t 960 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 961 bool kernel) 962 { 963 size = PAGE_ALIGN(size); 964 965 // Check, whether the caller is allowed to modify the concerned areas. 
966 if (!kernel) { 967 for (VMAddressSpace::AreaRangeIterator it 968 = addressSpace->GetAreaRangeIterator(address, size); 969 VMArea* area = it.Next();) { 970 971 if ((area->protection & B_KERNEL_AREA) != 0) { 972 dprintf("unmap_address_range: team %" B_PRId32 " tried to " 973 "unmap range of kernel area %" B_PRId32 " (%s)\n", 974 team_get_current_team_id(), area->id, area->name); 975 return B_NOT_ALLOWED; 976 } 977 } 978 } 979 980 for (VMAddressSpace::AreaRangeIterator it 981 = addressSpace->GetAreaRangeIterator(address, size); 982 VMArea* area = it.Next();) { 983 984 status_t error = cut_area(addressSpace, area, address, size, NULL, 985 kernel); 986 if (error != B_OK) 987 return error; 988 // Failing after already messing with areas is ugly, but we 989 // can't do anything about it. 990 } 991 992 return B_OK; 993 } 994 995 996 static status_t 997 discard_area_range(VMArea* area, addr_t address, addr_t size) 998 { 999 addr_t offset; 1000 if (!intersect_area(area, address, size, offset)) 1001 return B_OK; 1002 1003 // If someone else uses the area's cache or it's not an anonymous cache, we 1004 // can't discard. 1005 VMCache* cache = vm_area_get_locked_cache(area); 1006 if (cache->areas != area || area->cache_next != NULL 1007 || !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) { 1008 return B_OK; 1009 } 1010 1011 VMCacheChainLocker cacheChainLocker(cache); 1012 cacheChainLocker.LockAllSourceCaches(); 1013 1014 unmap_pages(area, address, size); 1015 1016 // Since VMCache::Discard() can temporarily drop the lock, we must 1017 // unlock all lower caches to prevent locking order inversion. 1018 cacheChainLocker.Unlock(cache); 1019 cache->Discard(cache->virtual_base + offset, size); 1020 cache->ReleaseRefAndUnlock(); 1021 1022 return B_OK; 1023 } 1024 1025 1026 static status_t 1027 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 1028 bool kernel) 1029 { 1030 for (VMAddressSpace::AreaRangeIterator it 1031 = addressSpace->GetAreaRangeIterator(address, size); 1032 VMArea* area = it.Next();) { 1033 status_t error = discard_area_range(area, address, size); 1034 if (error != B_OK) 1035 return error; 1036 } 1037 1038 return B_OK; 1039 } 1040 1041 1042 /*! You need to hold the lock of the cache and the write lock of the address 1043 space when calling this function. 1044 Note, that in case of error your cache will be temporarily unlocked. 1045 If \a addressSpec is \c B_EXACT_ADDRESS and the 1046 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 1047 that no part of the specified address range (base \c *_virtualAddress, size 1048 \a size) is wired. The cache will also be temporarily unlocked. 
*/
static status_t
map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
	const char* areaName, addr_t size, int wiring, int protection,
	int protectionMax, int mapping,
	uint32 flags, const virtual_address_restrictions* addressRestrictions,
	bool kernel, VMArea** _area, void** _virtualAddress)
{
	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
		addressSpace, cache, addressRestrictions->address, offset, size,
		addressRestrictions->address_specification, wiring, protection,
		protectionMax, _area, areaName));
	cache->AssertLocked();

	if (size == 0) {
#if KDEBUG
		panic("map_backing_store(): called with size=0 for area '%s'!",
			areaName);
#endif
		return B_BAD_VALUE;
	}
	if (offset < 0)
		return B_BAD_VALUE;

	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
	int priority;
	if (addressSpace != VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_USER;
	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
		priority = VM_PRIORITY_VIP;
		allocationFlags |= HEAP_PRIORITY_VIP;
	} else
		priority = VM_PRIORITY_SYSTEM;

	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
		allocationFlags);
	if (area == NULL)
		return B_NO_MEMORY;
	if (mapping != REGION_PRIVATE_MAP)
		area->protection_max = protectionMax & B_USER_PROTECTION;

	status_t status;

	// if this is a private map, we need to create a new cache
	// to handle the private copies of pages as they are written to
	VMCache* sourceCache = cache;
	if (mapping == REGION_PRIVATE_MAP) {
		VMCache* newCache;

		// create an anonymous cache
		status = VMCacheFactory::CreateAnonymousCache(newCache,
			(protection & B_STACK_AREA) != 0
				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
		if (status != B_OK)
			goto err1;

		newCache->Lock();
		newCache->temporary = 1;
		newCache->virtual_base = offset;
		newCache->virtual_end = offset + size;

		cache->AddConsumer(newCache);

		cache = newCache;
	}

	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
		status = cache->SetMinimalCommitment(size, priority);
		if (status != B_OK)
			goto err2;
	}

	// check to see if this address space has entered DELETE state
	if (addressSpace->IsBeingDeleted()) {
		// Someone is trying to delete this address space now, so we can't
		// insert the area; back out.
		status = B_BAD_TEAM_ID;
		goto err2;
	}

	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
		// temporarily unlock the current cache since it might be mapped to
		// some existing area, and unmap_address_range also needs to lock that
		// cache to delete the area.
1138 cache->Unlock(); 1139 status = unmap_address_range(addressSpace, 1140 (addr_t)addressRestrictions->address, size, kernel); 1141 cache->Lock(); 1142 if (status != B_OK) 1143 goto err2; 1144 } 1145 1146 status = addressSpace->InsertArea(area, size, addressRestrictions, 1147 allocationFlags, _virtualAddress); 1148 if (status == B_NO_MEMORY 1149 && addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) { 1150 // Due to how many locks are held, we cannot wait here for space to be 1151 // freed up, but we can at least notify the low_resource handler. 1152 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0); 1153 } 1154 if (status != B_OK) 1155 goto err2; 1156 1157 // attach the cache to the area 1158 area->cache = cache; 1159 area->cache_offset = offset; 1160 1161 // point the cache back to the area 1162 cache->InsertAreaLocked(area); 1163 if (mapping == REGION_PRIVATE_MAP) 1164 cache->Unlock(); 1165 1166 // insert the area in the global areas map 1167 VMAreas::Insert(area); 1168 1169 // grab a ref to the address space (the area holds this) 1170 addressSpace->Get(); 1171 1172 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1173 // cache, sourceCache, areaName, area); 1174 1175 *_area = area; 1176 return B_OK; 1177 1178 err2: 1179 if (mapping == REGION_PRIVATE_MAP) { 1180 // We created this cache, so we must delete it again. Note, that we 1181 // need to temporarily unlock the source cache or we'll otherwise 1182 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1183 sourceCache->Unlock(); 1184 cache->ReleaseRefAndUnlock(); 1185 sourceCache->Lock(); 1186 } 1187 err1: 1188 addressSpace->DeleteArea(area, allocationFlags); 1189 return status; 1190 } 1191 1192 1193 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 1194 locker1, locker2). 1195 */ 1196 template<typename LockerType1, typename LockerType2> 1197 static inline bool 1198 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 1199 { 1200 area->cache->AssertLocked(); 1201 1202 VMAreaUnwiredWaiter waiter; 1203 if (!area->AddWaiterIfWired(&waiter)) 1204 return false; 1205 1206 // unlock everything and wait 1207 if (locker1 != NULL) 1208 locker1->Unlock(); 1209 if (locker2 != NULL) 1210 locker2->Unlock(); 1211 1212 waiter.waitEntry.Wait(); 1213 1214 return true; 1215 } 1216 1217 1218 /*! Checks whether the given area has any wired ranges intersecting with the 1219 specified range and waits, if so. 1220 1221 When it has to wait, the function calls \c Unlock() on both \a locker1 1222 and \a locker2, if given. 1223 The area's top cache must be locked and must be unlocked as a side effect 1224 of calling \c Unlock() on either \a locker1 or \a locker2. 1225 1226 If the function does not have to wait it does not modify or unlock any 1227 object. 1228 1229 \param area The area to be checked. 1230 \param base The base address of the range to check. 1231 \param size The size of the address range to check. 1232 \param locker1 An object to be unlocked when before starting to wait (may 1233 be \c NULL). 1234 \param locker2 An object to be unlocked when before starting to wait (may 1235 be \c NULL). 1236 \return \c true, if the function had to wait, \c false otherwise. 
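
	A call site normally loops until no more waiting was necessary,
	re-acquiring its locks and re-validating the area after every wait, since
	everything is unlocked while waiting. A minimal sketch (the locker names
	are illustrative, not taken from an actual caller):

		while (wait_if_area_range_is_wired(area, base, size,
				&addressSpaceLocker, &cacheLocker)) {
			// All locks were released; re-lock the address space and the
			// area's top cache, and look the area up again before retrying.
		}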
1237 */ 1238 template<typename LockerType1, typename LockerType2> 1239 static inline bool 1240 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 1241 LockerType1* locker1, LockerType2* locker2) 1242 { 1243 area->cache->AssertLocked(); 1244 1245 VMAreaUnwiredWaiter waiter; 1246 if (!area->AddWaiterIfWired(&waiter, base, size)) 1247 return false; 1248 1249 // unlock everything and wait 1250 if (locker1 != NULL) 1251 locker1->Unlock(); 1252 if (locker2 != NULL) 1253 locker2->Unlock(); 1254 1255 waiter.waitEntry.Wait(); 1256 1257 return true; 1258 } 1259 1260 1261 /*! Checks whether the given address space has any wired ranges intersecting 1262 with the specified range and waits, if so. 1263 1264 Similar to wait_if_area_range_is_wired(), with the following differences: 1265 - All areas intersecting with the range are checked (respectively all until 1266 one is found that contains a wired range intersecting with the given 1267 range). 1268 - The given address space must at least be read-locked and must be unlocked 1269 when \c Unlock() is called on \a locker. 1270 - None of the areas' caches are allowed to be locked. 1271 */ 1272 template<typename LockerType> 1273 static inline bool 1274 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 1275 size_t size, LockerType* locker) 1276 { 1277 for (VMAddressSpace::AreaRangeIterator it 1278 = addressSpace->GetAreaRangeIterator(base, size); 1279 VMArea* area = it.Next();) { 1280 1281 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area)); 1282 1283 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker)) 1284 return true; 1285 } 1286 1287 return false; 1288 } 1289 1290 1291 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection(). 1292 It must be called in a situation where the kernel address space may be 1293 locked. 1294 */ 1295 status_t 1296 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie) 1297 { 1298 AddressSpaceReadLocker locker; 1299 VMArea* area; 1300 status_t status = locker.SetFromArea(id, area); 1301 if (status != B_OK) 1302 return status; 1303 1304 if (area->page_protections == NULL) { 1305 status = allocate_area_page_protections(area); 1306 if (status != B_OK) 1307 return status; 1308 } 1309 1310 *cookie = (void*)area; 1311 return B_OK; 1312 } 1313 1314 1315 /*! This is a debug helper function that can only be used with very specific 1316 use cases. 1317 Sets protection for the given address range to the protection specified. 1318 If \a protection is 0 then the involved pages will be marked non-present 1319 in the translation map to cause a fault on access. The pages aren't 1320 actually unmapped however so that they can be marked present again with 1321 additional calls to this function. For this to work the area must be 1322 fully locked in memory so that the pages aren't otherwise touched. 1323 This function does not lock the kernel address space and needs to be 1324 supplied with a \a cookie retrieved from a successful call to 1325 vm_prepare_kernel_area_debug_protection(). 
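
	Expected usage looks roughly like this (a sketch; \c areaID and
	\c pageAddress are placeholders for a fully locked kernel area and a page
	within it):

		void* cookie;
		if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
			// make the page fault on any access ...
			vm_set_kernel_area_debug_protection(cookie, pageAddress,
				B_PAGE_SIZE, 0);
			// ... and later make it accessible again
			vm_set_kernel_area_debug_protection(cookie, pageAddress,
				B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		}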
1326 */ 1327 status_t 1328 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size, 1329 uint32 protection) 1330 { 1331 // check address range 1332 addr_t address = (addr_t)_address; 1333 size = PAGE_ALIGN(size); 1334 1335 if ((address % B_PAGE_SIZE) != 0 1336 || (addr_t)address + size < (addr_t)address 1337 || !IS_KERNEL_ADDRESS(address) 1338 || !IS_KERNEL_ADDRESS((addr_t)address + size)) { 1339 return B_BAD_VALUE; 1340 } 1341 1342 // Translate the kernel protection to user protection as we only store that. 1343 if ((protection & B_KERNEL_READ_AREA) != 0) 1344 protection |= B_READ_AREA; 1345 if ((protection & B_KERNEL_WRITE_AREA) != 0) 1346 protection |= B_WRITE_AREA; 1347 1348 VMAddressSpace* addressSpace = VMAddressSpace::GetKernel(); 1349 VMTranslationMap* map = addressSpace->TranslationMap(); 1350 VMArea* area = (VMArea*)cookie; 1351 1352 addr_t offset = address - area->Base(); 1353 if (area->Size() - offset < size) { 1354 panic("protect range not fully within supplied area"); 1355 return B_BAD_VALUE; 1356 } 1357 1358 if (area->page_protections == NULL) { 1359 panic("area has no page protections"); 1360 return B_BAD_VALUE; 1361 } 1362 1363 // Invalidate the mapping entries so any access to them will fault or 1364 // restore the mapping entries unchanged so that lookup will success again. 1365 map->Lock(); 1366 map->DebugMarkRangePresent(address, address + size, protection != 0); 1367 map->Unlock(); 1368 1369 // And set the proper page protections so that the fault case will actually 1370 // fail and not simply try to map a new page. 1371 for (addr_t pageAddress = address; pageAddress < address + size; 1372 pageAddress += B_PAGE_SIZE) { 1373 set_area_page_protection(area, pageAddress, protection); 1374 } 1375 1376 return B_OK; 1377 } 1378 1379 1380 status_t 1381 vm_block_address_range(const char* name, void* address, addr_t size) 1382 { 1383 if (!arch_vm_supports_protection(0)) 1384 return B_NOT_SUPPORTED; 1385 1386 AddressSpaceWriteLocker locker; 1387 status_t status = locker.SetTo(VMAddressSpace::KernelID()); 1388 if (status != B_OK) 1389 return status; 1390 1391 VMAddressSpace* addressSpace = locker.AddressSpace(); 1392 1393 // create an anonymous cache 1394 VMCache* cache; 1395 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false, 1396 VM_PRIORITY_SYSTEM); 1397 if (status != B_OK) 1398 return status; 1399 1400 cache->temporary = 1; 1401 cache->virtual_end = size; 1402 cache->Lock(); 1403 1404 VMArea* area; 1405 virtual_address_restrictions addressRestrictions = {}; 1406 addressRestrictions.address = address; 1407 addressRestrictions.address_specification = B_EXACT_ADDRESS; 1408 status = map_backing_store(addressSpace, cache, 0, name, size, 1409 B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions, 1410 true, &area, NULL); 1411 if (status != B_OK) { 1412 cache->ReleaseRefAndUnlock(); 1413 return status; 1414 } 1415 1416 cache->Unlock(); 1417 area->cache_type = CACHE_TYPE_RAM; 1418 return area->id; 1419 } 1420 1421 1422 status_t 1423 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1424 { 1425 AddressSpaceWriteLocker locker(team); 1426 if (!locker.IsLocked()) 1427 return B_BAD_TEAM_ID; 1428 1429 VMAddressSpace* addressSpace = locker.AddressSpace(); 1430 return addressSpace->UnreserveAddressRange((addr_t)address, size, 1431 addressSpace == VMAddressSpace::Kernel() 1432 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1433 } 1434 1435 1436 status_t 1437 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1438 addr_t size, uint32 flags) 1439 { 1440 if (size == 0) 1441 return B_BAD_VALUE; 1442 1443 AddressSpaceWriteLocker locker(team); 1444 if (!locker.IsLocked()) 1445 return B_BAD_TEAM_ID; 1446 1447 virtual_address_restrictions addressRestrictions = {}; 1448 addressRestrictions.address = *_address; 1449 addressRestrictions.address_specification = addressSpec; 1450 VMAddressSpace* addressSpace = locker.AddressSpace(); 1451 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags, 1452 addressSpace == VMAddressSpace::Kernel() 1453 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0, 1454 _address); 1455 } 1456 1457 1458 area_id 1459 vm_create_anonymous_area(team_id team, const char *name, addr_t size, 1460 uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize, 1461 const virtual_address_restrictions* virtualAddressRestrictions, 1462 const physical_address_restrictions* physicalAddressRestrictions, 1463 bool kernel, void** _address) 1464 { 1465 VMArea* area; 1466 VMCache* cache; 1467 vm_page* page = NULL; 1468 bool isStack = (protection & B_STACK_AREA) != 0; 1469 page_num_t guardPages; 1470 bool canOvercommit = false; 1471 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1472 ? VM_PAGE_ALLOC_CLEAR : 0; 1473 1474 TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n", 1475 team, name, size)); 1476 1477 size = PAGE_ALIGN(size); 1478 guardSize = PAGE_ALIGN(guardSize); 1479 guardPages = guardSize / B_PAGE_SIZE; 1480 1481 if (size == 0 || size < guardSize) 1482 return B_BAD_VALUE; 1483 if (!arch_vm_supports_protection(protection)) 1484 return B_NOT_SUPPORTED; 1485 1486 if (team == B_CURRENT_TEAM) 1487 team = VMAddressSpace::CurrentID(); 1488 if (team < 0) 1489 return B_BAD_TEAM_ID; 1490 1491 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1492 canOvercommit = true; 1493 1494 #ifdef DEBUG_KERNEL_STACKS 1495 if ((protection & B_KERNEL_STACK_AREA) != 0) 1496 isStack = true; 1497 #endif 1498 1499 // check parameters 1500 switch (virtualAddressRestrictions->address_specification) { 1501 case B_ANY_ADDRESS: 1502 case B_EXACT_ADDRESS: 1503 case B_BASE_ADDRESS: 1504 case B_ANY_KERNEL_ADDRESS: 1505 case B_ANY_KERNEL_BLOCK_ADDRESS: 1506 case B_RANDOMIZED_ANY_ADDRESS: 1507 case B_RANDOMIZED_BASE_ADDRESS: 1508 break; 1509 1510 default: 1511 return B_BAD_VALUE; 1512 } 1513 1514 // If low or high physical address restrictions are given, we force 1515 // B_CONTIGUOUS wiring, since only then we'll use 1516 // vm_page_allocate_page_run() which deals with those restrictions. 
1517 if (physicalAddressRestrictions->low_address != 0 1518 || physicalAddressRestrictions->high_address != 0) { 1519 wiring = B_CONTIGUOUS; 1520 } 1521 1522 physical_address_restrictions stackPhysicalRestrictions; 1523 bool doReserveMemory = false; 1524 switch (wiring) { 1525 case B_NO_LOCK: 1526 break; 1527 case B_FULL_LOCK: 1528 case B_LAZY_LOCK: 1529 case B_CONTIGUOUS: 1530 doReserveMemory = true; 1531 break; 1532 case B_ALREADY_WIRED: 1533 break; 1534 case B_LOMEM: 1535 stackPhysicalRestrictions = *physicalAddressRestrictions; 1536 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024; 1537 physicalAddressRestrictions = &stackPhysicalRestrictions; 1538 wiring = B_CONTIGUOUS; 1539 doReserveMemory = true; 1540 break; 1541 case B_32_BIT_FULL_LOCK: 1542 if (B_HAIKU_PHYSICAL_BITS <= 32 1543 || (uint64)vm_page_max_address() < (uint64)1 << 32) { 1544 wiring = B_FULL_LOCK; 1545 doReserveMemory = true; 1546 break; 1547 } 1548 // TODO: We don't really support this mode efficiently. Just fall 1549 // through for now ... 1550 case B_32_BIT_CONTIGUOUS: 1551 #if B_HAIKU_PHYSICAL_BITS > 32 1552 if (vm_page_max_address() >= (phys_addr_t)1 << 32) { 1553 stackPhysicalRestrictions = *physicalAddressRestrictions; 1554 stackPhysicalRestrictions.high_address 1555 = (phys_addr_t)1 << 32; 1556 physicalAddressRestrictions = &stackPhysicalRestrictions; 1557 } 1558 #endif 1559 wiring = B_CONTIGUOUS; 1560 doReserveMemory = true; 1561 break; 1562 default: 1563 return B_BAD_VALUE; 1564 } 1565 1566 // Optimization: For a single-page contiguous allocation without low/high 1567 // memory restriction B_FULL_LOCK wiring suffices. 1568 if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE 1569 && physicalAddressRestrictions->low_address == 0 1570 && physicalAddressRestrictions->high_address == 0) { 1571 wiring = B_FULL_LOCK; 1572 } 1573 1574 // For full lock or contiguous areas we're also going to map the pages and 1575 // thus need to reserve pages for the mapping backend upfront. 1576 addr_t reservedMapPages = 0; 1577 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1578 AddressSpaceWriteLocker locker; 1579 status_t status = locker.SetTo(team); 1580 if (status != B_OK) 1581 return status; 1582 1583 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1584 reservedMapPages = map->MaxPagesNeededToMap(0, size - 1); 1585 } 1586 1587 int priority; 1588 if (team != VMAddressSpace::KernelID()) 1589 priority = VM_PRIORITY_USER; 1590 else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) 1591 priority = VM_PRIORITY_VIP; 1592 else 1593 priority = VM_PRIORITY_SYSTEM; 1594 1595 // Reserve memory before acquiring the address space lock. This reduces the 1596 // chances of failure, since while holding the write lock to the address 1597 // space (if it is the kernel address space that is), the low memory handler 1598 // won't be able to free anything for us. 1599 addr_t reservedMemory = 0; 1600 if (doReserveMemory) { 1601 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1602 if (vm_try_reserve_memory(size, priority, timeout) != B_OK) 1603 return B_NO_MEMORY; 1604 reservedMemory = size; 1605 // TODO: We don't reserve the memory for the pages for the page 1606 // directories/tables. We actually need to do since we currently don't 1607 // reclaim them (and probably can't reclaim all of them anyway). Thus 1608 // there are actually less physical pages than there should be, which 1609 // can get the VM into trouble in low memory situations. 
1610 } 1611 1612 AddressSpaceWriteLocker locker; 1613 VMAddressSpace* addressSpace; 1614 status_t status; 1615 1616 // For full lock areas reserve the pages before locking the address 1617 // space. E.g. block caches can't release their memory while we hold the 1618 // address space lock. 1619 page_num_t reservedPages = reservedMapPages; 1620 if (wiring == B_FULL_LOCK) 1621 reservedPages += size / B_PAGE_SIZE; 1622 1623 vm_page_reservation reservation; 1624 if (reservedPages > 0) { 1625 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1626 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1627 priority)) { 1628 reservedPages = 0; 1629 status = B_WOULD_BLOCK; 1630 goto err0; 1631 } 1632 } else 1633 vm_page_reserve_pages(&reservation, reservedPages, priority); 1634 } 1635 1636 if (wiring == B_CONTIGUOUS) { 1637 // we try to allocate the page run here upfront as this may easily 1638 // fail for obvious reasons 1639 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1640 size / B_PAGE_SIZE, physicalAddressRestrictions, priority); 1641 if (page == NULL) { 1642 status = B_NO_MEMORY; 1643 goto err0; 1644 } 1645 } 1646 1647 // Lock the address space and, if B_EXACT_ADDRESS and 1648 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1649 // is not wired. 1650 do { 1651 status = locker.SetTo(team); 1652 if (status != B_OK) 1653 goto err1; 1654 1655 addressSpace = locker.AddressSpace(); 1656 } while (virtualAddressRestrictions->address_specification 1657 == B_EXACT_ADDRESS 1658 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1659 && wait_if_address_range_is_wired(addressSpace, 1660 (addr_t)virtualAddressRestrictions->address, size, &locker)); 1661 1662 // create an anonymous cache 1663 // if it's a stack, make sure that two pages are available at least 1664 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1665 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1666 wiring == B_NO_LOCK, priority); 1667 if (status != B_OK) 1668 goto err1; 1669 1670 cache->temporary = 1; 1671 cache->virtual_end = size; 1672 cache->committed_size = reservedMemory; 1673 // TODO: This should be done via a method. 
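	// The reservation is now accounted to the cache (committed_size above),
	// so clear the local variable to keep the error paths below from
	// unreserving it a second time.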
1674 reservedMemory = 0; 1675 1676 cache->Lock(); 1677 1678 status = map_backing_store(addressSpace, cache, 0, name, size, wiring, 1679 protection, 0, REGION_NO_PRIVATE_MAP, flags, 1680 virtualAddressRestrictions, kernel, &area, _address); 1681 1682 if (status != B_OK) { 1683 cache->ReleaseRefAndUnlock(); 1684 goto err1; 1685 } 1686 1687 locker.DegradeToReadLock(); 1688 1689 switch (wiring) { 1690 case B_NO_LOCK: 1691 case B_LAZY_LOCK: 1692 // do nothing - the pages are mapped in as needed 1693 break; 1694 1695 case B_FULL_LOCK: 1696 { 1697 // Allocate and map all pages for this area 1698 1699 off_t offset = 0; 1700 for (addr_t address = area->Base(); 1701 address < area->Base() + (area->Size() - 1); 1702 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1703 #ifdef DEBUG_KERNEL_STACKS 1704 # ifdef STACK_GROWS_DOWNWARDS 1705 if (isStack && address < area->Base() 1706 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1707 # else 1708 if (isStack && address >= area->Base() + area->Size() 1709 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1710 # endif 1711 continue; 1712 #endif 1713 vm_page* page = vm_page_allocate_page(&reservation, 1714 PAGE_STATE_WIRED | pageAllocFlags); 1715 cache->InsertPage(page, offset); 1716 map_page(area, page, address, protection, &reservation); 1717 1718 DEBUG_PAGE_ACCESS_END(page); 1719 } 1720 1721 break; 1722 } 1723 1724 case B_ALREADY_WIRED: 1725 { 1726 // The pages should already be mapped. This is only really useful 1727 // during boot time. Find the appropriate vm_page objects and stick 1728 // them in the cache object. 1729 VMTranslationMap* map = addressSpace->TranslationMap(); 1730 off_t offset = 0; 1731 1732 if (!gKernelStartup) 1733 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1734 1735 map->Lock(); 1736 1737 for (addr_t virtualAddress = area->Base(); 1738 virtualAddress < area->Base() + (area->Size() - 1); 1739 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1740 phys_addr_t physicalAddress; 1741 uint32 flags; 1742 status = map->Query(virtualAddress, &physicalAddress, &flags); 1743 if (status < B_OK) { 1744 panic("looking up mapping failed for va 0x%lx\n", 1745 virtualAddress); 1746 } 1747 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1748 if (page == NULL) { 1749 panic("looking up page failed for pa %#" B_PRIxPHYSADDR 1750 "\n", physicalAddress); 1751 } 1752 1753 DEBUG_PAGE_ACCESS_START(page); 1754 1755 cache->InsertPage(page, offset); 1756 increment_page_wired_count(page); 1757 vm_page_set_state(page, PAGE_STATE_WIRED); 1758 page->busy = false; 1759 1760 DEBUG_PAGE_ACCESS_END(page); 1761 } 1762 1763 map->Unlock(); 1764 break; 1765 } 1766 1767 case B_CONTIGUOUS: 1768 { 1769 // We have already allocated our continuous pages run, so we can now 1770 // just map them in the address space 1771 VMTranslationMap* map = addressSpace->TranslationMap(); 1772 phys_addr_t physicalAddress 1773 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 1774 addr_t virtualAddress = area->Base(); 1775 off_t offset = 0; 1776 1777 map->Lock(); 1778 1779 for (virtualAddress = area->Base(); virtualAddress < area->Base() 1780 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE, 1781 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 1782 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1783 if (page == NULL) 1784 panic("couldn't lookup physical page just allocated\n"); 1785 1786 status = map->Map(virtualAddress, physicalAddress, protection, 1787 area->MemoryType(), &reservation); 1788 if (status < B_OK) 1789 panic("couldn't map physical page in 
page run\n"); 1790 1791 cache->InsertPage(page, offset); 1792 increment_page_wired_count(page); 1793 1794 DEBUG_PAGE_ACCESS_END(page); 1795 } 1796 1797 map->Unlock(); 1798 break; 1799 } 1800 1801 default: 1802 break; 1803 } 1804 1805 cache->Unlock(); 1806 1807 if (reservedPages > 0) 1808 vm_page_unreserve_pages(&reservation); 1809 1810 TRACE(("vm_create_anonymous_area: done\n")); 1811 1812 area->cache_type = CACHE_TYPE_RAM; 1813 return area->id; 1814 1815 err1: 1816 if (wiring == B_CONTIGUOUS) { 1817 // we had reserved the area space upfront... 1818 phys_addr_t pageNumber = page->physical_page_number; 1819 int32 i; 1820 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1821 page = vm_lookup_page(pageNumber); 1822 if (page == NULL) 1823 panic("couldn't lookup physical page just allocated\n"); 1824 1825 vm_page_set_state(page, PAGE_STATE_FREE); 1826 } 1827 } 1828 1829 err0: 1830 if (reservedPages > 0) 1831 vm_page_unreserve_pages(&reservation); 1832 if (reservedMemory > 0) 1833 vm_unreserve_memory(reservedMemory); 1834 1835 return status; 1836 } 1837 1838 1839 area_id 1840 vm_map_physical_memory(team_id team, const char* name, void** _address, 1841 uint32 addressSpec, addr_t size, uint32 protection, 1842 phys_addr_t physicalAddress, bool alreadyWired) 1843 { 1844 VMArea* area; 1845 VMCache* cache; 1846 addr_t mapOffset; 1847 1848 TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p" 1849 ", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %" 1850 B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address, 1851 addressSpec, size, protection, physicalAddress)); 1852 1853 if (!arch_vm_supports_protection(protection)) 1854 return B_NOT_SUPPORTED; 1855 1856 AddressSpaceWriteLocker locker(team); 1857 if (!locker.IsLocked()) 1858 return B_BAD_TEAM_ID; 1859 1860 // if the physical address is somewhat inside a page, 1861 // move the actual area down to align on a page boundary 1862 mapOffset = physicalAddress % B_PAGE_SIZE; 1863 size += mapOffset; 1864 physicalAddress -= mapOffset; 1865 1866 size = PAGE_ALIGN(size); 1867 1868 // create a device cache 1869 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1870 if (status != B_OK) 1871 return status; 1872 1873 cache->virtual_end = size; 1874 1875 cache->Lock(); 1876 1877 virtual_address_restrictions addressRestrictions = {}; 1878 addressRestrictions.address = *_address; 1879 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1880 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1881 B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, 1882 true, &area, _address); 1883 1884 if (status < B_OK) 1885 cache->ReleaseRefLocked(); 1886 1887 cache->Unlock(); 1888 1889 if (status == B_OK) { 1890 // set requested memory type -- use uncached, if not given 1891 uint32 memoryType = addressSpec & B_MTR_MASK; 1892 if (memoryType == 0) 1893 memoryType = B_MTR_UC; 1894 1895 area->SetMemoryType(memoryType); 1896 1897 status = arch_vm_set_memory_type(area, physicalAddress, memoryType); 1898 if (status != B_OK) 1899 delete_area(locker.AddressSpace(), area, false); 1900 } 1901 1902 if (status != B_OK) 1903 return status; 1904 1905 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1906 1907 if (alreadyWired) { 1908 // The area is already mapped, but possibly not with the right 1909 // memory type. 
1910 map->Lock(); 1911 map->ProtectArea(area, area->protection); 1912 map->Unlock(); 1913 } else { 1914 // Map the area completely. 1915 1916 // reserve pages needed for the mapping 1917 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1918 area->Base() + (size - 1)); 1919 vm_page_reservation reservation; 1920 vm_page_reserve_pages(&reservation, reservePages, 1921 team == VMAddressSpace::KernelID() 1922 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1923 1924 map->Lock(); 1925 1926 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1927 map->Map(area->Base() + offset, physicalAddress + offset, 1928 protection, area->MemoryType(), &reservation); 1929 } 1930 1931 map->Unlock(); 1932 1933 vm_page_unreserve_pages(&reservation); 1934 } 1935 1936 // modify the pointer returned to be offset back into the new area 1937 // the same way the physical address in was offset 1938 *_address = (void*)((addr_t)*_address + mapOffset); 1939 1940 area->cache_type = CACHE_TYPE_DEVICE; 1941 return area->id; 1942 } 1943 1944 1945 /*! Don't use! 1946 TODO: This function was introduced to map physical page vecs to 1947 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 1948 use a device cache and does not track vm_page::wired_count! 1949 */ 1950 area_id 1951 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 1952 uint32 addressSpec, addr_t* _size, uint32 protection, 1953 struct generic_io_vec* vecs, uint32 vecCount) 1954 { 1955 TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual " 1956 "= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", " 1957 "vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address, 1958 addressSpec, _size, protection, vecs, vecCount)); 1959 1960 if (!arch_vm_supports_protection(protection) 1961 || (addressSpec & B_MTR_MASK) != 0) { 1962 return B_NOT_SUPPORTED; 1963 } 1964 1965 AddressSpaceWriteLocker locker(team); 1966 if (!locker.IsLocked()) 1967 return B_BAD_TEAM_ID; 1968 1969 if (vecCount == 0) 1970 return B_BAD_VALUE; 1971 1972 addr_t size = 0; 1973 for (uint32 i = 0; i < vecCount; i++) { 1974 if (vecs[i].base % B_PAGE_SIZE != 0 1975 || vecs[i].length % B_PAGE_SIZE != 0) { 1976 return B_BAD_VALUE; 1977 } 1978 1979 size += vecs[i].length; 1980 } 1981 1982 // create a device cache 1983 VMCache* cache; 1984 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base); 1985 if (result != B_OK) 1986 return result; 1987 1988 cache->virtual_end = size; 1989 1990 cache->Lock(); 1991 1992 VMArea* area; 1993 virtual_address_restrictions addressRestrictions = {}; 1994 addressRestrictions.address = *_address; 1995 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1996 result = map_backing_store(locker.AddressSpace(), cache, 0, name, 1997 size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, 1998 &addressRestrictions, true, &area, _address); 1999 2000 if (result != B_OK) 2001 cache->ReleaseRefLocked(); 2002 2003 cache->Unlock(); 2004 2005 if (result != B_OK) 2006 return result; 2007 2008 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2009 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 2010 area->Base() + (size - 1)); 2011 2012 vm_page_reservation reservation; 2013 vm_page_reserve_pages(&reservation, reservePages, 2014 team == VMAddressSpace::KernelID() 2015 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2016 map->Lock(); 2017 2018 uint32 vecIndex = 0; 2019 size_t vecOffset = 0; 2020 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 2021 while (vecIndex < vecCount && vecOffset >= vecs[vecIndex].length) { 2022 vecOffset = 0; 2023 vecIndex++; 2024 } 2025 2026 if (vecIndex >= vecCount) 2027 break; 2028 2029 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 2030 protection, area->MemoryType(), &reservation); 2031 2032 vecOffset += B_PAGE_SIZE; 2033 } 2034 2035 map->Unlock(); 2036 vm_page_unreserve_pages(&reservation); 2037 2038 if (_size != NULL) 2039 *_size = size; 2040 2041 area->cache_type = CACHE_TYPE_DEVICE; 2042 return area->id; 2043 } 2044 2045 2046 area_id 2047 vm_create_null_area(team_id team, const char* name, void** address, 2048 uint32 addressSpec, addr_t size, uint32 flags) 2049 { 2050 size = PAGE_ALIGN(size); 2051 2052 // Lock the address space and, if B_EXACT_ADDRESS and 2053 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 2054 // is not wired. 2055 AddressSpaceWriteLocker locker; 2056 do { 2057 if (locker.SetTo(team) != B_OK) 2058 return B_BAD_TEAM_ID; 2059 } while (addressSpec == B_EXACT_ADDRESS 2060 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 2061 && wait_if_address_range_is_wired(locker.AddressSpace(), 2062 (addr_t)*address, size, &locker)); 2063 2064 // create a null cache 2065 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 2066 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 2067 VMCache* cache; 2068 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 2069 if (status != B_OK) 2070 return status; 2071 2072 cache->temporary = 1; 2073 cache->virtual_end = size; 2074 2075 cache->Lock(); 2076 2077 VMArea* area; 2078 virtual_address_restrictions addressRestrictions = {}; 2079 addressRestrictions.address = *address; 2080 addressRestrictions.address_specification = addressSpec; 2081 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 2082 B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA, 2083 REGION_NO_PRIVATE_MAP, flags, 2084 &addressRestrictions, true, &area, address); 2085 2086 if (status < B_OK) { 2087 cache->ReleaseRefAndUnlock(); 2088 return status; 2089 } 2090 2091 cache->Unlock(); 2092 2093 area->cache_type = CACHE_TYPE_NULL; 2094 return area->id; 2095 } 2096 2097 2098 /*! Creates the vnode cache for the specified \a vnode. 2099 The vnode has to be marked busy when calling this function. 2100 */ 2101 status_t 2102 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 2103 { 2104 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 2105 } 2106 2107 2108 /*! \a cache must be locked. The area's address space must be read-locked. 
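	Maps those pages of \a cache that are already resident and recently used
	(i.e. neither busy nor with a usage count of zero) read-only into \a area,
	so that the first accesses do not have to fault them in. The given
	\a reservation must cover the pages needed by the mapping backend.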
2109 */ 2110 static void 2111 pre_map_area_pages(VMArea* area, VMCache* cache, 2112 vm_page_reservation* reservation) 2113 { 2114 addr_t baseAddress = area->Base(); 2115 addr_t cacheOffset = area->cache_offset; 2116 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 2117 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE; 2118 2119 for (VMCachePagesTree::Iterator it 2120 = cache->pages.GetIterator(firstPage, true, true); 2121 vm_page* page = it.Next();) { 2122 if (page->cache_offset >= endPage) 2123 break; 2124 2125 // skip busy and inactive pages 2126 if (page->busy || page->usage_count == 0) 2127 continue; 2128 2129 DEBUG_PAGE_ACCESS_START(page); 2130 map_page(area, page, 2131 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 2132 B_READ_AREA | B_KERNEL_READ_AREA, reservation); 2133 DEBUG_PAGE_ACCESS_END(page); 2134 } 2135 } 2136 2137 2138 /*! Will map the file specified by \a fd to an area in memory. 2139 The file will be mirrored beginning at the specified \a offset. The 2140 \a offset and \a size arguments have to be page aligned. 2141 */ 2142 static area_id 2143 _vm_map_file(team_id team, const char* name, void** _address, 2144 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 2145 bool unmapAddressRange, int fd, off_t offset, bool kernel) 2146 { 2147 // TODO: for binary files, we want to make sure that they get the 2148 // copy of a file at a given time, ie. later changes should not 2149 // make it into the mapped copy -- this will need quite some changes 2150 // to be done in a nice way 2151 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping " 2152 "%" B_PRIu32 ")\n", fd, offset, size, mapping)); 2153 2154 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 2155 size = PAGE_ALIGN(size); 2156 2157 if (mapping == REGION_NO_PRIVATE_MAP) 2158 protection |= B_SHARED_AREA; 2159 if (addressSpec != B_EXACT_ADDRESS) 2160 unmapAddressRange = false; 2161 2162 uint32 mappingFlags = 0; 2163 if (unmapAddressRange) 2164 mappingFlags |= CREATE_AREA_UNMAP_ADDRESS_RANGE; 2165 2166 if (fd < 0) { 2167 virtual_address_restrictions virtualRestrictions = {}; 2168 virtualRestrictions.address = *_address; 2169 virtualRestrictions.address_specification = addressSpec; 2170 physical_address_restrictions physicalRestrictions = {}; 2171 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection, 2172 mappingFlags, 0, &virtualRestrictions, &physicalRestrictions, kernel, 2173 _address); 2174 } 2175 2176 // get the open flags of the FD 2177 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 2178 if (descriptor == NULL) 2179 return EBADF; 2180 int32 openMode = descriptor->open_mode; 2181 put_fd(descriptor); 2182 2183 // The FD must open for reading at any rate. For shared mapping with write 2184 // access, additionally the FD must be open for writing. 2185 if ((openMode & O_ACCMODE) == O_WRONLY 2186 || (mapping == REGION_NO_PRIVATE_MAP 2187 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2188 && (openMode & O_ACCMODE) == O_RDONLY)) { 2189 return EACCES; 2190 } 2191 2192 uint32 protectionMax = 0; 2193 if (mapping == REGION_NO_PRIVATE_MAP) { 2194 if ((openMode & O_ACCMODE) == O_RDWR) 2195 protectionMax = protection | B_USER_PROTECTION; 2196 else 2197 protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA); 2198 } else if (mapping == REGION_PRIVATE_MAP) { 2199 // For privately mapped read-only regions, skip committing memory. 2200 // (If protections are changed later on, memory will be committed then.) 
2201 if ((protection & B_WRITE_AREA) == 0) 2202 mappingFlags |= CREATE_AREA_DONT_COMMIT_MEMORY; 2203 } 2204 2205 // get the vnode for the object, this also grabs a ref to it 2206 struct vnode* vnode = NULL; 2207 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2208 if (status < B_OK) 2209 return status; 2210 VnodePutter vnodePutter(vnode); 2211 2212 // If we're going to pre-map pages, we need to reserve the pages needed by 2213 // the mapping backend upfront. 2214 page_num_t reservedPreMapPages = 0; 2215 vm_page_reservation reservation; 2216 if ((protection & B_READ_AREA) != 0) { 2217 AddressSpaceWriteLocker locker; 2218 status = locker.SetTo(team); 2219 if (status != B_OK) 2220 return status; 2221 2222 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2223 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 2224 2225 locker.Unlock(); 2226 2227 vm_page_reserve_pages(&reservation, reservedPreMapPages, 2228 team == VMAddressSpace::KernelID() 2229 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2230 } 2231 2232 struct PageUnreserver { 2233 PageUnreserver(vm_page_reservation* reservation) 2234 : 2235 fReservation(reservation) 2236 { 2237 } 2238 2239 ~PageUnreserver() 2240 { 2241 if (fReservation != NULL) 2242 vm_page_unreserve_pages(fReservation); 2243 } 2244 2245 vm_page_reservation* fReservation; 2246 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 2247 2248 // Lock the address space and, if the specified address range shall be 2249 // unmapped, ensure it is not wired. 2250 AddressSpaceWriteLocker locker; 2251 do { 2252 if (locker.SetTo(team) != B_OK) 2253 return B_BAD_TEAM_ID; 2254 } while (unmapAddressRange 2255 && wait_if_address_range_is_wired(locker.AddressSpace(), 2256 (addr_t)*_address, size, &locker)); 2257 2258 // TODO: this only works for file systems that use the file cache 2259 VMCache* cache; 2260 status = vfs_get_vnode_cache(vnode, &cache, false); 2261 if (status < B_OK) 2262 return status; 2263 2264 cache->Lock(); 2265 2266 VMArea* area; 2267 virtual_address_restrictions addressRestrictions = {}; 2268 addressRestrictions.address = *_address; 2269 addressRestrictions.address_specification = addressSpec; 2270 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 2271 0, protection, protectionMax, mapping, mappingFlags, 2272 &addressRestrictions, kernel, &area, _address); 2273 2274 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2275 // map_backing_store() cannot know we no longer need the ref 2276 cache->ReleaseRefLocked(); 2277 } 2278 2279 if (status == B_OK && (protection & B_READ_AREA) != 0) 2280 pre_map_area_pages(area, cache, &reservation); 2281 2282 cache->Unlock(); 2283 2284 if (status == B_OK) { 2285 // TODO: this probably deserves a smarter solution, ie. don't always 2286 // prefetch stuff, and also, probably don't trigger it at this place. 
2287 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 2288 // prefetches at max 10 MB starting from "offset" 2289 } 2290 2291 if (status != B_OK) 2292 return status; 2293 2294 area->cache_type = CACHE_TYPE_VNODE; 2295 return area->id; 2296 } 2297 2298 2299 area_id 2300 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2301 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2302 int fd, off_t offset) 2303 { 2304 if (!arch_vm_supports_protection(protection)) 2305 return B_NOT_SUPPORTED; 2306 2307 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2308 mapping, unmapAddressRange, fd, offset, true); 2309 } 2310 2311 2312 VMCache* 2313 vm_area_get_locked_cache(VMArea* area) 2314 { 2315 rw_lock_read_lock(&sAreaCacheLock); 2316 2317 while (true) { 2318 VMCache* cache = area->cache; 2319 2320 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2321 // cache has been deleted 2322 rw_lock_read_lock(&sAreaCacheLock); 2323 continue; 2324 } 2325 2326 rw_lock_read_lock(&sAreaCacheLock); 2327 2328 if (cache == area->cache) { 2329 cache->AcquireRefLocked(); 2330 rw_lock_read_unlock(&sAreaCacheLock); 2331 return cache; 2332 } 2333 2334 // the cache changed in the meantime 2335 cache->Unlock(); 2336 } 2337 } 2338 2339 2340 void 2341 vm_area_put_locked_cache(VMCache* cache) 2342 { 2343 cache->ReleaseRefAndUnlock(); 2344 } 2345 2346 2347 area_id 2348 vm_clone_area(team_id team, const char* name, void** address, 2349 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2350 bool kernel) 2351 { 2352 VMArea* newArea = NULL; 2353 VMArea* sourceArea; 2354 2355 // Check whether the source area exists and is cloneable. If so, mark it 2356 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2357 { 2358 AddressSpaceWriteLocker locker; 2359 status_t status = locker.SetFromArea(sourceID, sourceArea); 2360 if (status != B_OK) 2361 return status; 2362 2363 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2364 return B_NOT_ALLOWED; 2365 2366 sourceArea->protection |= B_SHARED_AREA; 2367 protection |= B_SHARED_AREA; 2368 } 2369 2370 // Now lock both address spaces and actually do the cloning. 
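	// The write lock on the source's address space was given up above, so the
	// source area has to be looked up again and its permissions re-checked
	// once both address spaces are locked.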
2371 2372 MultiAddressSpaceLocker locker; 2373 VMAddressSpace* sourceAddressSpace; 2374 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2375 if (status != B_OK) 2376 return status; 2377 2378 VMAddressSpace* targetAddressSpace; 2379 status = locker.AddTeam(team, true, &targetAddressSpace); 2380 if (status != B_OK) 2381 return status; 2382 2383 status = locker.Lock(); 2384 if (status != B_OK) 2385 return status; 2386 2387 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2388 if (sourceArea == NULL) 2389 return B_BAD_VALUE; 2390 2391 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2392 return B_NOT_ALLOWED; 2393 2394 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2395 2396 if (!kernel && sourceAddressSpace != targetAddressSpace 2397 && (sourceArea->protection & B_CLONEABLE_AREA) == 0) { 2398 #if KDEBUG 2399 Team* team = thread_get_current_thread()->team; 2400 dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%" 2401 B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID); 2402 #endif 2403 status = B_NOT_ALLOWED; 2404 } else if (sourceArea->cache_type == CACHE_TYPE_NULL) { 2405 status = B_NOT_ALLOWED; 2406 } else { 2407 virtual_address_restrictions addressRestrictions = {}; 2408 addressRestrictions.address = *address; 2409 addressRestrictions.address_specification = addressSpec; 2410 status = map_backing_store(targetAddressSpace, cache, 2411 sourceArea->cache_offset, name, sourceArea->Size(), 2412 sourceArea->wiring, protection, sourceArea->protection_max, 2413 mapping, 0, &addressRestrictions, 2414 kernel, &newArea, address); 2415 } 2416 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2417 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2418 // to create a new cache, and has therefore already acquired a reference 2419 // to the source cache - but otherwise it has no idea that we need 2420 // one. 2421 cache->AcquireRefLocked(); 2422 } 2423 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2424 // we need to map in everything at this point 2425 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2426 // we don't have actual pages to map but a physical area 2427 VMTranslationMap* map 2428 = sourceArea->address_space->TranslationMap(); 2429 map->Lock(); 2430 2431 phys_addr_t physicalAddress; 2432 uint32 oldProtection; 2433 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2434 2435 map->Unlock(); 2436 2437 map = targetAddressSpace->TranslationMap(); 2438 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2439 newArea->Base() + (newArea->Size() - 1)); 2440 2441 vm_page_reservation reservation; 2442 vm_page_reserve_pages(&reservation, reservePages, 2443 targetAddressSpace == VMAddressSpace::Kernel() 2444 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2445 map->Lock(); 2446 2447 for (addr_t offset = 0; offset < newArea->Size(); 2448 offset += B_PAGE_SIZE) { 2449 map->Map(newArea->Base() + offset, physicalAddress + offset, 2450 protection, newArea->MemoryType(), &reservation); 2451 } 2452 2453 map->Unlock(); 2454 vm_page_unreserve_pages(&reservation); 2455 } else { 2456 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2457 size_t reservePages = map->MaxPagesNeededToMap( 2458 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2459 vm_page_reservation reservation; 2460 vm_page_reserve_pages(&reservation, reservePages, 2461 targetAddressSpace == VMAddressSpace::Kernel() 2462 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2463 2464 // map in all pages from source 2465 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2466 vm_page* page = it.Next();) { 2467 if (!page->busy) { 2468 DEBUG_PAGE_ACCESS_START(page); 2469 map_page(newArea, page, 2470 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2471 - newArea->cache_offset), 2472 protection, &reservation); 2473 DEBUG_PAGE_ACCESS_END(page); 2474 } 2475 } 2476 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2477 // ensuring that! 2478 2479 vm_page_unreserve_pages(&reservation); 2480 } 2481 } 2482 if (status == B_OK) 2483 newArea->cache_type = sourceArea->cache_type; 2484 2485 vm_area_put_locked_cache(cache); 2486 2487 if (status < B_OK) 2488 return status; 2489 2490 return newArea->id; 2491 } 2492 2493 2494 /*! Deletes the specified area of the given address space. 2495 2496 The address space must be write-locked. 2497 The caller must ensure that the area does not have any wired ranges. 2498 2499 \param addressSpace The address space containing the area. 2500 \param area The area to be deleted. 2501 \param deletingAddressSpace \c true, if the address space is in the process 2502 of being deleted. 2503 */ 2504 static void 2505 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2506 bool deletingAddressSpace) 2507 { 2508 ASSERT(!area->IsWired()); 2509 2510 VMAreas::Remove(area); 2511 2512 // At this point the area is removed from the global hash table, but 2513 // still exists in the area list. 2514 2515 // Unmap the virtual address space the area occupied. 2516 { 2517 // We need to lock the complete cache chain. 2518 VMCache* topCache = vm_area_get_locked_cache(area); 2519 VMCacheChainLocker cacheChainLocker(topCache); 2520 cacheChainLocker.LockAllSourceCaches(); 2521 2522 // If the area's top cache is a temporary cache and the area is the only 2523 // one referencing it (besides us currently holding a second reference), 2524 // the unmapping code doesn't need to care about preserving the accessed 2525 // and dirty flags of the top cache page mappings. 2526 bool ignoreTopCachePageFlags 2527 = topCache->temporary && topCache->RefCount() == 2; 2528 2529 area->address_space->TranslationMap()->UnmapArea(area, 2530 deletingAddressSpace, ignoreTopCachePageFlags); 2531 } 2532 2533 if (!area->cache->temporary) 2534 area->cache->WriteModified(); 2535 2536 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2537 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2538 2539 arch_vm_unset_memory_type(area); 2540 addressSpace->RemoveArea(area, allocationFlags); 2541 addressSpace->Put(); 2542 2543 area->cache->RemoveArea(area); 2544 area->cache->ReleaseRef(); 2545 2546 addressSpace->DeleteArea(area, allocationFlags); 2547 } 2548 2549 2550 status_t 2551 vm_delete_area(team_id team, area_id id, bool kernel) 2552 { 2553 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n", 2554 team, id)); 2555 2556 // lock the address space and make sure the area isn't wired 2557 AddressSpaceWriteLocker locker; 2558 VMArea* area; 2559 AreaCacheLocker cacheLocker; 2560 2561 do { 2562 status_t status = locker.SetFromArea(team, id, area); 2563 if (status != B_OK) 2564 return status; 2565 2566 cacheLocker.SetTo(area); 2567 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2568 2569 cacheLocker.Unlock(); 2570 2571 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2572 return B_NOT_ALLOWED; 2573 2574 delete_area(locker.AddressSpace(), area, false); 2575 return B_OK; 2576 } 2577 2578 2579 /*! Creates a new cache on top of given cache, moves all areas from 2580 the old cache to the new one, and changes the protection of all affected 2581 areas' pages to read-only. If requested, wired pages are moved up to the 2582 new cache and copies are added to the old cache in their place. 2583 Preconditions: 2584 - The given cache must be locked. 2585 - All of the cache's areas' address spaces must be read locked. 2586 - Either the cache must not have any wired ranges or a page reservation for 2587 all wired pages must be provided, so they can be copied. 2588 2589 \param lowerCache The cache on top of which a new cache shall be created. 2590 \param wiredPagesReservation If \c NULL there must not be any wired pages 2591 in \a lowerCache. Otherwise as many pages must be reserved as the cache 2592 has wired page. The wired pages are copied in this case. 2593 */ 2594 static status_t 2595 vm_copy_on_write_area(VMCache* lowerCache, 2596 vm_page_reservation* wiredPagesReservation) 2597 { 2598 VMCache* upperCache; 2599 2600 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2601 2602 // We need to separate the cache from its areas. The cache goes one level 2603 // deeper and we create a new cache inbetween. 2604 2605 // create an anonymous cache 2606 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2607 lowerCache->GuardSize() / B_PAGE_SIZE, 2608 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL, 2609 VM_PRIORITY_USER); 2610 if (status != B_OK) 2611 return status; 2612 2613 upperCache->Lock(); 2614 2615 upperCache->temporary = 1; 2616 upperCache->virtual_base = lowerCache->virtual_base; 2617 upperCache->virtual_end = lowerCache->virtual_end; 2618 2619 // transfer the lower cache areas to the upper cache 2620 rw_lock_write_lock(&sAreaCacheLock); 2621 upperCache->TransferAreas(lowerCache); 2622 rw_lock_write_unlock(&sAreaCacheLock); 2623 2624 lowerCache->AddConsumer(upperCache); 2625 2626 // We now need to remap all pages from all of the cache's areas read-only, 2627 // so that a copy will be created on next write access. If there are wired 2628 // pages, we keep their protection, move them to the upper cache and create 2629 // copies for the lower cache. 2630 if (wiredPagesReservation != NULL) { 2631 // We need to handle wired pages -- iterate through the cache's pages. 
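		// (A wired page has to remain the page that stays mapped in the areas,
		// since its physical address may be in active use, e.g. for I/O; the
		// freshly allocated copy therefore goes into the lower cache.)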
2632 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2633 vm_page* page = it.Next();) { 2634 if (page->WiredCount() > 0) { 2635 // allocate a new page and copy the wired one 2636 vm_page* copiedPage = vm_page_allocate_page( 2637 wiredPagesReservation, PAGE_STATE_ACTIVE); 2638 2639 vm_memcpy_physical_page( 2640 copiedPage->physical_page_number * B_PAGE_SIZE, 2641 page->physical_page_number * B_PAGE_SIZE); 2642 2643 // move the wired page to the upper cache (note: removing is OK 2644 // with the SplayTree iterator) and insert the copy 2645 upperCache->MovePage(page); 2646 lowerCache->InsertPage(copiedPage, 2647 page->cache_offset * B_PAGE_SIZE); 2648 2649 DEBUG_PAGE_ACCESS_END(copiedPage); 2650 } else { 2651 // Change the protection of this page in all areas. 2652 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2653 tempArea = tempArea->cache_next) { 2654 if (!is_page_in_area(tempArea, page)) 2655 continue; 2656 2657 // The area must be readable in the same way it was 2658 // previously writable. 2659 addr_t address = virtual_page_address(tempArea, page); 2660 uint32 protection = 0; 2661 uint32 pageProtection = get_area_page_protection(tempArea, address); 2662 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2663 protection |= B_KERNEL_READ_AREA; 2664 if ((pageProtection & B_READ_AREA) != 0) 2665 protection |= B_READ_AREA; 2666 2667 VMTranslationMap* map 2668 = tempArea->address_space->TranslationMap(); 2669 map->Lock(); 2670 map->ProtectPage(tempArea, address, protection); 2671 map->Unlock(); 2672 } 2673 } 2674 } 2675 } else { 2676 ASSERT(lowerCache->WiredPagesCount() == 0); 2677 2678 // just change the protection of all areas 2679 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2680 tempArea = tempArea->cache_next) { 2681 if (tempArea->page_protections != NULL) { 2682 // Change the protection of all pages in this area. 2683 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2684 map->Lock(); 2685 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2686 vm_page* page = it.Next();) { 2687 if (!is_page_in_area(tempArea, page)) 2688 continue; 2689 2690 // The area must be readable in the same way it was 2691 // previously writable. 2692 addr_t address = virtual_page_address(tempArea, page); 2693 uint32 protection = 0; 2694 uint32 pageProtection = get_area_page_protection(tempArea, address); 2695 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2696 protection |= B_KERNEL_READ_AREA; 2697 if ((pageProtection & B_READ_AREA) != 0) 2698 protection |= B_READ_AREA; 2699 2700 map->ProtectPage(tempArea, address, protection); 2701 } 2702 map->Unlock(); 2703 continue; 2704 } 2705 // The area must be readable in the same way it was previously 2706 // writable. 2707 uint32 protection = 0; 2708 if ((tempArea->protection & B_KERNEL_READ_AREA) != 0) 2709 protection |= B_KERNEL_READ_AREA; 2710 if ((tempArea->protection & B_READ_AREA) != 0) 2711 protection |= B_READ_AREA; 2712 2713 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2714 map->Lock(); 2715 map->ProtectArea(tempArea, protection); 2716 map->Unlock(); 2717 } 2718 } 2719 2720 vm_area_put_locked_cache(upperCache); 2721 2722 return B_OK; 2723 } 2724 2725 2726 area_id 2727 vm_copy_area(team_id team, const char* name, void** _address, 2728 uint32 addressSpec, area_id sourceID) 2729 { 2730 // Do the locking: target address space, all address spaces associated with 2731 // the source cache, and the cache itself. 
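	// If the source area isn't shared, its wired pages will have to be copied
	// by vm_copy_on_write_area(), so enough pages are reserved up front. Since
	// reserving may block, the locks are dropped and the locking is redone
	// until the reservation covers the current wired page count.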
2732 MultiAddressSpaceLocker locker; 2733 VMAddressSpace* targetAddressSpace; 2734 VMCache* cache; 2735 VMArea* source; 2736 AreaCacheLocker cacheLocker; 2737 status_t status; 2738 bool sharedArea; 2739 2740 page_num_t wiredPages = 0; 2741 vm_page_reservation wiredPagesReservation; 2742 2743 bool restart; 2744 do { 2745 restart = false; 2746 2747 locker.Unset(); 2748 status = locker.AddTeam(team, true, &targetAddressSpace); 2749 if (status == B_OK) { 2750 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2751 &cache); 2752 } 2753 if (status != B_OK) 2754 return status; 2755 2756 cacheLocker.SetTo(cache, true); // already locked 2757 2758 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2759 2760 page_num_t oldWiredPages = wiredPages; 2761 wiredPages = 0; 2762 2763 // If the source area isn't shared, count the number of wired pages in 2764 // the cache and reserve as many pages. 2765 if (!sharedArea) { 2766 wiredPages = cache->WiredPagesCount(); 2767 2768 if (wiredPages > oldWiredPages) { 2769 cacheLocker.Unlock(); 2770 locker.Unlock(); 2771 2772 if (oldWiredPages > 0) 2773 vm_page_unreserve_pages(&wiredPagesReservation); 2774 2775 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2776 VM_PRIORITY_USER); 2777 2778 restart = true; 2779 } 2780 } else if (oldWiredPages > 0) 2781 vm_page_unreserve_pages(&wiredPagesReservation); 2782 } while (restart); 2783 2784 // unreserve pages later 2785 struct PagesUnreserver { 2786 PagesUnreserver(vm_page_reservation* reservation) 2787 : 2788 fReservation(reservation) 2789 { 2790 } 2791 2792 ~PagesUnreserver() 2793 { 2794 if (fReservation != NULL) 2795 vm_page_unreserve_pages(fReservation); 2796 } 2797 2798 private: 2799 vm_page_reservation* fReservation; 2800 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2801 2802 bool writableCopy 2803 = (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2804 uint8* targetPageProtections = NULL; 2805 2806 if (source->page_protections != NULL) { 2807 size_t bytes = area_page_protections_size(source->Size()); 2808 targetPageProtections = (uint8*)malloc_etc(bytes, 2809 (source->address_space == VMAddressSpace::Kernel() 2810 || targetAddressSpace == VMAddressSpace::Kernel()) 2811 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 2812 if (targetPageProtections == NULL) 2813 return B_NO_MEMORY; 2814 2815 memcpy(targetPageProtections, source->page_protections, bytes); 2816 2817 if (!writableCopy) { 2818 for (size_t i = 0; i < bytes; i++) { 2819 if ((targetPageProtections[i] 2820 & (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) { 2821 writableCopy = true; 2822 break; 2823 } 2824 } 2825 } 2826 } 2827 2828 if (addressSpec == B_CLONE_ADDRESS) { 2829 addressSpec = B_EXACT_ADDRESS; 2830 *_address = (void*)source->Base(); 2831 } 2832 2833 // First, create a cache on top of the source area, respectively use the 2834 // existing one, if this is a shared area. 2835 2836 VMArea* target; 2837 virtual_address_restrictions addressRestrictions = {}; 2838 addressRestrictions.address = *_address; 2839 addressRestrictions.address_specification = addressSpec; 2840 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2841 name, source->Size(), source->wiring, source->protection, 2842 source->protection_max, 2843 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2844 writableCopy ? 
0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2845 &addressRestrictions, true, &target, _address); 2846 if (status < B_OK) { 2847 free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE); 2848 return status; 2849 } 2850 2851 if (targetPageProtections != NULL) 2852 target->page_protections = targetPageProtections; 2853 2854 if (sharedArea) { 2855 // The new area uses the old area's cache, but map_backing_store() 2856 // hasn't acquired a ref. So we have to do that now. 2857 cache->AcquireRefLocked(); 2858 } 2859 2860 // If the source area is writable, we need to move it one layer up as well 2861 2862 if (!sharedArea) { 2863 if (writableCopy) { 2864 // TODO: do something more useful if this fails! 2865 if (vm_copy_on_write_area(cache, 2866 wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) { 2867 panic("vm_copy_on_write_area() failed!\n"); 2868 } 2869 } 2870 } 2871 2872 // we return the ID of the newly created area 2873 return target->id; 2874 } 2875 2876 2877 status_t 2878 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2879 bool kernel) 2880 { 2881 fix_protection(&newProtection); 2882 2883 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2884 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2885 2886 if (!arch_vm_supports_protection(newProtection)) 2887 return B_NOT_SUPPORTED; 2888 2889 bool becomesWritable 2890 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2891 2892 // lock address spaces and cache 2893 MultiAddressSpaceLocker locker; 2894 VMCache* cache; 2895 VMArea* area; 2896 status_t status; 2897 AreaCacheLocker cacheLocker; 2898 bool isWritable; 2899 2900 bool restart; 2901 do { 2902 restart = false; 2903 2904 locker.Unset(); 2905 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2906 if (status != B_OK) 2907 return status; 2908 2909 cacheLocker.SetTo(cache, true); // already locked 2910 2911 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 2912 || (area->protection & B_KERNEL_AREA) != 0)) { 2913 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2914 "set protection %#" B_PRIx32 " on kernel area %" B_PRId32 2915 " (%s)\n", team, newProtection, areaID, area->name); 2916 return B_NOT_ALLOWED; 2917 } 2918 if (!kernel && area->protection_max != 0 2919 && (newProtection & area->protection_max) 2920 != (newProtection & B_USER_PROTECTION)) { 2921 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2922 "set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel " 2923 "area %" B_PRId32 " (%s)\n", team, newProtection, 2924 area->protection_max, areaID, area->name); 2925 return B_NOT_ALLOWED; 2926 } 2927 2928 if (team != VMAddressSpace::KernelID() 2929 && area->address_space->ID() != team) { 2930 // unless you're the kernel, you are only allowed to set 2931 // the protection of your own areas 2932 return B_NOT_ALLOWED; 2933 } 2934 2935 if (area->protection == newProtection) 2936 return B_OK; 2937 2938 isWritable 2939 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2940 2941 // Make sure the area (respectively, if we're going to call 2942 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2943 // wired ranges. 
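	// (In particular, vm_copy_on_write_area() is called below without a page
	// reservation, i.e. it cannot copy wired pages, so any wiring has to be
	// waited out first.)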
2944 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2945 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2946 otherArea = otherArea->cache_next) { 2947 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2948 restart = true; 2949 break; 2950 } 2951 } 2952 } else { 2953 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2954 restart = true; 2955 } 2956 } while (restart); 2957 2958 bool changePageProtection = true; 2959 bool changeTopCachePagesOnly = false; 2960 2961 if (isWritable && !becomesWritable) { 2962 // writable -> !writable 2963 2964 if (cache->source != NULL && cache->temporary) { 2965 if (cache->CountWritableAreas(area) == 0) { 2966 // Since this cache now lives from the pages in its source cache, 2967 // we can change the cache's commitment to take only those pages 2968 // into account that really are in this cache. 2969 2970 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2971 team == VMAddressSpace::KernelID() 2972 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2973 2974 // TODO: we may be able to join with our source cache, if 2975 // count == 0 2976 } 2977 } 2978 2979 // If only the writability changes, we can just remap the pages of the 2980 // top cache, since the pages of lower caches are mapped read-only 2981 // anyway. That's advantageous only, if the number of pages in the cache 2982 // is significantly smaller than the number of pages in the area, 2983 // though. 2984 if (newProtection 2985 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2986 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2987 changeTopCachePagesOnly = true; 2988 } 2989 } else if (!isWritable && becomesWritable) { 2990 // !writable -> writable 2991 2992 if (!cache->consumers.IsEmpty()) { 2993 // There are consumers -- we have to insert a new cache. Fortunately 2994 // vm_copy_on_write_area() does everything that's needed. 2995 changePageProtection = false; 2996 status = vm_copy_on_write_area(cache, NULL); 2997 } else { 2998 // No consumers, so we don't need to insert a new one. 2999 if (cache->source != NULL && cache->temporary) { 3000 // the cache's commitment must contain all possible pages 3001 status = cache->Commit(cache->virtual_end - cache->virtual_base, 3002 team == VMAddressSpace::KernelID() 3003 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 3004 } 3005 3006 if (status == B_OK && cache->source != NULL) { 3007 // There's a source cache, hence we can't just change all pages' 3008 // protection or we might allow writing into pages belonging to 3009 // a lower cache. 
3010 changeTopCachePagesOnly = true; 3011 } 3012 } 3013 } else { 3014 // we don't have anything special to do in all other cases 3015 } 3016 3017 if (status == B_OK) { 3018 // remap existing pages in this cache 3019 if (changePageProtection) { 3020 VMTranslationMap* map = area->address_space->TranslationMap(); 3021 map->Lock(); 3022 3023 if (changeTopCachePagesOnly) { 3024 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 3025 page_num_t lastPageOffset 3026 = firstPageOffset + area->Size() / B_PAGE_SIZE; 3027 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 3028 vm_page* page = it.Next();) { 3029 if (page->cache_offset >= firstPageOffset 3030 && page->cache_offset <= lastPageOffset) { 3031 addr_t address = virtual_page_address(area, page); 3032 map->ProtectPage(area, address, newProtection); 3033 } 3034 } 3035 } else 3036 map->ProtectArea(area, newProtection); 3037 3038 map->Unlock(); 3039 } 3040 3041 area->protection = newProtection; 3042 } 3043 3044 return status; 3045 } 3046 3047 3048 status_t 3049 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 3050 { 3051 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 3052 if (addressSpace == NULL) 3053 return B_BAD_TEAM_ID; 3054 3055 VMTranslationMap* map = addressSpace->TranslationMap(); 3056 3057 map->Lock(); 3058 uint32 dummyFlags; 3059 status_t status = map->Query(vaddr, paddr, &dummyFlags); 3060 map->Unlock(); 3061 3062 addressSpace->Put(); 3063 return status; 3064 } 3065 3066 3067 /*! The page's cache must be locked. 3068 */ 3069 bool 3070 vm_test_map_modification(vm_page* page) 3071 { 3072 if (page->modified) 3073 return true; 3074 3075 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3076 vm_page_mapping* mapping; 3077 while ((mapping = iterator.Next()) != NULL) { 3078 VMArea* area = mapping->area; 3079 VMTranslationMap* map = area->address_space->TranslationMap(); 3080 3081 phys_addr_t physicalAddress; 3082 uint32 flags; 3083 map->Lock(); 3084 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 3085 map->Unlock(); 3086 3087 if ((flags & PAGE_MODIFIED) != 0) 3088 return true; 3089 } 3090 3091 return false; 3092 } 3093 3094 3095 /*! The page's cache must be locked. 3096 */ 3097 void 3098 vm_clear_map_flags(vm_page* page, uint32 flags) 3099 { 3100 if ((flags & PAGE_ACCESSED) != 0) 3101 page->accessed = false; 3102 if ((flags & PAGE_MODIFIED) != 0) 3103 page->modified = false; 3104 3105 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3106 vm_page_mapping* mapping; 3107 while ((mapping = iterator.Next()) != NULL) { 3108 VMArea* area = mapping->area; 3109 VMTranslationMap* map = area->address_space->TranslationMap(); 3110 3111 map->Lock(); 3112 map->ClearFlags(virtual_page_address(area, page), flags); 3113 map->Unlock(); 3114 } 3115 } 3116 3117 3118 /*! Removes all mappings from a page. 3119 After you've called this function, the page is unmapped from memory and 3120 the page's \c accessed and \c modified flags have been updated according 3121 to the state of the mappings. 3122 The page's cache must be locked. 
3123 */ 3124 void 3125 vm_remove_all_page_mappings(vm_page* page) 3126 { 3127 while (vm_page_mapping* mapping = page->mappings.Head()) { 3128 VMArea* area = mapping->area; 3129 VMTranslationMap* map = area->address_space->TranslationMap(); 3130 addr_t address = virtual_page_address(area, page); 3131 map->UnmapPage(area, address, false); 3132 } 3133 } 3134 3135 3136 int32 3137 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 3138 { 3139 int32 count = 0; 3140 3141 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3142 vm_page_mapping* mapping; 3143 while ((mapping = iterator.Next()) != NULL) { 3144 VMArea* area = mapping->area; 3145 VMTranslationMap* map = area->address_space->TranslationMap(); 3146 3147 bool modified; 3148 if (map->ClearAccessedAndModified(area, 3149 virtual_page_address(area, page), false, modified)) { 3150 count++; 3151 } 3152 3153 page->modified |= modified; 3154 } 3155 3156 3157 if (page->accessed) { 3158 count++; 3159 page->accessed = false; 3160 } 3161 3162 return count; 3163 } 3164 3165 3166 /*! Removes all mappings of a page and/or clears the accessed bits of the 3167 mappings. 3168 The function iterates through the page mappings and removes them until 3169 encountering one that has been accessed. From then on it will continue to 3170 iterate, but only clear the accessed flag of the mapping. The page's 3171 \c modified bit will be updated accordingly, the \c accessed bit will be 3172 cleared. 3173 \return The number of mapping accessed bits encountered, including the 3174 \c accessed bit of the page itself. If \c 0 is returned, all mappings 3175 of the page have been removed. 3176 */ 3177 int32 3178 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 3179 { 3180 ASSERT(page->WiredCount() == 0); 3181 3182 if (page->accessed) 3183 return vm_clear_page_mapping_accessed_flags(page); 3184 3185 while (vm_page_mapping* mapping = page->mappings.Head()) { 3186 VMArea* area = mapping->area; 3187 VMTranslationMap* map = area->address_space->TranslationMap(); 3188 addr_t address = virtual_page_address(area, page); 3189 bool modified = false; 3190 if (map->ClearAccessedAndModified(area, address, true, modified)) { 3191 page->accessed = true; 3192 page->modified |= modified; 3193 return vm_clear_page_mapping_accessed_flags(page); 3194 } 3195 page->modified |= modified; 3196 } 3197 3198 return 0; 3199 } 3200 3201 3202 static int 3203 display_mem(int argc, char** argv) 3204 { 3205 bool physical = false; 3206 addr_t copyAddress; 3207 int32 displayWidth; 3208 int32 itemSize; 3209 int32 num = -1; 3210 addr_t address; 3211 int i = 1, j; 3212 3213 if (argc > 1 && argv[1][0] == '-') { 3214 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3215 physical = true; 3216 i++; 3217 } else 3218 i = 99; 3219 } 3220 3221 if (argc < i + 1 || argc > i + 2) { 3222 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3223 "\tdl - 8 bytes\n" 3224 "\tdw - 4 bytes\n" 3225 "\tds - 2 bytes\n" 3226 "\tdb - 1 byte\n" 3227 "\tstring - a whole string\n" 3228 " -p or --physical only allows memory from a single page to be " 3229 "displayed.\n"); 3230 return 0; 3231 } 3232 3233 address = parse_expression(argv[i]); 3234 3235 if (argc > i + 1) 3236 num = parse_expression(argv[i + 1]); 3237 3238 // build the format string 3239 if (strcmp(argv[0], "db") == 0) { 3240 itemSize = 1; 3241 displayWidth = 16; 3242 } else if (strcmp(argv[0], "ds") == 0) { 3243 itemSize = 2; 3244 displayWidth = 8; 3245 } else if (strcmp(argv[0], "dw") == 0) { 3246 
itemSize = 4; 3247 displayWidth = 4; 3248 } else if (strcmp(argv[0], "dl") == 0) { 3249 itemSize = 8; 3250 displayWidth = 2; 3251 } else if (strcmp(argv[0], "string") == 0) { 3252 itemSize = 1; 3253 displayWidth = -1; 3254 } else { 3255 kprintf("display_mem called in an invalid way!\n"); 3256 return 0; 3257 } 3258 3259 if (num <= 0) 3260 num = displayWidth; 3261 3262 void* physicalPageHandle = NULL; 3263 3264 if (physical) { 3265 int32 offset = address & (B_PAGE_SIZE - 1); 3266 if (num * itemSize + offset > B_PAGE_SIZE) { 3267 num = (B_PAGE_SIZE - offset) / itemSize; 3268 kprintf("NOTE: number of bytes has been cut to page size\n"); 3269 } 3270 3271 address = ROUNDDOWN(address, B_PAGE_SIZE); 3272 3273 if (vm_get_physical_page_debug(address, &copyAddress, 3274 &physicalPageHandle) != B_OK) { 3275 kprintf("getting the hardware page failed."); 3276 return 0; 3277 } 3278 3279 address += offset; 3280 copyAddress += offset; 3281 } else 3282 copyAddress = address; 3283 3284 if (!strcmp(argv[0], "string")) { 3285 kprintf("%p \"", (char*)copyAddress); 3286 3287 // string mode 3288 for (i = 0; true; i++) { 3289 char c; 3290 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1) 3291 != B_OK 3292 || c == '\0') { 3293 break; 3294 } 3295 3296 if (c == '\n') 3297 kprintf("\\n"); 3298 else if (c == '\t') 3299 kprintf("\\t"); 3300 else { 3301 if (!isprint(c)) 3302 c = '.'; 3303 3304 kprintf("%c", c); 3305 } 3306 } 3307 3308 kprintf("\"\n"); 3309 } else { 3310 // number mode 3311 for (i = 0; i < num; i++) { 3312 uint64 value; 3313 3314 if ((i % displayWidth) == 0) { 3315 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3316 if (i != 0) 3317 kprintf("\n"); 3318 3319 kprintf("[0x%lx] ", address + i * itemSize); 3320 3321 for (j = 0; j < displayed; j++) { 3322 char c; 3323 if (debug_memcpy(B_CURRENT_TEAM, &c, 3324 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 3325 displayed = j; 3326 break; 3327 } 3328 if (!isprint(c)) 3329 c = '.'; 3330 3331 kprintf("%c", c); 3332 } 3333 if (num > displayWidth) { 3334 // make sure the spacing in the last line is correct 3335 for (j = displayed; j < displayWidth * itemSize; j++) 3336 kprintf(" "); 3337 } 3338 kprintf(" "); 3339 } 3340 3341 if (debug_memcpy(B_CURRENT_TEAM, &value, 3342 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 3343 kprintf("read fault"); 3344 break; 3345 } 3346 3347 switch (itemSize) { 3348 case 1: 3349 kprintf(" %02" B_PRIx8, *(uint8*)&value); 3350 break; 3351 case 2: 3352 kprintf(" %04" B_PRIx16, *(uint16*)&value); 3353 break; 3354 case 4: 3355 kprintf(" %08" B_PRIx32, *(uint32*)&value); 3356 break; 3357 case 8: 3358 kprintf(" %016" B_PRIx64, *(uint64*)&value); 3359 break; 3360 } 3361 } 3362 3363 kprintf("\n"); 3364 } 3365 3366 if (physical) { 3367 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3368 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3369 } 3370 return 0; 3371 } 3372 3373 3374 static void 3375 dump_cache_tree_recursively(VMCache* cache, int level, 3376 VMCache* highlightCache) 3377 { 3378 // print this cache 3379 for (int i = 0; i < level; i++) 3380 kprintf(" "); 3381 if (cache == highlightCache) 3382 kprintf("%p <--\n", cache); 3383 else 3384 kprintf("%p\n", cache); 3385 3386 // recursively print its consumers 3387 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3388 VMCache* consumer = it.Next();) { 3389 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3390 } 3391 } 3392 3393 3394 static int 3395 dump_cache_tree(int argc, char** argv) 3396 { 3397 
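	// KDL command: walks up to the root of the given cache's tree (its
	// transitive source) and prints the whole tree, marking the given cache
	// with "<--".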
if (argc != 2 || !strcmp(argv[1], "--help")) { 3398 kprintf("usage: %s <address>\n", argv[0]); 3399 return 0; 3400 } 3401 3402 addr_t address = parse_expression(argv[1]); 3403 if (address == 0) 3404 return 0; 3405 3406 VMCache* cache = (VMCache*)address; 3407 VMCache* root = cache; 3408 3409 // find the root cache (the transitive source) 3410 while (root->source != NULL) 3411 root = root->source; 3412 3413 dump_cache_tree_recursively(root, 0, cache); 3414 3415 return 0; 3416 } 3417 3418 3419 const char* 3420 vm_cache_type_to_string(int32 type) 3421 { 3422 switch (type) { 3423 case CACHE_TYPE_RAM: 3424 return "RAM"; 3425 case CACHE_TYPE_DEVICE: 3426 return "device"; 3427 case CACHE_TYPE_VNODE: 3428 return "vnode"; 3429 case CACHE_TYPE_NULL: 3430 return "null"; 3431 3432 default: 3433 return "unknown"; 3434 } 3435 } 3436 3437 3438 #if DEBUG_CACHE_LIST 3439 3440 static void 3441 update_cache_info_recursively(VMCache* cache, cache_info& info) 3442 { 3443 info.page_count += cache->page_count; 3444 if (cache->type == CACHE_TYPE_RAM) 3445 info.committed += cache->committed_size; 3446 3447 // recurse 3448 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3449 VMCache* consumer = it.Next();) { 3450 update_cache_info_recursively(consumer, info); 3451 } 3452 } 3453 3454 3455 static int 3456 cache_info_compare_page_count(const void* _a, const void* _b) 3457 { 3458 const cache_info* a = (const cache_info*)_a; 3459 const cache_info* b = (const cache_info*)_b; 3460 if (a->page_count == b->page_count) 3461 return 0; 3462 return a->page_count < b->page_count ? 1 : -1; 3463 } 3464 3465 3466 static int 3467 cache_info_compare_committed(const void* _a, const void* _b) 3468 { 3469 const cache_info* a = (const cache_info*)_a; 3470 const cache_info* b = (const cache_info*)_b; 3471 if (a->committed == b->committed) 3472 return 0; 3473 return a->committed < b->committed ? 
1 : -1; 3474 } 3475 3476 3477 static void 3478 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3479 { 3480 for (int i = 0; i < level; i++) 3481 kprintf(" "); 3482 3483 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3484 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3485 cache->virtual_base, cache->virtual_end, cache->page_count); 3486 3487 if (level == 0) 3488 kprintf("/%lu", info.page_count); 3489 3490 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3491 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3492 3493 if (level == 0) 3494 kprintf("/%lu", info.committed); 3495 } 3496 3497 // areas 3498 if (cache->areas != NULL) { 3499 VMArea* area = cache->areas; 3500 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3501 area->name, area->address_space->ID()); 3502 3503 while (area->cache_next != NULL) { 3504 area = area->cache_next; 3505 kprintf(", %" B_PRId32, area->id); 3506 } 3507 } 3508 3509 kputs("\n"); 3510 3511 // recurse 3512 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3513 VMCache* consumer = it.Next();) { 3514 dump_caches_recursively(consumer, info, level + 1); 3515 } 3516 } 3517 3518 3519 static int 3520 dump_caches(int argc, char** argv) 3521 { 3522 if (sCacheInfoTable == NULL) { 3523 kprintf("No cache info table!\n"); 3524 return 0; 3525 } 3526 3527 bool sortByPageCount = true; 3528 3529 for (int32 i = 1; i < argc; i++) { 3530 if (strcmp(argv[i], "-c") == 0) { 3531 sortByPageCount = false; 3532 } else { 3533 print_debugger_command_usage(argv[0]); 3534 return 0; 3535 } 3536 } 3537 3538 uint32 totalCount = 0; 3539 uint32 rootCount = 0; 3540 off_t totalCommitted = 0; 3541 page_num_t totalPages = 0; 3542 3543 VMCache* cache = gDebugCacheList; 3544 while (cache) { 3545 totalCount++; 3546 if (cache->source == NULL) { 3547 cache_info stackInfo; 3548 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3549 ? sCacheInfoTable[rootCount] : stackInfo; 3550 rootCount++; 3551 info.cache = cache; 3552 info.page_count = 0; 3553 info.committed = 0; 3554 update_cache_info_recursively(cache, info); 3555 totalCommitted += info.committed; 3556 totalPages += info.page_count; 3557 } 3558 3559 cache = cache->debug_next; 3560 } 3561 3562 if (rootCount <= (uint32)kCacheInfoTableCount) { 3563 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3564 sortByPageCount 3565 ? &cache_info_compare_page_count 3566 : &cache_info_compare_committed); 3567 } 3568 3569 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3570 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3571 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3572 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3573 "page count" : "committed size"); 3574 3575 if (rootCount <= (uint32)kCacheInfoTableCount) { 3576 for (uint32 i = 0; i < rootCount; i++) { 3577 cache_info& info = sCacheInfoTable[i]; 3578 dump_caches_recursively(info.cache, info, 0); 3579 } 3580 } else 3581 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3582 3583 return 0; 3584 } 3585 3586 #endif // DEBUG_CACHE_LIST 3587 3588 3589 static int 3590 dump_cache(int argc, char** argv) 3591 { 3592 VMCache* cache; 3593 bool showPages = false; 3594 int i = 1; 3595 3596 if (argc < 2 || !strcmp(argv[1], "--help")) { 3597 kprintf("usage: %s [-ps] <address>\n" 3598 " if -p is specified, all pages are shown, if -s is used\n" 3599 " only the cache info is shown respectively.\n", argv[0]); 3600 return 0; 3601 } 3602 while (argv[i][0] == '-') { 3603 char* arg = argv[i] + 1; 3604 while (arg[0]) { 3605 if (arg[0] == 'p') 3606 showPages = true; 3607 arg++; 3608 } 3609 i++; 3610 } 3611 if (argv[i] == NULL) { 3612 kprintf("%s: invalid argument, pass address\n", argv[0]); 3613 return 0; 3614 } 3615 3616 addr_t address = parse_expression(argv[i]); 3617 if (address == 0) 3618 return 0; 3619 3620 cache = (VMCache*)address; 3621 3622 cache->Dump(showPages); 3623 3624 set_debug_variable("_sourceCache", (addr_t)cache->source); 3625 3626 return 0; 3627 } 3628 3629 3630 static void 3631 dump_area_struct(VMArea* area, bool mappings) 3632 { 3633 kprintf("AREA: %p\n", area); 3634 kprintf("name:\t\t'%s'\n", area->name); 3635 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3636 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3637 kprintf("base:\t\t0x%lx\n", area->Base()); 3638 kprintf("size:\t\t0x%lx\n", area->Size()); 3639 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3640 kprintf("page_protection:%p\n", area->page_protections); 3641 kprintf("wiring:\t\t0x%x\n", area->wiring); 3642 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3643 kprintf("cache:\t\t%p\n", area->cache); 3644 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3645 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3646 kprintf("cache_next:\t%p\n", area->cache_next); 3647 kprintf("cache_prev:\t%p\n", area->cache_prev); 3648 3649 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3650 if (mappings) { 3651 kprintf("page mappings:\n"); 3652 while (iterator.HasNext()) { 3653 vm_page_mapping* mapping = iterator.Next(); 3654 kprintf(" %p", mapping->page); 3655 } 3656 kprintf("\n"); 3657 } else { 3658 uint32 count = 0; 3659 while (iterator.Next() != NULL) { 3660 count++; 3661 } 3662 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3663 } 3664 } 3665 3666 3667 static int 3668 dump_area(int argc, char** argv) 3669 { 3670 bool mappings = false; 3671 bool found = false; 3672 int32 index = 1; 3673 VMArea* area; 3674 addr_t num; 3675 3676 if (argc < 2 || !strcmp(argv[1], "--help")) { 3677 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3678 "All areas matching either id/address/name are listed. 
You can\n" 3679 "force to check only a specific item by prefixing the specifier\n" 3680 "with the id/contains/address/name keywords.\n" 3681 "-m shows the area's mappings as well.\n"); 3682 return 0; 3683 } 3684 3685 if (!strcmp(argv[1], "-m")) { 3686 mappings = true; 3687 index++; 3688 } 3689 3690 int32 mode = 0xf; 3691 if (!strcmp(argv[index], "id")) 3692 mode = 1; 3693 else if (!strcmp(argv[index], "contains")) 3694 mode = 2; 3695 else if (!strcmp(argv[index], "name")) 3696 mode = 4; 3697 else if (!strcmp(argv[index], "address")) 3698 mode = 0; 3699 if (mode != 0xf) 3700 index++; 3701 3702 if (index >= argc) { 3703 kprintf("No area specifier given.\n"); 3704 return 0; 3705 } 3706 3707 num = parse_expression(argv[index]); 3708 3709 if (mode == 0) { 3710 dump_area_struct((struct VMArea*)num, mappings); 3711 } else { 3712 // walk through the area list, looking for the arguments as a name 3713 3714 VMAreasTree::Iterator it = VMAreas::GetIterator(); 3715 while ((area = it.Next()) != NULL) { 3716 if (((mode & 4) != 0 3717 && !strcmp(argv[index], area->name)) 3718 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3719 || (((mode & 2) != 0 && area->Base() <= num 3720 && area->Base() + area->Size() > num))))) { 3721 dump_area_struct(area, mappings); 3722 found = true; 3723 } 3724 } 3725 3726 if (!found) 3727 kprintf("could not find area %s (%ld)\n", argv[index], num); 3728 } 3729 3730 return 0; 3731 } 3732 3733 3734 static int 3735 dump_area_list(int argc, char** argv) 3736 { 3737 VMArea* area; 3738 const char* name = NULL; 3739 int32 id = 0; 3740 3741 if (argc > 1) { 3742 id = parse_expression(argv[1]); 3743 if (id == 0) 3744 name = argv[1]; 3745 } 3746 3747 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3748 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3749 B_PRINTF_POINTER_WIDTH, "size"); 3750 3751 VMAreasTree::Iterator it = VMAreas::GetIterator(); 3752 while ((area = it.Next()) != NULL) { 3753 if ((id != 0 && area->address_space->ID() != id) 3754 || (name != NULL && strstr(area->name, name) == NULL)) 3755 continue; 3756 3757 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3758 area->id, (void*)area->Base(), (void*)area->Size(), 3759 area->protection, area->wiring, area->name); 3760 } 3761 return 0; 3762 } 3763 3764 3765 static int 3766 dump_available_memory(int argc, char** argv) 3767 { 3768 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3769 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3770 return 0; 3771 } 3772 3773 3774 static int 3775 dump_mapping_info(int argc, char** argv) 3776 { 3777 bool reverseLookup = false; 3778 bool pageLookup = false; 3779 3780 int argi = 1; 3781 for (; argi < argc && argv[argi][0] == '-'; argi++) { 3782 const char* arg = argv[argi]; 3783 if (strcmp(arg, "-r") == 0) { 3784 reverseLookup = true; 3785 } else if (strcmp(arg, "-p") == 0) { 3786 reverseLookup = true; 3787 pageLookup = true; 3788 } else { 3789 print_debugger_command_usage(argv[0]); 3790 return 0; 3791 } 3792 } 3793 3794 // We need at least one argument, the address. Optionally a thread ID can be 3795 // specified. 
3796 if (argi >= argc || argi + 2 < argc) { 3797 print_debugger_command_usage(argv[0]); 3798 return 0; 3799 } 3800 3801 uint64 addressValue; 3802 if (!evaluate_debug_expression(argv[argi++], &addressValue, false)) 3803 return 0; 3804 3805 Team* team = NULL; 3806 if (argi < argc) { 3807 uint64 threadID; 3808 if (!evaluate_debug_expression(argv[argi++], &threadID, false)) 3809 return 0; 3810 3811 Thread* thread = Thread::GetDebug(threadID); 3812 if (thread == NULL) { 3813 kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]); 3814 return 0; 3815 } 3816 3817 team = thread->team; 3818 } 3819 3820 if (reverseLookup) { 3821 phys_addr_t physicalAddress; 3822 if (pageLookup) { 3823 vm_page* page = (vm_page*)(addr_t)addressValue; 3824 physicalAddress = page->physical_page_number * B_PAGE_SIZE; 3825 } else { 3826 physicalAddress = (phys_addr_t)addressValue; 3827 physicalAddress -= physicalAddress % B_PAGE_SIZE; 3828 } 3829 3830 kprintf(" Team Virtual Address Area\n"); 3831 kprintf("--------------------------------------\n"); 3832 3833 struct Callback : VMTranslationMap::ReverseMappingInfoCallback { 3834 Callback() 3835 : 3836 fAddressSpace(NULL) 3837 { 3838 } 3839 3840 void SetAddressSpace(VMAddressSpace* addressSpace) 3841 { 3842 fAddressSpace = addressSpace; 3843 } 3844 3845 virtual bool HandleVirtualAddress(addr_t virtualAddress) 3846 { 3847 kprintf("%8" B_PRId32 " %#18" B_PRIxADDR, fAddressSpace->ID(), 3848 virtualAddress); 3849 if (VMArea* area = fAddressSpace->LookupArea(virtualAddress)) 3850 kprintf(" %8" B_PRId32 " %s\n", area->id, area->name); 3851 else 3852 kprintf("\n"); 3853 return false; 3854 } 3855 3856 private: 3857 VMAddressSpace* fAddressSpace; 3858 } callback; 3859 3860 if (team != NULL) { 3861 // team specified -- get its address space 3862 VMAddressSpace* addressSpace = team->address_space; 3863 if (addressSpace == NULL) { 3864 kprintf("Failed to get address space!\n"); 3865 return 0; 3866 } 3867 3868 callback.SetAddressSpace(addressSpace); 3869 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3870 physicalAddress, callback); 3871 } else { 3872 // no team specified -- iterate through all address spaces 3873 for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 3874 addressSpace != NULL; 3875 addressSpace = VMAddressSpace::DebugNext(addressSpace)) { 3876 callback.SetAddressSpace(addressSpace); 3877 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3878 physicalAddress, callback); 3879 } 3880 } 3881 } else { 3882 // get the address space 3883 addr_t virtualAddress = (addr_t)addressValue; 3884 virtualAddress -= virtualAddress % B_PAGE_SIZE; 3885 VMAddressSpace* addressSpace; 3886 if (IS_KERNEL_ADDRESS(virtualAddress)) { 3887 addressSpace = VMAddressSpace::Kernel(); 3888 } else if (team != NULL) { 3889 addressSpace = team->address_space; 3890 } else { 3891 Thread* thread = debug_get_debugged_thread(); 3892 if (thread == NULL || thread->team == NULL) { 3893 kprintf("Failed to get team!\n"); 3894 return 0; 3895 } 3896 3897 addressSpace = thread->team->address_space; 3898 } 3899 3900 if (addressSpace == NULL) { 3901 kprintf("Failed to get address space!\n"); 3902 return 0; 3903 } 3904 3905 // let the translation map implementation do the job 3906 addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress); 3907 } 3908 3909 return 0; 3910 } 3911 3912 3913 /*! Deletes all areas and reserved regions in the given address space. 3914 3915 The caller must ensure that none of the areas has any wired ranges. 
3916 3917 \param addressSpace The address space. 3918 \param deletingAddressSpace \c true, if the address space is in the process 3919 of being deleted. 3920 */ 3921 void 3922 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3923 { 3924 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3925 addressSpace->ID())); 3926 3927 addressSpace->WriteLock(); 3928 3929 // remove all reserved areas in this address space 3930 addressSpace->UnreserveAllAddressRanges(0); 3931 3932 // delete all the areas in this address space 3933 while (VMArea* area = addressSpace->FirstArea()) { 3934 ASSERT(!area->IsWired()); 3935 delete_area(addressSpace, area, deletingAddressSpace); 3936 } 3937 3938 addressSpace->WriteUnlock(); 3939 } 3940 3941 3942 static area_id 3943 vm_area_for(addr_t address, bool kernel) 3944 { 3945 team_id team; 3946 if (IS_USER_ADDRESS(address)) { 3947 // we try the user team address space, if any 3948 team = VMAddressSpace::CurrentID(); 3949 if (team < 0) 3950 return team; 3951 } else 3952 team = VMAddressSpace::KernelID(); 3953 3954 AddressSpaceReadLocker locker(team); 3955 if (!locker.IsLocked()) 3956 return B_BAD_TEAM_ID; 3957 3958 VMArea* area = locker.AddressSpace()->LookupArea(address); 3959 if (area != NULL) { 3960 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0 3961 && (area->protection & B_KERNEL_AREA) != 0) 3962 return B_ERROR; 3963 3964 return area->id; 3965 } 3966 3967 return B_ERROR; 3968 } 3969 3970 3971 /*! Frees physical pages that were used during the boot process. 3972 \a end is inclusive. 3973 */ 3974 static void 3975 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3976 { 3977 // free all physical pages in the specified range 3978 3979 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3980 phys_addr_t physicalAddress; 3981 uint32 flags; 3982 3983 if (map->Query(current, &physicalAddress, &flags) == B_OK 3984 && (flags & PAGE_PRESENT) != 0) { 3985 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3986 if (page != NULL && page->State() != PAGE_STATE_FREE 3987 && page->State() != PAGE_STATE_CLEAR 3988 && page->State() != PAGE_STATE_UNUSED) { 3989 DEBUG_PAGE_ACCESS_START(page); 3990 vm_page_set_state(page, PAGE_STATE_FREE); 3991 } 3992 } 3993 } 3994 3995 // unmap the memory 3996 map->Unmap(start, end); 3997 } 3998 3999 4000 void 4001 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 4002 { 4003 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 4004 addr_t end = start + (size - 1); 4005 addr_t lastEnd = start; 4006 4007 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 4008 (void*)start, (void*)end)); 4009 4010 // The areas are sorted in virtual address space order, so 4011 // we just have to find the holes between them that fall 4012 // into the area we should dispose 4013 4014 map->Lock(); 4015 4016 for (VMAddressSpace::AreaIterator it 4017 = VMAddressSpace::Kernel()->GetAreaIterator(); 4018 VMArea* area = it.Next();) { 4019 addr_t areaStart = area->Base(); 4020 addr_t areaEnd = areaStart + (area->Size() - 1); 4021 4022 if (areaEnd < start) 4023 continue; 4024 4025 if (areaStart > end) { 4026 // we are done, the area is already beyond of what we have to free 4027 break; 4028 } 4029 4030 if (areaStart > lastEnd) { 4031 // this is something we can free 4032 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 4033 (void*)areaStart)); 4034 unmap_and_free_physical_pages(map, lastEnd, 
areaStart - 1); 4035 } 4036 4037 if (areaEnd >= end) { 4038 lastEnd = areaEnd; 4039 // no +1 to prevent potential overflow 4040 break; 4041 } 4042 4043 lastEnd = areaEnd + 1; 4044 } 4045 4046 if (lastEnd < end) { 4047 // we can also get rid of some space at the end of the area 4048 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 4049 (void*)end)); 4050 unmap_and_free_physical_pages(map, lastEnd, end); 4051 } 4052 4053 map->Unlock(); 4054 } 4055 4056 4057 static void 4058 create_preloaded_image_areas(struct preloaded_image* _image) 4059 { 4060 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image); 4061 char name[B_OS_NAME_LENGTH]; 4062 void* address; 4063 int32 length; 4064 4065 // use file name to create a good area name 4066 char* fileName = strrchr(image->name, '/'); 4067 if (fileName == NULL) 4068 fileName = image->name; 4069 else 4070 fileName++; 4071 4072 length = strlen(fileName); 4073 // make sure there is enough space for the suffix 4074 if (length > 25) 4075 length = 25; 4076 4077 memcpy(name, fileName, length); 4078 strcpy(name + length, "_text"); 4079 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 4080 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 4081 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 4082 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4083 // this will later be remapped read-only/executable by the 4084 // ELF initialization code 4085 4086 strcpy(name + length, "_data"); 4087 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 4088 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 4089 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 4090 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4091 } 4092 4093 4094 /*! Frees all previously kernel arguments areas from the kernel_args structure. 4095 Any boot loader resources contained in that arguments must not be accessed 4096 anymore past this point. 4097 */ 4098 void 4099 vm_free_kernel_args(kernel_args* args) 4100 { 4101 uint32 i; 4102 4103 TRACE(("vm_free_kernel_args()\n")); 4104 4105 for (i = 0; i < args->num_kernel_args_ranges; i++) { 4106 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start); 4107 if (area >= B_OK) 4108 delete_area(area); 4109 } 4110 } 4111 4112 4113 static void 4114 allocate_kernel_args(kernel_args* args) 4115 { 4116 TRACE(("allocate_kernel_args()\n")); 4117 4118 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 4119 void* address = (void*)(addr_t)args->kernel_args_range[i].start; 4120 4121 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 4122 args->kernel_args_range[i].size, B_ALREADY_WIRED, 4123 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4124 } 4125 } 4126 4127 4128 static void 4129 unreserve_boot_loader_ranges(kernel_args* args) 4130 { 4131 TRACE(("unreserve_boot_loader_ranges()\n")); 4132 4133 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4134 vm_unreserve_address_range(VMAddressSpace::KernelID(), 4135 (void*)(addr_t)args->virtual_allocated_range[i].start, 4136 args->virtual_allocated_range[i].size); 4137 } 4138 } 4139 4140 4141 static void 4142 reserve_boot_loader_ranges(kernel_args* args) 4143 { 4144 TRACE(("reserve_boot_loader_ranges()\n")); 4145 4146 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4147 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start; 4148 4149 // If the address is no kernel address, we just skip it. The 4150 // architecture specific code has to deal with it. 
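	// (What exactly shows up here is platform dependent -- for instance,
	// ranges the boot loader mapped outside the kernel address space. Only
	// ranges within the kernel address space can be reserved here.)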
4151 if (!IS_KERNEL_ADDRESS(address)) { 4152 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 4153 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 4154 continue; 4155 } 4156 4157 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 4158 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 4159 if (status < B_OK) 4160 panic("could not reserve boot loader ranges\n"); 4161 } 4162 } 4163 4164 4165 static addr_t 4166 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 4167 { 4168 size = PAGE_ALIGN(size); 4169 4170 // find a slot in the virtual allocation addr range 4171 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 4172 // check to see if the space between this one and the last is big enough 4173 addr_t rangeStart = args->virtual_allocated_range[i].start; 4174 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 4175 + args->virtual_allocated_range[i - 1].size; 4176 4177 addr_t base = alignment > 0 4178 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 4179 4180 if (base >= KERNEL_BASE && base < rangeStart 4181 && rangeStart - base >= size) { 4182 args->virtual_allocated_range[i - 1].size 4183 += base + size - previousRangeEnd; 4184 return base; 4185 } 4186 } 4187 4188 // we hadn't found one between allocation ranges. this is ok. 4189 // see if there's a gap after the last one 4190 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 4191 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 4192 + args->virtual_allocated_range[lastEntryIndex].size; 4193 addr_t base = alignment > 0 4194 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 4195 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 4196 args->virtual_allocated_range[lastEntryIndex].size 4197 += base + size - lastRangeEnd; 4198 return base; 4199 } 4200 4201 // see if there's a gap before the first one 4202 addr_t rangeStart = args->virtual_allocated_range[0].start; 4203 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 4204 base = rangeStart - size; 4205 if (alignment > 0) 4206 base = ROUNDDOWN(base, alignment); 4207 4208 if (base >= KERNEL_BASE) { 4209 args->virtual_allocated_range[0].start = base; 4210 args->virtual_allocated_range[0].size += rangeStart - base; 4211 return base; 4212 } 4213 } 4214 4215 return 0; 4216 } 4217 4218 4219 static bool 4220 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 4221 { 4222 // TODO: horrible brute-force method of determining if the page can be 4223 // allocated 4224 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4225 if (address >= args->physical_memory_range[i].start 4226 && address < args->physical_memory_range[i].start 4227 + args->physical_memory_range[i].size) 4228 return true; 4229 } 4230 return false; 4231 } 4232 4233 4234 page_num_t 4235 vm_allocate_early_physical_page(kernel_args* args) 4236 { 4237 if (args->num_physical_allocated_ranges == 0) { 4238 panic("early physical page allocations no longer possible!"); 4239 return 0; 4240 } 4241 4242 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4243 phys_addr_t nextPage; 4244 4245 nextPage = args->physical_allocated_range[i].start 4246 + args->physical_allocated_range[i].size; 4247 // see if the page after the next allocated paddr run can be allocated 4248 if (i + 1 < args->num_physical_allocated_ranges 4249 && args->physical_allocated_range[i + 1].size != 0) { 4250 // see if the next page will collide with 
the next allocated range 4251 if (nextPage >= args->physical_allocated_range[i+1].start) 4252 continue; 4253 } 4254 // see if the next physical page fits in the memory block 4255 if (is_page_in_physical_memory_range(args, nextPage)) { 4256 // we got one! 4257 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4258 return nextPage / B_PAGE_SIZE; 4259 } 4260 } 4261 4262 // Expanding upwards didn't work, try going downwards. 4263 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4264 phys_addr_t nextPage; 4265 4266 nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE; 4267 // see if the page after the prev allocated paddr run can be allocated 4268 if (i > 0 && args->physical_allocated_range[i - 1].size != 0) { 4269 // see if the next page will collide with the next allocated range 4270 if (nextPage < args->physical_allocated_range[i-1].start 4271 + args->physical_allocated_range[i-1].size) 4272 continue; 4273 } 4274 // see if the next physical page fits in the memory block 4275 if (is_page_in_physical_memory_range(args, nextPage)) { 4276 // we got one! 4277 args->physical_allocated_range[i].start -= B_PAGE_SIZE; 4278 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4279 return nextPage / B_PAGE_SIZE; 4280 } 4281 } 4282 4283 return 0; 4284 // could not allocate a block 4285 } 4286 4287 4288 /*! This one uses the kernel_args' physical and virtual memory ranges to 4289 allocate some pages before the VM is completely up. 4290 */ 4291 addr_t 4292 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4293 uint32 attributes, addr_t alignment) 4294 { 4295 if (physicalSize > virtualSize) 4296 physicalSize = virtualSize; 4297 4298 // find the vaddr to allocate at 4299 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 4300 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 4301 if (virtualBase == 0) { 4302 panic("vm_allocate_early: could not allocate virtual address\n"); 4303 return 0; 4304 } 4305 4306 // map the pages 4307 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4308 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 4309 if (physicalAddress == 0) 4310 panic("error allocating early page!\n"); 4311 4312 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4313 4314 status_t status = arch_vm_translation_map_early_map(args, 4315 virtualBase + i * B_PAGE_SIZE, 4316 physicalAddress * B_PAGE_SIZE, attributes, 4317 &vm_allocate_early_physical_page); 4318 if (status != B_OK) 4319 panic("error mapping early page!"); 4320 } 4321 4322 return virtualBase; 4323 } 4324 4325 4326 /*! The main entrance point to initialize the VM. 
*/ 4327 status_t 4328 vm_init(kernel_args* args) 4329 { 4330 struct preloaded_image* image; 4331 void* address; 4332 status_t err = 0; 4333 uint32 i; 4334 4335 TRACE(("vm_init: entry\n")); 4336 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 4337 err = arch_vm_init(args); 4338 4339 // initialize some globals 4340 vm_page_init_num_pages(args); 4341 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 4342 4343 slab_init(args); 4344 4345 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4346 off_t heapSize = INITIAL_HEAP_SIZE; 4347 // try to accomodate low memory systems 4348 while (heapSize > sAvailableMemory / 8) 4349 heapSize /= 2; 4350 if (heapSize < 1024 * 1024) 4351 panic("vm_init: go buy some RAM please."); 4352 4353 // map in the new heap and initialize it 4354 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 4355 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 4356 TRACE(("heap at 0x%lx\n", heapBase)); 4357 heap_init(heapBase, heapSize); 4358 #endif 4359 4360 // initialize the free page list and physical page mapper 4361 vm_page_init(args); 4362 4363 // initialize the cache allocators 4364 vm_cache_init(args); 4365 4366 { 4367 status_t error = VMAreas::Init(); 4368 if (error != B_OK) 4369 panic("vm_init: error initializing areas map\n"); 4370 } 4371 4372 VMAddressSpace::Init(); 4373 reserve_boot_loader_ranges(args); 4374 4375 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4376 heap_init_post_area(); 4377 #endif 4378 4379 // Do any further initialization that the architecture dependant layers may 4380 // need now 4381 arch_vm_translation_map_init_post_area(args); 4382 arch_vm_init_post_area(args); 4383 vm_page_init_post_area(args); 4384 slab_init_post_area(); 4385 4386 // allocate areas to represent stuff that already exists 4387 4388 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4389 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 4390 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 4391 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4392 #endif 4393 4394 allocate_kernel_args(args); 4395 4396 create_preloaded_image_areas(args->kernel_image); 4397 4398 // allocate areas for preloaded images 4399 for (image = args->preloaded_images; image != NULL; image = image->next) 4400 create_preloaded_image_areas(image); 4401 4402 // allocate kernel stacks 4403 for (i = 0; i < args->num_cpus; i++) { 4404 char name[64]; 4405 4406 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 4407 address = (void*)args->cpu_kstack[i].start; 4408 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4409 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4410 } 4411 4412 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 4413 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 4414 4415 #if PARANOID_KERNEL_MALLOC 4416 vm_block_address_range("uninitialized heap memory", 4417 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4418 #endif 4419 #if PARANOID_KERNEL_FREE 4420 vm_block_address_range("freed heap memory", 4421 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4422 #endif 4423 4424 // create the object cache for the page mappings 4425 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 4426 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 4427 NULL, NULL); 4428 if (gPageMappingsObjectCache == NULL) 4429 panic("failed to create page mappings object cache"); 4430 4431 
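	// Keep a minimum reserve of mapping objects around, so that page mappings
	// can still be created when memory is low; if map_page() nevertheless
	// fails, the fault path replenishes the reserve via object_cache_reserve()
	// (see vm_soft_fault()).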
object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 4432 4433 #if DEBUG_CACHE_LIST 4434 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 4435 virtual_address_restrictions virtualRestrictions = {}; 4436 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 4437 physical_address_restrictions physicalRestrictions = {}; 4438 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 4439 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4440 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 4441 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions, 4442 &physicalRestrictions, (void**)&sCacheInfoTable); 4443 } 4444 #endif // DEBUG_CACHE_LIST 4445 4446 // add some debugger commands 4447 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4448 add_debugger_command("area", &dump_area, 4449 "Dump info about a particular area"); 4450 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 4451 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 4452 #if DEBUG_CACHE_LIST 4453 if (sCacheInfoTable != NULL) { 4454 add_debugger_command_etc("caches", &dump_caches, 4455 "List all VMCache trees", 4456 "[ \"-c\" ]\n" 4457 "All cache trees are listed sorted in decreasing order by number " 4458 "of\n" 4459 "used pages or, if \"-c\" is specified, by size of committed " 4460 "memory.\n", 4461 0); 4462 } 4463 #endif 4464 add_debugger_command("avail", &dump_available_memory, 4465 "Dump available memory"); 4466 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4467 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4468 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4469 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4470 add_debugger_command("string", &display_mem, "dump strings"); 4471 4472 add_debugger_command_etc("mapping", &dump_mapping_info, 4473 "Print address mapping information", 4474 "[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n" 4475 "Prints low-level page mapping information for a given address. If\n" 4476 "neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n" 4477 "address that is looked up in the translation map of the current\n" 4478 "team, respectively the team specified by thread ID <thread ID>. If\n" 4479 "\"-r\" is specified, <address> is a physical address that is\n" 4480 "searched in the translation map of all teams, respectively the team\n" 4481 "specified by thread ID <thread ID>. If \"-p\" is specified,\n" 4482 "<address> is the address of a vm_page structure. 
The behavior is\n" 4483 "equivalent to specifying \"-r\" with the physical address of that\n" 4484 "page.\n", 4485 0); 4486 4487 TRACE(("vm_init: exit\n")); 4488 4489 vm_cache_init_post_heap(); 4490 4491 return err; 4492 } 4493 4494 4495 status_t 4496 vm_init_post_sem(kernel_args* args) 4497 { 4498 // This frees all unused boot loader resources and makes its space available 4499 // again 4500 arch_vm_init_end(args); 4501 unreserve_boot_loader_ranges(args); 4502 4503 // fill in all of the semaphores that were not allocated before 4504 // since we're still single threaded and only the kernel address space 4505 // exists, it isn't that hard to find all of the ones we need to create 4506 4507 arch_vm_translation_map_init_post_sem(args); 4508 4509 slab_init_post_sem(); 4510 4511 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4512 heap_init_post_sem(); 4513 #endif 4514 4515 return B_OK; 4516 } 4517 4518 4519 status_t 4520 vm_init_post_thread(kernel_args* args) 4521 { 4522 vm_page_init_post_thread(args); 4523 slab_init_post_thread(); 4524 return heap_init_post_thread(); 4525 } 4526 4527 4528 status_t 4529 vm_init_post_modules(kernel_args* args) 4530 { 4531 return arch_vm_init_post_modules(args); 4532 } 4533 4534 4535 void 4536 permit_page_faults(void) 4537 { 4538 Thread* thread = thread_get_current_thread(); 4539 if (thread != NULL) 4540 atomic_add(&thread->page_faults_allowed, 1); 4541 } 4542 4543 4544 void 4545 forbid_page_faults(void) 4546 { 4547 Thread* thread = thread_get_current_thread(); 4548 if (thread != NULL) 4549 atomic_add(&thread->page_faults_allowed, -1); 4550 } 4551 4552 4553 status_t 4554 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute, 4555 bool isUser, addr_t* newIP) 4556 { 4557 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4558 faultAddress)); 4559 4560 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4561 4562 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4563 VMAddressSpace* addressSpace = NULL; 4564 4565 status_t status = B_OK; 4566 *newIP = 0; 4567 atomic_add((int32*)&sPageFaults, 1); 4568 4569 if (IS_KERNEL_ADDRESS(pageAddress)) { 4570 addressSpace = VMAddressSpace::GetKernel(); 4571 } else if (IS_USER_ADDRESS(pageAddress)) { 4572 addressSpace = VMAddressSpace::GetCurrent(); 4573 if (addressSpace == NULL) { 4574 if (!isUser) { 4575 dprintf("vm_page_fault: kernel thread accessing invalid user " 4576 "memory!\n"); 4577 status = B_BAD_ADDRESS; 4578 TPF(PageFaultError(-1, 4579 VMPageFaultTracing 4580 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4581 } else { 4582 // XXX weird state. 
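				// A userland thread faulting on a user address without a
				// current address space should be impossible -- panic so the
				// inconsistency is caught early.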
4583 panic("vm_page_fault: non kernel thread accessing user memory " 4584 "that doesn't exist!\n"); 4585 status = B_BAD_ADDRESS; 4586 } 4587 } 4588 } else { 4589 // the hit was probably in the 64k DMZ between kernel and user space 4590 // this keeps a user space thread from passing a buffer that crosses 4591 // into kernel space 4592 status = B_BAD_ADDRESS; 4593 TPF(PageFaultError(-1, 4594 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4595 } 4596 4597 if (status == B_OK) { 4598 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute, 4599 isUser, NULL); 4600 } 4601 4602 if (status < B_OK) { 4603 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4604 "0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n", 4605 strerror(status), address, faultAddress, isWrite, isUser, isExecute, 4606 thread_get_current_thread_id()); 4607 if (!isUser) { 4608 Thread* thread = thread_get_current_thread(); 4609 if (thread != NULL && thread->fault_handler != 0) { 4610 // this will cause the arch dependant page fault handler to 4611 // modify the IP on the interrupt frame or whatever to return 4612 // to this address 4613 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler); 4614 } else { 4615 // unhandled page fault in the kernel 4616 panic("vm_page_fault: unhandled page fault in kernel space at " 4617 "0x%lx, ip 0x%lx\n", address, faultAddress); 4618 } 4619 } else { 4620 Thread* thread = thread_get_current_thread(); 4621 4622 #ifdef TRACE_FAULTS 4623 VMArea* area = NULL; 4624 if (addressSpace != NULL) { 4625 addressSpace->ReadLock(); 4626 area = addressSpace->LookupArea(faultAddress); 4627 } 4628 4629 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4630 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4631 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4632 thread->team->Name(), thread->team->id, 4633 isWrite ? "write" : (isExecute ? "execute" : "read"), address, 4634 faultAddress, area ? area->name : "???", faultAddress - (area ? 4635 area->Base() : 0x0)); 4636 4637 if (addressSpace != NULL) 4638 addressSpace->ReadUnlock(); 4639 #endif 4640 4641 // If the thread has a signal handler for SIGSEGV, we simply 4642 // send it the signal. Otherwise we notify the user debugger 4643 // first. 4644 struct sigaction action; 4645 if ((sigaction(SIGSEGV, NULL, &action) == 0 4646 && action.sa_handler != SIG_DFL 4647 && action.sa_handler != SIG_IGN) 4648 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4649 SIGSEGV)) { 4650 Signal signal(SIGSEGV, 4651 status == B_PERMISSION_DENIED 4652 ? 
SEGV_ACCERR : SEGV_MAPERR, 4653 EFAULT, thread->team->id); 4654 signal.SetAddress((void*)address); 4655 send_signal_to_thread(thread, signal, 0); 4656 } 4657 } 4658 } 4659 4660 if (addressSpace != NULL) 4661 addressSpace->Put(); 4662 4663 return B_HANDLED_INTERRUPT; 4664 } 4665 4666 4667 struct PageFaultContext { 4668 AddressSpaceReadLocker addressSpaceLocker; 4669 VMCacheChainLocker cacheChainLocker; 4670 4671 VMTranslationMap* map; 4672 VMCache* topCache; 4673 off_t cacheOffset; 4674 vm_page_reservation reservation; 4675 bool isWrite; 4676 4677 // return values 4678 vm_page* page; 4679 bool restart; 4680 bool pageAllocated; 4681 4682 4683 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4684 : 4685 addressSpaceLocker(addressSpace, true), 4686 map(addressSpace->TranslationMap()), 4687 isWrite(isWrite) 4688 { 4689 } 4690 4691 ~PageFaultContext() 4692 { 4693 UnlockAll(); 4694 vm_page_unreserve_pages(&reservation); 4695 } 4696 4697 void Prepare(VMCache* topCache, off_t cacheOffset) 4698 { 4699 this->topCache = topCache; 4700 this->cacheOffset = cacheOffset; 4701 page = NULL; 4702 restart = false; 4703 pageAllocated = false; 4704 4705 cacheChainLocker.SetTo(topCache); 4706 } 4707 4708 void UnlockAll(VMCache* exceptCache = NULL) 4709 { 4710 topCache = NULL; 4711 addressSpaceLocker.Unlock(); 4712 cacheChainLocker.Unlock(exceptCache); 4713 } 4714 }; 4715 4716 4717 /*! Gets the page that should be mapped into the area. 4718 Returns an error code other than \c B_OK, if the page couldn't be found or 4719 paged in. The locking state of the address space and the caches is undefined 4720 in that case. 4721 Returns \c B_OK with \c context.restart set to \c true, if the functions 4722 had to unlock the address space and all caches and is supposed to be called 4723 again. 4724 Returns \c B_OK with \c context.restart set to \c false, if the page was 4725 found. It is returned in \c context.page. The address space will still be 4726 locked as well as all caches starting from the top cache to at least the 4727 cache the page lives in. 4728 */ 4729 static status_t 4730 fault_get_page(PageFaultContext& context) 4731 { 4732 VMCache* cache = context.topCache; 4733 VMCache* lastCache = NULL; 4734 vm_page* page = NULL; 4735 4736 while (cache != NULL) { 4737 // We already hold the lock of the cache at this point. 4738 4739 lastCache = cache; 4740 4741 page = cache->LookupPage(context.cacheOffset); 4742 if (page != NULL && page->busy) { 4743 // page must be busy -- wait for it to become unbusy 4744 context.UnlockAll(cache); 4745 cache->ReleaseRefLocked(); 4746 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4747 4748 // restart the whole process 4749 context.restart = true; 4750 return B_OK; 4751 } 4752 4753 if (page != NULL) 4754 break; 4755 4756 // The current cache does not contain the page we're looking for. 4757 4758 // see if the backing store has it 4759 if (cache->HasPage(context.cacheOffset)) { 4760 // insert a fresh page and mark it busy -- we're going to read it in 4761 page = vm_page_allocate_page(&context.reservation, 4762 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4763 cache->InsertPage(page, context.cacheOffset); 4764 4765 // We need to unlock all caches and the address space while reading 4766 // the page in. Keep a reference to the cache around. 
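			// The extra reference keeps the cache alive while it is unlocked
			// (it could otherwise go away underneath us); it is dropped again
			// via ReleaseRefAndUnlock() below.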
4767 cache->AcquireRefLocked(); 4768 context.UnlockAll(); 4769 4770 // read the page in 4771 generic_io_vec vec; 4772 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4773 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4774 4775 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4776 B_PHYSICAL_IO_REQUEST, &bytesRead); 4777 4778 cache->Lock(); 4779 4780 if (status < B_OK) { 4781 // on error remove and free the page 4782 dprintf("reading page from cache %p returned: %s!\n", 4783 cache, strerror(status)); 4784 4785 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4786 cache->RemovePage(page); 4787 vm_page_set_state(page, PAGE_STATE_FREE); 4788 4789 cache->ReleaseRefAndUnlock(); 4790 return status; 4791 } 4792 4793 // mark the page unbusy again 4794 cache->MarkPageUnbusy(page); 4795 4796 DEBUG_PAGE_ACCESS_END(page); 4797 4798 // Since we needed to unlock everything temporarily, the area 4799 // situation might have changed. So we need to restart the whole 4800 // process. 4801 cache->ReleaseRefAndUnlock(); 4802 context.restart = true; 4803 return B_OK; 4804 } 4805 4806 cache = context.cacheChainLocker.LockSourceCache(); 4807 } 4808 4809 if (page == NULL) { 4810 // There was no adequate page, determine the cache for a clean one. 4811 // Read-only pages come in the deepest cache, only the top most cache 4812 // may have direct write access. 4813 cache = context.isWrite ? context.topCache : lastCache; 4814 4815 // allocate a clean page 4816 page = vm_page_allocate_page(&context.reservation, 4817 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4818 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4819 page->physical_page_number)); 4820 4821 // insert the new page into our cache 4822 cache->InsertPage(page, context.cacheOffset); 4823 context.pageAllocated = true; 4824 } else if (page->Cache() != context.topCache && context.isWrite) { 4825 // We have a page that has the data we want, but in the wrong cache 4826 // object so we need to copy it and stick it into the top cache. 4827 vm_page* sourcePage = page; 4828 4829 // TODO: If memory is low, it might be a good idea to steal the page 4830 // from our source cache -- if possible, that is. 4831 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4832 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4833 4834 // To not needlessly kill concurrency we unlock all caches but the top 4835 // one while copying the page. Lacking another mechanism to ensure that 4836 // the source page doesn't disappear, we mark it busy. 4837 sourcePage->busy = true; 4838 context.cacheChainLocker.UnlockKeepRefs(true); 4839 4840 // copy the page 4841 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4842 sourcePage->physical_page_number * B_PAGE_SIZE); 4843 4844 context.cacheChainLocker.RelockCaches(true); 4845 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4846 4847 // insert the new page into our cache 4848 context.topCache->InsertPage(page, context.cacheOffset); 4849 context.pageAllocated = true; 4850 } else 4851 DEBUG_PAGE_ACCESS_START(page); 4852 4853 context.page = page; 4854 return B_OK; 4855 } 4856 4857 4858 /*! Makes sure the address in the given address space is mapped. 4859 4860 \param addressSpace The address space. 4861 \param originalAddress The address. Doesn't need to be page aligned. 4862 \param isWrite If \c true the address shall be write-accessible. 4863 \param isUser If \c true the access is requested by a userland team. 
4864 \param wirePage On success, if non \c NULL, the wired count of the page 4865 mapped at the given address is incremented and the page is returned 4866 via this parameter. 4867 \return \c B_OK on success, another error code otherwise. 4868 */ 4869 static status_t 4870 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4871 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage) 4872 { 4873 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4874 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4875 originalAddress, isWrite, isUser)); 4876 4877 PageFaultContext context(addressSpace, isWrite); 4878 4879 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4880 status_t status = B_OK; 4881 4882 addressSpace->IncrementFaultCount(); 4883 4884 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4885 // the pages upfront makes sure we don't have any cache locked, so that the 4886 // page daemon/thief can do their job without problems. 4887 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4888 originalAddress); 4889 context.addressSpaceLocker.Unlock(); 4890 vm_page_reserve_pages(&context.reservation, reservePages, 4891 addressSpace == VMAddressSpace::Kernel() 4892 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4893 4894 while (true) { 4895 context.addressSpaceLocker.Lock(); 4896 4897 // get the area the fault was in 4898 VMArea* area = addressSpace->LookupArea(address); 4899 if (area == NULL) { 4900 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4901 "space\n", originalAddress); 4902 TPF(PageFaultError(-1, 4903 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4904 status = B_BAD_ADDRESS; 4905 break; 4906 } 4907 4908 // check permissions 4909 uint32 protection = get_area_page_protection(area, address); 4910 if (isUser && (protection & B_USER_PROTECTION) == 0 4911 && (area->protection & B_KERNEL_AREA) != 0) { 4912 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4913 area->id, (void*)originalAddress); 4914 TPF(PageFaultError(area->id, 4915 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4916 status = B_PERMISSION_DENIED; 4917 break; 4918 } 4919 if (isWrite && (protection 4920 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4921 dprintf("write access attempted on write-protected area 0x%" 4922 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4923 TPF(PageFaultError(area->id, 4924 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4925 status = B_PERMISSION_DENIED; 4926 break; 4927 } else if (isExecute && (protection 4928 & (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) { 4929 dprintf("instruction fetch attempted on execute-protected area 0x%" 4930 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4931 TPF(PageFaultError(area->id, 4932 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED)); 4933 status = B_PERMISSION_DENIED; 4934 break; 4935 } else if (!isWrite && !isExecute && (protection 4936 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4937 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4938 " at %p\n", area->id, (void*)originalAddress); 4939 TPF(PageFaultError(area->id, 4940 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4941 status = B_PERMISSION_DENIED; 4942 break; 4943 } 4944 4945 // We have the area, it was a valid access, so let's try to resolve the 4946 // page fault now. 4947 // At first, the top most cache from the area is investigated. 
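		// The offset into the cache is the fault address relative to the
		// area's base plus the area's cache_offset; Prepare() locks the top
		// cache and resets the per-attempt state.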
4948 4949 context.Prepare(vm_area_get_locked_cache(area), 4950 address - area->Base() + area->cache_offset); 4951 4952 // See if this cache has a fault handler -- this will do all the work 4953 // for us. 4954 { 4955 // Note, since the page fault is resolved with interrupts enabled, 4956 // the fault handler could be called more than once for the same 4957 // reason -- the store must take this into account. 4958 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4959 if (status != B_BAD_HANDLER) 4960 break; 4961 } 4962 4963 // The top most cache has no fault handler, so let's see if the cache or 4964 // its sources already have the page we're searching for (we're going 4965 // from top to bottom). 4966 status = fault_get_page(context); 4967 if (status != B_OK) { 4968 TPF(PageFaultError(area->id, status)); 4969 break; 4970 } 4971 4972 if (context.restart) 4973 continue; 4974 4975 // All went fine, all there is left to do is to map the page into the 4976 // address space. 4977 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4978 context.page)); 4979 4980 // If the page doesn't reside in the area's cache, we need to make sure 4981 // it's mapped in read-only, so that we cannot overwrite someone else's 4982 // data (copy-on-write) 4983 uint32 newProtection = protection; 4984 if (context.page->Cache() != context.topCache && !isWrite) 4985 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4986 4987 bool unmapPage = false; 4988 bool mapPage = true; 4989 4990 // check whether there's already a page mapped at the address 4991 context.map->Lock(); 4992 4993 phys_addr_t physicalAddress; 4994 uint32 flags; 4995 vm_page* mappedPage = NULL; 4996 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4997 && (flags & PAGE_PRESENT) != 0 4998 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4999 != NULL) { 5000 // Yep there's already a page. If it's ours, we can simply adjust 5001 // its protection. Otherwise we have to unmap it. 5002 if (mappedPage == context.page) { 5003 context.map->ProtectPage(area, address, newProtection); 5004 // Note: We assume that ProtectPage() is atomic (i.e. 5005 // the page isn't temporarily unmapped), otherwise we'd have 5006 // to make sure it isn't wired. 5007 mapPage = false; 5008 } else 5009 unmapPage = true; 5010 } 5011 5012 context.map->Unlock(); 5013 5014 if (unmapPage) { 5015 // If the page is wired, we can't unmap it. Wait until it is unwired 5016 // again and restart. Note that the page cannot be wired for 5017 // writing, since it it isn't in the topmost cache. So we can safely 5018 // ignore ranges wired for writing (our own and other concurrent 5019 // wiring attempts in progress) and in fact have to do that to avoid 5020 // a deadlock. 5021 VMAreaUnwiredWaiter waiter; 5022 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 5023 VMArea::IGNORE_WRITE_WIRED_RANGES)) { 5024 // unlock everything and wait 5025 if (context.pageAllocated) { 5026 // ... but since we allocated a page and inserted it into 5027 // the top cache, remove and free it first. Otherwise we'd 5028 // have a page from a lower cache mapped while an upper 5029 // cache has a page that would shadow it. 
5030 context.topCache->RemovePage(context.page); 5031 vm_page_free_etc(context.topCache, context.page, 5032 &context.reservation); 5033 } else 5034 DEBUG_PAGE_ACCESS_END(context.page); 5035 5036 context.UnlockAll(); 5037 waiter.waitEntry.Wait(); 5038 continue; 5039 } 5040 5041 // Note: The mapped page is a page of a lower cache. We are 5042 // guaranteed to have that cached locked, our new page is a copy of 5043 // that page, and the page is not busy. The logic for that guarantee 5044 // is as follows: Since the page is mapped, it must live in the top 5045 // cache (ruled out above) or any of its lower caches, and there is 5046 // (was before the new page was inserted) no other page in any 5047 // cache between the top cache and the page's cache (otherwise that 5048 // would be mapped instead). That in turn means that our algorithm 5049 // must have found it and therefore it cannot be busy either. 5050 DEBUG_PAGE_ACCESS_START(mappedPage); 5051 unmap_page(area, address); 5052 DEBUG_PAGE_ACCESS_END(mappedPage); 5053 } 5054 5055 if (mapPage) { 5056 if (map_page(area, context.page, address, newProtection, 5057 &context.reservation) != B_OK) { 5058 // Mapping can only fail, when the page mapping object couldn't 5059 // be allocated. Save for the missing mapping everything is 5060 // fine, though. If this was a regular page fault, we'll simply 5061 // leave and probably fault again. To make sure we'll have more 5062 // luck then, we ensure that the minimum object reserve is 5063 // available. 5064 DEBUG_PAGE_ACCESS_END(context.page); 5065 5066 context.UnlockAll(); 5067 5068 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 5069 != B_OK) { 5070 // Apparently the situation is serious. Let's get ourselves 5071 // killed. 5072 status = B_NO_MEMORY; 5073 } else if (wirePage != NULL) { 5074 // The caller expects us to wire the page. Since 5075 // object_cache_reserve() succeeded, we should now be able 5076 // to allocate a mapping structure. Restart. 
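					// The next iteration re-resolves the fault from scratch
					// with the replenished reserve.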
5077 continue; 5078 } 5079 5080 break; 5081 } 5082 } else if (context.page->State() == PAGE_STATE_INACTIVE) 5083 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 5084 5085 // also wire the page, if requested 5086 if (wirePage != NULL && status == B_OK) { 5087 increment_page_wired_count(context.page); 5088 *wirePage = context.page; 5089 } 5090 5091 DEBUG_PAGE_ACCESS_END(context.page); 5092 5093 break; 5094 } 5095 5096 return status; 5097 } 5098 5099 5100 status_t 5101 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 5102 { 5103 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 5104 } 5105 5106 status_t 5107 vm_put_physical_page(addr_t vaddr, void* handle) 5108 { 5109 return sPhysicalPageMapper->PutPage(vaddr, handle); 5110 } 5111 5112 5113 status_t 5114 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 5115 void** _handle) 5116 { 5117 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 5118 } 5119 5120 status_t 5121 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 5122 { 5123 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 5124 } 5125 5126 5127 status_t 5128 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 5129 { 5130 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 5131 } 5132 5133 status_t 5134 vm_put_physical_page_debug(addr_t vaddr, void* handle) 5135 { 5136 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 5137 } 5138 5139 5140 void 5141 vm_get_info(system_info* info) 5142 { 5143 swap_get_info(info); 5144 5145 MutexLocker locker(sAvailableMemoryLock); 5146 info->needed_memory = sNeededMemory; 5147 info->free_memory = sAvailableMemory; 5148 } 5149 5150 5151 uint32 5152 vm_num_page_faults(void) 5153 { 5154 return sPageFaults; 5155 } 5156 5157 5158 off_t 5159 vm_available_memory(void) 5160 { 5161 MutexLocker locker(sAvailableMemoryLock); 5162 return sAvailableMemory; 5163 } 5164 5165 5166 off_t 5167 vm_available_not_needed_memory(void) 5168 { 5169 MutexLocker locker(sAvailableMemoryLock); 5170 return sAvailableMemory - sNeededMemory; 5171 } 5172 5173 5174 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 5175 debugger. 5176 */ 5177 off_t 5178 vm_available_not_needed_memory_debug(void) 5179 { 5180 return sAvailableMemory - sNeededMemory; 5181 } 5182 5183 5184 size_t 5185 vm_kernel_address_space_left(void) 5186 { 5187 return VMAddressSpace::Kernel()->FreeSpace(); 5188 } 5189 5190 5191 void 5192 vm_unreserve_memory(size_t amount) 5193 { 5194 mutex_lock(&sAvailableMemoryLock); 5195 5196 sAvailableMemory += amount; 5197 5198 mutex_unlock(&sAvailableMemoryLock); 5199 } 5200 5201 5202 status_t 5203 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 5204 { 5205 size_t reserve = kMemoryReserveForPriority[priority]; 5206 5207 MutexLocker locker(sAvailableMemoryLock); 5208 5209 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5210 5211 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5212 sAvailableMemory -= amount; 5213 return B_OK; 5214 } 5215 5216 if (amount >= (vm_page_num_pages() * B_PAGE_SIZE)) { 5217 // Do not wait for something that will never happen. 
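		// The request exceeds the total amount of RAM in the system, so no
		// amount of reclaiming can ever satisfy it.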
5218 return B_NO_MEMORY; 5219 } 5220 5221 if (timeout <= 0) 5222 return B_NO_MEMORY; 5223 5224 // turn timeout into an absolute timeout 5225 timeout += system_time(); 5226 5227 // loop until we've got the memory or the timeout occurs 5228 do { 5229 sNeededMemory += amount; 5230 5231 // call the low resource manager 5232 locker.Unlock(); 5233 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 5234 B_ABSOLUTE_TIMEOUT, timeout); 5235 locker.Lock(); 5236 5237 sNeededMemory -= amount; 5238 5239 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5240 sAvailableMemory -= amount; 5241 return B_OK; 5242 } 5243 } while (timeout > system_time()); 5244 5245 return B_NO_MEMORY; 5246 } 5247 5248 5249 status_t 5250 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 5251 { 5252 // NOTE: The caller is responsible for synchronizing calls to this function! 5253 5254 AddressSpaceReadLocker locker; 5255 VMArea* area; 5256 status_t status = locker.SetFromArea(id, area); 5257 if (status != B_OK) 5258 return status; 5259 5260 // nothing to do, if the type doesn't change 5261 uint32 oldType = area->MemoryType(); 5262 if (type == oldType) 5263 return B_OK; 5264 5265 // set the memory type of the area and the mapped pages 5266 VMTranslationMap* map = area->address_space->TranslationMap(); 5267 map->Lock(); 5268 area->SetMemoryType(type); 5269 map->ProtectArea(area, area->protection); 5270 map->Unlock(); 5271 5272 // set the physical memory type 5273 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 5274 if (error != B_OK) { 5275 // reset the memory type of the area and the mapped pages 5276 map->Lock(); 5277 area->SetMemoryType(oldType); 5278 map->ProtectArea(area, area->protection); 5279 map->Unlock(); 5280 return error; 5281 } 5282 5283 return B_OK; 5284 5285 } 5286 5287 5288 /*! This function enforces some protection properties: 5289 - kernel areas must be W^X (after kernel startup) 5290 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 5291 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 5292 */ 5293 static void 5294 fix_protection(uint32* protection) 5295 { 5296 if ((*protection & B_KERNEL_EXECUTE_AREA) != 0 5297 && ((*protection & B_KERNEL_WRITE_AREA) != 0 5298 || (*protection & B_WRITE_AREA) != 0) 5299 && !gKernelStartup) 5300 panic("kernel areas cannot be both writable and executable!"); 5301 5302 if ((*protection & B_KERNEL_PROTECTION) == 0) { 5303 if ((*protection & B_WRITE_AREA) != 0) 5304 *protection |= B_KERNEL_WRITE_AREA; 5305 if ((*protection & B_READ_AREA) != 0) 5306 *protection |= B_KERNEL_READ_AREA; 5307 } 5308 } 5309 5310 5311 static void 5312 fill_area_info(struct VMArea* area, area_info* info, size_t size) 5313 { 5314 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 5315 info->area = area->id; 5316 info->address = (void*)area->Base(); 5317 info->size = area->Size(); 5318 info->protection = area->protection; 5319 info->lock = area->wiring; 5320 info->team = area->address_space->ID(); 5321 info->copy_count = 0; 5322 info->in_count = 0; 5323 info->out_count = 0; 5324 // TODO: retrieve real values here! 5325 5326 VMCache* cache = vm_area_get_locked_cache(area); 5327 5328 // Note, this is a simplification; the cache could be larger than this area 5329 info->ram_size = cache->page_count * B_PAGE_SIZE; 5330 5331 vm_area_put_locked_cache(cache); 5332 } 5333 5334 5335 static status_t 5336 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 5337 { 5338 // is newSize a multiple of B_PAGE_SIZE? 
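	// (B_PAGE_SIZE is a power of two, so any remainder shows up in the low
	// bits.)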
5339 if (newSize & (B_PAGE_SIZE - 1)) 5340 return B_BAD_VALUE; 5341 5342 // lock all affected address spaces and the cache 5343 VMArea* area; 5344 VMCache* cache; 5345 5346 MultiAddressSpaceLocker locker; 5347 AreaCacheLocker cacheLocker; 5348 5349 status_t status; 5350 size_t oldSize; 5351 bool anyKernelArea; 5352 bool restart; 5353 5354 do { 5355 anyKernelArea = false; 5356 restart = false; 5357 5358 locker.Unset(); 5359 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 5360 if (status != B_OK) 5361 return status; 5362 cacheLocker.SetTo(cache, true); // already locked 5363 5364 // enforce restrictions 5365 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 5366 || (area->protection & B_KERNEL_AREA) != 0)) { 5367 dprintf("vm_resize_area: team %" B_PRId32 " tried to " 5368 "resize kernel area %" B_PRId32 " (%s)\n", 5369 team_get_current_team_id(), areaID, area->name); 5370 return B_NOT_ALLOWED; 5371 } 5372 // TODO: Enforce all restrictions (team, etc.)! 5373 5374 oldSize = area->Size(); 5375 if (newSize == oldSize) 5376 return B_OK; 5377 5378 if (cache->type != CACHE_TYPE_RAM) 5379 return B_NOT_ALLOWED; 5380 5381 if (oldSize < newSize) { 5382 // We need to check if all areas of this cache can be resized. 5383 for (VMArea* current = cache->areas; current != NULL; 5384 current = current->cache_next) { 5385 if (!current->address_space->CanResizeArea(current, newSize)) 5386 return B_ERROR; 5387 anyKernelArea 5388 |= current->address_space == VMAddressSpace::Kernel(); 5389 } 5390 } else { 5391 // We're shrinking the areas, so we must make sure the affected 5392 // ranges are not wired. 5393 for (VMArea* current = cache->areas; current != NULL; 5394 current = current->cache_next) { 5395 anyKernelArea 5396 |= current->address_space == VMAddressSpace::Kernel(); 5397 5398 if (wait_if_area_range_is_wired(current, 5399 current->Base() + newSize, oldSize - newSize, &locker, 5400 &cacheLocker)) { 5401 restart = true; 5402 break; 5403 } 5404 } 5405 } 5406 } while (restart); 5407 5408 // Okay, looks good so far, so let's do it 5409 5410 int priority = kernel && anyKernelArea 5411 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 5412 uint32 allocationFlags = kernel && anyKernelArea 5413 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 5414 5415 if (oldSize < newSize) { 5416 // Growing the cache can fail, so we do it first. 5417 status = cache->Resize(cache->virtual_base + newSize, priority); 5418 if (status != B_OK) 5419 return status; 5420 } 5421 5422 for (VMArea* current = cache->areas; current != NULL; 5423 current = current->cache_next) { 5424 status = current->address_space->ResizeArea(current, newSize, 5425 allocationFlags); 5426 if (status != B_OK) 5427 break; 5428 5429 // We also need to unmap all pages beyond the new size, if the area has 5430 // shrunk 5431 if (newSize < oldSize) { 5432 VMCacheChainLocker cacheChainLocker(cache); 5433 cacheChainLocker.LockAllSourceCaches(); 5434 5435 unmap_pages(current, current->Base() + newSize, 5436 oldSize - newSize); 5437 5438 cacheChainLocker.Unlock(cache); 5439 } 5440 } 5441 5442 if (status == B_OK) { 5443 // Shrink or grow individual page protections if in use. 
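		// page_protections packs two pages per byte: page 2 * i is stored in
		// the low nibble of byte i, page 2 * i + 1 in the high nibble.
		// Illustrative example: for a 4-page area the array is 2 bytes long,
		// and page 3's protection is (page_protections[1] >> 4) & 0x0f.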
5444 if (area->page_protections != NULL) { 5445 size_t bytes = area_page_protections_size(newSize); 5446 uint8* newProtections 5447 = (uint8*)realloc(area->page_protections, bytes); 5448 if (newProtections == NULL) 5449 status = B_NO_MEMORY; 5450 else { 5451 area->page_protections = newProtections; 5452 5453 if (oldSize < newSize) { 5454 // init the additional page protections to that of the area 5455 uint32 offset = area_page_protections_size(oldSize); 5456 uint32 areaProtection = area->protection 5457 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 5458 memset(area->page_protections + offset, 5459 areaProtection | (areaProtection << 4), bytes - offset); 5460 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 5461 uint8& entry = area->page_protections[offset - 1]; 5462 entry = (entry & 0x0f) | (areaProtection << 4); 5463 } 5464 } 5465 } 5466 } 5467 } 5468 5469 // shrinking the cache can't fail, so we do it now 5470 if (status == B_OK && newSize < oldSize) 5471 status = cache->Resize(cache->virtual_base + newSize, priority); 5472 5473 if (status != B_OK) { 5474 // Something failed -- resize the areas back to their original size. 5475 // This can fail, too, in which case we're seriously screwed. 5476 for (VMArea* current = cache->areas; current != NULL; 5477 current = current->cache_next) { 5478 if (current->address_space->ResizeArea(current, oldSize, 5479 allocationFlags) != B_OK) { 5480 panic("vm_resize_area(): Failed and not being able to restore " 5481 "original state."); 5482 } 5483 } 5484 5485 cache->Resize(cache->virtual_base + oldSize, priority); 5486 } 5487 5488 // TODO: we must honour the lock restrictions of this area 5489 return status; 5490 } 5491 5492 5493 status_t 5494 vm_memset_physical(phys_addr_t address, int value, phys_size_t length) 5495 { 5496 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 5497 } 5498 5499 5500 status_t 5501 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 5502 { 5503 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 5504 } 5505 5506 5507 status_t 5508 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 5509 bool user) 5510 { 5511 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 5512 } 5513 5514 5515 void 5516 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 5517 { 5518 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 5519 } 5520 5521 5522 /*! Copies a range of memory directly from/to a page that might not be mapped 5523 at the moment. 5524 5525 For \a unsafeMemory the current mapping (if any is ignored). The function 5526 walks through the respective area's cache chain to find the physical page 5527 and copies from/to it directly. 5528 The memory range starting at \a unsafeMemory with a length of \a size bytes 5529 must not cross a page boundary. 5530 5531 \param teamID The team ID identifying the address space \a unsafeMemory is 5532 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 5533 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 5534 is passed, the address space of the thread returned by 5535 debug_get_debugged_thread() is used. 5536 \param unsafeMemory The start of the unsafe memory range to be copied 5537 from/to. 5538 \param buffer A safely accessible kernel buffer to be copied from/to. 5539 \param size The number of bytes to be copied. 5540 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5541 \a unsafeMemory, the other way around otherwise. 
5542 */ 5543 status_t 5544 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5545 size_t size, bool copyToUnsafe) 5546 { 5547 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5548 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5549 return B_BAD_VALUE; 5550 } 5551 5552 // get the address space for the debugged thread 5553 VMAddressSpace* addressSpace; 5554 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5555 addressSpace = VMAddressSpace::Kernel(); 5556 } else if (teamID == B_CURRENT_TEAM) { 5557 Thread* thread = debug_get_debugged_thread(); 5558 if (thread == NULL || thread->team == NULL) 5559 return B_BAD_ADDRESS; 5560 5561 addressSpace = thread->team->address_space; 5562 } else 5563 addressSpace = VMAddressSpace::DebugGet(teamID); 5564 5565 if (addressSpace == NULL) 5566 return B_BAD_ADDRESS; 5567 5568 // get the area 5569 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5570 if (area == NULL) 5571 return B_BAD_ADDRESS; 5572 5573 // search the page 5574 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5575 + area->cache_offset; 5576 VMCache* cache = area->cache; 5577 vm_page* page = NULL; 5578 while (cache != NULL) { 5579 page = cache->DebugLookupPage(cacheOffset); 5580 if (page != NULL) 5581 break; 5582 5583 // Page not found in this cache -- if it is paged out, we must not try 5584 // to get it from lower caches. 5585 if (cache->DebugHasPage(cacheOffset)) 5586 break; 5587 5588 cache = cache->source; 5589 } 5590 5591 if (page == NULL) 5592 return B_UNSUPPORTED; 5593 5594 // copy from/to physical memory 5595 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5596 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5597 5598 if (copyToUnsafe) { 5599 if (page->Cache() != area->cache) 5600 return B_UNSUPPORTED; 5601 5602 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5603 } 5604 5605 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5606 } 5607 5608 5609 /** Validate that a memory range is either fully in kernel space, or fully in 5610 * userspace */ 5611 static inline bool 5612 validate_memory_range(const void* addr, size_t size) 5613 { 5614 addr_t address = (addr_t)addr; 5615 5616 // Check for overflows on all addresses. 5617 if ((address + size) < address) 5618 return false; 5619 5620 // Validate that the address range does not cross the kernel/user boundary. 5621 return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1); 5622 } 5623 5624 5625 // #pragma mark - kernel public API 5626 5627 5628 status_t 5629 user_memcpy(void* to, const void* from, size_t size) 5630 { 5631 if (!validate_memory_range(to, size) || !validate_memory_range(from, size)) 5632 return B_BAD_ADDRESS; 5633 5634 if (arch_cpu_user_memcpy(to, from, size) < B_OK) 5635 return B_BAD_ADDRESS; 5636 5637 return B_OK; 5638 } 5639 5640 5641 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5642 the string in \a to, NULL-terminating the result. 5643 5644 \param to Pointer to the destination C-string. 5645 \param from Pointer to the source C-string. 5646 \param size Size in bytes of the string buffer pointed to by \a to. 5647 5648 \return strlen(\a from). 5649 */ 5650 ssize_t 5651 user_strlcpy(char* to, const char* from, size_t size) 5652 { 5653 if (to == NULL && size != 0) 5654 return B_BAD_VALUE; 5655 if (from == NULL) 5656 return B_BAD_ADDRESS; 5657 5658 // Protect the source address from overflows. 
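	// If "from + size" would wrap around the top of the address space or
	// cross from userland into kernel space, clamp the number of bytes that
	// may be copied so the copy stops at that boundary instead.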
5659 size_t maxSize = size; 5660 if ((addr_t)from + maxSize < (addr_t)from) 5661 maxSize -= (addr_t)from + maxSize; 5662 if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize)) 5663 maxSize = USER_TOP - (addr_t)from; 5664 5665 if (!validate_memory_range(to, maxSize)) 5666 return B_BAD_ADDRESS; 5667 5668 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize); 5669 if (result < 0) 5670 return result; 5671 5672 // If we hit the address overflow boundary, fail. 5673 if ((size_t)result >= maxSize && maxSize < size) 5674 return B_BAD_ADDRESS; 5675 5676 return result; 5677 } 5678 5679 5680 status_t 5681 user_memset(void* s, char c, size_t count) 5682 { 5683 if (!validate_memory_range(s, count)) 5684 return B_BAD_ADDRESS; 5685 5686 if (arch_cpu_user_memset(s, c, count) < B_OK) 5687 return B_BAD_ADDRESS; 5688 5689 return B_OK; 5690 } 5691 5692 5693 /*! Wires a single page at the given address. 5694 5695 \param team The team whose address space the address belongs to. Also 5696 supports \c B_CURRENT_TEAM. If the given address is a kernel address, the 5697 parameter is ignored. 5698 \param address The virtual address to wire down. Does not need to 5699 be page aligned. 5700 \param writable If \c true the page shall be writable. 5701 \param info On success the info is filled in, among other things 5702 containing the physical address the given virtual one translates to. 5703 \return \c B_OK when the page could be wired, another error code otherwise. 5704 */ 5705 status_t 5706 vm_wire_page(team_id team, addr_t address, bool writable, 5707 VMPageWiringInfo* info) 5708 { 5709 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5710 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5711 5712 // compute the page protection that is required 5713 bool isUser = IS_USER_ADDRESS(address); 5714 uint32 requiredProtection = PAGE_PRESENT 5715 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5716 if (writable) 5717 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5718 5719 // get and read lock the address space 5720 VMAddressSpace* addressSpace = NULL; 5721 if (isUser) { 5722 if (team == B_CURRENT_TEAM) 5723 addressSpace = VMAddressSpace::GetCurrent(); 5724 else 5725 addressSpace = VMAddressSpace::Get(team); 5726 } else 5727 addressSpace = VMAddressSpace::GetKernel(); 5728 if (addressSpace == NULL) 5729 return B_ERROR; 5730 5731 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5732 5733 VMTranslationMap* map = addressSpace->TranslationMap(); 5734 status_t error = B_OK; 5735 5736 // get the area 5737 VMArea* area = addressSpace->LookupArea(pageAddress); 5738 if (area == NULL) { 5739 addressSpace->Put(); 5740 return B_BAD_ADDRESS; 5741 } 5742 5743 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5744 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5745 5746 // mark the area range wired 5747 area->Wire(&info->range); 5748 5749 // Lock the area's cache chain and the translation map. Needed to look 5750 // up the page and play with its wired count. 5751 cacheChainLocker.LockAllSourceCaches(); 5752 map->Lock(); 5753 5754 phys_addr_t physicalAddress; 5755 uint32 flags; 5756 vm_page* page; 5757 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5758 && (flags & requiredProtection) == requiredProtection 5759 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5760 != NULL) { 5761 // Already mapped with the correct permissions -- just increment 5762 // the page's wired count.
5763 increment_page_wired_count(page); 5764 5765 map->Unlock(); 5766 cacheChainLocker.Unlock(); 5767 addressSpaceLocker.Unlock(); 5768 } else { 5769 // Let vm_soft_fault() map the page for us, if possible. We need 5770 // to fully unlock to avoid deadlocks. Since we have already 5771 // wired the area itself, nothing disturbing will happen with it 5772 // in the meantime. 5773 map->Unlock(); 5774 cacheChainLocker.Unlock(); 5775 addressSpaceLocker.Unlock(); 5776 5777 error = vm_soft_fault(addressSpace, pageAddress, writable, false, 5778 isUser, &page); 5779 5780 if (error != B_OK) { 5781 // The page could not be mapped -- clean up. 5782 VMCache* cache = vm_area_get_locked_cache(area); 5783 area->Unwire(&info->range); 5784 cache->ReleaseRefAndUnlock(); 5785 addressSpace->Put(); 5786 return error; 5787 } 5788 } 5789 5790 info->physicalAddress 5791 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 5792 + address % B_PAGE_SIZE; 5793 info->page = page; 5794 5795 return B_OK; 5796 } 5797 5798 5799 /*! Unwires a single page previously wired via vm_wire_page(). 5800 5801 \param info The same object passed to vm_wire_page() before. 5802 */ 5803 void 5804 vm_unwire_page(VMPageWiringInfo* info) 5805 { 5806 // lock the address space 5807 VMArea* area = info->range.area; 5808 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false); 5809 // takes over our reference 5810 5811 // lock the top cache 5812 VMCache* cache = vm_area_get_locked_cache(area); 5813 VMCacheChainLocker cacheChainLocker(cache); 5814 5815 if (info->page->Cache() != cache) { 5816 // The page is not in the top cache, so we lock the whole cache chain 5817 // before touching the page's wired count. 5818 cacheChainLocker.LockAllSourceCaches(); 5819 } 5820 5821 decrement_page_wired_count(info->page); 5822 5823 // remove the wired range from the area 5824 area->Unwire(&info->range); 5825 5826 cacheChainLocker.Unlock(); 5827 } 5828 5829 5830 /*! Wires down the given address range in the specified team's address space. 5831 5832 If successful the function 5833 - acquires a reference to the specified team's address space, 5834 - adds respective wired ranges to all areas that intersect with the given 5835 address range, 5836 - makes sure all pages in the given address range are mapped with the 5837 requested access permissions and increments their wired count. 5838 5839 It fails when \a team doesn't specify a valid address space, when any part 5840 of the specified address range is not covered by areas, when the concerned 5841 areas don't allow mapping with the requested permissions, or when mapping 5842 failed for another reason. 5843 5844 When successful the call must be balanced by an unlock_memory_etc() call with 5845 the exact same parameters. 5846 5847 \param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is 5848 supported. 5849 \param address The start of the address range to be wired. 5850 \param numBytes The size of the address range to be wired. 5851 \param flags Flags. Currently only \c B_READ_DEVICE is defined, which 5852 requests that the range must be wired writable ("read from device 5853 into memory"). 5854 \return \c B_OK on success, another error code otherwise.
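	A minimal usage sketch (illustrative only; \c buffer and \c length are
	hypothetical driver-side values, not part of this API):
	\code
	status_t status = lock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
	if (status == B_OK) {
		// ... the range is now wired; perform the I/O on it ...
		unlock_memory_etc(B_CURRENT_TEAM, buffer, length, 0);
			// must be balanced with the exact same parameters
	}
	\endcode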
5855 */ 5856 status_t 5857 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5858 { 5859 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5860 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5861 5862 // compute the page protection that is required 5863 bool isUser = IS_USER_ADDRESS(address); 5864 bool writable = (flags & B_READ_DEVICE) == 0; 5865 uint32 requiredProtection = PAGE_PRESENT 5866 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5867 if (writable) 5868 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5869 5870 uint32 mallocFlags = isUser 5871 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5872 5873 // get and read lock the address space 5874 VMAddressSpace* addressSpace = NULL; 5875 if (isUser) { 5876 if (team == B_CURRENT_TEAM) 5877 addressSpace = VMAddressSpace::GetCurrent(); 5878 else 5879 addressSpace = VMAddressSpace::Get(team); 5880 } else 5881 addressSpace = VMAddressSpace::GetKernel(); 5882 if (addressSpace == NULL) 5883 return B_ERROR; 5884 5885 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5886 // We get a new address space reference here. The one we got above will 5887 // be freed by unlock_memory_etc(). 5888 5889 VMTranslationMap* map = addressSpace->TranslationMap(); 5890 status_t error = B_OK; 5891 5892 // iterate through all concerned areas 5893 addr_t nextAddress = lockBaseAddress; 5894 while (nextAddress != lockEndAddress) { 5895 // get the next area 5896 VMArea* area = addressSpace->LookupArea(nextAddress); 5897 if (area == NULL) { 5898 error = B_BAD_ADDRESS; 5899 break; 5900 } 5901 5902 addr_t areaStart = nextAddress; 5903 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5904 5905 // allocate the wired range (do that before locking the cache to avoid 5906 // deadlocks) 5907 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5908 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5909 if (range == NULL) { 5910 error = B_NO_MEMORY; 5911 break; 5912 } 5913 5914 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5915 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5916 5917 // mark the area range wired 5918 area->Wire(range); 5919 5920 // Depending on the area cache type and the wiring, we may not need to 5921 // look at the individual pages. 5922 if (area->cache_type == CACHE_TYPE_NULL 5923 || area->cache_type == CACHE_TYPE_DEVICE 5924 || area->wiring == B_FULL_LOCK 5925 || area->wiring == B_CONTIGUOUS) { 5926 nextAddress = areaEnd; 5927 continue; 5928 } 5929 5930 // Lock the area's cache chain and the translation map. Needed to look 5931 // up pages and play with their wired count. 5932 cacheChainLocker.LockAllSourceCaches(); 5933 map->Lock(); 5934 5935 // iterate through the pages and wire them 5936 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5937 phys_addr_t physicalAddress; 5938 uint32 flags; 5939 5940 vm_page* page; 5941 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5942 && (flags & requiredProtection) == requiredProtection 5943 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5944 != NULL) { 5945 // Already mapped with the correct permissions -- just increment 5946 // the page's wired count. 5947 increment_page_wired_count(page); 5948 } else { 5949 // Let vm_soft_fault() map the page for us, if possible. We need 5950 // to fully unlock to avoid deadlocks. 
Since we have already 5951 // wired the area itself, nothing disturbing will happen with it 5952 // in the meantime. 5953 map->Unlock(); 5954 cacheChainLocker.Unlock(); 5955 addressSpaceLocker.Unlock(); 5956 5957 error = vm_soft_fault(addressSpace, nextAddress, writable, 5958 false, isUser, &page); 5959 5960 addressSpaceLocker.Lock(); 5961 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5962 cacheChainLocker.LockAllSourceCaches(); 5963 map->Lock(); 5964 } 5965 5966 if (error != B_OK) 5967 break; 5968 } 5969 5970 map->Unlock(); 5971 5972 if (error == B_OK) { 5973 cacheChainLocker.Unlock(); 5974 } else { 5975 // An error occurred, so abort right here. If the current address 5976 // is the first in this area, unwire the area, since we won't get 5977 // to it when reverting what we've done so far. 5978 if (nextAddress == areaStart) { 5979 area->Unwire(range); 5980 cacheChainLocker.Unlock(); 5981 range->~VMAreaWiredRange(); 5982 free_etc(range, mallocFlags); 5983 } else 5984 cacheChainLocker.Unlock(); 5985 5986 break; 5987 } 5988 } 5989 5990 if (error != B_OK) { 5991 // An error occurred, so unwire all that we've already wired. Note that 5992 // even if not a single page was wired, unlock_memory_etc() is called 5993 // to put the address space reference. 5994 addressSpaceLocker.Unlock(); 5995 unlock_memory_etc(team, (void*)lockBaseAddress, 5996 nextAddress - lockBaseAddress, flags); 5997 } 5998 5999 return error; 6000 } 6001 6002 6003 status_t 6004 lock_memory(void* address, size_t numBytes, uint32 flags) 6005 { 6006 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 6007 } 6008 6009 6010 /*! Unwires an address range previously wired with lock_memory_etc(). 6011 6012 Note that a call to this function must balance a previous lock_memory_etc() 6013 call with exactly the same parameters. 6014 */ 6015 status_t 6016 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 6017 { 6018 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 6019 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 6020 6021 // compute the page protection that is required 6022 bool isUser = IS_USER_ADDRESS(address); 6023 bool writable = (flags & B_READ_DEVICE) == 0; 6024 uint32 requiredProtection = PAGE_PRESENT 6025 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 6026 if (writable) 6027 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 6028 6029 uint32 mallocFlags = isUser 6030 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 6031 6032 // get and read lock the address space 6033 VMAddressSpace* addressSpace = NULL; 6034 if (isUser) { 6035 if (team == B_CURRENT_TEAM) 6036 addressSpace = VMAddressSpace::GetCurrent(); 6037 else 6038 addressSpace = VMAddressSpace::Get(team); 6039 } else 6040 addressSpace = VMAddressSpace::GetKernel(); 6041 if (addressSpace == NULL) 6042 return B_ERROR; 6043 6044 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false); 6045 // Take over the address space reference. We don't unlock until we're 6046 // done.
6047 6048 VMTranslationMap* map = addressSpace->TranslationMap(); 6049 status_t error = B_OK; 6050 6051 // iterate through all concerned areas 6052 addr_t nextAddress = lockBaseAddress; 6053 while (nextAddress != lockEndAddress) { 6054 // get the next area 6055 VMArea* area = addressSpace->LookupArea(nextAddress); 6056 if (area == NULL) { 6057 error = B_BAD_ADDRESS; 6058 break; 6059 } 6060 6061 addr_t areaStart = nextAddress; 6062 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 6063 6064 // Lock the area's top cache. This is a requirement for 6065 // VMArea::Unwire(). 6066 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 6067 6068 // Depending on the area cache type and the wiring, we may not need to 6069 // look at the individual pages. 6070 if (area->cache_type == CACHE_TYPE_NULL 6071 || area->cache_type == CACHE_TYPE_DEVICE 6072 || area->wiring == B_FULL_LOCK 6073 || area->wiring == B_CONTIGUOUS) { 6074 // unwire the range (to avoid deadlocks we delete the range after 6075 // unlocking the cache) 6076 nextAddress = areaEnd; 6077 VMAreaWiredRange* range = area->Unwire(areaStart, 6078 areaEnd - areaStart, writable); 6079 cacheChainLocker.Unlock(); 6080 if (range != NULL) { 6081 range->~VMAreaWiredRange(); 6082 free_etc(range, mallocFlags); 6083 } 6084 continue; 6085 } 6086 6087 // Lock the area's cache chain and the translation map. Needed to look 6088 // up pages and play with their wired count. 6089 cacheChainLocker.LockAllSourceCaches(); 6090 map->Lock(); 6091 6092 // iterate through the pages and unwire them 6093 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 6094 phys_addr_t physicalAddress; 6095 uint32 flags; 6096 6097 vm_page* page; 6098 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 6099 && (flags & PAGE_PRESENT) != 0 6100 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 6101 != NULL) { 6102 // The page is still mapped -- just decrement 6103 // the page's wired count. 6104 decrement_page_wired_count(page); 6105 } else { 6106 panic("unlock_memory_etc(): Failed to unwire page: address " 6107 "space %p, address: %#" B_PRIxADDR, addressSpace, 6108 nextAddress); 6109 error = B_BAD_VALUE; 6110 break; 6111 } 6112 } 6113 6114 map->Unlock(); 6115 6116 // All pages are unwired. Remove the area's wired range as well (to 6117 // avoid deadlocks we delete the range after unlocking the cache). 6118 VMAreaWiredRange* range = area->Unwire(areaStart, 6119 areaEnd - areaStart, writable); 6120 6121 cacheChainLocker.Unlock(); 6122 6123 if (range != NULL) { 6124 range->~VMAreaWiredRange(); 6125 free_etc(range, mallocFlags); 6126 } 6127 6128 if (error != B_OK) 6129 break; 6130 } 6131 6132 // get rid of the address space reference lock_memory_etc() acquired 6133 addressSpace->Put(); 6134 6135 return error; 6136 } 6137 6138 6139 status_t 6140 unlock_memory(void* address, size_t numBytes, uint32 flags) 6141 { 6142 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 6143 } 6144 6145 6146 /*! Similar to get_memory_map(), but also allows specifying the address space 6147 for the memory in question and has saner semantics. 6148 Returns \c B_OK when the complete range could be translated or 6149 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either 6150 case the actual number of entries is written to \c *_numEntries. Any other 6151 error case indicates complete failure; \c *_numEntries will be set to \c 0 6152 in this case.
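	A minimal usage sketch (illustrative only; \c buffer and \c length are
	hypothetical, and the table size of 8 entries is arbitrary):
	\code
	physical_entry entries[8];
	uint32 numEntries = 8;
	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
		entries, &numEntries);
	if (status == B_OK || status == B_BUFFER_OVERFLOW) {
		// numEntries now holds the number of valid entries in the table
	}
	\endcode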
6153 */ 6154 status_t 6155 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 6156 physical_entry* table, uint32* _numEntries) 6157 { 6158 uint32 numEntries = *_numEntries; 6159 *_numEntries = 0; 6160 6161 VMAddressSpace* addressSpace; 6162 addr_t virtualAddress = (addr_t)address; 6163 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 6164 phys_addr_t physicalAddress; 6165 status_t status = B_OK; 6166 int32 index = -1; 6167 addr_t offset = 0; 6168 bool interrupts = are_interrupts_enabled(); 6169 6170 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " " 6171 "entries)\n", team, address, numBytes, numEntries)); 6172 6173 if (numEntries == 0 || numBytes == 0) 6174 return B_BAD_VALUE; 6175 6176 // in which address space is the address to be found? 6177 if (IS_USER_ADDRESS(virtualAddress)) { 6178 if (team == B_CURRENT_TEAM) 6179 addressSpace = VMAddressSpace::GetCurrent(); 6180 else 6181 addressSpace = VMAddressSpace::Get(team); 6182 } else 6183 addressSpace = VMAddressSpace::GetKernel(); 6184 6185 if (addressSpace == NULL) 6186 return B_ERROR; 6187 6188 VMTranslationMap* map = addressSpace->TranslationMap(); 6189 6190 if (interrupts) 6191 map->Lock(); 6192 6193 while (offset < numBytes) { 6194 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 6195 uint32 flags; 6196 6197 if (interrupts) { 6198 status = map->Query((addr_t)address + offset, &physicalAddress, 6199 &flags); 6200 } else { 6201 status = map->QueryInterrupt((addr_t)address + offset, 6202 &physicalAddress, &flags); 6203 } 6204 if (status < B_OK) 6205 break; 6206 if ((flags & PAGE_PRESENT) == 0) { 6207 panic("get_memory_map() called on unmapped memory!"); 6208 return B_BAD_ADDRESS; 6209 } 6210 6211 if (index < 0 && pageOffset > 0) { 6212 physicalAddress += pageOffset; 6213 if (bytes > B_PAGE_SIZE - pageOffset) 6214 bytes = B_PAGE_SIZE - pageOffset; 6215 } 6216 6217 // need to switch to the next physical_entry? 6218 if (index < 0 || table[index].address 6219 != physicalAddress - table[index].size) { 6220 if ((uint32)++index + 1 > numEntries) { 6221 // table too small 6222 break; 6223 } 6224 table[index].address = physicalAddress; 6225 table[index].size = bytes; 6226 } else { 6227 // page does fit in current entry 6228 table[index].size += bytes; 6229 } 6230 6231 offset += bytes; 6232 } 6233 6234 if (interrupts) 6235 map->Unlock(); 6236 6237 if (status != B_OK) 6238 return status; 6239 6240 if ((uint32)index + 1 > numEntries) { 6241 *_numEntries = index; 6242 return B_BUFFER_OVERFLOW; 6243 } 6244 6245 *_numEntries = index + 1; 6246 return B_OK; 6247 } 6248 6249 6250 /*! According to the BeBook, this function should always succeed. 6251 This is no longer the case.
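	If more than one entry is passed in and \c B_OK is returned, the table is
	terminated with an entry whose \c address and \c size are 0. A minimal
	usage sketch of the public get_memory_map() wrapper (illustrative only;
	\c buffer and \c length are hypothetical):
	\code
	physical_entry entries[8];
	if (get_memory_map(buffer, length, entries, 8) == B_OK) {
		for (int32 i = 0; i < 8 && entries[i].size != 0; i++)
			; // entries[i] describes one physically contiguous run
	}
	\endcode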
6252 */ 6253 extern "C" int32 6254 __get_memory_map_haiku(const void* address, size_t numBytes, 6255 physical_entry* table, int32 numEntries) 6256 { 6257 uint32 entriesRead = numEntries; 6258 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 6259 table, &entriesRead); 6260 if (error != B_OK) 6261 return error; 6262 6263 // close the entry list 6264 6265 // if it's only one entry, we will silently accept the missing ending 6266 if (numEntries == 1) 6267 return B_OK; 6268 6269 if (entriesRead + 1 > (uint32)numEntries) 6270 return B_BUFFER_OVERFLOW; 6271 6272 table[entriesRead].address = 0; 6273 table[entriesRead].size = 0; 6274 6275 return B_OK; 6276 } 6277 6278 6279 area_id 6280 area_for(void* address) 6281 { 6282 return vm_area_for((addr_t)address, true); 6283 } 6284 6285 6286 area_id 6287 find_area(const char* name) 6288 { 6289 return VMAreas::Find(name); 6290 } 6291 6292 6293 status_t 6294 _get_area_info(area_id id, area_info* info, size_t size) 6295 { 6296 if (size != sizeof(area_info) || info == NULL) 6297 return B_BAD_VALUE; 6298 6299 AddressSpaceReadLocker locker; 6300 VMArea* area; 6301 status_t status = locker.SetFromArea(id, area); 6302 if (status != B_OK) 6303 return status; 6304 6305 fill_area_info(area, info, size); 6306 return B_OK; 6307 } 6308 6309 6310 status_t 6311 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 6312 { 6313 addr_t nextBase = *(addr_t*)cookie; 6314 6315 // we're already through the list 6316 if (nextBase == (addr_t)-1) 6317 return B_ENTRY_NOT_FOUND; 6318 6319 if (team == B_CURRENT_TEAM) 6320 team = team_get_current_team_id(); 6321 6322 AddressSpaceReadLocker locker(team); 6323 if (!locker.IsLocked()) 6324 return B_BAD_TEAM_ID; 6325 6326 VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false); 6327 if (area == NULL) { 6328 nextBase = (addr_t)-1; 6329 return B_ENTRY_NOT_FOUND; 6330 } 6331 6332 fill_area_info(area, info, size); 6333 *cookie = (ssize_t)(area->Base() + 1); 6334 6335 return B_OK; 6336 } 6337 6338 6339 status_t 6340 set_area_protection(area_id area, uint32 newProtection) 6341 { 6342 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 6343 newProtection, true); 6344 } 6345 6346 6347 status_t 6348 resize_area(area_id areaID, size_t newSize) 6349 { 6350 return vm_resize_area(areaID, newSize, true); 6351 } 6352 6353 6354 /*! Transfers the specified area to a new team. The caller must be the owner 6355 of the area. 6356 */ 6357 area_id 6358 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 6359 bool kernel) 6360 { 6361 area_info info; 6362 status_t status = get_area_info(id, &info); 6363 if (status != B_OK) 6364 return status; 6365 6366 if (info.team != thread_get_current_thread()->team->id) 6367 return B_PERMISSION_DENIED; 6368 6369 // We need to mark the area cloneable so the following operations work. 6370 status = set_area_protection(id, info.protection | B_CLONEABLE_AREA); 6371 if (status != B_OK) 6372 return status; 6373 6374 area_id clonedArea = vm_clone_area(target, info.name, _address, 6375 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 6376 if (clonedArea < 0) 6377 return clonedArea; 6378 6379 status = vm_delete_area(info.team, id, kernel); 6380 if (status != B_OK) { 6381 vm_delete_area(target, clonedArea, kernel); 6382 return status; 6383 } 6384 6385 // Now we can reset the protection to whatever it was before. 
6386 set_area_protection(clonedArea, info.protection); 6387 6388 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 6389 6390 return clonedArea; 6391 } 6392 6393 6394 extern "C" area_id 6395 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 6396 size_t numBytes, uint32 addressSpec, uint32 protection, 6397 void** _virtualAddress) 6398 { 6399 if (!arch_vm_supports_protection(protection)) 6400 return B_NOT_SUPPORTED; 6401 6402 fix_protection(&protection); 6403 6404 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 6405 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 6406 false); 6407 } 6408 6409 6410 area_id 6411 clone_area(const char* name, void** _address, uint32 addressSpec, 6412 uint32 protection, area_id source) 6413 { 6414 if ((protection & B_KERNEL_PROTECTION) == 0) 6415 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 6416 6417 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 6418 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 6419 } 6420 6421 6422 area_id 6423 create_area_etc(team_id team, const char* name, size_t size, uint32 lock, 6424 uint32 protection, uint32 flags, uint32 guardSize, 6425 const virtual_address_restrictions* virtualAddressRestrictions, 6426 const physical_address_restrictions* physicalAddressRestrictions, 6427 void** _address) 6428 { 6429 fix_protection(&protection); 6430 6431 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 6432 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 6433 true, _address); 6434 } 6435 6436 6437 extern "C" area_id 6438 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 6439 size_t size, uint32 lock, uint32 protection) 6440 { 6441 fix_protection(&protection); 6442 6443 virtual_address_restrictions virtualRestrictions = {}; 6444 virtualRestrictions.address = *_address; 6445 virtualRestrictions.address_specification = addressSpec; 6446 physical_address_restrictions physicalRestrictions = {}; 6447 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 6448 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 6449 true, _address); 6450 } 6451 6452 6453 status_t 6454 delete_area(area_id area) 6455 { 6456 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 6457 } 6458 6459 6460 // #pragma mark - Userland syscalls 6461 6462 6463 status_t 6464 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 6465 addr_t size) 6466 { 6467 // filter out some unavailable values (for userland) 6468 switch (addressSpec) { 6469 case B_ANY_KERNEL_ADDRESS: 6470 case B_ANY_KERNEL_BLOCK_ADDRESS: 6471 return B_BAD_VALUE; 6472 } 6473 6474 addr_t address; 6475 6476 if (!IS_USER_ADDRESS(userAddress) 6477 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 6478 return B_BAD_ADDRESS; 6479 6480 status_t status = vm_reserve_address_range( 6481 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 6482 RESERVED_AVOID_BASE); 6483 if (status != B_OK) 6484 return status; 6485 6486 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 6487 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6488 (void*)address, size); 6489 return B_BAD_ADDRESS; 6490 } 6491 6492 return B_OK; 6493 } 6494 6495 6496 status_t 6497 _user_unreserve_address_range(addr_t address, addr_t size) 6498 { 6499 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6500 (void*)address, size); 6501 } 6502 6503 6504 area_id 
6505 _user_area_for(void* address) 6506 { 6507 return vm_area_for((addr_t)address, false); 6508 } 6509 6510 6511 area_id 6512 _user_find_area(const char* userName) 6513 { 6514 char name[B_OS_NAME_LENGTH]; 6515 6516 if (!IS_USER_ADDRESS(userName) 6517 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 6518 return B_BAD_ADDRESS; 6519 6520 return find_area(name); 6521 } 6522 6523 6524 status_t 6525 _user_get_area_info(area_id area, area_info* userInfo) 6526 { 6527 if (!IS_USER_ADDRESS(userInfo)) 6528 return B_BAD_ADDRESS; 6529 6530 area_info info; 6531 status_t status = get_area_info(area, &info); 6532 if (status < B_OK) 6533 return status; 6534 6535 // TODO: do we want to prevent userland from seeing kernel protections? 6536 //info.protection &= B_USER_PROTECTION; 6537 6538 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6539 return B_BAD_ADDRESS; 6540 6541 return status; 6542 } 6543 6544 6545 status_t 6546 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 6547 { 6548 ssize_t cookie; 6549 6550 if (!IS_USER_ADDRESS(userCookie) 6551 || !IS_USER_ADDRESS(userInfo) 6552 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 6553 return B_BAD_ADDRESS; 6554 6555 area_info info; 6556 status_t status = _get_next_area_info(team, &cookie, &info, 6557 sizeof(area_info)); 6558 if (status != B_OK) 6559 return status; 6560 6561 //info.protection &= B_USER_PROTECTION; 6562 6563 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6564 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6565 return B_BAD_ADDRESS; 6566 6567 return status; 6568 } 6569 6570 6571 status_t 6572 _user_set_area_protection(area_id area, uint32 newProtection) 6573 { 6574 if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0) 6575 return B_BAD_VALUE; 6576 6577 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6578 newProtection, false); 6579 } 6580 6581 6582 status_t 6583 _user_resize_area(area_id area, size_t newSize) 6584 { 6585 // TODO: Since we restrict deleting of areas to those owned by the team, 6586 // we should also do that for resizing (check other functions, too). 
6587 return vm_resize_area(area, newSize, false); 6588 } 6589 6590 6591 area_id 6592 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6593 team_id target) 6594 { 6595 // filter out some unavailable values (for userland) 6596 switch (addressSpec) { 6597 case B_ANY_KERNEL_ADDRESS: 6598 case B_ANY_KERNEL_BLOCK_ADDRESS: 6599 return B_BAD_VALUE; 6600 } 6601 6602 void* address; 6603 if (!IS_USER_ADDRESS(userAddress) 6604 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6605 return B_BAD_ADDRESS; 6606 6607 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6608 if (newArea < B_OK) 6609 return newArea; 6610 6611 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6612 return B_BAD_ADDRESS; 6613 6614 return newArea; 6615 } 6616 6617 6618 area_id 6619 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6620 uint32 protection, area_id sourceArea) 6621 { 6622 char name[B_OS_NAME_LENGTH]; 6623 void* address; 6624 6625 // filter out some unavailable values (for userland) 6626 switch (addressSpec) { 6627 case B_ANY_KERNEL_ADDRESS: 6628 case B_ANY_KERNEL_BLOCK_ADDRESS: 6629 return B_BAD_VALUE; 6630 } 6631 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6632 return B_BAD_VALUE; 6633 6634 if (!IS_USER_ADDRESS(userName) 6635 || !IS_USER_ADDRESS(userAddress) 6636 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6637 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6638 return B_BAD_ADDRESS; 6639 6640 fix_protection(&protection); 6641 6642 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6643 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6644 false); 6645 if (clonedArea < B_OK) 6646 return clonedArea; 6647 6648 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6649 delete_area(clonedArea); 6650 return B_BAD_ADDRESS; 6651 } 6652 6653 return clonedArea; 6654 } 6655 6656 6657 area_id 6658 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6659 size_t size, uint32 lock, uint32 protection) 6660 { 6661 char name[B_OS_NAME_LENGTH]; 6662 void* address; 6663 6664 // filter out some unavailable values (for userland) 6665 switch (addressSpec) { 6666 case B_ANY_KERNEL_ADDRESS: 6667 case B_ANY_KERNEL_BLOCK_ADDRESS: 6668 return B_BAD_VALUE; 6669 } 6670 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6671 return B_BAD_VALUE; 6672 6673 if (!IS_USER_ADDRESS(userName) 6674 || !IS_USER_ADDRESS(userAddress) 6675 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6676 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6677 return B_BAD_ADDRESS; 6678 6679 if (addressSpec == B_EXACT_ADDRESS 6680 && IS_KERNEL_ADDRESS(address)) 6681 return B_BAD_VALUE; 6682 6683 if (addressSpec == B_ANY_ADDRESS) 6684 addressSpec = B_RANDOMIZED_ANY_ADDRESS; 6685 if (addressSpec == B_BASE_ADDRESS) 6686 addressSpec = B_RANDOMIZED_BASE_ADDRESS; 6687 6688 fix_protection(&protection); 6689 6690 virtual_address_restrictions virtualRestrictions = {}; 6691 virtualRestrictions.address = address; 6692 virtualRestrictions.address_specification = addressSpec; 6693 physical_address_restrictions physicalRestrictions = {}; 6694 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6695 size, lock, protection, 0, 0, &virtualRestrictions, 6696 &physicalRestrictions, false, &address); 6697 6698 if (area >= B_OK 6699 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6700 delete_area(area); 6701 return B_BAD_ADDRESS; 6702 
} 6703 6704 return area; 6705 } 6706 6707 6708 status_t 6709 _user_delete_area(area_id area) 6710 { 6711 // Unlike the BeOS implementation, you can now only delete areas 6712 // that you have created yourself from userland. 6713 // The documentation to delete_area() explicitly states that this 6714 // will be restricted in the future, and so it will. 6715 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6716 } 6717 6718 6719 // TODO: create a BeOS style call for this! 6720 6721 area_id 6722 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6723 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6724 int fd, off_t offset) 6725 { 6726 char name[B_OS_NAME_LENGTH]; 6727 void* address; 6728 area_id area; 6729 6730 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6731 return B_BAD_VALUE; 6732 6733 fix_protection(&protection); 6734 6735 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6736 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6737 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6738 return B_BAD_ADDRESS; 6739 6740 if (addressSpec == B_EXACT_ADDRESS) { 6741 if ((addr_t)address + size < (addr_t)address 6742 || (addr_t)address % B_PAGE_SIZE != 0) { 6743 return B_BAD_VALUE; 6744 } 6745 if (!IS_USER_ADDRESS(address) 6746 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6747 return B_BAD_ADDRESS; 6748 } 6749 } 6750 6751 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6752 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6753 false); 6754 if (area < B_OK) 6755 return area; 6756 6757 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6758 return B_BAD_ADDRESS; 6759 6760 return area; 6761 } 6762 6763 6764 status_t 6765 _user_unmap_memory(void* _address, size_t size) 6766 { 6767 addr_t address = (addr_t)_address; 6768 6769 // check params 6770 if (size == 0 || (addr_t)address + size < (addr_t)address 6771 || (addr_t)address % B_PAGE_SIZE != 0) { 6772 return B_BAD_VALUE; 6773 } 6774 6775 if (!IS_USER_ADDRESS(address) 6776 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6777 return B_BAD_ADDRESS; 6778 } 6779 6780 // Write lock the address space and ensure the address range is not wired. 6781 AddressSpaceWriteLocker locker; 6782 do { 6783 status_t status = locker.SetTo(team_get_current_team_id()); 6784 if (status != B_OK) 6785 return status; 6786 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6787 size, &locker)); 6788 6789 // unmap 6790 return unmap_address_range(locker.AddressSpace(), address, size, false); 6791 } 6792 6793 6794 status_t 6795 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6796 { 6797 // check address range 6798 addr_t address = (addr_t)_address; 6799 size = PAGE_ALIGN(size); 6800 6801 if ((address % B_PAGE_SIZE) != 0) 6802 return B_BAD_VALUE; 6803 if (!is_user_address_range(_address, size)) { 6804 // weird error code required by POSIX 6805 return ENOMEM; 6806 } 6807 6808 // extend and check protection 6809 if ((protection & ~B_USER_PROTECTION) != 0) 6810 return B_BAD_VALUE; 6811 6812 fix_protection(&protection); 6813 6814 // We need to write lock the address space, since we're going to play with 6815 // the areas. Also make sure that none of the areas is wired and that we're 6816 // actually allowed to change the protection. 
6817 AddressSpaceWriteLocker locker; 6818 6819 bool restart; 6820 do { 6821 restart = false; 6822 6823 status_t status = locker.SetTo(team_get_current_team_id()); 6824 if (status != B_OK) 6825 return status; 6826 6827 // First round: Check whether the whole range is covered by areas and we 6828 // are allowed to modify them. 6829 addr_t currentAddress = address; 6830 size_t sizeLeft = size; 6831 while (sizeLeft > 0) { 6832 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6833 if (area == NULL) 6834 return B_NO_MEMORY; 6835 6836 if ((area->protection & B_KERNEL_AREA) != 0) 6837 return B_NOT_ALLOWED; 6838 if (area->protection_max != 0 6839 && (protection & area->protection_max) != (protection & B_USER_PROTECTION)) { 6840 return B_NOT_ALLOWED; 6841 } 6842 6843 addr_t offset = currentAddress - area->Base(); 6844 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6845 6846 AreaCacheLocker cacheLocker(area); 6847 6848 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6849 &locker, &cacheLocker)) { 6850 restart = true; 6851 break; 6852 } 6853 6854 cacheLocker.Unlock(); 6855 6856 currentAddress += rangeSize; 6857 sizeLeft -= rangeSize; 6858 } 6859 } while (restart); 6860 6861 // Second round: If the protections differ from that of the area, create a 6862 // page protection array and re-map mapped pages. 6863 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6864 addr_t currentAddress = address; 6865 size_t sizeLeft = size; 6866 while (sizeLeft > 0) { 6867 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6868 if (area == NULL) 6869 return B_NO_MEMORY; 6870 6871 addr_t offset = currentAddress - area->Base(); 6872 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6873 6874 currentAddress += rangeSize; 6875 sizeLeft -= rangeSize; 6876 6877 if (area->page_protections == NULL) { 6878 if (area->protection == protection) 6879 continue; 6880 if (offset == 0 && rangeSize == area->Size()) { 6881 // The whole area is covered: let set_area_protection handle it. 6882 status_t status = vm_set_area_protection(area->address_space->ID(), 6883 area->id, protection, false); 6884 if (status != B_OK) 6885 return status; 6886 continue; 6887 } 6888 6889 status_t status = allocate_area_page_protections(area); 6890 if (status != B_OK) 6891 return status; 6892 } 6893 6894 // We need to lock the complete cache chain, since we potentially unmap 6895 // pages of lower caches. 6896 VMCache* topCache = vm_area_get_locked_cache(area); 6897 VMCacheChainLocker cacheChainLocker(topCache); 6898 cacheChainLocker.LockAllSourceCaches(); 6899 6900 // Adjust the committed size, if necessary. 6901 if (topCache->source != NULL && topCache->temporary) { 6902 const bool becomesWritable = (protection & B_WRITE_AREA) != 0; 6903 ssize_t commitmentChange = 0; 6904 for (addr_t pageAddress = area->Base() + offset; 6905 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6906 if (topCache->LookupPage(pageAddress) != NULL) { 6907 // This page should already be accounted for in the commitment. 
6908 continue; 6909 } 6910 6911 const bool isWritable 6912 = (get_area_page_protection(area, pageAddress) & B_WRITE_AREA) != 0; 6913 6914 if (becomesWritable && !isWritable) 6915 commitmentChange += B_PAGE_SIZE; 6916 else if (!becomesWritable && isWritable) 6917 commitmentChange -= B_PAGE_SIZE; 6918 } 6919 6920 if (commitmentChange != 0) { 6921 const off_t newCommitment = topCache->committed_size + commitmentChange; 6922 ASSERT(newCommitment <= (topCache->virtual_end - topCache->virtual_base)); 6923 status_t status = topCache->Commit(newCommitment, VM_PRIORITY_USER); 6924 if (status != B_OK) 6925 return status; 6926 } 6927 } 6928 6929 for (addr_t pageAddress = area->Base() + offset; 6930 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6931 map->Lock(); 6932 6933 set_area_page_protection(area, pageAddress, protection); 6934 6935 phys_addr_t physicalAddress; 6936 uint32 flags; 6937 6938 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6939 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6940 map->Unlock(); 6941 continue; 6942 } 6943 6944 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6945 if (page == NULL) { 6946 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6947 "\n", area, physicalAddress); 6948 map->Unlock(); 6949 return B_ERROR; 6950 } 6951 6952 // If the page is not in the topmost cache and write access is 6953 // requested, we have to unmap it. Otherwise we can re-map it with 6954 // the new protection. 6955 bool unmapPage = page->Cache() != topCache 6956 && (protection & B_WRITE_AREA) != 0; 6957 6958 if (!unmapPage) 6959 map->ProtectPage(area, pageAddress, protection); 6960 6961 map->Unlock(); 6962 6963 if (unmapPage) { 6964 DEBUG_PAGE_ACCESS_START(page); 6965 unmap_page(area, pageAddress); 6966 DEBUG_PAGE_ACCESS_END(page); 6967 } 6968 } 6969 } 6970 6971 return B_OK; 6972 } 6973 6974 6975 status_t 6976 _user_sync_memory(void* _address, size_t size, uint32 flags) 6977 { 6978 addr_t address = (addr_t)_address; 6979 size = PAGE_ALIGN(size); 6980 6981 // check params 6982 if ((address % B_PAGE_SIZE) != 0) 6983 return B_BAD_VALUE; 6984 if (!is_user_address_range(_address, size)) { 6985 // weird error code required by POSIX 6986 return ENOMEM; 6987 } 6988 6989 bool writeSync = (flags & MS_SYNC) != 0; 6990 bool writeAsync = (flags & MS_ASYNC) != 0; 6991 if (writeSync && writeAsync) 6992 return B_BAD_VALUE; 6993 6994 if (size == 0 || (!writeSync && !writeAsync)) 6995 return B_OK; 6996 6997 // iterate through the range and sync all concerned areas 6998 while (size > 0) { 6999 // read lock the address space 7000 AddressSpaceReadLocker locker; 7001 status_t error = locker.SetTo(team_get_current_team_id()); 7002 if (error != B_OK) 7003 return error; 7004 7005 // get the first area 7006 VMArea* area = locker.AddressSpace()->LookupArea(address); 7007 if (area == NULL) 7008 return B_NO_MEMORY; 7009 7010 uint32 offset = address - area->Base(); 7011 size_t rangeSize = min_c(area->Size() - offset, size); 7012 offset += area->cache_offset; 7013 7014 // lock the cache 7015 AreaCacheLocker cacheLocker(area); 7016 if (!cacheLocker) 7017 return B_BAD_VALUE; 7018 VMCache* cache = area->cache; 7019 7020 locker.Unlock(); 7021 7022 uint32 firstPage = offset >> PAGE_SHIFT; 7023 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 7024 7025 // write the pages 7026 if (cache->type == CACHE_TYPE_VNODE) { 7027 if (writeSync) { 7028 // synchronous 7029 error = vm_page_write_modified_page_range(cache, firstPage, 7030 endPage); 7031 if (error != 
B_OK) 7032 return error; 7033 } else { 7034 // asynchronous 7035 vm_page_schedule_write_page_range(cache, firstPage, endPage); 7036 // TODO: This is probably not quite what is supposed to happen. 7037 // Especially when a lot has to be written, it might take ages 7038 // until it really hits the disk. 7039 } 7040 } 7041 7042 address += rangeSize; 7043 size -= rangeSize; 7044 } 7045 7046 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 7047 // synchronize multiple mappings of the same file. In our VM they never get 7048 // out of sync, though, so we don't have to do anything. 7049 7050 return B_OK; 7051 } 7052 7053 7054 status_t 7055 _user_memory_advice(void* _address, size_t size, uint32 advice) 7056 { 7057 addr_t address = (addr_t)_address; 7058 if ((address % B_PAGE_SIZE) != 0) 7059 return B_BAD_VALUE; 7060 7061 size = PAGE_ALIGN(size); 7062 if (!is_user_address_range(_address, size)) { 7063 // weird error code required by POSIX 7064 return B_NO_MEMORY; 7065 } 7066 7067 switch (advice) { 7068 case MADV_NORMAL: 7069 case MADV_SEQUENTIAL: 7070 case MADV_RANDOM: 7071 case MADV_WILLNEED: 7072 case MADV_DONTNEED: 7073 // TODO: Implement! 7074 break; 7075 7076 case MADV_FREE: 7077 { 7078 AddressSpaceWriteLocker locker; 7079 do { 7080 status_t status = locker.SetTo(team_get_current_team_id()); 7081 if (status != B_OK) 7082 return status; 7083 } while (wait_if_address_range_is_wired(locker.AddressSpace(), 7084 address, size, &locker)); 7085 7086 discard_address_range(locker.AddressSpace(), address, size, false); 7087 break; 7088 } 7089 7090 default: 7091 return B_BAD_VALUE; 7092 } 7093 7094 return B_OK; 7095 } 7096 7097 7098 status_t 7099 _user_get_memory_properties(team_id teamID, const void* address, 7100 uint32* _protected, uint32* _lock) 7101 { 7102 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 7103 return B_BAD_ADDRESS; 7104 7105 AddressSpaceReadLocker locker; 7106 status_t error = locker.SetTo(teamID); 7107 if (error != B_OK) 7108 return error; 7109 7110 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 7111 if (area == NULL) 7112 return B_NO_MEMORY; 7113 7114 uint32 protection = get_area_page_protection(area, (addr_t)address); 7115 uint32 wiring = area->wiring; 7116 7117 locker.Unlock(); 7118 7119 error = user_memcpy(_protected, &protection, sizeof(protection)); 7120 if (error != B_OK) 7121 return error; 7122 7123 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 7124 7125 return error; 7126 } 7127 7128 7129 static status_t 7130 user_set_memory_swappable(const void* _address, size_t size, bool swappable) 7131 { 7132 #if ENABLE_SWAP_SUPPORT 7133 // check address range 7134 addr_t address = (addr_t)_address; 7135 size = PAGE_ALIGN(size); 7136 7137 if ((address % B_PAGE_SIZE) != 0) 7138 return EINVAL; 7139 if (!is_user_address_range(_address, size)) 7140 return EINVAL; 7141 7142 const addr_t endAddress = address + size; 7143 7144 AddressSpaceReadLocker addressSpaceLocker; 7145 status_t error = addressSpaceLocker.SetTo(team_get_current_team_id()); 7146 if (error != B_OK) 7147 return error; 7148 VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace(); 7149 7150 // iterate through all concerned areas 7151 addr_t nextAddress = address; 7152 while (nextAddress != endAddress) { 7153 // get the next area 7154 VMArea* area = addressSpace->LookupArea(nextAddress); 7155 if (area == NULL) { 7156 error = B_BAD_ADDRESS; 7157 break; 7158 } 7159 7160 const addr_t areaStart = nextAddress; 7161 const addr_t areaEnd = std::min(endAddress, 
area->Base() + area->Size()); 7162 nextAddress = areaEnd; 7163 7164 error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 7165 if (error != B_OK) { 7166 // We don't need to unset or reset things on failure. 7167 break; 7168 } 7169 7170 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 7171 VMAnonymousCache* anonCache = NULL; 7172 if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) { 7173 // This memory is never swapped anyway. Nothing to do. 7174 } else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) { 7175 error = anonCache->SetCanSwapPages(areaStart - area->Base(), 7176 areaEnd - areaStart, swappable); 7177 } else { 7178 // Some other cache type? We cannot affect anything here. 7179 error = EINVAL; 7180 } 7181 7182 cacheChainLocker.Unlock(); 7183 7184 unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 7185 if (error != B_OK) 7186 break; 7187 } 7188 7189 return error; 7190 #else 7191 // No swap support? Nothing to do. 7192 return B_OK; 7193 #endif 7194 } 7195 7196 7197 status_t 7198 _user_mlock(const void* _address, size_t size) 7199 { 7200 return user_set_memory_swappable(_address, size, false); 7201 } 7202 7203 7204 status_t 7205 _user_munlock(const void* _address, size_t size) 7206 { 7207 // TODO: B_SHARED_AREAs need to be handled a bit differently: 7208 // if multiple clones of an area had mlock() called on them, 7209 // munlock() must also be called on all of them to actually unlock. 7210 // (At present, the first munlock() will unlock all.) 7211 // TODO: fork() should automatically unlock memory in the child. 7212 return user_set_memory_swappable(_address, size, true); 7213 } 7214 7215 7216 // #pragma mark -- compatibility 7217 7218 7219 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7220 7221 7222 struct physical_entry_beos { 7223 uint32 address; 7224 uint32 size; 7225 }; 7226 7227 7228 /*! The physical_entry structure has changed. We need to translate it to the 7229 old one. 7230 */ 7231 extern "C" int32 7232 __get_memory_map_beos(const void* _address, size_t numBytes, 7233 physical_entry_beos* table, int32 numEntries) 7234 { 7235 if (numEntries <= 0) 7236 return B_BAD_VALUE; 7237 7238 const uint8* address = (const uint8*)_address; 7239 7240 int32 count = 0; 7241 while (numBytes > 0 && count < numEntries) { 7242 physical_entry entry; 7243 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 7244 if (result < 0) { 7245 if (result != B_BUFFER_OVERFLOW) 7246 return result; 7247 } 7248 7249 if (entry.address >= (phys_addr_t)1 << 32) { 7250 panic("get_memory_map(): Address is greater than 4 GB!"); 7251 return B_ERROR; 7252 } 7253 7254 table[count].address = entry.address; 7255 table[count++].size = entry.size; 7256 7257 address += entry.size; 7258 numBytes -= entry.size; 7259 } 7260 7261 // null-terminate the table, if possible 7262 if (count < numEntries) { 7263 table[count].address = 0; 7264 table[count].size = 0; 7265 } 7266 7267 return B_OK; 7268 } 7269 7270 7271 /*! The type of the \a physicalAddress parameter has changed from void* to 7272 phys_addr_t. 7273 */ 7274 extern "C" area_id 7275 __map_physical_memory_beos(const char* name, void* physicalAddress, 7276 size_t numBytes, uint32 addressSpec, uint32 protection, 7277 void** _virtualAddress) 7278 { 7279 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 7280 addressSpec, protection, _virtualAddress); 7281 } 7282 7283 7284 /*!
The caller might not be able to deal with physical addresses >= 4 GB, so 7285 we meddle with the \a lock parameter to force 32 bit. 7286 */ 7287 extern "C" area_id 7288 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 7289 size_t size, uint32 lock, uint32 protection) 7290 { 7291 switch (lock) { 7292 case B_NO_LOCK: 7293 break; 7294 case B_FULL_LOCK: 7295 case B_LAZY_LOCK: 7296 lock = B_32_BIT_FULL_LOCK; 7297 break; 7298 case B_CONTIGUOUS: 7299 lock = B_32_BIT_CONTIGUOUS; 7300 break; 7301 } 7302 7303 return __create_area_haiku(name, _address, addressSpec, size, lock, 7304 protection); 7305 } 7306 7307 7308 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 7309 "BASE"); 7310 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 7311 "map_physical_memory@", "BASE"); 7312 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 7313 "BASE"); 7314 7315 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7316 "get_memory_map@@", "1_ALPHA3"); 7317 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7318 "map_physical_memory@@", "1_ALPHA3"); 7319 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7320 "1_ALPHA3"); 7321 7322 7323 #else 7324 7325 7326 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7327 "get_memory_map@@", "BASE"); 7328 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7329 "map_physical_memory@@", "BASE"); 7330 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7331 "BASE"); 7332 7333 7334 #endif // defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7335