/*
 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <vm/vm.h>

#include <ctype.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#include <algorithm>

#include <OS.h>
#include <KernelExport.h>

#include <AutoDeleterDrivers.h>

#include <symbol_versioning.h>

#include <arch/cpu.h>
#include <arch/vm.h>
#include <arch/user_memory.h>
#include <boot/elf.h>
#include <boot/stage2.h>
#include <condition_variable.h>
#include <console.h>
#include <debug.h>
#include <file_cache.h>
#include <fs/fd.h>
#include <heap.h>
#include <kernel.h>
#include <int.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <slab/Slab.h>
#include <smp.h>
#include <system_info.h>
#include <thread.h>
#include <team.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <util/BitUtils.h>
#include <util/ThreadAutoLock.h>
#include <vm/vm_page.h>
#include <vm/vm_priv.h>
#include <vm/VMAddressSpace.h>
#include <vm/VMArea.h>
#include <vm/VMCache.h>

#include "VMAddressSpaceLocking.h"
#include "VMAnonymousCache.h"
#include "VMAnonymousNoSwapCache.h"
#include "IORequest.h"


// Uncomment to route TRACE()/FTRACE() output to dprintf(); otherwise both
// macros expand to an empty statement.
//#define TRACE_VM
//#define TRACE_FAULTS
#ifdef TRACE_VM
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif
#ifdef TRACE_FAULTS
#	define FTRACE(x) dprintf x
#else
#	define FTRACE(x) ;
#endif


namespace {

/*!	Locking policy used by AreaCacheLocker.
	Lock() never locks anything and always reports failure, so a cache can
	only be handed to the locker in an already locked state. Unlock() gives
	the cache back via vm_area_put_locked_cache().
*/
class AreaCacheLocking {
public:
	inline bool Lock(VMCache* lockable)
	{
		return false;
	}

	inline void Unlock(VMCache* lockable)
	{
		vm_area_put_locked_cache(lockable);
	}
};

/*!	AutoLocker for a VMCache that uses the AreaCacheLocking policy, i.e. on
	unlock the cache is passed to vm_area_put_locked_cache().
*/
class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
public:
	//! Takes over \a cache (second base constructor argument is \c true).
	inline AreaCacheLocker(VMCache* cache = NULL)
		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
	{
	}

	//! Starts out empty and then behaves like SetTo(area).
	inline AreaCacheLocker(VMArea* area)
		: AutoLocker<VMCache, AreaCacheLocking>()
	{
		SetTo(area);
	}

	//! Forwards to the base class SetTo().
	inline void SetTo(VMCache* cache, bool alreadyLocked)
	{
		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
	}

	/*!	Sets the locker to the cache returned by vm_area_get_locked_cache()
		for \a area, or to \c NULL, if \a area is \c NULL.
	*/
	inline void SetTo(VMArea* area)
	{
		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
	}
};


/*!	Keeps track of a chain of caches, from a top cache down through its
	\c source links to the bottom-most cache locked so far.
	Every cache locked via LockSourceCache() stores its consumer (the cache
	above it in the chain) as user data, which is what allows the chain to be
	walked from the bottom back up to the top when unlocking.
	The top cache itself is not locked by this class; it is only released in
	Unlock() (respectively the destructor).
*/
class VMCacheChainLocker {
public:
	VMCacheChainLocker()
		:
		fTopCache(NULL),
		fBottomCache(NULL)
	{
	}

	//! Starts a chain consisting of just \a topCache.
	VMCacheChainLocker(VMCache* topCache)
		:
		fTopCache(topCache),
		fBottomCache(topCache)
	{
	}

	//! Unlocks whatever is still tracked (see Unlock()).
	~VMCacheChainLocker()
	{
		Unlock();
	}

	/*!	Restarts the chain at \a topCache. The previously tracked chain (if
		any) is not unlocked. The top cache's user data is reset to \c NULL.
	*/
	void SetTo(VMCache* topCache)
	{
		fTopCache = topCache;
		fBottomCache = topCache;

		if (topCache != NULL)
			topCache->SetUserData(NULL);
	}

	/*!	Extends the chain by one cache: locks the source of the current bottom
		cache, acquires a reference to it, and records the previous bottom
		cache as its user data.
		\return The newly locked cache, or \c NULL, if there is no chain or
			the bottom cache has no source.
	*/
	VMCache* LockSourceCache()
	{
		if (fBottomCache == NULL || fBottomCache->source == NULL)
			return NULL;

		VMCache* previousCache = fBottomCache;

		fBottomCache = fBottomCache->source;
		fBottomCache->Lock();
		fBottomCache->AcquireRefLocked();
		fBottomCache->SetUserData(previousCache);

		return fBottomCache;
	}

	//! Calls LockSourceCache() until the end of the source chain is reached.
	void LockAllSourceCaches()
	{
		while (LockSourceCache() != NULL) {
		}
	}

	/*!	Releases the reference to and unlocks every cache of the chain, except
		for \a exceptCache, which is left untouched. Afterwards the locker no
		longer tracks any cache.
	*/
	void Unlock(VMCache* exceptCache = NULL)
	{
		if (fTopCache == NULL)
			return;

		// Unlock caches in source -> consumer direction. This is important to
		// avoid double-locking and a reversal of locking order in case a cache
		// is eligible for merging.
		VMCache* cache = fBottomCache;
		while (cache != NULL) {
			// Fetch the consumer before the cache is released.
			VMCache* nextCache = (VMCache*)cache->UserData();
			if (cache != exceptCache)
				cache->ReleaseRefAndUnlock(cache != fTopCache);

			if (cache == fTopCache)
				break;

			cache = nextCache;
		}

		fTopCache = NULL;
		fBottomCache = NULL;
	}

	/*!	Unlocks the caches of the chain from the bottom upwards without
		releasing their references and without forgetting the chain. If
		\a keepTopCacheLocked is \c true, the walk stops before the top cache.
	*/
	void UnlockKeepRefs(bool keepTopCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fBottomCache;
		VMCache* cache = NULL;

		while (keepTopCacheLocked
				? nextCache != fTopCache : cache != fTopCache) {
			cache = nextCache;
			nextCache = (VMCache*)cache->UserData();
			cache->Unlock(cache != fTopCache);
		}
	}

	/*!	Locks the caches of the chain again, walking from the top cache along
		the \c source links until the recorded bottom cache has been locked or
		a cache without source is reached. The consumer user data is set anew
		for each cache locked. If \a topCacheLocked is \c true, the top cache
		is skipped.
	*/
	void RelockCaches(bool topCacheLocked)
	{
		if (fTopCache == NULL)
			return;

		VMCache* nextCache = fTopCache;
		VMCache* cache = NULL;
		if (topCacheLocked) {
			cache = nextCache;
			nextCache = cache->source;
		}

		while (cache != fBottomCache && nextCache != NULL) {
			VMCache* consumer = cache;
			cache = nextCache;
			nextCache = cache->source;
			cache->Lock();
			cache->SetUserData(consumer);
		}
	}

private:
	VMCache*	fTopCache;
	VMCache*	fBottomCache;
};

} // namespace


// The memory reserve that an allocation of a given priority must not touch.
240 static const size_t kMemoryReserveForPriority[] = { 241 VM_MEMORY_RESERVE_USER, // user 242 VM_MEMORY_RESERVE_SYSTEM, // system 243 0 // VIP 244 }; 245 246 247 ObjectCache* gPageMappingsObjectCache; 248 249 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 250 251 static off_t sAvailableMemory; 252 static off_t sNeededMemory; 253 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 254 static uint32 sPageFaults; 255 256 static VMPhysicalPageMapper* sPhysicalPageMapper; 257 258 #if DEBUG_CACHE_LIST 259 260 struct cache_info { 261 VMCache* cache; 262 addr_t page_count; 263 addr_t committed; 264 }; 265 266 static const int kCacheInfoTableCount = 100 * 1024; 267 static cache_info* sCacheInfoTable; 268 269 #endif // DEBUG_CACHE_LIST 270 271 272 // function declarations 273 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 274 bool addressSpaceCleanup); 275 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 276 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage); 277 static status_t map_backing_store(VMAddressSpace* addressSpace, 278 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring, 279 int protection, int protectionMax, int mapping, uint32 flags, 280 const virtual_address_restrictions* addressRestrictions, bool kernel, 281 VMArea** _area, void** _virtualAddress); 282 static void fix_protection(uint32* protection); 283 284 285 // #pragma mark - 286 287 288 #if VM_PAGE_FAULT_TRACING 289 290 namespace VMPageFaultTracing { 291 292 class PageFaultStart : public AbstractTraceEntry { 293 public: 294 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 295 : 296 fAddress(address), 297 fPC(pc), 298 fWrite(write), 299 fUser(user) 300 { 301 Initialized(); 302 } 303 304 virtual void AddDump(TraceOutput& out) 305 { 306 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 307 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 308 } 309 310 private: 311 addr_t fAddress; 312 addr_t fPC; 313 bool fWrite; 314 bool fUser; 315 }; 316 317 318 // page fault errors 319 enum { 320 PAGE_FAULT_ERROR_NO_AREA = 0, 321 PAGE_FAULT_ERROR_KERNEL_ONLY, 322 PAGE_FAULT_ERROR_WRITE_PROTECTED, 323 PAGE_FAULT_ERROR_READ_PROTECTED, 324 PAGE_FAULT_ERROR_EXECUTE_PROTECTED, 325 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 326 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 327 }; 328 329 330 class PageFaultError : public AbstractTraceEntry { 331 public: 332 PageFaultError(area_id area, status_t error) 333 : 334 fArea(area), 335 fError(error) 336 { 337 Initialized(); 338 } 339 340 virtual void AddDump(TraceOutput& out) 341 { 342 switch (fError) { 343 case PAGE_FAULT_ERROR_NO_AREA: 344 out.Print("page fault error: no area"); 345 break; 346 case PAGE_FAULT_ERROR_KERNEL_ONLY: 347 out.Print("page fault error: area: %ld, kernel only", fArea); 348 break; 349 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 350 out.Print("page fault error: area: %ld, write protected", 351 fArea); 352 break; 353 case PAGE_FAULT_ERROR_READ_PROTECTED: 354 out.Print("page fault error: area: %ld, read protected", fArea); 355 break; 356 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED: 357 out.Print("page fault error: area: %ld, execute protected", 358 fArea); 359 break; 360 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 361 out.Print("page fault error: kernel touching bad user memory"); 362 break; 363 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 364 out.Print("page fault error: no address space"); 365 break; 366 default: 367 out.Print("page fault error: area: %ld, error: %s", fArea, 368 strerror(fError)); 369 break; 370 } 371 } 372 373 private: 374 area_id fArea; 375 status_t fError; 376 }; 377 378 379 class PageFaultDone : public AbstractTraceEntry { 380 public: 381 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 382 vm_page* page) 383 : 384 fArea(area), 385 fTopCache(topCache), 386 fCache(cache), 387 fPage(page) 388 { 389 Initialized(); 390 } 
391 392 virtual void AddDump(TraceOutput& out) 393 { 394 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 395 "page: %p", fArea, fTopCache, fCache, fPage); 396 } 397 398 private: 399 area_id fArea; 400 VMCache* fTopCache; 401 VMCache* fCache; 402 vm_page* fPage; 403 }; 404 405 } // namespace VMPageFaultTracing 406 407 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 408 #else 409 # define TPF(x) ; 410 #endif // VM_PAGE_FAULT_TRACING 411 412 413 // #pragma mark - 414 415 416 /*! The page's cache must be locked. 417 */ 418 static inline void 419 increment_page_wired_count(vm_page* page) 420 { 421 if (!page->IsMapped()) 422 atomic_add(&gMappedPagesCount, 1); 423 page->IncrementWiredCount(); 424 } 425 426 427 /*! The page's cache must be locked. 428 */ 429 static inline void 430 decrement_page_wired_count(vm_page* page) 431 { 432 page->DecrementWiredCount(); 433 if (!page->IsMapped()) 434 atomic_add(&gMappedPagesCount, -1); 435 } 436 437 438 static inline addr_t 439 virtual_page_address(VMArea* area, vm_page* page) 440 { 441 return area->Base() 442 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 443 } 444 445 446 static inline bool 447 is_page_in_area(VMArea* area, vm_page* page) 448 { 449 off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT); 450 return pageCacheOffsetBytes >= area->cache_offset 451 && pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size(); 452 } 453 454 455 //! 
You need to have the address space locked when calling this function 456 static VMArea* 457 lookup_area(VMAddressSpace* addressSpace, area_id id) 458 { 459 VMAreas::ReadLock(); 460 461 VMArea* area = VMAreas::LookupLocked(id); 462 if (area != NULL && area->address_space != addressSpace) 463 area = NULL; 464 465 VMAreas::ReadUnlock(); 466 467 return area; 468 } 469 470 471 static inline size_t 472 area_page_protections_size(size_t areaSize) 473 { 474 // In the page protections we store only the three user protections, 475 // so we use 4 bits per page. 476 return (areaSize / B_PAGE_SIZE + 1) / 2; 477 } 478 479 480 static status_t 481 allocate_area_page_protections(VMArea* area) 482 { 483 size_t bytes = area_page_protections_size(area->Size()); 484 area->page_protections = (uint8*)malloc_etc(bytes, 485 area->address_space == VMAddressSpace::Kernel() 486 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 487 if (area->page_protections == NULL) 488 return B_NO_MEMORY; 489 490 // init the page protections for all pages to that of the area 491 uint32 areaProtection = area->protection 492 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 493 memset(area->page_protections, areaProtection | (areaProtection << 4), 494 bytes); 495 return B_OK; 496 } 497 498 499 static inline void 500 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 501 { 502 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 503 addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 504 uint8& entry = area->page_protections[pageIndex / 2]; 505 if (pageIndex % 2 == 0) 506 entry = (entry & 0xf0) | protection; 507 else 508 entry = (entry & 0x0f) | (protection << 4); 509 } 510 511 512 static inline uint32 513 get_area_page_protection(VMArea* area, addr_t pageAddress) 514 { 515 if (area->page_protections == NULL) 516 return area->protection; 517 518 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 519 uint32 protection = area->page_protections[pageIndex / 2]; 520 if 
(pageIndex % 2 == 0) 521 protection &= 0x0f; 522 else 523 protection >>= 4; 524 525 uint32 kernelProtection = 0; 526 if ((protection & B_READ_AREA) != 0) 527 kernelProtection |= B_KERNEL_READ_AREA; 528 if ((protection & B_WRITE_AREA) != 0) 529 kernelProtection |= B_KERNEL_WRITE_AREA; 530 531 // If this is a kernel area we return only the kernel flags. 532 if (area->address_space == VMAddressSpace::Kernel()) 533 return kernelProtection; 534 535 return protection | kernelProtection; 536 } 537 538 539 static inline uint8* 540 realloc_page_protections(uint8* pageProtections, size_t areaSize, 541 uint32 allocationFlags) 542 { 543 size_t bytes = area_page_protections_size(areaSize); 544 return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags); 545 } 546 547 548 /*! The caller must have reserved enough pages the translation map 549 implementation might need to map this page. 550 The page's cache must be locked. 551 */ 552 static status_t 553 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 554 vm_page_reservation* reservation) 555 { 556 VMTranslationMap* map = area->address_space->TranslationMap(); 557 558 bool wasMapped = page->IsMapped(); 559 560 if (area->wiring == B_NO_LOCK) { 561 DEBUG_PAGE_ACCESS_CHECK(page); 562 563 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 564 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 565 gPageMappingsObjectCache, 566 CACHE_DONT_WAIT_FOR_MEMORY 567 | (isKernelSpace ? 
CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 568 if (mapping == NULL) 569 return B_NO_MEMORY; 570 571 mapping->page = page; 572 mapping->area = area; 573 574 map->Lock(); 575 576 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 577 area->MemoryType(), reservation); 578 579 // insert mapping into lists 580 if (!page->IsMapped()) 581 atomic_add(&gMappedPagesCount, 1); 582 583 page->mappings.Add(mapping); 584 area->mappings.Add(mapping); 585 586 map->Unlock(); 587 } else { 588 DEBUG_PAGE_ACCESS_CHECK(page); 589 590 map->Lock(); 591 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 592 area->MemoryType(), reservation); 593 map->Unlock(); 594 595 increment_page_wired_count(page); 596 } 597 598 if (!wasMapped) { 599 // The page is mapped now, so we must not remain in the cached queue. 600 // It also makes sense to move it from the inactive to the active, since 601 // otherwise the page daemon wouldn't come to keep track of it (in idle 602 // mode) -- if the page isn't touched, it will be deactivated after a 603 // full iteration through the queue at the latest. 604 if (page->State() == PAGE_STATE_CACHED 605 || page->State() == PAGE_STATE_INACTIVE) { 606 vm_page_set_state(page, PAGE_STATE_ACTIVE); 607 } 608 } 609 610 return B_OK; 611 } 612 613 614 /*! If \a preserveModified is \c true, the caller must hold the lock of the 615 page's cache. 616 */ 617 static inline bool 618 unmap_page(VMArea* area, addr_t virtualAddress) 619 { 620 return area->address_space->TranslationMap()->UnmapPage(area, 621 virtualAddress, true); 622 } 623 624 625 /*! If \a preserveModified is \c true, the caller must hold the lock of all 626 mapped pages' caches. 
627 */ 628 static inline void 629 unmap_pages(VMArea* area, addr_t base, size_t size) 630 { 631 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 632 } 633 634 635 static inline bool 636 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset) 637 { 638 if (address < area->Base()) { 639 offset = area->Base() - address; 640 if (offset >= size) 641 return false; 642 643 address = area->Base(); 644 size -= offset; 645 offset = 0; 646 if (size > area->Size()) 647 size = area->Size(); 648 649 return true; 650 } 651 652 offset = address - area->Base(); 653 if (offset >= area->Size()) 654 return false; 655 656 if (size >= area->Size() - offset) 657 size = area->Size() - offset; 658 659 return true; 660 } 661 662 663 /*! Cuts a piece out of an area. If the given cut range covers the complete 664 area, it is deleted. If it covers the beginning or the end, the area is 665 resized accordingly. If the range covers some part in the middle of the 666 area, it is split in two; in this case the second area is returned via 667 \a _secondArea (the variable is left untouched in the other cases). 668 The address space must be write locked. 669 The caller must ensure that no part of the given range is wired. 670 */ 671 static status_t 672 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 673 addr_t size, VMArea** _secondArea, bool kernel) 674 { 675 addr_t offset; 676 if (!intersect_area(area, address, size, offset)) 677 return B_OK; 678 679 // Is the area fully covered? 
680 if (address == area->Base() && size == area->Size()) { 681 delete_area(addressSpace, area, false); 682 return B_OK; 683 } 684 685 int priority; 686 uint32 allocationFlags; 687 if (addressSpace == VMAddressSpace::Kernel()) { 688 priority = VM_PRIORITY_SYSTEM; 689 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 690 | HEAP_DONT_LOCK_KERNEL_SPACE; 691 } else { 692 priority = VM_PRIORITY_USER; 693 allocationFlags = 0; 694 } 695 696 VMCache* cache = vm_area_get_locked_cache(area); 697 VMCacheChainLocker cacheChainLocker(cache); 698 cacheChainLocker.LockAllSourceCaches(); 699 700 // If no one else uses the area's cache and it's an anonymous cache, we can 701 // resize or split it, too. 702 bool onlyCacheUser = cache->areas == area && area->cache_next == NULL 703 && cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM; 704 705 const addr_t oldSize = area->Size(); 706 707 // Cut the end only? 708 if (offset > 0 && size == area->Size() - offset) { 709 status_t error = addressSpace->ShrinkAreaTail(area, offset, 710 allocationFlags); 711 if (error != B_OK) 712 return error; 713 714 if (area->page_protections != NULL) { 715 uint8* newProtections = realloc_page_protections( 716 area->page_protections, area->Size(), allocationFlags); 717 718 if (newProtections == NULL) { 719 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 720 return B_NO_MEMORY; 721 } 722 723 area->page_protections = newProtections; 724 } 725 726 // unmap pages 727 unmap_pages(area, address, size); 728 729 if (onlyCacheUser) { 730 // Since VMCache::Resize() can temporarily drop the lock, we must 731 // unlock all lower caches to prevent locking order inversion. 732 cacheChainLocker.Unlock(cache); 733 cache->Resize(cache->virtual_base + offset, priority); 734 cache->ReleaseRefAndUnlock(); 735 } 736 737 return B_OK; 738 } 739 740 // Cut the beginning only? 
741 if (area->Base() == address) { 742 uint8* newProtections = NULL; 743 if (area->page_protections != NULL) { 744 // Allocate all memory before shifting as the shift might lose some 745 // bits. 746 newProtections = realloc_page_protections(NULL, area->Size(), 747 allocationFlags); 748 749 if (newProtections == NULL) 750 return B_NO_MEMORY; 751 } 752 753 // resize the area 754 status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size, 755 allocationFlags); 756 if (error != B_OK) { 757 if (newProtections != NULL) 758 free_etc(newProtections, allocationFlags); 759 return error; 760 } 761 762 if (area->page_protections != NULL) { 763 size_t oldBytes = area_page_protections_size(oldSize); 764 ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE; 765 bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4)); 766 767 size_t bytes = area_page_protections_size(area->Size()); 768 memcpy(newProtections, area->page_protections, bytes); 769 free_etc(area->page_protections, allocationFlags); 770 area->page_protections = newProtections; 771 } 772 773 // unmap pages 774 unmap_pages(area, address, size); 775 776 if (onlyCacheUser) { 777 // Since VMCache::Rebase() can temporarily drop the lock, we must 778 // unlock all lower caches to prevent locking order inversion. 779 cacheChainLocker.Unlock(cache); 780 cache->Rebase(cache->virtual_base + size, priority); 781 cache->ReleaseRefAndUnlock(); 782 } 783 area->cache_offset += size; 784 785 return B_OK; 786 } 787 788 // The tough part -- cut a piece out of the middle of the area. 789 // We do that by shrinking the area to the begin section and creating a 790 // new area for the end section. 
791 addr_t firstNewSize = offset; 792 addr_t secondBase = address + size; 793 addr_t secondSize = area->Size() - offset - size; 794 795 // unmap pages 796 unmap_pages(area, address, area->Size() - firstNewSize); 797 798 // resize the area 799 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 800 allocationFlags); 801 if (error != B_OK) 802 return error; 803 804 uint8* areaNewProtections = NULL; 805 uint8* secondAreaNewProtections = NULL; 806 807 // Try to allocate the new memory before making some hard to reverse 808 // changes. 809 if (area->page_protections != NULL) { 810 areaNewProtections = realloc_page_protections(NULL, area->Size(), 811 allocationFlags); 812 secondAreaNewProtections = realloc_page_protections(NULL, secondSize, 813 allocationFlags); 814 815 if (areaNewProtections == NULL || secondAreaNewProtections == NULL) { 816 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 817 free_etc(areaNewProtections, allocationFlags); 818 free_etc(secondAreaNewProtections, allocationFlags); 819 return B_NO_MEMORY; 820 } 821 } 822 823 virtual_address_restrictions addressRestrictions = {}; 824 addressRestrictions.address = (void*)secondBase; 825 addressRestrictions.address_specification = B_EXACT_ADDRESS; 826 VMArea* secondArea; 827 828 if (onlyCacheUser) { 829 // Create a new cache for the second area. 
830 VMCache* secondCache; 831 error = VMCacheFactory::CreateAnonymousCache(secondCache, 832 area->protection & B_OVERCOMMITTING_AREA, 0, 0, 833 dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority); 834 if (error != B_OK) { 835 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 836 free_etc(areaNewProtections, allocationFlags); 837 free_etc(secondAreaNewProtections, allocationFlags); 838 return error; 839 } 840 841 secondCache->Lock(); 842 secondCache->temporary = cache->temporary; 843 secondCache->virtual_base = area->cache_offset; 844 secondCache->virtual_end = area->cache_offset + secondSize; 845 846 // Transfer the concerned pages from the first cache. 847 off_t adoptOffset = area->cache_offset + secondBase - area->Base(); 848 error = secondCache->Adopt(cache, adoptOffset, secondSize, 849 area->cache_offset); 850 851 if (error == B_OK) { 852 // Since VMCache::Resize() can temporarily drop the lock, we must 853 // unlock all lower caches to prevent locking order inversion. 854 cacheChainLocker.Unlock(cache); 855 cache->Resize(cache->virtual_base + firstNewSize, priority); 856 // Don't unlock the cache yet because we might have to resize it 857 // back. 858 859 // Map the second area. 860 error = map_backing_store(addressSpace, secondCache, 861 area->cache_offset, area->name, secondSize, area->wiring, 862 area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0, 863 &addressRestrictions, kernel, &secondArea, NULL); 864 } 865 866 if (error != B_OK) { 867 // Restore the original cache. 868 cache->Resize(cache->virtual_base + oldSize, priority); 869 870 // Move the pages back. 871 status_t readoptStatus = cache->Adopt(secondCache, 872 area->cache_offset, secondSize, adoptOffset); 873 if (readoptStatus != B_OK) { 874 // Some (swap) pages have not been moved back and will be lost 875 // once the second cache is deleted. 
876 panic("failed to restore cache range: %s", 877 strerror(readoptStatus)); 878 879 // TODO: Handle out of memory cases by freeing memory and 880 // retrying. 881 } 882 883 cache->ReleaseRefAndUnlock(); 884 secondCache->ReleaseRefAndUnlock(); 885 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 886 free_etc(areaNewProtections, allocationFlags); 887 free_etc(secondAreaNewProtections, allocationFlags); 888 return error; 889 } 890 891 // Now we can unlock it. 892 cache->ReleaseRefAndUnlock(); 893 secondCache->Unlock(); 894 } else { 895 error = map_backing_store(addressSpace, cache, area->cache_offset 896 + (secondBase - area->Base()), 897 area->name, secondSize, area->wiring, area->protection, 898 area->protection_max, REGION_NO_PRIVATE_MAP, 0, 899 &addressRestrictions, kernel, &secondArea, NULL); 900 if (error != B_OK) { 901 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 902 free_etc(areaNewProtections, allocationFlags); 903 free_etc(secondAreaNewProtections, allocationFlags); 904 return error; 905 } 906 // We need a cache reference for the new area. 907 cache->AcquireRefLocked(); 908 } 909 910 if (area->page_protections != NULL) { 911 // Copy the protection bits of the first area. 912 size_t areaBytes = area_page_protections_size(area->Size()); 913 memcpy(areaNewProtections, area->page_protections, areaBytes); 914 uint8* areaOldProtections = area->page_protections; 915 area->page_protections = areaNewProtections; 916 917 // Shift the protection bits of the second area to the start of 918 // the old array. 919 size_t oldBytes = area_page_protections_size(oldSize); 920 addr_t secondAreaOffset = secondBase - area->Base(); 921 ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE; 922 bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4)); 923 924 // Copy the protection bits of the second area. 
925 size_t secondAreaBytes = area_page_protections_size(secondSize); 926 memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes); 927 secondArea->page_protections = secondAreaNewProtections; 928 929 // We don't need this anymore. 930 free_etc(areaOldProtections, allocationFlags); 931 932 // Set the correct page protections for the second area. 933 VMTranslationMap* map = addressSpace->TranslationMap(); 934 map->Lock(); 935 for (VMCachePagesTree::Iterator it 936 = secondArea->cache->pages.GetIterator(); 937 vm_page* page = it.Next();) { 938 if (is_page_in_area(secondArea, page)) { 939 addr_t address = virtual_page_address(secondArea, page); 940 uint32 pageProtection 941 = get_area_page_protection(secondArea, address); 942 map->ProtectPage(secondArea, address, pageProtection); 943 } 944 } 945 map->Unlock(); 946 } 947 948 if (_secondArea != NULL) 949 *_secondArea = secondArea; 950 951 return B_OK; 952 } 953 954 955 /*! Deletes or cuts all areas in the given address range. 956 The address space must be write-locked. 957 The caller must ensure that no part of the given range is wired. 958 */ 959 static status_t 960 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 961 bool kernel) 962 { 963 size = PAGE_ALIGN(size); 964 965 // Check, whether the caller is allowed to modify the concerned areas. 
966 if (!kernel) { 967 for (VMAddressSpace::AreaRangeIterator it 968 = addressSpace->GetAreaRangeIterator(address, size); 969 VMArea* area = it.Next();) { 970 971 if ((area->protection & B_KERNEL_AREA) != 0) { 972 dprintf("unmap_address_range: team %" B_PRId32 " tried to " 973 "unmap range of kernel area %" B_PRId32 " (%s)\n", 974 team_get_current_team_id(), area->id, area->name); 975 return B_NOT_ALLOWED; 976 } 977 } 978 } 979 980 for (VMAddressSpace::AreaRangeIterator it 981 = addressSpace->GetAreaRangeIterator(address, size); 982 VMArea* area = it.Next();) { 983 984 status_t error = cut_area(addressSpace, area, address, size, NULL, 985 kernel); 986 if (error != B_OK) 987 return error; 988 // Failing after already messing with areas is ugly, but we 989 // can't do anything about it. 990 } 991 992 return B_OK; 993 } 994 995 996 static status_t 997 discard_area_range(VMArea* area, addr_t address, addr_t size) 998 { 999 addr_t offset; 1000 if (!intersect_area(area, address, size, offset)) 1001 return B_OK; 1002 1003 // If someone else uses the area's cache or it's not an anonymous cache, we 1004 // can't discard. 1005 VMCache* cache = vm_area_get_locked_cache(area); 1006 if (cache->areas != area || area->cache_next != NULL 1007 || !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) { 1008 return B_OK; 1009 } 1010 1011 VMCacheChainLocker cacheChainLocker(cache); 1012 cacheChainLocker.LockAllSourceCaches(); 1013 1014 unmap_pages(area, address, size); 1015 1016 // Since VMCache::Discard() can temporarily drop the lock, we must 1017 // unlock all lower caches to prevent locking order inversion. 
1018 cacheChainLocker.Unlock(cache); 1019 cache->Discard(cache->virtual_base + offset, size); 1020 cache->ReleaseRefAndUnlock(); 1021 1022 return B_OK; 1023 } 1024 1025 1026 static status_t 1027 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 1028 bool kernel) 1029 { 1030 for (VMAddressSpace::AreaRangeIterator it 1031 = addressSpace->GetAreaRangeIterator(address, size); 1032 VMArea* area = it.Next();) { 1033 status_t error = discard_area_range(area, address, size); 1034 if (error != B_OK) 1035 return error; 1036 } 1037 1038 return B_OK; 1039 } 1040 1041 1042 /*! You need to hold the lock of the cache and the write lock of the address 1043 space when calling this function. 1044 Note, that in case of error your cache will be temporarily unlocked. 1045 If \a addressSpec is \c B_EXACT_ADDRESS and the 1046 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 1047 that no part of the specified address range (base \c *_virtualAddress, size 1048 \a size) is wired. The cache will also be temporarily unlocked. 
1049 */ 1050 static status_t 1051 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset, 1052 const char* areaName, addr_t size, int wiring, int protection, 1053 int protectionMax, int mapping, 1054 uint32 flags, const virtual_address_restrictions* addressRestrictions, 1055 bool kernel, VMArea** _area, void** _virtualAddress) 1056 { 1057 TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%" 1058 B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d" 1059 ", protection %d, protectionMax %d, area %p, areaName '%s'\n", 1060 addressSpace, cache, addressRestrictions->address, offset, size, 1061 addressRestrictions->address_specification, wiring, protection, 1062 protectionMax, _area, areaName)); 1063 cache->AssertLocked(); 1064 1065 if (size == 0) { 1066 #if KDEBUG 1067 panic("map_backing_store(): called with size=0 for area '%s'!", 1068 areaName); 1069 #endif 1070 return B_BAD_VALUE; 1071 } 1072 if (offset < 0) 1073 return B_BAD_VALUE; 1074 1075 uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 1076 | HEAP_DONT_LOCK_KERNEL_SPACE; 1077 int priority; 1078 if (addressSpace != VMAddressSpace::Kernel()) { 1079 priority = VM_PRIORITY_USER; 1080 } else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) { 1081 priority = VM_PRIORITY_VIP; 1082 allocationFlags |= HEAP_PRIORITY_VIP; 1083 } else 1084 priority = VM_PRIORITY_SYSTEM; 1085 1086 VMArea* area = addressSpace->CreateArea(areaName, wiring, protection, 1087 allocationFlags); 1088 if (mapping != REGION_PRIVATE_MAP) 1089 area->protection_max = protectionMax & B_USER_PROTECTION; 1090 if (area == NULL) 1091 return B_NO_MEMORY; 1092 1093 status_t status; 1094 1095 // if this is a private map, we need to create a new cache 1096 // to handle the private copies of pages as they are written to 1097 VMCache* sourceCache = cache; 1098 if (mapping == REGION_PRIVATE_MAP) { 1099 VMCache* newCache; 1100 1101 // create an anonymous cache 1102 status = 
VMCacheFactory::CreateAnonymousCache(newCache, 1103 (protection & B_STACK_AREA) != 0 1104 || (protection & B_OVERCOMMITTING_AREA) != 0, 0, 1105 cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER); 1106 if (status != B_OK) 1107 goto err1; 1108 1109 newCache->Lock(); 1110 newCache->temporary = 1; 1111 newCache->virtual_base = offset; 1112 newCache->virtual_end = offset + size; 1113 1114 cache->AddConsumer(newCache); 1115 1116 cache = newCache; 1117 } 1118 1119 if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) { 1120 status = cache->SetMinimalCommitment(size, priority); 1121 if (status != B_OK) 1122 goto err2; 1123 } 1124 1125 // check to see if this address space has entered DELETE state 1126 if (addressSpace->IsBeingDeleted()) { 1127 // okay, someone is trying to delete this address space now, so we can't 1128 // insert the area, so back out 1129 status = B_BAD_TEAM_ID; 1130 goto err2; 1131 } 1132 1133 if (addressRestrictions->address_specification == B_EXACT_ADDRESS 1134 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) { 1135 // temporarily unlock the current cache since it might be mapped to 1136 // some existing area, and unmap_address_range also needs to lock that 1137 // cache to delete the area. 1138 cache->Unlock(); 1139 status = unmap_address_range(addressSpace, 1140 (addr_t)addressRestrictions->address, size, kernel); 1141 cache->Lock(); 1142 if (status != B_OK) 1143 goto err2; 1144 } 1145 1146 status = addressSpace->InsertArea(area, size, addressRestrictions, 1147 allocationFlags, _virtualAddress); 1148 if (status == B_NO_MEMORY 1149 && addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) { 1150 // Due to how many locks are held, we cannot wait here for space to be 1151 // freed up, but we can at least notify the low_resource handler. 
1152 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0); 1153 } 1154 if (status != B_OK) 1155 goto err2; 1156 1157 // attach the cache to the area 1158 area->cache = cache; 1159 area->cache_offset = offset; 1160 1161 // point the cache back to the area 1162 cache->InsertAreaLocked(area); 1163 if (mapping == REGION_PRIVATE_MAP) 1164 cache->Unlock(); 1165 1166 // insert the area in the global areas map 1167 VMAreas::Insert(area); 1168 1169 // grab a ref to the address space (the area holds this) 1170 addressSpace->Get(); 1171 1172 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1173 // cache, sourceCache, areaName, area); 1174 1175 *_area = area; 1176 return B_OK; 1177 1178 err2: 1179 if (mapping == REGION_PRIVATE_MAP) { 1180 // We created this cache, so we must delete it again. Note, that we 1181 // need to temporarily unlock the source cache or we'll otherwise 1182 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1183 sourceCache->Unlock(); 1184 cache->ReleaseRefAndUnlock(); 1185 sourceCache->Lock(); 1186 } 1187 err1: 1188 addressSpace->DeleteArea(area, allocationFlags); 1189 return status; 1190 } 1191 1192 1193 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 1194 locker1, locker2). 1195 */ 1196 template<typename LockerType1, typename LockerType2> 1197 static inline bool 1198 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 1199 { 1200 area->cache->AssertLocked(); 1201 1202 VMAreaUnwiredWaiter waiter; 1203 if (!area->AddWaiterIfWired(&waiter)) 1204 return false; 1205 1206 // unlock everything and wait 1207 if (locker1 != NULL) 1208 locker1->Unlock(); 1209 if (locker2 != NULL) 1210 locker2->Unlock(); 1211 1212 waiter.waitEntry.Wait(); 1213 1214 return true; 1215 } 1216 1217 1218 /*! Checks whether the given area has any wired ranges intersecting with the 1219 specified range and waits, if so. 
1220 1221 When it has to wait, the function calls \c Unlock() on both \a locker1 1222 and \a locker2, if given. 1223 The area's top cache must be locked and must be unlocked as a side effect 1224 of calling \c Unlock() on either \a locker1 or \a locker2. 1225 1226 If the function does not have to wait it does not modify or unlock any 1227 object. 1228 1229 \param area The area to be checked. 1230 \param base The base address of the range to check. 1231 \param size The size of the address range to check. 1232 \param locker1 An object to be unlocked when before starting to wait (may 1233 be \c NULL). 1234 \param locker2 An object to be unlocked when before starting to wait (may 1235 be \c NULL). 1236 \return \c true, if the function had to wait, \c false otherwise. 1237 */ 1238 template<typename LockerType1, typename LockerType2> 1239 static inline bool 1240 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 1241 LockerType1* locker1, LockerType2* locker2) 1242 { 1243 area->cache->AssertLocked(); 1244 1245 VMAreaUnwiredWaiter waiter; 1246 if (!area->AddWaiterIfWired(&waiter, base, size)) 1247 return false; 1248 1249 // unlock everything and wait 1250 if (locker1 != NULL) 1251 locker1->Unlock(); 1252 if (locker2 != NULL) 1253 locker2->Unlock(); 1254 1255 waiter.waitEntry.Wait(); 1256 1257 return true; 1258 } 1259 1260 1261 /*! Checks whether the given address space has any wired ranges intersecting 1262 with the specified range and waits, if so. 1263 1264 Similar to wait_if_area_range_is_wired(), with the following differences: 1265 - All areas intersecting with the range are checked (respectively all until 1266 one is found that contains a wired range intersecting with the given 1267 range). 1268 - The given address space must at least be read-locked and must be unlocked 1269 when \c Unlock() is called on \a locker. 1270 - None of the areas' caches are allowed to be locked. 
1271 */ 1272 template<typename LockerType> 1273 static inline bool 1274 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 1275 size_t size, LockerType* locker) 1276 { 1277 for (VMAddressSpace::AreaRangeIterator it 1278 = addressSpace->GetAreaRangeIterator(base, size); 1279 VMArea* area = it.Next();) { 1280 1281 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area)); 1282 1283 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker)) 1284 return true; 1285 } 1286 1287 return false; 1288 } 1289 1290 1291 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection(). 1292 It must be called in a situation where the kernel address space may be 1293 locked. 1294 */ 1295 status_t 1296 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie) 1297 { 1298 AddressSpaceReadLocker locker; 1299 VMArea* area; 1300 status_t status = locker.SetFromArea(id, area); 1301 if (status != B_OK) 1302 return status; 1303 1304 if (area->page_protections == NULL) { 1305 status = allocate_area_page_protections(area); 1306 if (status != B_OK) 1307 return status; 1308 } 1309 1310 *cookie = (void*)area; 1311 return B_OK; 1312 } 1313 1314 1315 /*! This is a debug helper function that can only be used with very specific 1316 use cases. 1317 Sets protection for the given address range to the protection specified. 1318 If \a protection is 0 then the involved pages will be marked non-present 1319 in the translation map to cause a fault on access. The pages aren't 1320 actually unmapped however so that they can be marked present again with 1321 additional calls to this function. For this to work the area must be 1322 fully locked in memory so that the pages aren't otherwise touched. 1323 This function does not lock the kernel address space and needs to be 1324 supplied with a \a cookie retrieved from a successful call to 1325 vm_prepare_kernel_area_debug_protection(). 
1326 */ 1327 status_t 1328 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size, 1329 uint32 protection) 1330 { 1331 // check address range 1332 addr_t address = (addr_t)_address; 1333 size = PAGE_ALIGN(size); 1334 1335 if ((address % B_PAGE_SIZE) != 0 1336 || (addr_t)address + size < (addr_t)address 1337 || !IS_KERNEL_ADDRESS(address) 1338 || !IS_KERNEL_ADDRESS((addr_t)address + size)) { 1339 return B_BAD_VALUE; 1340 } 1341 1342 // Translate the kernel protection to user protection as we only store that. 1343 if ((protection & B_KERNEL_READ_AREA) != 0) 1344 protection |= B_READ_AREA; 1345 if ((protection & B_KERNEL_WRITE_AREA) != 0) 1346 protection |= B_WRITE_AREA; 1347 1348 VMAddressSpace* addressSpace = VMAddressSpace::GetKernel(); 1349 VMTranslationMap* map = addressSpace->TranslationMap(); 1350 VMArea* area = (VMArea*)cookie; 1351 1352 addr_t offset = address - area->Base(); 1353 if (area->Size() - offset < size) { 1354 panic("protect range not fully within supplied area"); 1355 return B_BAD_VALUE; 1356 } 1357 1358 if (area->page_protections == NULL) { 1359 panic("area has no page protections"); 1360 return B_BAD_VALUE; 1361 } 1362 1363 // Invalidate the mapping entries so any access to them will fault or 1364 // restore the mapping entries unchanged so that lookup will success again. 1365 map->Lock(); 1366 map->DebugMarkRangePresent(address, address + size, protection != 0); 1367 map->Unlock(); 1368 1369 // And set the proper page protections so that the fault case will actually 1370 // fail and not simply try to map a new page. 
1371 for (addr_t pageAddress = address; pageAddress < address + size; 1372 pageAddress += B_PAGE_SIZE) { 1373 set_area_page_protection(area, pageAddress, protection); 1374 } 1375 1376 return B_OK; 1377 } 1378 1379 1380 status_t 1381 vm_block_address_range(const char* name, void* address, addr_t size) 1382 { 1383 if (!arch_vm_supports_protection(0)) 1384 return B_NOT_SUPPORTED; 1385 1386 AddressSpaceWriteLocker locker; 1387 status_t status = locker.SetTo(VMAddressSpace::KernelID()); 1388 if (status != B_OK) 1389 return status; 1390 1391 VMAddressSpace* addressSpace = locker.AddressSpace(); 1392 1393 // create an anonymous cache 1394 VMCache* cache; 1395 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false, 1396 VM_PRIORITY_SYSTEM); 1397 if (status != B_OK) 1398 return status; 1399 1400 cache->temporary = 1; 1401 cache->virtual_end = size; 1402 cache->Lock(); 1403 1404 VMArea* area; 1405 virtual_address_restrictions addressRestrictions = {}; 1406 addressRestrictions.address = address; 1407 addressRestrictions.address_specification = B_EXACT_ADDRESS; 1408 status = map_backing_store(addressSpace, cache, 0, name, size, 1409 B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions, 1410 true, &area, NULL); 1411 if (status != B_OK) { 1412 cache->ReleaseRefAndUnlock(); 1413 return status; 1414 } 1415 1416 cache->Unlock(); 1417 area->cache_type = CACHE_TYPE_RAM; 1418 return area->id; 1419 } 1420 1421 1422 status_t 1423 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1424 { 1425 AddressSpaceWriteLocker locker(team); 1426 if (!locker.IsLocked()) 1427 return B_BAD_TEAM_ID; 1428 1429 VMAddressSpace* addressSpace = locker.AddressSpace(); 1430 return addressSpace->UnreserveAddressRange((addr_t)address, size, 1431 addressSpace == VMAddressSpace::Kernel() 1432 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
}


/*!	Reserves an address range of \a size bytes in the address space of
	\a team, using \c *_address and \a addressSpec as placement restrictions.
	Returns \c B_BAD_VALUE for a zero \a size, \c B_BAD_TEAM_ID if the address
	space could not be write-locked, and otherwise whatever
	VMAddressSpace::ReserveAddressRange() returns.
*/
status_t
vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
	addr_t size, uint32 flags)
{
	if (size == 0)
		return B_BAD_VALUE;

	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *_address;
	addressRestrictions.address_specification = addressSpec;
	VMAddressSpace* addressSpace = locker.AddressSpace();
	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
		addressSpace == VMAddressSpace::Kernel()
			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
		_address);
}


/*!	Creates an area backed by a new anonymous cache in the address space of
	\a team.

	\a size and \a guardSize are rounded up to page size. Depending on
	\a wiring, memory and pages are reserved up front and the pages are
	allocated and mapped right away (\c B_FULL_LOCK, \c B_CONTIGUOUS), looked
	up from the existing mappings (\c B_ALREADY_WIRED), or left to be mapped
	on demand (\c B_NO_LOCK, \c B_LAZY_LOCK).

	\return The ID of the new area on success. Otherwise an error code:
		\c B_BAD_VALUE (zero size, size smaller than the guard size, unknown
		address specification or wiring), \c B_NOT_SUPPORTED (protection not
		supported by the architecture), \c B_BAD_TEAM_ID (negative team),
		\c B_NO_MEMORY (memory or contiguous page run could not be obtained),
		\c B_WOULD_BLOCK (page reservation failed with
		\c CREATE_AREA_DONT_WAIT), or the error of another failed step.
*/
area_id
vm_create_anonymous_area(team_id team, const char *name, addr_t size,
	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
	const virtual_address_restrictions* virtualAddressRestrictions,
	const physical_address_restrictions* physicalAddressRestrictions,
	bool kernel, void** _address)
{
	VMArea* area;
	VMCache* cache;
	vm_page* page = NULL;
	bool isStack = (protection & B_STACK_AREA) != 0;
	page_num_t guardPages;
	bool canOvercommit = false;
	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
		? VM_PAGE_ALLOC_CLEAR : 0;

	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
		team, name, size));

	size = PAGE_ALIGN(size);
	guardSize = PAGE_ALIGN(guardSize);
	guardPages = guardSize / B_PAGE_SIZE;

	if (size == 0 || size < guardSize)
		return B_BAD_VALUE;
	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	if (team == B_CURRENT_TEAM)
		team = VMAddressSpace::CurrentID();
	if (team < 0)
		return B_BAD_TEAM_ID;

	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
		canOvercommit = true;

#ifdef DEBUG_KERNEL_STACKS
	if ((protection & B_KERNEL_STACK_AREA) != 0)
		isStack = true;
#endif

	// check parameters
	switch (virtualAddressRestrictions->address_specification) {
		case B_ANY_ADDRESS:
		case B_EXACT_ADDRESS:
		case B_BASE_ADDRESS:
		case B_ANY_KERNEL_ADDRESS:
		case B_ANY_KERNEL_BLOCK_ADDRESS:
		case B_RANDOMIZED_ANY_ADDRESS:
		case B_RANDOMIZED_BASE_ADDRESS:
			break;

		default:
			return B_BAD_VALUE;
	}

	// If low or high physical address restrictions are given, we force
	// B_CONTIGUOUS wiring, since only then we'll use
	// vm_page_allocate_page_run() which deals with those restrictions.
	if (physicalAddressRestrictions->low_address != 0
		|| physicalAddressRestrictions->high_address != 0) {
		wiring = B_CONTIGUOUS;
	}

	// Normalize the wiring mode: the special modes below are translated into
	// B_FULL_LOCK or B_CONTIGUOUS, where needed with a local copy of the
	// physical restrictions that caps the high address.
	physical_address_restrictions stackPhysicalRestrictions;
	bool doReserveMemory = false;
	switch (wiring) {
		case B_NO_LOCK:
			break;
		case B_FULL_LOCK:
		case B_LAZY_LOCK:
		case B_CONTIGUOUS:
			doReserveMemory = true;
			break;
		case B_ALREADY_WIRED:
			break;
		case B_LOMEM:
			stackPhysicalRestrictions = *physicalAddressRestrictions;
			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
			physicalAddressRestrictions = &stackPhysicalRestrictions;
			wiring = B_CONTIGUOUS;
			doReserveMemory = true;
			break;
		case B_32_BIT_FULL_LOCK:
			if (B_HAIKU_PHYSICAL_BITS <= 32
				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
				wiring = B_FULL_LOCK;
				doReserveMemory = true;
				break;
			}
			// TODO: We don't really support this mode efficiently. Just fall
			// through for now ...
		case B_32_BIT_CONTIGUOUS:
			#if B_HAIKU_PHYSICAL_BITS > 32
				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
					stackPhysicalRestrictions = *physicalAddressRestrictions;
					stackPhysicalRestrictions.high_address
						= (phys_addr_t)1 << 32;
					physicalAddressRestrictions = &stackPhysicalRestrictions;
				}
			#endif
			wiring = B_CONTIGUOUS;
			doReserveMemory = true;
			break;
		default:
			return B_BAD_VALUE;
	}

	// Optimization: For a single-page contiguous allocation without low/high
	// memory restriction B_FULL_LOCK wiring suffices.
	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
		&& physicalAddressRestrictions->low_address == 0
		&& physicalAddressRestrictions->high_address == 0) {
		wiring = B_FULL_LOCK;
	}

	// For full lock or contiguous areas we're also going to map the pages and
	// thus need to reserve pages for the mapping backend upfront.
	addr_t reservedMapPages = 0;
	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
		// This locker is scoped to the block: the address space is only
		// locked to query its translation map.
		AddressSpaceWriteLocker locker;
		status_t status = locker.SetTo(team);
		if (status != B_OK)
			return status;

		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
	}

	int priority;
	if (team != VMAddressSpace::KernelID())
		priority = VM_PRIORITY_USER;
	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
		priority = VM_PRIORITY_VIP;
	else
		priority = VM_PRIORITY_SYSTEM;

	// Reserve memory before acquiring the address space lock. This reduces the
	// chances of failure, since while holding the write lock to the address
	// space (if it is the kernel address space that is), the low memory handler
	// won't be able to free anything for us.
	addr_t reservedMemory = 0;
	if (doReserveMemory) {
		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
			return B_NO_MEMORY;
		reservedMemory = size;
		// TODO: We don't reserve the memory for the pages for the page
		// directories/tables. We actually need to do since we currently don't
		// reclaim them (and probably can't reclaim all of them anyway). Thus
		// there are actually less physical pages than there should be, which
		// can get the VM into trouble in low memory situations.
	}

	AddressSpaceWriteLocker locker;
	VMAddressSpace* addressSpace;
	status_t status;

	// For full lock areas reserve the pages before locking the address
	// space. E.g. block caches can't release their memory while we hold the
	// address space lock.
	page_num_t reservedPages = reservedMapPages;
	if (wiring == B_FULL_LOCK)
		reservedPages += size / B_PAGE_SIZE;

	vm_page_reservation reservation;
	if (reservedPages > 0) {
		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
					priority)) {
				// nothing was reserved, so err0 must not unreserve pages
				reservedPages = 0;
				status = B_WOULD_BLOCK;
				goto err0;
			}
		} else
			vm_page_reserve_pages(&reservation, reservedPages, priority);
	}

	if (wiring == B_CONTIGUOUS) {
		// we try to allocate the page run here upfront as this may easily
		// fail for obvious reasons
		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
		if (page == NULL) {
			status = B_NO_MEMORY;
			goto err0;
		}
	}

	// Lock the address space and, if B_EXACT_ADDRESS and
	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
	// is not wired.
	do {
		status = locker.SetTo(team);
		if (status != B_OK)
			goto err1;

		addressSpace = locker.AddressSpace();
	} while (virtualAddressRestrictions->address_specification
			== B_EXACT_ADDRESS
		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
		&& wait_if_address_range_is_wired(addressSpace,
			(addr_t)virtualAddressRestrictions->address, size, &locker));

	// create an anonymous cache
	// if it's a stack, make sure that two pages are available at least
	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
		wiring == B_NO_LOCK, priority);
	if (status != B_OK)
		goto err1;

	cache->temporary = 1;
	cache->virtual_end = size;
	cache->committed_size = reservedMemory;
		// TODO: This should be done via a method.
	// the reservation is accounted to the cache now, so the error paths
	// below must not unreserve it again
	reservedMemory = 0;

	cache->Lock();

	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
		protection, 0, REGION_NO_PRIVATE_MAP, flags,
		virtualAddressRestrictions, kernel, &area, _address);

	if (status != B_OK) {
		cache->ReleaseRefAndUnlock();
		goto err1;
	}

	locker.DegradeToReadLock();

	switch (wiring) {
		case B_NO_LOCK:
		case B_LAZY_LOCK:
			// do nothing - the pages are mapped in as needed
			break;

		case B_FULL_LOCK:
		{
			// Allocate and map all pages for this area

			off_t offset = 0;
			for (addr_t address = area->Base();
					address < area->Base() + (area->Size() - 1);
					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
#ifdef DEBUG_KERNEL_STACKS
#	ifdef STACK_GROWS_DOWNWARDS
				if (isStack && address < area->Base()
						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
#	else
				if (isStack && address >= area->Base() + area->Size()
						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
#	endif
					continue;
#endif
				vm_page* page = vm_page_allocate_page(&reservation,
					PAGE_STATE_WIRED | pageAllocFlags);
				cache->InsertPage(page, offset);
				map_page(area, page, address, protection, &reservation);

				DEBUG_PAGE_ACCESS_END(page);
			}

			break;
		}

		case B_ALREADY_WIRED:
		{
			// The pages should already be mapped. This is only really useful
			// during boot time. Find the appropriate vm_page objects and stick
			// them in the cache object.
			VMTranslationMap* map = addressSpace->TranslationMap();
			off_t offset = 0;

			if (!gKernelStartup)
				panic("ALREADY_WIRED flag used outside kernel startup\n");

			map->Lock();

			for (addr_t virtualAddress = area->Base();
					virtualAddress < area->Base() + (area->Size() - 1);
					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
				phys_addr_t physicalAddress;
				uint32 flags;
				status = map->Query(virtualAddress, &physicalAddress, &flags);
				if (status < B_OK) {
					panic("looking up mapping failed for va 0x%lx\n",
						virtualAddress);
				}
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL) {
					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
						"\n", physicalAddress);
				}

				DEBUG_PAGE_ACCESS_START(page);

				cache->InsertPage(page, offset);
				increment_page_wired_count(page);
				vm_page_set_state(page, PAGE_STATE_WIRED);
				page->busy = false;

				DEBUG_PAGE_ACCESS_END(page);
			}

			map->Unlock();
			break;
		}

		case B_CONTIGUOUS:
		{
			// We have already allocated our continuous pages run, so we can now
			// just map them in the address space
			VMTranslationMap* map = addressSpace->TranslationMap();
			phys_addr_t physicalAddress
				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
			addr_t virtualAddress = area->Base();
			off_t offset = 0;

			map->Lock();

			for (virtualAddress = area->Base(); virtualAddress < area->Base()
					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
				if (page == NULL)
					panic("couldn't lookup physical page just allocated\n");

				status = map->Map(virtualAddress, physicalAddress, protection,
					area->MemoryType(), &reservation);
				if (status < B_OK)
					panic("couldn't map physical page in page run\n");

				cache->InsertPage(page, offset);
				increment_page_wired_count(page);

				DEBUG_PAGE_ACCESS_END(page);
			}

			map->Unlock();
			break;
		}

		default:
			break;
	}

	cache->Unlock();

	if (reservedPages > 0)
		vm_page_unreserve_pages(&reservation);

	TRACE(("vm_create_anonymous_area: done\n"));

	area->cache_type = CACHE_TYPE_RAM;
	return area->id;

err1:
	// reached only after the contiguous page run (if any) was allocated:
	// give those pages back
	if (wiring == B_CONTIGUOUS) {
		// we had reserved the area space upfront...
		phys_addr_t pageNumber = page->physical_page_number;
		int32 i;
		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
			page = vm_lookup_page(pageNumber);
			if (page == NULL)
				panic("couldn't lookup physical page just allocated\n");

			vm_page_set_state(page, PAGE_STATE_FREE);
		}
	}

err0:
	// release the page and memory reservations that are still held
	if (reservedPages > 0)
		vm_page_unreserve_pages(&reservation);
	if (reservedMemory > 0)
		vm_unreserve_memory(reservedMemory);

	return status;
}


area_id
vm_map_physical_memory(team_id team, const char* name, void** _address,
	uint32 addressSpec, addr_t size, uint32 protection,
	phys_addr_t physicalAddress, bool alreadyWired)
{
	VMArea* area;
	VMCache* cache;
	addr_t mapOffset;

	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
		addressSpec, size, protection, physicalAddress));

	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	// if the physical address is somewhat inside a page,
	// move the actual area down to align on a page boundary
	mapOffset = physicalAddress % B_PAGE_SIZE;
	size +=
mapOffset;
	physicalAddress -= mapOffset;

	size = PAGE_ALIGN(size);

	// create a device cache
	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
	if (status != B_OK)
		return status;

	cache->virtual_end = size;

	cache->Lock();

	// the memory type bits are carried in addressSpec; strip them before
	// passing the address specification on
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *_address;
	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
		B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
		true, &area, _address);

	// on failure drop our cache reference while the cache is still locked
	if (status < B_OK)
		cache->ReleaseRefLocked();

	cache->Unlock();

	if (status == B_OK) {
		// set requested memory type -- use uncached, if not given
		uint32 memoryType = addressSpec & B_MTR_MASK;
		if (memoryType == 0)
			memoryType = B_MTR_UC;

		area->SetMemoryType(memoryType);

		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
		if (status != B_OK)
			delete_area(locker.AddressSpace(), area, false);
	}

	if (status != B_OK)
		return status;

	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();

	if (alreadyWired) {
		// The area is already mapped, but possibly not with the right
		// memory type.
		map->Lock();
		map->ProtectArea(area, area->protection);
		map->Unlock();
	} else {
		// Map the area completely.

		// reserve pages needed for the mapping
		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
			area->Base() + (size - 1));
		vm_page_reservation reservation;
		vm_page_reserve_pages(&reservation, reservePages,
			team == VMAddressSpace::KernelID()
				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);

		map->Lock();

		// map page by page; virtual and physical offsets advance in lockstep
		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
			map->Map(area->Base() + offset, physicalAddress + offset,
				protection, area->MemoryType(), &reservation);
		}

		map->Unlock();

		vm_page_unreserve_pages(&reservation);
	}

	// modify the pointer returned to be offset back into the new area
	// the same way the physical address in was offset
	*_address = (void*)((addr_t)*_address + mapOffset);

	area->cache_type = CACHE_TYPE_DEVICE;
	return area->id;
}


/*!	Don't use!
	TODO: This function was introduced to map physical page vecs to
	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
	use a device cache and does not track vm_page::wired_count!
*/
area_id
vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
	uint32 addressSpec, addr_t* _size, uint32 protection,
	struct generic_io_vec* vecs, uint32 vecCount)
{
	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
		addressSpec, _size, protection, vecs, vecCount));

	// memory type bits in addressSpec are not supported by this function
	if (!arch_vm_supports_protection(protection)
		|| (addressSpec & B_MTR_MASK) != 0) {
		return B_NOT_SUPPORTED;
	}

	AddressSpaceWriteLocker locker(team);
	if (!locker.IsLocked())
		return B_BAD_TEAM_ID;

	if (vecCount == 0)
		return B_BAD_VALUE;

	// every vec has to be page aligned in both base and length; the area
	// size is the sum of all vec lengths
	addr_t size = 0;
	for (uint32 i = 0; i < vecCount; i++) {
		if (vecs[i].base % B_PAGE_SIZE != 0
			|| vecs[i].length % B_PAGE_SIZE != 0) {
			return B_BAD_VALUE;
		}

		size += vecs[i].length;
	}

	// create a device cache
	VMCache* cache;
	status_t result = VMCacheFactory::CreateDeviceCache(cache,
vecs[0].base); 1985 if (result != B_OK) 1986 return result; 1987 1988 cache->virtual_end = size; 1989 1990 cache->Lock(); 1991 1992 VMArea* area; 1993 virtual_address_restrictions addressRestrictions = {}; 1994 addressRestrictions.address = *_address; 1995 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1996 result = map_backing_store(locker.AddressSpace(), cache, 0, name, 1997 size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, 1998 &addressRestrictions, true, &area, _address); 1999 2000 if (result != B_OK) 2001 cache->ReleaseRefLocked(); 2002 2003 cache->Unlock(); 2004 2005 if (result != B_OK) 2006 return result; 2007 2008 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2009 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 2010 area->Base() + (size - 1)); 2011 2012 vm_page_reservation reservation; 2013 vm_page_reserve_pages(&reservation, reservePages, 2014 team == VMAddressSpace::KernelID() 2015 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2016 map->Lock(); 2017 2018 uint32 vecIndex = 0; 2019 size_t vecOffset = 0; 2020 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 2021 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) { 2022 vecOffset = 0; 2023 vecIndex++; 2024 } 2025 2026 if (vecIndex >= vecCount) 2027 break; 2028 2029 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 2030 protection, area->MemoryType(), &reservation); 2031 2032 vecOffset += B_PAGE_SIZE; 2033 } 2034 2035 map->Unlock(); 2036 vm_page_unreserve_pages(&reservation); 2037 2038 if (_size != NULL) 2039 *_size = size; 2040 2041 area->cache_type = CACHE_TYPE_DEVICE; 2042 return area->id; 2043 } 2044 2045 2046 area_id 2047 vm_create_null_area(team_id team, const char* name, void** address, 2048 uint32 addressSpec, addr_t size, uint32 flags) 2049 { 2050 size = PAGE_ALIGN(size); 2051 2052 // Lock the address space and, if B_EXACT_ADDRESS and 2053 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure 
the address range 2054 // is not wired. 2055 AddressSpaceWriteLocker locker; 2056 do { 2057 if (locker.SetTo(team) != B_OK) 2058 return B_BAD_TEAM_ID; 2059 } while (addressSpec == B_EXACT_ADDRESS 2060 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 2061 && wait_if_address_range_is_wired(locker.AddressSpace(), 2062 (addr_t)*address, size, &locker)); 2063 2064 // create a null cache 2065 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 2066 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 2067 VMCache* cache; 2068 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 2069 if (status != B_OK) 2070 return status; 2071 2072 cache->temporary = 1; 2073 cache->virtual_end = size; 2074 2075 cache->Lock(); 2076 2077 VMArea* area; 2078 virtual_address_restrictions addressRestrictions = {}; 2079 addressRestrictions.address = *address; 2080 addressRestrictions.address_specification = addressSpec; 2081 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 2082 B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA, 2083 REGION_NO_PRIVATE_MAP, flags, 2084 &addressRestrictions, true, &area, address); 2085 2086 if (status < B_OK) { 2087 cache->ReleaseRefAndUnlock(); 2088 return status; 2089 } 2090 2091 cache->Unlock(); 2092 2093 area->cache_type = CACHE_TYPE_NULL; 2094 return area->id; 2095 } 2096 2097 2098 /*! Creates the vnode cache for the specified \a vnode. 2099 The vnode has to be marked busy when calling this function. 2100 */ 2101 status_t 2102 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 2103 { 2104 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 2105 } 2106 2107 2108 /*! \a cache must be locked. The area's address space must be read-locked. 
*/
static void
pre_map_area_pages(VMArea* area, VMCache* cache,
	vm_page_reservation* reservation)
{
	// Maps the pages already present in \a cache that lie within the area's
	// cache range [firstPage, endPage) into the area, read-only. Pages needed
	// by map_page() are taken from \a reservation.
	addr_t baseAddress = area->Base();
	addr_t cacheOffset = area->cache_offset;
	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
		// exclusive upper bound

	for (VMCachePagesTree::Iterator it
				= cache->pages.GetIterator(firstPage, true, true);
			vm_page* page = it.Next();) {
		if (page->cache_offset >= endPage)
			break;

		// skip busy and inactive pages
		if (page->busy || page->usage_count == 0)
			continue;

		DEBUG_PAGE_ACCESS_START(page);
		map_page(area, page,
			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
		DEBUG_PAGE_ACCESS_END(page);
	}
}


/*!	Will map the file specified by \a fd to an area in memory.
	The file will be mirrored beginning at the specified \a offset. The
	\a offset and \a size arguments have to be page aligned.

	If \a fd is negative, no file is involved and an anonymous area is
	created via vm_create_anonymous_area() instead.

	\param team The team whose address space shall contain the area.
	\param name The name of the new area.
	\param _address In: the requested base address; out: the chosen address.
	\param addressSpec The address specification. Unless it is
		\c B_EXACT_ADDRESS, \a unmapAddressRange is ignored.
	\param size The size of the mapping; rounded up with PAGE_ALIGN().
	\param protection The requested protection. For
		\c REGION_NO_PRIVATE_MAP mappings \c B_SHARED_AREA is added.
	\param mapping \c REGION_NO_PRIVATE_MAP or \c REGION_PRIVATE_MAP.
	\param unmapAddressRange Whether CREATE_AREA_UNMAP_ADDRESS_RANGE shall be
		passed on.
	\param fd The file descriptor to map, or a negative value.
	\param offset The file offset; rounded down to a page boundary.
	\param kernel Whether \a fd refers to the kernel I/O context; also passed
		on to map_backing_store().
	\return The ID of the new area, \c EBADF for an invalid descriptor,
		\c EACCES if the descriptor's open mode does not permit the mapping,
		\c B_BAD_TEAM_ID if the address space cannot be locked, or another
		error code returned by one of the called functions.
*/
static area_id
_vm_map_file(team_id team, const char* name, void** _address,
	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
	bool unmapAddressRange, int fd, off_t offset, bool kernel)
{
	// TODO: for binary files, we want to make sure that they get the
	//	copy of a file at a given time, ie. later changes should not
	//	make it into the mapped copy -- this will need quite some changes
	//	to be done in a nice way
	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
		"%" B_PRIu32 ")\n", fd, offset, size, mapping));

	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
	size = PAGE_ALIGN(size);

	if (mapping == REGION_NO_PRIVATE_MAP)
		protection |= B_SHARED_AREA;
	if (addressSpec != B_EXACT_ADDRESS)
		unmapAddressRange = false;

	uint32 mappingFlags = 0;
	if (unmapAddressRange)
		mappingFlags |= CREATE_AREA_UNMAP_ADDRESS_RANGE;

	if (fd < 0) {
		// No file given: fall back to a plain anonymous area.
		virtual_address_restrictions virtualRestrictions = {};
		virtualRestrictions.address = *_address;
		virtualRestrictions.address_specification = addressSpec;
		physical_address_restrictions physicalRestrictions = {};
		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
			mappingFlags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
			_address);
	}

	// get the open flags of the FD
	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return EBADF;
	int32 openMode = descriptor->open_mode;
	put_fd(descriptor);

	// The FD must open for reading at any rate. For shared mapping with write
	// access, additionally the FD must be open for writing.
	if ((openMode & O_ACCMODE) == O_WRONLY
		|| (mapping == REGION_NO_PRIVATE_MAP
			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
			&& (openMode & O_ACCMODE) == O_RDONLY)) {
		return EACCES;
	}

	// For shared mappings the maximum protection follows the open mode: user
	// write access can only ever be granted if the FD is open read/write.
	uint32 protectionMax = 0;
	if (mapping == REGION_NO_PRIVATE_MAP) {
		if ((openMode & O_ACCMODE) == O_RDWR)
			protectionMax = protection | B_USER_PROTECTION;
		else
			protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA);
	} else if (mapping == REGION_PRIVATE_MAP) {
		// For privately mapped read-only regions, skip committing memory.
		// (If protections are changed later on, memory will be committed then.)
		if ((protection & B_WRITE_AREA) == 0)
			mappingFlags |= CREATE_AREA_DONT_COMMIT_MEMORY;
	}

	// get the vnode for the object, this also grabs a ref to it
	struct vnode* vnode = NULL;
	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
	if (status < B_OK)
		return status;
	VnodePutter vnodePutter(vnode);

	// If we're going to pre-map pages, we need to reserve the pages needed by
	// the mapping backend upfront.
	page_num_t reservedPreMapPages = 0;
	vm_page_reservation reservation;
	if ((protection & B_READ_AREA) != 0) {
		// The address space is locked only to query its translation map and
		// unlocked again before the pages are reserved.
		AddressSpaceWriteLocker locker;
		status = locker.SetTo(team);
		if (status != B_OK)
			return status;

		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);

		locker.Unlock();

		vm_page_reserve_pages(&reservation, reservedPreMapPages,
			team == VMAddressSpace::KernelID()
				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
	}

	// Scope guard: gives the reservation back on every return path below,
	// but only if something was actually reserved.
	struct PageUnreserver {
		PageUnreserver(vm_page_reservation* reservation)
			:
			fReservation(reservation)
		{
		}

		~PageUnreserver()
		{
			if (fReservation != NULL)
				vm_page_unreserve_pages(fReservation);
		}

		vm_page_reservation* fReservation;
	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);

	// Lock the address space and, if the specified address range shall be
	// unmapped, ensure it is not wired.
	AddressSpaceWriteLocker locker;
	do {
		if (locker.SetTo(team) != B_OK)
			return B_BAD_TEAM_ID;
	} while (unmapAddressRange
		&& wait_if_address_range_is_wired(locker.AddressSpace(),
			(addr_t)*_address, size, &locker));

	// TODO: this only works for file systems that use the file cache
	VMCache* cache;
	status = vfs_get_vnode_cache(vnode, &cache, false);
	if (status < B_OK)
		return status;

	cache->Lock();

	VMArea* area;
	virtual_address_restrictions addressRestrictions = {};
	addressRestrictions.address = *_address;
	addressRestrictions.address_specification = addressSpec;
	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
		0, protection, protectionMax, mapping, mappingFlags,
		&addressRestrictions, kernel, &area, _address);

	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
		// map_backing_store() cannot know we no longer need the ref
		cache->ReleaseRefLocked();
	}

	// Uses the reservation made above under the same B_READ_AREA condition.
	if (status == B_OK && (protection & B_READ_AREA) != 0)
		pre_map_area_pages(area, cache, &reservation);

	cache->Unlock();

	if (status == B_OK) {
		// TODO: this probably deserves a smarter solution, ie. don't always
		// prefetch stuff, and also, probably don't trigger it at this place.
		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
			// prefetches at max 10 MB starting from "offset"
	}

	if (status != B_OK)
		return status;

	area->cache_type = CACHE_TYPE_VNODE;
	return area->id;
}


/*!	Maps a file into an area; thin wrapper around _vm_map_file().

	Rejects protections the architecture does not support and otherwise
	forwards all arguments to _vm_map_file() with \c kernel set to \c true.

	\return \c B_NOT_SUPPORTED if arch_vm_supports_protection() rejects
		\a protection, otherwise the result of _vm_map_file().
*/
area_id
vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
	int fd, off_t offset)
{
	if (!arch_vm_supports_protection(protection))
		return B_NOT_SUPPORTED;

	return _vm_map_file(aid, name, address, addressSpec, size, protection,
		mapping, unmapAddressRange, fd, offset, true);
}


/*!	Returns the cache of \a area, locked and with an additional reference.

	The area's cache pointer is read under \c sAreaCacheLock. Since the lock
	has to be given up while waiting for the cache lock, the pointer is
	re-checked afterwards and the procedure is repeated if the area's cache
	changed or the cache was deleted in the meantime.
	The result is to be released with vm_area_put_locked_cache().
*/
VMCache*
vm_area_get_locked_cache(VMArea* area)
{
	rw_lock_read_lock(&sAreaCacheLock);

	while (true) {
		VMCache* cache = area->cache;

		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
			// cache has been deleted
			rw_lock_read_lock(&sAreaCacheLock);
			continue;
		}

		rw_lock_read_lock(&sAreaCacheLock);

		if (cache == area->cache) {
			cache->AcquireRefLocked();
			rw_lock_read_unlock(&sAreaCacheLock);
			return cache;
		}

		// the cache changed in the meantime
		cache->Unlock();
	}
}


/*!	Releases a reference to \a cache and unlocks it -- the counterpart of
	vm_area_get_locked_cache().
*/
void
vm_area_put_locked_cache(VMCache* cache)
{
	cache->ReleaseRefAndUnlock();
}


/*!	Creates a new area in \a team's address space that uses the cache of the
	area \a sourceID.

	The source area is marked \c B_SHARED_AREA, and \c B_SHARED_AREA is added
	to \a protection. If the resulting area has \c B_FULL_LOCK wiring, all
	pages are mapped right away: for \c CACHE_TYPE_DEVICE sources by mapping
	the physical range found at the source area's base, otherwise by mapping
	all non-busy pages of the cache.

	\param team The team that shall own the clone.
	\param name The name of the new area.
	\param address In: the requested base address; out: the chosen address.
	\param addressSpec The address specification.
	\param protection The protection for the new area.
	\param mapping \c REGION_PRIVATE_MAP or \c REGION_NO_PRIVATE_MAP.
	\param sourceID The ID of the area to clone.
	\param kernel \c false for userland requests: these may neither clone
		\c B_KERNEL_AREA areas nor, across address spaces, areas without
		\c B_CLONEABLE_AREA.
	\return The ID of the new area, \c B_NOT_ALLOWED if the request is not
		permitted or the source is a \c CACHE_TYPE_NULL area, \c B_BAD_VALUE
		if the source area vanished, or another error code.
*/
area_id
vm_clone_area(team_id team, const char* name, void** address,
	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
	bool kernel)
{
	VMArea* newArea = NULL;
	VMArea* sourceArea;

	// Check whether the source area exists and is cloneable. If so, mark it
	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
	{
		AddressSpaceWriteLocker locker;
		status_t status = locker.SetFromArea(sourceID, sourceArea);
		if (status != B_OK)
			return status;

		if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
			return B_NOT_ALLOWED;

		sourceArea->protection |= B_SHARED_AREA;
		protection |= B_SHARED_AREA;
	}

	// Now lock both address spaces and actually do the cloning.

	MultiAddressSpaceLocker locker;
	VMAddressSpace* sourceAddressSpace;
	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
	if (status != B_OK)
		return status;

	VMAddressSpace* targetAddressSpace;
	status = locker.AddTeam(team, true, &targetAddressSpace);
	if (status != B_OK)
		return status;

	status = locker.Lock();
	if (status != B_OK)
		return status;

	// The locks were dropped after the first check, so the area has to be
	// looked up and checked again.
	sourceArea = lookup_area(sourceAddressSpace, sourceID);
	if (sourceArea == NULL)
		return B_BAD_VALUE;

	if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0)
		return B_NOT_ALLOWED;

	VMCache* cache = vm_area_get_locked_cache(sourceArea);

	if (!kernel && sourceAddressSpace != targetAddressSpace
		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
#if KDEBUG
		Team* team = thread_get_current_thread()->team;
		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
#endif
		status = B_NOT_ALLOWED;
	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
		status = B_NOT_ALLOWED;
	} else {
		virtual_address_restrictions addressRestrictions = {};
		addressRestrictions.address = *address;
		addressRestrictions.address_specification = addressSpec;
		status = map_backing_store(targetAddressSpace, cache,
			sourceArea->cache_offset, name, sourceArea->Size(),
			sourceArea->wiring, protection, sourceArea->protection_max,
			mapping, 0, &addressRestrictions,
			kernel, &newArea, address);
	}
	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
		// to create a new cache, and has therefore already acquired a reference
		// to the source cache - but otherwise it has no idea that we need
		// one.
		cache->AcquireRefLocked();
	}
	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
		// we need to map in everything at this point
		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
			// we don't have actual pages to map but a physical area
			VMTranslationMap* map
				= sourceArea->address_space->TranslationMap();
			map->Lock();

			// Only the physical address of the source area's first page is
			// queried; the loop below maps the following pages at
			// consecutive physical addresses.
			phys_addr_t physicalAddress;
			uint32 oldProtection;
			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);

			map->Unlock();

			map = targetAddressSpace->TranslationMap();
			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
				newArea->Base() + (newArea->Size() - 1));

			vm_page_reservation reservation;
			vm_page_reserve_pages(&reservation, reservePages,
				targetAddressSpace == VMAddressSpace::Kernel()
					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
			map->Lock();

			for (addr_t offset = 0; offset < newArea->Size();
					offset += B_PAGE_SIZE) {
				map->Map(newArea->Base() + offset, physicalAddress + offset,
					protection, newArea->MemoryType(), &reservation);
			}

			map->Unlock();
			vm_page_unreserve_pages(&reservation);
		} else {
			VMTranslationMap* map = targetAddressSpace->TranslationMap();
			size_t reservePages = map->MaxPagesNeededToMap(
				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
			vm_page_reservation reservation;
			vm_page_reserve_pages(&reservation, reservePages,
				targetAddressSpace == VMAddressSpace::Kernel()
					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);

			// map in all pages from source
			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
					vm_page* page = it.Next();) {
				if (!page->busy) {
					DEBUG_PAGE_ACCESS_START(page);
					map_page(newArea, page,
						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
							- newArea->cache_offset),
						protection, &reservation);
					DEBUG_PAGE_ACCESS_END(page);
				}
			}
			// TODO: B_FULL_LOCK means that all pages are locked. We are not
			// ensuring that!

			vm_page_unreserve_pages(&reservation);
		}
	}
	if (status == B_OK)
		newArea->cache_type = sourceArea->cache_type;

	// Gives back the lock and reference from vm_area_get_locked_cache().
	vm_area_put_locked_cache(cache);

	if (status < B_OK)
		return status;

	return newArea->id;
}


/*!	Deletes the specified area of the given address space.

	The address space must be write-locked.
	The caller must ensure that the area does not have any wired ranges.

	\param addressSpace The address space containing the area.
	\param area The area to be deleted.
	\param deletingAddressSpace \c true, if the address space is in the process
		of being deleted.
*/
static void
delete_area(VMAddressSpace* addressSpace, VMArea* area,
	bool deletingAddressSpace)
{
	ASSERT(!area->IsWired());

	VMAreas::Remove(area);

	// At this point the area is removed from the global hash table, but
	// still exists in the area list.

	// Unmap the virtual address space the area occupied.
	{
		// We need to lock the complete cache chain.
		VMCache* topCache = vm_area_get_locked_cache(area);
		VMCacheChainLocker cacheChainLocker(topCache);
		cacheChainLocker.LockAllSourceCaches();

		// If the area's top cache is a temporary cache and the area is the only
		// one referencing it (besides us currently holding a second reference),
		// the unmapping code doesn't need to care about preserving the accessed
		// and dirty flags of the top cache page mappings.
		bool ignoreTopCachePageFlags
			= topCache->temporary && topCache->RefCount() == 2;

		area->address_space->TranslationMap()->UnmapArea(area,
			deletingAddressSpace, ignoreTopCachePageFlags);
	}

	// Non-temporary caches get their modified pages written back.
	if (!area->cache->temporary)
		area->cache->WriteModified();

	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;

	arch_vm_unset_memory_type(area);
	addressSpace->RemoveArea(area, allocationFlags);
	addressSpace->Put();

	area->cache->RemoveArea(area);
	area->cache->ReleaseRef();

	addressSpace->DeleteArea(area, allocationFlags);
}


/*!	Deletes the area \a id of team \a team.

	Locks the area's address space, waits until the area is no longer wired
	and then deletes it via delete_area().

	\param team The team the area is looked up for.
	\param id The ID of the area to delete.
	\param kernel \c false for userland requests, which are not allowed to
		delete \c B_KERNEL_AREA areas.
	\return \c B_OK on success, \c B_NOT_ALLOWED for a denied userland
		request, or the error returned when locking the address space.
*/
status_t
vm_delete_area(team_id team, area_id id, bool kernel)
{
	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
		team, id));

	// lock the address space and make sure the area isn't wired
	AddressSpaceWriteLocker locker;
	VMArea* area;
	AreaCacheLocker cacheLocker;

	do {
		status_t status = locker.SetFromArea(team, id, area);
		if (status != B_OK)
			return status;

		cacheLocker.SetTo(area);
	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));

	cacheLocker.Unlock();

	if (!kernel && (area->protection & B_KERNEL_AREA) != 0)
		return B_NOT_ALLOWED;

	delete_area(locker.AddressSpace(), area, false);
	return B_OK;
}


/*!
Creates a new cache on top of given cache, moves all areas from 2580 the old cache to the new one, and changes the protection of all affected 2581 areas' pages to read-only. If requested, wired pages are moved up to the 2582 new cache and copies are added to the old cache in their place. 2583 Preconditions: 2584 - The given cache must be locked. 2585 - All of the cache's areas' address spaces must be read locked. 2586 - Either the cache must not have any wired ranges or a page reservation for 2587 all wired pages must be provided, so they can be copied. 2588 2589 \param lowerCache The cache on top of which a new cache shall be created. 2590 \param wiredPagesReservation If \c NULL there must not be any wired pages 2591 in \a lowerCache. Otherwise as many pages must be reserved as the cache 2592 has wired page. The wired pages are copied in this case. 2593 */ 2594 static status_t 2595 vm_copy_on_write_area(VMCache* lowerCache, 2596 vm_page_reservation* wiredPagesReservation) 2597 { 2598 VMCache* upperCache; 2599 2600 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2601 2602 // We need to separate the cache from its areas. The cache goes one level 2603 // deeper and we create a new cache inbetween. 
2604 2605 // create an anonymous cache 2606 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2607 lowerCache->GuardSize() / B_PAGE_SIZE, 2608 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL, 2609 VM_PRIORITY_USER); 2610 if (status != B_OK) 2611 return status; 2612 2613 upperCache->Lock(); 2614 2615 upperCache->temporary = 1; 2616 upperCache->virtual_base = lowerCache->virtual_base; 2617 upperCache->virtual_end = lowerCache->virtual_end; 2618 2619 // transfer the lower cache areas to the upper cache 2620 rw_lock_write_lock(&sAreaCacheLock); 2621 upperCache->TransferAreas(lowerCache); 2622 rw_lock_write_unlock(&sAreaCacheLock); 2623 2624 lowerCache->AddConsumer(upperCache); 2625 2626 // We now need to remap all pages from all of the cache's areas read-only, 2627 // so that a copy will be created on next write access. If there are wired 2628 // pages, we keep their protection, move them to the upper cache and create 2629 // copies for the lower cache. 2630 if (wiredPagesReservation != NULL) { 2631 // We need to handle wired pages -- iterate through the cache's pages. 2632 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2633 vm_page* page = it.Next();) { 2634 if (page->WiredCount() > 0) { 2635 // allocate a new page and copy the wired one 2636 vm_page* copiedPage = vm_page_allocate_page( 2637 wiredPagesReservation, PAGE_STATE_ACTIVE); 2638 2639 vm_memcpy_physical_page( 2640 copiedPage->physical_page_number * B_PAGE_SIZE, 2641 page->physical_page_number * B_PAGE_SIZE); 2642 2643 // move the wired page to the upper cache (note: removing is OK 2644 // with the SplayTree iterator) and insert the copy 2645 upperCache->MovePage(page); 2646 lowerCache->InsertPage(copiedPage, 2647 page->cache_offset * B_PAGE_SIZE); 2648 2649 DEBUG_PAGE_ACCESS_END(copiedPage); 2650 } else { 2651 // Change the protection of this page in all areas. 
2652 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2653 tempArea = tempArea->cache_next) { 2654 if (!is_page_in_area(tempArea, page)) 2655 continue; 2656 2657 // The area must be readable in the same way it was 2658 // previously writable. 2659 addr_t address = virtual_page_address(tempArea, page); 2660 uint32 protection = 0; 2661 uint32 pageProtection = get_area_page_protection(tempArea, address); 2662 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2663 protection |= B_KERNEL_READ_AREA; 2664 if ((pageProtection & B_READ_AREA) != 0) 2665 protection |= B_READ_AREA; 2666 2667 VMTranslationMap* map 2668 = tempArea->address_space->TranslationMap(); 2669 map->Lock(); 2670 map->ProtectPage(tempArea, address, protection); 2671 map->Unlock(); 2672 } 2673 } 2674 } 2675 } else { 2676 ASSERT(lowerCache->WiredPagesCount() == 0); 2677 2678 // just change the protection of all areas 2679 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2680 tempArea = tempArea->cache_next) { 2681 if (tempArea->page_protections != NULL) { 2682 // Change the protection of all pages in this area. 2683 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2684 map->Lock(); 2685 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2686 vm_page* page = it.Next();) { 2687 if (!is_page_in_area(tempArea, page)) 2688 continue; 2689 2690 // The area must be readable in the same way it was 2691 // previously writable. 2692 addr_t address = virtual_page_address(tempArea, page); 2693 uint32 protection = 0; 2694 uint32 pageProtection = get_area_page_protection(tempArea, address); 2695 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2696 protection |= B_KERNEL_READ_AREA; 2697 if ((pageProtection & B_READ_AREA) != 0) 2698 protection |= B_READ_AREA; 2699 2700 map->ProtectPage(tempArea, address, protection); 2701 } 2702 map->Unlock(); 2703 continue; 2704 } 2705 // The area must be readable in the same way it was previously 2706 // writable. 
2707 uint32 protection = 0; 2708 if ((tempArea->protection & B_KERNEL_READ_AREA) != 0) 2709 protection |= B_KERNEL_READ_AREA; 2710 if ((tempArea->protection & B_READ_AREA) != 0) 2711 protection |= B_READ_AREA; 2712 2713 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2714 map->Lock(); 2715 map->ProtectArea(tempArea, protection); 2716 map->Unlock(); 2717 } 2718 } 2719 2720 vm_area_put_locked_cache(upperCache); 2721 2722 return B_OK; 2723 } 2724 2725 2726 area_id 2727 vm_copy_area(team_id team, const char* name, void** _address, 2728 uint32 addressSpec, area_id sourceID) 2729 { 2730 // Do the locking: target address space, all address spaces associated with 2731 // the source cache, and the cache itself. 2732 MultiAddressSpaceLocker locker; 2733 VMAddressSpace* targetAddressSpace; 2734 VMCache* cache; 2735 VMArea* source; 2736 AreaCacheLocker cacheLocker; 2737 status_t status; 2738 bool sharedArea; 2739 2740 page_num_t wiredPages = 0; 2741 vm_page_reservation wiredPagesReservation; 2742 2743 bool restart; 2744 do { 2745 restart = false; 2746 2747 locker.Unset(); 2748 status = locker.AddTeam(team, true, &targetAddressSpace); 2749 if (status == B_OK) { 2750 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2751 &cache); 2752 } 2753 if (status != B_OK) 2754 return status; 2755 2756 cacheLocker.SetTo(cache, true); // already locked 2757 2758 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2759 2760 page_num_t oldWiredPages = wiredPages; 2761 wiredPages = 0; 2762 2763 // If the source area isn't shared, count the number of wired pages in 2764 // the cache and reserve as many pages. 
2765 if (!sharedArea) { 2766 wiredPages = cache->WiredPagesCount(); 2767 2768 if (wiredPages > oldWiredPages) { 2769 cacheLocker.Unlock(); 2770 locker.Unlock(); 2771 2772 if (oldWiredPages > 0) 2773 vm_page_unreserve_pages(&wiredPagesReservation); 2774 2775 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2776 VM_PRIORITY_USER); 2777 2778 restart = true; 2779 } 2780 } else if (oldWiredPages > 0) 2781 vm_page_unreserve_pages(&wiredPagesReservation); 2782 } while (restart); 2783 2784 // unreserve pages later 2785 struct PagesUnreserver { 2786 PagesUnreserver(vm_page_reservation* reservation) 2787 : 2788 fReservation(reservation) 2789 { 2790 } 2791 2792 ~PagesUnreserver() 2793 { 2794 if (fReservation != NULL) 2795 vm_page_unreserve_pages(fReservation); 2796 } 2797 2798 private: 2799 vm_page_reservation* fReservation; 2800 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2801 2802 bool writableCopy 2803 = (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2804 uint8* targetPageProtections = NULL; 2805 2806 if (source->page_protections != NULL) { 2807 size_t bytes = area_page_protections_size(source->Size()); 2808 targetPageProtections = (uint8*)malloc_etc(bytes, 2809 (source->address_space == VMAddressSpace::Kernel() 2810 || targetAddressSpace == VMAddressSpace::Kernel()) 2811 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 2812 if (targetPageProtections == NULL) 2813 return B_NO_MEMORY; 2814 2815 memcpy(targetPageProtections, source->page_protections, bytes); 2816 2817 if (!writableCopy) { 2818 for (size_t i = 0; i < bytes; i++) { 2819 if ((targetPageProtections[i] 2820 & (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) { 2821 writableCopy = true; 2822 break; 2823 } 2824 } 2825 } 2826 } 2827 2828 if (addressSpec == B_CLONE_ADDRESS) { 2829 addressSpec = B_EXACT_ADDRESS; 2830 *_address = (void*)source->Base(); 2831 } 2832 2833 // First, create a cache on top of the source area, respectively use the 2834 // existing one, if this is a shared area. 
2835 2836 VMArea* target; 2837 virtual_address_restrictions addressRestrictions = {}; 2838 addressRestrictions.address = *_address; 2839 addressRestrictions.address_specification = addressSpec; 2840 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2841 name, source->Size(), source->wiring, source->protection, 2842 source->protection_max, 2843 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2844 writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2845 &addressRestrictions, true, &target, _address); 2846 if (status < B_OK) { 2847 free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE); 2848 return status; 2849 } 2850 2851 if (targetPageProtections != NULL) 2852 target->page_protections = targetPageProtections; 2853 2854 if (sharedArea) { 2855 // The new area uses the old area's cache, but map_backing_store() 2856 // hasn't acquired a ref. So we have to do that now. 2857 cache->AcquireRefLocked(); 2858 } 2859 2860 // If the source area is writable, we need to move it one layer up as well 2861 2862 if (!sharedArea) { 2863 if (writableCopy) { 2864 // TODO: do something more useful if this fails! 2865 if (vm_copy_on_write_area(cache, 2866 wiredPages > 0 ? 
&wiredPagesReservation : NULL) < B_OK) { 2867 panic("vm_copy_on_write_area() failed!\n"); 2868 } 2869 } 2870 } 2871 2872 // we return the ID of the newly created area 2873 return target->id; 2874 } 2875 2876 2877 status_t 2878 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2879 bool kernel) 2880 { 2881 fix_protection(&newProtection); 2882 2883 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2884 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2885 2886 if (!arch_vm_supports_protection(newProtection)) 2887 return B_NOT_SUPPORTED; 2888 2889 bool becomesWritable 2890 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2891 2892 // lock address spaces and cache 2893 MultiAddressSpaceLocker locker; 2894 VMCache* cache; 2895 VMArea* area; 2896 status_t status; 2897 AreaCacheLocker cacheLocker; 2898 bool isWritable; 2899 2900 bool restart; 2901 do { 2902 restart = false; 2903 2904 locker.Unset(); 2905 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2906 if (status != B_OK) 2907 return status; 2908 2909 cacheLocker.SetTo(cache, true); // already locked 2910 2911 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 2912 || (area->protection & B_KERNEL_AREA) != 0)) { 2913 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2914 "set protection %#" B_PRIx32 " on kernel area %" B_PRId32 2915 " (%s)\n", team, newProtection, areaID, area->name); 2916 return B_NOT_ALLOWED; 2917 } 2918 if (!kernel && area->protection_max != 0 2919 && (newProtection & area->protection_max) 2920 != (newProtection & B_USER_PROTECTION)) { 2921 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2922 "set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel " 2923 "area %" B_PRId32 " (%s)\n", team, newProtection, 2924 area->protection_max, areaID, area->name); 2925 return B_NOT_ALLOWED; 2926 } 2927 2928 if (team != VMAddressSpace::KernelID() 2929 && 
area->address_space->ID() != team) { 2930 // unless you're the kernel, you are only allowed to set 2931 // the protection of your own areas 2932 return B_NOT_ALLOWED; 2933 } 2934 2935 if (area->protection == newProtection) 2936 return B_OK; 2937 2938 isWritable 2939 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2940 2941 // Make sure the area (respectively, if we're going to call 2942 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2943 // wired ranges. 2944 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2945 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2946 otherArea = otherArea->cache_next) { 2947 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2948 restart = true; 2949 break; 2950 } 2951 } 2952 } else { 2953 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2954 restart = true; 2955 } 2956 } while (restart); 2957 2958 bool changePageProtection = true; 2959 bool changeTopCachePagesOnly = false; 2960 2961 if (isWritable && !becomesWritable) { 2962 // writable -> !writable 2963 2964 if (cache->source != NULL && cache->temporary) { 2965 if (cache->CountWritableAreas(area) == 0) { 2966 // Since this cache now lives from the pages in its source cache, 2967 // we can change the cache's commitment to take only those pages 2968 // into account that really are in this cache. 2969 2970 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2971 team == VMAddressSpace::KernelID() 2972 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2973 2974 // TODO: we may be able to join with our source cache, if 2975 // count == 0 2976 } 2977 } 2978 2979 // If only the writability changes, we can just remap the pages of the 2980 // top cache, since the pages of lower caches are mapped read-only 2981 // anyway. That's advantageous only, if the number of pages in the cache 2982 // is significantly smaller than the number of pages in the area, 2983 // though. 
2984 if (newProtection 2985 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2986 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2987 changeTopCachePagesOnly = true; 2988 } 2989 } else if (!isWritable && becomesWritable) { 2990 // !writable -> writable 2991 2992 if (!cache->consumers.IsEmpty()) { 2993 // There are consumers -- we have to insert a new cache. Fortunately 2994 // vm_copy_on_write_area() does everything that's needed. 2995 changePageProtection = false; 2996 status = vm_copy_on_write_area(cache, NULL); 2997 } else { 2998 // No consumers, so we don't need to insert a new one. 2999 if (cache->source != NULL && cache->temporary) { 3000 // the cache's commitment must contain all possible pages 3001 status = cache->Commit(cache->virtual_end - cache->virtual_base, 3002 team == VMAddressSpace::KernelID() 3003 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 3004 } 3005 3006 if (status == B_OK && cache->source != NULL) { 3007 // There's a source cache, hence we can't just change all pages' 3008 // protection or we might allow writing into pages belonging to 3009 // a lower cache. 
3010 changeTopCachePagesOnly = true; 3011 } 3012 } 3013 } else { 3014 // we don't have anything special to do in all other cases 3015 } 3016 3017 if (status == B_OK) { 3018 // remap existing pages in this cache 3019 if (changePageProtection) { 3020 VMTranslationMap* map = area->address_space->TranslationMap(); 3021 map->Lock(); 3022 3023 if (changeTopCachePagesOnly) { 3024 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 3025 page_num_t lastPageOffset 3026 = firstPageOffset + area->Size() / B_PAGE_SIZE; 3027 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 3028 vm_page* page = it.Next();) { 3029 if (page->cache_offset >= firstPageOffset 3030 && page->cache_offset <= lastPageOffset) { 3031 addr_t address = virtual_page_address(area, page); 3032 map->ProtectPage(area, address, newProtection); 3033 } 3034 } 3035 } else 3036 map->ProtectArea(area, newProtection); 3037 3038 map->Unlock(); 3039 } 3040 3041 area->protection = newProtection; 3042 } 3043 3044 return status; 3045 } 3046 3047 3048 status_t 3049 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 3050 { 3051 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 3052 if (addressSpace == NULL) 3053 return B_BAD_TEAM_ID; 3054 3055 VMTranslationMap* map = addressSpace->TranslationMap(); 3056 3057 map->Lock(); 3058 uint32 dummyFlags; 3059 status_t status = map->Query(vaddr, paddr, &dummyFlags); 3060 map->Unlock(); 3061 3062 addressSpace->Put(); 3063 return status; 3064 } 3065 3066 3067 /*! The page's cache must be locked. 
3068 */ 3069 bool 3070 vm_test_map_modification(vm_page* page) 3071 { 3072 if (page->modified) 3073 return true; 3074 3075 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3076 vm_page_mapping* mapping; 3077 while ((mapping = iterator.Next()) != NULL) { 3078 VMArea* area = mapping->area; 3079 VMTranslationMap* map = area->address_space->TranslationMap(); 3080 3081 phys_addr_t physicalAddress; 3082 uint32 flags; 3083 map->Lock(); 3084 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 3085 map->Unlock(); 3086 3087 if ((flags & PAGE_MODIFIED) != 0) 3088 return true; 3089 } 3090 3091 return false; 3092 } 3093 3094 3095 /*! The page's cache must be locked. 3096 */ 3097 void 3098 vm_clear_map_flags(vm_page* page, uint32 flags) 3099 { 3100 if ((flags & PAGE_ACCESSED) != 0) 3101 page->accessed = false; 3102 if ((flags & PAGE_MODIFIED) != 0) 3103 page->modified = false; 3104 3105 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3106 vm_page_mapping* mapping; 3107 while ((mapping = iterator.Next()) != NULL) { 3108 VMArea* area = mapping->area; 3109 VMTranslationMap* map = area->address_space->TranslationMap(); 3110 3111 map->Lock(); 3112 map->ClearFlags(virtual_page_address(area, page), flags); 3113 map->Unlock(); 3114 } 3115 } 3116 3117 3118 /*! Removes all mappings from a page. 3119 After you've called this function, the page is unmapped from memory and 3120 the page's \c accessed and \c modified flags have been updated according 3121 to the state of the mappings. 3122 The page's cache must be locked. 
3123 */ 3124 void 3125 vm_remove_all_page_mappings(vm_page* page) 3126 { 3127 while (vm_page_mapping* mapping = page->mappings.Head()) { 3128 VMArea* area = mapping->area; 3129 VMTranslationMap* map = area->address_space->TranslationMap(); 3130 addr_t address = virtual_page_address(area, page); 3131 map->UnmapPage(area, address, false); 3132 } 3133 } 3134 3135 3136 int32 3137 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 3138 { 3139 int32 count = 0; 3140 3141 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3142 vm_page_mapping* mapping; 3143 while ((mapping = iterator.Next()) != NULL) { 3144 VMArea* area = mapping->area; 3145 VMTranslationMap* map = area->address_space->TranslationMap(); 3146 3147 bool modified; 3148 if (map->ClearAccessedAndModified(area, 3149 virtual_page_address(area, page), false, modified)) { 3150 count++; 3151 } 3152 3153 page->modified |= modified; 3154 } 3155 3156 3157 if (page->accessed) { 3158 count++; 3159 page->accessed = false; 3160 } 3161 3162 return count; 3163 } 3164 3165 3166 /*! Removes all mappings of a page and/or clears the accessed bits of the 3167 mappings. 3168 The function iterates through the page mappings and removes them until 3169 encountering one that has been accessed. From then on it will continue to 3170 iterate, but only clear the accessed flag of the mapping. The page's 3171 \c modified bit will be updated accordingly, the \c accessed bit will be 3172 cleared. 3173 \return The number of mapping accessed bits encountered, including the 3174 \c accessed bit of the page itself. If \c 0 is returned, all mappings 3175 of the page have been removed. 
3176 */ 3177 int32 3178 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 3179 { 3180 ASSERT(page->WiredCount() == 0); 3181 3182 if (page->accessed) 3183 return vm_clear_page_mapping_accessed_flags(page); 3184 3185 while (vm_page_mapping* mapping = page->mappings.Head()) { 3186 VMArea* area = mapping->area; 3187 VMTranslationMap* map = area->address_space->TranslationMap(); 3188 addr_t address = virtual_page_address(area, page); 3189 bool modified = false; 3190 if (map->ClearAccessedAndModified(area, address, true, modified)) { 3191 page->accessed = true; 3192 page->modified |= modified; 3193 return vm_clear_page_mapping_accessed_flags(page); 3194 } 3195 page->modified |= modified; 3196 } 3197 3198 return 0; 3199 } 3200 3201 3202 static int 3203 display_mem(int argc, char** argv) 3204 { 3205 bool physical = false; 3206 addr_t copyAddress; 3207 int32 displayWidth; 3208 int32 itemSize; 3209 int32 num = -1; 3210 addr_t address; 3211 int i = 1, j; 3212 3213 if (argc > 1 && argv[1][0] == '-') { 3214 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3215 physical = true; 3216 i++; 3217 } else 3218 i = 99; 3219 } 3220 3221 if (argc < i + 1 || argc > i + 2) { 3222 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3223 "\tdl - 8 bytes\n" 3224 "\tdw - 4 bytes\n" 3225 "\tds - 2 bytes\n" 3226 "\tdb - 1 byte\n" 3227 "\tstring - a whole string\n" 3228 " -p or --physical only allows memory from a single page to be " 3229 "displayed.\n"); 3230 return 0; 3231 } 3232 3233 address = parse_expression(argv[i]); 3234 3235 if (argc > i + 1) 3236 num = parse_expression(argv[i + 1]); 3237 3238 // build the format string 3239 if (strcmp(argv[0], "db") == 0) { 3240 itemSize = 1; 3241 displayWidth = 16; 3242 } else if (strcmp(argv[0], "ds") == 0) { 3243 itemSize = 2; 3244 displayWidth = 8; 3245 } else if (strcmp(argv[0], "dw") == 0) { 3246 itemSize = 4; 3247 displayWidth = 4; 3248 } else if (strcmp(argv[0], "dl") == 0) { 3249 itemSize = 8; 3250 
displayWidth = 2; 3251 } else if (strcmp(argv[0], "string") == 0) { 3252 itemSize = 1; 3253 displayWidth = -1; 3254 } else { 3255 kprintf("display_mem called in an invalid way!\n"); 3256 return 0; 3257 } 3258 3259 if (num <= 0) 3260 num = displayWidth; 3261 3262 void* physicalPageHandle = NULL; 3263 3264 if (physical) { 3265 int32 offset = address & (B_PAGE_SIZE - 1); 3266 if (num * itemSize + offset > B_PAGE_SIZE) { 3267 num = (B_PAGE_SIZE - offset) / itemSize; 3268 kprintf("NOTE: number of bytes has been cut to page size\n"); 3269 } 3270 3271 address = ROUNDDOWN(address, B_PAGE_SIZE); 3272 3273 if (vm_get_physical_page_debug(address, ©Address, 3274 &physicalPageHandle) != B_OK) { 3275 kprintf("getting the hardware page failed."); 3276 return 0; 3277 } 3278 3279 address += offset; 3280 copyAddress += offset; 3281 } else 3282 copyAddress = address; 3283 3284 if (!strcmp(argv[0], "string")) { 3285 kprintf("%p \"", (char*)copyAddress); 3286 3287 // string mode 3288 for (i = 0; true; i++) { 3289 char c; 3290 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1) 3291 != B_OK 3292 || c == '\0') { 3293 break; 3294 } 3295 3296 if (c == '\n') 3297 kprintf("\\n"); 3298 else if (c == '\t') 3299 kprintf("\\t"); 3300 else { 3301 if (!isprint(c)) 3302 c = '.'; 3303 3304 kprintf("%c", c); 3305 } 3306 } 3307 3308 kprintf("\"\n"); 3309 } else { 3310 // number mode 3311 for (i = 0; i < num; i++) { 3312 uint64 value; 3313 3314 if ((i % displayWidth) == 0) { 3315 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3316 if (i != 0) 3317 kprintf("\n"); 3318 3319 kprintf("[0x%lx] ", address + i * itemSize); 3320 3321 for (j = 0; j < displayed; j++) { 3322 char c; 3323 if (debug_memcpy(B_CURRENT_TEAM, &c, 3324 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 3325 displayed = j; 3326 break; 3327 } 3328 if (!isprint(c)) 3329 c = '.'; 3330 3331 kprintf("%c", c); 3332 } 3333 if (num > displayWidth) { 3334 // make sure the spacing in the last line is correct 3335 for (j = 
displayed; j < displayWidth * itemSize; j++) 3336 kprintf(" "); 3337 } 3338 kprintf(" "); 3339 } 3340 3341 if (debug_memcpy(B_CURRENT_TEAM, &value, 3342 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 3343 kprintf("read fault"); 3344 break; 3345 } 3346 3347 switch (itemSize) { 3348 case 1: 3349 kprintf(" %02" B_PRIx8, *(uint8*)&value); 3350 break; 3351 case 2: 3352 kprintf(" %04" B_PRIx16, *(uint16*)&value); 3353 break; 3354 case 4: 3355 kprintf(" %08" B_PRIx32, *(uint32*)&value); 3356 break; 3357 case 8: 3358 kprintf(" %016" B_PRIx64, *(uint64*)&value); 3359 break; 3360 } 3361 } 3362 3363 kprintf("\n"); 3364 } 3365 3366 if (physical) { 3367 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3368 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3369 } 3370 return 0; 3371 } 3372 3373 3374 static void 3375 dump_cache_tree_recursively(VMCache* cache, int level, 3376 VMCache* highlightCache) 3377 { 3378 // print this cache 3379 for (int i = 0; i < level; i++) 3380 kprintf(" "); 3381 if (cache == highlightCache) 3382 kprintf("%p <--\n", cache); 3383 else 3384 kprintf("%p\n", cache); 3385 3386 // recursively print its consumers 3387 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3388 VMCache* consumer = it.Next();) { 3389 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3390 } 3391 } 3392 3393 3394 static int 3395 dump_cache_tree(int argc, char** argv) 3396 { 3397 if (argc != 2 || !strcmp(argv[1], "--help")) { 3398 kprintf("usage: %s <address>\n", argv[0]); 3399 return 0; 3400 } 3401 3402 addr_t address = parse_expression(argv[1]); 3403 if (address == 0) 3404 return 0; 3405 3406 VMCache* cache = (VMCache*)address; 3407 VMCache* root = cache; 3408 3409 // find the root cache (the transitive source) 3410 while (root->source != NULL) 3411 root = root->source; 3412 3413 dump_cache_tree_recursively(root, 0, cache); 3414 3415 return 0; 3416 } 3417 3418 3419 const char* 3420 vm_cache_type_to_string(int32 type) 
3421 { 3422 switch (type) { 3423 case CACHE_TYPE_RAM: 3424 return "RAM"; 3425 case CACHE_TYPE_DEVICE: 3426 return "device"; 3427 case CACHE_TYPE_VNODE: 3428 return "vnode"; 3429 case CACHE_TYPE_NULL: 3430 return "null"; 3431 3432 default: 3433 return "unknown"; 3434 } 3435 } 3436 3437 3438 #if DEBUG_CACHE_LIST 3439 3440 static void 3441 update_cache_info_recursively(VMCache* cache, cache_info& info) 3442 { 3443 info.page_count += cache->page_count; 3444 if (cache->type == CACHE_TYPE_RAM) 3445 info.committed += cache->committed_size; 3446 3447 // recurse 3448 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3449 VMCache* consumer = it.Next();) { 3450 update_cache_info_recursively(consumer, info); 3451 } 3452 } 3453 3454 3455 static int 3456 cache_info_compare_page_count(const void* _a, const void* _b) 3457 { 3458 const cache_info* a = (const cache_info*)_a; 3459 const cache_info* b = (const cache_info*)_b; 3460 if (a->page_count == b->page_count) 3461 return 0; 3462 return a->page_count < b->page_count ? 1 : -1; 3463 } 3464 3465 3466 static int 3467 cache_info_compare_committed(const void* _a, const void* _b) 3468 { 3469 const cache_info* a = (const cache_info*)_a; 3470 const cache_info* b = (const cache_info*)_b; 3471 if (a->committed == b->committed) 3472 return 0; 3473 return a->committed < b->committed ? 
1 : -1; 3474 } 3475 3476 3477 static void 3478 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3479 { 3480 for (int i = 0; i < level; i++) 3481 kprintf(" "); 3482 3483 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3484 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3485 cache->virtual_base, cache->virtual_end, cache->page_count); 3486 3487 if (level == 0) 3488 kprintf("/%lu", info.page_count); 3489 3490 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3491 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3492 3493 if (level == 0) 3494 kprintf("/%lu", info.committed); 3495 } 3496 3497 // areas 3498 if (cache->areas != NULL) { 3499 VMArea* area = cache->areas; 3500 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3501 area->name, area->address_space->ID()); 3502 3503 while (area->cache_next != NULL) { 3504 area = area->cache_next; 3505 kprintf(", %" B_PRId32, area->id); 3506 } 3507 } 3508 3509 kputs("\n"); 3510 3511 // recurse 3512 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3513 VMCache* consumer = it.Next();) { 3514 dump_caches_recursively(consumer, info, level + 1); 3515 } 3516 } 3517 3518 3519 static int 3520 dump_caches(int argc, char** argv) 3521 { 3522 if (sCacheInfoTable == NULL) { 3523 kprintf("No cache info table!\n"); 3524 return 0; 3525 } 3526 3527 bool sortByPageCount = true; 3528 3529 for (int32 i = 1; i < argc; i++) { 3530 if (strcmp(argv[i], "-c") == 0) { 3531 sortByPageCount = false; 3532 } else { 3533 print_debugger_command_usage(argv[0]); 3534 return 0; 3535 } 3536 } 3537 3538 uint32 totalCount = 0; 3539 uint32 rootCount = 0; 3540 off_t totalCommitted = 0; 3541 page_num_t totalPages = 0; 3542 3543 VMCache* cache = gDebugCacheList; 3544 while (cache) { 3545 totalCount++; 3546 if (cache->source == NULL) { 3547 cache_info stackInfo; 3548 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3549 ? 
sCacheInfoTable[rootCount] : stackInfo; 3550 rootCount++; 3551 info.cache = cache; 3552 info.page_count = 0; 3553 info.committed = 0; 3554 update_cache_info_recursively(cache, info); 3555 totalCommitted += info.committed; 3556 totalPages += info.page_count; 3557 } 3558 3559 cache = cache->debug_next; 3560 } 3561 3562 if (rootCount <= (uint32)kCacheInfoTableCount) { 3563 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3564 sortByPageCount 3565 ? &cache_info_compare_page_count 3566 : &cache_info_compare_committed); 3567 } 3568 3569 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3570 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3571 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3572 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3573 "page count" : "committed size"); 3574 3575 if (rootCount <= (uint32)kCacheInfoTableCount) { 3576 for (uint32 i = 0; i < rootCount; i++) { 3577 cache_info& info = sCacheInfoTable[i]; 3578 dump_caches_recursively(info.cache, info, 0); 3579 } 3580 } else 3581 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3582 3583 return 0; 3584 } 3585 3586 #endif // DEBUG_CACHE_LIST 3587 3588 3589 static int 3590 dump_cache(int argc, char** argv) 3591 { 3592 VMCache* cache; 3593 bool showPages = false; 3594 int i = 1; 3595 3596 if (argc < 2 || !strcmp(argv[1], "--help")) { 3597 kprintf("usage: %s [-ps] <address>\n" 3598 " if -p is specified, all pages are shown, if -s is used\n" 3599 " only the cache info is shown respectively.\n", argv[0]); 3600 return 0; 3601 } 3602 while (argv[i][0] == '-') { 3603 char* arg = argv[i] + 1; 3604 while (arg[0]) { 3605 if (arg[0] == 'p') 3606 showPages = true; 3607 arg++; 3608 } 3609 i++; 3610 } 3611 if (argv[i] == NULL) { 3612 kprintf("%s: invalid argument, pass address\n", argv[0]); 3613 return 0; 3614 } 3615 3616 addr_t address = parse_expression(argv[i]); 3617 if (address == 0) 3618 return 0; 3619 3620 cache = (VMCache*)address; 3621 3622 cache->Dump(showPages); 3623 3624 set_debug_variable("_sourceCache", (addr_t)cache->source); 3625 3626 return 0; 3627 } 3628 3629 3630 static void 3631 dump_area_struct(VMArea* area, bool mappings) 3632 { 3633 kprintf("AREA: %p\n", area); 3634 kprintf("name:\t\t'%s'\n", area->name); 3635 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3636 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3637 kprintf("base:\t\t0x%lx\n", area->Base()); 3638 kprintf("size:\t\t0x%lx\n", area->Size()); 3639 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3640 kprintf("page_protection:%p\n", area->page_protections); 3641 kprintf("wiring:\t\t0x%x\n", area->wiring); 3642 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3643 kprintf("cache:\t\t%p\n", area->cache); 3644 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3645 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3646 kprintf("cache_next:\t%p\n", area->cache_next); 3647 kprintf("cache_prev:\t%p\n", area->cache_prev); 3648 3649 VMAreaMappings::Iterator iterator = 
area->mappings.GetIterator(); 3650 if (mappings) { 3651 kprintf("page mappings:\n"); 3652 while (iterator.HasNext()) { 3653 vm_page_mapping* mapping = iterator.Next(); 3654 kprintf(" %p", mapping->page); 3655 } 3656 kprintf("\n"); 3657 } else { 3658 uint32 count = 0; 3659 while (iterator.Next() != NULL) { 3660 count++; 3661 } 3662 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3663 } 3664 } 3665 3666 3667 static int 3668 dump_area(int argc, char** argv) 3669 { 3670 bool mappings = false; 3671 bool found = false; 3672 int32 index = 1; 3673 VMArea* area; 3674 addr_t num; 3675 3676 if (argc < 2 || !strcmp(argv[1], "--help")) { 3677 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3678 "All areas matching either id/address/name are listed. You can\n" 3679 "force to check only a specific item by prefixing the specifier\n" 3680 "with the id/contains/address/name keywords.\n" 3681 "-m shows the area's mappings as well.\n"); 3682 return 0; 3683 } 3684 3685 if (!strcmp(argv[1], "-m")) { 3686 mappings = true; 3687 index++; 3688 } 3689 3690 int32 mode = 0xf; 3691 if (!strcmp(argv[index], "id")) 3692 mode = 1; 3693 else if (!strcmp(argv[index], "contains")) 3694 mode = 2; 3695 else if (!strcmp(argv[index], "name")) 3696 mode = 4; 3697 else if (!strcmp(argv[index], "address")) 3698 mode = 0; 3699 if (mode != 0xf) 3700 index++; 3701 3702 if (index >= argc) { 3703 kprintf("No area specifier given.\n"); 3704 return 0; 3705 } 3706 3707 num = parse_expression(argv[index]); 3708 3709 if (mode == 0) { 3710 dump_area_struct((struct VMArea*)num, mappings); 3711 } else { 3712 // walk through the area list, looking for the arguments as a name 3713 3714 VMAreasTree::Iterator it = VMAreas::GetIterator(); 3715 while ((area = it.Next()) != NULL) { 3716 if (((mode & 4) != 0 3717 && !strcmp(argv[index], area->name)) 3718 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3719 || (((mode & 2) != 0 && area->Base() <= num 3720 && area->Base() + area->Size() 
> num))))) { 3721 dump_area_struct(area, mappings); 3722 found = true; 3723 } 3724 } 3725 3726 if (!found) 3727 kprintf("could not find area %s (%ld)\n", argv[index], num); 3728 } 3729 3730 return 0; 3731 } 3732 3733 3734 static int 3735 dump_area_list(int argc, char** argv) 3736 { 3737 VMArea* area; 3738 const char* name = NULL; 3739 int32 id = 0; 3740 3741 if (argc > 1) { 3742 id = parse_expression(argv[1]); 3743 if (id == 0) 3744 name = argv[1]; 3745 } 3746 3747 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3748 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3749 B_PRINTF_POINTER_WIDTH, "size"); 3750 3751 VMAreasTree::Iterator it = VMAreas::GetIterator(); 3752 while ((area = it.Next()) != NULL) { 3753 if ((id != 0 && area->address_space->ID() != id) 3754 || (name != NULL && strstr(area->name, name) == NULL)) 3755 continue; 3756 3757 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3758 area->id, (void*)area->Base(), (void*)area->Size(), 3759 area->protection, area->wiring, area->name); 3760 } 3761 return 0; 3762 } 3763 3764 3765 static int 3766 dump_available_memory(int argc, char** argv) 3767 { 3768 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3769 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3770 return 0; 3771 } 3772 3773 3774 static int 3775 dump_mapping_info(int argc, char** argv) 3776 { 3777 bool reverseLookup = false; 3778 bool pageLookup = false; 3779 3780 int argi = 1; 3781 for (; argi < argc && argv[argi][0] == '-'; argi++) { 3782 const char* arg = argv[argi]; 3783 if (strcmp(arg, "-r") == 0) { 3784 reverseLookup = true; 3785 } else if (strcmp(arg, "-p") == 0) { 3786 reverseLookup = true; 3787 pageLookup = true; 3788 } else { 3789 print_debugger_command_usage(argv[0]); 3790 return 0; 3791 } 3792 } 3793 3794 // We need at least one argument, the address. Optionally a thread ID can be 3795 // specified. 
3796 if (argi >= argc || argi + 2 < argc) { 3797 print_debugger_command_usage(argv[0]); 3798 return 0; 3799 } 3800 3801 uint64 addressValue; 3802 if (!evaluate_debug_expression(argv[argi++], &addressValue, false)) 3803 return 0; 3804 3805 Team* team = NULL; 3806 if (argi < argc) { 3807 uint64 threadID; 3808 if (!evaluate_debug_expression(argv[argi++], &threadID, false)) 3809 return 0; 3810 3811 Thread* thread = Thread::GetDebug(threadID); 3812 if (thread == NULL) { 3813 kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]); 3814 return 0; 3815 } 3816 3817 team = thread->team; 3818 } 3819 3820 if (reverseLookup) { 3821 phys_addr_t physicalAddress; 3822 if (pageLookup) { 3823 vm_page* page = (vm_page*)(addr_t)addressValue; 3824 physicalAddress = page->physical_page_number * B_PAGE_SIZE; 3825 } else { 3826 physicalAddress = (phys_addr_t)addressValue; 3827 physicalAddress -= physicalAddress % B_PAGE_SIZE; 3828 } 3829 3830 kprintf(" Team Virtual Address Area\n"); 3831 kprintf("--------------------------------------\n"); 3832 3833 struct Callback : VMTranslationMap::ReverseMappingInfoCallback { 3834 Callback() 3835 : 3836 fAddressSpace(NULL) 3837 { 3838 } 3839 3840 void SetAddressSpace(VMAddressSpace* addressSpace) 3841 { 3842 fAddressSpace = addressSpace; 3843 } 3844 3845 virtual bool HandleVirtualAddress(addr_t virtualAddress) 3846 { 3847 kprintf("%8" B_PRId32 " %#18" B_PRIxADDR, fAddressSpace->ID(), 3848 virtualAddress); 3849 if (VMArea* area = fAddressSpace->LookupArea(virtualAddress)) 3850 kprintf(" %8" B_PRId32 " %s\n", area->id, area->name); 3851 else 3852 kprintf("\n"); 3853 return false; 3854 } 3855 3856 private: 3857 VMAddressSpace* fAddressSpace; 3858 } callback; 3859 3860 if (team != NULL) { 3861 // team specified -- get its address space 3862 VMAddressSpace* addressSpace = team->address_space; 3863 if (addressSpace == NULL) { 3864 kprintf("Failed to get address space!\n"); 3865 return 0; 3866 } 3867 3868 callback.SetAddressSpace(addressSpace); 3869 
addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3870 physicalAddress, callback); 3871 } else { 3872 // no team specified -- iterate through all address spaces 3873 for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 3874 addressSpace != NULL; 3875 addressSpace = VMAddressSpace::DebugNext(addressSpace)) { 3876 callback.SetAddressSpace(addressSpace); 3877 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3878 physicalAddress, callback); 3879 } 3880 } 3881 } else { 3882 // get the address space 3883 addr_t virtualAddress = (addr_t)addressValue; 3884 virtualAddress -= virtualAddress % B_PAGE_SIZE; 3885 VMAddressSpace* addressSpace; 3886 if (IS_KERNEL_ADDRESS(virtualAddress)) { 3887 addressSpace = VMAddressSpace::Kernel(); 3888 } else if (team != NULL) { 3889 addressSpace = team->address_space; 3890 } else { 3891 Thread* thread = debug_get_debugged_thread(); 3892 if (thread == NULL || thread->team == NULL) { 3893 kprintf("Failed to get team!\n"); 3894 return 0; 3895 } 3896 3897 addressSpace = thread->team->address_space; 3898 } 3899 3900 if (addressSpace == NULL) { 3901 kprintf("Failed to get address space!\n"); 3902 return 0; 3903 } 3904 3905 // let the translation map implementation do the job 3906 addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress); 3907 } 3908 3909 return 0; 3910 } 3911 3912 3913 /*! Deletes all areas and reserved regions in the given address space. 3914 3915 The caller must ensure that none of the areas has any wired ranges. 3916 3917 \param addressSpace The address space. 3918 \param deletingAddressSpace \c true, if the address space is in the process 3919 of being deleted. 
3920 */ 3921 void 3922 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3923 { 3924 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3925 addressSpace->ID())); 3926 3927 addressSpace->WriteLock(); 3928 3929 // remove all reserved areas in this address space 3930 addressSpace->UnreserveAllAddressRanges(0); 3931 3932 // delete all the areas in this address space 3933 while (VMArea* area = addressSpace->FirstArea()) { 3934 ASSERT(!area->IsWired()); 3935 delete_area(addressSpace, area, deletingAddressSpace); 3936 } 3937 3938 addressSpace->WriteUnlock(); 3939 } 3940 3941 3942 static area_id 3943 vm_area_for(addr_t address, bool kernel) 3944 { 3945 team_id team; 3946 if (IS_USER_ADDRESS(address)) { 3947 // we try the user team address space, if any 3948 team = VMAddressSpace::CurrentID(); 3949 if (team < 0) 3950 return team; 3951 } else 3952 team = VMAddressSpace::KernelID(); 3953 3954 AddressSpaceReadLocker locker(team); 3955 if (!locker.IsLocked()) 3956 return B_BAD_TEAM_ID; 3957 3958 VMArea* area = locker.AddressSpace()->LookupArea(address); 3959 if (area != NULL) { 3960 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0 3961 && (area->protection & B_KERNEL_AREA) != 0) 3962 return B_ERROR; 3963 3964 return area->id; 3965 } 3966 3967 return B_ERROR; 3968 } 3969 3970 3971 /*! Frees physical pages that were used during the boot process. 3972 \a end is inclusive. 

	Every present page in the range whose vm_page is not already in the free,
	clear or unused state is set to \c PAGE_STATE_FREE; afterwards the whole
	range is unmapped.

	\param map The translation map to query and unmap from.
	\param start First address of the range.
	\param end Last address of the range.
*/
static void
unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
{
	// free all physical pages in the specified range

	// NOTE(review): the loop advances in page steps and compares with "<";
	// presumably \a start is page aligned and \a end the last byte of a page,
	// in which case the last page is still visited -- verify against callers.
	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
		phys_addr_t physicalAddress;
		uint32 flags;

		if (map->Query(current, &physicalAddress, &flags) == B_OK
			&& (flags & PAGE_PRESENT) != 0) {
			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
			if (page != NULL && page->State() != PAGE_STATE_FREE
					&& page->State() != PAGE_STATE_CLEAR
					&& page->State() != PAGE_STATE_UNUSED) {
				DEBUG_PAGE_ACCESS_START(page);
				vm_page_set_state(page, PAGE_STATE_FREE);
			}
		}
	}

	// unmap the memory
	map->Unmap(start, end);
}


/*!	Frees those parts of the given range that are not covered by any area of
	the kernel address space.

	The kernel areas are walked in address order; every hole between them that
	lies within the range is passed to unmap_and_free_physical_pages(). The
	kernel translation map is locked for the whole operation.

	\param start First address of the range.
	\param size Size of the range in bytes.
*/
void
vm_free_unused_boot_loader_range(addr_t start, addr_t size)
{
	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
	addr_t end = start + (size - 1);
		// inclusive end of the range
	addr_t lastEnd = start;
		// start of the hole currently under consideration

	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
		(void*)start, (void*)end));

	// The areas are sorted in virtual address space order, so
	// we just have to find the holes between them that fall
	// into the area we should dispose

	map->Lock();

	for (VMAddressSpace::AreaIterator it
				= VMAddressSpace::Kernel()->GetAreaIterator();
			VMArea* area = it.Next();) {
		addr_t areaStart = area->Base();
		addr_t areaEnd = areaStart + (area->Size() - 1);

		// area lies completely before the range
		if (areaEnd < start)
			continue;

		if (areaStart > end) {
			// we are done, the area is already beyond of what we have to free
			break;
		}

		if (areaStart > lastEnd) {
			// this is something we can free
			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
				(void*)areaStart));
			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
		}

		if (areaEnd >= end) {
			lastEnd = areaEnd;
				// no +1 to prevent potential overflow
			break;
		}

		lastEnd = areaEnd + 1;
	}

	if (lastEnd < end) {
		// we can also get rid of some space at the end of the area
		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
			(void*)end));
		unmap_and_free_physical_pages(map, lastEnd, end);
	}

	map->Unlock();
}


/*!	Creates the "<file name>_text" and "<file name>_data" areas for the text
	and data regions of a preloaded image and stores the resulting area IDs in
	the image's regions.

	\param _image The preloaded image; it is cast to preloaded_elf_image.
*/
static void
create_preloaded_image_areas(struct preloaded_image* _image)
{
	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
	char name[B_OS_NAME_LENGTH];
	void* address;
	int32 length;

	// use file name to create a good area name
	char* fileName = strrchr(image->name, '/');
	if (fileName == NULL)
		fileName = image->name;
	else
		fileName++;

	length = strlen(fileName);
	// make sure there is enough space for the suffix
	if (length > 25)
		length = 25;

	// name is not terminated here; the strcpy() of the suffix does that
	memcpy(name, fileName, length);
	strcpy(name + length, "_text");
	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		// this will later be remapped read-only/executable by the
		// ELF initialization code

	// same name prefix, different suffix
	strcpy(name + length, "_data");
	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
}


/*!	Frees all kernel arguments areas listed in the kernel_args structure.
	Any boot loader resources contained in those arguments must not be accessed
	anymore past this point.
4097 */ 4098 void 4099 vm_free_kernel_args(kernel_args* args) 4100 { 4101 uint32 i; 4102 4103 TRACE(("vm_free_kernel_args()\n")); 4104 4105 for (i = 0; i < args->num_kernel_args_ranges; i++) { 4106 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start); 4107 if (area >= B_OK) 4108 delete_area(area); 4109 } 4110 } 4111 4112 4113 static void 4114 allocate_kernel_args(kernel_args* args) 4115 { 4116 TRACE(("allocate_kernel_args()\n")); 4117 4118 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 4119 void* address = (void*)(addr_t)args->kernel_args_range[i].start; 4120 4121 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 4122 args->kernel_args_range[i].size, B_ALREADY_WIRED, 4123 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4124 } 4125 } 4126 4127 4128 static void 4129 unreserve_boot_loader_ranges(kernel_args* args) 4130 { 4131 TRACE(("unreserve_boot_loader_ranges()\n")); 4132 4133 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4134 vm_unreserve_address_range(VMAddressSpace::KernelID(), 4135 (void*)(addr_t)args->virtual_allocated_range[i].start, 4136 args->virtual_allocated_range[i].size); 4137 } 4138 } 4139 4140 4141 static void 4142 reserve_boot_loader_ranges(kernel_args* args) 4143 { 4144 TRACE(("reserve_boot_loader_ranges()\n")); 4145 4146 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4147 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start; 4148 4149 // If the address is no kernel address, we just skip it. The 4150 // architecture specific code has to deal with it. 
4151 if (!IS_KERNEL_ADDRESS(address)) { 4152 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 4153 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 4154 continue; 4155 } 4156 4157 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 4158 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 4159 if (status < B_OK) 4160 panic("could not reserve boot loader ranges\n"); 4161 } 4162 } 4163 4164 4165 static addr_t 4166 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 4167 { 4168 size = PAGE_ALIGN(size); 4169 4170 // find a slot in the virtual allocation addr range 4171 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 4172 // check to see if the space between this one and the last is big enough 4173 addr_t rangeStart = args->virtual_allocated_range[i].start; 4174 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 4175 + args->virtual_allocated_range[i - 1].size; 4176 4177 addr_t base = alignment > 0 4178 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 4179 4180 if (base >= KERNEL_BASE && base < rangeStart 4181 && rangeStart - base >= size) { 4182 args->virtual_allocated_range[i - 1].size 4183 += base + size - previousRangeEnd; 4184 return base; 4185 } 4186 } 4187 4188 // we hadn't found one between allocation ranges. this is ok. 4189 // see if there's a gap after the last one 4190 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 4191 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 4192 + args->virtual_allocated_range[lastEntryIndex].size; 4193 addr_t base = alignment > 0 4194 ? 
ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 4195 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 4196 args->virtual_allocated_range[lastEntryIndex].size 4197 += base + size - lastRangeEnd; 4198 return base; 4199 } 4200 4201 // see if there's a gap before the first one 4202 addr_t rangeStart = args->virtual_allocated_range[0].start; 4203 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 4204 base = rangeStart - size; 4205 if (alignment > 0) 4206 base = ROUNDDOWN(base, alignment); 4207 4208 if (base >= KERNEL_BASE) { 4209 args->virtual_allocated_range[0].start = base; 4210 args->virtual_allocated_range[0].size += rangeStart - base; 4211 return base; 4212 } 4213 } 4214 4215 return 0; 4216 } 4217 4218 4219 static bool 4220 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 4221 { 4222 // TODO: horrible brute-force method of determining if the page can be 4223 // allocated 4224 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4225 if (address >= args->physical_memory_range[i].start 4226 && address < args->physical_memory_range[i].start 4227 + args->physical_memory_range[i].size) 4228 return true; 4229 } 4230 return false; 4231 } 4232 4233 4234 page_num_t 4235 vm_allocate_early_physical_page(kernel_args* args) 4236 { 4237 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4238 phys_addr_t nextPage; 4239 4240 nextPage = args->physical_allocated_range[i].start 4241 + args->physical_allocated_range[i].size; 4242 // see if the page after the next allocated paddr run can be allocated 4243 if (i + 1 < args->num_physical_allocated_ranges 4244 && args->physical_allocated_range[i + 1].size != 0) { 4245 // see if the next page will collide with the next allocated range 4246 if (nextPage >= args->physical_allocated_range[i+1].start) 4247 continue; 4248 } 4249 // see if the next physical page fits in the memory block 4250 if (is_page_in_physical_memory_range(args, nextPage)) { 4251 // we got one! 
4252 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4253 return nextPage / B_PAGE_SIZE; 4254 } 4255 } 4256 4257 // Expanding upwards didn't work, try going downwards. 4258 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4259 phys_addr_t nextPage; 4260 4261 nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE; 4262 // see if the page after the prev allocated paddr run can be allocated 4263 if (i > 0 && args->physical_allocated_range[i - 1].size != 0) { 4264 // see if the next page will collide with the next allocated range 4265 if (nextPage < args->physical_allocated_range[i-1].start 4266 + args->physical_allocated_range[i-1].size) 4267 continue; 4268 } 4269 // see if the next physical page fits in the memory block 4270 if (is_page_in_physical_memory_range(args, nextPage)) { 4271 // we got one! 4272 args->physical_allocated_range[i].start -= B_PAGE_SIZE; 4273 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4274 return nextPage / B_PAGE_SIZE; 4275 } 4276 } 4277 4278 return 0; 4279 // could not allocate a block 4280 } 4281 4282 4283 /*! This one uses the kernel_args' physical and virtual memory ranges to 4284 allocate some pages before the VM is completely up. 
4285 */ 4286 addr_t 4287 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4288 uint32 attributes, addr_t alignment) 4289 { 4290 if (physicalSize > virtualSize) 4291 physicalSize = virtualSize; 4292 4293 // find the vaddr to allocate at 4294 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 4295 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 4296 if (virtualBase == 0) { 4297 panic("vm_allocate_early: could not allocate virtual address\n"); 4298 return 0; 4299 } 4300 4301 // map the pages 4302 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4303 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 4304 if (physicalAddress == 0) 4305 panic("error allocating early page!\n"); 4306 4307 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4308 4309 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4310 physicalAddress * B_PAGE_SIZE, attributes, 4311 &vm_allocate_early_physical_page); 4312 } 4313 4314 return virtualBase; 4315 } 4316 4317 4318 /*! The main entrance point to initialize the VM. 
*/ 4319 status_t 4320 vm_init(kernel_args* args) 4321 { 4322 struct preloaded_image* image; 4323 void* address; 4324 status_t err = 0; 4325 uint32 i; 4326 4327 TRACE(("vm_init: entry\n")); 4328 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 4329 err = arch_vm_init(args); 4330 4331 // initialize some globals 4332 vm_page_init_num_pages(args); 4333 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 4334 4335 slab_init(args); 4336 4337 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4338 off_t heapSize = INITIAL_HEAP_SIZE; 4339 // try to accomodate low memory systems 4340 while (heapSize > sAvailableMemory / 8) 4341 heapSize /= 2; 4342 if (heapSize < 1024 * 1024) 4343 panic("vm_init: go buy some RAM please."); 4344 4345 // map in the new heap and initialize it 4346 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 4347 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 4348 TRACE(("heap at 0x%lx\n", heapBase)); 4349 heap_init(heapBase, heapSize); 4350 #endif 4351 4352 // initialize the free page list and physical page mapper 4353 vm_page_init(args); 4354 4355 // initialize the cache allocators 4356 vm_cache_init(args); 4357 4358 { 4359 status_t error = VMAreas::Init(); 4360 if (error != B_OK) 4361 panic("vm_init: error initializing areas map\n"); 4362 } 4363 4364 VMAddressSpace::Init(); 4365 reserve_boot_loader_ranges(args); 4366 4367 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4368 heap_init_post_area(); 4369 #endif 4370 4371 // Do any further initialization that the architecture dependant layers may 4372 // need now 4373 arch_vm_translation_map_init_post_area(args); 4374 arch_vm_init_post_area(args); 4375 vm_page_init_post_area(args); 4376 slab_init_post_area(); 4377 4378 // allocate areas to represent stuff that already exists 4379 4380 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4381 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 4382 create_area("kernel heap", &address, B_EXACT_ADDRESS, 
heapSize, 4383 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4384 #endif 4385 4386 allocate_kernel_args(args); 4387 4388 create_preloaded_image_areas(args->kernel_image); 4389 4390 // allocate areas for preloaded images 4391 for (image = args->preloaded_images; image != NULL; image = image->next) 4392 create_preloaded_image_areas(image); 4393 4394 // allocate kernel stacks 4395 for (i = 0; i < args->num_cpus; i++) { 4396 char name[64]; 4397 4398 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 4399 address = (void*)args->cpu_kstack[i].start; 4400 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4401 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4402 } 4403 4404 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 4405 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 4406 4407 #if PARANOID_KERNEL_MALLOC 4408 vm_block_address_range("uninitialized heap memory", 4409 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4410 #endif 4411 #if PARANOID_KERNEL_FREE 4412 vm_block_address_range("freed heap memory", 4413 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4414 #endif 4415 4416 // create the object cache for the page mappings 4417 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 4418 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 4419 NULL, NULL); 4420 if (gPageMappingsObjectCache == NULL) 4421 panic("failed to create page mappings object cache"); 4422 4423 object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 4424 4425 #if DEBUG_CACHE_LIST 4426 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 4427 virtual_address_restrictions virtualRestrictions = {}; 4428 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 4429 physical_address_restrictions physicalRestrictions = {}; 4430 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 4431 ROUNDUP(kCacheInfoTableCount 
* sizeof(cache_info), B_PAGE_SIZE), 4432 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 4433 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions, 4434 &physicalRestrictions, (void**)&sCacheInfoTable); 4435 } 4436 #endif // DEBUG_CACHE_LIST 4437 4438 // add some debugger commands 4439 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4440 add_debugger_command("area", &dump_area, 4441 "Dump info about a particular area"); 4442 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 4443 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 4444 #if DEBUG_CACHE_LIST 4445 if (sCacheInfoTable != NULL) { 4446 add_debugger_command_etc("caches", &dump_caches, 4447 "List all VMCache trees", 4448 "[ \"-c\" ]\n" 4449 "All cache trees are listed sorted in decreasing order by number " 4450 "of\n" 4451 "used pages or, if \"-c\" is specified, by size of committed " 4452 "memory.\n", 4453 0); 4454 } 4455 #endif 4456 add_debugger_command("avail", &dump_available_memory, 4457 "Dump available memory"); 4458 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4459 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4460 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4461 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4462 add_debugger_command("string", &display_mem, "dump strings"); 4463 4464 add_debugger_command_etc("mapping", &dump_mapping_info, 4465 "Print address mapping information", 4466 "[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n" 4467 "Prints low-level page mapping information for a given address. If\n" 4468 "neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n" 4469 "address that is looked up in the translation map of the current\n" 4470 "team, respectively the team specified by thread ID <thread ID>. 
If\n" 4471 "\"-r\" is specified, <address> is a physical address that is\n" 4472 "searched in the translation map of all teams, respectively the team\n" 4473 "specified by thread ID <thread ID>. If \"-p\" is specified,\n" 4474 "<address> is the address of a vm_page structure. The behavior is\n" 4475 "equivalent to specifying \"-r\" with the physical address of that\n" 4476 "page.\n", 4477 0); 4478 4479 TRACE(("vm_init: exit\n")); 4480 4481 vm_cache_init_post_heap(); 4482 4483 return err; 4484 } 4485 4486 4487 status_t 4488 vm_init_post_sem(kernel_args* args) 4489 { 4490 // This frees all unused boot loader resources and makes its space available 4491 // again 4492 arch_vm_init_end(args); 4493 unreserve_boot_loader_ranges(args); 4494 4495 // fill in all of the semaphores that were not allocated before 4496 // since we're still single threaded and only the kernel address space 4497 // exists, it isn't that hard to find all of the ones we need to create 4498 4499 arch_vm_translation_map_init_post_sem(args); 4500 4501 slab_init_post_sem(); 4502 4503 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4504 heap_init_post_sem(); 4505 #endif 4506 4507 return B_OK; 4508 } 4509 4510 4511 status_t 4512 vm_init_post_thread(kernel_args* args) 4513 { 4514 vm_page_init_post_thread(args); 4515 slab_init_post_thread(); 4516 return heap_init_post_thread(); 4517 } 4518 4519 4520 status_t 4521 vm_init_post_modules(kernel_args* args) 4522 { 4523 return arch_vm_init_post_modules(args); 4524 } 4525 4526 4527 void 4528 permit_page_faults(void) 4529 { 4530 Thread* thread = thread_get_current_thread(); 4531 if (thread != NULL) 4532 atomic_add(&thread->page_faults_allowed, 1); 4533 } 4534 4535 4536 void 4537 forbid_page_faults(void) 4538 { 4539 Thread* thread = thread_get_current_thread(); 4540 if (thread != NULL) 4541 atomic_add(&thread->page_faults_allowed, -1); 4542 } 4543 4544 4545 status_t 4546 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute, 
4547 bool isUser, addr_t* newIP) 4548 { 4549 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4550 faultAddress)); 4551 4552 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4553 4554 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4555 VMAddressSpace* addressSpace = NULL; 4556 4557 status_t status = B_OK; 4558 *newIP = 0; 4559 atomic_add((int32*)&sPageFaults, 1); 4560 4561 if (IS_KERNEL_ADDRESS(pageAddress)) { 4562 addressSpace = VMAddressSpace::GetKernel(); 4563 } else if (IS_USER_ADDRESS(pageAddress)) { 4564 addressSpace = VMAddressSpace::GetCurrent(); 4565 if (addressSpace == NULL) { 4566 if (!isUser) { 4567 dprintf("vm_page_fault: kernel thread accessing invalid user " 4568 "memory!\n"); 4569 status = B_BAD_ADDRESS; 4570 TPF(PageFaultError(-1, 4571 VMPageFaultTracing 4572 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4573 } else { 4574 // XXX weird state. 4575 panic("vm_page_fault: non kernel thread accessing user memory " 4576 "that doesn't exist!\n"); 4577 status = B_BAD_ADDRESS; 4578 } 4579 } 4580 } else { 4581 // the hit was probably in the 64k DMZ between kernel and user space 4582 // this keeps a user space thread from passing a buffer that crosses 4583 // into kernel space 4584 status = B_BAD_ADDRESS; 4585 TPF(PageFaultError(-1, 4586 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4587 } 4588 4589 if (status == B_OK) { 4590 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute, 4591 isUser, NULL); 4592 } 4593 4594 if (status < B_OK) { 4595 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4596 "0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n", 4597 strerror(status), address, faultAddress, isWrite, isUser, isExecute, 4598 thread_get_current_thread_id()); 4599 if (!isUser) { 4600 Thread* thread = thread_get_current_thread(); 4601 if (thread != NULL && thread->fault_handler != 0) { 4602 // this will cause the arch dependant page fault handler to 4603 // 
modify the IP on the interrupt frame or whatever to return 4604 // to this address 4605 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler); 4606 } else { 4607 // unhandled page fault in the kernel 4608 panic("vm_page_fault: unhandled page fault in kernel space at " 4609 "0x%lx, ip 0x%lx\n", address, faultAddress); 4610 } 4611 } else { 4612 Thread* thread = thread_get_current_thread(); 4613 4614 #ifdef TRACE_FAULTS 4615 VMArea* area = NULL; 4616 if (addressSpace != NULL) { 4617 addressSpace->ReadLock(); 4618 area = addressSpace->LookupArea(faultAddress); 4619 } 4620 4621 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4622 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4623 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4624 thread->team->Name(), thread->team->id, 4625 isWrite ? "write" : (isExecute ? "execute" : "read"), address, 4626 faultAddress, area ? area->name : "???", faultAddress - (area ? 4627 area->Base() : 0x0)); 4628 4629 if (addressSpace != NULL) 4630 addressSpace->ReadUnlock(); 4631 #endif 4632 4633 // If the thread has a signal handler for SIGSEGV, we simply 4634 // send it the signal. Otherwise we notify the user debugger 4635 // first. 4636 struct sigaction action; 4637 if ((sigaction(SIGSEGV, NULL, &action) == 0 4638 && action.sa_handler != SIG_DFL 4639 && action.sa_handler != SIG_IGN) 4640 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4641 SIGSEGV)) { 4642 Signal signal(SIGSEGV, 4643 status == B_PERMISSION_DENIED 4644 ? 
SEGV_ACCERR : SEGV_MAPERR, 4645 EFAULT, thread->team->id); 4646 signal.SetAddress((void*)address); 4647 send_signal_to_thread(thread, signal, 0); 4648 } 4649 } 4650 } 4651 4652 if (addressSpace != NULL) 4653 addressSpace->Put(); 4654 4655 return B_HANDLED_INTERRUPT; 4656 } 4657 4658 4659 struct PageFaultContext { 4660 AddressSpaceReadLocker addressSpaceLocker; 4661 VMCacheChainLocker cacheChainLocker; 4662 4663 VMTranslationMap* map; 4664 VMCache* topCache; 4665 off_t cacheOffset; 4666 vm_page_reservation reservation; 4667 bool isWrite; 4668 4669 // return values 4670 vm_page* page; 4671 bool restart; 4672 bool pageAllocated; 4673 4674 4675 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4676 : 4677 addressSpaceLocker(addressSpace, true), 4678 map(addressSpace->TranslationMap()), 4679 isWrite(isWrite) 4680 { 4681 } 4682 4683 ~PageFaultContext() 4684 { 4685 UnlockAll(); 4686 vm_page_unreserve_pages(&reservation); 4687 } 4688 4689 void Prepare(VMCache* topCache, off_t cacheOffset) 4690 { 4691 this->topCache = topCache; 4692 this->cacheOffset = cacheOffset; 4693 page = NULL; 4694 restart = false; 4695 pageAllocated = false; 4696 4697 cacheChainLocker.SetTo(topCache); 4698 } 4699 4700 void UnlockAll(VMCache* exceptCache = NULL) 4701 { 4702 topCache = NULL; 4703 addressSpaceLocker.Unlock(); 4704 cacheChainLocker.Unlock(exceptCache); 4705 } 4706 }; 4707 4708 4709 /*! Gets the page that should be mapped into the area. 4710 Returns an error code other than \c B_OK, if the page couldn't be found or 4711 paged in. The locking state of the address space and the caches is undefined 4712 in that case. 4713 Returns \c B_OK with \c context.restart set to \c true, if the functions 4714 had to unlock the address space and all caches and is supposed to be called 4715 again. 4716 Returns \c B_OK with \c context.restart set to \c false, if the page was 4717 found. It is returned in \c context.page. 
The address space will still be
	locked as well as all caches starting from the top cache to at least the
	cache the page lives in.
*/
static status_t
fault_get_page(PageFaultContext& context)
{
	VMCache* cache = context.topCache;
	VMCache* lastCache = NULL;
	vm_page* page = NULL;

	// Walk the cache chain from the top cache towards its sources until a
	// cache is found that has the page or can read it from its backing store.
	while (cache != NULL) {
		// We already hold the lock of the cache at this point.

		lastCache = cache;

		page = cache->LookupPage(context.cacheOffset);
		if (page != NULL && page->busy) {
			// page must be busy -- wait for it to become unbusy
			// (everything but this cache is unlocked before waiting)
			context.UnlockAll(cache);
			cache->ReleaseRefLocked();
			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);

			// restart the whole process
			context.restart = true;
			return B_OK;
		}

		if (page != NULL)
			break;

		// The current cache does not contain the page we're looking for.

		// see if the backing store has it
		if (cache->HasPage(context.cacheOffset)) {
			// insert a fresh page and mark it busy -- we're going to read it in
			page = vm_page_allocate_page(&context.reservation,
				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
			cache->InsertPage(page, context.cacheOffset);

			// We need to unlock all caches and the address space while reading
			// the page in. Keep a reference to the cache around.
			cache->AcquireRefLocked();
			context.UnlockAll();

			// read the page in
			generic_io_vec vec;
			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;

			status_t status = cache->Read(context.cacheOffset, &vec, 1,
				B_PHYSICAL_IO_REQUEST, &bytesRead);

			cache->Lock();

			if (status < B_OK) {
				// on error remove and free the page
				dprintf("reading page from cache %p returned: %s!\n",
					cache, strerror(status));

				// notify waiters before the page goes away
				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
				cache->RemovePage(page);
				vm_page_set_state(page, PAGE_STATE_FREE);

				cache->ReleaseRefAndUnlock();
				return status;
			}

			// mark the page unbusy again
			cache->MarkPageUnbusy(page);

			DEBUG_PAGE_ACCESS_END(page);

			// Since we needed to unlock everything temporarily, the area
			// situation might have changed. So we need to restart the whole
			// process.
			cache->ReleaseRefAndUnlock();
			context.restart = true;
			return B_OK;
		}

		// descend to the (now locked) source cache, if there is one
		cache = context.cacheChainLocker.LockSourceCache();
	}

	if (page == NULL) {
		// There was no adequate page, determine the cache for a clean one.
		// Read-only pages come in the deepest cache, only the top most cache
		// may have direct write access.
		cache = context.isWrite ? context.topCache : lastCache;

		// allocate a clean page
		page = vm_page_allocate_page(&context.reservation,
			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
			page->physical_page_number));

		// insert the new page into our cache
		cache->InsertPage(page, context.cacheOffset);
		context.pageAllocated = true;
	} else if (page->Cache() != context.topCache && context.isWrite) {
		// We have a page that has the data we want, but in the wrong cache
		// object so we need to copy it and stick it into the top cache.
		vm_page* sourcePage = page;

		// TODO: If memory is low, it might be a good idea to steal the page
		// from our source cache -- if possible, that is.
		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);

		// To not needlessly kill concurrency we unlock all caches but the top
		// one while copying the page. Lacking another mechanism to ensure that
		// the source page doesn't disappear, we mark it busy.
		sourcePage->busy = true;
		context.cacheChainLocker.UnlockKeepRefs(true);

		// copy the page
		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
			sourcePage->physical_page_number * B_PAGE_SIZE);

		// relock the chain and release the source page again
		context.cacheChainLocker.RelockCaches(true);
		sourcePage->Cache()->MarkPageUnbusy(sourcePage);

		// insert the new page into our cache
		context.topCache->InsertPage(page, context.cacheOffset);
		context.pageAllocated = true;
	} else
		DEBUG_PAGE_ACCESS_START(page);

	context.page = page;
	return B_OK;
}


/*!	Makes sure the address in the given address space is mapped.

	\param addressSpace The address space.
	\param originalAddress The address. Doesn't need to be page aligned.
\param isWrite If \c true the address shall be write-accessible.
	\param isExecute If \c true the access is an instruction fetch, i.e. the
		area has to permit execution.
	\param isUser If \c true the access is requested by a userland team.
	\param wirePage On success, if non \c NULL, the wired count of the page
		mapped at the given address is incremented and the page is returned
		via this parameter.
	\return \c B_OK on success, another error code otherwise.
*/
static status_t
vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
{
	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
		originalAddress, isWrite, isUser));

	PageFaultContext context(addressSpace, isWrite);

	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
	status_t status = B_OK;

	addressSpace->IncrementFaultCount();

	// We may need up to 2 pages plus pages needed for mapping them -- reserving
	// the pages upfront makes sure we don't have any cache locked, so that the
	// page daemon/thief can do their job without problems.
	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
		originalAddress);
	context.addressSpaceLocker.Unlock();
	vm_page_reserve_pages(&context.reservation, reservePages,
		addressSpace == VMAddressSpace::Kernel()
			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);

	// Each iteration is one complete attempt to resolve the fault; "continue"
	// restarts from scratch, "break" leaves with the current status.
	while (true) {
		context.addressSpaceLocker.Lock();

		// get the area the fault was in
		VMArea* area = addressSpace->LookupArea(address);
		if (area == NULL) {
			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
				"space\n", originalAddress);
			TPF(PageFaultError(-1,
				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
			status = B_BAD_ADDRESS;
			break;
		}

		// check permissions
		uint32 protection = get_area_page_protection(area, address);
		if (isUser && (protection & B_USER_PROTECTION) == 0
				&& (area->protection & B_KERNEL_AREA) != 0) {
			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
				area->id, (void*)originalAddress);
			TPF(PageFaultError(area->id,
				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
			status = B_PERMISSION_DENIED;
			break;
		}
		if (isWrite && (protection
				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
			dprintf("write access attempted on write-protected area 0x%"
				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
			TPF(PageFaultError(area->id,
				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
			status = B_PERMISSION_DENIED;
			break;
		} else if (isExecute && (protection
				& (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
			dprintf("instruction fetch attempted on execute-protected area 0x%"
				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
			TPF(PageFaultError(area->id,
				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
			status = B_PERMISSION_DENIED;
			break;
		} else if (!isWrite && !isExecute && (protection
				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
				" at %p\n", area->id, (void*)originalAddress);
			TPF(PageFaultError(area->id,
				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
			status = B_PERMISSION_DENIED;
			break;
		}

		// We have the area, it was a valid access, so let's try to resolve the
		// page fault now.
		// At first, the top most cache from the area is investigated.

		context.Prepare(vm_area_get_locked_cache(area),
			address - area->Base() + area->cache_offset);

		// See if this cache has a fault handler -- this will do all the work
		// for us.
		{
			// Note, since the page fault is resolved with interrupts enabled,
			// the fault handler could be called more than once for the same
			// reason -- the store must take this into account.
			status = context.topCache->Fault(addressSpace, context.cacheOffset);
			if (status != B_BAD_HANDLER)
				break;
		}

		// The top most cache has no fault handler, so let's see if the cache or
		// its sources already have the page we're searching for (we're going
		// from top to bottom).
		status = fault_get_page(context);
		if (status != B_OK) {
			TPF(PageFaultError(area->id, status));
			break;
		}

		if (context.restart)
			continue;

		// All went fine, all there is left to do is to map the page into the
		// address space.
		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
			context.page));

		// If the page doesn't reside in the area's cache, we need to make sure
		// it's mapped in read-only, so that we cannot overwrite someone else's
		// data (copy-on-write)
		uint32 newProtection = protection;
		if (context.page->Cache() != context.topCache && !isWrite)
			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);

		bool unmapPage = false;
		bool mapPage = true;

		// check whether there's already a page mapped at the address
		context.map->Lock();

		phys_addr_t physicalAddress;
		uint32 flags;
		vm_page* mappedPage = NULL;
		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
			&& (flags & PAGE_PRESENT) != 0
			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
				!= NULL) {
			// Yep there's already a page. If it's ours, we can simply adjust
			// its protection. Otherwise we have to unmap it.
			if (mappedPage == context.page) {
				context.map->ProtectPage(area, address, newProtection);
					// Note: We assume that ProtectPage() is atomic (i.e.
					// the page isn't temporarily unmapped), otherwise we'd have
					// to make sure it isn't wired.
				mapPage = false;
			} else
				unmapPage = true;
		}

		context.map->Unlock();

		if (unmapPage) {
			// If the page is wired, we can't unmap it. Wait until it is unwired
			// again and restart. Note that the page cannot be wired for
			// writing, since it isn't in the topmost cache. So we can safely
			// ignore ranges wired for writing (our own and other concurrent
			// wiring attempts in progress) and in fact have to do that to avoid
			// a deadlock.
			VMAreaUnwiredWaiter waiter;
			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
				// unlock everything and wait
				if (context.pageAllocated) {
					// ... but since we allocated a page and inserted it into
					// the top cache, remove and free it first. Otherwise we'd
					// have a page from a lower cache mapped while an upper
					// cache has a page that would shadow it.
					context.topCache->RemovePage(context.page);
					vm_page_free_etc(context.topCache, context.page,
						&context.reservation);
				} else
					DEBUG_PAGE_ACCESS_END(context.page);

				context.UnlockAll();
				waiter.waitEntry.Wait();
				continue;
			}

			// Note: The mapped page is a page of a lower cache. We are
			// guaranteed to have that cache locked, our new page is a copy of
			// that page, and the page is not busy. The logic for that guarantee
			// is as follows: Since the page is mapped, it must live in the top
			// cache (ruled out above) or any of its lower caches, and there is
			// (was before the new page was inserted) no other page in any
			// cache between the top cache and the page's cache (otherwise that
			// would be mapped instead). That in turn means that our algorithm
			// must have found it and therefore it cannot be busy either.
			DEBUG_PAGE_ACCESS_START(mappedPage);
			unmap_page(area, address);
			DEBUG_PAGE_ACCESS_END(mappedPage);
		}

		if (mapPage) {
			if (map_page(area, context.page, address, newProtection,
					&context.reservation) != B_OK) {
				// Mapping can only fail, when the page mapping object couldn't
				// be allocated. Save for the missing mapping everything is
				// fine, though. If this was a regular page fault, we'll simply
				// leave and probably fault again. To make sure we'll have more
				// luck then, we ensure that the minimum object reserve is
				// available.
				DEBUG_PAGE_ACCESS_END(context.page);

				context.UnlockAll();

				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
						!= B_OK) {
					// Apparently the situation is serious. Let's get ourselves
					// killed.
					status = B_NO_MEMORY;
				} else if (wirePage != NULL) {
					// The caller expects us to wire the page. Since
					// object_cache_reserve() succeeded, we should now be able
					// to allocate a mapping structure. Restart.
					continue;
				}

				break;
			}
		} else if (context.page->State() == PAGE_STATE_INACTIVE)
			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);

		// also wire the page, if requested
		if (wirePage != NULL && status == B_OK) {
			increment_page_wired_count(context.page);
			*wirePage = context.page;
		}

		DEBUG_PAGE_ACCESS_END(context.page);

		break;
	}

	// Leaving the scope destroys the context, which unlocks whatever is still
	// locked and returns the unused part of the page reservation.
	return status;
}


// The following functions forward to the corresponding methods of the
// physical page mapper (sPhysicalPageMapper) and return their result.

status_t
vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
{
	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
}

status_t
vm_put_physical_page(addr_t vaddr, void* handle)
{
	return sPhysicalPageMapper->PutPage(vaddr, handle);
}


status_t
vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
	void** _handle)
{
	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
}

status_t
vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
{
	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
}


status_t
vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
{
	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
}

status_t
vm_put_physical_page_debug(addr_t vaddr, void* handle)
{
	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
}


/*!	Fills in the memory related fields of \a info: the swap information via
	swap_get_info() and, with the available memory lock held,
	\c needed_memory and \c free_memory.
*/
void
vm_get_info(system_info* info)
{
	swap_get_info(info);

	MutexLocker locker(sAvailableMemoryLock);
	info->needed_memory = sNeededMemory;
	info->free_memory = sAvailableMemory;
}


uint32
vm_num_page_faults(void) 5145 { 5146 return sPageFaults; 5147 } 5148 5149 5150 off_t 5151 vm_available_memory(void) 5152 { 5153 MutexLocker locker(sAvailableMemoryLock); 5154 return sAvailableMemory; 5155 } 5156 5157 5158 off_t 5159 vm_available_not_needed_memory(void) 5160 { 5161 MutexLocker locker(sAvailableMemoryLock); 5162 return sAvailableMemory - sNeededMemory; 5163 } 5164 5165 5166 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 5167 debugger. 5168 */ 5169 off_t 5170 vm_available_not_needed_memory_debug(void) 5171 { 5172 return sAvailableMemory - sNeededMemory; 5173 } 5174 5175 5176 size_t 5177 vm_kernel_address_space_left(void) 5178 { 5179 return VMAddressSpace::Kernel()->FreeSpace(); 5180 } 5181 5182 5183 void 5184 vm_unreserve_memory(size_t amount) 5185 { 5186 mutex_lock(&sAvailableMemoryLock); 5187 5188 sAvailableMemory += amount; 5189 5190 mutex_unlock(&sAvailableMemoryLock); 5191 } 5192 5193 5194 status_t 5195 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 5196 { 5197 size_t reserve = kMemoryReserveForPriority[priority]; 5198 5199 MutexLocker locker(sAvailableMemoryLock); 5200 5201 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5202 5203 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5204 sAvailableMemory -= amount; 5205 return B_OK; 5206 } 5207 5208 if (timeout <= 0) 5209 return B_NO_MEMORY; 5210 5211 // turn timeout into an absolute timeout 5212 timeout += system_time(); 5213 5214 // loop until we've got the memory or the timeout occurs 5215 do { 5216 sNeededMemory += amount; 5217 5218 // call the low resource manager 5219 locker.Unlock(); 5220 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 5221 B_ABSOLUTE_TIMEOUT, timeout); 5222 locker.Lock(); 5223 5224 sNeededMemory -= amount; 5225 5226 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5227 sAvailableMemory -= amount; 5228 return B_OK; 5229 } 5230 } while (timeout > system_time()); 5231 
5232 return B_NO_MEMORY; 5233 } 5234 5235 5236 status_t 5237 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 5238 { 5239 // NOTE: The caller is responsible for synchronizing calls to this function! 5240 5241 AddressSpaceReadLocker locker; 5242 VMArea* area; 5243 status_t status = locker.SetFromArea(id, area); 5244 if (status != B_OK) 5245 return status; 5246 5247 // nothing to do, if the type doesn't change 5248 uint32 oldType = area->MemoryType(); 5249 if (type == oldType) 5250 return B_OK; 5251 5252 // set the memory type of the area and the mapped pages 5253 VMTranslationMap* map = area->address_space->TranslationMap(); 5254 map->Lock(); 5255 area->SetMemoryType(type); 5256 map->ProtectArea(area, area->protection); 5257 map->Unlock(); 5258 5259 // set the physical memory type 5260 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 5261 if (error != B_OK) { 5262 // reset the memory type of the area and the mapped pages 5263 map->Lock(); 5264 area->SetMemoryType(oldType); 5265 map->ProtectArea(area, area->protection); 5266 map->Unlock(); 5267 return error; 5268 } 5269 5270 return B_OK; 5271 5272 } 5273 5274 5275 /*! 
This function enforces some protection properties:
	 - kernel areas must be W^X (after kernel startup)
	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
	 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set

	The last two adjustments are only made when none of the
	\c B_KERNEL_PROTECTION bits is set in \a *protection.

	\param protection In/out: the protection flags to check and adjust.
*/
static void
fix_protection(uint32* protection)
{
	// A writable (kernel or user) and kernel-executable area is only
	// tolerated while the kernel is still starting up.
	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
			|| (*protection & B_WRITE_AREA) != 0)
		&& !gKernelStartup)
		panic("kernel areas cannot be both writable and executable!");

	if ((*protection & B_KERNEL_PROTECTION) == 0) {
		if ((*protection & B_WRITE_AREA) != 0)
			*protection |= B_KERNEL_WRITE_AREA;
		if ((*protection & B_READ_AREA) != 0)
			*protection |= B_KERNEL_READ_AREA;
	}
}


/*!	Fills in \a info from the given \a area.

	The area's cache is locked temporarily to compute \c ram_size. The
	\c copy_count, \c in_count, and \c out_count fields are always set to 0.

	\param area The area to describe.
	\param info The structure to fill in.
	\param size Not used by this function.
*/
static void
fill_area_info(struct VMArea* area, area_info* info, size_t size)
{
	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
	info->area = area->id;
	info->address = (void*)area->Base();
	info->size = area->Size();
	info->protection = area->protection;
	info->lock = area->wiring;
	info->team = area->address_space->ID();
	info->copy_count = 0;
	info->in_count = 0;
	info->out_count = 0;
		// TODO: retrieve real values here!

	VMCache* cache = vm_area_get_locked_cache(area);

	// Note, this is a simplification; the cache could be larger than this area
	info->ram_size = cache->page_count * B_PAGE_SIZE;

	vm_area_put_locked_cache(cache);
}


/*!	Resizes the area with the given ID -- and all other areas attached to the
	same cache -- to \a newSize.

	Only areas whose cache is of type \c CACHE_TYPE_RAM can be resized. When
	shrinking, the function waits until the range to be cut off is no longer
	wired and unmaps the pages beyond the new size. If any step after the
	checks fails, the areas and the cache are resized back to the old size.

	\param areaID The ID of the area to resize.
	\param newSize The new size; must be a multiple of \c B_PAGE_SIZE.
	\param kernel \c false, if the request comes from userland. In that case
		areas in the kernel address space and areas with \c B_KERNEL_AREA
		protection are refused.
	\return \c B_OK on success (also when the size doesn't change),
		\c B_BAD_VALUE for an unaligned size, \c B_NOT_ALLOWED for refused
		requests, \c B_ERROR when one of the areas cannot be grown, or the
		error of the failing lock/resize/allocation step.
*/
static status_t
vm_resize_area(area_id areaID, size_t newSize, bool kernel)
{
	// is newSize a multiple of B_PAGE_SIZE?
	if (newSize & (B_PAGE_SIZE - 1))
		return B_BAD_VALUE;

	// lock all affected address spaces and the cache
	VMArea* area;
	VMCache* cache;

	MultiAddressSpaceLocker locker;
	AreaCacheLocker cacheLocker;

	status_t status;
	size_t oldSize;
	bool anyKernelArea;
	bool restart;

	// The checks are repeated from scratch whenever we had to wait for a
	// wired range, since waiting is flagged via "restart" below.
	do {
		anyKernelArea = false;
		restart = false;

		locker.Unset();
		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
		if (status != B_OK)
			return status;
		cacheLocker.SetTo(cache, true);	// already locked

		// enforce restrictions
		if (!kernel && (area->address_space == VMAddressSpace::Kernel()
				|| (area->protection & B_KERNEL_AREA) != 0)) {
			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
				"resize kernel area %" B_PRId32 " (%s)\n",
				team_get_current_team_id(), areaID, area->name);
			return B_NOT_ALLOWED;
		}
		// TODO: Enforce all restrictions (team, etc.)!

		oldSize = area->Size();
		if (newSize == oldSize)
			return B_OK;

		if (cache->type != CACHE_TYPE_RAM)
			return B_NOT_ALLOWED;

		if (oldSize < newSize) {
			// We need to check if all areas of this cache can be resized.
			for (VMArea* current = cache->areas; current != NULL;
					current = current->cache_next) {
				if (!current->address_space->CanResizeArea(current, newSize))
					return B_ERROR;
				anyKernelArea
					|= current->address_space == VMAddressSpace::Kernel();
			}
		} else {
			// We're shrinking the areas, so we must make sure the affected
			// ranges are not wired.
			for (VMArea* current = cache->areas; current != NULL;
					current = current->cache_next) {
				anyKernelArea
					|= current->address_space == VMAddressSpace::Kernel();

				if (wait_if_area_range_is_wired(current,
						current->Base() + newSize, oldSize - newSize, &locker,
						&cacheLocker)) {
					restart = true;
					break;
				}
			}
		}
	} while (restart);

	// Okay, looks good so far, so let's do it

	// Kernel requests touching at least one kernel area use the system
	// priority and non-blocking heap flags.
	int priority = kernel && anyKernelArea
		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
	uint32 allocationFlags = kernel && anyKernelArea
		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;

	if (oldSize < newSize) {
		// Growing the cache can fail, so we do it first.
		status = cache->Resize(cache->virtual_base + newSize, priority);
		if (status != B_OK)
			return status;
	}

	for (VMArea* current = cache->areas; current != NULL;
			current = current->cache_next) {
		status = current->address_space->ResizeArea(current, newSize,
			allocationFlags);
		if (status != B_OK)
			break;

		// We also need to unmap all pages beyond the new size, if the area has
		// shrunk
		if (newSize < oldSize) {
			VMCacheChainLocker cacheChainLocker(cache);
			cacheChainLocker.LockAllSourceCaches();

			unmap_pages(current, current->Base() + newSize,
				oldSize - newSize);

			// Keep the top cache locked; it is still owned by cacheLocker.
			cacheChainLocker.Unlock(cache);
		}
	}

	if (status == B_OK) {
		// Shrink or grow individual page protections if in use.
		if (area->page_protections != NULL) {
			size_t bytes = area_page_protections_size(newSize);
			uint8* newProtections
				= (uint8*)realloc(area->page_protections, bytes);
			if (newProtections == NULL)
				status = B_NO_MEMORY;
			else {
				area->page_protections = newProtections;

				if (oldSize < newSize) {
					// init the additional page protections to that of the area
					// NOTE(review): the nibble handling below suggests two
					// pages share one byte (low/high 4 bits) -- confirm
					// against area_page_protections_size().
					uint32 offset = area_page_protections_size(oldSize);
					uint32 areaProtection = area->protection
						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
					memset(area->page_protections + offset,
						areaProtection | (areaProtection << 4), bytes - offset);
					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
						// The old size covered an odd number of pages: the
						// upper half of the last old byte belongs to the
						// first new page.
						uint8& entry = area->page_protections[offset - 1];
						entry = (entry & 0x0f) | (areaProtection << 4);
					}
				}
			}
		}
	}

	// shrinking the cache can't fail, so we do it now
	if (status == B_OK && newSize < oldSize)
		status = cache->Resize(cache->virtual_base + newSize, priority);

	if (status != B_OK) {
		// Something failed -- resize the areas back to their original size.
		// This can fail, too, in which case we're seriously screwed.
		for (VMArea* current = cache->areas; current != NULL;
				current = current->cache_next) {
			if (current->address_space->ResizeArea(current, oldSize,
					allocationFlags) != B_OK) {
				panic("vm_resize_area(): Failed and not being able to restore "
					"original state.");
			}
		}

		cache->Resize(cache->virtual_base + oldSize, priority);
	}

	// TODO: we must honour the lock restrictions of this area
	return status;
}


/*!	Forwards to the physical page mapper's MemsetPhysical(). */
status_t
vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
{
	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
}


/*!	Forwards to the physical page mapper's MemcpyFromPhysical(). */
status_t
vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
{
	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
}


/*!	Forwards to the physical page mapper's MemcpyToPhysical(). */
status_t
vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
	bool user)
{
	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
}


/*!	Forwards to the physical page mapper's MemcpyPhysicalPage(). */
void
vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
{
	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
}


/*!	Copies a range of memory directly from/to a page that might not be mapped
	at the moment.

	For \a unsafeMemory the current mapping (if any) is ignored. The function
	walks through the respective area's cache chain to find the physical page
	and copies from/to it directly.
	The memory range starting at \a unsafeMemory with a length of \a size bytes
	must not cross a page boundary.

	\param teamID The team ID identifying the address space \a unsafeMemory is
		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
		is passed, the address space of the thread returned by
		debug_get_debugged_thread() is used.
5523 \param unsafeMemory The start of the unsafe memory range to be copied 5524 from/to. 5525 \param buffer A safely accessible kernel buffer to be copied from/to. 5526 \param size The number of bytes to be copied. 5527 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5528 \a unsafeMemory, the other way around otherwise. 5529 */ 5530 status_t 5531 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5532 size_t size, bool copyToUnsafe) 5533 { 5534 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5535 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5536 return B_BAD_VALUE; 5537 } 5538 5539 // get the address space for the debugged thread 5540 VMAddressSpace* addressSpace; 5541 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5542 addressSpace = VMAddressSpace::Kernel(); 5543 } else if (teamID == B_CURRENT_TEAM) { 5544 Thread* thread = debug_get_debugged_thread(); 5545 if (thread == NULL || thread->team == NULL) 5546 return B_BAD_ADDRESS; 5547 5548 addressSpace = thread->team->address_space; 5549 } else 5550 addressSpace = VMAddressSpace::DebugGet(teamID); 5551 5552 if (addressSpace == NULL) 5553 return B_BAD_ADDRESS; 5554 5555 // get the area 5556 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5557 if (area == NULL) 5558 return B_BAD_ADDRESS; 5559 5560 // search the page 5561 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5562 + area->cache_offset; 5563 VMCache* cache = area->cache; 5564 vm_page* page = NULL; 5565 while (cache != NULL) { 5566 page = cache->DebugLookupPage(cacheOffset); 5567 if (page != NULL) 5568 break; 5569 5570 // Page not found in this cache -- if it is paged out, we must not try 5571 // to get it from lower caches. 
5572 if (cache->DebugHasPage(cacheOffset)) 5573 break; 5574 5575 cache = cache->source; 5576 } 5577 5578 if (page == NULL) 5579 return B_UNSUPPORTED; 5580 5581 // copy from/to physical memory 5582 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5583 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5584 5585 if (copyToUnsafe) { 5586 if (page->Cache() != area->cache) 5587 return B_UNSUPPORTED; 5588 5589 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5590 } 5591 5592 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5593 } 5594 5595 5596 /** Validate that a memory range is either fully in kernel space, or fully in 5597 * userspace */ 5598 static inline bool 5599 validate_memory_range(const void* addr, size_t size) 5600 { 5601 addr_t address = (addr_t)addr; 5602 5603 // Check for overflows on all addresses. 5604 if ((address + size) < address) 5605 return false; 5606 5607 // Validate that the address range does not cross the kernel/user boundary. 5608 return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1); 5609 } 5610 5611 5612 // #pragma mark - kernel public API 5613 5614 5615 status_t 5616 user_memcpy(void* to, const void* from, size_t size) 5617 { 5618 if (!validate_memory_range(to, size) || !validate_memory_range(from, size)) 5619 return B_BAD_ADDRESS; 5620 5621 if (arch_cpu_user_memcpy(to, from, size) < B_OK) 5622 return B_BAD_ADDRESS; 5623 5624 return B_OK; 5625 } 5626 5627 5628 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5629 the string in \a to, NULL-terminating the result. 5630 5631 \param to Pointer to the destination C-string. 5632 \param from Pointer to the source C-string. 5633 \param size Size in bytes of the string buffer pointed to by \a to. 5634 5635 \return strlen(\a from). 
5636 */ 5637 ssize_t 5638 user_strlcpy(char* to, const char* from, size_t size) 5639 { 5640 if (to == NULL && size != 0) 5641 return B_BAD_VALUE; 5642 if (from == NULL) 5643 return B_BAD_ADDRESS; 5644 5645 // Protect the source address from overflows. 5646 size_t maxSize = size; 5647 if ((addr_t)from + maxSize < (addr_t)from) 5648 maxSize -= (addr_t)from + maxSize; 5649 if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize)) 5650 maxSize = USER_TOP - (addr_t)from; 5651 5652 if (!validate_memory_range(to, maxSize)) 5653 return B_BAD_ADDRESS; 5654 5655 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize); 5656 if (result < 0) 5657 return result; 5658 5659 // If we hit the address overflow boundary, fail. 5660 if ((size_t)result >= maxSize && maxSize < size) 5661 return B_BAD_ADDRESS; 5662 5663 return result; 5664 } 5665 5666 5667 status_t 5668 user_memset(void* s, char c, size_t count) 5669 { 5670 if (!validate_memory_range(s, count)) 5671 return B_BAD_ADDRESS; 5672 5673 if (arch_cpu_user_memset(s, c, count) < B_OK) 5674 return B_BAD_ADDRESS; 5675 5676 return B_OK; 5677 } 5678 5679 5680 /*! Wires a single page at the given address. 5681 5682 \param team The team whose address space the address belongs to. Supports 5683 also \c B_CURRENT_TEAM. If the given address is a kernel address, the 5684 parameter is ignored. 5685 \param address address The virtual address to wire down. Does not need to 5686 be page aligned. 5687 \param writable If \c true the page shall be writable. 5688 \param info On success the info is filled in, among other things 5689 containing the physical address the given virtual one translates to. 5690 \return \c B_OK, when the page could be wired, another error code otherwise. 
*/
status_t
vm_wire_page(team_id team, addr_t address, bool writable,
	VMPageWiringInfo* info)
{
	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);

	// compute the page protection that is required
	bool isUser = IS_USER_ADDRESS(address);
	uint32 requiredProtection = PAGE_PRESENT
		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
	if (writable)
		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);

	// get and read lock the address space
	VMAddressSpace* addressSpace = NULL;
	if (isUser) {
		if (team == B_CURRENT_TEAM)
			addressSpace = VMAddressSpace::GetCurrent();
		else
			addressSpace = VMAddressSpace::Get(team);
	} else
		addressSpace = VMAddressSpace::GetKernel();
	if (addressSpace == NULL)
		return B_ERROR;

	// The locker is told to get its own reference (second argument). The one
	// obtained above is kept on success and explicitly put on the failure
	// paths below.
	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);

	VMTranslationMap* map = addressSpace->TranslationMap();
	status_t error = B_OK;

	// get the area
	VMArea* area = addressSpace->LookupArea(pageAddress);
	if (area == NULL) {
		addressSpace->Put();
		return B_BAD_ADDRESS;
	}

	// Lock the area's top cache. This is a requirement for VMArea::Wire().
	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));

	// mark the area range wired
	area->Wire(&info->range);

	// Lock the area's cache chain and the translation map. Needed to look
	// up the page and play with its wired count.
	cacheChainLocker.LockAllSourceCaches();
	map->Lock();

	phys_addr_t physicalAddress;
	uint32 flags;
	vm_page* page;
	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
		&& (flags & requiredProtection) == requiredProtection
		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
			!= NULL) {
		// Already mapped with the correct permissions -- just increment
		// the page's wired count.
		increment_page_wired_count(page);

		map->Unlock();
		cacheChainLocker.Unlock();
		addressSpaceLocker.Unlock();
	} else {
		// Let vm_soft_fault() map the page for us, if possible. We need
		// to fully unlock to avoid deadlocks. Since we have already
		// wired the area itself, nothing disturbing will happen with it
		// in the meantime.
		map->Unlock();
		cacheChainLocker.Unlock();
		addressSpaceLocker.Unlock();

		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
			isUser, &page);

		if (error != B_OK) {
			// The page could not be mapped -- clean up.
			// Undo the wiring of the area range (requires the top cache to
			// be locked) and drop the address space reference.
			VMCache* cache = vm_area_get_locked_cache(area);
			area->Unwire(&info->range);
			cache->ReleaseRefAndUnlock();
			addressSpace->Put();
			return error;
		}
	}

	// Report the physical address, including the offset of the original
	// (possibly unaligned) address within its page.
	info->physicalAddress
		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
			+ address % B_PAGE_SIZE;
	info->page = page;

	return B_OK;
}


/*!	Unwires a single page previously wired via vm_wire_page().

	\param info The same object passed to vm_wire_page() before.
*/
void
vm_unwire_page(VMPageWiringInfo* info)
{
	// lock the address space
	VMArea* area = info->range.area;
	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
		// takes over our reference

	// lock the top cache
	VMCache* cache = vm_area_get_locked_cache(area);
	VMCacheChainLocker cacheChainLocker(cache);

	if (info->page->Cache() != cache) {
		// The page is not in the top cache, so we lock the whole cache chain
		// before touching the page's wired count.
		cacheChainLocker.LockAllSourceCaches();
	}

	decrement_page_wired_count(info->page);

	// remove the wired range from the area
	area->Unwire(&info->range);

	cacheChainLocker.Unlock();
}


/*!	Wires down the given address range in the specified team's address space.

	If successful the function
	- acquires a reference to the specified team's address space,
	- adds respective wired ranges to all areas that intersect with the given
	  address range,
	- makes sure all pages in the given address range are mapped with the
	  requested access permissions and increments their wired count.

	It fails, when \a team doesn't specify a valid address space, when any part
	of the specified address range is not covered by areas, when the concerned
	areas don't allow mapping with the requested permissions, or when mapping
	failed for another reason.

	When successful the call must be balanced by an unlock_memory_etc() call
	with the exact same parameters.

	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
		is supported.
	\param address The start of the address range to be wired.
	\param numBytes The size of the address range to be wired.
	\param flags Flags.
Currently only \c B_READ_DEVICE is defined, which 5839 requests that the range must be wired writable ("read from device 5840 into memory"). 5841 \return \c B_OK on success, another error code otherwise. 5842 */ 5843 status_t 5844 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5845 { 5846 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5847 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5848 5849 // compute the page protection that is required 5850 bool isUser = IS_USER_ADDRESS(address); 5851 bool writable = (flags & B_READ_DEVICE) == 0; 5852 uint32 requiredProtection = PAGE_PRESENT 5853 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5854 if (writable) 5855 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5856 5857 uint32 mallocFlags = isUser 5858 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5859 5860 // get and read lock the address space 5861 VMAddressSpace* addressSpace = NULL; 5862 if (isUser) { 5863 if (team == B_CURRENT_TEAM) 5864 addressSpace = VMAddressSpace::GetCurrent(); 5865 else 5866 addressSpace = VMAddressSpace::Get(team); 5867 } else 5868 addressSpace = VMAddressSpace::GetKernel(); 5869 if (addressSpace == NULL) 5870 return B_ERROR; 5871 5872 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5873 // We get a new address space reference here. The one we got above will 5874 // be freed by unlock_memory_etc(). 
5875 5876 VMTranslationMap* map = addressSpace->TranslationMap(); 5877 status_t error = B_OK; 5878 5879 // iterate through all concerned areas 5880 addr_t nextAddress = lockBaseAddress; 5881 while (nextAddress != lockEndAddress) { 5882 // get the next area 5883 VMArea* area = addressSpace->LookupArea(nextAddress); 5884 if (area == NULL) { 5885 error = B_BAD_ADDRESS; 5886 break; 5887 } 5888 5889 addr_t areaStart = nextAddress; 5890 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5891 5892 // allocate the wired range (do that before locking the cache to avoid 5893 // deadlocks) 5894 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5895 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5896 if (range == NULL) { 5897 error = B_NO_MEMORY; 5898 break; 5899 } 5900 5901 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5902 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5903 5904 // mark the area range wired 5905 area->Wire(range); 5906 5907 // Depending on the area cache type and the wiring, we may not need to 5908 // look at the individual pages. 5909 if (area->cache_type == CACHE_TYPE_NULL 5910 || area->cache_type == CACHE_TYPE_DEVICE 5911 || area->wiring == B_FULL_LOCK 5912 || area->wiring == B_CONTIGUOUS) { 5913 nextAddress = areaEnd; 5914 continue; 5915 } 5916 5917 // Lock the area's cache chain and the translation map. Needed to look 5918 // up pages and play with their wired count. 
5919 cacheChainLocker.LockAllSourceCaches(); 5920 map->Lock(); 5921 5922 // iterate through the pages and wire them 5923 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5924 phys_addr_t physicalAddress; 5925 uint32 flags; 5926 5927 vm_page* page; 5928 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5929 && (flags & requiredProtection) == requiredProtection 5930 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5931 != NULL) { 5932 // Already mapped with the correct permissions -- just increment 5933 // the page's wired count. 5934 increment_page_wired_count(page); 5935 } else { 5936 // Let vm_soft_fault() map the page for us, if possible. We need 5937 // to fully unlock to avoid deadlocks. Since we have already 5938 // wired the area itself, nothing disturbing will happen with it 5939 // in the meantime. 5940 map->Unlock(); 5941 cacheChainLocker.Unlock(); 5942 addressSpaceLocker.Unlock(); 5943 5944 error = vm_soft_fault(addressSpace, nextAddress, writable, 5945 false, isUser, &page); 5946 5947 addressSpaceLocker.Lock(); 5948 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5949 cacheChainLocker.LockAllSourceCaches(); 5950 map->Lock(); 5951 } 5952 5953 if (error != B_OK) 5954 break; 5955 } 5956 5957 map->Unlock(); 5958 5959 if (error == B_OK) { 5960 cacheChainLocker.Unlock(); 5961 } else { 5962 // An error occurred, so abort right here. If the current address 5963 // is the first in this area, unwire the area, since we won't get 5964 // to it when reverting what we've done so far. 5965 if (nextAddress == areaStart) { 5966 area->Unwire(range); 5967 cacheChainLocker.Unlock(); 5968 range->~VMAreaWiredRange(); 5969 free_etc(range, mallocFlags); 5970 } else 5971 cacheChainLocker.Unlock(); 5972 5973 break; 5974 } 5975 } 5976 5977 if (error != B_OK) { 5978 // An error occurred, so unwire all that we've already wired. 
Note that 5979 // even if not a single page was wired, unlock_memory_etc() is called 5980 // to put the address space reference. 5981 addressSpaceLocker.Unlock(); 5982 unlock_memory_etc(team, (void*)lockBaseAddress, 5983 nextAddress - lockBaseAddress, flags); 5984 } 5985 5986 return error; 5987 } 5988 5989 5990 status_t 5991 lock_memory(void* address, size_t numBytes, uint32 flags) 5992 { 5993 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5994 } 5995 5996 5997 /*! Unwires an address range previously wired with lock_memory_etc(). 5998 5999 Note that a call to this function must balance a previous lock_memory_etc() 6000 call with exactly the same parameters. 6001 */ 6002 status_t 6003 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 6004 { 6005 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 6006 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 6007 6008 // compute the page protection that is required 6009 bool isUser = IS_USER_ADDRESS(address); 6010 bool writable = (flags & B_READ_DEVICE) == 0; 6011 uint32 requiredProtection = PAGE_PRESENT 6012 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 6013 if (writable) 6014 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 6015 6016 uint32 mallocFlags = isUser 6017 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 6018 6019 // get and read lock the address space 6020 VMAddressSpace* addressSpace = NULL; 6021 if (isUser) { 6022 if (team == B_CURRENT_TEAM) 6023 addressSpace = VMAddressSpace::GetCurrent(); 6024 else 6025 addressSpace = VMAddressSpace::Get(team); 6026 } else 6027 addressSpace = VMAddressSpace::GetKernel(); 6028 if (addressSpace == NULL) 6029 return B_ERROR; 6030 6031 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false); 6032 // Take over the address space reference. We don't unlock until we're 6033 // done. 
6034 6035 VMTranslationMap* map = addressSpace->TranslationMap(); 6036 status_t error = B_OK; 6037 6038 // iterate through all concerned areas 6039 addr_t nextAddress = lockBaseAddress; 6040 while (nextAddress != lockEndAddress) { 6041 // get the next area 6042 VMArea* area = addressSpace->LookupArea(nextAddress); 6043 if (area == NULL) { 6044 error = B_BAD_ADDRESS; 6045 break; 6046 } 6047 6048 addr_t areaStart = nextAddress; 6049 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 6050 6051 // Lock the area's top cache. This is a requirement for 6052 // VMArea::Unwire(). 6053 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 6054 6055 // Depending on the area cache type and the wiring, we may not need to 6056 // look at the individual pages. 6057 if (area->cache_type == CACHE_TYPE_NULL 6058 || area->cache_type == CACHE_TYPE_DEVICE 6059 || area->wiring == B_FULL_LOCK 6060 || area->wiring == B_CONTIGUOUS) { 6061 // unwire the range (to avoid deadlocks we delete the range after 6062 // unlocking the cache) 6063 nextAddress = areaEnd; 6064 VMAreaWiredRange* range = area->Unwire(areaStart, 6065 areaEnd - areaStart, writable); 6066 cacheChainLocker.Unlock(); 6067 if (range != NULL) { 6068 range->~VMAreaWiredRange(); 6069 free_etc(range, mallocFlags); 6070 } 6071 continue; 6072 } 6073 6074 // Lock the area's cache chain and the translation map. Needed to look 6075 // up pages and play with their wired count. 
6076 cacheChainLocker.LockAllSourceCaches(); 6077 map->Lock(); 6078 6079 // iterate through the pages and unwire them 6080 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 6081 phys_addr_t physicalAddress; 6082 uint32 flags; 6083 6084 vm_page* page; 6085 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 6086 && (flags & PAGE_PRESENT) != 0 6087 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 6088 != NULL) { 6089 // Already mapped with the correct permissions -- just increment 6090 // the page's wired count. 6091 decrement_page_wired_count(page); 6092 } else { 6093 panic("unlock_memory_etc(): Failed to unwire page: address " 6094 "space %p, address: %#" B_PRIxADDR, addressSpace, 6095 nextAddress); 6096 error = B_BAD_VALUE; 6097 break; 6098 } 6099 } 6100 6101 map->Unlock(); 6102 6103 // All pages are unwired. Remove the area's wired range as well (to 6104 // avoid deadlocks we delete the range after unlocking the cache). 6105 VMAreaWiredRange* range = area->Unwire(areaStart, 6106 areaEnd - areaStart, writable); 6107 6108 cacheChainLocker.Unlock(); 6109 6110 if (range != NULL) { 6111 range->~VMAreaWiredRange(); 6112 free_etc(range, mallocFlags); 6113 } 6114 6115 if (error != B_OK) 6116 break; 6117 } 6118 6119 // get rid of the address space reference lock_memory_etc() acquired 6120 addressSpace->Put(); 6121 6122 return error; 6123 } 6124 6125 6126 status_t 6127 unlock_memory(void* address, size_t numBytes, uint32 flags) 6128 { 6129 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 6130 } 6131 6132 6133 /*! Similar to get_memory_map(), but also allows to specify the address space 6134 for the memory in question and has a saner semantics. 6135 Returns \c B_OK when the complete range could be translated or 6136 \c B_BUFFER_OVERFLOW, if the provided array wasn't big enough. In either 6137 case the actual number of entries is written to \c *_numEntries. 
Any other 6138 error case indicates complete failure; \c *_numEntries will be set to \c 0 6139 in this case. 6140 */ 6141 status_t 6142 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 6143 physical_entry* table, uint32* _numEntries) 6144 { 6145 uint32 numEntries = *_numEntries; 6146 *_numEntries = 0; 6147 6148 VMAddressSpace* addressSpace; 6149 addr_t virtualAddress = (addr_t)address; 6150 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 6151 phys_addr_t physicalAddress; 6152 status_t status = B_OK; 6153 int32 index = -1; 6154 addr_t offset = 0; 6155 bool interrupts = are_interrupts_enabled(); 6156 6157 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " " 6158 "entries)\n", team, address, numBytes, numEntries)); 6159 6160 if (numEntries == 0 || numBytes == 0) 6161 return B_BAD_VALUE; 6162 6163 // in which address space is the address to be found? 6164 if (IS_USER_ADDRESS(virtualAddress)) { 6165 if (team == B_CURRENT_TEAM) 6166 addressSpace = VMAddressSpace::GetCurrent(); 6167 else 6168 addressSpace = VMAddressSpace::Get(team); 6169 } else 6170 addressSpace = VMAddressSpace::GetKernel(); 6171 6172 if (addressSpace == NULL) 6173 return B_ERROR; 6174 6175 VMTranslationMap* map = addressSpace->TranslationMap(); 6176 6177 if (interrupts) 6178 map->Lock(); 6179 6180 while (offset < numBytes) { 6181 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 6182 uint32 flags; 6183 6184 if (interrupts) { 6185 status = map->Query((addr_t)address + offset, &physicalAddress, 6186 &flags); 6187 } else { 6188 status = map->QueryInterrupt((addr_t)address + offset, 6189 &physicalAddress, &flags); 6190 } 6191 if (status < B_OK) 6192 break; 6193 if ((flags & PAGE_PRESENT) == 0) { 6194 panic("get_memory_map() called on unmapped memory!"); 6195 return B_BAD_ADDRESS; 6196 } 6197 6198 if (index < 0 && pageOffset > 0) { 6199 physicalAddress += pageOffset; 6200 if (bytes > B_PAGE_SIZE - pageOffset) 6201 bytes = B_PAGE_SIZE - pageOffset; 6202 } 
6203 6204 // need to switch to the next physical_entry? 6205 if (index < 0 || table[index].address 6206 != physicalAddress - table[index].size) { 6207 if ((uint32)++index + 1 > numEntries) { 6208 // table to small 6209 break; 6210 } 6211 table[index].address = physicalAddress; 6212 table[index].size = bytes; 6213 } else { 6214 // page does fit in current entry 6215 table[index].size += bytes; 6216 } 6217 6218 offset += bytes; 6219 } 6220 6221 if (interrupts) 6222 map->Unlock(); 6223 6224 if (status != B_OK) 6225 return status; 6226 6227 if ((uint32)index + 1 > numEntries) { 6228 *_numEntries = index; 6229 return B_BUFFER_OVERFLOW; 6230 } 6231 6232 *_numEntries = index + 1; 6233 return B_OK; 6234 } 6235 6236 6237 /*! According to the BeBook, this function should always succeed. 6238 This is no longer the case. 6239 */ 6240 extern "C" int32 6241 __get_memory_map_haiku(const void* address, size_t numBytes, 6242 physical_entry* table, int32 numEntries) 6243 { 6244 uint32 entriesRead = numEntries; 6245 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 6246 table, &entriesRead); 6247 if (error != B_OK) 6248 return error; 6249 6250 // close the entry list 6251 6252 // if it's only one entry, we will silently accept the missing ending 6253 if (numEntries == 1) 6254 return B_OK; 6255 6256 if (entriesRead + 1 > (uint32)numEntries) 6257 return B_BUFFER_OVERFLOW; 6258 6259 table[entriesRead].address = 0; 6260 table[entriesRead].size = 0; 6261 6262 return B_OK; 6263 } 6264 6265 6266 area_id 6267 area_for(void* address) 6268 { 6269 return vm_area_for((addr_t)address, true); 6270 } 6271 6272 6273 area_id 6274 find_area(const char* name) 6275 { 6276 return VMAreas::Find(name); 6277 } 6278 6279 6280 status_t 6281 _get_area_info(area_id id, area_info* info, size_t size) 6282 { 6283 if (size != sizeof(area_info) || info == NULL) 6284 return B_BAD_VALUE; 6285 6286 AddressSpaceReadLocker locker; 6287 VMArea* area; 6288 status_t status = locker.SetFromArea(id, 
area); 6289 if (status != B_OK) 6290 return status; 6291 6292 fill_area_info(area, info, size); 6293 return B_OK; 6294 } 6295 6296 6297 status_t 6298 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 6299 { 6300 addr_t nextBase = *(addr_t*)cookie; 6301 6302 // we're already through the list 6303 if (nextBase == (addr_t)-1) 6304 return B_ENTRY_NOT_FOUND; 6305 6306 if (team == B_CURRENT_TEAM) 6307 team = team_get_current_team_id(); 6308 6309 AddressSpaceReadLocker locker(team); 6310 if (!locker.IsLocked()) 6311 return B_BAD_TEAM_ID; 6312 6313 VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false); 6314 if (area == NULL) { 6315 nextBase = (addr_t)-1; 6316 return B_ENTRY_NOT_FOUND; 6317 } 6318 6319 fill_area_info(area, info, size); 6320 *cookie = (ssize_t)(area->Base() + 1); 6321 6322 return B_OK; 6323 } 6324 6325 6326 status_t 6327 set_area_protection(area_id area, uint32 newProtection) 6328 { 6329 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 6330 newProtection, true); 6331 } 6332 6333 6334 status_t 6335 resize_area(area_id areaID, size_t newSize) 6336 { 6337 return vm_resize_area(areaID, newSize, true); 6338 } 6339 6340 6341 /*! Transfers the specified area to a new team. The caller must be the owner 6342 of the area. 6343 */ 6344 area_id 6345 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 6346 bool kernel) 6347 { 6348 area_info info; 6349 status_t status = get_area_info(id, &info); 6350 if (status != B_OK) 6351 return status; 6352 6353 if (info.team != thread_get_current_thread()->team->id) 6354 return B_PERMISSION_DENIED; 6355 6356 // We need to mark the area cloneable so the following operations work. 
6357 status = set_area_protection(id, info.protection | B_CLONEABLE_AREA); 6358 if (status != B_OK) 6359 return status; 6360 6361 area_id clonedArea = vm_clone_area(target, info.name, _address, 6362 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 6363 if (clonedArea < 0) 6364 return clonedArea; 6365 6366 status = vm_delete_area(info.team, id, kernel); 6367 if (status != B_OK) { 6368 vm_delete_area(target, clonedArea, kernel); 6369 return status; 6370 } 6371 6372 // Now we can reset the protection to whatever it was before. 6373 set_area_protection(clonedArea, info.protection); 6374 6375 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 6376 6377 return clonedArea; 6378 } 6379 6380 6381 extern "C" area_id 6382 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 6383 size_t numBytes, uint32 addressSpec, uint32 protection, 6384 void** _virtualAddress) 6385 { 6386 if (!arch_vm_supports_protection(protection)) 6387 return B_NOT_SUPPORTED; 6388 6389 fix_protection(&protection); 6390 6391 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 6392 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 6393 false); 6394 } 6395 6396 6397 area_id 6398 clone_area(const char* name, void** _address, uint32 addressSpec, 6399 uint32 protection, area_id source) 6400 { 6401 if ((protection & B_KERNEL_PROTECTION) == 0) 6402 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 6403 6404 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 6405 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 6406 } 6407 6408 6409 area_id 6410 create_area_etc(team_id team, const char* name, size_t size, uint32 lock, 6411 uint32 protection, uint32 flags, uint32 guardSize, 6412 const virtual_address_restrictions* virtualAddressRestrictions, 6413 const physical_address_restrictions* physicalAddressRestrictions, 6414 void** _address) 6415 { 6416 fix_protection(&protection); 6417 6418 return 
vm_create_anonymous_area(team, name, size, lock, protection, flags, 6419 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 6420 true, _address); 6421 } 6422 6423 6424 extern "C" area_id 6425 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 6426 size_t size, uint32 lock, uint32 protection) 6427 { 6428 fix_protection(&protection); 6429 6430 virtual_address_restrictions virtualRestrictions = {}; 6431 virtualRestrictions.address = *_address; 6432 virtualRestrictions.address_specification = addressSpec; 6433 physical_address_restrictions physicalRestrictions = {}; 6434 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 6435 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 6436 true, _address); 6437 } 6438 6439 6440 status_t 6441 delete_area(area_id area) 6442 { 6443 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 6444 } 6445 6446 6447 // #pragma mark - Userland syscalls 6448 6449 6450 status_t 6451 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 6452 addr_t size) 6453 { 6454 // filter out some unavailable values (for userland) 6455 switch (addressSpec) { 6456 case B_ANY_KERNEL_ADDRESS: 6457 case B_ANY_KERNEL_BLOCK_ADDRESS: 6458 return B_BAD_VALUE; 6459 } 6460 6461 addr_t address; 6462 6463 if (!IS_USER_ADDRESS(userAddress) 6464 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 6465 return B_BAD_ADDRESS; 6466 6467 status_t status = vm_reserve_address_range( 6468 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 6469 RESERVED_AVOID_BASE); 6470 if (status != B_OK) 6471 return status; 6472 6473 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 6474 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6475 (void*)address, size); 6476 return B_BAD_ADDRESS; 6477 } 6478 6479 return B_OK; 6480 } 6481 6482 6483 status_t 6484 _user_unreserve_address_range(addr_t address, addr_t size) 6485 { 6486 return 
vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6487 (void*)address, size); 6488 } 6489 6490 6491 area_id 6492 _user_area_for(void* address) 6493 { 6494 return vm_area_for((addr_t)address, false); 6495 } 6496 6497 6498 area_id 6499 _user_find_area(const char* userName) 6500 { 6501 char name[B_OS_NAME_LENGTH]; 6502 6503 if (!IS_USER_ADDRESS(userName) 6504 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 6505 return B_BAD_ADDRESS; 6506 6507 return find_area(name); 6508 } 6509 6510 6511 status_t 6512 _user_get_area_info(area_id area, area_info* userInfo) 6513 { 6514 if (!IS_USER_ADDRESS(userInfo)) 6515 return B_BAD_ADDRESS; 6516 6517 area_info info; 6518 status_t status = get_area_info(area, &info); 6519 if (status < B_OK) 6520 return status; 6521 6522 // TODO: do we want to prevent userland from seeing kernel protections? 6523 //info.protection &= B_USER_PROTECTION; 6524 6525 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6526 return B_BAD_ADDRESS; 6527 6528 return status; 6529 } 6530 6531 6532 status_t 6533 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 6534 { 6535 ssize_t cookie; 6536 6537 if (!IS_USER_ADDRESS(userCookie) 6538 || !IS_USER_ADDRESS(userInfo) 6539 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 6540 return B_BAD_ADDRESS; 6541 6542 area_info info; 6543 status_t status = _get_next_area_info(team, &cookie, &info, 6544 sizeof(area_info)); 6545 if (status != B_OK) 6546 return status; 6547 6548 //info.protection &= B_USER_PROTECTION; 6549 6550 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6551 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6552 return B_BAD_ADDRESS; 6553 6554 return status; 6555 } 6556 6557 6558 status_t 6559 _user_set_area_protection(area_id area, uint32 newProtection) 6560 { 6561 if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0) 6562 return B_BAD_VALUE; 6563 6564 return 
vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6565 newProtection, false); 6566 } 6567 6568 6569 status_t 6570 _user_resize_area(area_id area, size_t newSize) 6571 { 6572 // TODO: Since we restrict deleting of areas to those owned by the team, 6573 // we should also do that for resizing (check other functions, too). 6574 return vm_resize_area(area, newSize, false); 6575 } 6576 6577 6578 area_id 6579 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6580 team_id target) 6581 { 6582 // filter out some unavailable values (for userland) 6583 switch (addressSpec) { 6584 case B_ANY_KERNEL_ADDRESS: 6585 case B_ANY_KERNEL_BLOCK_ADDRESS: 6586 return B_BAD_VALUE; 6587 } 6588 6589 void* address; 6590 if (!IS_USER_ADDRESS(userAddress) 6591 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6592 return B_BAD_ADDRESS; 6593 6594 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6595 if (newArea < B_OK) 6596 return newArea; 6597 6598 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6599 return B_BAD_ADDRESS; 6600 6601 return newArea; 6602 } 6603 6604 6605 area_id 6606 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6607 uint32 protection, area_id sourceArea) 6608 { 6609 char name[B_OS_NAME_LENGTH]; 6610 void* address; 6611 6612 // filter out some unavailable values (for userland) 6613 switch (addressSpec) { 6614 case B_ANY_KERNEL_ADDRESS: 6615 case B_ANY_KERNEL_BLOCK_ADDRESS: 6616 return B_BAD_VALUE; 6617 } 6618 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6619 return B_BAD_VALUE; 6620 6621 if (!IS_USER_ADDRESS(userName) 6622 || !IS_USER_ADDRESS(userAddress) 6623 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6624 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6625 return B_BAD_ADDRESS; 6626 6627 fix_protection(&protection); 6628 6629 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6630 &address, addressSpec, protection, 
REGION_NO_PRIVATE_MAP, sourceArea, 6631 false); 6632 if (clonedArea < B_OK) 6633 return clonedArea; 6634 6635 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6636 delete_area(clonedArea); 6637 return B_BAD_ADDRESS; 6638 } 6639 6640 return clonedArea; 6641 } 6642 6643 6644 area_id 6645 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6646 size_t size, uint32 lock, uint32 protection) 6647 { 6648 char name[B_OS_NAME_LENGTH]; 6649 void* address; 6650 6651 // filter out some unavailable values (for userland) 6652 switch (addressSpec) { 6653 case B_ANY_KERNEL_ADDRESS: 6654 case B_ANY_KERNEL_BLOCK_ADDRESS: 6655 return B_BAD_VALUE; 6656 } 6657 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6658 return B_BAD_VALUE; 6659 6660 if (!IS_USER_ADDRESS(userName) 6661 || !IS_USER_ADDRESS(userAddress) 6662 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6663 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6664 return B_BAD_ADDRESS; 6665 6666 if (addressSpec == B_EXACT_ADDRESS 6667 && IS_KERNEL_ADDRESS(address)) 6668 return B_BAD_VALUE; 6669 6670 if (addressSpec == B_ANY_ADDRESS) 6671 addressSpec = B_RANDOMIZED_ANY_ADDRESS; 6672 if (addressSpec == B_BASE_ADDRESS) 6673 addressSpec = B_RANDOMIZED_BASE_ADDRESS; 6674 6675 fix_protection(&protection); 6676 6677 virtual_address_restrictions virtualRestrictions = {}; 6678 virtualRestrictions.address = address; 6679 virtualRestrictions.address_specification = addressSpec; 6680 physical_address_restrictions physicalRestrictions = {}; 6681 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6682 size, lock, protection, 0, 0, &virtualRestrictions, 6683 &physicalRestrictions, false, &address); 6684 6685 if (area >= B_OK 6686 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6687 delete_area(area); 6688 return B_BAD_ADDRESS; 6689 } 6690 6691 return area; 6692 } 6693 6694 6695 status_t 6696 _user_delete_area(area_id area) 6697 { 6698 // Unlike 
the BeOS implementation, you can now only delete areas 6699 // that you have created yourself from userland. 6700 // The documentation to delete_area() explicitly states that this 6701 // will be restricted in the future, and so it will. 6702 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6703 } 6704 6705 6706 // TODO: create a BeOS style call for this! 6707 6708 area_id 6709 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6710 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6711 int fd, off_t offset) 6712 { 6713 char name[B_OS_NAME_LENGTH]; 6714 void* address; 6715 area_id area; 6716 6717 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6718 return B_BAD_VALUE; 6719 6720 fix_protection(&protection); 6721 6722 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6723 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6724 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6725 return B_BAD_ADDRESS; 6726 6727 if (addressSpec == B_EXACT_ADDRESS) { 6728 if ((addr_t)address + size < (addr_t)address 6729 || (addr_t)address % B_PAGE_SIZE != 0) { 6730 return B_BAD_VALUE; 6731 } 6732 if (!IS_USER_ADDRESS(address) 6733 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6734 return B_BAD_ADDRESS; 6735 } 6736 } 6737 6738 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6739 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6740 false); 6741 if (area < B_OK) 6742 return area; 6743 6744 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6745 return B_BAD_ADDRESS; 6746 6747 return area; 6748 } 6749 6750 6751 status_t 6752 _user_unmap_memory(void* _address, size_t size) 6753 { 6754 addr_t address = (addr_t)_address; 6755 6756 // check params 6757 if (size == 0 || (addr_t)address + size < (addr_t)address 6758 || (addr_t)address % B_PAGE_SIZE != 0) { 6759 return B_BAD_VALUE; 6760 } 6761 6762 if (!IS_USER_ADDRESS(address) 6763 || 
!IS_USER_ADDRESS((addr_t)address + size - 1)) { 6764 return B_BAD_ADDRESS; 6765 } 6766 6767 // Write lock the address space and ensure the address range is not wired. 6768 AddressSpaceWriteLocker locker; 6769 do { 6770 status_t status = locker.SetTo(team_get_current_team_id()); 6771 if (status != B_OK) 6772 return status; 6773 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6774 size, &locker)); 6775 6776 // unmap 6777 return unmap_address_range(locker.AddressSpace(), address, size, false); 6778 } 6779 6780 6781 status_t 6782 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6783 { 6784 // check address range 6785 addr_t address = (addr_t)_address; 6786 size = PAGE_ALIGN(size); 6787 6788 if ((address % B_PAGE_SIZE) != 0) 6789 return B_BAD_VALUE; 6790 if (!is_user_address_range(_address, size)) { 6791 // weird error code required by POSIX 6792 return ENOMEM; 6793 } 6794 6795 // extend and check protection 6796 if ((protection & ~B_USER_PROTECTION) != 0) 6797 return B_BAD_VALUE; 6798 6799 fix_protection(&protection); 6800 6801 // We need to write lock the address space, since we're going to play with 6802 // the areas. Also make sure that none of the areas is wired and that we're 6803 // actually allowed to change the protection. 6804 AddressSpaceWriteLocker locker; 6805 6806 bool restart; 6807 do { 6808 restart = false; 6809 6810 status_t status = locker.SetTo(team_get_current_team_id()); 6811 if (status != B_OK) 6812 return status; 6813 6814 // First round: Check whether the whole range is covered by areas and we 6815 // are allowed to modify them. 
6816 addr_t currentAddress = address; 6817 size_t sizeLeft = size; 6818 while (sizeLeft > 0) { 6819 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6820 if (area == NULL) 6821 return B_NO_MEMORY; 6822 6823 if ((area->protection & B_KERNEL_AREA) != 0) 6824 return B_NOT_ALLOWED; 6825 if (area->protection_max != 0 6826 && (protection & area->protection_max) != (protection & B_USER_PROTECTION)) { 6827 return B_NOT_ALLOWED; 6828 } 6829 6830 addr_t offset = currentAddress - area->Base(); 6831 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6832 6833 AreaCacheLocker cacheLocker(area); 6834 6835 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6836 &locker, &cacheLocker)) { 6837 restart = true; 6838 break; 6839 } 6840 6841 cacheLocker.Unlock(); 6842 6843 currentAddress += rangeSize; 6844 sizeLeft -= rangeSize; 6845 } 6846 } while (restart); 6847 6848 // Second round: If the protections differ from that of the area, create a 6849 // page protection array and re-map mapped pages. 6850 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6851 addr_t currentAddress = address; 6852 size_t sizeLeft = size; 6853 while (sizeLeft > 0) { 6854 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6855 if (area == NULL) 6856 return B_NO_MEMORY; 6857 6858 addr_t offset = currentAddress - area->Base(); 6859 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6860 6861 currentAddress += rangeSize; 6862 sizeLeft -= rangeSize; 6863 6864 if (area->page_protections == NULL) { 6865 if (area->protection == protection) 6866 continue; 6867 if (offset == 0 && rangeSize == area->Size()) { 6868 // The whole area is covered: let set_area_protection handle it. 
6869 status_t status = vm_set_area_protection(area->address_space->ID(), 6870 area->id, protection, false); 6871 if (status != B_OK) 6872 return status; 6873 continue; 6874 } 6875 6876 status_t status = allocate_area_page_protections(area); 6877 if (status != B_OK) 6878 return status; 6879 } 6880 6881 // We need to lock the complete cache chain, since we potentially unmap 6882 // pages of lower caches. 6883 VMCache* topCache = vm_area_get_locked_cache(area); 6884 VMCacheChainLocker cacheChainLocker(topCache); 6885 cacheChainLocker.LockAllSourceCaches(); 6886 6887 // Adjust the committed size, if necessary. 6888 if (topCache->source != NULL && topCache->temporary) { 6889 const bool becomesWritable = (protection & B_WRITE_AREA) != 0; 6890 ssize_t commitmentChange = 0; 6891 for (addr_t pageAddress = area->Base() + offset; 6892 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6893 if (topCache->LookupPage(pageAddress) != NULL) { 6894 // This page should already be accounted for in the commitment. 
6895 continue; 6896 } 6897 6898 const bool isWritable 6899 = (get_area_page_protection(area, pageAddress) & B_WRITE_AREA) != 0; 6900 6901 if (becomesWritable && !isWritable) 6902 commitmentChange += B_PAGE_SIZE; 6903 else if (!becomesWritable && isWritable) 6904 commitmentChange -= B_PAGE_SIZE; 6905 } 6906 6907 if (commitmentChange != 0) { 6908 const off_t newCommitment = topCache->committed_size + commitmentChange; 6909 ASSERT(newCommitment <= (topCache->virtual_end - topCache->virtual_base)); 6910 status_t status = topCache->Commit(newCommitment, VM_PRIORITY_USER); 6911 if (status != B_OK) 6912 return status; 6913 } 6914 } 6915 6916 for (addr_t pageAddress = area->Base() + offset; 6917 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6918 map->Lock(); 6919 6920 set_area_page_protection(area, pageAddress, protection); 6921 6922 phys_addr_t physicalAddress; 6923 uint32 flags; 6924 6925 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6926 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6927 map->Unlock(); 6928 continue; 6929 } 6930 6931 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6932 if (page == NULL) { 6933 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6934 "\n", area, physicalAddress); 6935 map->Unlock(); 6936 return B_ERROR; 6937 } 6938 6939 // If the page is not in the topmost cache and write access is 6940 // requested, we have to unmap it. Otherwise we can re-map it with 6941 // the new protection. 
6942 bool unmapPage = page->Cache() != topCache 6943 && (protection & B_WRITE_AREA) != 0; 6944 6945 if (!unmapPage) 6946 map->ProtectPage(area, pageAddress, protection); 6947 6948 map->Unlock(); 6949 6950 if (unmapPage) { 6951 DEBUG_PAGE_ACCESS_START(page); 6952 unmap_page(area, pageAddress); 6953 DEBUG_PAGE_ACCESS_END(page); 6954 } 6955 } 6956 } 6957 6958 return B_OK; 6959 } 6960 6961 6962 status_t 6963 _user_sync_memory(void* _address, size_t size, uint32 flags) 6964 { 6965 addr_t address = (addr_t)_address; 6966 size = PAGE_ALIGN(size); 6967 6968 // check params 6969 if ((address % B_PAGE_SIZE) != 0) 6970 return B_BAD_VALUE; 6971 if (!is_user_address_range(_address, size)) { 6972 // weird error code required by POSIX 6973 return ENOMEM; 6974 } 6975 6976 bool writeSync = (flags & MS_SYNC) != 0; 6977 bool writeAsync = (flags & MS_ASYNC) != 0; 6978 if (writeSync && writeAsync) 6979 return B_BAD_VALUE; 6980 6981 if (size == 0 || (!writeSync && !writeAsync)) 6982 return B_OK; 6983 6984 // iterate through the range and sync all concerned areas 6985 while (size > 0) { 6986 // read lock the address space 6987 AddressSpaceReadLocker locker; 6988 status_t error = locker.SetTo(team_get_current_team_id()); 6989 if (error != B_OK) 6990 return error; 6991 6992 // get the first area 6993 VMArea* area = locker.AddressSpace()->LookupArea(address); 6994 if (area == NULL) 6995 return B_NO_MEMORY; 6996 6997 uint32 offset = address - area->Base(); 6998 size_t rangeSize = min_c(area->Size() - offset, size); 6999 offset += area->cache_offset; 7000 7001 // lock the cache 7002 AreaCacheLocker cacheLocker(area); 7003 if (!cacheLocker) 7004 return B_BAD_VALUE; 7005 VMCache* cache = area->cache; 7006 7007 locker.Unlock(); 7008 7009 uint32 firstPage = offset >> PAGE_SHIFT; 7010 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 7011 7012 // write the pages 7013 if (cache->type == CACHE_TYPE_VNODE) { 7014 if (writeSync) { 7015 // synchronous 7016 error = 
vm_page_write_modified_page_range(cache, firstPage, 7017 endPage); 7018 if (error != B_OK) 7019 return error; 7020 } else { 7021 // asynchronous 7022 vm_page_schedule_write_page_range(cache, firstPage, endPage); 7023 // TODO: This is probably not quite what is supposed to happen. 7024 // Especially when a lot has to be written, it might take ages 7025 // until it really hits the disk. 7026 } 7027 } 7028 7029 address += rangeSize; 7030 size -= rangeSize; 7031 } 7032 7033 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 7034 // synchronize multiple mappings of the same file. In our VM they never get 7035 // out of sync, though, so we don't have to do anything. 7036 7037 return B_OK; 7038 } 7039 7040 7041 status_t 7042 _user_memory_advice(void* _address, size_t size, uint32 advice) 7043 { 7044 addr_t address = (addr_t)_address; 7045 if ((address % B_PAGE_SIZE) != 0) 7046 return B_BAD_VALUE; 7047 7048 size = PAGE_ALIGN(size); 7049 if (!is_user_address_range(_address, size)) { 7050 // weird error code required by POSIX 7051 return B_NO_MEMORY; 7052 } 7053 7054 switch (advice) { 7055 case MADV_NORMAL: 7056 case MADV_SEQUENTIAL: 7057 case MADV_RANDOM: 7058 case MADV_WILLNEED: 7059 case MADV_DONTNEED: 7060 // TODO: Implement! 
7061 break; 7062 7063 case MADV_FREE: 7064 { 7065 AddressSpaceWriteLocker locker; 7066 do { 7067 status_t status = locker.SetTo(team_get_current_team_id()); 7068 if (status != B_OK) 7069 return status; 7070 } while (wait_if_address_range_is_wired(locker.AddressSpace(), 7071 address, size, &locker)); 7072 7073 discard_address_range(locker.AddressSpace(), address, size, false); 7074 break; 7075 } 7076 7077 default: 7078 return B_BAD_VALUE; 7079 } 7080 7081 return B_OK; 7082 } 7083 7084 7085 status_t 7086 _user_get_memory_properties(team_id teamID, const void* address, 7087 uint32* _protected, uint32* _lock) 7088 { 7089 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 7090 return B_BAD_ADDRESS; 7091 7092 AddressSpaceReadLocker locker; 7093 status_t error = locker.SetTo(teamID); 7094 if (error != B_OK) 7095 return error; 7096 7097 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 7098 if (area == NULL) 7099 return B_NO_MEMORY; 7100 7101 uint32 protection = get_area_page_protection(area, (addr_t)address); 7102 uint32 wiring = area->wiring; 7103 7104 locker.Unlock(); 7105 7106 error = user_memcpy(_protected, &protection, sizeof(protection)); 7107 if (error != B_OK) 7108 return error; 7109 7110 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 7111 7112 return error; 7113 } 7114 7115 7116 static status_t 7117 user_set_memory_swappable(const void* _address, size_t size, bool swappable) 7118 { 7119 #if ENABLE_SWAP_SUPPORT 7120 // check address range 7121 addr_t address = (addr_t)_address; 7122 size = PAGE_ALIGN(size); 7123 7124 if ((address % B_PAGE_SIZE) != 0) 7125 return EINVAL; 7126 if (!is_user_address_range(_address, size)) 7127 return EINVAL; 7128 7129 const addr_t endAddress = address + size; 7130 7131 AddressSpaceReadLocker addressSpaceLocker; 7132 status_t error = addressSpaceLocker.SetTo(team_get_current_team_id()); 7133 if (error != B_OK) 7134 return error; 7135 VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace(); 7136 
7137 // iterate through all concerned areas 7138 addr_t nextAddress = address; 7139 while (nextAddress != endAddress) { 7140 // get the next area 7141 VMArea* area = addressSpace->LookupArea(nextAddress); 7142 if (area == NULL) { 7143 error = B_BAD_ADDRESS; 7144 break; 7145 } 7146 7147 const addr_t areaStart = nextAddress; 7148 const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size()); 7149 nextAddress = areaEnd; 7150 7151 error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 7152 if (error != B_OK) { 7153 // We don't need to unset or reset things on failure. 7154 break; 7155 } 7156 7157 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 7158 VMAnonymousCache* anonCache = NULL; 7159 if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) { 7160 // This memory will aready never be swapped. Nothing to do. 7161 } else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) { 7162 error = anonCache->SetCanSwapPages(areaStart - area->Base(), 7163 areaEnd - areaStart, swappable); 7164 } else { 7165 // Some other cache type? We cannot affect anything here. 7166 error = EINVAL; 7167 } 7168 7169 cacheChainLocker.Unlock(); 7170 7171 unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 7172 if (error != B_OK) 7173 break; 7174 } 7175 7176 return error; 7177 #else 7178 // No swap support? Nothing to do. 7179 return B_OK; 7180 #endif 7181 } 7182 7183 7184 status_t 7185 _user_mlock(const void* _address, size_t size) 7186 { 7187 return user_set_memory_swappable(_address, size, false); 7188 } 7189 7190 7191 status_t 7192 _user_munlock(const void* _address, size_t size) 7193 { 7194 // TODO: B_SHARED_AREAs need to be handled a bit differently: 7195 // if multiple clones of an area had mlock() called on them, 7196 // munlock() must also be called on all of them to actually unlock. 7197 // (At present, the first munlock() will unlock all.) 
7198 // TODO: fork() should automatically unlock memory in the child. 7199 return user_set_memory_swappable(_address, size, true); 7200 } 7201 7202 7203 // #pragma mark -- compatibility 7204 7205 7206 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7207 7208 7209 struct physical_entry_beos { 7210 uint32 address; 7211 uint32 size; 7212 }; 7213 7214 7215 /*! The physical_entry structure has changed. We need to translate it to the 7216 old one. 7217 */ 7218 extern "C" int32 7219 __get_memory_map_beos(const void* _address, size_t numBytes, 7220 physical_entry_beos* table, int32 numEntries) 7221 { 7222 if (numEntries <= 0) 7223 return B_BAD_VALUE; 7224 7225 const uint8* address = (const uint8*)_address; 7226 7227 int32 count = 0; 7228 while (numBytes > 0 && count < numEntries) { 7229 physical_entry entry; 7230 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 7231 if (result < 0) { 7232 if (result != B_BUFFER_OVERFLOW) 7233 return result; 7234 } 7235 7236 if (entry.address >= (phys_addr_t)1 << 32) { 7237 panic("get_memory_map(): Address is greater 4 GB!"); 7238 return B_ERROR; 7239 } 7240 7241 table[count].address = entry.address; 7242 table[count++].size = entry.size; 7243 7244 address += entry.size; 7245 numBytes -= entry.size; 7246 } 7247 7248 // null-terminate the table, if possible 7249 if (count < numEntries) { 7250 table[count].address = 0; 7251 table[count].size = 0; 7252 } 7253 7254 return B_OK; 7255 } 7256 7257 7258 /*! The type of the \a physicalAddress parameter has changed from void* to 7259 phys_addr_t. 7260 */ 7261 extern "C" area_id 7262 __map_physical_memory_beos(const char* name, void* physicalAddress, 7263 size_t numBytes, uint32 addressSpec, uint32 protection, 7264 void** _virtualAddress) 7265 { 7266 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 7267 addressSpec, protection, _virtualAddress); 7268 } 7269 7270 7271 /*! 
The caller might not be able to deal with physical addresses >= 4 GB, so 7272 we meddle with the \a lock parameter to force 32 bit. 7273 */ 7274 extern "C" area_id 7275 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 7276 size_t size, uint32 lock, uint32 protection) 7277 { 7278 switch (lock) { 7279 case B_NO_LOCK: 7280 break; 7281 case B_FULL_LOCK: 7282 case B_LAZY_LOCK: 7283 lock = B_32_BIT_FULL_LOCK; 7284 break; 7285 case B_CONTIGUOUS: 7286 lock = B_32_BIT_CONTIGUOUS; 7287 break; 7288 } 7289 7290 return __create_area_haiku(name, _address, addressSpec, size, lock, 7291 protection); 7292 } 7293 7294 7295 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 7296 "BASE"); 7297 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 7298 "map_physical_memory@", "BASE"); 7299 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 7300 "BASE"); 7301 7302 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7303 "get_memory_map@@", "1_ALPHA3"); 7304 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7305 "map_physical_memory@@", "1_ALPHA3"); 7306 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7307 "1_ALPHA3"); 7308 7309 7310 #else 7311 7312 7313 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7314 "get_memory_map@@", "BASE"); 7315 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7316 "map_physical_memory@@", "BASE"); 7317 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7318 "BASE"); 7319 7320 7321 #endif // defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7322