1 /* 2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleterDrivers.h> 25 26 #include <symbol_versioning.h> 27 28 #include <arch/cpu.h> 29 #include <arch/vm.h> 30 #include <arch/user_memory.h> 31 #include <boot/elf.h> 32 #include <boot/stage2.h> 33 #include <condition_variable.h> 34 #include <console.h> 35 #include <debug.h> 36 #include <file_cache.h> 37 #include <fs/fd.h> 38 #include <heap.h> 39 #include <kernel.h> 40 #include <int.h> 41 #include <lock.h> 42 #include <low_resource_manager.h> 43 #include <slab/Slab.h> 44 #include <smp.h> 45 #include <system_info.h> 46 #include <thread.h> 47 #include <team.h> 48 #include <tracing.h> 49 #include <util/AutoLock.h> 50 #include <util/BitUtils.h> 51 #include <util/ThreadAutoLock.h> 52 #include <vm/vm_page.h> 53 #include <vm/vm_priv.h> 54 #include <vm/VMAddressSpace.h> 55 #include <vm/VMArea.h> 56 #include <vm/VMCache.h> 57 58 #include "VMAddressSpaceLocking.h" 59 #include "VMAnonymousCache.h" 60 #include "VMAnonymousNoSwapCache.h" 61 #include "IORequest.h" 62 63 64 //#define TRACE_VM 65 //#define TRACE_FAULTS 66 #ifdef TRACE_VM 67 # define TRACE(x) dprintf x 68 #else 69 # define TRACE(x) ; 70 #endif 71 #ifdef TRACE_FAULTS 72 # define FTRACE(x) dprintf x 73 #else 74 # define FTRACE(x) ; 75 #endif 76 77 78 namespace { 79 80 class AreaCacheLocking { 81 public: 82 inline bool Lock(VMCache* lockable) 83 { 84 return false; 85 } 86 87 inline void Unlock(VMCache* lockable) 88 { 89 vm_area_put_locked_cache(lockable); 90 } 91 }; 92 93 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 94 public: 95 inline AreaCacheLocker(VMCache* cache = NULL) 96 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 97 { 98 } 99 100 inline AreaCacheLocker(VMArea* area) 101 : AutoLocker<VMCache, AreaCacheLocking>() 102 { 103 SetTo(area); 104 } 105 106 inline void SetTo(VMCache* cache, bool alreadyLocked) 107 { 108 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 109 } 110 111 inline void SetTo(VMArea* area) 112 { 113 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 114 area != NULL ? 
vm_area_get_locked_cache(area) : NULL, true, true); 115 } 116 }; 117 118 119 class VMCacheChainLocker { 120 public: 121 VMCacheChainLocker() 122 : 123 fTopCache(NULL), 124 fBottomCache(NULL) 125 { 126 } 127 128 VMCacheChainLocker(VMCache* topCache) 129 : 130 fTopCache(topCache), 131 fBottomCache(topCache) 132 { 133 } 134 135 ~VMCacheChainLocker() 136 { 137 Unlock(); 138 } 139 140 void SetTo(VMCache* topCache) 141 { 142 fTopCache = topCache; 143 fBottomCache = topCache; 144 145 if (topCache != NULL) 146 topCache->SetUserData(NULL); 147 } 148 149 VMCache* LockSourceCache() 150 { 151 if (fBottomCache == NULL || fBottomCache->source == NULL) 152 return NULL; 153 154 VMCache* previousCache = fBottomCache; 155 156 fBottomCache = fBottomCache->source; 157 fBottomCache->Lock(); 158 fBottomCache->AcquireRefLocked(); 159 fBottomCache->SetUserData(previousCache); 160 161 return fBottomCache; 162 } 163 164 void LockAllSourceCaches() 165 { 166 while (LockSourceCache() != NULL) { 167 } 168 } 169 170 void Unlock(VMCache* exceptCache = NULL) 171 { 172 if (fTopCache == NULL) 173 return; 174 175 // Unlock caches in source -> consumer direction. This is important to 176 // avoid double-locking and a reversal of locking order in case a cache 177 // is eligable for merging. 178 VMCache* cache = fBottomCache; 179 while (cache != NULL) { 180 VMCache* nextCache = (VMCache*)cache->UserData(); 181 if (cache != exceptCache) 182 cache->ReleaseRefAndUnlock(cache != fTopCache); 183 184 if (cache == fTopCache) 185 break; 186 187 cache = nextCache; 188 } 189 190 fTopCache = NULL; 191 fBottomCache = NULL; 192 } 193 194 void UnlockKeepRefs(bool keepTopCacheLocked) 195 { 196 if (fTopCache == NULL) 197 return; 198 199 VMCache* nextCache = fBottomCache; 200 VMCache* cache = NULL; 201 202 while (keepTopCacheLocked 203 ? nextCache != fTopCache : cache != fTopCache) { 204 cache = nextCache; 205 nextCache = (VMCache*)cache->UserData(); 206 cache->Unlock(cache != fTopCache); 207 } 208 } 209 210 void RelockCaches(bool topCacheLocked) 211 { 212 if (fTopCache == NULL) 213 return; 214 215 VMCache* nextCache = fTopCache; 216 VMCache* cache = NULL; 217 if (topCacheLocked) { 218 cache = nextCache; 219 nextCache = cache->source; 220 } 221 222 while (cache != fBottomCache && nextCache != NULL) { 223 VMCache* consumer = cache; 224 cache = nextCache; 225 nextCache = cache->source; 226 cache->Lock(); 227 cache->SetUserData(consumer); 228 } 229 } 230 231 private: 232 VMCache* fTopCache; 233 VMCache* fBottomCache; 234 }; 235 236 } // namespace 237 238 239 // The memory reserve an allocation of the certain priority must not touch. 
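// (One entry per VM_PRIORITY_* level: user, system and VIP, matching the
// per-entry comments below.)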
240 static const size_t kMemoryReserveForPriority[] = { 241 VM_MEMORY_RESERVE_USER, // user 242 VM_MEMORY_RESERVE_SYSTEM, // system 243 0 // VIP 244 }; 245 246 247 ObjectCache* gPageMappingsObjectCache; 248 249 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 250 251 static off_t sAvailableMemory; 252 static off_t sNeededMemory; 253 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 254 static uint32 sPageFaults; 255 256 static VMPhysicalPageMapper* sPhysicalPageMapper; 257 258 #if DEBUG_CACHE_LIST 259 260 struct cache_info { 261 VMCache* cache; 262 addr_t page_count; 263 addr_t committed; 264 }; 265 266 static const int kCacheInfoTableCount = 100 * 1024; 267 static cache_info* sCacheInfoTable; 268 269 #endif // DEBUG_CACHE_LIST 270 271 272 // function declarations 273 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 274 bool addressSpaceCleanup); 275 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 276 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage); 277 static status_t map_backing_store(VMAddressSpace* addressSpace, 278 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring, 279 int protection, int protectionMax, int mapping, uint32 flags, 280 const virtual_address_restrictions* addressRestrictions, bool kernel, 281 VMArea** _area, void** _virtualAddress); 282 static void fix_protection(uint32* protection); 283 284 285 // #pragma mark - 286 287 288 #if VM_PAGE_FAULT_TRACING 289 290 namespace VMPageFaultTracing { 291 292 class PageFaultStart : public AbstractTraceEntry { 293 public: 294 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 295 : 296 fAddress(address), 297 fPC(pc), 298 fWrite(write), 299 fUser(user) 300 { 301 Initialized(); 302 } 303 304 virtual void AddDump(TraceOutput& out) 305 { 306 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 307 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 308 } 309 310 private: 311 addr_t fAddress; 312 addr_t fPC; 313 bool fWrite; 314 bool fUser; 315 }; 316 317 318 // page fault errors 319 enum { 320 PAGE_FAULT_ERROR_NO_AREA = 0, 321 PAGE_FAULT_ERROR_KERNEL_ONLY, 322 PAGE_FAULT_ERROR_WRITE_PROTECTED, 323 PAGE_FAULT_ERROR_READ_PROTECTED, 324 PAGE_FAULT_ERROR_EXECUTE_PROTECTED, 325 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 326 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 327 }; 328 329 330 class PageFaultError : public AbstractTraceEntry { 331 public: 332 PageFaultError(area_id area, status_t error) 333 : 334 fArea(area), 335 fError(error) 336 { 337 Initialized(); 338 } 339 340 virtual void AddDump(TraceOutput& out) 341 { 342 switch (fError) { 343 case PAGE_FAULT_ERROR_NO_AREA: 344 out.Print("page fault error: no area"); 345 break; 346 case PAGE_FAULT_ERROR_KERNEL_ONLY: 347 out.Print("page fault error: area: %ld, kernel only", fArea); 348 break; 349 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 350 out.Print("page fault error: area: %ld, write protected", 351 fArea); 352 break; 353 case PAGE_FAULT_ERROR_READ_PROTECTED: 354 out.Print("page fault error: area: %ld, read protected", fArea); 355 break; 356 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED: 357 out.Print("page fault error: area: %ld, execute protected", 358 fArea); 359 break; 360 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 361 out.Print("page fault error: kernel touching bad user memory"); 362 break; 363 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 364 out.Print("page fault error: no address space"); 365 break; 366 default: 367 out.Print("page fault error: area: %ld, error: %s", fArea, 368 strerror(fError)); 369 break; 370 } 371 } 372 373 private: 374 area_id fArea; 375 status_t fError; 376 }; 377 378 379 class PageFaultDone : public AbstractTraceEntry { 380 public: 381 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 382 vm_page* page) 383 : 384 fArea(area), 385 fTopCache(topCache), 386 fCache(cache), 387 fPage(page) 388 { 389 Initialized(); 390 } 391 392 virtual void AddDump(TraceOutput& out) 393 { 394 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 395 "page: %p", fArea, fTopCache, fCache, fPage); 396 } 397 398 private: 399 area_id fArea; 400 VMCache* fTopCache; 401 VMCache* fCache; 402 vm_page* fPage; 403 }; 404 405 } // namespace VMPageFaultTracing 406 407 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 408 #else 409 # define TPF(x) ; 410 #endif // VM_PAGE_FAULT_TRACING 411 412 413 // #pragma mark - 414 415 416 /*! The page's cache must be locked. 417 */ 418 static inline void 419 increment_page_wired_count(vm_page* page) 420 { 421 if (!page->IsMapped()) 422 atomic_add(&gMappedPagesCount, 1); 423 page->IncrementWiredCount(); 424 } 425 426 427 /*! The page's cache must be locked. 428 */ 429 static inline void 430 decrement_page_wired_count(vm_page* page) 431 { 432 page->DecrementWiredCount(); 433 if (!page->IsMapped()) 434 atomic_add(&gMappedPagesCount, -1); 435 } 436 437 438 static inline addr_t 439 virtual_page_address(VMArea* area, vm_page* page) 440 { 441 return area->Base() 442 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 443 } 444 445 446 static inline bool 447 is_page_in_area(VMArea* area, vm_page* page) 448 { 449 off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT); 450 return pageCacheOffsetBytes >= area->cache_offset 451 && pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size(); 452 } 453 454 455 //! 
You need to have the address space locked when calling this function 456 static VMArea* 457 lookup_area(VMAddressSpace* addressSpace, area_id id) 458 { 459 VMAreas::ReadLock(); 460 461 VMArea* area = VMAreas::LookupLocked(id); 462 if (area != NULL && area->address_space != addressSpace) 463 area = NULL; 464 465 VMAreas::ReadUnlock(); 466 467 return area; 468 } 469 470 471 static inline size_t 472 area_page_protections_size(size_t areaSize) 473 { 474 // In the page protections we store only the three user protections, 475 // so we use 4 bits per page. 476 return (areaSize / B_PAGE_SIZE + 1) / 2; 477 } 478 479 480 static status_t 481 allocate_area_page_protections(VMArea* area) 482 { 483 size_t bytes = area_page_protections_size(area->Size()); 484 area->page_protections = (uint8*)malloc_etc(bytes, 485 area->address_space == VMAddressSpace::Kernel() 486 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 487 if (area->page_protections == NULL) 488 return B_NO_MEMORY; 489 490 // init the page protections for all pages to that of the area 491 uint32 areaProtection = area->protection 492 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 493 memset(area->page_protections, areaProtection | (areaProtection << 4), 494 bytes); 495 return B_OK; 496 } 497 498 499 static inline void 500 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 501 { 502 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 503 addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 504 uint8& entry = area->page_protections[pageIndex / 2]; 505 if (pageIndex % 2 == 0) 506 entry = (entry & 0xf0) | protection; 507 else 508 entry = (entry & 0x0f) | (protection << 4); 509 } 510 511 512 static inline uint32 513 get_area_page_protection(VMArea* area, addr_t pageAddress) 514 { 515 if (area->page_protections == NULL) 516 return area->protection; 517 518 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 519 uint32 protection = area->page_protections[pageIndex / 2]; 520 if (pageIndex % 2 == 0) 521 protection &= 0x0f; 522 else 523 protection >>= 4; 524 525 uint32 kernelProtection = 0; 526 if ((protection & B_READ_AREA) != 0) 527 kernelProtection |= B_KERNEL_READ_AREA; 528 if ((protection & B_WRITE_AREA) != 0) 529 kernelProtection |= B_KERNEL_WRITE_AREA; 530 531 // If this is a kernel area we return only the kernel flags. 532 if (area->address_space == VMAddressSpace::Kernel()) 533 return kernelProtection; 534 535 return protection | kernelProtection; 536 } 537 538 539 static inline uint8* 540 realloc_page_protections(uint8* pageProtections, size_t areaSize, 541 uint32 allocationFlags) 542 { 543 size_t bytes = area_page_protections_size(areaSize); 544 return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags); 545 } 546 547 548 /*! The caller must have reserved enough pages the translation map 549 implementation might need to map this page. 550 The page's cache must be locked. 551 */ 552 static status_t 553 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 554 vm_page_reservation* reservation) 555 { 556 VMTranslationMap* map = area->address_space->TranslationMap(); 557 558 bool wasMapped = page->IsMapped(); 559 560 if (area->wiring == B_NO_LOCK) { 561 DEBUG_PAGE_ACCESS_CHECK(page); 562 563 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 564 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 565 gPageMappingsObjectCache, 566 CACHE_DONT_WAIT_FOR_MEMORY 567 | (isKernelSpace ? 
CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 568 if (mapping == NULL) 569 return B_NO_MEMORY; 570 571 mapping->page = page; 572 mapping->area = area; 573 574 map->Lock(); 575 576 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 577 area->MemoryType(), reservation); 578 579 // insert mapping into lists 580 if (!page->IsMapped()) 581 atomic_add(&gMappedPagesCount, 1); 582 583 page->mappings.Add(mapping); 584 area->mappings.Add(mapping); 585 586 map->Unlock(); 587 } else { 588 DEBUG_PAGE_ACCESS_CHECK(page); 589 590 map->Lock(); 591 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 592 area->MemoryType(), reservation); 593 map->Unlock(); 594 595 increment_page_wired_count(page); 596 } 597 598 if (!wasMapped) { 599 // The page is mapped now, so we must not remain in the cached queue. 600 // It also makes sense to move it from the inactive to the active, since 601 // otherwise the page daemon wouldn't come to keep track of it (in idle 602 // mode) -- if the page isn't touched, it will be deactivated after a 603 // full iteration through the queue at the latest. 604 if (page->State() == PAGE_STATE_CACHED 605 || page->State() == PAGE_STATE_INACTIVE) { 606 vm_page_set_state(page, PAGE_STATE_ACTIVE); 607 } 608 } 609 610 return B_OK; 611 } 612 613 614 /*! If \a preserveModified is \c true, the caller must hold the lock of the 615 page's cache. 616 */ 617 static inline bool 618 unmap_page(VMArea* area, addr_t virtualAddress) 619 { 620 return area->address_space->TranslationMap()->UnmapPage(area, 621 virtualAddress, true); 622 } 623 624 625 /*! If \a preserveModified is \c true, the caller must hold the lock of all 626 mapped pages' caches. 627 */ 628 static inline void 629 unmap_pages(VMArea* area, addr_t base, size_t size) 630 { 631 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 632 } 633 634 635 static inline bool 636 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset) 637 { 638 if (address < area->Base()) { 639 offset = area->Base() - address; 640 if (offset >= size) 641 return false; 642 643 address = area->Base(); 644 size -= offset; 645 offset = 0; 646 if (size > area->Size()) 647 size = area->Size(); 648 649 return true; 650 } 651 652 offset = address - area->Base(); 653 if (offset >= area->Size()) 654 return false; 655 656 if (size >= area->Size() - offset) 657 size = area->Size() - offset; 658 659 return true; 660 } 661 662 663 /*! Cuts a piece out of an area. If the given cut range covers the complete 664 area, it is deleted. If it covers the beginning or the end, the area is 665 resized accordingly. If the range covers some part in the middle of the 666 area, it is split in two; in this case the second area is returned via 667 \a _secondArea (the variable is left untouched in the other cases). 668 The address space must be write locked. 669 The caller must ensure that no part of the given range is wired. 670 */ 671 static status_t 672 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 673 addr_t size, VMArea** _secondArea, bool kernel) 674 { 675 addr_t offset; 676 if (!intersect_area(area, address, size, offset)) 677 return B_OK; 678 679 // Is the area fully covered? 
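	// (If so, the whole area is simply deleted; no resizing or splitting is
	// necessary.)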
680 if (address == area->Base() && size == area->Size()) { 681 delete_area(addressSpace, area, false); 682 return B_OK; 683 } 684 685 int priority; 686 uint32 allocationFlags; 687 if (addressSpace == VMAddressSpace::Kernel()) { 688 priority = VM_PRIORITY_SYSTEM; 689 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 690 | HEAP_DONT_LOCK_KERNEL_SPACE; 691 } else { 692 priority = VM_PRIORITY_USER; 693 allocationFlags = 0; 694 } 695 696 VMCache* cache = vm_area_get_locked_cache(area); 697 VMCacheChainLocker cacheChainLocker(cache); 698 cacheChainLocker.LockAllSourceCaches(); 699 700 // If no one else uses the area's cache and it's an anonymous cache, we can 701 // resize or split it, too. 702 bool onlyCacheUser = cache->areas == area && area->cache_next == NULL 703 && cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM; 704 705 const addr_t oldSize = area->Size(); 706 707 // Cut the end only? 708 if (offset > 0 && size == area->Size() - offset) { 709 status_t error = addressSpace->ShrinkAreaTail(area, offset, 710 allocationFlags); 711 if (error != B_OK) 712 return error; 713 714 if (area->page_protections != NULL) { 715 uint8* newProtections = realloc_page_protections( 716 area->page_protections, area->Size(), allocationFlags); 717 718 if (newProtections == NULL) { 719 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 720 return B_NO_MEMORY; 721 } 722 723 area->page_protections = newProtections; 724 } 725 726 // unmap pages 727 unmap_pages(area, address, size); 728 729 if (onlyCacheUser) { 730 // Since VMCache::Resize() can temporarily drop the lock, we must 731 // unlock all lower caches to prevent locking order inversion. 732 cacheChainLocker.Unlock(cache); 733 cache->Resize(cache->virtual_base + offset, priority); 734 cache->ReleaseRefAndUnlock(); 735 } 736 737 return B_OK; 738 } 739 740 // Cut the beginning only? 741 if (area->Base() == address) { 742 uint8* newProtections = NULL; 743 if (area->page_protections != NULL) { 744 // Allocate all memory before shifting as the shift might lose some 745 // bits. 746 newProtections = realloc_page_protections(NULL, area->Size(), 747 allocationFlags); 748 749 if (newProtections == NULL) 750 return B_NO_MEMORY; 751 } 752 753 // resize the area 754 status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size, 755 allocationFlags); 756 if (error != B_OK) { 757 if (newProtections != NULL) 758 free_etc(newProtections, allocationFlags); 759 return error; 760 } 761 762 if (area->page_protections != NULL) { 763 size_t oldBytes = area_page_protections_size(oldSize); 764 ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE; 765 bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4)); 766 767 size_t bytes = area_page_protections_size(area->Size()); 768 memcpy(newProtections, area->page_protections, bytes); 769 free_etc(area->page_protections, allocationFlags); 770 area->page_protections = newProtections; 771 } 772 773 // unmap pages 774 unmap_pages(area, address, size); 775 776 if (onlyCacheUser) { 777 // Since VMCache::Rebase() can temporarily drop the lock, we must 778 // unlock all lower caches to prevent locking order inversion. 779 cacheChainLocker.Unlock(cache); 780 cache->Rebase(cache->virtual_base + size, priority); 781 cache->ReleaseRefAndUnlock(); 782 } 783 area->cache_offset += size; 784 785 return B_OK; 786 } 787 788 // The tough part -- cut a piece out of the middle of the area. 
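	// (The range to cut lies strictly inside the area, so both a leading and
	// a trailing piece remain.)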
789 // We do that by shrinking the area to the begin section and creating a 790 // new area for the end section. 791 addr_t firstNewSize = offset; 792 addr_t secondBase = address + size; 793 addr_t secondSize = area->Size() - offset - size; 794 795 // unmap pages 796 unmap_pages(area, address, area->Size() - firstNewSize); 797 798 // resize the area 799 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 800 allocationFlags); 801 if (error != B_OK) 802 return error; 803 804 uint8* areaNewProtections = NULL; 805 uint8* secondAreaNewProtections = NULL; 806 807 // Try to allocate the new memory before making some hard to reverse 808 // changes. 809 if (area->page_protections != NULL) { 810 areaNewProtections = realloc_page_protections(NULL, area->Size(), 811 allocationFlags); 812 secondAreaNewProtections = realloc_page_protections(NULL, secondSize, 813 allocationFlags); 814 815 if (areaNewProtections == NULL || secondAreaNewProtections == NULL) { 816 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 817 free_etc(areaNewProtections, allocationFlags); 818 free_etc(secondAreaNewProtections, allocationFlags); 819 return B_NO_MEMORY; 820 } 821 } 822 823 virtual_address_restrictions addressRestrictions = {}; 824 addressRestrictions.address = (void*)secondBase; 825 addressRestrictions.address_specification = B_EXACT_ADDRESS; 826 VMArea* secondArea; 827 828 if (onlyCacheUser) { 829 // Create a new cache for the second area. 830 VMCache* secondCache; 831 error = VMCacheFactory::CreateAnonymousCache(secondCache, 832 area->protection & B_OVERCOMMITTING_AREA, 0, 0, 833 dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority); 834 if (error != B_OK) { 835 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 836 free_etc(areaNewProtections, allocationFlags); 837 free_etc(secondAreaNewProtections, allocationFlags); 838 return error; 839 } 840 841 secondCache->Lock(); 842 secondCache->temporary = cache->temporary; 843 secondCache->virtual_base = area->cache_offset; 844 secondCache->virtual_end = area->cache_offset + secondSize; 845 846 // Transfer the concerned pages from the first cache. 847 off_t adoptOffset = area->cache_offset + secondBase - area->Base(); 848 error = secondCache->Adopt(cache, adoptOffset, secondSize, 849 area->cache_offset); 850 851 if (error == B_OK) { 852 // Since VMCache::Resize() can temporarily drop the lock, we must 853 // unlock all lower caches to prevent locking order inversion. 854 cacheChainLocker.Unlock(cache); 855 cache->Resize(cache->virtual_base + firstNewSize, priority); 856 // Don't unlock the cache yet because we might have to resize it 857 // back. 858 859 // Map the second area. 860 error = map_backing_store(addressSpace, secondCache, 861 area->cache_offset, area->name, secondSize, area->wiring, 862 area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0, 863 &addressRestrictions, kernel, &secondArea, NULL); 864 } 865 866 if (error != B_OK) { 867 // Restore the original cache. 868 cache->Resize(cache->virtual_base + oldSize, priority); 869 870 // Move the pages back. 871 status_t readoptStatus = cache->Adopt(secondCache, 872 area->cache_offset, secondSize, adoptOffset); 873 if (readoptStatus != B_OK) { 874 // Some (swap) pages have not been moved back and will be lost 875 // once the second cache is deleted. 876 panic("failed to restore cache range: %s", 877 strerror(readoptStatus)); 878 879 // TODO: Handle out of memory cases by freeing memory and 880 // retrying. 
881 } 882 883 cache->ReleaseRefAndUnlock(); 884 secondCache->ReleaseRefAndUnlock(); 885 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 886 free_etc(areaNewProtections, allocationFlags); 887 free_etc(secondAreaNewProtections, allocationFlags); 888 return error; 889 } 890 891 // Now we can unlock it. 892 cache->ReleaseRefAndUnlock(); 893 secondCache->Unlock(); 894 } else { 895 error = map_backing_store(addressSpace, cache, area->cache_offset 896 + (secondBase - area->Base()), 897 area->name, secondSize, area->wiring, area->protection, 898 area->protection_max, REGION_NO_PRIVATE_MAP, 0, 899 &addressRestrictions, kernel, &secondArea, NULL); 900 if (error != B_OK) { 901 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 902 free_etc(areaNewProtections, allocationFlags); 903 free_etc(secondAreaNewProtections, allocationFlags); 904 return error; 905 } 906 // We need a cache reference for the new area. 907 cache->AcquireRefLocked(); 908 } 909 910 if (area->page_protections != NULL) { 911 // Copy the protection bits of the first area. 912 size_t areaBytes = area_page_protections_size(area->Size()); 913 memcpy(areaNewProtections, area->page_protections, areaBytes); 914 uint8* areaOldProtections = area->page_protections; 915 area->page_protections = areaNewProtections; 916 917 // Shift the protection bits of the second area to the start of 918 // the old array. 919 size_t oldBytes = area_page_protections_size(oldSize); 920 addr_t secondAreaOffset = secondBase - area->Base(); 921 ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE; 922 bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4)); 923 924 // Copy the protection bits of the second area. 925 size_t secondAreaBytes = area_page_protections_size(secondSize); 926 memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes); 927 secondArea->page_protections = secondAreaNewProtections; 928 929 // We don't need this anymore. 930 free_etc(areaOldProtections, allocationFlags); 931 932 // Set the correct page protections for the second area. 933 VMTranslationMap* map = addressSpace->TranslationMap(); 934 map->Lock(); 935 for (VMCachePagesTree::Iterator it 936 = secondArea->cache->pages.GetIterator(); 937 vm_page* page = it.Next();) { 938 if (is_page_in_area(secondArea, page)) { 939 addr_t address = virtual_page_address(secondArea, page); 940 uint32 pageProtection 941 = get_area_page_protection(secondArea, address); 942 map->ProtectPage(secondArea, address, pageProtection); 943 } 944 } 945 map->Unlock(); 946 } 947 948 if (_secondArea != NULL) 949 *_secondArea = secondArea; 950 951 return B_OK; 952 } 953 954 955 /*! Deletes or cuts all areas in the given address range. 956 The address space must be write-locked. 957 The caller must ensure that no part of the given range is wired. 958 */ 959 static status_t 960 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 961 bool kernel) 962 { 963 size = PAGE_ALIGN(size); 964 965 // Check, whether the caller is allowed to modify the concerned areas. 
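	// (Userland callers may not unmap areas that carry B_KERNEL_AREA
	// protection; requests from the kernel itself are not restricted here.)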
966 if (!kernel) { 967 for (VMAddressSpace::AreaRangeIterator it 968 = addressSpace->GetAreaRangeIterator(address, size); 969 VMArea* area = it.Next();) { 970 971 if ((area->protection & B_KERNEL_AREA) != 0) { 972 dprintf("unmap_address_range: team %" B_PRId32 " tried to " 973 "unmap range of kernel area %" B_PRId32 " (%s)\n", 974 team_get_current_team_id(), area->id, area->name); 975 return B_NOT_ALLOWED; 976 } 977 } 978 } 979 980 for (VMAddressSpace::AreaRangeIterator it 981 = addressSpace->GetAreaRangeIterator(address, size); 982 VMArea* area = it.Next();) { 983 984 status_t error = cut_area(addressSpace, area, address, size, NULL, 985 kernel); 986 if (error != B_OK) 987 return error; 988 // Failing after already messing with areas is ugly, but we 989 // can't do anything about it. 990 } 991 992 return B_OK; 993 } 994 995 996 static status_t 997 discard_area_range(VMArea* area, addr_t address, addr_t size) 998 { 999 addr_t offset; 1000 if (!intersect_area(area, address, size, offset)) 1001 return B_OK; 1002 1003 // If someone else uses the area's cache or it's not an anonymous cache, we 1004 // can't discard. 1005 VMCache* cache = vm_area_get_locked_cache(area); 1006 if (cache->areas != area || area->cache_next != NULL 1007 || !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) { 1008 return B_OK; 1009 } 1010 1011 VMCacheChainLocker cacheChainLocker(cache); 1012 cacheChainLocker.LockAllSourceCaches(); 1013 1014 unmap_pages(area, address, size); 1015 1016 // Since VMCache::Discard() can temporarily drop the lock, we must 1017 // unlock all lower caches to prevent locking order inversion. 1018 cacheChainLocker.Unlock(cache); 1019 cache->Discard(cache->virtual_base + offset, size); 1020 cache->ReleaseRefAndUnlock(); 1021 1022 return B_OK; 1023 } 1024 1025 1026 static status_t 1027 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 1028 bool kernel) 1029 { 1030 for (VMAddressSpace::AreaRangeIterator it 1031 = addressSpace->GetAreaRangeIterator(address, size); 1032 VMArea* area = it.Next();) { 1033 status_t error = discard_area_range(area, address, size); 1034 if (error != B_OK) 1035 return error; 1036 } 1037 1038 return B_OK; 1039 } 1040 1041 1042 /*! You need to hold the lock of the cache and the write lock of the address 1043 space when calling this function. 1044 Note, that in case of error your cache will be temporarily unlocked. 1045 If \a addressSpec is \c B_EXACT_ADDRESS and the 1046 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 1047 that no part of the specified address range (base \c *_virtualAddress, size 1048 \a size) is wired. The cache will also be temporarily unlocked. 
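	A typical call looks like the one in vm_create_anonymous_area() further
	below (illustrative only; the cache must already be locked, as stated
	above):
		status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
			protection, 0, REGION_NO_PRIVATE_MAP, flags,
			virtualAddressRestrictions, kernel, &area, _address);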
*/
static status_t
map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
	const char* areaName, addr_t size, int wiring, int protection,
	int protectionMax, int mapping,
	uint32 flags, const virtual_address_restrictions* addressRestrictions,
	bool kernel, VMArea** _area, void** _virtualAddress)
{
	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
		addressSpace, cache, addressRestrictions->address, offset, size,
		addressRestrictions->address_specification, wiring, protection,
		protectionMax, _area, areaName));
	cache->AssertLocked();

	if (size == 0) {
#if KDEBUG
		panic("map_backing_store(): called with size=0 for area '%s'!",
			areaName);
#endif
		return B_BAD_VALUE;
	}
	if (offset < 0)
		return B_BAD_VALUE;

	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
	int priority;
	if (addressSpace != VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_USER;
	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
		priority = VM_PRIORITY_VIP;
		allocationFlags |= HEAP_PRIORITY_VIP;
	} else
		priority = VM_PRIORITY_SYSTEM;

	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
		allocationFlags);
	if (area == NULL)
		return B_NO_MEMORY;
	if (mapping != REGION_PRIVATE_MAP)
		area->protection_max = protectionMax & B_USER_PROTECTION;

	status_t status;

	// if this is a private map, we need to create a new cache
	// to handle the private copies of pages as they are written to
	VMCache* sourceCache = cache;
	if (mapping == REGION_PRIVATE_MAP) {
		VMCache* newCache;

		// create an anonymous cache
		status = VMCacheFactory::CreateAnonymousCache(newCache,
			(protection & B_STACK_AREA) != 0
				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
		if (status != B_OK)
			goto err1;

		newCache->Lock();
		newCache->temporary = 1;
		newCache->virtual_base = offset;
		newCache->virtual_end = offset + size;

		cache->AddConsumer(newCache);

		cache = newCache;
	}

	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
		status = cache->SetMinimalCommitment(size, priority);
		if (status != B_OK)
			goto err2;
	}

	// check to see if this address space has entered DELETE state
	if (addressSpace->IsBeingDeleted()) {
		// someone is trying to delete this address space now, so we can't
		// insert the area; back out
		status = B_BAD_TEAM_ID;
		goto err2;
	}

	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
		// temporarily unlock the current cache since it might be mapped to
		// some existing area, and unmap_address_range also needs to lock that
		// cache to delete the area.
1138 cache->Unlock(); 1139 status = unmap_address_range(addressSpace, 1140 (addr_t)addressRestrictions->address, size, kernel); 1141 cache->Lock(); 1142 if (status != B_OK) 1143 goto err2; 1144 } 1145 1146 status = addressSpace->InsertArea(area, size, addressRestrictions, 1147 allocationFlags, _virtualAddress); 1148 if (status == B_NO_MEMORY 1149 && addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) { 1150 // Due to how many locks are held, we cannot wait here for space to be 1151 // freed up, but we can at least notify the low_resource handler. 1152 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0); 1153 } 1154 if (status != B_OK) 1155 goto err2; 1156 1157 // attach the cache to the area 1158 area->cache = cache; 1159 area->cache_offset = offset; 1160 1161 // point the cache back to the area 1162 cache->InsertAreaLocked(area); 1163 if (mapping == REGION_PRIVATE_MAP) 1164 cache->Unlock(); 1165 1166 // insert the area in the global areas map 1167 VMAreas::Insert(area); 1168 1169 // grab a ref to the address space (the area holds this) 1170 addressSpace->Get(); 1171 1172 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1173 // cache, sourceCache, areaName, area); 1174 1175 *_area = area; 1176 return B_OK; 1177 1178 err2: 1179 if (mapping == REGION_PRIVATE_MAP) { 1180 // We created this cache, so we must delete it again. Note, that we 1181 // need to temporarily unlock the source cache or we'll otherwise 1182 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1183 sourceCache->Unlock(); 1184 cache->ReleaseRefAndUnlock(); 1185 sourceCache->Lock(); 1186 } 1187 err1: 1188 addressSpace->DeleteArea(area, allocationFlags); 1189 return status; 1190 } 1191 1192 1193 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 1194 locker1, locker2). 1195 */ 1196 template<typename LockerType1, typename LockerType2> 1197 static inline bool 1198 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 1199 { 1200 area->cache->AssertLocked(); 1201 1202 VMAreaUnwiredWaiter waiter; 1203 if (!area->AddWaiterIfWired(&waiter)) 1204 return false; 1205 1206 // unlock everything and wait 1207 if (locker1 != NULL) 1208 locker1->Unlock(); 1209 if (locker2 != NULL) 1210 locker2->Unlock(); 1211 1212 waiter.waitEntry.Wait(); 1213 1214 return true; 1215 } 1216 1217 1218 /*! Checks whether the given area has any wired ranges intersecting with the 1219 specified range and waits, if so. 1220 1221 When it has to wait, the function calls \c Unlock() on both \a locker1 1222 and \a locker2, if given. 1223 The area's top cache must be locked and must be unlocked as a side effect 1224 of calling \c Unlock() on either \a locker1 or \a locker2. 1225 1226 If the function does not have to wait it does not modify or unlock any 1227 object. 1228 1229 \param area The area to be checked. 1230 \param base The base address of the range to check. 1231 \param size The size of the address range to check. 1232 \param locker1 An object to be unlocked when before starting to wait (may 1233 be \c NULL). 1234 \param locker2 An object to be unlocked when before starting to wait (may 1235 be \c NULL). 1236 \return \c true, if the function had to wait, \c false otherwise. 
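	Typical usage, mirroring wait_if_address_range_is_wired() below
	(sketch only):
		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
		if (wait_if_area_range_is_wired(area, base, size, &locker,
				&cacheLocker)) {
			// all locks were released while waiting; restart the operation
		}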
1237 */ 1238 template<typename LockerType1, typename LockerType2> 1239 static inline bool 1240 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 1241 LockerType1* locker1, LockerType2* locker2) 1242 { 1243 area->cache->AssertLocked(); 1244 1245 VMAreaUnwiredWaiter waiter; 1246 if (!area->AddWaiterIfWired(&waiter, base, size)) 1247 return false; 1248 1249 // unlock everything and wait 1250 if (locker1 != NULL) 1251 locker1->Unlock(); 1252 if (locker2 != NULL) 1253 locker2->Unlock(); 1254 1255 waiter.waitEntry.Wait(); 1256 1257 return true; 1258 } 1259 1260 1261 /*! Checks whether the given address space has any wired ranges intersecting 1262 with the specified range and waits, if so. 1263 1264 Similar to wait_if_area_range_is_wired(), with the following differences: 1265 - All areas intersecting with the range are checked (respectively all until 1266 one is found that contains a wired range intersecting with the given 1267 range). 1268 - The given address space must at least be read-locked and must be unlocked 1269 when \c Unlock() is called on \a locker. 1270 - None of the areas' caches are allowed to be locked. 1271 */ 1272 template<typename LockerType> 1273 static inline bool 1274 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 1275 size_t size, LockerType* locker) 1276 { 1277 for (VMAddressSpace::AreaRangeIterator it 1278 = addressSpace->GetAreaRangeIterator(base, size); 1279 VMArea* area = it.Next();) { 1280 1281 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area)); 1282 1283 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker)) 1284 return true; 1285 } 1286 1287 return false; 1288 } 1289 1290 1291 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection(). 1292 It must be called in a situation where the kernel address space may be 1293 locked. 1294 */ 1295 status_t 1296 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie) 1297 { 1298 AddressSpaceReadLocker locker; 1299 VMArea* area; 1300 status_t status = locker.SetFromArea(id, area); 1301 if (status != B_OK) 1302 return status; 1303 1304 if (area->page_protections == NULL) { 1305 status = allocate_area_page_protections(area); 1306 if (status != B_OK) 1307 return status; 1308 } 1309 1310 *cookie = (void*)area; 1311 return B_OK; 1312 } 1313 1314 1315 /*! This is a debug helper function that can only be used with very specific 1316 use cases. 1317 Sets protection for the given address range to the protection specified. 1318 If \a protection is 0 then the involved pages will be marked non-present 1319 in the translation map to cause a fault on access. The pages aren't 1320 actually unmapped however so that they can be marked present again with 1321 additional calls to this function. For this to work the area must be 1322 fully locked in memory so that the pages aren't otherwise touched. 1323 This function does not lock the kernel address space and needs to be 1324 supplied with a \a cookie retrieved from a successful call to 1325 vm_prepare_kernel_area_debug_protection(). 
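	Intended call sequence (sketch, error handling omitted):
		void* cookie;
		if (vm_prepare_kernel_area_debug_protection(id, &cookie) == B_OK) {
			// make the range fault on any access ...
			vm_set_kernel_area_debug_protection(cookie, address, size, 0);
			// ... and later make it accessible again
			vm_set_kernel_area_debug_protection(cookie, address, size,
				B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		}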
1326 */ 1327 status_t 1328 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size, 1329 uint32 protection) 1330 { 1331 // check address range 1332 addr_t address = (addr_t)_address; 1333 size = PAGE_ALIGN(size); 1334 1335 if ((address % B_PAGE_SIZE) != 0 1336 || (addr_t)address + size < (addr_t)address 1337 || !IS_KERNEL_ADDRESS(address) 1338 || !IS_KERNEL_ADDRESS((addr_t)address + size)) { 1339 return B_BAD_VALUE; 1340 } 1341 1342 // Translate the kernel protection to user protection as we only store that. 1343 if ((protection & B_KERNEL_READ_AREA) != 0) 1344 protection |= B_READ_AREA; 1345 if ((protection & B_KERNEL_WRITE_AREA) != 0) 1346 protection |= B_WRITE_AREA; 1347 1348 VMAddressSpace* addressSpace = VMAddressSpace::GetKernel(); 1349 VMTranslationMap* map = addressSpace->TranslationMap(); 1350 VMArea* area = (VMArea*)cookie; 1351 1352 addr_t offset = address - area->Base(); 1353 if (area->Size() - offset < size) { 1354 panic("protect range not fully within supplied area"); 1355 return B_BAD_VALUE; 1356 } 1357 1358 if (area->page_protections == NULL) { 1359 panic("area has no page protections"); 1360 return B_BAD_VALUE; 1361 } 1362 1363 // Invalidate the mapping entries so any access to them will fault or 1364 // restore the mapping entries unchanged so that lookup will success again. 1365 map->Lock(); 1366 map->DebugMarkRangePresent(address, address + size, protection != 0); 1367 map->Unlock(); 1368 1369 // And set the proper page protections so that the fault case will actually 1370 // fail and not simply try to map a new page. 1371 for (addr_t pageAddress = address; pageAddress < address + size; 1372 pageAddress += B_PAGE_SIZE) { 1373 set_area_page_protection(area, pageAddress, protection); 1374 } 1375 1376 return B_OK; 1377 } 1378 1379 1380 status_t 1381 vm_block_address_range(const char* name, void* address, addr_t size) 1382 { 1383 if (!arch_vm_supports_protection(0)) 1384 return B_NOT_SUPPORTED; 1385 1386 AddressSpaceWriteLocker locker; 1387 status_t status = locker.SetTo(VMAddressSpace::KernelID()); 1388 if (status != B_OK) 1389 return status; 1390 1391 VMAddressSpace* addressSpace = locker.AddressSpace(); 1392 1393 // create an anonymous cache 1394 VMCache* cache; 1395 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false, 1396 VM_PRIORITY_SYSTEM); 1397 if (status != B_OK) 1398 return status; 1399 1400 cache->temporary = 1; 1401 cache->virtual_end = size; 1402 cache->Lock(); 1403 1404 VMArea* area; 1405 virtual_address_restrictions addressRestrictions = {}; 1406 addressRestrictions.address = address; 1407 addressRestrictions.address_specification = B_EXACT_ADDRESS; 1408 status = map_backing_store(addressSpace, cache, 0, name, size, 1409 B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions, 1410 true, &area, NULL); 1411 if (status != B_OK) { 1412 cache->ReleaseRefAndUnlock(); 1413 return status; 1414 } 1415 1416 cache->Unlock(); 1417 area->cache_type = CACHE_TYPE_RAM; 1418 return area->id; 1419 } 1420 1421 1422 status_t 1423 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1424 { 1425 AddressSpaceWriteLocker locker(team); 1426 if (!locker.IsLocked()) 1427 return B_BAD_TEAM_ID; 1428 1429 VMAddressSpace* addressSpace = locker.AddressSpace(); 1430 return addressSpace->UnreserveAddressRange((addr_t)address, size, 1431 addressSpace == VMAddressSpace::Kernel() 1432 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1433 } 1434 1435 1436 status_t 1437 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1438 addr_t size, uint32 flags) 1439 { 1440 if (size == 0) 1441 return B_BAD_VALUE; 1442 1443 AddressSpaceWriteLocker locker(team); 1444 if (!locker.IsLocked()) 1445 return B_BAD_TEAM_ID; 1446 1447 virtual_address_restrictions addressRestrictions = {}; 1448 addressRestrictions.address = *_address; 1449 addressRestrictions.address_specification = addressSpec; 1450 VMAddressSpace* addressSpace = locker.AddressSpace(); 1451 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags, 1452 addressSpace == VMAddressSpace::Kernel() 1453 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0, 1454 _address); 1455 } 1456 1457 1458 area_id 1459 vm_create_anonymous_area(team_id team, const char *name, addr_t size, 1460 uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize, 1461 const virtual_address_restrictions* virtualAddressRestrictions, 1462 const physical_address_restrictions* physicalAddressRestrictions, 1463 bool kernel, void** _address) 1464 { 1465 VMArea* area; 1466 VMCache* cache; 1467 vm_page* page = NULL; 1468 bool isStack = (protection & B_STACK_AREA) != 0; 1469 page_num_t guardPages; 1470 bool canOvercommit = false; 1471 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1472 ? VM_PAGE_ALLOC_CLEAR : 0; 1473 1474 TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n", 1475 team, name, size)); 1476 1477 size = PAGE_ALIGN(size); 1478 guardSize = PAGE_ALIGN(guardSize); 1479 guardPages = guardSize / B_PAGE_SIZE; 1480 1481 if (size == 0 || size < guardSize) 1482 return B_BAD_VALUE; 1483 if (!arch_vm_supports_protection(protection)) 1484 return B_NOT_SUPPORTED; 1485 1486 if (team == B_CURRENT_TEAM) 1487 team = VMAddressSpace::CurrentID(); 1488 if (team < 0) 1489 return B_BAD_TEAM_ID; 1490 1491 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1492 canOvercommit = true; 1493 1494 #ifdef DEBUG_KERNEL_STACKS 1495 if ((protection & B_KERNEL_STACK_AREA) != 0) 1496 isStack = true; 1497 #endif 1498 1499 // check parameters 1500 switch (virtualAddressRestrictions->address_specification) { 1501 case B_ANY_ADDRESS: 1502 case B_EXACT_ADDRESS: 1503 case B_BASE_ADDRESS: 1504 case B_ANY_KERNEL_ADDRESS: 1505 case B_ANY_KERNEL_BLOCK_ADDRESS: 1506 case B_RANDOMIZED_ANY_ADDRESS: 1507 case B_RANDOMIZED_BASE_ADDRESS: 1508 break; 1509 1510 default: 1511 return B_BAD_VALUE; 1512 } 1513 1514 // If low or high physical address restrictions are given, we force 1515 // B_CONTIGUOUS wiring, since only then we'll use 1516 // vm_page_allocate_page_run() which deals with those restrictions. 
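	// (B_CONTIGUOUS also implies that the memory is reserved and the page run
	// is allocated upfront; see the wiring switch and the B_CONTIGUOUS branch
	// below.)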
1517 if (physicalAddressRestrictions->low_address != 0 1518 || physicalAddressRestrictions->high_address != 0) { 1519 wiring = B_CONTIGUOUS; 1520 } 1521 1522 physical_address_restrictions stackPhysicalRestrictions; 1523 bool doReserveMemory = false; 1524 switch (wiring) { 1525 case B_NO_LOCK: 1526 break; 1527 case B_FULL_LOCK: 1528 case B_LAZY_LOCK: 1529 case B_CONTIGUOUS: 1530 doReserveMemory = true; 1531 break; 1532 case B_ALREADY_WIRED: 1533 break; 1534 case B_LOMEM: 1535 stackPhysicalRestrictions = *physicalAddressRestrictions; 1536 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024; 1537 physicalAddressRestrictions = &stackPhysicalRestrictions; 1538 wiring = B_CONTIGUOUS; 1539 doReserveMemory = true; 1540 break; 1541 case B_32_BIT_FULL_LOCK: 1542 if (B_HAIKU_PHYSICAL_BITS <= 32 1543 || (uint64)vm_page_max_address() < (uint64)1 << 32) { 1544 wiring = B_FULL_LOCK; 1545 doReserveMemory = true; 1546 break; 1547 } 1548 // TODO: We don't really support this mode efficiently. Just fall 1549 // through for now ... 1550 case B_32_BIT_CONTIGUOUS: 1551 #if B_HAIKU_PHYSICAL_BITS > 32 1552 if (vm_page_max_address() >= (phys_addr_t)1 << 32) { 1553 stackPhysicalRestrictions = *physicalAddressRestrictions; 1554 stackPhysicalRestrictions.high_address 1555 = (phys_addr_t)1 << 32; 1556 physicalAddressRestrictions = &stackPhysicalRestrictions; 1557 } 1558 #endif 1559 wiring = B_CONTIGUOUS; 1560 doReserveMemory = true; 1561 break; 1562 default: 1563 return B_BAD_VALUE; 1564 } 1565 1566 // Optimization: For a single-page contiguous allocation without low/high 1567 // memory restriction B_FULL_LOCK wiring suffices. 1568 if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE 1569 && physicalAddressRestrictions->low_address == 0 1570 && physicalAddressRestrictions->high_address == 0) { 1571 wiring = B_FULL_LOCK; 1572 } 1573 1574 // For full lock or contiguous areas we're also going to map the pages and 1575 // thus need to reserve pages for the mapping backend upfront. 1576 addr_t reservedMapPages = 0; 1577 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1578 AddressSpaceWriteLocker locker; 1579 status_t status = locker.SetTo(team); 1580 if (status != B_OK) 1581 return status; 1582 1583 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1584 reservedMapPages = map->MaxPagesNeededToMap(0, size - 1); 1585 } 1586 1587 int priority; 1588 if (team != VMAddressSpace::KernelID()) 1589 priority = VM_PRIORITY_USER; 1590 else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) 1591 priority = VM_PRIORITY_VIP; 1592 else 1593 priority = VM_PRIORITY_SYSTEM; 1594 1595 // Reserve memory before acquiring the address space lock. This reduces the 1596 // chances of failure, since while holding the write lock to the address 1597 // space (if it is the kernel address space that is), the low memory handler 1598 // won't be able to free anything for us. 1599 addr_t reservedMemory = 0; 1600 if (doReserveMemory) { 1601 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1602 if (vm_try_reserve_memory(size, priority, timeout) != B_OK) 1603 return B_NO_MEMORY; 1604 reservedMemory = size; 1605 // TODO: We don't reserve the memory for the pages for the page 1606 // directories/tables. We actually need to do since we currently don't 1607 // reclaim them (and probably can't reclaim all of them anyway). Thus 1608 // there are actually less physical pages than there should be, which 1609 // can get the VM into trouble in low memory situations. 
1610 } 1611 1612 AddressSpaceWriteLocker locker; 1613 VMAddressSpace* addressSpace; 1614 status_t status; 1615 1616 // For full lock areas reserve the pages before locking the address 1617 // space. E.g. block caches can't release their memory while we hold the 1618 // address space lock. 1619 page_num_t reservedPages = reservedMapPages; 1620 if (wiring == B_FULL_LOCK) 1621 reservedPages += size / B_PAGE_SIZE; 1622 1623 vm_page_reservation reservation; 1624 if (reservedPages > 0) { 1625 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1626 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1627 priority)) { 1628 reservedPages = 0; 1629 status = B_WOULD_BLOCK; 1630 goto err0; 1631 } 1632 } else 1633 vm_page_reserve_pages(&reservation, reservedPages, priority); 1634 } 1635 1636 if (wiring == B_CONTIGUOUS) { 1637 // we try to allocate the page run here upfront as this may easily 1638 // fail for obvious reasons 1639 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1640 size / B_PAGE_SIZE, physicalAddressRestrictions, priority); 1641 if (page == NULL) { 1642 status = B_NO_MEMORY; 1643 goto err0; 1644 } 1645 } 1646 1647 // Lock the address space and, if B_EXACT_ADDRESS and 1648 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1649 // is not wired. 1650 do { 1651 status = locker.SetTo(team); 1652 if (status != B_OK) 1653 goto err1; 1654 1655 addressSpace = locker.AddressSpace(); 1656 } while (virtualAddressRestrictions->address_specification 1657 == B_EXACT_ADDRESS 1658 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1659 && wait_if_address_range_is_wired(addressSpace, 1660 (addr_t)virtualAddressRestrictions->address, size, &locker)); 1661 1662 // create an anonymous cache 1663 // if it's a stack, make sure that two pages are available at least 1664 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1665 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1666 wiring == B_NO_LOCK, priority); 1667 if (status != B_OK) 1668 goto err1; 1669 1670 cache->temporary = 1; 1671 cache->virtual_end = size; 1672 cache->committed_size = reservedMemory; 1673 // TODO: This should be done via a method. 
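	// The reservation is now owned by the cache (committed_size above);
	// clearing reservedMemory prevents the error path at err0 from
	// unreserving it a second time.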
1674 reservedMemory = 0; 1675 1676 cache->Lock(); 1677 1678 status = map_backing_store(addressSpace, cache, 0, name, size, wiring, 1679 protection, 0, REGION_NO_PRIVATE_MAP, flags, 1680 virtualAddressRestrictions, kernel, &area, _address); 1681 1682 if (status != B_OK) { 1683 cache->ReleaseRefAndUnlock(); 1684 goto err1; 1685 } 1686 1687 locker.DegradeToReadLock(); 1688 1689 switch (wiring) { 1690 case B_NO_LOCK: 1691 case B_LAZY_LOCK: 1692 // do nothing - the pages are mapped in as needed 1693 break; 1694 1695 case B_FULL_LOCK: 1696 { 1697 // Allocate and map all pages for this area 1698 1699 off_t offset = 0; 1700 for (addr_t address = area->Base(); 1701 address < area->Base() + (area->Size() - 1); 1702 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1703 #ifdef DEBUG_KERNEL_STACKS 1704 # ifdef STACK_GROWS_DOWNWARDS 1705 if (isStack && address < area->Base() 1706 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1707 # else 1708 if (isStack && address >= area->Base() + area->Size() 1709 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1710 # endif 1711 continue; 1712 #endif 1713 vm_page* page = vm_page_allocate_page(&reservation, 1714 PAGE_STATE_WIRED | pageAllocFlags); 1715 cache->InsertPage(page, offset); 1716 map_page(area, page, address, protection, &reservation); 1717 1718 DEBUG_PAGE_ACCESS_END(page); 1719 } 1720 1721 break; 1722 } 1723 1724 case B_ALREADY_WIRED: 1725 { 1726 // The pages should already be mapped. This is only really useful 1727 // during boot time. Find the appropriate vm_page objects and stick 1728 // them in the cache object. 1729 VMTranslationMap* map = addressSpace->TranslationMap(); 1730 off_t offset = 0; 1731 1732 if (!gKernelStartup) 1733 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1734 1735 map->Lock(); 1736 1737 for (addr_t virtualAddress = area->Base(); 1738 virtualAddress < area->Base() + (area->Size() - 1); 1739 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1740 phys_addr_t physicalAddress; 1741 uint32 flags; 1742 status = map->Query(virtualAddress, &physicalAddress, &flags); 1743 if (status < B_OK) { 1744 panic("looking up mapping failed for va 0x%lx\n", 1745 virtualAddress); 1746 } 1747 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1748 if (page == NULL) { 1749 panic("looking up page failed for pa %#" B_PRIxPHYSADDR 1750 "\n", physicalAddress); 1751 } 1752 1753 DEBUG_PAGE_ACCESS_START(page); 1754 1755 cache->InsertPage(page, offset); 1756 increment_page_wired_count(page); 1757 vm_page_set_state(page, PAGE_STATE_WIRED); 1758 page->busy = false; 1759 1760 DEBUG_PAGE_ACCESS_END(page); 1761 } 1762 1763 map->Unlock(); 1764 break; 1765 } 1766 1767 case B_CONTIGUOUS: 1768 { 1769 // We have already allocated our continuous pages run, so we can now 1770 // just map them in the address space 1771 VMTranslationMap* map = addressSpace->TranslationMap(); 1772 phys_addr_t physicalAddress 1773 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 1774 addr_t virtualAddress = area->Base(); 1775 off_t offset = 0; 1776 1777 map->Lock(); 1778 1779 for (virtualAddress = area->Base(); virtualAddress < area->Base() 1780 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE, 1781 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 1782 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1783 if (page == NULL) 1784 panic("couldn't lookup physical page just allocated\n"); 1785 1786 status = map->Map(virtualAddress, physicalAddress, protection, 1787 area->MemoryType(), &reservation); 1788 if (status < B_OK) 1789 panic("couldn't map physical page in 
page run\n"); 1790 1791 cache->InsertPage(page, offset); 1792 increment_page_wired_count(page); 1793 1794 DEBUG_PAGE_ACCESS_END(page); 1795 } 1796 1797 map->Unlock(); 1798 break; 1799 } 1800 1801 default: 1802 break; 1803 } 1804 1805 cache->Unlock(); 1806 1807 if (reservedPages > 0) 1808 vm_page_unreserve_pages(&reservation); 1809 1810 TRACE(("vm_create_anonymous_area: done\n")); 1811 1812 area->cache_type = CACHE_TYPE_RAM; 1813 return area->id; 1814 1815 err1: 1816 if (wiring == B_CONTIGUOUS) { 1817 // we had reserved the area space upfront... 1818 phys_addr_t pageNumber = page->physical_page_number; 1819 int32 i; 1820 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1821 page = vm_lookup_page(pageNumber); 1822 if (page == NULL) 1823 panic("couldn't lookup physical page just allocated\n"); 1824 1825 vm_page_set_state(page, PAGE_STATE_FREE); 1826 } 1827 } 1828 1829 err0: 1830 if (reservedPages > 0) 1831 vm_page_unreserve_pages(&reservation); 1832 if (reservedMemory > 0) 1833 vm_unreserve_memory(reservedMemory); 1834 1835 return status; 1836 } 1837 1838 1839 area_id 1840 vm_map_physical_memory(team_id team, const char* name, void** _address, 1841 uint32 addressSpec, addr_t size, uint32 protection, 1842 phys_addr_t physicalAddress, bool alreadyWired) 1843 { 1844 VMArea* area; 1845 VMCache* cache; 1846 addr_t mapOffset; 1847 1848 TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p" 1849 ", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %" 1850 B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address, 1851 addressSpec, size, protection, physicalAddress)); 1852 1853 if (!arch_vm_supports_protection(protection)) 1854 return B_NOT_SUPPORTED; 1855 1856 AddressSpaceWriteLocker locker(team); 1857 if (!locker.IsLocked()) 1858 return B_BAD_TEAM_ID; 1859 1860 // if the physical address is somewhat inside a page, 1861 // move the actual area down to align on a page boundary 1862 mapOffset = physicalAddress % B_PAGE_SIZE; 1863 size += mapOffset; 1864 physicalAddress -= mapOffset; 1865 1866 size = PAGE_ALIGN(size); 1867 1868 // create a device cache 1869 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1870 if (status != B_OK) 1871 return status; 1872 1873 cache->virtual_end = size; 1874 1875 cache->Lock(); 1876 1877 virtual_address_restrictions addressRestrictions = {}; 1878 addressRestrictions.address = *_address; 1879 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1880 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1881 B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, 1882 true, &area, _address); 1883 1884 if (status < B_OK) 1885 cache->ReleaseRefLocked(); 1886 1887 cache->Unlock(); 1888 1889 if (status == B_OK) { 1890 // set requested memory type -- use uncached, if not given 1891 uint32 memoryType = addressSpec & B_MTR_MASK; 1892 if (memoryType == 0) 1893 memoryType = B_MTR_UC; 1894 1895 area->SetMemoryType(memoryType); 1896 1897 status = arch_vm_set_memory_type(area, physicalAddress, memoryType); 1898 if (status != B_OK) 1899 delete_area(locker.AddressSpace(), area, false); 1900 } 1901 1902 if (status != B_OK) 1903 return status; 1904 1905 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1906 1907 if (alreadyWired) { 1908 // The area is already mapped, but possibly not with the right 1909 // memory type. 
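		// Just re-protect the existing mappings so they pick up the memory
		// type that was set on the area above.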
1910 map->Lock(); 1911 map->ProtectArea(area, area->protection); 1912 map->Unlock(); 1913 } else { 1914 // Map the area completely. 1915 1916 // reserve pages needed for the mapping 1917 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1918 area->Base() + (size - 1)); 1919 vm_page_reservation reservation; 1920 vm_page_reserve_pages(&reservation, reservePages, 1921 team == VMAddressSpace::KernelID() 1922 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1923 1924 map->Lock(); 1925 1926 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1927 map->Map(area->Base() + offset, physicalAddress + offset, 1928 protection, area->MemoryType(), &reservation); 1929 } 1930 1931 map->Unlock(); 1932 1933 vm_page_unreserve_pages(&reservation); 1934 } 1935 1936 // modify the pointer returned to be offset back into the new area 1937 // the same way the physical address in was offset 1938 *_address = (void*)((addr_t)*_address + mapOffset); 1939 1940 area->cache_type = CACHE_TYPE_DEVICE; 1941 return area->id; 1942 } 1943 1944 1945 /*! Don't use! 1946 TODO: This function was introduced to map physical page vecs to 1947 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 1948 use a device cache and does not track vm_page::wired_count! 1949 */ 1950 area_id 1951 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 1952 uint32 addressSpec, addr_t* _size, uint32 protection, 1953 struct generic_io_vec* vecs, uint32 vecCount) 1954 { 1955 TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual " 1956 "= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", " 1957 "vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address, 1958 addressSpec, _size, protection, vecs, vecCount)); 1959 1960 if (!arch_vm_supports_protection(protection) 1961 || (addressSpec & B_MTR_MASK) != 0) { 1962 return B_NOT_SUPPORTED; 1963 } 1964 1965 AddressSpaceWriteLocker locker(team); 1966 if (!locker.IsLocked()) 1967 return B_BAD_TEAM_ID; 1968 1969 if (vecCount == 0) 1970 return B_BAD_VALUE; 1971 1972 addr_t size = 0; 1973 for (uint32 i = 0; i < vecCount; i++) { 1974 if (vecs[i].base % B_PAGE_SIZE != 0 1975 || vecs[i].length % B_PAGE_SIZE != 0) { 1976 return B_BAD_VALUE; 1977 } 1978 1979 size += vecs[i].length; 1980 } 1981 1982 // create a device cache 1983 VMCache* cache; 1984 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base); 1985 if (result != B_OK) 1986 return result; 1987 1988 cache->virtual_end = size; 1989 1990 cache->Lock(); 1991 1992 VMArea* area; 1993 virtual_address_restrictions addressRestrictions = {}; 1994 addressRestrictions.address = *_address; 1995 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1996 result = map_backing_store(locker.AddressSpace(), cache, 0, name, 1997 size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, 1998 &addressRestrictions, true, &area, _address); 1999 2000 if (result != B_OK) 2001 cache->ReleaseRefLocked(); 2002 2003 cache->Unlock(); 2004 2005 if (result != B_OK) 2006 return result; 2007 2008 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2009 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 2010 area->Base() + (size - 1)); 2011 2012 vm_page_reservation reservation; 2013 vm_page_reserve_pages(&reservation, reservePages, 2014 team == VMAddressSpace::KernelID() 2015 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2016 map->Lock(); 2017 2018 uint32 vecIndex = 0; 2019 size_t vecOffset = 0; 2020 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 2021 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) { 2022 vecOffset = 0; 2023 vecIndex++; 2024 } 2025 2026 if (vecIndex >= vecCount) 2027 break; 2028 2029 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 2030 protection, area->MemoryType(), &reservation); 2031 2032 vecOffset += B_PAGE_SIZE; 2033 } 2034 2035 map->Unlock(); 2036 vm_page_unreserve_pages(&reservation); 2037 2038 if (_size != NULL) 2039 *_size = size; 2040 2041 area->cache_type = CACHE_TYPE_DEVICE; 2042 return area->id; 2043 } 2044 2045 2046 area_id 2047 vm_create_null_area(team_id team, const char* name, void** address, 2048 uint32 addressSpec, addr_t size, uint32 flags) 2049 { 2050 size = PAGE_ALIGN(size); 2051 2052 // Lock the address space and, if B_EXACT_ADDRESS and 2053 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 2054 // is not wired. 2055 AddressSpaceWriteLocker locker; 2056 do { 2057 if (locker.SetTo(team) != B_OK) 2058 return B_BAD_TEAM_ID; 2059 } while (addressSpec == B_EXACT_ADDRESS 2060 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 2061 && wait_if_address_range_is_wired(locker.AddressSpace(), 2062 (addr_t)*address, size, &locker)); 2063 2064 // create a null cache 2065 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 2066 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 2067 VMCache* cache; 2068 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 2069 if (status != B_OK) 2070 return status; 2071 2072 cache->temporary = 1; 2073 cache->virtual_end = size; 2074 2075 cache->Lock(); 2076 2077 VMArea* area; 2078 virtual_address_restrictions addressRestrictions = {}; 2079 addressRestrictions.address = *address; 2080 addressRestrictions.address_specification = addressSpec; 2081 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 2082 B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA, 2083 REGION_NO_PRIVATE_MAP, flags, 2084 &addressRestrictions, true, &area, address); 2085 2086 if (status < B_OK) { 2087 cache->ReleaseRefAndUnlock(); 2088 return status; 2089 } 2090 2091 cache->Unlock(); 2092 2093 area->cache_type = CACHE_TYPE_NULL; 2094 return area->id; 2095 } 2096 2097 2098 /*! Creates the vnode cache for the specified \a vnode. 2099 The vnode has to be marked busy when calling this function. 2100 */ 2101 status_t 2102 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 2103 { 2104 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 2105 } 2106 2107 2108 /*! \a cache must be locked. The area's address space must be read-locked. 
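	Only pages that are already resident, not busy, and have a non-zero usage
	count are mapped here; everything else is left to be faulted in on demand.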
2109 */ 2110 static void 2111 pre_map_area_pages(VMArea* area, VMCache* cache, 2112 vm_page_reservation* reservation) 2113 { 2114 addr_t baseAddress = area->Base(); 2115 addr_t cacheOffset = area->cache_offset; 2116 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 2117 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE; 2118 2119 for (VMCachePagesTree::Iterator it 2120 = cache->pages.GetIterator(firstPage, true, true); 2121 vm_page* page = it.Next();) { 2122 if (page->cache_offset >= endPage) 2123 break; 2124 2125 // skip busy and inactive pages 2126 if (page->busy || page->usage_count == 0) 2127 continue; 2128 2129 DEBUG_PAGE_ACCESS_START(page); 2130 map_page(area, page, 2131 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 2132 B_READ_AREA | B_KERNEL_READ_AREA, reservation); 2133 DEBUG_PAGE_ACCESS_END(page); 2134 } 2135 } 2136 2137 2138 /*! Will map the file specified by \a fd to an area in memory. 2139 The file will be mirrored beginning at the specified \a offset. The 2140 \a offset and \a size arguments have to be page aligned. 2141 */ 2142 static area_id 2143 _vm_map_file(team_id team, const char* name, void** _address, 2144 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 2145 bool unmapAddressRange, int fd, off_t offset, bool kernel) 2146 { 2147 // TODO: for binary files, we want to make sure that they get the 2148 // copy of a file at a given time, ie. later changes should not 2149 // make it into the mapped copy -- this will need quite some changes 2150 // to be done in a nice way 2151 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping " 2152 "%" B_PRIu32 ")\n", fd, offset, size, mapping)); 2153 2154 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 2155 size = PAGE_ALIGN(size); 2156 2157 if (mapping == REGION_NO_PRIVATE_MAP) 2158 protection |= B_SHARED_AREA; 2159 if (addressSpec != B_EXACT_ADDRESS) 2160 unmapAddressRange = false; 2161 2162 uint32 mappingFlags = 0; 2163 if (unmapAddressRange) 2164 mappingFlags |= CREATE_AREA_UNMAP_ADDRESS_RANGE; 2165 2166 if (fd < 0) { 2167 virtual_address_restrictions virtualRestrictions = {}; 2168 virtualRestrictions.address = *_address; 2169 virtualRestrictions.address_specification = addressSpec; 2170 physical_address_restrictions physicalRestrictions = {}; 2171 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection, 2172 mappingFlags, 0, &virtualRestrictions, &physicalRestrictions, kernel, 2173 _address); 2174 } 2175 2176 // get the open flags of the FD 2177 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 2178 if (descriptor == NULL) 2179 return EBADF; 2180 int32 openMode = descriptor->open_mode; 2181 put_fd(descriptor); 2182 2183 // The FD must open for reading at any rate. For shared mapping with write 2184 // access, additionally the FD must be open for writing. 2185 if ((openMode & O_ACCMODE) == O_WRONLY 2186 || (mapping == REGION_NO_PRIVATE_MAP 2187 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2188 && (openMode & O_ACCMODE) == O_RDONLY)) { 2189 return EACCES; 2190 } 2191 2192 uint32 protectionMax = 0; 2193 if (mapping == REGION_NO_PRIVATE_MAP) { 2194 if ((openMode & O_ACCMODE) == O_RDWR) 2195 protectionMax = protection | B_USER_PROTECTION; 2196 else 2197 protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA); 2198 } else if (mapping == REGION_PRIVATE_MAP) { 2199 // For privately mapped read-only regions, skip committing memory. 2200 // (If protections are changed later on, memory will be committed then.) 
2201 if ((protection & B_WRITE_AREA) == 0) 2202 mappingFlags |= CREATE_AREA_DONT_COMMIT_MEMORY; 2203 } 2204 2205 // get the vnode for the object, this also grabs a ref to it 2206 struct vnode* vnode = NULL; 2207 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2208 if (status < B_OK) 2209 return status; 2210 VnodePutter vnodePutter(vnode); 2211 2212 // If we're going to pre-map pages, we need to reserve the pages needed by 2213 // the mapping backend upfront. 2214 page_num_t reservedPreMapPages = 0; 2215 vm_page_reservation reservation; 2216 if ((protection & B_READ_AREA) != 0) { 2217 AddressSpaceWriteLocker locker; 2218 status = locker.SetTo(team); 2219 if (status != B_OK) 2220 return status; 2221 2222 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2223 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 2224 2225 locker.Unlock(); 2226 2227 vm_page_reserve_pages(&reservation, reservedPreMapPages, 2228 team == VMAddressSpace::KernelID() 2229 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2230 } 2231 2232 struct PageUnreserver { 2233 PageUnreserver(vm_page_reservation* reservation) 2234 : 2235 fReservation(reservation) 2236 { 2237 } 2238 2239 ~PageUnreserver() 2240 { 2241 if (fReservation != NULL) 2242 vm_page_unreserve_pages(fReservation); 2243 } 2244 2245 vm_page_reservation* fReservation; 2246 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 2247 2248 // Lock the address space and, if the specified address range shall be 2249 // unmapped, ensure it is not wired. 2250 AddressSpaceWriteLocker locker; 2251 do { 2252 if (locker.SetTo(team) != B_OK) 2253 return B_BAD_TEAM_ID; 2254 } while (unmapAddressRange 2255 && wait_if_address_range_is_wired(locker.AddressSpace(), 2256 (addr_t)*_address, size, &locker)); 2257 2258 // TODO: this only works for file systems that use the file cache 2259 VMCache* cache; 2260 status = vfs_get_vnode_cache(vnode, &cache, false); 2261 if (status < B_OK) 2262 return status; 2263 2264 cache->Lock(); 2265 2266 VMArea* area; 2267 virtual_address_restrictions addressRestrictions = {}; 2268 addressRestrictions.address = *_address; 2269 addressRestrictions.address_specification = addressSpec; 2270 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 2271 0, protection, protectionMax, mapping, mappingFlags, 2272 &addressRestrictions, kernel, &area, _address); 2273 2274 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2275 // map_backing_store() cannot know we no longer need the ref 2276 cache->ReleaseRefLocked(); 2277 } 2278 2279 if (status == B_OK && (protection & B_READ_AREA) != 0) 2280 pre_map_area_pages(area, cache, &reservation); 2281 2282 cache->Unlock(); 2283 2284 if (status == B_OK) { 2285 // TODO: this probably deserves a smarter solution, ie. don't always 2286 // prefetch stuff, and also, probably don't trigger it at this place. 
2287 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 2288 // prefetches at max 10 MB starting from "offset" 2289 } 2290 2291 if (status != B_OK) 2292 return status; 2293 2294 area->cache_type = CACHE_TYPE_VNODE; 2295 return area->id; 2296 } 2297 2298 2299 area_id 2300 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2301 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2302 int fd, off_t offset) 2303 { 2304 if (!arch_vm_supports_protection(protection)) 2305 return B_NOT_SUPPORTED; 2306 2307 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2308 mapping, unmapAddressRange, fd, offset, true); 2309 } 2310 2311 2312 VMCache* 2313 vm_area_get_locked_cache(VMArea* area) 2314 { 2315 rw_lock_read_lock(&sAreaCacheLock); 2316 2317 while (true) { 2318 VMCache* cache = area->cache; 2319 2320 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2321 // cache has been deleted 2322 rw_lock_read_lock(&sAreaCacheLock); 2323 continue; 2324 } 2325 2326 rw_lock_read_lock(&sAreaCacheLock); 2327 2328 if (cache == area->cache) { 2329 cache->AcquireRefLocked(); 2330 rw_lock_read_unlock(&sAreaCacheLock); 2331 return cache; 2332 } 2333 2334 // the cache changed in the meantime 2335 cache->Unlock(); 2336 } 2337 } 2338 2339 2340 void 2341 vm_area_put_locked_cache(VMCache* cache) 2342 { 2343 cache->ReleaseRefAndUnlock(); 2344 } 2345 2346 2347 area_id 2348 vm_clone_area(team_id team, const char* name, void** address, 2349 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2350 bool kernel) 2351 { 2352 VMArea* newArea = NULL; 2353 VMArea* sourceArea; 2354 2355 // Check whether the source area exists and is cloneable. If so, mark it 2356 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2357 { 2358 AddressSpaceWriteLocker locker; 2359 status_t status = locker.SetFromArea(sourceID, sourceArea); 2360 if (status != B_OK) 2361 return status; 2362 2363 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2364 return B_NOT_ALLOWED; 2365 2366 sourceArea->protection |= B_SHARED_AREA; 2367 protection |= B_SHARED_AREA; 2368 } 2369 2370 // Now lock both address spaces and actually do the cloning. 
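	// Both the source area's address space and the target team's address
	// space have to be write-locked together; MultiAddressSpaceLocker is used
	// so that both can be acquired without risking a lock-order deadlock.
	// Since the write lock taken above was dropped in the meantime, the source
	// area is looked up again below and re-validated before it is used.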
2371 2372 MultiAddressSpaceLocker locker; 2373 VMAddressSpace* sourceAddressSpace; 2374 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2375 if (status != B_OK) 2376 return status; 2377 2378 VMAddressSpace* targetAddressSpace; 2379 status = locker.AddTeam(team, true, &targetAddressSpace); 2380 if (status != B_OK) 2381 return status; 2382 2383 status = locker.Lock(); 2384 if (status != B_OK) 2385 return status; 2386 2387 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2388 if (sourceArea == NULL) 2389 return B_BAD_VALUE; 2390 2391 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2392 return B_NOT_ALLOWED; 2393 2394 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2395 2396 if (!kernel && sourceAddressSpace != targetAddressSpace 2397 && (sourceArea->protection & B_CLONEABLE_AREA) == 0) { 2398 #if KDEBUG 2399 Team* team = thread_get_current_thread()->team; 2400 dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%" 2401 B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID); 2402 #endif 2403 status = B_NOT_ALLOWED; 2404 } else if (sourceArea->cache_type == CACHE_TYPE_NULL) { 2405 status = B_NOT_ALLOWED; 2406 } else { 2407 virtual_address_restrictions addressRestrictions = {}; 2408 addressRestrictions.address = *address; 2409 addressRestrictions.address_specification = addressSpec; 2410 status = map_backing_store(targetAddressSpace, cache, 2411 sourceArea->cache_offset, name, sourceArea->Size(), 2412 sourceArea->wiring, protection, sourceArea->protection_max, 2413 mapping, 0, &addressRestrictions, 2414 kernel, &newArea, address); 2415 } 2416 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2417 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2418 // to create a new cache, and has therefore already acquired a reference 2419 // to the source cache - but otherwise it has no idea that we need 2420 // one. 2421 cache->AcquireRefLocked(); 2422 } 2423 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2424 // we need to map in everything at this point 2425 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2426 // we don't have actual pages to map but a physical area 2427 VMTranslationMap* map 2428 = sourceArea->address_space->TranslationMap(); 2429 map->Lock(); 2430 2431 phys_addr_t physicalAddress; 2432 uint32 oldProtection; 2433 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2434 2435 map->Unlock(); 2436 2437 map = targetAddressSpace->TranslationMap(); 2438 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2439 newArea->Base() + (newArea->Size() - 1)); 2440 2441 vm_page_reservation reservation; 2442 vm_page_reserve_pages(&reservation, reservePages, 2443 targetAddressSpace == VMAddressSpace::Kernel() 2444 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2445 map->Lock(); 2446 2447 for (addr_t offset = 0; offset < newArea->Size(); 2448 offset += B_PAGE_SIZE) { 2449 map->Map(newArea->Base() + offset, physicalAddress + offset, 2450 protection, newArea->MemoryType(), &reservation); 2451 } 2452 2453 map->Unlock(); 2454 vm_page_unreserve_pages(&reservation); 2455 } else { 2456 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2457 size_t reservePages = map->MaxPagesNeededToMap( 2458 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2459 vm_page_reservation reservation; 2460 vm_page_reserve_pages(&reservation, reservePages, 2461 targetAddressSpace == VMAddressSpace::Kernel() 2462 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2463 2464 // map in all pages from source 2465 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2466 vm_page* page = it.Next();) { 2467 if (!page->busy) { 2468 DEBUG_PAGE_ACCESS_START(page); 2469 map_page(newArea, page, 2470 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2471 - newArea->cache_offset), 2472 protection, &reservation); 2473 DEBUG_PAGE_ACCESS_END(page); 2474 } 2475 } 2476 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2477 // ensuring that! 2478 2479 vm_page_unreserve_pages(&reservation); 2480 } 2481 } 2482 if (status == B_OK) 2483 newArea->cache_type = sourceArea->cache_type; 2484 2485 vm_area_put_locked_cache(cache); 2486 2487 if (status < B_OK) 2488 return status; 2489 2490 return newArea->id; 2491 } 2492 2493 2494 /*! Deletes the specified area of the given address space. 2495 2496 The address space must be write-locked. 2497 The caller must ensure that the area does not have any wired ranges. 2498 2499 \param addressSpace The address space containing the area. 2500 \param area The area to be deleted. 2501 \param deletingAddressSpace \c true, if the address space is in the process 2502 of being deleted. 2503 */ 2504 static void 2505 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2506 bool deletingAddressSpace) 2507 { 2508 ASSERT(!area->IsWired()); 2509 2510 VMAreas::Remove(area); 2511 2512 // At this point the area is removed from the global hash table, but 2513 // still exists in the area list. 2514 2515 // Unmap the virtual address space the area occupied. 2516 { 2517 // We need to lock the complete cache chain. 2518 VMCache* topCache = vm_area_get_locked_cache(area); 2519 VMCacheChainLocker cacheChainLocker(topCache); 2520 cacheChainLocker.LockAllSourceCaches(); 2521 2522 // If the area's top cache is a temporary cache and the area is the only 2523 // one referencing it (besides us currently holding a second reference), 2524 // the unmapping code doesn't need to care about preserving the accessed 2525 // and dirty flags of the top cache page mappings. 2526 bool ignoreTopCachePageFlags 2527 = topCache->temporary && topCache->RefCount() == 2; 2528 2529 area->address_space->TranslationMap()->UnmapArea(area, 2530 deletingAddressSpace, ignoreTopCachePageFlags); 2531 } 2532 2533 if (!area->cache->temporary) 2534 area->cache->WriteModified(); 2535 2536 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2537 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2538 2539 arch_vm_unset_memory_type(area); 2540 addressSpace->RemoveArea(area, allocationFlags); 2541 addressSpace->Put(); 2542 2543 area->cache->RemoveArea(area); 2544 area->cache->ReleaseRef(); 2545 2546 addressSpace->DeleteArea(area, allocationFlags); 2547 } 2548 2549 2550 status_t 2551 vm_delete_area(team_id team, area_id id, bool kernel) 2552 { 2553 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n", 2554 team, id)); 2555 2556 // lock the address space and make sure the area isn't wired 2557 AddressSpaceWriteLocker locker; 2558 VMArea* area; 2559 AreaCacheLocker cacheLocker; 2560 2561 do { 2562 status_t status = locker.SetFromArea(team, id, area); 2563 if (status != B_OK) 2564 return status; 2565 2566 cacheLocker.SetTo(area); 2567 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2568 2569 cacheLocker.Unlock(); 2570 2571 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2572 return B_NOT_ALLOWED; 2573 2574 delete_area(locker.AddressSpace(), area, false); 2575 return B_OK; 2576 } 2577 2578 2579 /*! Creates a new cache on top of given cache, moves all areas from 2580 the old cache to the new one, and changes the protection of all affected 2581 areas' pages to read-only. If requested, wired pages are moved up to the 2582 new cache and copies are added to the old cache in their place. 2583 Preconditions: 2584 - The given cache must be locked. 2585 - All of the cache's areas' address spaces must be read locked. 2586 - Either the cache must not have any wired ranges or a page reservation for 2587 all wired pages must be provided, so they can be copied. 2588 2589 \param lowerCache The cache on top of which a new cache shall be created. 2590 \param wiredPagesReservation If \c NULL there must not be any wired pages 2591 in \a lowerCache. Otherwise as many pages must be reserved as the cache 2592 has wired page. The wired pages are copied in this case. 2593 */ 2594 static status_t 2595 vm_copy_on_write_area(VMCache* lowerCache, 2596 vm_page_reservation* wiredPagesReservation) 2597 { 2598 VMCache* upperCache; 2599 2600 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2601 2602 // We need to separate the cache from its areas. The cache goes one level 2603 // deeper and we create a new cache inbetween. 2604 2605 // create an anonymous cache 2606 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2607 lowerCache->GuardSize() / B_PAGE_SIZE, 2608 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL, 2609 VM_PRIORITY_USER); 2610 if (status != B_OK) 2611 return status; 2612 2613 upperCache->Lock(); 2614 2615 upperCache->temporary = 1; 2616 upperCache->virtual_base = lowerCache->virtual_base; 2617 upperCache->virtual_end = lowerCache->virtual_end; 2618 2619 // transfer the lower cache areas to the upper cache 2620 rw_lock_write_lock(&sAreaCacheLock); 2621 upperCache->TransferAreas(lowerCache); 2622 rw_lock_write_unlock(&sAreaCacheLock); 2623 2624 lowerCache->AddConsumer(upperCache); 2625 2626 // We now need to remap all pages from all of the cache's areas read-only, 2627 // so that a copy will be created on next write access. If there are wired 2628 // pages, we keep their protection, move them to the upper cache and create 2629 // copies for the lower cache. 2630 if (wiredPagesReservation != NULL) { 2631 // We need to handle wired pages -- iterate through the cache's pages. 
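		// A wired page must keep its current (possibly writable) mapping --
		// it may, for instance, be the target of ongoing I/O or be accessed
		// from a context that must not fault -- so instead of write-protecting
		// it, it is moved to the new upper cache and a copy is inserted into
		// the lower cache in its place. Unwired pages are simply remapped
		// read-only, so the next write access faults and triggers the copy.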
2632 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2633 vm_page* page = it.Next();) { 2634 if (page->WiredCount() > 0) { 2635 // allocate a new page and copy the wired one 2636 vm_page* copiedPage = vm_page_allocate_page( 2637 wiredPagesReservation, PAGE_STATE_ACTIVE); 2638 2639 vm_memcpy_physical_page( 2640 copiedPage->physical_page_number * B_PAGE_SIZE, 2641 page->physical_page_number * B_PAGE_SIZE); 2642 2643 // move the wired page to the upper cache (note: removing is OK 2644 // with the SplayTree iterator) and insert the copy 2645 upperCache->MovePage(page); 2646 lowerCache->InsertPage(copiedPage, 2647 page->cache_offset * B_PAGE_SIZE); 2648 2649 DEBUG_PAGE_ACCESS_END(copiedPage); 2650 } else { 2651 // Change the protection of this page in all areas. 2652 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2653 tempArea = tempArea->cache_next) { 2654 if (!is_page_in_area(tempArea, page)) 2655 continue; 2656 2657 // The area must be readable in the same way it was 2658 // previously writable. 2659 addr_t address = virtual_page_address(tempArea, page); 2660 uint32 protection = 0; 2661 uint32 pageProtection = get_area_page_protection(tempArea, address); 2662 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2663 protection |= B_KERNEL_READ_AREA; 2664 if ((pageProtection & B_READ_AREA) != 0) 2665 protection |= B_READ_AREA; 2666 2667 VMTranslationMap* map 2668 = tempArea->address_space->TranslationMap(); 2669 map->Lock(); 2670 map->ProtectPage(tempArea, address, protection); 2671 map->Unlock(); 2672 } 2673 } 2674 } 2675 } else { 2676 ASSERT(lowerCache->WiredPagesCount() == 0); 2677 2678 // just change the protection of all areas 2679 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2680 tempArea = tempArea->cache_next) { 2681 if (tempArea->page_protections != NULL) { 2682 // Change the protection of all pages in this area. 2683 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2684 map->Lock(); 2685 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2686 vm_page* page = it.Next();) { 2687 if (!is_page_in_area(tempArea, page)) 2688 continue; 2689 2690 // The area must be readable in the same way it was 2691 // previously writable. 2692 addr_t address = virtual_page_address(tempArea, page); 2693 uint32 protection = 0; 2694 uint32 pageProtection = get_area_page_protection(tempArea, address); 2695 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2696 protection |= B_KERNEL_READ_AREA; 2697 if ((pageProtection & B_READ_AREA) != 0) 2698 protection |= B_READ_AREA; 2699 2700 map->ProtectPage(tempArea, address, protection); 2701 } 2702 map->Unlock(); 2703 continue; 2704 } 2705 // The area must be readable in the same way it was previously 2706 // writable. 2707 uint32 protection = 0; 2708 if ((tempArea->protection & B_KERNEL_READ_AREA) != 0) 2709 protection |= B_KERNEL_READ_AREA; 2710 if ((tempArea->protection & B_READ_AREA) != 0) 2711 protection |= B_READ_AREA; 2712 2713 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2714 map->Lock(); 2715 map->ProtectArea(tempArea, protection); 2716 map->Unlock(); 2717 } 2718 } 2719 2720 vm_area_put_locked_cache(upperCache); 2721 2722 return B_OK; 2723 } 2724 2725 2726 area_id 2727 vm_copy_area(team_id team, const char* name, void** _address, 2728 uint32 addressSpec, area_id sourceID) 2729 { 2730 // Do the locking: target address space, all address spaces associated with 2731 // the source cache, and the cache itself. 
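	// Reserving pages for the wired-page copies can only be sized correctly
	// with the cache locked, but the reservation itself may block. The loop
	// below therefore counts the wired pages, drops all locks, reserves that
	// many pages, and then re-locks and re-checks; it only proceeds once the
	// reservation covers the current wired page count.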
2732 MultiAddressSpaceLocker locker; 2733 VMAddressSpace* targetAddressSpace; 2734 VMCache* cache; 2735 VMArea* source; 2736 AreaCacheLocker cacheLocker; 2737 status_t status; 2738 bool sharedArea; 2739 2740 page_num_t wiredPages = 0; 2741 vm_page_reservation wiredPagesReservation; 2742 2743 bool restart; 2744 do { 2745 restart = false; 2746 2747 locker.Unset(); 2748 status = locker.AddTeam(team, true, &targetAddressSpace); 2749 if (status == B_OK) { 2750 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2751 &cache); 2752 } 2753 if (status != B_OK) 2754 return status; 2755 2756 cacheLocker.SetTo(cache, true); // already locked 2757 2758 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2759 2760 page_num_t oldWiredPages = wiredPages; 2761 wiredPages = 0; 2762 2763 // If the source area isn't shared, count the number of wired pages in 2764 // the cache and reserve as many pages. 2765 if (!sharedArea) { 2766 wiredPages = cache->WiredPagesCount(); 2767 2768 if (wiredPages > oldWiredPages) { 2769 cacheLocker.Unlock(); 2770 locker.Unlock(); 2771 2772 if (oldWiredPages > 0) 2773 vm_page_unreserve_pages(&wiredPagesReservation); 2774 2775 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2776 VM_PRIORITY_USER); 2777 2778 restart = true; 2779 } 2780 } else if (oldWiredPages > 0) 2781 vm_page_unreserve_pages(&wiredPagesReservation); 2782 } while (restart); 2783 2784 // unreserve pages later 2785 struct PagesUnreserver { 2786 PagesUnreserver(vm_page_reservation* reservation) 2787 : 2788 fReservation(reservation) 2789 { 2790 } 2791 2792 ~PagesUnreserver() 2793 { 2794 if (fReservation != NULL) 2795 vm_page_unreserve_pages(fReservation); 2796 } 2797 2798 private: 2799 vm_page_reservation* fReservation; 2800 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2801 2802 bool writableCopy 2803 = (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2804 uint8* targetPageProtections = NULL; 2805 2806 if (source->page_protections != NULL) { 2807 size_t bytes = area_page_protections_size(source->Size()); 2808 targetPageProtections = (uint8*)malloc_etc(bytes, 2809 (source->address_space == VMAddressSpace::Kernel() 2810 || targetAddressSpace == VMAddressSpace::Kernel()) 2811 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 2812 if (targetPageProtections == NULL) 2813 return B_NO_MEMORY; 2814 2815 memcpy(targetPageProtections, source->page_protections, bytes); 2816 2817 if (!writableCopy) { 2818 for (size_t i = 0; i < bytes; i++) { 2819 if ((targetPageProtections[i] 2820 & (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) { 2821 writableCopy = true; 2822 break; 2823 } 2824 } 2825 } 2826 } 2827 2828 if (addressSpec == B_CLONE_ADDRESS) { 2829 addressSpec = B_EXACT_ADDRESS; 2830 *_address = (void*)source->Base(); 2831 } 2832 2833 // First, create a cache on top of the source area, respectively use the 2834 // existing one, if this is a shared area. 2835 2836 VMArea* target; 2837 virtual_address_restrictions addressRestrictions = {}; 2838 addressRestrictions.address = *_address; 2839 addressRestrictions.address_specification = addressSpec; 2840 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2841 name, source->Size(), source->wiring, source->protection, 2842 source->protection_max, 2843 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2844 writableCopy ? 
0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2845 &addressRestrictions, true, &target, _address); 2846 if (status < B_OK) { 2847 free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE); 2848 return status; 2849 } 2850 2851 if (targetPageProtections != NULL) 2852 target->page_protections = targetPageProtections; 2853 2854 if (sharedArea) { 2855 // The new area uses the old area's cache, but map_backing_store() 2856 // hasn't acquired a ref. So we have to do that now. 2857 cache->AcquireRefLocked(); 2858 } 2859 2860 // If the source area is writable, we need to move it one layer up as well 2861 2862 if (!sharedArea) { 2863 if (writableCopy) { 2864 // TODO: do something more useful if this fails! 2865 if (vm_copy_on_write_area(cache, 2866 wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) { 2867 panic("vm_copy_on_write_area() failed!\n"); 2868 } 2869 } 2870 } 2871 2872 // we return the ID of the newly created area 2873 return target->id; 2874 } 2875 2876 2877 status_t 2878 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2879 bool kernel) 2880 { 2881 fix_protection(&newProtection); 2882 2883 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2884 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2885 2886 if (!arch_vm_supports_protection(newProtection)) 2887 return B_NOT_SUPPORTED; 2888 2889 bool becomesWritable 2890 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2891 2892 // lock address spaces and cache 2893 MultiAddressSpaceLocker locker; 2894 VMCache* cache; 2895 VMArea* area; 2896 status_t status; 2897 AreaCacheLocker cacheLocker; 2898 bool isWritable; 2899 2900 bool restart; 2901 do { 2902 restart = false; 2903 2904 locker.Unset(); 2905 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2906 if (status != B_OK) 2907 return status; 2908 2909 cacheLocker.SetTo(cache, true); // already locked 2910 2911 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 2912 || (area->protection & B_KERNEL_AREA) != 0)) { 2913 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2914 "set protection %#" B_PRIx32 " on kernel area %" B_PRId32 2915 " (%s)\n", team, newProtection, areaID, area->name); 2916 return B_NOT_ALLOWED; 2917 } 2918 if (!kernel && area->protection_max != 0 2919 && (newProtection & area->protection_max) 2920 != (newProtection & B_USER_PROTECTION)) { 2921 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2922 "set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel " 2923 "area %" B_PRId32 " (%s)\n", team, newProtection, 2924 area->protection_max, areaID, area->name); 2925 return B_NOT_ALLOWED; 2926 } 2927 2928 if (team != VMAddressSpace::KernelID() 2929 && area->address_space->ID() != team) { 2930 // unless you're the kernel, you are only allowed to set 2931 // the protection of your own areas 2932 return B_NOT_ALLOWED; 2933 } 2934 2935 if (area->protection == newProtection) 2936 return B_OK; 2937 2938 isWritable 2939 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2940 2941 // Make sure the area (respectively, if we're going to call 2942 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2943 // wired ranges. 
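	// wait_if_area_is_wired() temporarily drops the locks while waiting, so
	// whenever it had to wait, the whole lookup is restarted and the area and
	// cache are re-fetched and re-validated on the next iteration.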
2944 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2945 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2946 otherArea = otherArea->cache_next) { 2947 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2948 restart = true; 2949 break; 2950 } 2951 } 2952 } else { 2953 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2954 restart = true; 2955 } 2956 } while (restart); 2957 2958 bool changePageProtection = true; 2959 bool changeTopCachePagesOnly = false; 2960 2961 if (isWritable && !becomesWritable) { 2962 // writable -> !writable 2963 2964 if (cache->source != NULL && cache->temporary) { 2965 if (cache->CountWritableAreas(area) == 0) { 2966 // Since this cache now lives from the pages in its source cache, 2967 // we can change the cache's commitment to take only those pages 2968 // into account that really are in this cache. 2969 2970 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2971 team == VMAddressSpace::KernelID() 2972 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2973 2974 // TODO: we may be able to join with our source cache, if 2975 // count == 0 2976 } 2977 } 2978 2979 // If only the writability changes, we can just remap the pages of the 2980 // top cache, since the pages of lower caches are mapped read-only 2981 // anyway. That's advantageous only, if the number of pages in the cache 2982 // is significantly smaller than the number of pages in the area, 2983 // though. 2984 if (newProtection 2985 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2986 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2987 changeTopCachePagesOnly = true; 2988 } 2989 } else if (!isWritable && becomesWritable) { 2990 // !writable -> writable 2991 2992 if (!cache->consumers.IsEmpty()) { 2993 // There are consumers -- we have to insert a new cache. Fortunately 2994 // vm_copy_on_write_area() does everything that's needed. 2995 changePageProtection = false; 2996 status = vm_copy_on_write_area(cache, NULL); 2997 } else { 2998 // No consumers, so we don't need to insert a new one. 2999 if (cache->source != NULL && cache->temporary) { 3000 // the cache's commitment must contain all possible pages 3001 status = cache->Commit(cache->virtual_end - cache->virtual_base, 3002 team == VMAddressSpace::KernelID() 3003 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 3004 } 3005 3006 if (status == B_OK && cache->source != NULL) { 3007 // There's a source cache, hence we can't just change all pages' 3008 // protection or we might allow writing into pages belonging to 3009 // a lower cache. 
3010 changeTopCachePagesOnly = true; 3011 } 3012 } 3013 } else { 3014 // we don't have anything special to do in all other cases 3015 } 3016 3017 if (status == B_OK) { 3018 // remap existing pages in this cache 3019 if (changePageProtection) { 3020 VMTranslationMap* map = area->address_space->TranslationMap(); 3021 map->Lock(); 3022 3023 if (changeTopCachePagesOnly) { 3024 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 3025 page_num_t lastPageOffset 3026 = firstPageOffset + area->Size() / B_PAGE_SIZE; 3027 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 3028 vm_page* page = it.Next();) { 3029 if (page->cache_offset >= firstPageOffset 3030 && page->cache_offset <= lastPageOffset) { 3031 addr_t address = virtual_page_address(area, page); 3032 map->ProtectPage(area, address, newProtection); 3033 } 3034 } 3035 } else 3036 map->ProtectArea(area, newProtection); 3037 3038 map->Unlock(); 3039 } 3040 3041 area->protection = newProtection; 3042 } 3043 3044 return status; 3045 } 3046 3047 3048 status_t 3049 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 3050 { 3051 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 3052 if (addressSpace == NULL) 3053 return B_BAD_TEAM_ID; 3054 3055 VMTranslationMap* map = addressSpace->TranslationMap(); 3056 3057 map->Lock(); 3058 uint32 dummyFlags; 3059 status_t status = map->Query(vaddr, paddr, &dummyFlags); 3060 map->Unlock(); 3061 3062 addressSpace->Put(); 3063 return status; 3064 } 3065 3066 3067 /*! The page's cache must be locked. 3068 */ 3069 bool 3070 vm_test_map_modification(vm_page* page) 3071 { 3072 if (page->modified) 3073 return true; 3074 3075 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3076 vm_page_mapping* mapping; 3077 while ((mapping = iterator.Next()) != NULL) { 3078 VMArea* area = mapping->area; 3079 VMTranslationMap* map = area->address_space->TranslationMap(); 3080 3081 phys_addr_t physicalAddress; 3082 uint32 flags; 3083 map->Lock(); 3084 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 3085 map->Unlock(); 3086 3087 if ((flags & PAGE_MODIFIED) != 0) 3088 return true; 3089 } 3090 3091 return false; 3092 } 3093 3094 3095 /*! The page's cache must be locked. 3096 */ 3097 void 3098 vm_clear_map_flags(vm_page* page, uint32 flags) 3099 { 3100 if ((flags & PAGE_ACCESSED) != 0) 3101 page->accessed = false; 3102 if ((flags & PAGE_MODIFIED) != 0) 3103 page->modified = false; 3104 3105 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3106 vm_page_mapping* mapping; 3107 while ((mapping = iterator.Next()) != NULL) { 3108 VMArea* area = mapping->area; 3109 VMTranslationMap* map = area->address_space->TranslationMap(); 3110 3111 map->Lock(); 3112 map->ClearFlags(virtual_page_address(area, page), flags); 3113 map->Unlock(); 3114 } 3115 } 3116 3117 3118 /*! Removes all mappings from a page. 3119 After you've called this function, the page is unmapped from memory and 3120 the page's \c accessed and \c modified flags have been updated according 3121 to the state of the mappings. 3122 The page's cache must be locked. 
3123 */ 3124 void 3125 vm_remove_all_page_mappings(vm_page* page) 3126 { 3127 while (vm_page_mapping* mapping = page->mappings.Head()) { 3128 VMArea* area = mapping->area; 3129 VMTranslationMap* map = area->address_space->TranslationMap(); 3130 addr_t address = virtual_page_address(area, page); 3131 map->UnmapPage(area, address, false); 3132 } 3133 } 3134 3135 3136 int32 3137 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 3138 { 3139 int32 count = 0; 3140 3141 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3142 vm_page_mapping* mapping; 3143 while ((mapping = iterator.Next()) != NULL) { 3144 VMArea* area = mapping->area; 3145 VMTranslationMap* map = area->address_space->TranslationMap(); 3146 3147 bool modified; 3148 if (map->ClearAccessedAndModified(area, 3149 virtual_page_address(area, page), false, modified)) { 3150 count++; 3151 } 3152 3153 page->modified |= modified; 3154 } 3155 3156 3157 if (page->accessed) { 3158 count++; 3159 page->accessed = false; 3160 } 3161 3162 return count; 3163 } 3164 3165 3166 /*! Removes all mappings of a page and/or clears the accessed bits of the 3167 mappings. 3168 The function iterates through the page mappings and removes them until 3169 encountering one that has been accessed. From then on it will continue to 3170 iterate, but only clear the accessed flag of the mapping. The page's 3171 \c modified bit will be updated accordingly, the \c accessed bit will be 3172 cleared. 3173 \return The number of mapping accessed bits encountered, including the 3174 \c accessed bit of the page itself. If \c 0 is returned, all mappings 3175 of the page have been removed. 3176 */ 3177 int32 3178 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 3179 { 3180 ASSERT(page->WiredCount() == 0); 3181 3182 if (page->accessed) 3183 return vm_clear_page_mapping_accessed_flags(page); 3184 3185 while (vm_page_mapping* mapping = page->mappings.Head()) { 3186 VMArea* area = mapping->area; 3187 VMTranslationMap* map = area->address_space->TranslationMap(); 3188 addr_t address = virtual_page_address(area, page); 3189 bool modified = false; 3190 if (map->ClearAccessedAndModified(area, address, true, modified)) { 3191 page->accessed = true; 3192 page->modified |= modified; 3193 return vm_clear_page_mapping_accessed_flags(page); 3194 } 3195 page->modified |= modified; 3196 } 3197 3198 return 0; 3199 } 3200 3201 3202 static int 3203 display_mem(int argc, char** argv) 3204 { 3205 bool physical = false; 3206 addr_t copyAddress; 3207 int32 displayWidth; 3208 int32 itemSize; 3209 int32 num = -1; 3210 addr_t address; 3211 int i = 1, j; 3212 3213 if (argc > 1 && argv[1][0] == '-') { 3214 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3215 physical = true; 3216 i++; 3217 } else 3218 i = 99; 3219 } 3220 3221 if (argc < i + 1 || argc > i + 2) { 3222 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3223 "\tdl - 8 bytes\n" 3224 "\tdw - 4 bytes\n" 3225 "\tds - 2 bytes\n" 3226 "\tdb - 1 byte\n" 3227 "\tstring - a whole string\n" 3228 " -p or --physical only allows memory from a single page to be " 3229 "displayed.\n"); 3230 return 0; 3231 } 3232 3233 address = parse_expression(argv[i]); 3234 3235 if (argc > i + 1) 3236 num = parse_expression(argv[i + 1]); 3237 3238 // build the format string 3239 if (strcmp(argv[0], "db") == 0) { 3240 itemSize = 1; 3241 displayWidth = 16; 3242 } else if (strcmp(argv[0], "ds") == 0) { 3243 itemSize = 2; 3244 displayWidth = 8; 3245 } else if (strcmp(argv[0], "dw") == 0) { 3246 
itemSize = 4; 3247 displayWidth = 4; 3248 } else if (strcmp(argv[0], "dl") == 0) { 3249 itemSize = 8; 3250 displayWidth = 2; 3251 } else if (strcmp(argv[0], "string") == 0) { 3252 itemSize = 1; 3253 displayWidth = -1; 3254 } else { 3255 kprintf("display_mem called in an invalid way!\n"); 3256 return 0; 3257 } 3258 3259 if (num <= 0) 3260 num = displayWidth; 3261 3262 void* physicalPageHandle = NULL; 3263 3264 if (physical) { 3265 int32 offset = address & (B_PAGE_SIZE - 1); 3266 if (num * itemSize + offset > B_PAGE_SIZE) { 3267 num = (B_PAGE_SIZE - offset) / itemSize; 3268 kprintf("NOTE: number of bytes has been cut to page size\n"); 3269 } 3270 3271 address = ROUNDDOWN(address, B_PAGE_SIZE); 3272 3273 if (vm_get_physical_page_debug(address, ©Address, 3274 &physicalPageHandle) != B_OK) { 3275 kprintf("getting the hardware page failed."); 3276 return 0; 3277 } 3278 3279 address += offset; 3280 copyAddress += offset; 3281 } else 3282 copyAddress = address; 3283 3284 if (!strcmp(argv[0], "string")) { 3285 kprintf("%p \"", (char*)copyAddress); 3286 3287 // string mode 3288 for (i = 0; true; i++) { 3289 char c; 3290 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1) 3291 != B_OK 3292 || c == '\0') { 3293 break; 3294 } 3295 3296 if (c == '\n') 3297 kprintf("\\n"); 3298 else if (c == '\t') 3299 kprintf("\\t"); 3300 else { 3301 if (!isprint(c)) 3302 c = '.'; 3303 3304 kprintf("%c", c); 3305 } 3306 } 3307 3308 kprintf("\"\n"); 3309 } else { 3310 // number mode 3311 for (i = 0; i < num; i++) { 3312 uint64 value; 3313 3314 if ((i % displayWidth) == 0) { 3315 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3316 if (i != 0) 3317 kprintf("\n"); 3318 3319 kprintf("[0x%lx] ", address + i * itemSize); 3320 3321 for (j = 0; j < displayed; j++) { 3322 char c; 3323 if (debug_memcpy(B_CURRENT_TEAM, &c, 3324 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 3325 displayed = j; 3326 break; 3327 } 3328 if (!isprint(c)) 3329 c = '.'; 3330 3331 kprintf("%c", c); 3332 } 3333 if (num > displayWidth) { 3334 // make sure the spacing in the last line is correct 3335 for (j = displayed; j < displayWidth * itemSize; j++) 3336 kprintf(" "); 3337 } 3338 kprintf(" "); 3339 } 3340 3341 if (debug_memcpy(B_CURRENT_TEAM, &value, 3342 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 3343 kprintf("read fault"); 3344 break; 3345 } 3346 3347 switch (itemSize) { 3348 case 1: 3349 kprintf(" %02" B_PRIx8, *(uint8*)&value); 3350 break; 3351 case 2: 3352 kprintf(" %04" B_PRIx16, *(uint16*)&value); 3353 break; 3354 case 4: 3355 kprintf(" %08" B_PRIx32, *(uint32*)&value); 3356 break; 3357 case 8: 3358 kprintf(" %016" B_PRIx64, *(uint64*)&value); 3359 break; 3360 } 3361 } 3362 3363 kprintf("\n"); 3364 } 3365 3366 if (physical) { 3367 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3368 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3369 } 3370 return 0; 3371 } 3372 3373 3374 static void 3375 dump_cache_tree_recursively(VMCache* cache, int level, 3376 VMCache* highlightCache) 3377 { 3378 // print this cache 3379 for (int i = 0; i < level; i++) 3380 kprintf(" "); 3381 if (cache == highlightCache) 3382 kprintf("%p <--\n", cache); 3383 else 3384 kprintf("%p\n", cache); 3385 3386 // recursively print its consumers 3387 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3388 VMCache* consumer = it.Next();) { 3389 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3390 } 3391 } 3392 3393 3394 static int 3395 dump_cache_tree(int argc, char** argv) 3396 { 3397 
if (argc != 2 || !strcmp(argv[1], "--help")) { 3398 kprintf("usage: %s <address>\n", argv[0]); 3399 return 0; 3400 } 3401 3402 addr_t address = parse_expression(argv[1]); 3403 if (address == 0) 3404 return 0; 3405 3406 VMCache* cache = (VMCache*)address; 3407 VMCache* root = cache; 3408 3409 // find the root cache (the transitive source) 3410 while (root->source != NULL) 3411 root = root->source; 3412 3413 dump_cache_tree_recursively(root, 0, cache); 3414 3415 return 0; 3416 } 3417 3418 3419 const char* 3420 vm_cache_type_to_string(int32 type) 3421 { 3422 switch (type) { 3423 case CACHE_TYPE_RAM: 3424 return "RAM"; 3425 case CACHE_TYPE_DEVICE: 3426 return "device"; 3427 case CACHE_TYPE_VNODE: 3428 return "vnode"; 3429 case CACHE_TYPE_NULL: 3430 return "null"; 3431 3432 default: 3433 return "unknown"; 3434 } 3435 } 3436 3437 3438 #if DEBUG_CACHE_LIST 3439 3440 static void 3441 update_cache_info_recursively(VMCache* cache, cache_info& info) 3442 { 3443 info.page_count += cache->page_count; 3444 if (cache->type == CACHE_TYPE_RAM) 3445 info.committed += cache->committed_size; 3446 3447 // recurse 3448 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3449 VMCache* consumer = it.Next();) { 3450 update_cache_info_recursively(consumer, info); 3451 } 3452 } 3453 3454 3455 static int 3456 cache_info_compare_page_count(const void* _a, const void* _b) 3457 { 3458 const cache_info* a = (const cache_info*)_a; 3459 const cache_info* b = (const cache_info*)_b; 3460 if (a->page_count == b->page_count) 3461 return 0; 3462 return a->page_count < b->page_count ? 1 : -1; 3463 } 3464 3465 3466 static int 3467 cache_info_compare_committed(const void* _a, const void* _b) 3468 { 3469 const cache_info* a = (const cache_info*)_a; 3470 const cache_info* b = (const cache_info*)_b; 3471 if (a->committed == b->committed) 3472 return 0; 3473 return a->committed < b->committed ? 
1 : -1; 3474 } 3475 3476 3477 static void 3478 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3479 { 3480 for (int i = 0; i < level; i++) 3481 kprintf(" "); 3482 3483 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3484 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3485 cache->virtual_base, cache->virtual_end, cache->page_count); 3486 3487 if (level == 0) 3488 kprintf("/%lu", info.page_count); 3489 3490 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3491 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3492 3493 if (level == 0) 3494 kprintf("/%lu", info.committed); 3495 } 3496 3497 // areas 3498 if (cache->areas != NULL) { 3499 VMArea* area = cache->areas; 3500 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3501 area->name, area->address_space->ID()); 3502 3503 while (area->cache_next != NULL) { 3504 area = area->cache_next; 3505 kprintf(", %" B_PRId32, area->id); 3506 } 3507 } 3508 3509 kputs("\n"); 3510 3511 // recurse 3512 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3513 VMCache* consumer = it.Next();) { 3514 dump_caches_recursively(consumer, info, level + 1); 3515 } 3516 } 3517 3518 3519 static int 3520 dump_caches(int argc, char** argv) 3521 { 3522 if (sCacheInfoTable == NULL) { 3523 kprintf("No cache info table!\n"); 3524 return 0; 3525 } 3526 3527 bool sortByPageCount = true; 3528 3529 for (int32 i = 1; i < argc; i++) { 3530 if (strcmp(argv[i], "-c") == 0) { 3531 sortByPageCount = false; 3532 } else { 3533 print_debugger_command_usage(argv[0]); 3534 return 0; 3535 } 3536 } 3537 3538 uint32 totalCount = 0; 3539 uint32 rootCount = 0; 3540 off_t totalCommitted = 0; 3541 page_num_t totalPages = 0; 3542 3543 VMCache* cache = gDebugCacheList; 3544 while (cache) { 3545 totalCount++; 3546 if (cache->source == NULL) { 3547 cache_info stackInfo; 3548 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3549 ? sCacheInfoTable[rootCount] : stackInfo; 3550 rootCount++; 3551 info.cache = cache; 3552 info.page_count = 0; 3553 info.committed = 0; 3554 update_cache_info_recursively(cache, info); 3555 totalCommitted += info.committed; 3556 totalPages += info.page_count; 3557 } 3558 3559 cache = cache->debug_next; 3560 } 3561 3562 if (rootCount <= (uint32)kCacheInfoTableCount) { 3563 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3564 sortByPageCount 3565 ? &cache_info_compare_page_count 3566 : &cache_info_compare_committed); 3567 } 3568 3569 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3570 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3571 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3572 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3573 "page count" : "committed size"); 3574 3575 if (rootCount <= (uint32)kCacheInfoTableCount) { 3576 for (uint32 i = 0; i < rootCount; i++) { 3577 cache_info& info = sCacheInfoTable[i]; 3578 dump_caches_recursively(info.cache, info, 0); 3579 } 3580 } else 3581 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3582 3583 return 0; 3584 } 3585 3586 #endif // DEBUG_CACHE_LIST 3587 3588 3589 static int 3590 dump_cache(int argc, char** argv) 3591 { 3592 VMCache* cache; 3593 bool showPages = false; 3594 int i = 1; 3595 3596 if (argc < 2 || !strcmp(argv[1], "--help")) { 3597 kprintf("usage: %s [-ps] <address>\n" 3598 " if -p is specified, all pages are shown, if -s is used\n" 3599 " only the cache info is shown respectively.\n", argv[0]); 3600 return 0; 3601 } 3602 while (argv[i][0] == '-') { 3603 char* arg = argv[i] + 1; 3604 while (arg[0]) { 3605 if (arg[0] == 'p') 3606 showPages = true; 3607 arg++; 3608 } 3609 i++; 3610 } 3611 if (argv[i] == NULL) { 3612 kprintf("%s: invalid argument, pass address\n", argv[0]); 3613 return 0; 3614 } 3615 3616 addr_t address = parse_expression(argv[i]); 3617 if (address == 0) 3618 return 0; 3619 3620 cache = (VMCache*)address; 3621 3622 cache->Dump(showPages); 3623 3624 set_debug_variable("_sourceCache", (addr_t)cache->source); 3625 3626 return 0; 3627 } 3628 3629 3630 static void 3631 dump_area_struct(VMArea* area, bool mappings) 3632 { 3633 kprintf("AREA: %p\n", area); 3634 kprintf("name:\t\t'%s'\n", area->name); 3635 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3636 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3637 kprintf("base:\t\t0x%lx\n", area->Base()); 3638 kprintf("size:\t\t0x%lx\n", area->Size()); 3639 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3640 kprintf("page_protection:%p\n", area->page_protections); 3641 kprintf("wiring:\t\t0x%x\n", area->wiring); 3642 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3643 kprintf("cache:\t\t%p\n", area->cache); 3644 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3645 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3646 kprintf("cache_next:\t%p\n", area->cache_next); 3647 kprintf("cache_prev:\t%p\n", area->cache_prev); 3648 3649 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3650 if (mappings) { 3651 kprintf("page mappings:\n"); 3652 while (iterator.HasNext()) { 3653 vm_page_mapping* mapping = iterator.Next(); 3654 kprintf(" %p", mapping->page); 3655 } 3656 kprintf("\n"); 3657 } else { 3658 uint32 count = 0; 3659 while (iterator.Next() != NULL) { 3660 count++; 3661 } 3662 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3663 } 3664 } 3665 3666 3667 static int 3668 dump_area(int argc, char** argv) 3669 { 3670 bool mappings = false; 3671 bool found = false; 3672 int32 index = 1; 3673 VMArea* area; 3674 addr_t num; 3675 3676 if (argc < 2 || !strcmp(argv[1], "--help")) { 3677 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3678 "All areas matching either id/address/name are listed. 
You can\n" 3679 "force to check only a specific item by prefixing the specifier\n" 3680 "with the id/contains/address/name keywords.\n" 3681 "-m shows the area's mappings as well.\n"); 3682 return 0; 3683 } 3684 3685 if (!strcmp(argv[1], "-m")) { 3686 mappings = true; 3687 index++; 3688 } 3689 3690 int32 mode = 0xf; 3691 if (!strcmp(argv[index], "id")) 3692 mode = 1; 3693 else if (!strcmp(argv[index], "contains")) 3694 mode = 2; 3695 else if (!strcmp(argv[index], "name")) 3696 mode = 4; 3697 else if (!strcmp(argv[index], "address")) 3698 mode = 0; 3699 if (mode != 0xf) 3700 index++; 3701 3702 if (index >= argc) { 3703 kprintf("No area specifier given.\n"); 3704 return 0; 3705 } 3706 3707 num = parse_expression(argv[index]); 3708 3709 if (mode == 0) { 3710 dump_area_struct((struct VMArea*)num, mappings); 3711 } else { 3712 // walk through the area list, looking for the arguments as a name 3713 3714 VMAreasTree::Iterator it = VMAreas::GetIterator(); 3715 while ((area = it.Next()) != NULL) { 3716 if (((mode & 4) != 0 3717 && !strcmp(argv[index], area->name)) 3718 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3719 || (((mode & 2) != 0 && area->Base() <= num 3720 && area->Base() + area->Size() > num))))) { 3721 dump_area_struct(area, mappings); 3722 found = true; 3723 } 3724 } 3725 3726 if (!found) 3727 kprintf("could not find area %s (%ld)\n", argv[index], num); 3728 } 3729 3730 return 0; 3731 } 3732 3733 3734 static int 3735 dump_area_list(int argc, char** argv) 3736 { 3737 VMArea* area; 3738 const char* name = NULL; 3739 int32 id = 0; 3740 3741 if (argc > 1) { 3742 id = parse_expression(argv[1]); 3743 if (id == 0) 3744 name = argv[1]; 3745 } 3746 3747 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3748 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3749 B_PRINTF_POINTER_WIDTH, "size"); 3750 3751 VMAreasTree::Iterator it = VMAreas::GetIterator(); 3752 while ((area = it.Next()) != NULL) { 3753 if ((id != 0 && area->address_space->ID() != id) 3754 || (name != NULL && strstr(area->name, name) == NULL)) 3755 continue; 3756 3757 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3758 area->id, (void*)area->Base(), (void*)area->Size(), 3759 area->protection, area->wiring, area->name); 3760 } 3761 return 0; 3762 } 3763 3764 3765 static int 3766 dump_available_memory(int argc, char** argv) 3767 { 3768 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3769 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3770 return 0; 3771 } 3772 3773 3774 static int 3775 dump_mapping_info(int argc, char** argv) 3776 { 3777 bool reverseLookup = false; 3778 bool pageLookup = false; 3779 3780 int argi = 1; 3781 for (; argi < argc && argv[argi][0] == '-'; argi++) { 3782 const char* arg = argv[argi]; 3783 if (strcmp(arg, "-r") == 0) { 3784 reverseLookup = true; 3785 } else if (strcmp(arg, "-p") == 0) { 3786 reverseLookup = true; 3787 pageLookup = true; 3788 } else { 3789 print_debugger_command_usage(argv[0]); 3790 return 0; 3791 } 3792 } 3793 3794 // We need at least one argument, the address. Optionally a thread ID can be 3795 // specified. 
3796 if (argi >= argc || argi + 2 < argc) { 3797 print_debugger_command_usage(argv[0]); 3798 return 0; 3799 } 3800 3801 uint64 addressValue; 3802 if (!evaluate_debug_expression(argv[argi++], &addressValue, false)) 3803 return 0; 3804 3805 Team* team = NULL; 3806 if (argi < argc) { 3807 uint64 threadID; 3808 if (!evaluate_debug_expression(argv[argi++], &threadID, false)) 3809 return 0; 3810 3811 Thread* thread = Thread::GetDebug(threadID); 3812 if (thread == NULL) { 3813 kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]); 3814 return 0; 3815 } 3816 3817 team = thread->team; 3818 } 3819 3820 if (reverseLookup) { 3821 phys_addr_t physicalAddress; 3822 if (pageLookup) { 3823 vm_page* page = (vm_page*)(addr_t)addressValue; 3824 physicalAddress = page->physical_page_number * B_PAGE_SIZE; 3825 } else { 3826 physicalAddress = (phys_addr_t)addressValue; 3827 physicalAddress -= physicalAddress % B_PAGE_SIZE; 3828 } 3829 3830 kprintf(" Team Virtual Address Area\n"); 3831 kprintf("--------------------------------------\n"); 3832 3833 struct Callback : VMTranslationMap::ReverseMappingInfoCallback { 3834 Callback() 3835 : 3836 fAddressSpace(NULL) 3837 { 3838 } 3839 3840 void SetAddressSpace(VMAddressSpace* addressSpace) 3841 { 3842 fAddressSpace = addressSpace; 3843 } 3844 3845 virtual bool HandleVirtualAddress(addr_t virtualAddress) 3846 { 3847 kprintf("%8" B_PRId32 " %#18" B_PRIxADDR, fAddressSpace->ID(), 3848 virtualAddress); 3849 if (VMArea* area = fAddressSpace->LookupArea(virtualAddress)) 3850 kprintf(" %8" B_PRId32 " %s\n", area->id, area->name); 3851 else 3852 kprintf("\n"); 3853 return false; 3854 } 3855 3856 private: 3857 VMAddressSpace* fAddressSpace; 3858 } callback; 3859 3860 if (team != NULL) { 3861 // team specified -- get its address space 3862 VMAddressSpace* addressSpace = team->address_space; 3863 if (addressSpace == NULL) { 3864 kprintf("Failed to get address space!\n"); 3865 return 0; 3866 } 3867 3868 callback.SetAddressSpace(addressSpace); 3869 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3870 physicalAddress, callback); 3871 } else { 3872 // no team specified -- iterate through all address spaces 3873 for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 3874 addressSpace != NULL; 3875 addressSpace = VMAddressSpace::DebugNext(addressSpace)) { 3876 callback.SetAddressSpace(addressSpace); 3877 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3878 physicalAddress, callback); 3879 } 3880 } 3881 } else { 3882 // get the address space 3883 addr_t virtualAddress = (addr_t)addressValue; 3884 virtualAddress -= virtualAddress % B_PAGE_SIZE; 3885 VMAddressSpace* addressSpace; 3886 if (IS_KERNEL_ADDRESS(virtualAddress)) { 3887 addressSpace = VMAddressSpace::Kernel(); 3888 } else if (team != NULL) { 3889 addressSpace = team->address_space; 3890 } else { 3891 Thread* thread = debug_get_debugged_thread(); 3892 if (thread == NULL || thread->team == NULL) { 3893 kprintf("Failed to get team!\n"); 3894 return 0; 3895 } 3896 3897 addressSpace = thread->team->address_space; 3898 } 3899 3900 if (addressSpace == NULL) { 3901 kprintf("Failed to get address space!\n"); 3902 return 0; 3903 } 3904 3905 // let the translation map implementation do the job 3906 addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress); 3907 } 3908 3909 return 0; 3910 } 3911 3912 3913 /*! Deletes all areas and reserved regions in the given address space. 3914 3915 The caller must ensure that none of the areas has any wired ranges. 
3916 3917 \param addressSpace The address space. 3918 \param deletingAddressSpace \c true, if the address space is in the process 3919 of being deleted. 3920 */ 3921 void 3922 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3923 { 3924 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3925 addressSpace->ID())); 3926 3927 addressSpace->WriteLock(); 3928 3929 // remove all reserved areas in this address space 3930 addressSpace->UnreserveAllAddressRanges(0); 3931 3932 // delete all the areas in this address space 3933 while (VMArea* area = addressSpace->FirstArea()) { 3934 ASSERT(!area->IsWired()); 3935 delete_area(addressSpace, area, deletingAddressSpace); 3936 } 3937 3938 addressSpace->WriteUnlock(); 3939 } 3940 3941 3942 static area_id 3943 vm_area_for(addr_t address, bool kernel) 3944 { 3945 team_id team; 3946 if (IS_USER_ADDRESS(address)) { 3947 // we try the user team address space, if any 3948 team = VMAddressSpace::CurrentID(); 3949 if (team < 0) 3950 return team; 3951 } else 3952 team = VMAddressSpace::KernelID(); 3953 3954 AddressSpaceReadLocker locker(team); 3955 if (!locker.IsLocked()) 3956 return B_BAD_TEAM_ID; 3957 3958 VMArea* area = locker.AddressSpace()->LookupArea(address); 3959 if (area != NULL) { 3960 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0 3961 && (area->protection & B_KERNEL_AREA) != 0) 3962 return B_ERROR; 3963 3964 return area->id; 3965 } 3966 3967 return B_ERROR; 3968 } 3969 3970 3971 /*! Frees physical pages that were used during the boot process. 3972 \a end is inclusive. 3973 */ 3974 static void 3975 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3976 { 3977 // free all physical pages in the specified range 3978 3979 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3980 phys_addr_t physicalAddress; 3981 uint32 flags; 3982 3983 if (map->Query(current, &physicalAddress, &flags) == B_OK 3984 && (flags & PAGE_PRESENT) != 0) { 3985 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3986 if (page != NULL && page->State() != PAGE_STATE_FREE 3987 && page->State() != PAGE_STATE_CLEAR 3988 && page->State() != PAGE_STATE_UNUSED) { 3989 DEBUG_PAGE_ACCESS_START(page); 3990 vm_page_set_state(page, PAGE_STATE_FREE); 3991 } 3992 } 3993 } 3994 3995 // unmap the memory 3996 map->Unmap(start, end); 3997 } 3998 3999 4000 void 4001 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 4002 { 4003 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 4004 addr_t end = start + (size - 1); 4005 addr_t lastEnd = start; 4006 4007 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 4008 (void*)start, (void*)end)); 4009 4010 // The areas are sorted in virtual address space order, so 4011 // we just have to find the holes between them that fall 4012 // into the area we should dispose 4013 4014 map->Lock(); 4015 4016 for (VMAddressSpace::AreaIterator it 4017 = VMAddressSpace::Kernel()->GetAreaIterator(); 4018 VMArea* area = it.Next();) { 4019 addr_t areaStart = area->Base(); 4020 addr_t areaEnd = areaStart + (area->Size() - 1); 4021 4022 if (areaEnd < start) 4023 continue; 4024 4025 if (areaStart > end) { 4026 // we are done, the area is already beyond of what we have to free 4027 break; 4028 } 4029 4030 if (areaStart > lastEnd) { 4031 // this is something we can free 4032 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 4033 (void*)areaStart)); 4034 unmap_and_free_physical_pages(map, lastEnd, 
areaStart - 1); 4035 } 4036 4037 if (areaEnd >= end) { 4038 lastEnd = areaEnd; 4039 // no +1 to prevent potential overflow 4040 break; 4041 } 4042 4043 lastEnd = areaEnd + 1; 4044 } 4045 4046 if (lastEnd < end) { 4047 // we can also get rid of some space at the end of the area 4048 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 4049 (void*)end)); 4050 unmap_and_free_physical_pages(map, lastEnd, end); 4051 } 4052 4053 map->Unlock(); 4054 } 4055 4056 4057 static void 4058 create_preloaded_image_areas(struct preloaded_image* _image) 4059 { 4060 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image); 4061 char name[B_OS_NAME_LENGTH]; 4062 void* address; 4063 int32 length; 4064 4065 // use file name to create a good area name 4066 char* fileName = strrchr(image->name, '/'); 4067 if (fileName == NULL) 4068 fileName = image->name; 4069 else 4070 fileName++; 4071 4072 length = strlen(fileName); 4073 // make sure there is enough space for the suffix 4074 if (length > 25) 4075 length = 25; 4076 4077 memcpy(name, fileName, length); 4078 strcpy(name + length, "_text"); 4079 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 4080 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 4081 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 4082 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4083 // this will later be remapped read-only/executable by the 4084 // ELF initialization code 4085 4086 strcpy(name + length, "_data"); 4087 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 4088 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 4089 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 4090 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4091 } 4092 4093 4094 /*! Frees all previously allocated kernel args areas from the kernel_args structure. 4095 Any boot loader resources contained in those arguments must not be accessed 4096 anymore past this point. 4097 */ 4098 void 4099 vm_free_kernel_args(kernel_args* args) 4100 { 4101 uint32 i; 4102 4103 TRACE(("vm_free_kernel_args()\n")); 4104 4105 for (i = 0; i < args->num_kernel_args_ranges; i++) { 4106 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start); 4107 if (area >= B_OK) 4108 delete_area(area); 4109 } 4110 } 4111 4112 4113 static void 4114 allocate_kernel_args(kernel_args* args) 4115 { 4116 TRACE(("allocate_kernel_args()\n")); 4117 4118 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 4119 void* address = (void*)(addr_t)args->kernel_args_range[i].start; 4120 4121 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 4122 args->kernel_args_range[i].size, B_ALREADY_WIRED, 4123 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4124 } 4125 } 4126 4127 4128 static void 4129 unreserve_boot_loader_ranges(kernel_args* args) 4130 { 4131 TRACE(("unreserve_boot_loader_ranges()\n")); 4132 4133 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4134 vm_unreserve_address_range(VMAddressSpace::KernelID(), 4135 (void*)(addr_t)args->virtual_allocated_range[i].start, 4136 args->virtual_allocated_range[i].size); 4137 } 4138 } 4139 4140 4141 static void 4142 reserve_boot_loader_ranges(kernel_args* args) 4143 { 4144 TRACE(("reserve_boot_loader_ranges()\n")); 4145 4146 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4147 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start; 4148 4149 // If the address is not a kernel address, we just skip it. The 4150 // architecture specific code has to deal with it. 
4151 if (!IS_KERNEL_ADDRESS(address)) { 4152 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 4153 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 4154 continue; 4155 } 4156 4157 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 4158 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 4159 if (status < B_OK) 4160 panic("could not reserve boot loader ranges\n"); 4161 } 4162 } 4163 4164 4165 static addr_t 4166 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 4167 { 4168 size = PAGE_ALIGN(size); 4169 4170 // find a slot in the virtual allocation addr range 4171 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 4172 // check to see if the space between this one and the last is big enough 4173 addr_t rangeStart = args->virtual_allocated_range[i].start; 4174 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 4175 + args->virtual_allocated_range[i - 1].size; 4176 4177 addr_t base = alignment > 0 4178 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 4179 4180 if (base >= KERNEL_BASE && base < rangeStart 4181 && rangeStart - base >= size) { 4182 args->virtual_allocated_range[i - 1].size 4183 += base + size - previousRangeEnd; 4184 return base; 4185 } 4186 } 4187 4188 // we hadn't found one between allocation ranges. this is ok. 4189 // see if there's a gap after the last one 4190 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 4191 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 4192 + args->virtual_allocated_range[lastEntryIndex].size; 4193 addr_t base = alignment > 0 4194 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 4195 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 4196 args->virtual_allocated_range[lastEntryIndex].size 4197 += base + size - lastRangeEnd; 4198 return base; 4199 } 4200 4201 // see if there's a gap before the first one 4202 addr_t rangeStart = args->virtual_allocated_range[0].start; 4203 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 4204 base = rangeStart - size; 4205 if (alignment > 0) 4206 base = ROUNDDOWN(base, alignment); 4207 4208 if (base >= KERNEL_BASE) { 4209 args->virtual_allocated_range[0].start = base; 4210 args->virtual_allocated_range[0].size += rangeStart - base; 4211 return base; 4212 } 4213 } 4214 4215 return 0; 4216 } 4217 4218 4219 static bool 4220 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 4221 { 4222 // TODO: horrible brute-force method of determining if the page can be 4223 // allocated 4224 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4225 if (address >= args->physical_memory_range[i].start 4226 && address < args->physical_memory_range[i].start 4227 + args->physical_memory_range[i].size) 4228 return true; 4229 } 4230 return false; 4231 } 4232 4233 4234 page_num_t 4235 vm_allocate_early_physical_page(kernel_args* args) 4236 { 4237 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4238 phys_addr_t nextPage; 4239 4240 nextPage = args->physical_allocated_range[i].start 4241 + args->physical_allocated_range[i].size; 4242 // see if the page after the next allocated paddr run can be allocated 4243 if (i + 1 < args->num_physical_allocated_ranges 4244 && args->physical_allocated_range[i + 1].size != 0) { 4245 // see if the next page will collide with the next allocated range 4246 if (nextPage >= args->physical_allocated_range[i+1].start) 4247 continue; 4248 } 4249 // see if the next physical page 
fits in the memory block 4250 if (is_page_in_physical_memory_range(args, nextPage)) { 4251 // we got one! 4252 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4253 return nextPage / B_PAGE_SIZE; 4254 } 4255 } 4256 4257 // Expanding upwards didn't work, try going downwards. 4258 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4259 phys_addr_t nextPage; 4260 4261 nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE; 4262 // see if the page before this allocated paddr run can be allocated 4263 if (i > 0 && args->physical_allocated_range[i - 1].size != 0) { 4264 // see if that page would collide with the previous allocated range 4265 if (nextPage < args->physical_allocated_range[i-1].start 4266 + args->physical_allocated_range[i-1].size) 4267 continue; 4268 } 4269 // see if the next physical page fits in the memory block 4270 if (is_page_in_physical_memory_range(args, nextPage)) { 4271 // we got one! 4272 args->physical_allocated_range[i].start -= B_PAGE_SIZE; 4273 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4274 return nextPage / B_PAGE_SIZE; 4275 } 4276 } 4277 4278 return 0; 4279 // could not allocate a block 4280 } 4281 4282 4283 /*! This one uses the kernel_args' physical and virtual memory ranges to 4284 allocate some pages before the VM is completely up. 4285 */ 4286 addr_t 4287 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4288 uint32 attributes, addr_t alignment) 4289 { 4290 if (physicalSize > virtualSize) 4291 physicalSize = virtualSize; 4292 4293 // find the vaddr to allocate at 4294 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 4295 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 4296 if (virtualBase == 0) { 4297 panic("vm_allocate_early: could not allocate virtual address\n"); 4298 return 0; 4299 } 4300 4301 // map the pages 4302 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4303 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 4304 if (physicalAddress == 0) 4305 panic("error allocating early page!\n"); 4306 4307 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4308 4309 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4310 physicalAddress * B_PAGE_SIZE, attributes, 4311 &vm_allocate_early_physical_page); 4312 } 4313 4314 return virtualBase; 4315 } 4316 4317 4318 /*! The main entry point to initialize the VM. 
*/ 4319 status_t 4320 vm_init(kernel_args* args) 4321 { 4322 struct preloaded_image* image; 4323 void* address; 4324 status_t err = 0; 4325 uint32 i; 4326 4327 TRACE(("vm_init: entry\n")); 4328 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 4329 err = arch_vm_init(args); 4330 4331 // initialize some globals 4332 vm_page_init_num_pages(args); 4333 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 4334 4335 slab_init(args); 4336 4337 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4338 off_t heapSize = INITIAL_HEAP_SIZE; 4339 // try to accommodate low memory systems 4340 while (heapSize > sAvailableMemory / 8) 4341 heapSize /= 2; 4342 if (heapSize < 1024 * 1024) 4343 panic("vm_init: go buy some RAM please."); 4344 4345 // map in the new heap and initialize it 4346 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 4347 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 4348 TRACE(("heap at 0x%lx\n", heapBase)); 4349 heap_init(heapBase, heapSize); 4350 #endif 4351 4352 // initialize the free page list and physical page mapper 4353 vm_page_init(args); 4354 4355 // initialize the cache allocators 4356 vm_cache_init(args); 4357 4358 { 4359 status_t error = VMAreas::Init(); 4360 if (error != B_OK) 4361 panic("vm_init: error initializing areas map\n"); 4362 } 4363 4364 VMAddressSpace::Init(); 4365 reserve_boot_loader_ranges(args); 4366 4367 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4368 heap_init_post_area(); 4369 #endif 4370 4371 // Do any further initialization that the architecture dependent layers may 4372 // need now 4373 arch_vm_translation_map_init_post_area(args); 4374 arch_vm_init_post_area(args); 4375 vm_page_init_post_area(args); 4376 slab_init_post_area(); 4377 4378 // allocate areas to represent stuff that already exists 4379 4380 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4381 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 4382 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 4383 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4384 #endif 4385 4386 allocate_kernel_args(args); 4387 4388 create_preloaded_image_areas(args->kernel_image); 4389 4390 // allocate areas for preloaded images 4391 for (image = args->preloaded_images; image != NULL; image = image->next) 4392 create_preloaded_image_areas(image); 4393 4394 // allocate kernel stacks 4395 for (i = 0; i < args->num_cpus; i++) { 4396 char name[64]; 4397 4398 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 4399 address = (void*)args->cpu_kstack[i].start; 4400 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4401 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4402 } 4403 4404 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 4405 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 4406 4407 #if PARANOID_KERNEL_MALLOC 4408 vm_block_address_range("uninitialized heap memory", 4409 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4410 #endif 4411 #if PARANOID_KERNEL_FREE 4412 vm_block_address_range("freed heap memory", 4413 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4414 #endif 4415 4416 // create the object cache for the page mappings 4417 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 4418 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 4419 NULL, NULL); 4420 if (gPageMappingsObjectCache == NULL) 4421 panic("failed to create page mappings object cache"); 4422 4423 
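// Keep a minimum reserve of mapping objects around, so that the page fault path can still allocate page mappings under memory pressure (see the object_cache_reserve() call in vm_soft_fault()).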
object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 4424 4425 #if DEBUG_CACHE_LIST 4426 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 4427 virtual_address_restrictions virtualRestrictions = {}; 4428 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 4429 physical_address_restrictions physicalRestrictions = {}; 4430 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 4431 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4432 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 4433 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions, 4434 &physicalRestrictions, (void**)&sCacheInfoTable); 4435 } 4436 #endif // DEBUG_CACHE_LIST 4437 4438 // add some debugger commands 4439 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4440 add_debugger_command("area", &dump_area, 4441 "Dump info about a particular area"); 4442 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 4443 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 4444 #if DEBUG_CACHE_LIST 4445 if (sCacheInfoTable != NULL) { 4446 add_debugger_command_etc("caches", &dump_caches, 4447 "List all VMCache trees", 4448 "[ \"-c\" ]\n" 4449 "All cache trees are listed sorted in decreasing order by number " 4450 "of\n" 4451 "used pages or, if \"-c\" is specified, by size of committed " 4452 "memory.\n", 4453 0); 4454 } 4455 #endif 4456 add_debugger_command("avail", &dump_available_memory, 4457 "Dump available memory"); 4458 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4459 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4460 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4461 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4462 add_debugger_command("string", &display_mem, "dump strings"); 4463 4464 add_debugger_command_etc("mapping", &dump_mapping_info, 4465 "Print address mapping information", 4466 "[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n" 4467 "Prints low-level page mapping information for a given address. If\n" 4468 "neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n" 4469 "address that is looked up in the translation map of the current\n" 4470 "team, respectively the team specified by thread ID <thread ID>. If\n" 4471 "\"-r\" is specified, <address> is a physical address that is\n" 4472 "searched in the translation map of all teams, respectively the team\n" 4473 "specified by thread ID <thread ID>. If \"-p\" is specified,\n" 4474 "<address> is the address of a vm_page structure. 
The behavior is\n" 4475 "equivalent to specifying \"-r\" with the physical address of that\n" 4476 "page.\n", 4477 0); 4478 4479 TRACE(("vm_init: exit\n")); 4480 4481 vm_cache_init_post_heap(); 4482 4483 return err; 4484 } 4485 4486 4487 status_t 4488 vm_init_post_sem(kernel_args* args) 4489 { 4490 // This frees all unused boot loader resources and makes its space available 4491 // again 4492 arch_vm_init_end(args); 4493 unreserve_boot_loader_ranges(args); 4494 4495 // fill in all of the semaphores that were not allocated before 4496 // since we're still single threaded and only the kernel address space 4497 // exists, it isn't that hard to find all of the ones we need to create 4498 4499 arch_vm_translation_map_init_post_sem(args); 4500 4501 slab_init_post_sem(); 4502 4503 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4504 heap_init_post_sem(); 4505 #endif 4506 4507 return B_OK; 4508 } 4509 4510 4511 status_t 4512 vm_init_post_thread(kernel_args* args) 4513 { 4514 vm_page_init_post_thread(args); 4515 slab_init_post_thread(); 4516 return heap_init_post_thread(); 4517 } 4518 4519 4520 status_t 4521 vm_init_post_modules(kernel_args* args) 4522 { 4523 return arch_vm_init_post_modules(args); 4524 } 4525 4526 4527 void 4528 permit_page_faults(void) 4529 { 4530 Thread* thread = thread_get_current_thread(); 4531 if (thread != NULL) 4532 atomic_add(&thread->page_faults_allowed, 1); 4533 } 4534 4535 4536 void 4537 forbid_page_faults(void) 4538 { 4539 Thread* thread = thread_get_current_thread(); 4540 if (thread != NULL) 4541 atomic_add(&thread->page_faults_allowed, -1); 4542 } 4543 4544 4545 status_t 4546 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute, 4547 bool isUser, addr_t* newIP) 4548 { 4549 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4550 faultAddress)); 4551 4552 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4553 4554 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4555 VMAddressSpace* addressSpace = NULL; 4556 4557 status_t status = B_OK; 4558 *newIP = 0; 4559 atomic_add((int32*)&sPageFaults, 1); 4560 4561 if (IS_KERNEL_ADDRESS(pageAddress)) { 4562 addressSpace = VMAddressSpace::GetKernel(); 4563 } else if (IS_USER_ADDRESS(pageAddress)) { 4564 addressSpace = VMAddressSpace::GetCurrent(); 4565 if (addressSpace == NULL) { 4566 if (!isUser) { 4567 dprintf("vm_page_fault: kernel thread accessing invalid user " 4568 "memory!\n"); 4569 status = B_BAD_ADDRESS; 4570 TPF(PageFaultError(-1, 4571 VMPageFaultTracing 4572 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4573 } else { 4574 // XXX weird state. 
4575 panic("vm_page_fault: non kernel thread accessing user memory " 4576 "that doesn't exist!\n"); 4577 status = B_BAD_ADDRESS; 4578 } 4579 } 4580 } else { 4581 // the hit was probably in the 64k DMZ between kernel and user space 4582 // this keeps a user space thread from passing a buffer that crosses 4583 // into kernel space 4584 status = B_BAD_ADDRESS; 4585 TPF(PageFaultError(-1, 4586 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4587 } 4588 4589 if (status == B_OK) { 4590 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute, 4591 isUser, NULL); 4592 } 4593 4594 if (status < B_OK) { 4595 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4596 "0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n", 4597 strerror(status), address, faultAddress, isWrite, isUser, isExecute, 4598 thread_get_current_thread_id()); 4599 if (!isUser) { 4600 Thread* thread = thread_get_current_thread(); 4601 if (thread != NULL && thread->fault_handler != 0) { 4602 // this will cause the arch dependant page fault handler to 4603 // modify the IP on the interrupt frame or whatever to return 4604 // to this address 4605 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler); 4606 } else { 4607 // unhandled page fault in the kernel 4608 panic("vm_page_fault: unhandled page fault in kernel space at " 4609 "0x%lx, ip 0x%lx\n", address, faultAddress); 4610 } 4611 } else { 4612 Thread* thread = thread_get_current_thread(); 4613 4614 #ifdef TRACE_FAULTS 4615 VMArea* area = NULL; 4616 if (addressSpace != NULL) { 4617 addressSpace->ReadLock(); 4618 area = addressSpace->LookupArea(faultAddress); 4619 } 4620 4621 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4622 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4623 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4624 thread->team->Name(), thread->team->id, 4625 isWrite ? "write" : (isExecute ? "execute" : "read"), address, 4626 faultAddress, area ? area->name : "???", faultAddress - (area ? 4627 area->Base() : 0x0)); 4628 4629 if (addressSpace != NULL) 4630 addressSpace->ReadUnlock(); 4631 #endif 4632 4633 // If the thread has a signal handler for SIGSEGV, we simply 4634 // send it the signal. Otherwise we notify the user debugger 4635 // first. 4636 struct sigaction action; 4637 if ((sigaction(SIGSEGV, NULL, &action) == 0 4638 && action.sa_handler != SIG_DFL 4639 && action.sa_handler != SIG_IGN) 4640 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4641 SIGSEGV)) { 4642 Signal signal(SIGSEGV, 4643 status == B_PERMISSION_DENIED 4644 ? 
SEGV_ACCERR : SEGV_MAPERR, 4645 EFAULT, thread->team->id); 4646 signal.SetAddress((void*)address); 4647 send_signal_to_thread(thread, signal, 0); 4648 } 4649 } 4650 } 4651 4652 if (addressSpace != NULL) 4653 addressSpace->Put(); 4654 4655 return B_HANDLED_INTERRUPT; 4656 } 4657 4658 4659 struct PageFaultContext { 4660 AddressSpaceReadLocker addressSpaceLocker; 4661 VMCacheChainLocker cacheChainLocker; 4662 4663 VMTranslationMap* map; 4664 VMCache* topCache; 4665 off_t cacheOffset; 4666 vm_page_reservation reservation; 4667 bool isWrite; 4668 4669 // return values 4670 vm_page* page; 4671 bool restart; 4672 bool pageAllocated; 4673 4674 4675 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4676 : 4677 addressSpaceLocker(addressSpace, true), 4678 map(addressSpace->TranslationMap()), 4679 isWrite(isWrite) 4680 { 4681 } 4682 4683 ~PageFaultContext() 4684 { 4685 UnlockAll(); 4686 vm_page_unreserve_pages(&reservation); 4687 } 4688 4689 void Prepare(VMCache* topCache, off_t cacheOffset) 4690 { 4691 this->topCache = topCache; 4692 this->cacheOffset = cacheOffset; 4693 page = NULL; 4694 restart = false; 4695 pageAllocated = false; 4696 4697 cacheChainLocker.SetTo(topCache); 4698 } 4699 4700 void UnlockAll(VMCache* exceptCache = NULL) 4701 { 4702 topCache = NULL; 4703 addressSpaceLocker.Unlock(); 4704 cacheChainLocker.Unlock(exceptCache); 4705 } 4706 }; 4707 4708 4709 /*! Gets the page that should be mapped into the area. 4710 Returns an error code other than \c B_OK, if the page couldn't be found or 4711 paged in. The locking state of the address space and the caches is undefined 4712 in that case. 4713 Returns \c B_OK with \c context.restart set to \c true, if the functions 4714 had to unlock the address space and all caches and is supposed to be called 4715 again. 4716 Returns \c B_OK with \c context.restart set to \c false, if the page was 4717 found. It is returned in \c context.page. The address space will still be 4718 locked as well as all caches starting from the top cache to at least the 4719 cache the page lives in. 4720 */ 4721 static status_t 4722 fault_get_page(PageFaultContext& context) 4723 { 4724 VMCache* cache = context.topCache; 4725 VMCache* lastCache = NULL; 4726 vm_page* page = NULL; 4727 4728 while (cache != NULL) { 4729 // We already hold the lock of the cache at this point. 4730 4731 lastCache = cache; 4732 4733 page = cache->LookupPage(context.cacheOffset); 4734 if (page != NULL && page->busy) { 4735 // page must be busy -- wait for it to become unbusy 4736 context.UnlockAll(cache); 4737 cache->ReleaseRefLocked(); 4738 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4739 4740 // restart the whole process 4741 context.restart = true; 4742 return B_OK; 4743 } 4744 4745 if (page != NULL) 4746 break; 4747 4748 // The current cache does not contain the page we're looking for. 4749 4750 // see if the backing store has it 4751 if (cache->HasPage(context.cacheOffset)) { 4752 // insert a fresh page and mark it busy -- we're going to read it in 4753 page = vm_page_allocate_page(&context.reservation, 4754 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4755 cache->InsertPage(page, context.cacheOffset); 4756 4757 // We need to unlock all caches and the address space while reading 4758 // the page in. Keep a reference to the cache around. 
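// (Holding the extra reference ensures the cache is not destroyed while all locks are dropped during the read below.)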
4759 cache->AcquireRefLocked(); 4760 context.UnlockAll(); 4761 4762 // read the page in 4763 generic_io_vec vec; 4764 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4765 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4766 4767 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4768 B_PHYSICAL_IO_REQUEST, &bytesRead); 4769 4770 cache->Lock(); 4771 4772 if (status < B_OK) { 4773 // on error remove and free the page 4774 dprintf("reading page from cache %p returned: %s!\n", 4775 cache, strerror(status)); 4776 4777 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4778 cache->RemovePage(page); 4779 vm_page_set_state(page, PAGE_STATE_FREE); 4780 4781 cache->ReleaseRefAndUnlock(); 4782 return status; 4783 } 4784 4785 // mark the page unbusy again 4786 cache->MarkPageUnbusy(page); 4787 4788 DEBUG_PAGE_ACCESS_END(page); 4789 4790 // Since we needed to unlock everything temporarily, the area 4791 // situation might have changed. So we need to restart the whole 4792 // process. 4793 cache->ReleaseRefAndUnlock(); 4794 context.restart = true; 4795 return B_OK; 4796 } 4797 4798 cache = context.cacheChainLocker.LockSourceCache(); 4799 } 4800 4801 if (page == NULL) { 4802 // There was no adequate page, determine the cache for a clean one. 4803 // Read-only pages come in the deepest cache, only the top most cache 4804 // may have direct write access. 4805 cache = context.isWrite ? context.topCache : lastCache; 4806 4807 // allocate a clean page 4808 page = vm_page_allocate_page(&context.reservation, 4809 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4810 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4811 page->physical_page_number)); 4812 4813 // insert the new page into our cache 4814 cache->InsertPage(page, context.cacheOffset); 4815 context.pageAllocated = true; 4816 } else if (page->Cache() != context.topCache && context.isWrite) { 4817 // We have a page that has the data we want, but in the wrong cache 4818 // object so we need to copy it and stick it into the top cache. 4819 vm_page* sourcePage = page; 4820 4821 // TODO: If memory is low, it might be a good idea to steal the page 4822 // from our source cache -- if possible, that is. 4823 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4824 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4825 4826 // To not needlessly kill concurrency we unlock all caches but the top 4827 // one while copying the page. Lacking another mechanism to ensure that 4828 // the source page doesn't disappear, we mark it busy. 4829 sourcePage->busy = true; 4830 context.cacheChainLocker.UnlockKeepRefs(true); 4831 4832 // copy the page 4833 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4834 sourcePage->physical_page_number * B_PAGE_SIZE); 4835 4836 context.cacheChainLocker.RelockCaches(true); 4837 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4838 4839 // insert the new page into our cache 4840 context.topCache->InsertPage(page, context.cacheOffset); 4841 context.pageAllocated = true; 4842 } else 4843 DEBUG_PAGE_ACCESS_START(page); 4844 4845 context.page = page; 4846 return B_OK; 4847 } 4848 4849 4850 /*! Makes sure the address in the given address space is mapped. 4851 4852 \param addressSpace The address space. 4853 \param originalAddress The address. Doesn't need to be page aligned. 4854 \param isWrite If \c true the address shall be write-accessible. 4855 \param isUser If \c true the access is requested by a userland team. 
4856 \param wirePage On success, if non \c NULL, the wired count of the page 4857 mapped at the given address is incremented and the page is returned 4858 via this parameter. 4859 \return \c B_OK on success, another error code otherwise. 4860 */ 4861 static status_t 4862 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4863 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage) 4864 { 4865 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4866 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4867 originalAddress, isWrite, isUser)); 4868 4869 PageFaultContext context(addressSpace, isWrite); 4870 4871 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4872 status_t status = B_OK; 4873 4874 addressSpace->IncrementFaultCount(); 4875 4876 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4877 // the pages upfront makes sure we don't have any cache locked, so that the 4878 // page daemon/thief can do their job without problems. 4879 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4880 originalAddress); 4881 context.addressSpaceLocker.Unlock(); 4882 vm_page_reserve_pages(&context.reservation, reservePages, 4883 addressSpace == VMAddressSpace::Kernel() 4884 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4885 4886 while (true) { 4887 context.addressSpaceLocker.Lock(); 4888 4889 // get the area the fault was in 4890 VMArea* area = addressSpace->LookupArea(address); 4891 if (area == NULL) { 4892 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4893 "space\n", originalAddress); 4894 TPF(PageFaultError(-1, 4895 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4896 status = B_BAD_ADDRESS; 4897 break; 4898 } 4899 4900 // check permissions 4901 uint32 protection = get_area_page_protection(area, address); 4902 if (isUser && (protection & B_USER_PROTECTION) == 0 4903 && (area->protection & B_KERNEL_AREA) != 0) { 4904 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4905 area->id, (void*)originalAddress); 4906 TPF(PageFaultError(area->id, 4907 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4908 status = B_PERMISSION_DENIED; 4909 break; 4910 } 4911 if (isWrite && (protection 4912 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4913 dprintf("write access attempted on write-protected area 0x%" 4914 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4915 TPF(PageFaultError(area->id, 4916 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4917 status = B_PERMISSION_DENIED; 4918 break; 4919 } else if (isExecute && (protection 4920 & (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) { 4921 dprintf("instruction fetch attempted on execute-protected area 0x%" 4922 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4923 TPF(PageFaultError(area->id, 4924 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED)); 4925 status = B_PERMISSION_DENIED; 4926 break; 4927 } else if (!isWrite && !isExecute && (protection 4928 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4929 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4930 " at %p\n", area->id, (void*)originalAddress); 4931 TPF(PageFaultError(area->id, 4932 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4933 status = B_PERMISSION_DENIED; 4934 break; 4935 } 4936 4937 // We have the area, it was a valid access, so let's try to resolve the 4938 // page fault now. 4939 // At first, the top most cache from the area is investigated. 
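// The offset of the faulting page within the cache is its offset within the area plus the area's own offset into the cache.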
4940 4941 context.Prepare(vm_area_get_locked_cache(area), 4942 address - area->Base() + area->cache_offset); 4943 4944 // See if this cache has a fault handler -- this will do all the work 4945 // for us. 4946 { 4947 // Note, since the page fault is resolved with interrupts enabled, 4948 // the fault handler could be called more than once for the same 4949 // reason -- the store must take this into account. 4950 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4951 if (status != B_BAD_HANDLER) 4952 break; 4953 } 4954 4955 // The top most cache has no fault handler, so let's see if the cache or 4956 // its sources already have the page we're searching for (we're going 4957 // from top to bottom). 4958 status = fault_get_page(context); 4959 if (status != B_OK) { 4960 TPF(PageFaultError(area->id, status)); 4961 break; 4962 } 4963 4964 if (context.restart) 4965 continue; 4966 4967 // All went fine, all there is left to do is to map the page into the 4968 // address space. 4969 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4970 context.page)); 4971 4972 // If the page doesn't reside in the area's cache, we need to make sure 4973 // it's mapped in read-only, so that we cannot overwrite someone else's 4974 // data (copy-on-write) 4975 uint32 newProtection = protection; 4976 if (context.page->Cache() != context.topCache && !isWrite) 4977 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4978 4979 bool unmapPage = false; 4980 bool mapPage = true; 4981 4982 // check whether there's already a page mapped at the address 4983 context.map->Lock(); 4984 4985 phys_addr_t physicalAddress; 4986 uint32 flags; 4987 vm_page* mappedPage = NULL; 4988 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4989 && (flags & PAGE_PRESENT) != 0 4990 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4991 != NULL) { 4992 // Yep there's already a page. If it's ours, we can simply adjust 4993 // its protection. Otherwise we have to unmap it. 4994 if (mappedPage == context.page) { 4995 context.map->ProtectPage(area, address, newProtection); 4996 // Note: We assume that ProtectPage() is atomic (i.e. 4997 // the page isn't temporarily unmapped), otherwise we'd have 4998 // to make sure it isn't wired. 4999 mapPage = false; 5000 } else 5001 unmapPage = true; 5002 } 5003 5004 context.map->Unlock(); 5005 5006 if (unmapPage) { 5007 // If the page is wired, we can't unmap it. Wait until it is unwired 5008 // again and restart. Note that the page cannot be wired for 5009 // writing, since it isn't in the topmost cache. So we can safely 5010 // ignore ranges wired for writing (our own and other concurrent 5011 // wiring attempts in progress) and in fact have to do that to avoid 5012 // a deadlock. 5013 VMAreaUnwiredWaiter waiter; 5014 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 5015 VMArea::IGNORE_WRITE_WIRED_RANGES)) { 5016 // unlock everything and wait 5017 if (context.pageAllocated) { 5018 // ... but since we allocated a page and inserted it into 5019 // the top cache, remove and free it first. Otherwise we'd 5020 // have a page from a lower cache mapped while an upper 5021 // cache has a page that would shadow it. 
5022 context.topCache->RemovePage(context.page); 5023 vm_page_free_etc(context.topCache, context.page, 5024 &context.reservation); 5025 } else 5026 DEBUG_PAGE_ACCESS_END(context.page); 5027 5028 context.UnlockAll(); 5029 waiter.waitEntry.Wait(); 5030 continue; 5031 } 5032 5033 // Note: The mapped page is a page of a lower cache. We are 5034 // guaranteed to have that cached locked, our new page is a copy of 5035 // that page, and the page is not busy. The logic for that guarantee 5036 // is as follows: Since the page is mapped, it must live in the top 5037 // cache (ruled out above) or any of its lower caches, and there is 5038 // (was before the new page was inserted) no other page in any 5039 // cache between the top cache and the page's cache (otherwise that 5040 // would be mapped instead). That in turn means that our algorithm 5041 // must have found it and therefore it cannot be busy either. 5042 DEBUG_PAGE_ACCESS_START(mappedPage); 5043 unmap_page(area, address); 5044 DEBUG_PAGE_ACCESS_END(mappedPage); 5045 } 5046 5047 if (mapPage) { 5048 if (map_page(area, context.page, address, newProtection, 5049 &context.reservation) != B_OK) { 5050 // Mapping can only fail, when the page mapping object couldn't 5051 // be allocated. Save for the missing mapping everything is 5052 // fine, though. If this was a regular page fault, we'll simply 5053 // leave and probably fault again. To make sure we'll have more 5054 // luck then, we ensure that the minimum object reserve is 5055 // available. 5056 DEBUG_PAGE_ACCESS_END(context.page); 5057 5058 context.UnlockAll(); 5059 5060 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 5061 != B_OK) { 5062 // Apparently the situation is serious. Let's get ourselves 5063 // killed. 5064 status = B_NO_MEMORY; 5065 } else if (wirePage != NULL) { 5066 // The caller expects us to wire the page. Since 5067 // object_cache_reserve() succeeded, we should now be able 5068 // to allocate a mapping structure. Restart. 
5069 continue; 5070 } 5071 5072 break; 5073 } 5074 } else if (context.page->State() == PAGE_STATE_INACTIVE) 5075 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 5076 5077 // also wire the page, if requested 5078 if (wirePage != NULL && status == B_OK) { 5079 increment_page_wired_count(context.page); 5080 *wirePage = context.page; 5081 } 5082 5083 DEBUG_PAGE_ACCESS_END(context.page); 5084 5085 break; 5086 } 5087 5088 return status; 5089 } 5090 5091 5092 status_t 5093 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 5094 { 5095 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 5096 } 5097 5098 status_t 5099 vm_put_physical_page(addr_t vaddr, void* handle) 5100 { 5101 return sPhysicalPageMapper->PutPage(vaddr, handle); 5102 } 5103 5104 5105 status_t 5106 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 5107 void** _handle) 5108 { 5109 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 5110 } 5111 5112 status_t 5113 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 5114 { 5115 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 5116 } 5117 5118 5119 status_t 5120 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 5121 { 5122 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 5123 } 5124 5125 status_t 5126 vm_put_physical_page_debug(addr_t vaddr, void* handle) 5127 { 5128 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 5129 } 5130 5131 5132 void 5133 vm_get_info(system_info* info) 5134 { 5135 swap_get_info(info); 5136 5137 MutexLocker locker(sAvailableMemoryLock); 5138 info->needed_memory = sNeededMemory; 5139 info->free_memory = sAvailableMemory; 5140 } 5141 5142 5143 uint32 5144 vm_num_page_faults(void) 5145 { 5146 return sPageFaults; 5147 } 5148 5149 5150 off_t 5151 vm_available_memory(void) 5152 { 5153 MutexLocker locker(sAvailableMemoryLock); 5154 return sAvailableMemory; 5155 } 5156 5157 5158 off_t 5159 vm_available_not_needed_memory(void) 5160 { 5161 MutexLocker locker(sAvailableMemoryLock); 5162 return sAvailableMemory - sNeededMemory; 5163 } 5164 5165 5166 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 5167 debugger. 5168 */ 5169 off_t 5170 vm_available_not_needed_memory_debug(void) 5171 { 5172 return sAvailableMemory - sNeededMemory; 5173 } 5174 5175 5176 size_t 5177 vm_kernel_address_space_left(void) 5178 { 5179 return VMAddressSpace::Kernel()->FreeSpace(); 5180 } 5181 5182 5183 void 5184 vm_unreserve_memory(size_t amount) 5185 { 5186 mutex_lock(&sAvailableMemoryLock); 5187 5188 sAvailableMemory += amount; 5189 5190 mutex_unlock(&sAvailableMemoryLock); 5191 } 5192 5193 5194 status_t 5195 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 5196 { 5197 size_t reserve = kMemoryReserveForPriority[priority]; 5198 5199 MutexLocker locker(sAvailableMemoryLock); 5200 5201 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5202 5203 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5204 sAvailableMemory -= amount; 5205 return B_OK; 5206 } 5207 5208 if (amount >= (vm_page_num_pages() * B_PAGE_SIZE)) { 5209 // Do not wait for something that will never happen. 
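// (The requested amount exceeds the total physical memory of the system, so waiting cannot help.)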
5210 return B_NO_MEMORY; 5211 } 5212 5213 if (timeout <= 0) 5214 return B_NO_MEMORY; 5215 5216 // turn timeout into an absolute timeout 5217 timeout += system_time(); 5218 5219 // loop until we've got the memory or the timeout occurs 5220 do { 5221 sNeededMemory += amount; 5222 5223 // call the low resource manager 5224 locker.Unlock(); 5225 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 5226 B_ABSOLUTE_TIMEOUT, timeout); 5227 locker.Lock(); 5228 5229 sNeededMemory -= amount; 5230 5231 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5232 sAvailableMemory -= amount; 5233 return B_OK; 5234 } 5235 } while (timeout > system_time()); 5236 5237 return B_NO_MEMORY; 5238 } 5239 5240 5241 status_t 5242 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 5243 { 5244 // NOTE: The caller is responsible for synchronizing calls to this function! 5245 5246 AddressSpaceReadLocker locker; 5247 VMArea* area; 5248 status_t status = locker.SetFromArea(id, area); 5249 if (status != B_OK) 5250 return status; 5251 5252 // nothing to do, if the type doesn't change 5253 uint32 oldType = area->MemoryType(); 5254 if (type == oldType) 5255 return B_OK; 5256 5257 // set the memory type of the area and the mapped pages 5258 VMTranslationMap* map = area->address_space->TranslationMap(); 5259 map->Lock(); 5260 area->SetMemoryType(type); 5261 map->ProtectArea(area, area->protection); 5262 map->Unlock(); 5263 5264 // set the physical memory type 5265 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 5266 if (error != B_OK) { 5267 // reset the memory type of the area and the mapped pages 5268 map->Lock(); 5269 area->SetMemoryType(oldType); 5270 map->ProtectArea(area, area->protection); 5271 map->Unlock(); 5272 return error; 5273 } 5274 5275 return B_OK; 5276 5277 } 5278 5279 5280 /*! This function enforces some protection properties: 5281 - kernel areas must be W^X (after kernel startup) 5282 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 5283 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 5284 */ 5285 static void 5286 fix_protection(uint32* protection) 5287 { 5288 if ((*protection & B_KERNEL_EXECUTE_AREA) != 0 5289 && ((*protection & B_KERNEL_WRITE_AREA) != 0 5290 || (*protection & B_WRITE_AREA) != 0) 5291 && !gKernelStartup) 5292 panic("kernel areas cannot be both writable and executable!"); 5293 5294 if ((*protection & B_KERNEL_PROTECTION) == 0) { 5295 if ((*protection & B_WRITE_AREA) != 0) 5296 *protection |= B_KERNEL_WRITE_AREA; 5297 if ((*protection & B_READ_AREA) != 0) 5298 *protection |= B_KERNEL_READ_AREA; 5299 } 5300 } 5301 5302 5303 static void 5304 fill_area_info(struct VMArea* area, area_info* info, size_t size) 5305 { 5306 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 5307 info->area = area->id; 5308 info->address = (void*)area->Base(); 5309 info->size = area->Size(); 5310 info->protection = area->protection; 5311 info->lock = area->wiring; 5312 info->team = area->address_space->ID(); 5313 info->copy_count = 0; 5314 info->in_count = 0; 5315 info->out_count = 0; 5316 // TODO: retrieve real values here! 5317 5318 VMCache* cache = vm_area_get_locked_cache(area); 5319 5320 // Note, this is a simplification; the cache could be larger than this area 5321 info->ram_size = cache->page_count * B_PAGE_SIZE; 5322 5323 vm_area_put_locked_cache(cache); 5324 } 5325 5326 5327 static status_t 5328 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 5329 { 5330 // is newSize a multiple of B_PAGE_SIZE? 
5331 if (newSize & (B_PAGE_SIZE - 1)) 5332 return B_BAD_VALUE; 5333 5334 // lock all affected address spaces and the cache 5335 VMArea* area; 5336 VMCache* cache; 5337 5338 MultiAddressSpaceLocker locker; 5339 AreaCacheLocker cacheLocker; 5340 5341 status_t status; 5342 size_t oldSize; 5343 bool anyKernelArea; 5344 bool restart; 5345 5346 do { 5347 anyKernelArea = false; 5348 restart = false; 5349 5350 locker.Unset(); 5351 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 5352 if (status != B_OK) 5353 return status; 5354 cacheLocker.SetTo(cache, true); // already locked 5355 5356 // enforce restrictions 5357 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 5358 || (area->protection & B_KERNEL_AREA) != 0)) { 5359 dprintf("vm_resize_area: team %" B_PRId32 " tried to " 5360 "resize kernel area %" B_PRId32 " (%s)\n", 5361 team_get_current_team_id(), areaID, area->name); 5362 return B_NOT_ALLOWED; 5363 } 5364 // TODO: Enforce all restrictions (team, etc.)! 5365 5366 oldSize = area->Size(); 5367 if (newSize == oldSize) 5368 return B_OK; 5369 5370 if (cache->type != CACHE_TYPE_RAM) 5371 return B_NOT_ALLOWED; 5372 5373 if (oldSize < newSize) { 5374 // We need to check if all areas of this cache can be resized. 5375 for (VMArea* current = cache->areas; current != NULL; 5376 current = current->cache_next) { 5377 if (!current->address_space->CanResizeArea(current, newSize)) 5378 return B_ERROR; 5379 anyKernelArea 5380 |= current->address_space == VMAddressSpace::Kernel(); 5381 } 5382 } else { 5383 // We're shrinking the areas, so we must make sure the affected 5384 // ranges are not wired. 5385 for (VMArea* current = cache->areas; current != NULL; 5386 current = current->cache_next) { 5387 anyKernelArea 5388 |= current->address_space == VMAddressSpace::Kernel(); 5389 5390 if (wait_if_area_range_is_wired(current, 5391 current->Base() + newSize, oldSize - newSize, &locker, 5392 &cacheLocker)) { 5393 restart = true; 5394 break; 5395 } 5396 } 5397 } 5398 } while (restart); 5399 5400 // Okay, looks good so far, so let's do it 5401 5402 int priority = kernel && anyKernelArea 5403 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 5404 uint32 allocationFlags = kernel && anyKernelArea 5405 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 5406 5407 if (oldSize < newSize) { 5408 // Growing the cache can fail, so we do it first. 5409 status = cache->Resize(cache->virtual_base + newSize, priority); 5410 if (status != B_OK) 5411 return status; 5412 } 5413 5414 for (VMArea* current = cache->areas; current != NULL; 5415 current = current->cache_next) { 5416 status = current->address_space->ResizeArea(current, newSize, 5417 allocationFlags); 5418 if (status != B_OK) 5419 break; 5420 5421 // We also need to unmap all pages beyond the new size, if the area has 5422 // shrunk 5423 if (newSize < oldSize) { 5424 VMCacheChainLocker cacheChainLocker(cache); 5425 cacheChainLocker.LockAllSourceCaches(); 5426 5427 unmap_pages(current, current->Base() + newSize, 5428 oldSize - newSize); 5429 5430 cacheChainLocker.Unlock(cache); 5431 } 5432 } 5433 5434 if (status == B_OK) { 5435 // Shrink or grow individual page protections if in use. 
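// Page protections are stored as one nibble (4 bits) per page, i.e. two pages per byte; this is why the last byte needs special treatment when the old size ends on an odd page count.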
5436 if (area->page_protections != NULL) { 5437 size_t bytes = area_page_protections_size(newSize); 5438 uint8* newProtections 5439 = (uint8*)realloc(area->page_protections, bytes); 5440 if (newProtections == NULL) 5441 status = B_NO_MEMORY; 5442 else { 5443 area->page_protections = newProtections; 5444 5445 if (oldSize < newSize) { 5446 // init the additional page protections to that of the area 5447 uint32 offset = area_page_protections_size(oldSize); 5448 uint32 areaProtection = area->protection 5449 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 5450 memset(area->page_protections + offset, 5451 areaProtection | (areaProtection << 4), bytes - offset); 5452 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 5453 uint8& entry = area->page_protections[offset - 1]; 5454 entry = (entry & 0x0f) | (areaProtection << 4); 5455 } 5456 } 5457 } 5458 } 5459 } 5460 5461 // shrinking the cache can't fail, so we do it now 5462 if (status == B_OK && newSize < oldSize) 5463 status = cache->Resize(cache->virtual_base + newSize, priority); 5464 5465 if (status != B_OK) { 5466 // Something failed -- resize the areas back to their original size. 5467 // This can fail, too, in which case we're seriously screwed. 5468 for (VMArea* current = cache->areas; current != NULL; 5469 current = current->cache_next) { 5470 if (current->address_space->ResizeArea(current, oldSize, 5471 allocationFlags) != B_OK) { 5472 panic("vm_resize_area(): Failed and unable to restore the " 5473 "original state."); 5474 } 5475 } 5476 5477 cache->Resize(cache->virtual_base + oldSize, priority); 5478 } 5479 5480 // TODO: we must honour the lock restrictions of this area 5481 return status; 5482 } 5483 5484 5485 status_t 5486 vm_memset_physical(phys_addr_t address, int value, phys_size_t length) 5487 { 5488 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 5489 } 5490 5491 5492 status_t 5493 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 5494 { 5495 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 5496 } 5497 5498 5499 status_t 5500 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 5501 bool user) 5502 { 5503 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 5504 } 5505 5506 5507 void 5508 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 5509 { 5510 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 5511 } 5512 5513 5514 /*! Copies a range of memory directly from/to a page that might not be mapped 5515 at the moment. 5516 5517 For \a unsafeMemory the current mapping (if any) is ignored. The function 5518 walks through the respective area's cache chain to find the physical page 5519 and copies from/to it directly. 5520 The memory range starting at \a unsafeMemory with a length of \a size bytes 5521 must not cross a page boundary. 5522 5523 \param teamID The team ID identifying the address space \a unsafeMemory is 5524 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 5525 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 5526 is passed, the address space of the thread returned by 5527 debug_get_debugged_thread() is used. 5528 \param unsafeMemory The start of the unsafe memory range to be copied 5529 from/to. 5530 \param buffer A safely accessible kernel buffer to be copied from/to. 5531 \param size The number of bytes to be copied. 5532 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5533 \a unsafeMemory, the other way around otherwise. 
5534 */ 5535 status_t 5536 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5537 size_t size, bool copyToUnsafe) 5538 { 5539 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5540 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5541 return B_BAD_VALUE; 5542 } 5543 5544 // get the address space for the debugged thread 5545 VMAddressSpace* addressSpace; 5546 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5547 addressSpace = VMAddressSpace::Kernel(); 5548 } else if (teamID == B_CURRENT_TEAM) { 5549 Thread* thread = debug_get_debugged_thread(); 5550 if (thread == NULL || thread->team == NULL) 5551 return B_BAD_ADDRESS; 5552 5553 addressSpace = thread->team->address_space; 5554 } else 5555 addressSpace = VMAddressSpace::DebugGet(teamID); 5556 5557 if (addressSpace == NULL) 5558 return B_BAD_ADDRESS; 5559 5560 // get the area 5561 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5562 if (area == NULL) 5563 return B_BAD_ADDRESS; 5564 5565 // search the page 5566 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5567 + area->cache_offset; 5568 VMCache* cache = area->cache; 5569 vm_page* page = NULL; 5570 while (cache != NULL) { 5571 page = cache->DebugLookupPage(cacheOffset); 5572 if (page != NULL) 5573 break; 5574 5575 // Page not found in this cache -- if it is paged out, we must not try 5576 // to get it from lower caches. 5577 if (cache->DebugHasPage(cacheOffset)) 5578 break; 5579 5580 cache = cache->source; 5581 } 5582 5583 if (page == NULL) 5584 return B_UNSUPPORTED; 5585 5586 // copy from/to physical memory 5587 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5588 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5589 5590 if (copyToUnsafe) { 5591 if (page->Cache() != area->cache) 5592 return B_UNSUPPORTED; 5593 5594 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5595 } 5596 5597 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5598 } 5599 5600 5601 /** Validate that a memory range is either fully in kernel space, or fully in 5602 * userspace */ 5603 static inline bool 5604 validate_memory_range(const void* addr, size_t size) 5605 { 5606 addr_t address = (addr_t)addr; 5607 5608 // Check for overflows on all addresses. 5609 if ((address + size) < address) 5610 return false; 5611 5612 // Validate that the address range does not cross the kernel/user boundary. 5613 return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1); 5614 } 5615 5616 5617 // #pragma mark - kernel public API 5618 5619 5620 status_t 5621 user_memcpy(void* to, const void* from, size_t size) 5622 { 5623 if (!validate_memory_range(to, size) || !validate_memory_range(from, size)) 5624 return B_BAD_ADDRESS; 5625 5626 if (arch_cpu_user_memcpy(to, from, size) < B_OK) 5627 return B_BAD_ADDRESS; 5628 5629 return B_OK; 5630 } 5631 5632 5633 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5634 the string in \a to, NULL-terminating the result. 5635 5636 \param to Pointer to the destination C-string. 5637 \param from Pointer to the source C-string. 5638 \param size Size in bytes of the string buffer pointed to by \a to. 5639 5640 \return strlen(\a from). 5641 */ 5642 ssize_t 5643 user_strlcpy(char* to, const char* from, size_t size) 5644 { 5645 if (to == NULL && size != 0) 5646 return B_BAD_VALUE; 5647 if (from == NULL) 5648 return B_BAD_ADDRESS; 5649 5650 // Protect the source address from overflows. 
5651 size_t maxSize = size; 5652 if ((addr_t)from + maxSize < (addr_t)from) 5653 maxSize -= (addr_t)from + maxSize; 5654 if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize)) 5655 maxSize = USER_TOP - (addr_t)from; 5656 5657 if (!validate_memory_range(to, maxSize)) 5658 return B_BAD_ADDRESS; 5659 5660 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize); 5661 if (result < 0) 5662 return result; 5663 5664 // If we hit the address overflow boundary, fail. 5665 if ((size_t)result >= maxSize && maxSize < size) 5666 return B_BAD_ADDRESS; 5667 5668 return result; 5669 } 5670 5671 5672 status_t 5673 user_memset(void* s, char c, size_t count) 5674 { 5675 if (!validate_memory_range(s, count)) 5676 return B_BAD_ADDRESS; 5677 5678 if (arch_cpu_user_memset(s, c, count) < B_OK) 5679 return B_BAD_ADDRESS; 5680 5681 return B_OK; 5682 } 5683 5684 5685 /*! Wires a single page at the given address. 5686 5687 \param team The team whose address space the address belongs to. Also 5688 supports \c B_CURRENT_TEAM. If the given address is a kernel address, the 5689 parameter is ignored. 5690 \param address The virtual address to wire down. Does not need to 5691 be page aligned. 5692 \param writable If \c true the page shall be writable. 5693 \param info On success the info is filled in, among other things 5694 containing the physical address the given virtual one translates to. 5695 \return \c B_OK if the page could be wired, another error code otherwise. 5696 */ 5697 status_t 5698 vm_wire_page(team_id team, addr_t address, bool writable, 5699 VMPageWiringInfo* info) 5700 { 5701 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5702 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5703 5704 // compute the page protection that is required 5705 bool isUser = IS_USER_ADDRESS(address); 5706 uint32 requiredProtection = PAGE_PRESENT 5707 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5708 if (writable) 5709 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5710 5711 // get and read lock the address space 5712 VMAddressSpace* addressSpace = NULL; 5713 if (isUser) { 5714 if (team == B_CURRENT_TEAM) 5715 addressSpace = VMAddressSpace::GetCurrent(); 5716 else 5717 addressSpace = VMAddressSpace::Get(team); 5718 } else 5719 addressSpace = VMAddressSpace::GetKernel(); 5720 if (addressSpace == NULL) 5721 return B_ERROR; 5722 5723 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5724 5725 VMTranslationMap* map = addressSpace->TranslationMap(); 5726 status_t error = B_OK; 5727 5728 // get the area 5729 VMArea* area = addressSpace->LookupArea(pageAddress); 5730 if (area == NULL) { 5731 addressSpace->Put(); 5732 return B_BAD_ADDRESS; 5733 } 5734 5735 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5736 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5737 5738 // mark the area range wired 5739 area->Wire(&info->range); 5740 5741 // Lock the area's cache chain and the translation map. Needed to look 5742 // up the page and play with its wired count. 5743 cacheChainLocker.LockAllSourceCaches(); 5744 map->Lock(); 5745 5746 phys_addr_t physicalAddress; 5747 uint32 flags; 5748 vm_page* page; 5749 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5750 && (flags & requiredProtection) == requiredProtection 5751 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5752 != NULL) { 5753 // Already mapped with the correct permissions -- just increment 5754 // the page's wired count.
5755 increment_page_wired_count(page); 5756 5757 map->Unlock(); 5758 cacheChainLocker.Unlock(); 5759 addressSpaceLocker.Unlock(); 5760 } else { 5761 // Let vm_soft_fault() map the page for us, if possible. We need 5762 // to fully unlock to avoid deadlocks. Since we have already 5763 // wired the area itself, nothing disturbing will happen with it 5764 // in the meantime. 5765 map->Unlock(); 5766 cacheChainLocker.Unlock(); 5767 addressSpaceLocker.Unlock(); 5768 5769 error = vm_soft_fault(addressSpace, pageAddress, writable, false, 5770 isUser, &page); 5771 5772 if (error != B_OK) { 5773 // The page could not be mapped -- clean up. 5774 VMCache* cache = vm_area_get_locked_cache(area); 5775 area->Unwire(&info->range); 5776 cache->ReleaseRefAndUnlock(); 5777 addressSpace->Put(); 5778 return error; 5779 } 5780 } 5781 5782 info->physicalAddress 5783 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE 5784 + address % B_PAGE_SIZE; 5785 info->page = page; 5786 5787 return B_OK; 5788 } 5789 5790 5791 /*! Unwires a single page previously wired via vm_wire_page(). 5792 5793 \param info The same object passed to vm_wire_page() before. 5794 */ 5795 void 5796 vm_unwire_page(VMPageWiringInfo* info) 5797 { 5798 // lock the address space 5799 VMArea* area = info->range.area; 5800 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false); 5801 // takes over our reference 5802 5803 // lock the top cache 5804 VMCache* cache = vm_area_get_locked_cache(area); 5805 VMCacheChainLocker cacheChainLocker(cache); 5806 5807 if (info->page->Cache() != cache) { 5808 // The page is not in the top cache, so we lock the whole cache chain 5809 // before touching the page's wired count. 5810 cacheChainLocker.LockAllSourceCaches(); 5811 } 5812 5813 decrement_page_wired_count(info->page); 5814 5815 // remove the wired range from the area 5816 area->Unwire(&info->range); 5817 5818 cacheChainLocker.Unlock(); 5819 } 5820 5821 5822 /*! Wires down the given address range in the specified team's address space. 5823 5824 If successful the function 5825 - acquires a reference to the specified team's address space, 5826 - adds respective wired ranges to all areas that intersect with the given 5827 address range, 5828 - makes sure all pages in the given address range are mapped with the 5829 requested access permissions and increments their wired count. 5830 5831 It fails when \a team doesn't specify a valid address space, when any part 5832 of the specified address range is not covered by areas, when the concerned 5833 areas don't allow mapping with the requested permissions, or when mapping 5834 failed for another reason. 5835 5836 When successful the call must be balanced by an unlock_memory_etc() call with 5837 the exact same parameters. 5838 5839 \param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is 5840 supported. 5841 \param address The start of the address range to be wired. 5842 \param numBytes The size of the address range to be wired. 5843 \param flags Flags. Currently only \c B_READ_DEVICE is defined, which 5844 requests that the range must be wired writable ("read from device 5845 into memory"). 5846 \return \c B_OK on success, another error code otherwise.
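
	A minimal usage sketch (illustrative; \c teamID, \c buffer and \c length
	are placeholders, and the default \c flags value of \c 0 is used),
	showing the required pairing with unlock_memory_etc():
	\code
	status_t status = lock_memory_etc(teamID, buffer, length, 0);
	if (status == B_OK) {
		// ... access the wired range ...
		unlock_memory_etc(teamID, buffer, length, 0);
	}
	\endcode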
5847 */ 5848 status_t 5849 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5850 { 5851 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5852 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5853 5854 // compute the page protection that is required 5855 bool isUser = IS_USER_ADDRESS(address); 5856 bool writable = (flags & B_READ_DEVICE) == 0; 5857 uint32 requiredProtection = PAGE_PRESENT 5858 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5859 if (writable) 5860 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5861 5862 uint32 mallocFlags = isUser 5863 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5864 5865 // get and read lock the address space 5866 VMAddressSpace* addressSpace = NULL; 5867 if (isUser) { 5868 if (team == B_CURRENT_TEAM) 5869 addressSpace = VMAddressSpace::GetCurrent(); 5870 else 5871 addressSpace = VMAddressSpace::Get(team); 5872 } else 5873 addressSpace = VMAddressSpace::GetKernel(); 5874 if (addressSpace == NULL) 5875 return B_ERROR; 5876 5877 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5878 // We get a new address space reference here. The one we got above will 5879 // be freed by unlock_memory_etc(). 5880 5881 VMTranslationMap* map = addressSpace->TranslationMap(); 5882 status_t error = B_OK; 5883 5884 // iterate through all concerned areas 5885 addr_t nextAddress = lockBaseAddress; 5886 while (nextAddress != lockEndAddress) { 5887 // get the next area 5888 VMArea* area = addressSpace->LookupArea(nextAddress); 5889 if (area == NULL) { 5890 error = B_BAD_ADDRESS; 5891 break; 5892 } 5893 5894 addr_t areaStart = nextAddress; 5895 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5896 5897 // allocate the wired range (do that before locking the cache to avoid 5898 // deadlocks) 5899 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5900 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5901 if (range == NULL) { 5902 error = B_NO_MEMORY; 5903 break; 5904 } 5905 5906 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5907 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5908 5909 // mark the area range wired 5910 area->Wire(range); 5911 5912 // Depending on the area cache type and the wiring, we may not need to 5913 // look at the individual pages. 5914 if (area->cache_type == CACHE_TYPE_NULL 5915 || area->cache_type == CACHE_TYPE_DEVICE 5916 || area->wiring == B_FULL_LOCK 5917 || area->wiring == B_CONTIGUOUS) { 5918 nextAddress = areaEnd; 5919 continue; 5920 } 5921 5922 // Lock the area's cache chain and the translation map. Needed to look 5923 // up pages and play with their wired count. 5924 cacheChainLocker.LockAllSourceCaches(); 5925 map->Lock(); 5926 5927 // iterate through the pages and wire them 5928 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5929 phys_addr_t physicalAddress; 5930 uint32 flags; 5931 5932 vm_page* page; 5933 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5934 && (flags & requiredProtection) == requiredProtection 5935 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5936 != NULL) { 5937 // Already mapped with the correct permissions -- just increment 5938 // the page's wired count. 5939 increment_page_wired_count(page); 5940 } else { 5941 // Let vm_soft_fault() map the page for us, if possible. We need 5942 // to fully unlock to avoid deadlocks. 
Since we have already 5943 // wired the area itself, nothing disturbing will happen with it 5944 // in the meantime. 5945 map->Unlock(); 5946 cacheChainLocker.Unlock(); 5947 addressSpaceLocker.Unlock(); 5948 5949 error = vm_soft_fault(addressSpace, nextAddress, writable, 5950 false, isUser, &page); 5951 5952 addressSpaceLocker.Lock(); 5953 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5954 cacheChainLocker.LockAllSourceCaches(); 5955 map->Lock(); 5956 } 5957 5958 if (error != B_OK) 5959 break; 5960 } 5961 5962 map->Unlock(); 5963 5964 if (error == B_OK) { 5965 cacheChainLocker.Unlock(); 5966 } else { 5967 // An error occurred, so abort right here. If the current address 5968 // is the first in this area, unwire the area, since we won't get 5969 // to it when reverting what we've done so far. 5970 if (nextAddress == areaStart) { 5971 area->Unwire(range); 5972 cacheChainLocker.Unlock(); 5973 range->~VMAreaWiredRange(); 5974 free_etc(range, mallocFlags); 5975 } else 5976 cacheChainLocker.Unlock(); 5977 5978 break; 5979 } 5980 } 5981 5982 if (error != B_OK) { 5983 // An error occurred, so unwire all that we've already wired. Note that 5984 // even if not a single page was wired, unlock_memory_etc() is called 5985 // to put the address space reference. 5986 addressSpaceLocker.Unlock(); 5987 unlock_memory_etc(team, (void*)lockBaseAddress, 5988 nextAddress - lockBaseAddress, flags); 5989 } 5990 5991 return error; 5992 } 5993 5994 5995 status_t 5996 lock_memory(void* address, size_t numBytes, uint32 flags) 5997 { 5998 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5999 } 6000 6001 6002 /*! Unwires an address range previously wired with lock_memory_etc(). 6003 6004 Note that a call to this function must balance a previous lock_memory_etc() 6005 call with exactly the same parameters. 6006 */ 6007 status_t 6008 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 6009 { 6010 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 6011 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 6012 6013 // compute the page protection that is required 6014 bool isUser = IS_USER_ADDRESS(address); 6015 bool writable = (flags & B_READ_DEVICE) == 0; 6016 uint32 requiredProtection = PAGE_PRESENT 6017 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 6018 if (writable) 6019 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 6020 6021 uint32 mallocFlags = isUser 6022 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 6023 6024 // get and read lock the address space 6025 VMAddressSpace* addressSpace = NULL; 6026 if (isUser) { 6027 if (team == B_CURRENT_TEAM) 6028 addressSpace = VMAddressSpace::GetCurrent(); 6029 else 6030 addressSpace = VMAddressSpace::Get(team); 6031 } else 6032 addressSpace = VMAddressSpace::GetKernel(); 6033 if (addressSpace == NULL) 6034 return B_ERROR; 6035 6036 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false); 6037 // Take over the address space reference. We don't unlock until we're 6038 // done. 
6039 6040 VMTranslationMap* map = addressSpace->TranslationMap(); 6041 status_t error = B_OK; 6042 6043 // iterate through all concerned areas 6044 addr_t nextAddress = lockBaseAddress; 6045 while (nextAddress != lockEndAddress) { 6046 // get the next area 6047 VMArea* area = addressSpace->LookupArea(nextAddress); 6048 if (area == NULL) { 6049 error = B_BAD_ADDRESS; 6050 break; 6051 } 6052 6053 addr_t areaStart = nextAddress; 6054 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 6055 6056 // Lock the area's top cache. This is a requirement for 6057 // VMArea::Unwire(). 6058 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 6059 6060 // Depending on the area cache type and the wiring, we may not need to 6061 // look at the individual pages. 6062 if (area->cache_type == CACHE_TYPE_NULL 6063 || area->cache_type == CACHE_TYPE_DEVICE 6064 || area->wiring == B_FULL_LOCK 6065 || area->wiring == B_CONTIGUOUS) { 6066 // unwire the range (to avoid deadlocks we delete the range after 6067 // unlocking the cache) 6068 nextAddress = areaEnd; 6069 VMAreaWiredRange* range = area->Unwire(areaStart, 6070 areaEnd - areaStart, writable); 6071 cacheChainLocker.Unlock(); 6072 if (range != NULL) { 6073 range->~VMAreaWiredRange(); 6074 free_etc(range, mallocFlags); 6075 } 6076 continue; 6077 } 6078 6079 // Lock the area's cache chain and the translation map. Needed to look 6080 // up pages and play with their wired count. 6081 cacheChainLocker.LockAllSourceCaches(); 6082 map->Lock(); 6083 6084 // iterate through the pages and unwire them 6085 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 6086 phys_addr_t physicalAddress; 6087 uint32 flags; 6088 6089 vm_page* page; 6090 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 6091 && (flags & PAGE_PRESENT) != 0 6092 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 6093 != NULL) { 6094 // The page is still mapped -- just decrement 6095 // the page's wired count. 6096 decrement_page_wired_count(page); 6097 } else { 6098 panic("unlock_memory_etc(): Failed to unwire page: address " 6099 "space %p, address: %#" B_PRIxADDR, addressSpace, 6100 nextAddress); 6101 error = B_BAD_VALUE; 6102 break; 6103 } 6104 } 6105 6106 map->Unlock(); 6107 6108 // All pages are unwired. Remove the area's wired range as well (to 6109 // avoid deadlocks we delete the range after unlocking the cache). 6110 VMAreaWiredRange* range = area->Unwire(areaStart, 6111 areaEnd - areaStart, writable); 6112 6113 cacheChainLocker.Unlock(); 6114 6115 if (range != NULL) { 6116 range->~VMAreaWiredRange(); 6117 free_etc(range, mallocFlags); 6118 } 6119 6120 if (error != B_OK) 6121 break; 6122 } 6123 6124 // get rid of the address space reference lock_memory_etc() acquired 6125 addressSpace->Put(); 6126 6127 return error; 6128 } 6129 6130 6131 status_t 6132 unlock_memory(void* address, size_t numBytes, uint32 flags) 6133 { 6134 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 6135 } 6136 6137 6138 /*! Similar to get_memory_map(), but also allows specifying the address space 6139 for the memory in question and has saner semantics. 6140 Returns \c B_OK when the complete range could be translated or 6141 \c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either 6142 case the actual number of entries is written to \c *_numEntries. Any other 6143 error case indicates complete failure; \c *_numEntries will be set to \c 0 6144 in this case.
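
	For illustration, a hedged sketch of translating a mapped kernel buffer
	into physical entries (\c buffer and \c length are placeholders):
	\code
	physical_entry entries[8];
	uint32 numEntries = 8;
	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
		entries, &numEntries);
	// on B_OK or B_BUFFER_OVERFLOW, numEntries holds the number of
	// entries that were actually filled in
	\endcode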
6145 */ 6146 status_t 6147 get_memory_map_etc(team_id team, const void* address, size_t numBytes, 6148 physical_entry* table, uint32* _numEntries) 6149 { 6150 uint32 numEntries = *_numEntries; 6151 *_numEntries = 0; 6152 6153 VMAddressSpace* addressSpace; 6154 addr_t virtualAddress = (addr_t)address; 6155 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1); 6156 phys_addr_t physicalAddress; 6157 status_t status = B_OK; 6158 int32 index = -1; 6159 addr_t offset = 0; 6160 bool interrupts = are_interrupts_enabled(); 6161 6162 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " " 6163 "entries)\n", team, address, numBytes, numEntries)); 6164 6165 if (numEntries == 0 || numBytes == 0) 6166 return B_BAD_VALUE; 6167 6168 // in which address space is the address to be found? 6169 if (IS_USER_ADDRESS(virtualAddress)) { 6170 if (team == B_CURRENT_TEAM) 6171 addressSpace = VMAddressSpace::GetCurrent(); 6172 else 6173 addressSpace = VMAddressSpace::Get(team); 6174 } else 6175 addressSpace = VMAddressSpace::GetKernel(); 6176 6177 if (addressSpace == NULL) 6178 return B_ERROR; 6179 6180 VMTranslationMap* map = addressSpace->TranslationMap(); 6181 6182 if (interrupts) 6183 map->Lock(); 6184 6185 while (offset < numBytes) { 6186 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE); 6187 uint32 flags; 6188 6189 if (interrupts) { 6190 status = map->Query((addr_t)address + offset, &physicalAddress, 6191 &flags); 6192 } else { 6193 status = map->QueryInterrupt((addr_t)address + offset, 6194 &physicalAddress, &flags); 6195 } 6196 if (status < B_OK) 6197 break; 6198 if ((flags & PAGE_PRESENT) == 0) { 6199 panic("get_memory_map() called on unmapped memory!"); 6200 return B_BAD_ADDRESS; 6201 } 6202 6203 if (index < 0 && pageOffset > 0) { 6204 physicalAddress += pageOffset; 6205 if (bytes > B_PAGE_SIZE - pageOffset) 6206 bytes = B_PAGE_SIZE - pageOffset; 6207 } 6208 6209 // need to switch to the next physical_entry? 6210 if (index < 0 || table[index].address 6211 != physicalAddress - table[index].size) { 6212 if ((uint32)++index + 1 > numEntries) { 6213 // table too small 6214 break; 6215 } 6216 table[index].address = physicalAddress; 6217 table[index].size = bytes; 6218 } else { 6219 // page fits into the current entry 6220 table[index].size += bytes; 6221 } 6222 6223 offset += bytes; 6224 } 6225 6226 if (interrupts) 6227 map->Unlock(); 6228 6229 if (status != B_OK) 6230 return status; 6231 6232 if ((uint32)index + 1 > numEntries) { 6233 *_numEntries = index; 6234 return B_BUFFER_OVERFLOW; 6235 } 6236 6237 *_numEntries = index + 1; 6238 return B_OK; 6239 } 6240 6241 6242 /*! According to the BeBook, this function should always succeed. 6243 This is no longer the case.
6244 */ 6245 extern "C" int32 6246 __get_memory_map_haiku(const void* address, size_t numBytes, 6247 physical_entry* table, int32 numEntries) 6248 { 6249 uint32 entriesRead = numEntries; 6250 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 6251 table, &entriesRead); 6252 if (error != B_OK) 6253 return error; 6254 6255 // close the entry list 6256 6257 // if it's only one entry, we will silently accept the missing ending 6258 if (numEntries == 1) 6259 return B_OK; 6260 6261 if (entriesRead + 1 > (uint32)numEntries) 6262 return B_BUFFER_OVERFLOW; 6263 6264 table[entriesRead].address = 0; 6265 table[entriesRead].size = 0; 6266 6267 return B_OK; 6268 } 6269 6270 6271 area_id 6272 area_for(void* address) 6273 { 6274 return vm_area_for((addr_t)address, true); 6275 } 6276 6277 6278 area_id 6279 find_area(const char* name) 6280 { 6281 return VMAreas::Find(name); 6282 } 6283 6284 6285 status_t 6286 _get_area_info(area_id id, area_info* info, size_t size) 6287 { 6288 if (size != sizeof(area_info) || info == NULL) 6289 return B_BAD_VALUE; 6290 6291 AddressSpaceReadLocker locker; 6292 VMArea* area; 6293 status_t status = locker.SetFromArea(id, area); 6294 if (status != B_OK) 6295 return status; 6296 6297 fill_area_info(area, info, size); 6298 return B_OK; 6299 } 6300 6301 6302 status_t 6303 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 6304 { 6305 addr_t nextBase = *(addr_t*)cookie; 6306 6307 // we're already through the list 6308 if (nextBase == (addr_t)-1) 6309 return B_ENTRY_NOT_FOUND; 6310 6311 if (team == B_CURRENT_TEAM) 6312 team = team_get_current_team_id(); 6313 6314 AddressSpaceReadLocker locker(team); 6315 if (!locker.IsLocked()) 6316 return B_BAD_TEAM_ID; 6317 6318 VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false); 6319 if (area == NULL) { 6320 nextBase = (addr_t)-1; 6321 return B_ENTRY_NOT_FOUND; 6322 } 6323 6324 fill_area_info(area, info, size); 6325 *cookie = (ssize_t)(area->Base() + 1); 6326 6327 return B_OK; 6328 } 6329 6330 6331 status_t 6332 set_area_protection(area_id area, uint32 newProtection) 6333 { 6334 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 6335 newProtection, true); 6336 } 6337 6338 6339 status_t 6340 resize_area(area_id areaID, size_t newSize) 6341 { 6342 return vm_resize_area(areaID, newSize, true); 6343 } 6344 6345 6346 /*! Transfers the specified area to a new team. The caller must be the owner 6347 of the area. 6348 */ 6349 area_id 6350 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 6351 bool kernel) 6352 { 6353 area_info info; 6354 status_t status = get_area_info(id, &info); 6355 if (status != B_OK) 6356 return status; 6357 6358 if (info.team != thread_get_current_thread()->team->id) 6359 return B_PERMISSION_DENIED; 6360 6361 // We need to mark the area cloneable so the following operations work. 6362 status = set_area_protection(id, info.protection | B_CLONEABLE_AREA); 6363 if (status != B_OK) 6364 return status; 6365 6366 area_id clonedArea = vm_clone_area(target, info.name, _address, 6367 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 6368 if (clonedArea < 0) 6369 return clonedArea; 6370 6371 status = vm_delete_area(info.team, id, kernel); 6372 if (status != B_OK) { 6373 vm_delete_area(target, clonedArea, kernel); 6374 return status; 6375 } 6376 6377 // Now we can reset the protection to whatever it was before. 
6378 set_area_protection(clonedArea, info.protection); 6379 6380 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 6381 6382 return clonedArea; 6383 } 6384 6385 6386 extern "C" area_id 6387 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 6388 size_t numBytes, uint32 addressSpec, uint32 protection, 6389 void** _virtualAddress) 6390 { 6391 if (!arch_vm_supports_protection(protection)) 6392 return B_NOT_SUPPORTED; 6393 6394 fix_protection(&protection); 6395 6396 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 6397 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 6398 false); 6399 } 6400 6401 6402 area_id 6403 clone_area(const char* name, void** _address, uint32 addressSpec, 6404 uint32 protection, area_id source) 6405 { 6406 if ((protection & B_KERNEL_PROTECTION) == 0) 6407 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 6408 6409 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 6410 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 6411 } 6412 6413 6414 area_id 6415 create_area_etc(team_id team, const char* name, size_t size, uint32 lock, 6416 uint32 protection, uint32 flags, uint32 guardSize, 6417 const virtual_address_restrictions* virtualAddressRestrictions, 6418 const physical_address_restrictions* physicalAddressRestrictions, 6419 void** _address) 6420 { 6421 fix_protection(&protection); 6422 6423 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 6424 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 6425 true, _address); 6426 } 6427 6428 6429 extern "C" area_id 6430 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 6431 size_t size, uint32 lock, uint32 protection) 6432 { 6433 fix_protection(&protection); 6434 6435 virtual_address_restrictions virtualRestrictions = {}; 6436 virtualRestrictions.address = *_address; 6437 virtualRestrictions.address_specification = addressSpec; 6438 physical_address_restrictions physicalRestrictions = {}; 6439 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 6440 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 6441 true, _address); 6442 } 6443 6444 6445 status_t 6446 delete_area(area_id area) 6447 { 6448 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 6449 } 6450 6451 6452 // #pragma mark - Userland syscalls 6453 6454 6455 status_t 6456 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 6457 addr_t size) 6458 { 6459 // filter out some unavailable values (for userland) 6460 switch (addressSpec) { 6461 case B_ANY_KERNEL_ADDRESS: 6462 case B_ANY_KERNEL_BLOCK_ADDRESS: 6463 return B_BAD_VALUE; 6464 } 6465 6466 addr_t address; 6467 6468 if (!IS_USER_ADDRESS(userAddress) 6469 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 6470 return B_BAD_ADDRESS; 6471 6472 status_t status = vm_reserve_address_range( 6473 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 6474 RESERVED_AVOID_BASE); 6475 if (status != B_OK) 6476 return status; 6477 6478 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 6479 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6480 (void*)address, size); 6481 return B_BAD_ADDRESS; 6482 } 6483 6484 return B_OK; 6485 } 6486 6487 6488 status_t 6489 _user_unreserve_address_range(addr_t address, addr_t size) 6490 { 6491 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6492 (void*)address, size); 6493 } 6494 6495 6496 area_id 
6497 _user_area_for(void* address) 6498 { 6499 return vm_area_for((addr_t)address, false); 6500 } 6501 6502 6503 area_id 6504 _user_find_area(const char* userName) 6505 { 6506 char name[B_OS_NAME_LENGTH]; 6507 6508 if (!IS_USER_ADDRESS(userName) 6509 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 6510 return B_BAD_ADDRESS; 6511 6512 return find_area(name); 6513 } 6514 6515 6516 status_t 6517 _user_get_area_info(area_id area, area_info* userInfo) 6518 { 6519 if (!IS_USER_ADDRESS(userInfo)) 6520 return B_BAD_ADDRESS; 6521 6522 area_info info; 6523 status_t status = get_area_info(area, &info); 6524 if (status < B_OK) 6525 return status; 6526 6527 // TODO: do we want to prevent userland from seeing kernel protections? 6528 //info.protection &= B_USER_PROTECTION; 6529 6530 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6531 return B_BAD_ADDRESS; 6532 6533 return status; 6534 } 6535 6536 6537 status_t 6538 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 6539 { 6540 ssize_t cookie; 6541 6542 if (!IS_USER_ADDRESS(userCookie) 6543 || !IS_USER_ADDRESS(userInfo) 6544 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 6545 return B_BAD_ADDRESS; 6546 6547 area_info info; 6548 status_t status = _get_next_area_info(team, &cookie, &info, 6549 sizeof(area_info)); 6550 if (status != B_OK) 6551 return status; 6552 6553 //info.protection &= B_USER_PROTECTION; 6554 6555 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6556 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6557 return B_BAD_ADDRESS; 6558 6559 return status; 6560 } 6561 6562 6563 status_t 6564 _user_set_area_protection(area_id area, uint32 newProtection) 6565 { 6566 if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0) 6567 return B_BAD_VALUE; 6568 6569 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6570 newProtection, false); 6571 } 6572 6573 6574 status_t 6575 _user_resize_area(area_id area, size_t newSize) 6576 { 6577 // TODO: Since we restrict deleting of areas to those owned by the team, 6578 // we should also do that for resizing (check other functions, too). 
6579 return vm_resize_area(area, newSize, false); 6580 } 6581 6582 6583 area_id 6584 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6585 team_id target) 6586 { 6587 // filter out some unavailable values (for userland) 6588 switch (addressSpec) { 6589 case B_ANY_KERNEL_ADDRESS: 6590 case B_ANY_KERNEL_BLOCK_ADDRESS: 6591 return B_BAD_VALUE; 6592 } 6593 6594 void* address; 6595 if (!IS_USER_ADDRESS(userAddress) 6596 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6597 return B_BAD_ADDRESS; 6598 6599 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6600 if (newArea < B_OK) 6601 return newArea; 6602 6603 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6604 return B_BAD_ADDRESS; 6605 6606 return newArea; 6607 } 6608 6609 6610 area_id 6611 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6612 uint32 protection, area_id sourceArea) 6613 { 6614 char name[B_OS_NAME_LENGTH]; 6615 void* address; 6616 6617 // filter out some unavailable values (for userland) 6618 switch (addressSpec) { 6619 case B_ANY_KERNEL_ADDRESS: 6620 case B_ANY_KERNEL_BLOCK_ADDRESS: 6621 return B_BAD_VALUE; 6622 } 6623 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6624 return B_BAD_VALUE; 6625 6626 if (!IS_USER_ADDRESS(userName) 6627 || !IS_USER_ADDRESS(userAddress) 6628 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6629 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6630 return B_BAD_ADDRESS; 6631 6632 fix_protection(&protection); 6633 6634 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6635 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6636 false); 6637 if (clonedArea < B_OK) 6638 return clonedArea; 6639 6640 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6641 delete_area(clonedArea); 6642 return B_BAD_ADDRESS; 6643 } 6644 6645 return clonedArea; 6646 } 6647 6648 6649 area_id 6650 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6651 size_t size, uint32 lock, uint32 protection) 6652 { 6653 char name[B_OS_NAME_LENGTH]; 6654 void* address; 6655 6656 // filter out some unavailable values (for userland) 6657 switch (addressSpec) { 6658 case B_ANY_KERNEL_ADDRESS: 6659 case B_ANY_KERNEL_BLOCK_ADDRESS: 6660 return B_BAD_VALUE; 6661 } 6662 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6663 return B_BAD_VALUE; 6664 6665 if (!IS_USER_ADDRESS(userName) 6666 || !IS_USER_ADDRESS(userAddress) 6667 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6668 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6669 return B_BAD_ADDRESS; 6670 6671 if (addressSpec == B_EXACT_ADDRESS 6672 && IS_KERNEL_ADDRESS(address)) 6673 return B_BAD_VALUE; 6674 6675 if (addressSpec == B_ANY_ADDRESS) 6676 addressSpec = B_RANDOMIZED_ANY_ADDRESS; 6677 if (addressSpec == B_BASE_ADDRESS) 6678 addressSpec = B_RANDOMIZED_BASE_ADDRESS; 6679 6680 fix_protection(&protection); 6681 6682 virtual_address_restrictions virtualRestrictions = {}; 6683 virtualRestrictions.address = address; 6684 virtualRestrictions.address_specification = addressSpec; 6685 physical_address_restrictions physicalRestrictions = {}; 6686 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6687 size, lock, protection, 0, 0, &virtualRestrictions, 6688 &physicalRestrictions, false, &address); 6689 6690 if (area >= B_OK 6691 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6692 delete_area(area); 6693 return B_BAD_ADDRESS; 6694 
} 6695 6696 return area; 6697 } 6698 6699 6700 status_t 6701 _user_delete_area(area_id area) 6702 { 6703 // Unlike the BeOS implementation, you can now only delete areas 6704 // that you have created yourself from userland. 6705 // The documentation to delete_area() explicitly states that this 6706 // will be restricted in the future, and so it will. 6707 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6708 } 6709 6710 6711 // TODO: create a BeOS style call for this! 6712 6713 area_id 6714 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6715 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6716 int fd, off_t offset) 6717 { 6718 char name[B_OS_NAME_LENGTH]; 6719 void* address; 6720 area_id area; 6721 6722 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6723 return B_BAD_VALUE; 6724 6725 fix_protection(&protection); 6726 6727 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6728 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6729 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6730 return B_BAD_ADDRESS; 6731 6732 if (addressSpec == B_EXACT_ADDRESS) { 6733 if ((addr_t)address + size < (addr_t)address 6734 || (addr_t)address % B_PAGE_SIZE != 0) { 6735 return B_BAD_VALUE; 6736 } 6737 if (!IS_USER_ADDRESS(address) 6738 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6739 return B_BAD_ADDRESS; 6740 } 6741 } 6742 6743 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6744 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6745 false); 6746 if (area < B_OK) 6747 return area; 6748 6749 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6750 return B_BAD_ADDRESS; 6751 6752 return area; 6753 } 6754 6755 6756 status_t 6757 _user_unmap_memory(void* _address, size_t size) 6758 { 6759 addr_t address = (addr_t)_address; 6760 6761 // check params 6762 if (size == 0 || (addr_t)address + size < (addr_t)address 6763 || (addr_t)address % B_PAGE_SIZE != 0) { 6764 return B_BAD_VALUE; 6765 } 6766 6767 if (!IS_USER_ADDRESS(address) 6768 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6769 return B_BAD_ADDRESS; 6770 } 6771 6772 // Write lock the address space and ensure the address range is not wired. 6773 AddressSpaceWriteLocker locker; 6774 do { 6775 status_t status = locker.SetTo(team_get_current_team_id()); 6776 if (status != B_OK) 6777 return status; 6778 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6779 size, &locker)); 6780 6781 // unmap 6782 return unmap_address_range(locker.AddressSpace(), address, size, false); 6783 } 6784 6785 6786 status_t 6787 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6788 { 6789 // check address range 6790 addr_t address = (addr_t)_address; 6791 size = PAGE_ALIGN(size); 6792 6793 if ((address % B_PAGE_SIZE) != 0) 6794 return B_BAD_VALUE; 6795 if (!is_user_address_range(_address, size)) { 6796 // weird error code required by POSIX 6797 return ENOMEM; 6798 } 6799 6800 // extend and check protection 6801 if ((protection & ~B_USER_PROTECTION) != 0) 6802 return B_BAD_VALUE; 6803 6804 fix_protection(&protection); 6805 6806 // We need to write lock the address space, since we're going to play with 6807 // the areas. Also make sure that none of the areas is wired and that we're 6808 // actually allowed to change the protection. 
6809 AddressSpaceWriteLocker locker; 6810 6811 bool restart; 6812 do { 6813 restart = false; 6814 6815 status_t status = locker.SetTo(team_get_current_team_id()); 6816 if (status != B_OK) 6817 return status; 6818 6819 // First round: Check whether the whole range is covered by areas and we 6820 // are allowed to modify them. 6821 addr_t currentAddress = address; 6822 size_t sizeLeft = size; 6823 while (sizeLeft > 0) { 6824 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6825 if (area == NULL) 6826 return B_NO_MEMORY; 6827 6828 if ((area->protection & B_KERNEL_AREA) != 0) 6829 return B_NOT_ALLOWED; 6830 if (area->protection_max != 0 6831 && (protection & area->protection_max) != (protection & B_USER_PROTECTION)) { 6832 return B_NOT_ALLOWED; 6833 } 6834 6835 addr_t offset = currentAddress - area->Base(); 6836 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6837 6838 AreaCacheLocker cacheLocker(area); 6839 6840 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6841 &locker, &cacheLocker)) { 6842 restart = true; 6843 break; 6844 } 6845 6846 cacheLocker.Unlock(); 6847 6848 currentAddress += rangeSize; 6849 sizeLeft -= rangeSize; 6850 } 6851 } while (restart); 6852 6853 // Second round: If the protections differ from that of the area, create a 6854 // page protection array and re-map mapped pages. 6855 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6856 addr_t currentAddress = address; 6857 size_t sizeLeft = size; 6858 while (sizeLeft > 0) { 6859 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6860 if (area == NULL) 6861 return B_NO_MEMORY; 6862 6863 addr_t offset = currentAddress - area->Base(); 6864 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6865 6866 currentAddress += rangeSize; 6867 sizeLeft -= rangeSize; 6868 6869 if (area->page_protections == NULL) { 6870 if (area->protection == protection) 6871 continue; 6872 if (offset == 0 && rangeSize == area->Size()) { 6873 // The whole area is covered: let set_area_protection handle it. 6874 status_t status = vm_set_area_protection(area->address_space->ID(), 6875 area->id, protection, false); 6876 if (status != B_OK) 6877 return status; 6878 continue; 6879 } 6880 6881 status_t status = allocate_area_page_protections(area); 6882 if (status != B_OK) 6883 return status; 6884 } 6885 6886 // We need to lock the complete cache chain, since we potentially unmap 6887 // pages of lower caches. 6888 VMCache* topCache = vm_area_get_locked_cache(area); 6889 VMCacheChainLocker cacheChainLocker(topCache); 6890 cacheChainLocker.LockAllSourceCaches(); 6891 6892 // Adjust the committed size, if necessary. 6893 if (topCache->source != NULL && topCache->temporary) { 6894 const bool becomesWritable = (protection & B_WRITE_AREA) != 0; 6895 ssize_t commitmentChange = 0; 6896 for (addr_t pageAddress = area->Base() + offset; 6897 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6898 if (topCache->LookupPage(pageAddress) != NULL) { 6899 // This page should already be accounted for in the commitment. 
6900 continue; 6901 } 6902 6903 const bool isWritable 6904 = (get_area_page_protection(area, pageAddress) & B_WRITE_AREA) != 0; 6905 6906 if (becomesWritable && !isWritable) 6907 commitmentChange += B_PAGE_SIZE; 6908 else if (!becomesWritable && isWritable) 6909 commitmentChange -= B_PAGE_SIZE; 6910 } 6911 6912 if (commitmentChange != 0) { 6913 const off_t newCommitment = topCache->committed_size + commitmentChange; 6914 ASSERT(newCommitment <= (topCache->virtual_end - topCache->virtual_base)); 6915 status_t status = topCache->Commit(newCommitment, VM_PRIORITY_USER); 6916 if (status != B_OK) 6917 return status; 6918 } 6919 } 6920 6921 for (addr_t pageAddress = area->Base() + offset; 6922 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6923 map->Lock(); 6924 6925 set_area_page_protection(area, pageAddress, protection); 6926 6927 phys_addr_t physicalAddress; 6928 uint32 flags; 6929 6930 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6931 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6932 map->Unlock(); 6933 continue; 6934 } 6935 6936 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6937 if (page == NULL) { 6938 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6939 "\n", area, physicalAddress); 6940 map->Unlock(); 6941 return B_ERROR; 6942 } 6943 6944 // If the page is not in the topmost cache and write access is 6945 // requested, we have to unmap it. Otherwise we can re-map it with 6946 // the new protection. 6947 bool unmapPage = page->Cache() != topCache 6948 && (protection & B_WRITE_AREA) != 0; 6949 6950 if (!unmapPage) 6951 map->ProtectPage(area, pageAddress, protection); 6952 6953 map->Unlock(); 6954 6955 if (unmapPage) { 6956 DEBUG_PAGE_ACCESS_START(page); 6957 unmap_page(area, pageAddress); 6958 DEBUG_PAGE_ACCESS_END(page); 6959 } 6960 } 6961 } 6962 6963 return B_OK; 6964 } 6965 6966 6967 status_t 6968 _user_sync_memory(void* _address, size_t size, uint32 flags) 6969 { 6970 addr_t address = (addr_t)_address; 6971 size = PAGE_ALIGN(size); 6972 6973 // check params 6974 if ((address % B_PAGE_SIZE) != 0) 6975 return B_BAD_VALUE; 6976 if (!is_user_address_range(_address, size)) { 6977 // weird error code required by POSIX 6978 return ENOMEM; 6979 } 6980 6981 bool writeSync = (flags & MS_SYNC) != 0; 6982 bool writeAsync = (flags & MS_ASYNC) != 0; 6983 if (writeSync && writeAsync) 6984 return B_BAD_VALUE; 6985 6986 if (size == 0 || (!writeSync && !writeAsync)) 6987 return B_OK; 6988 6989 // iterate through the range and sync all concerned areas 6990 while (size > 0) { 6991 // read lock the address space 6992 AddressSpaceReadLocker locker; 6993 status_t error = locker.SetTo(team_get_current_team_id()); 6994 if (error != B_OK) 6995 return error; 6996 6997 // get the first area 6998 VMArea* area = locker.AddressSpace()->LookupArea(address); 6999 if (area == NULL) 7000 return B_NO_MEMORY; 7001 7002 uint32 offset = address - area->Base(); 7003 size_t rangeSize = min_c(area->Size() - offset, size); 7004 offset += area->cache_offset; 7005 7006 // lock the cache 7007 AreaCacheLocker cacheLocker(area); 7008 if (!cacheLocker) 7009 return B_BAD_VALUE; 7010 VMCache* cache = area->cache; 7011 7012 locker.Unlock(); 7013 7014 uint32 firstPage = offset >> PAGE_SHIFT; 7015 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 7016 7017 // write the pages 7018 if (cache->type == CACHE_TYPE_VNODE) { 7019 if (writeSync) { 7020 // synchronous 7021 error = vm_page_write_modified_page_range(cache, firstPage, 7022 endPage); 7023 if (error != 
B_OK) 7024 return error; 7025 } else { 7026 // asynchronous 7027 vm_page_schedule_write_page_range(cache, firstPage, endPage); 7028 // TODO: This is probably not quite what is supposed to happen. 7029 // Especially when a lot has to be written, it might take ages 7030 // until it really hits the disk. 7031 } 7032 } 7033 7034 address += rangeSize; 7035 size -= rangeSize; 7036 } 7037 7038 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 7039 // synchronize multiple mappings of the same file. In our VM they never get 7040 // out of sync, though, so we don't have to do anything. 7041 7042 return B_OK; 7043 } 7044 7045 7046 status_t 7047 _user_memory_advice(void* _address, size_t size, uint32 advice) 7048 { 7049 addr_t address = (addr_t)_address; 7050 if ((address % B_PAGE_SIZE) != 0) 7051 return B_BAD_VALUE; 7052 7053 size = PAGE_ALIGN(size); 7054 if (!is_user_address_range(_address, size)) { 7055 // weird error code required by POSIX 7056 return B_NO_MEMORY; 7057 } 7058 7059 switch (advice) { 7060 case MADV_NORMAL: 7061 case MADV_SEQUENTIAL: 7062 case MADV_RANDOM: 7063 case MADV_WILLNEED: 7064 case MADV_DONTNEED: 7065 // TODO: Implement! 7066 break; 7067 7068 case MADV_FREE: 7069 { 7070 AddressSpaceWriteLocker locker; 7071 do { 7072 status_t status = locker.SetTo(team_get_current_team_id()); 7073 if (status != B_OK) 7074 return status; 7075 } while (wait_if_address_range_is_wired(locker.AddressSpace(), 7076 address, size, &locker)); 7077 7078 discard_address_range(locker.AddressSpace(), address, size, false); 7079 break; 7080 } 7081 7082 default: 7083 return B_BAD_VALUE; 7084 } 7085 7086 return B_OK; 7087 } 7088 7089 7090 status_t 7091 _user_get_memory_properties(team_id teamID, const void* address, 7092 uint32* _protected, uint32* _lock) 7093 { 7094 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 7095 return B_BAD_ADDRESS; 7096 7097 AddressSpaceReadLocker locker; 7098 status_t error = locker.SetTo(teamID); 7099 if (error != B_OK) 7100 return error; 7101 7102 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 7103 if (area == NULL) 7104 return B_NO_MEMORY; 7105 7106 uint32 protection = get_area_page_protection(area, (addr_t)address); 7107 uint32 wiring = area->wiring; 7108 7109 locker.Unlock(); 7110 7111 error = user_memcpy(_protected, &protection, sizeof(protection)); 7112 if (error != B_OK) 7113 return error; 7114 7115 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 7116 7117 return error; 7118 } 7119 7120 7121 static status_t 7122 user_set_memory_swappable(const void* _address, size_t size, bool swappable) 7123 { 7124 #if ENABLE_SWAP_SUPPORT 7125 // check address range 7126 addr_t address = (addr_t)_address; 7127 size = PAGE_ALIGN(size); 7128 7129 if ((address % B_PAGE_SIZE) != 0) 7130 return EINVAL; 7131 if (!is_user_address_range(_address, size)) 7132 return EINVAL; 7133 7134 const addr_t endAddress = address + size; 7135 7136 AddressSpaceReadLocker addressSpaceLocker; 7137 status_t error = addressSpaceLocker.SetTo(team_get_current_team_id()); 7138 if (error != B_OK) 7139 return error; 7140 VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace(); 7141 7142 // iterate through all concerned areas 7143 addr_t nextAddress = address; 7144 while (nextAddress != endAddress) { 7145 // get the next area 7146 VMArea* area = addressSpace->LookupArea(nextAddress); 7147 if (area == NULL) { 7148 error = B_BAD_ADDRESS; 7149 break; 7150 } 7151 7152 const addr_t areaStart = nextAddress; 7153 const addr_t areaEnd = std::min(endAddress, 
area->Base() + area->Size()); 7154 nextAddress = areaEnd; 7155 7156 error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 7157 if (error != B_OK) { 7158 // We don't need to unset or reset things on failure. 7159 break; 7160 } 7161 7162 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 7163 VMAnonymousCache* anonCache = NULL; 7164 if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) { 7165 // This memory will never be swapped anyway. Nothing to do. 7166 } else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) { 7167 error = anonCache->SetCanSwapPages(areaStart - area->Base(), 7168 areaEnd - areaStart, swappable); 7169 } else { 7170 // Some other cache type? We cannot affect anything here. 7171 error = EINVAL; 7172 } 7173 7174 cacheChainLocker.Unlock(); 7175 7176 unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 7177 if (error != B_OK) 7178 break; 7179 } 7180 7181 return error; 7182 #else 7183 // No swap support? Nothing to do. 7184 return B_OK; 7185 #endif 7186 } 7187 7188 7189 status_t 7190 _user_mlock(const void* _address, size_t size) 7191 { 7192 return user_set_memory_swappable(_address, size, false); 7193 } 7194 7195 7196 status_t 7197 _user_munlock(const void* _address, size_t size) 7198 { 7199 // TODO: B_SHARED_AREAs need to be handled a bit differently: 7200 // if multiple clones of an area had mlock() called on them, 7201 // munlock() must also be called on all of them to actually unlock. 7202 // (At present, the first munlock() will unlock all.) 7203 // TODO: fork() should automatically unlock memory in the child. 7204 return user_set_memory_swappable(_address, size, true); 7205 } 7206 7207 7208 // #pragma mark -- compatibility 7209 7210 7211 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7212 7213 7214 struct physical_entry_beos { 7215 uint32 address; 7216 uint32 size; 7217 }; 7218 7219 7220 /*! The physical_entry structure has changed. We need to translate it to the 7221 old one. 7222 */ 7223 extern "C" int32 7224 __get_memory_map_beos(const void* _address, size_t numBytes, 7225 physical_entry_beos* table, int32 numEntries) 7226 { 7227 if (numEntries <= 0) 7228 return B_BAD_VALUE; 7229 7230 const uint8* address = (const uint8*)_address; 7231 7232 int32 count = 0; 7233 while (numBytes > 0 && count < numEntries) { 7234 physical_entry entry; 7235 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 7236 if (result < 0) { 7237 if (result != B_BUFFER_OVERFLOW) 7238 return result; 7239 } 7240 7241 if (entry.address >= (phys_addr_t)1 << 32) { 7242 panic("get_memory_map(): Address is greater than 4 GB!"); 7243 return B_ERROR; 7244 } 7245 7246 table[count].address = entry.address; 7247 table[count++].size = entry.size; 7248 7249 address += entry.size; 7250 numBytes -= entry.size; 7251 } 7252 7253 // null-terminate the table, if possible 7254 if (count < numEntries) { 7255 table[count].address = 0; 7256 table[count].size = 0; 7257 } 7258 7259 return B_OK; 7260 } 7261 7262 7263 /*! The type of the \a physicalAddress parameter has changed from void* to 7264 phys_addr_t. 7265 */ 7266 extern "C" area_id 7267 __map_physical_memory_beos(const char* name, void* physicalAddress, 7268 size_t numBytes, uint32 addressSpec, uint32 protection, 7269 void** _virtualAddress) 7270 { 7271 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 7272 addressSpec, protection, _virtualAddress); 7273 } 7274 7275 7276 /*! 
The caller might not be able to deal with physical addresses >= 4 GB, so 7277 we meddle with the \a lock parameter to force 32 bit. 7278 */ 7279 extern "C" area_id 7280 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 7281 size_t size, uint32 lock, uint32 protection) 7282 { 7283 switch (lock) { 7284 case B_NO_LOCK: 7285 break; 7286 case B_FULL_LOCK: 7287 case B_LAZY_LOCK: 7288 lock = B_32_BIT_FULL_LOCK; 7289 break; 7290 case B_CONTIGUOUS: 7291 lock = B_32_BIT_CONTIGUOUS; 7292 break; 7293 } 7294 7295 return __create_area_haiku(name, _address, addressSpec, size, lock, 7296 protection); 7297 } 7298 7299 7300 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 7301 "BASE"); 7302 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 7303 "map_physical_memory@", "BASE"); 7304 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 7305 "BASE"); 7306 7307 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7308 "get_memory_map@@", "1_ALPHA3"); 7309 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7310 "map_physical_memory@@", "1_ALPHA3"); 7311 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7312 "1_ALPHA3"); 7313 7314 7315 #else 7316 7317 7318 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7319 "get_memory_map@@", "BASE"); 7320 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7321 "map_physical_memory@@", "BASE"); 7322 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7323 "BASE"); 7324 7325 7326 #endif // defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7327