1 /* 2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleterDrivers.h> 25 26 #include <symbol_versioning.h> 27 28 #include <arch/cpu.h> 29 #include <arch/vm.h> 30 #include <arch/user_memory.h> 31 #include <boot/elf.h> 32 #include <boot/stage2.h> 33 #include <condition_variable.h> 34 #include <console.h> 35 #include <debug.h> 36 #include <file_cache.h> 37 #include <fs/fd.h> 38 #include <heap.h> 39 #include <kernel.h> 40 #include <int.h> 41 #include <lock.h> 42 #include <low_resource_manager.h> 43 #include <slab/Slab.h> 44 #include <smp.h> 45 #include <system_info.h> 46 #include <thread.h> 47 #include <team.h> 48 #include <tracing.h> 49 #include <util/AutoLock.h> 50 #include <util/BitUtils.h> 51 #include <util/ThreadAutoLock.h> 52 #include <vm/vm_page.h> 53 #include <vm/vm_priv.h> 54 #include <vm/VMAddressSpace.h> 55 #include <vm/VMArea.h> 56 #include <vm/VMCache.h> 57 58 #include "VMAddressSpaceLocking.h" 59 #include "VMAnonymousCache.h" 60 #include "VMAnonymousNoSwapCache.h" 61 #include "IORequest.h" 62 63 64 //#define TRACE_VM 65 //#define TRACE_FAULTS 66 #ifdef TRACE_VM 67 # define TRACE(x) dprintf x 68 #else 69 # define TRACE(x) ; 70 #endif 71 #ifdef TRACE_FAULTS 72 # define FTRACE(x) dprintf x 73 #else 74 # define FTRACE(x) ; 75 #endif 76 77 78 namespace { 79 80 class AreaCacheLocking { 81 public: 82 inline bool Lock(VMCache* lockable) 83 { 84 return false; 85 } 86 87 inline void Unlock(VMCache* lockable) 88 { 89 vm_area_put_locked_cache(lockable); 90 } 91 }; 92 93 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 94 public: 95 inline AreaCacheLocker(VMCache* cache = NULL) 96 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 97 { 98 } 99 100 inline AreaCacheLocker(VMArea* area) 101 : AutoLocker<VMCache, AreaCacheLocking>() 102 { 103 SetTo(area); 104 } 105 106 inline void SetTo(VMCache* cache, bool alreadyLocked) 107 { 108 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 109 } 110 111 inline void SetTo(VMArea* area) 112 { 113 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 114 area != NULL ? 
vm_area_get_locked_cache(area) : NULL, true, true); 115 } 116 }; 117 118 119 class VMCacheChainLocker { 120 public: 121 VMCacheChainLocker() 122 : 123 fTopCache(NULL), 124 fBottomCache(NULL) 125 { 126 } 127 128 VMCacheChainLocker(VMCache* topCache) 129 : 130 fTopCache(topCache), 131 fBottomCache(topCache) 132 { 133 } 134 135 ~VMCacheChainLocker() 136 { 137 Unlock(); 138 } 139 140 void SetTo(VMCache* topCache) 141 { 142 fTopCache = topCache; 143 fBottomCache = topCache; 144 145 if (topCache != NULL) 146 topCache->SetUserData(NULL); 147 } 148 149 VMCache* LockSourceCache() 150 { 151 if (fBottomCache == NULL || fBottomCache->source == NULL) 152 return NULL; 153 154 VMCache* previousCache = fBottomCache; 155 156 fBottomCache = fBottomCache->source; 157 fBottomCache->Lock(); 158 fBottomCache->AcquireRefLocked(); 159 fBottomCache->SetUserData(previousCache); 160 161 return fBottomCache; 162 } 163 164 void LockAllSourceCaches() 165 { 166 while (LockSourceCache() != NULL) { 167 } 168 } 169 170 void Unlock(VMCache* exceptCache = NULL) 171 { 172 if (fTopCache == NULL) 173 return; 174 175 // Unlock caches in source -> consumer direction. This is important to 176 // avoid double-locking and a reversal of locking order in case a cache 177 // is eligible for merging. 178 VMCache* cache = fBottomCache; 179 while (cache != NULL) { 180 VMCache* nextCache = (VMCache*)cache->UserData(); 181 if (cache != exceptCache) 182 cache->ReleaseRefAndUnlock(cache != fTopCache); 183 184 if (cache == fTopCache) 185 break; 186 187 cache = nextCache; 188 } 189 190 fTopCache = NULL; 191 fBottomCache = NULL; 192 } 193 194 void UnlockKeepRefs(bool keepTopCacheLocked) 195 { 196 if (fTopCache == NULL) 197 return; 198 199 VMCache* nextCache = fBottomCache; 200 VMCache* cache = NULL; 201 202 while (keepTopCacheLocked 203 ? nextCache != fTopCache : cache != fTopCache) { 204 cache = nextCache; 205 nextCache = (VMCache*)cache->UserData(); 206 cache->Unlock(cache != fTopCache); 207 } 208 } 209 210 void RelockCaches(bool topCacheLocked) 211 { 212 if (fTopCache == NULL) 213 return; 214 215 VMCache* nextCache = fTopCache; 216 VMCache* cache = NULL; 217 if (topCacheLocked) { 218 cache = nextCache; 219 nextCache = cache->source; 220 } 221 222 while (cache != fBottomCache && nextCache != NULL) { 223 VMCache* consumer = cache; 224 cache = nextCache; 225 nextCache = cache->source; 226 cache->Lock(); 227 cache->SetUserData(consumer); 228 } 229 } 230 231 private: 232 VMCache* fTopCache; 233 VMCache* fBottomCache; 234 }; 235 236 } // namespace 237 238 239 // The memory reserve an allocation of a certain priority must not touch.
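// The array is indexed by the VM_PRIORITY_* constants (user, system, VIP):
// an allocation of a given priority must leave the listed amount of memory
// untouched, so e.g. user allocations keep VM_MEMORY_RESERVE_USER bytes free
// for more important callers, while VIP allocations may use everything that
// is left.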
240 static const size_t kMemoryReserveForPriority[] = { 241 VM_MEMORY_RESERVE_USER, // user 242 VM_MEMORY_RESERVE_SYSTEM, // system 243 0 // VIP 244 }; 245 246 247 ObjectCache* gPageMappingsObjectCache; 248 249 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 250 251 static off_t sAvailableMemory; 252 static off_t sNeededMemory; 253 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 254 static uint32 sPageFaults; 255 256 static VMPhysicalPageMapper* sPhysicalPageMapper; 257 258 #if DEBUG_CACHE_LIST 259 260 struct cache_info { 261 VMCache* cache; 262 addr_t page_count; 263 addr_t committed; 264 }; 265 266 static const int kCacheInfoTableCount = 100 * 1024; 267 static cache_info* sCacheInfoTable; 268 269 #endif // DEBUG_CACHE_LIST 270 271 272 // function declarations 273 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 274 bool addressSpaceCleanup); 275 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 276 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage); 277 static status_t map_backing_store(VMAddressSpace* addressSpace, 278 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring, 279 int protection, int protectionMax, int mapping, uint32 flags, 280 const virtual_address_restrictions* addressRestrictions, bool kernel, 281 VMArea** _area, void** _virtualAddress); 282 static void fix_protection(uint32* protection); 283 284 285 // #pragma mark - 286 287 288 #if VM_PAGE_FAULT_TRACING 289 290 namespace VMPageFaultTracing { 291 292 class PageFaultStart : public AbstractTraceEntry { 293 public: 294 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 295 : 296 fAddress(address), 297 fPC(pc), 298 fWrite(write), 299 fUser(user) 300 { 301 Initialized(); 302 } 303 304 virtual void AddDump(TraceOutput& out) 305 { 306 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 307 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 308 } 309 310 private: 311 addr_t fAddress; 312 addr_t fPC; 313 bool fWrite; 314 bool fUser; 315 }; 316 317 318 // page fault errors 319 enum { 320 PAGE_FAULT_ERROR_NO_AREA = 0, 321 PAGE_FAULT_ERROR_KERNEL_ONLY, 322 PAGE_FAULT_ERROR_WRITE_PROTECTED, 323 PAGE_FAULT_ERROR_READ_PROTECTED, 324 PAGE_FAULT_ERROR_EXECUTE_PROTECTED, 325 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 326 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 327 }; 328 329 330 class PageFaultError : public AbstractTraceEntry { 331 public: 332 PageFaultError(area_id area, status_t error) 333 : 334 fArea(area), 335 fError(error) 336 { 337 Initialized(); 338 } 339 340 virtual void AddDump(TraceOutput& out) 341 { 342 switch (fError) { 343 case PAGE_FAULT_ERROR_NO_AREA: 344 out.Print("page fault error: no area"); 345 break; 346 case PAGE_FAULT_ERROR_KERNEL_ONLY: 347 out.Print("page fault error: area: %ld, kernel only", fArea); 348 break; 349 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 350 out.Print("page fault error: area: %ld, write protected", 351 fArea); 352 break; 353 case PAGE_FAULT_ERROR_READ_PROTECTED: 354 out.Print("page fault error: area: %ld, read protected", fArea); 355 break; 356 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED: 357 out.Print("page fault error: area: %ld, execute protected", 358 fArea); 359 break; 360 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 361 out.Print("page fault error: kernel touching bad user memory"); 362 break; 363 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 364 out.Print("page fault error: no address space"); 365 break; 366 default: 367 out.Print("page fault error: area: %ld, error: %s", fArea, 368 strerror(fError)); 369 break; 370 } 371 } 372 373 private: 374 area_id fArea; 375 status_t fError; 376 }; 377 378 379 class PageFaultDone : public AbstractTraceEntry { 380 public: 381 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 382 vm_page* page) 383 : 384 fArea(area), 385 fTopCache(topCache), 386 fCache(cache), 387 fPage(page) 388 { 389 Initialized(); 390 } 391 392 virtual void AddDump(TraceOutput& out) 393 { 394 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 395 "page: %p", fArea, fTopCache, fCache, fPage); 396 } 397 398 private: 399 area_id fArea; 400 VMCache* fTopCache; 401 VMCache* fCache; 402 vm_page* fPage; 403 }; 404 405 } // namespace VMPageFaultTracing 406 407 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 408 #else 409 # define TPF(x) ; 410 #endif // VM_PAGE_FAULT_TRACING 411 412 413 // #pragma mark - 414 415 416 /*! The page's cache must be locked. 417 */ 418 static inline void 419 increment_page_wired_count(vm_page* page) 420 { 421 if (!page->IsMapped()) 422 atomic_add(&gMappedPagesCount, 1); 423 page->IncrementWiredCount(); 424 } 425 426 427 /*! The page's cache must be locked. 428 */ 429 static inline void 430 decrement_page_wired_count(vm_page* page) 431 { 432 page->DecrementWiredCount(); 433 if (!page->IsMapped()) 434 atomic_add(&gMappedPagesCount, -1); 435 } 436 437 438 static inline addr_t 439 virtual_page_address(VMArea* area, vm_page* page) 440 { 441 return area->Base() 442 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 443 } 444 445 446 static inline bool 447 is_page_in_area(VMArea* area, vm_page* page) 448 { 449 off_t pageCacheOffsetBytes = (off_t)(page->cache_offset << PAGE_SHIFT); 450 return pageCacheOffsetBytes >= area->cache_offset 451 && pageCacheOffsetBytes < area->cache_offset + (off_t)area->Size(); 452 } 453 454 455 //! 
You need to have the address space locked when calling this function 456 static VMArea* 457 lookup_area(VMAddressSpace* addressSpace, area_id id) 458 { 459 VMAreas::ReadLock(); 460 461 VMArea* area = VMAreas::LookupLocked(id); 462 if (area != NULL && area->address_space != addressSpace) 463 area = NULL; 464 465 VMAreas::ReadUnlock(); 466 467 return area; 468 } 469 470 471 static inline size_t 472 area_page_protections_size(size_t areaSize) 473 { 474 // In the page protections we store only the three user protections, 475 // so we use 4 bits per page. 476 return (areaSize / B_PAGE_SIZE + 1) / 2; 477 } 478 479 480 static status_t 481 allocate_area_page_protections(VMArea* area) 482 { 483 size_t bytes = area_page_protections_size(area->Size()); 484 area->page_protections = (uint8*)malloc_etc(bytes, 485 area->address_space == VMAddressSpace::Kernel() 486 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 487 if (area->page_protections == NULL) 488 return B_NO_MEMORY; 489 490 // init the page protections for all pages to that of the area 491 uint32 areaProtection = area->protection 492 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 493 memset(area->page_protections, areaProtection | (areaProtection << 4), 494 bytes); 495 return B_OK; 496 } 497 498 499 static inline void 500 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 501 { 502 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 503 addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 504 uint8& entry = area->page_protections[pageIndex / 2]; 505 if (pageIndex % 2 == 0) 506 entry = (entry & 0xf0) | protection; 507 else 508 entry = (entry & 0x0f) | (protection << 4); 509 } 510 511 512 static inline uint32 513 get_area_page_protection(VMArea* area, addr_t pageAddress) 514 { 515 if (area->page_protections == NULL) 516 return area->protection; 517 518 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 519 uint32 protection = area->page_protections[pageIndex / 2]; 520 if (pageIndex % 2 == 0) 521 protection &= 0x0f; 522 else 523 protection >>= 4; 524 525 uint32 kernelProtection = 0; 526 if ((protection & B_READ_AREA) != 0) 527 kernelProtection |= B_KERNEL_READ_AREA; 528 if ((protection & B_WRITE_AREA) != 0) 529 kernelProtection |= B_KERNEL_WRITE_AREA; 530 531 // If this is a kernel area we return only the kernel flags. 532 if (area->address_space == VMAddressSpace::Kernel()) 533 return kernelProtection; 534 535 return protection | kernelProtection; 536 } 537 538 539 static inline uint8* 540 realloc_page_protections(uint8* pageProtections, size_t areaSize, 541 uint32 allocationFlags) 542 { 543 size_t bytes = area_page_protections_size(areaSize); 544 return (uint8*)realloc_etc(pageProtections, bytes, allocationFlags); 545 } 546 547 548 /*! The caller must have reserved enough pages the translation map 549 implementation might need to map this page. 550 The page's cache must be locked. 551 */ 552 static status_t 553 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 554 vm_page_reservation* reservation) 555 { 556 VMTranslationMap* map = area->address_space->TranslationMap(); 557 558 bool wasMapped = page->IsMapped(); 559 560 if (area->wiring == B_NO_LOCK) { 561 DEBUG_PAGE_ACCESS_CHECK(page); 562 563 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 564 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 565 gPageMappingsObjectCache, 566 CACHE_DONT_WAIT_FOR_MEMORY 567 | (isKernelSpace ? 
CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 568 if (mapping == NULL) 569 return B_NO_MEMORY; 570 571 mapping->page = page; 572 mapping->area = area; 573 574 map->Lock(); 575 576 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 577 area->MemoryType(), reservation); 578 579 // insert mapping into lists 580 if (!page->IsMapped()) 581 atomic_add(&gMappedPagesCount, 1); 582 583 page->mappings.Add(mapping); 584 area->mappings.Add(mapping); 585 586 map->Unlock(); 587 } else { 588 DEBUG_PAGE_ACCESS_CHECK(page); 589 590 map->Lock(); 591 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 592 area->MemoryType(), reservation); 593 map->Unlock(); 594 595 increment_page_wired_count(page); 596 } 597 598 if (!wasMapped) { 599 // The page is mapped now, so we must not remain in the cached queue. 600 // It also makes sense to move it from the inactive to the active, since 601 // otherwise the page daemon wouldn't come to keep track of it (in idle 602 // mode) -- if the page isn't touched, it will be deactivated after a 603 // full iteration through the queue at the latest. 604 if (page->State() == PAGE_STATE_CACHED 605 || page->State() == PAGE_STATE_INACTIVE) { 606 vm_page_set_state(page, PAGE_STATE_ACTIVE); 607 } 608 } 609 610 return B_OK; 611 } 612 613 614 /*! If \a preserveModified is \c true, the caller must hold the lock of the 615 page's cache. 616 */ 617 static inline bool 618 unmap_page(VMArea* area, addr_t virtualAddress) 619 { 620 return area->address_space->TranslationMap()->UnmapPage(area, 621 virtualAddress, true); 622 } 623 624 625 /*! If \a preserveModified is \c true, the caller must hold the lock of all 626 mapped pages' caches. 627 */ 628 static inline void 629 unmap_pages(VMArea* area, addr_t base, size_t size) 630 { 631 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 632 } 633 634 635 static inline bool 636 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset) 637 { 638 if (address < area->Base()) { 639 offset = area->Base() - address; 640 if (offset >= size) 641 return false; 642 643 address = area->Base(); 644 size -= offset; 645 offset = 0; 646 if (size > area->Size()) 647 size = area->Size(); 648 649 return true; 650 } 651 652 offset = address - area->Base(); 653 if (offset >= area->Size()) 654 return false; 655 656 if (size >= area->Size() - offset) 657 size = area->Size() - offset; 658 659 return true; 660 } 661 662 663 /*! Cuts a piece out of an area. If the given cut range covers the complete 664 area, it is deleted. If it covers the beginning or the end, the area is 665 resized accordingly. If the range covers some part in the middle of the 666 area, it is split in two; in this case the second area is returned via 667 \a _secondArea (the variable is left untouched in the other cases). 668 The address space must be write locked. 669 The caller must ensure that no part of the given range is wired. 670 */ 671 static status_t 672 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 673 addr_t size, VMArea** _secondArea, bool kernel) 674 { 675 addr_t offset; 676 if (!intersect_area(area, address, size, offset)) 677 return B_OK; 678 679 // Is the area fully covered? 
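	// If so, there is nothing to resize or split -- the area is deleted as a
	// whole and no second area is created.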
680 if (address == area->Base() && size == area->Size()) { 681 delete_area(addressSpace, area, false); 682 return B_OK; 683 } 684 685 int priority; 686 uint32 allocationFlags; 687 if (addressSpace == VMAddressSpace::Kernel()) { 688 priority = VM_PRIORITY_SYSTEM; 689 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 690 | HEAP_DONT_LOCK_KERNEL_SPACE; 691 } else { 692 priority = VM_PRIORITY_USER; 693 allocationFlags = 0; 694 } 695 696 VMCache* cache = vm_area_get_locked_cache(area); 697 VMCacheChainLocker cacheChainLocker(cache); 698 cacheChainLocker.LockAllSourceCaches(); 699 700 // If no one else uses the area's cache and it's an anonymous cache, we can 701 // resize or split it, too. 702 bool onlyCacheUser = cache->areas == area && area->cache_next == NULL 703 && cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM; 704 705 const addr_t oldSize = area->Size(); 706 707 // Cut the end only? 708 if (offset > 0 && size == area->Size() - offset) { 709 status_t error = addressSpace->ShrinkAreaTail(area, offset, 710 allocationFlags); 711 if (error != B_OK) 712 return error; 713 714 if (area->page_protections != NULL) { 715 uint8* newProtections = realloc_page_protections( 716 area->page_protections, area->Size(), allocationFlags); 717 718 if (newProtections == NULL) { 719 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 720 return B_NO_MEMORY; 721 } 722 723 area->page_protections = newProtections; 724 } 725 726 // unmap pages 727 unmap_pages(area, address, size); 728 729 if (onlyCacheUser) { 730 // Since VMCache::Resize() can temporarily drop the lock, we must 731 // unlock all lower caches to prevent locking order inversion. 732 cacheChainLocker.Unlock(cache); 733 cache->Resize(cache->virtual_base + offset, priority); 734 cache->ReleaseRefAndUnlock(); 735 } 736 737 return B_OK; 738 } 739 740 // Cut the beginning only? 741 if (area->Base() == address) { 742 uint8* newProtections = NULL; 743 if (area->page_protections != NULL) { 744 // Allocate all memory before shifting as the shift might lose some 745 // bits. 746 newProtections = realloc_page_protections(NULL, area->Size(), 747 allocationFlags); 748 749 if (newProtections == NULL) 750 return B_NO_MEMORY; 751 } 752 753 // resize the area 754 status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size, 755 allocationFlags); 756 if (error != B_OK) { 757 if (newProtections != NULL) 758 free_etc(newProtections, allocationFlags); 759 return error; 760 } 761 762 if (area->page_protections != NULL) { 763 size_t oldBytes = area_page_protections_size(oldSize); 764 ssize_t pagesShifted = (oldSize - area->Size()) / B_PAGE_SIZE; 765 bitmap_shift<uint8>(area->page_protections, oldBytes * 8, -(pagesShifted * 4)); 766 767 size_t bytes = area_page_protections_size(area->Size()); 768 memcpy(newProtections, area->page_protections, bytes); 769 free_etc(area->page_protections, allocationFlags); 770 area->page_protections = newProtections; 771 } 772 773 // unmap pages 774 unmap_pages(area, address, size); 775 776 if (onlyCacheUser) { 777 // Since VMCache::Rebase() can temporarily drop the lock, we must 778 // unlock all lower caches to prevent locking order inversion. 779 cacheChainLocker.Unlock(cache); 780 cache->Rebase(cache->virtual_base + size, priority); 781 cache->ReleaseRefAndUnlock(); 782 } 783 area->cache_offset += size; 784 785 return B_OK; 786 } 787 788 // The tough part -- cut a piece out of the middle of the area. 
789 // We do that by shrinking the area to the begin section and creating a 790 // new area for the end section. 791 addr_t firstNewSize = offset; 792 addr_t secondBase = address + size; 793 addr_t secondSize = area->Size() - offset - size; 794 795 // unmap pages 796 unmap_pages(area, address, area->Size() - firstNewSize); 797 798 // resize the area 799 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 800 allocationFlags); 801 if (error != B_OK) 802 return error; 803 804 uint8* areaNewProtections = NULL; 805 uint8* secondAreaNewProtections = NULL; 806 807 // Try to allocate the new memory before making some hard to reverse 808 // changes. 809 if (area->page_protections != NULL) { 810 areaNewProtections = realloc_page_protections(NULL, area->Size(), 811 allocationFlags); 812 secondAreaNewProtections = realloc_page_protections(NULL, secondSize, 813 allocationFlags); 814 815 if (areaNewProtections == NULL || secondAreaNewProtections == NULL) { 816 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 817 free_etc(areaNewProtections, allocationFlags); 818 free_etc(secondAreaNewProtections, allocationFlags); 819 return B_NO_MEMORY; 820 } 821 } 822 823 virtual_address_restrictions addressRestrictions = {}; 824 addressRestrictions.address = (void*)secondBase; 825 addressRestrictions.address_specification = B_EXACT_ADDRESS; 826 VMArea* secondArea; 827 828 if (onlyCacheUser) { 829 // Create a new cache for the second area. 830 VMCache* secondCache; 831 error = VMCacheFactory::CreateAnonymousCache(secondCache, 832 area->protection & B_OVERCOMMITTING_AREA, 0, 0, 833 dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority); 834 if (error != B_OK) { 835 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 836 free_etc(areaNewProtections, allocationFlags); 837 free_etc(secondAreaNewProtections, allocationFlags); 838 return error; 839 } 840 841 secondCache->Lock(); 842 secondCache->temporary = cache->temporary; 843 secondCache->virtual_base = area->cache_offset; 844 secondCache->virtual_end = area->cache_offset + secondSize; 845 846 // Transfer the concerned pages from the first cache. 847 off_t adoptOffset = area->cache_offset + secondBase - area->Base(); 848 error = secondCache->Adopt(cache, adoptOffset, secondSize, 849 area->cache_offset); 850 851 if (error == B_OK) { 852 // Since VMCache::Resize() can temporarily drop the lock, we must 853 // unlock all lower caches to prevent locking order inversion. 854 cacheChainLocker.Unlock(cache); 855 cache->Resize(cache->virtual_base + firstNewSize, priority); 856 // Don't unlock the cache yet because we might have to resize it 857 // back. 858 859 // Map the second area. 860 error = map_backing_store(addressSpace, secondCache, 861 area->cache_offset, area->name, secondSize, area->wiring, 862 area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0, 863 &addressRestrictions, kernel, &secondArea, NULL); 864 } 865 866 if (error != B_OK) { 867 // Restore the original cache. 868 cache->Resize(cache->virtual_base + oldSize, priority); 869 870 // Move the pages back. 871 status_t readoptStatus = cache->Adopt(secondCache, 872 area->cache_offset, secondSize, adoptOffset); 873 if (readoptStatus != B_OK) { 874 // Some (swap) pages have not been moved back and will be lost 875 // once the second cache is deleted. 876 panic("failed to restore cache range: %s", 877 strerror(readoptStatus)); 878 879 // TODO: Handle out of memory cases by freeing memory and 880 // retrying. 
881 } 882 883 cache->ReleaseRefAndUnlock(); 884 secondCache->ReleaseRefAndUnlock(); 885 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 886 free_etc(areaNewProtections, allocationFlags); 887 free_etc(secondAreaNewProtections, allocationFlags); 888 return error; 889 } 890 891 // Now we can unlock it. 892 cache->ReleaseRefAndUnlock(); 893 secondCache->Unlock(); 894 } else { 895 error = map_backing_store(addressSpace, cache, area->cache_offset 896 + (secondBase - area->Base()), 897 area->name, secondSize, area->wiring, area->protection, 898 area->protection_max, REGION_NO_PRIVATE_MAP, 0, 899 &addressRestrictions, kernel, &secondArea, NULL); 900 if (error != B_OK) { 901 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 902 free_etc(areaNewProtections, allocationFlags); 903 free_etc(secondAreaNewProtections, allocationFlags); 904 return error; 905 } 906 // We need a cache reference for the new area. 907 cache->AcquireRefLocked(); 908 } 909 910 if (area->page_protections != NULL) { 911 // Copy the protection bits of the first area. 912 size_t areaBytes = area_page_protections_size(area->Size()); 913 memcpy(areaNewProtections, area->page_protections, areaBytes); 914 uint8* areaOldProtections = area->page_protections; 915 area->page_protections = areaNewProtections; 916 917 // Shift the protection bits of the second area to the start of 918 // the old array. 919 size_t oldBytes = area_page_protections_size(oldSize); 920 addr_t secondAreaOffset = secondBase - area->Base(); 921 ssize_t secondAreaPagesShifted = secondAreaOffset / B_PAGE_SIZE; 922 bitmap_shift<uint8>(areaOldProtections, oldBytes * 8, -(secondAreaPagesShifted * 4)); 923 924 // Copy the protection bits of the second area. 925 size_t secondAreaBytes = area_page_protections_size(secondSize); 926 memcpy(secondAreaNewProtections, areaOldProtections, secondAreaBytes); 927 secondArea->page_protections = secondAreaNewProtections; 928 929 // We don't need this anymore. 930 free_etc(areaOldProtections, allocationFlags); 931 932 // Set the correct page protections for the second area. 933 VMTranslationMap* map = addressSpace->TranslationMap(); 934 map->Lock(); 935 for (VMCachePagesTree::Iterator it 936 = secondArea->cache->pages.GetIterator(); 937 vm_page* page = it.Next();) { 938 if (is_page_in_area(secondArea, page)) { 939 addr_t address = virtual_page_address(secondArea, page); 940 uint32 pageProtection 941 = get_area_page_protection(secondArea, address); 942 map->ProtectPage(secondArea, address, pageProtection); 943 } 944 } 945 map->Unlock(); 946 } 947 948 if (_secondArea != NULL) 949 *_secondArea = secondArea; 950 951 return B_OK; 952 } 953 954 955 /*! Deletes or cuts all areas in the given address range. 956 The address space must be write-locked. 957 The caller must ensure that no part of the given range is wired. 958 */ 959 static status_t 960 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 961 bool kernel) 962 { 963 size = PAGE_ALIGN(size); 964 965 // Check, whether the caller is allowed to modify the concerned areas. 
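	// Userland requests may not touch areas flagged B_KERNEL_AREA; the whole
	// range is rejected here, before any area has been modified.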
966 if (!kernel) { 967 for (VMAddressSpace::AreaRangeIterator it 968 = addressSpace->GetAreaRangeIterator(address, size); 969 VMArea* area = it.Next();) { 970 971 if ((area->protection & B_KERNEL_AREA) != 0) { 972 dprintf("unmap_address_range: team %" B_PRId32 " tried to " 973 "unmap range of kernel area %" B_PRId32 " (%s)\n", 974 team_get_current_team_id(), area->id, area->name); 975 return B_NOT_ALLOWED; 976 } 977 } 978 } 979 980 for (VMAddressSpace::AreaRangeIterator it 981 = addressSpace->GetAreaRangeIterator(address, size); 982 VMArea* area = it.Next();) { 983 984 status_t error = cut_area(addressSpace, area, address, size, NULL, 985 kernel); 986 if (error != B_OK) 987 return error; 988 // Failing after already messing with areas is ugly, but we 989 // can't do anything about it. 990 } 991 992 return B_OK; 993 } 994 995 996 static status_t 997 discard_area_range(VMArea* area, addr_t address, addr_t size) 998 { 999 addr_t offset; 1000 if (!intersect_area(area, address, size, offset)) 1001 return B_OK; 1002 1003 // If someone else uses the area's cache or it's not an anonymous cache, we 1004 // can't discard. 1005 VMCache* cache = vm_area_get_locked_cache(area); 1006 if (cache->areas != area || area->cache_next != NULL 1007 || !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) { 1008 return B_OK; 1009 } 1010 1011 VMCacheChainLocker cacheChainLocker(cache); 1012 cacheChainLocker.LockAllSourceCaches(); 1013 1014 unmap_pages(area, address, size); 1015 1016 // Since VMCache::Discard() can temporarily drop the lock, we must 1017 // unlock all lower caches to prevent locking order inversion. 1018 cacheChainLocker.Unlock(cache); 1019 cache->Discard(cache->virtual_base + offset, size); 1020 cache->ReleaseRefAndUnlock(); 1021 1022 return B_OK; 1023 } 1024 1025 1026 static status_t 1027 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 1028 bool kernel) 1029 { 1030 for (VMAddressSpace::AreaRangeIterator it 1031 = addressSpace->GetAreaRangeIterator(address, size); 1032 VMArea* area = it.Next();) { 1033 status_t error = discard_area_range(area, address, size); 1034 if (error != B_OK) 1035 return error; 1036 } 1037 1038 return B_OK; 1039 } 1040 1041 1042 /*! You need to hold the lock of the cache and the write lock of the address 1043 space when calling this function. 1044 Note, that in case of error your cache will be temporarily unlocked. 1045 If \a addressSpec is \c B_EXACT_ADDRESS and the 1046 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 1047 that no part of the specified address range (base \c *_virtualAddress, size 1048 \a size) is wired. The cache will also be temporarily unlocked. 
1049 */ 1050 static status_t 1051 map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset, 1052 const char* areaName, addr_t size, int wiring, int protection, 1053 int protectionMax, int mapping, 1054 uint32 flags, const virtual_address_restrictions* addressRestrictions, 1055 bool kernel, VMArea** _area, void** _virtualAddress) 1056 { 1057 TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%" 1058 B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d" 1059 ", protection %d, protectionMax %d, area %p, areaName '%s'\n", 1060 addressSpace, cache, addressRestrictions->address, offset, size, 1061 addressRestrictions->address_specification, wiring, protection, 1062 protectionMax, _area, areaName)); 1063 cache->AssertLocked(); 1064 1065 if (size == 0) { 1066 #if KDEBUG 1067 panic("map_backing_store(): called with size=0 for area '%s'!", 1068 areaName); 1069 #endif 1070 return B_BAD_VALUE; 1071 } 1072 if (offset < 0) 1073 return B_BAD_VALUE; 1074 1075 uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 1076 | HEAP_DONT_LOCK_KERNEL_SPACE; 1077 int priority; 1078 if (addressSpace != VMAddressSpace::Kernel()) { 1079 priority = VM_PRIORITY_USER; 1080 } else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) { 1081 priority = VM_PRIORITY_VIP; 1082 allocationFlags |= HEAP_PRIORITY_VIP; 1083 } else 1084 priority = VM_PRIORITY_SYSTEM; 1085 1086 VMArea* area = addressSpace->CreateArea(areaName, wiring, protection, 1087 allocationFlags); 1088 if (area == NULL) 1089 return B_NO_MEMORY; 1090 if (mapping != REGION_PRIVATE_MAP) 1091 area->protection_max = protectionMax & B_USER_PROTECTION; 1092 1093 status_t status; 1094 1095 // if this is a private map, we need to create a new cache 1096 // to handle the private copies of pages as they are written to 1097 VMCache* sourceCache = cache; 1098 if (mapping == REGION_PRIVATE_MAP) { 1099 VMCache* newCache; 1100 1101 // create an anonymous cache 1102 status = VMCacheFactory::CreateAnonymousCache(newCache, 1103 (protection & B_STACK_AREA) != 0 1104 || (protection & B_OVERCOMMITTING_AREA) != 0, 0, 1105 cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER); 1106 if (status != B_OK) 1107 goto err1; 1108 1109 newCache->Lock(); 1110 newCache->temporary = 1; 1111 newCache->virtual_base = offset; 1112 newCache->virtual_end = offset + size; 1113 1114 cache->AddConsumer(newCache); 1115 1116 cache = newCache; 1117 } 1118 1119 if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) { 1120 status = cache->SetMinimalCommitment(size, priority); 1121 if (status != B_OK) 1122 goto err2; 1123 } 1124 1125 // check to see if this address space has entered DELETE state 1126 if (addressSpace->IsBeingDeleted()) { 1127 // okay, someone is trying to delete this address space now, so we can't 1128 // insert the area and have to back out 1129 status = B_BAD_TEAM_ID; 1130 goto err2; 1131 } 1132 1133 if (addressRestrictions->address_specification == B_EXACT_ADDRESS 1134 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) { 1135 // temporarily unlock the current cache since it might be mapped to 1136 // some existing area, and unmap_address_range also needs to lock that 1137 // cache to delete the area.
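		// While the cache is unlocked other threads may access it -- this is
		// the temporary unlock the function documentation above warns about.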
1138 cache->Unlock(); 1139 status = unmap_address_range(addressSpace, 1140 (addr_t)addressRestrictions->address, size, kernel); 1141 cache->Lock(); 1142 if (status != B_OK) 1143 goto err2; 1144 } 1145 1146 status = addressSpace->InsertArea(area, size, addressRestrictions, 1147 allocationFlags, _virtualAddress); 1148 if (status == B_NO_MEMORY 1149 && addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) { 1150 // Due to how many locks are held, we cannot wait here for space to be 1151 // freed up, but we can at least notify the low_resource handler. 1152 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0); 1153 } 1154 if (status != B_OK) 1155 goto err2; 1156 1157 // attach the cache to the area 1158 area->cache = cache; 1159 area->cache_offset = offset; 1160 1161 // point the cache back to the area 1162 cache->InsertAreaLocked(area); 1163 if (mapping == REGION_PRIVATE_MAP) 1164 cache->Unlock(); 1165 1166 // insert the area in the global areas map 1167 VMAreas::Insert(area); 1168 1169 // grab a ref to the address space (the area holds this) 1170 addressSpace->Get(); 1171 1172 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1173 // cache, sourceCache, areaName, area); 1174 1175 *_area = area; 1176 return B_OK; 1177 1178 err2: 1179 if (mapping == REGION_PRIVATE_MAP) { 1180 // We created this cache, so we must delete it again. Note that we 1181 // need to temporarily unlock the source cache or we'll otherwise 1182 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1183 sourceCache->Unlock(); 1184 cache->ReleaseRefAndUnlock(); 1185 sourceCache->Lock(); 1186 } 1187 err1: 1188 addressSpace->DeleteArea(area, allocationFlags); 1189 return status; 1190 } 1191 1192 1193 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 1194 locker1, locker2). 1195 */ 1196 template<typename LockerType1, typename LockerType2> 1197 static inline bool 1198 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 1199 { 1200 area->cache->AssertLocked(); 1201 1202 VMAreaUnwiredWaiter waiter; 1203 if (!area->AddWaiterIfWired(&waiter)) 1204 return false; 1205 1206 // unlock everything and wait 1207 if (locker1 != NULL) 1208 locker1->Unlock(); 1209 if (locker2 != NULL) 1210 locker2->Unlock(); 1211 1212 waiter.waitEntry.Wait(); 1213 1214 return true; 1215 } 1216 1217 1218 /*! Checks whether the given area has any wired ranges intersecting with the 1219 specified range and waits, if so. 1220 1221 When it has to wait, the function calls \c Unlock() on both \a locker1 1222 and \a locker2, if given. 1223 The area's top cache must be locked and must be unlocked as a side effect 1224 of calling \c Unlock() on either \a locker1 or \a locker2. 1225 1226 If the function does not have to wait it does not modify or unlock any 1227 object. 1228 1229 \param area The area to be checked. 1230 \param base The base address of the range to check. 1231 \param size The size of the address range to check. 1232 \param locker1 An object to be unlocked before starting to wait (may 1233 be \c NULL). 1234 \param locker2 An object to be unlocked before starting to wait (may 1235 be \c NULL). 1236 \return \c true, if the function had to wait, \c false otherwise.
1237 */ 1238 template<typename LockerType1, typename LockerType2> 1239 static inline bool 1240 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 1241 LockerType1* locker1, LockerType2* locker2) 1242 { 1243 area->cache->AssertLocked(); 1244 1245 VMAreaUnwiredWaiter waiter; 1246 if (!area->AddWaiterIfWired(&waiter, base, size)) 1247 return false; 1248 1249 // unlock everything and wait 1250 if (locker1 != NULL) 1251 locker1->Unlock(); 1252 if (locker2 != NULL) 1253 locker2->Unlock(); 1254 1255 waiter.waitEntry.Wait(); 1256 1257 return true; 1258 } 1259 1260 1261 /*! Checks whether the given address space has any wired ranges intersecting 1262 with the specified range and waits, if so. 1263 1264 Similar to wait_if_area_range_is_wired(), with the following differences: 1265 - All areas intersecting with the range are checked (respectively all until 1266 one is found that contains a wired range intersecting with the given 1267 range). 1268 - The given address space must at least be read-locked and must be unlocked 1269 when \c Unlock() is called on \a locker. 1270 - None of the areas' caches are allowed to be locked. 1271 */ 1272 template<typename LockerType> 1273 static inline bool 1274 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 1275 size_t size, LockerType* locker) 1276 { 1277 for (VMAddressSpace::AreaRangeIterator it 1278 = addressSpace->GetAreaRangeIterator(base, size); 1279 VMArea* area = it.Next();) { 1280 1281 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area)); 1282 1283 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker)) 1284 return true; 1285 } 1286 1287 return false; 1288 } 1289 1290 1291 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection(). 1292 It must be called in a situation where the kernel address space may be 1293 locked. 1294 */ 1295 status_t 1296 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie) 1297 { 1298 AddressSpaceReadLocker locker; 1299 VMArea* area; 1300 status_t status = locker.SetFromArea(id, area); 1301 if (status != B_OK) 1302 return status; 1303 1304 if (area->page_protections == NULL) { 1305 status = allocate_area_page_protections(area); 1306 if (status != B_OK) 1307 return status; 1308 } 1309 1310 *cookie = (void*)area; 1311 return B_OK; 1312 } 1313 1314 1315 /*! This is a debug helper function that can only be used with very specific 1316 use cases. 1317 Sets protection for the given address range to the protection specified. 1318 If \a protection is 0 then the involved pages will be marked non-present 1319 in the translation map to cause a fault on access. The pages aren't 1320 actually unmapped however so that they can be marked present again with 1321 additional calls to this function. For this to work the area must be 1322 fully locked in memory so that the pages aren't otherwise touched. 1323 This function does not lock the kernel address space and needs to be 1324 supplied with a \a cookie retrieved from a successful call to 1325 vm_prepare_kernel_area_debug_protection(). 
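	A minimal usage sketch (illustrative only; the area ID, page address and
	size are placeholders):
		void* cookie;
		if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
			// make the page fault on any access ...
			vm_set_kernel_area_debug_protection(cookie, pageAddress,
				B_PAGE_SIZE, 0);
			// ... and later make it accessible again
			vm_set_kernel_area_debug_protection(cookie, pageAddress,
				B_PAGE_SIZE, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
		}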
1326 */ 1327 status_t 1328 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size, 1329 uint32 protection) 1330 { 1331 // check address range 1332 addr_t address = (addr_t)_address; 1333 size = PAGE_ALIGN(size); 1334 1335 if ((address % B_PAGE_SIZE) != 0 1336 || (addr_t)address + size < (addr_t)address 1337 || !IS_KERNEL_ADDRESS(address) 1338 || !IS_KERNEL_ADDRESS((addr_t)address + size)) { 1339 return B_BAD_VALUE; 1340 } 1341 1342 // Translate the kernel protection to user protection as we only store that. 1343 if ((protection & B_KERNEL_READ_AREA) != 0) 1344 protection |= B_READ_AREA; 1345 if ((protection & B_KERNEL_WRITE_AREA) != 0) 1346 protection |= B_WRITE_AREA; 1347 1348 VMAddressSpace* addressSpace = VMAddressSpace::GetKernel(); 1349 VMTranslationMap* map = addressSpace->TranslationMap(); 1350 VMArea* area = (VMArea*)cookie; 1351 1352 addr_t offset = address - area->Base(); 1353 if (area->Size() - offset < size) { 1354 panic("protect range not fully within supplied area"); 1355 return B_BAD_VALUE; 1356 } 1357 1358 if (area->page_protections == NULL) { 1359 panic("area has no page protections"); 1360 return B_BAD_VALUE; 1361 } 1362 1363 // Invalidate the mapping entries so any access to them will fault or 1364 // restore the mapping entries unchanged so that lookup will succeed again. 1365 map->Lock(); 1366 map->DebugMarkRangePresent(address, address + size, protection != 0); 1367 map->Unlock(); 1368 1369 // And set the proper page protections so that the fault case will actually 1370 // fail and not simply try to map a new page. 1371 for (addr_t pageAddress = address; pageAddress < address + size; 1372 pageAddress += B_PAGE_SIZE) { 1373 set_area_page_protection(area, pageAddress, protection); 1374 } 1375 1376 return B_OK; 1377 } 1378 1379 1380 status_t 1381 vm_block_address_range(const char* name, void* address, addr_t size) 1382 { 1383 if (!arch_vm_supports_protection(0)) 1384 return B_NOT_SUPPORTED; 1385 1386 AddressSpaceWriteLocker locker; 1387 status_t status = locker.SetTo(VMAddressSpace::KernelID()); 1388 if (status != B_OK) 1389 return status; 1390 1391 VMAddressSpace* addressSpace = locker.AddressSpace(); 1392 1393 // create an anonymous cache 1394 VMCache* cache; 1395 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false, 1396 VM_PRIORITY_SYSTEM); 1397 if (status != B_OK) 1398 return status; 1399 1400 cache->temporary = 1; 1401 cache->virtual_end = size; 1402 cache->Lock(); 1403 1404 VMArea* area; 1405 virtual_address_restrictions addressRestrictions = {}; 1406 addressRestrictions.address = address; 1407 addressRestrictions.address_specification = B_EXACT_ADDRESS; 1408 status = map_backing_store(addressSpace, cache, 0, name, size, 1409 B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions, 1410 true, &area, NULL); 1411 if (status != B_OK) { 1412 cache->ReleaseRefAndUnlock(); 1413 return status; 1414 } 1415 1416 cache->Unlock(); 1417 area->cache_type = CACHE_TYPE_RAM; 1418 return area->id; 1419 } 1420 1421 1422 status_t 1423 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1424 { 1425 AddressSpaceWriteLocker locker(team); 1426 if (!locker.IsLocked()) 1427 return B_BAD_TEAM_ID; 1428 1429 VMAddressSpace* addressSpace = locker.AddressSpace(); 1430 return addressSpace->UnreserveAddressRange((addr_t)address, size, 1431 addressSpace == VMAddressSpace::Kernel() 1432 ?
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1433 } 1434 1435 1436 status_t 1437 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1438 addr_t size, uint32 flags) 1439 { 1440 if (size == 0) 1441 return B_BAD_VALUE; 1442 1443 AddressSpaceWriteLocker locker(team); 1444 if (!locker.IsLocked()) 1445 return B_BAD_TEAM_ID; 1446 1447 virtual_address_restrictions addressRestrictions = {}; 1448 addressRestrictions.address = *_address; 1449 addressRestrictions.address_specification = addressSpec; 1450 VMAddressSpace* addressSpace = locker.AddressSpace(); 1451 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags, 1452 addressSpace == VMAddressSpace::Kernel() 1453 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0, 1454 _address); 1455 } 1456 1457 1458 area_id 1459 vm_create_anonymous_area(team_id team, const char *name, addr_t size, 1460 uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize, 1461 const virtual_address_restrictions* virtualAddressRestrictions, 1462 const physical_address_restrictions* physicalAddressRestrictions, 1463 bool kernel, void** _address) 1464 { 1465 VMArea* area; 1466 VMCache* cache; 1467 vm_page* page = NULL; 1468 bool isStack = (protection & B_STACK_AREA) != 0; 1469 page_num_t guardPages; 1470 bool canOvercommit = false; 1471 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1472 ? VM_PAGE_ALLOC_CLEAR : 0; 1473 1474 TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n", 1475 team, name, size)); 1476 1477 size = PAGE_ALIGN(size); 1478 guardSize = PAGE_ALIGN(guardSize); 1479 guardPages = guardSize / B_PAGE_SIZE; 1480 1481 if (size == 0 || size < guardSize) 1482 return B_BAD_VALUE; 1483 if (!arch_vm_supports_protection(protection)) 1484 return B_NOT_SUPPORTED; 1485 1486 if (team == B_CURRENT_TEAM) 1487 team = VMAddressSpace::CurrentID(); 1488 if (team < 0) 1489 return B_BAD_TEAM_ID; 1490 1491 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1492 canOvercommit = true; 1493 1494 #ifdef DEBUG_KERNEL_STACKS 1495 if ((protection & B_KERNEL_STACK_AREA) != 0) 1496 isStack = true; 1497 #endif 1498 1499 // check parameters 1500 switch (virtualAddressRestrictions->address_specification) { 1501 case B_ANY_ADDRESS: 1502 case B_EXACT_ADDRESS: 1503 case B_BASE_ADDRESS: 1504 case B_ANY_KERNEL_ADDRESS: 1505 case B_ANY_KERNEL_BLOCK_ADDRESS: 1506 case B_RANDOMIZED_ANY_ADDRESS: 1507 case B_RANDOMIZED_BASE_ADDRESS: 1508 break; 1509 1510 default: 1511 return B_BAD_VALUE; 1512 } 1513 1514 // If low or high physical address restrictions are given, we force 1515 // B_CONTIGUOUS wiring, since only then we'll use 1516 // vm_page_allocate_page_run() which deals with those restrictions. 
1517 if (physicalAddressRestrictions->low_address != 0 1518 || physicalAddressRestrictions->high_address != 0) { 1519 wiring = B_CONTIGUOUS; 1520 } 1521 1522 physical_address_restrictions stackPhysicalRestrictions; 1523 bool doReserveMemory = false; 1524 switch (wiring) { 1525 case B_NO_LOCK: 1526 break; 1527 case B_FULL_LOCK: 1528 case B_LAZY_LOCK: 1529 case B_CONTIGUOUS: 1530 doReserveMemory = true; 1531 break; 1532 case B_ALREADY_WIRED: 1533 break; 1534 case B_LOMEM: 1535 stackPhysicalRestrictions = *physicalAddressRestrictions; 1536 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024; 1537 physicalAddressRestrictions = &stackPhysicalRestrictions; 1538 wiring = B_CONTIGUOUS; 1539 doReserveMemory = true; 1540 break; 1541 case B_32_BIT_FULL_LOCK: 1542 if (B_HAIKU_PHYSICAL_BITS <= 32 1543 || (uint64)vm_page_max_address() < (uint64)1 << 32) { 1544 wiring = B_FULL_LOCK; 1545 doReserveMemory = true; 1546 break; 1547 } 1548 // TODO: We don't really support this mode efficiently. Just fall 1549 // through for now ... 1550 case B_32_BIT_CONTIGUOUS: 1551 #if B_HAIKU_PHYSICAL_BITS > 32 1552 if (vm_page_max_address() >= (phys_addr_t)1 << 32) { 1553 stackPhysicalRestrictions = *physicalAddressRestrictions; 1554 stackPhysicalRestrictions.high_address 1555 = (phys_addr_t)1 << 32; 1556 physicalAddressRestrictions = &stackPhysicalRestrictions; 1557 } 1558 #endif 1559 wiring = B_CONTIGUOUS; 1560 doReserveMemory = true; 1561 break; 1562 default: 1563 return B_BAD_VALUE; 1564 } 1565 1566 // Optimization: For a single-page contiguous allocation without low/high 1567 // memory restriction B_FULL_LOCK wiring suffices. 1568 if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE 1569 && physicalAddressRestrictions->low_address == 0 1570 && physicalAddressRestrictions->high_address == 0) { 1571 wiring = B_FULL_LOCK; 1572 } 1573 1574 // For full lock or contiguous areas we're also going to map the pages and 1575 // thus need to reserve pages for the mapping backend upfront. 1576 addr_t reservedMapPages = 0; 1577 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1578 AddressSpaceWriteLocker locker; 1579 status_t status = locker.SetTo(team); 1580 if (status != B_OK) 1581 return status; 1582 1583 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1584 reservedMapPages = map->MaxPagesNeededToMap(0, size - 1); 1585 } 1586 1587 int priority; 1588 if (team != VMAddressSpace::KernelID()) 1589 priority = VM_PRIORITY_USER; 1590 else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) 1591 priority = VM_PRIORITY_VIP; 1592 else 1593 priority = VM_PRIORITY_SYSTEM; 1594 1595 // Reserve memory before acquiring the address space lock. This reduces the 1596 // chances of failure, since while holding the write lock to the address 1597 // space (if it is the kernel address space that is), the low memory handler 1598 // won't be able to free anything for us. 1599 addr_t reservedMemory = 0; 1600 if (doReserveMemory) { 1601 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1602 if (vm_try_reserve_memory(size, priority, timeout) != B_OK) 1603 return B_NO_MEMORY; 1604 reservedMemory = size; 1605 // TODO: We don't reserve the memory for the pages for the page 1606 // directories/tables. We actually need to do so, since we currently don't 1607 // reclaim them (and probably can't reclaim all of them anyway). Thus 1608 // there are actually fewer physical pages than there should be, which 1609 // can get the VM into trouble in low memory situations.
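		// Should anything fail below, the err0 path hands this reservation
		// back via vm_unreserve_memory().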
1610 } 1611 1612 AddressSpaceWriteLocker locker; 1613 VMAddressSpace* addressSpace; 1614 status_t status; 1615 1616 // For full lock areas reserve the pages before locking the address 1617 // space. E.g. block caches can't release their memory while we hold the 1618 // address space lock. 1619 page_num_t reservedPages = reservedMapPages; 1620 if (wiring == B_FULL_LOCK) 1621 reservedPages += size / B_PAGE_SIZE; 1622 1623 vm_page_reservation reservation; 1624 if (reservedPages > 0) { 1625 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1626 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1627 priority)) { 1628 reservedPages = 0; 1629 status = B_WOULD_BLOCK; 1630 goto err0; 1631 } 1632 } else 1633 vm_page_reserve_pages(&reservation, reservedPages, priority); 1634 } 1635 1636 if (wiring == B_CONTIGUOUS) { 1637 // we try to allocate the page run here upfront as this may easily 1638 // fail for obvious reasons 1639 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1640 size / B_PAGE_SIZE, physicalAddressRestrictions, priority); 1641 if (page == NULL) { 1642 status = B_NO_MEMORY; 1643 goto err0; 1644 } 1645 } 1646 1647 // Lock the address space and, if B_EXACT_ADDRESS and 1648 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1649 // is not wired. 1650 do { 1651 status = locker.SetTo(team); 1652 if (status != B_OK) 1653 goto err1; 1654 1655 addressSpace = locker.AddressSpace(); 1656 } while (virtualAddressRestrictions->address_specification 1657 == B_EXACT_ADDRESS 1658 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1659 && wait_if_address_range_is_wired(addressSpace, 1660 (addr_t)virtualAddressRestrictions->address, size, &locker)); 1661 1662 // create an anonymous cache 1663 // if it's a stack, make sure that two pages are available at least 1664 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1665 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1666 wiring == B_NO_LOCK, priority); 1667 if (status != B_OK) 1668 goto err1; 1669 1670 cache->temporary = 1; 1671 cache->virtual_end = size; 1672 cache->committed_size = reservedMemory; 1673 // TODO: This should be done via a method. 
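	// The cache has taken over the reservation (via committed_size above), so
	// clear the local variable to keep the error path from unreserving the
	// memory a second time.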
1674 reservedMemory = 0; 1675 1676 cache->Lock(); 1677 1678 status = map_backing_store(addressSpace, cache, 0, name, size, wiring, 1679 protection, 0, REGION_NO_PRIVATE_MAP, flags, 1680 virtualAddressRestrictions, kernel, &area, _address); 1681 1682 if (status != B_OK) { 1683 cache->ReleaseRefAndUnlock(); 1684 goto err1; 1685 } 1686 1687 locker.DegradeToReadLock(); 1688 1689 switch (wiring) { 1690 case B_NO_LOCK: 1691 case B_LAZY_LOCK: 1692 // do nothing - the pages are mapped in as needed 1693 break; 1694 1695 case B_FULL_LOCK: 1696 { 1697 // Allocate and map all pages for this area 1698 1699 off_t offset = 0; 1700 for (addr_t address = area->Base(); 1701 address < area->Base() + (area->Size() - 1); 1702 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1703 #ifdef DEBUG_KERNEL_STACKS 1704 # ifdef STACK_GROWS_DOWNWARDS 1705 if (isStack && address < area->Base() 1706 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1707 # else 1708 if (isStack && address >= area->Base() + area->Size() 1709 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1710 # endif 1711 continue; 1712 #endif 1713 vm_page* page = vm_page_allocate_page(&reservation, 1714 PAGE_STATE_WIRED | pageAllocFlags); 1715 cache->InsertPage(page, offset); 1716 map_page(area, page, address, protection, &reservation); 1717 1718 DEBUG_PAGE_ACCESS_END(page); 1719 } 1720 1721 break; 1722 } 1723 1724 case B_ALREADY_WIRED: 1725 { 1726 // The pages should already be mapped. This is only really useful 1727 // during boot time. Find the appropriate vm_page objects and stick 1728 // them in the cache object. 1729 VMTranslationMap* map = addressSpace->TranslationMap(); 1730 off_t offset = 0; 1731 1732 if (!gKernelStartup) 1733 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1734 1735 map->Lock(); 1736 1737 for (addr_t virtualAddress = area->Base(); 1738 virtualAddress < area->Base() + (area->Size() - 1); 1739 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1740 phys_addr_t physicalAddress; 1741 uint32 flags; 1742 status = map->Query(virtualAddress, &physicalAddress, &flags); 1743 if (status < B_OK) { 1744 panic("looking up mapping failed for va 0x%lx\n", 1745 virtualAddress); 1746 } 1747 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1748 if (page == NULL) { 1749 panic("looking up page failed for pa %#" B_PRIxPHYSADDR 1750 "\n", physicalAddress); 1751 } 1752 1753 DEBUG_PAGE_ACCESS_START(page); 1754 1755 cache->InsertPage(page, offset); 1756 increment_page_wired_count(page); 1757 vm_page_set_state(page, PAGE_STATE_WIRED); 1758 page->busy = false; 1759 1760 DEBUG_PAGE_ACCESS_END(page); 1761 } 1762 1763 map->Unlock(); 1764 break; 1765 } 1766 1767 case B_CONTIGUOUS: 1768 { 1769 // We have already allocated our contiguous page run, so we can now 1770 // just map the pages in the address space 1771 VMTranslationMap* map = addressSpace->TranslationMap(); 1772 phys_addr_t physicalAddress 1773 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 1774 addr_t virtualAddress = area->Base(); 1775 off_t offset = 0; 1776 1777 map->Lock(); 1778 1779 for (virtualAddress = area->Base(); virtualAddress < area->Base() 1780 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE, 1781 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 1782 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1783 if (page == NULL) 1784 panic("couldn't lookup physical page just allocated\n"); 1785 1786 status = map->Map(virtualAddress, physicalAddress, protection, 1787 area->MemoryType(), &reservation); 1788 if (status < B_OK) 1789 panic("couldn't map physical page in
page run\n"); 1790 1791 cache->InsertPage(page, offset); 1792 increment_page_wired_count(page); 1793 1794 DEBUG_PAGE_ACCESS_END(page); 1795 } 1796 1797 map->Unlock(); 1798 break; 1799 } 1800 1801 default: 1802 break; 1803 } 1804 1805 cache->Unlock(); 1806 1807 if (reservedPages > 0) 1808 vm_page_unreserve_pages(&reservation); 1809 1810 TRACE(("vm_create_anonymous_area: done\n")); 1811 1812 area->cache_type = CACHE_TYPE_RAM; 1813 return area->id; 1814 1815 err1: 1816 if (wiring == B_CONTIGUOUS) { 1817 // we had reserved the area space upfront... 1818 phys_addr_t pageNumber = page->physical_page_number; 1819 int32 i; 1820 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1821 page = vm_lookup_page(pageNumber); 1822 if (page == NULL) 1823 panic("couldn't lookup physical page just allocated\n"); 1824 1825 vm_page_set_state(page, PAGE_STATE_FREE); 1826 } 1827 } 1828 1829 err0: 1830 if (reservedPages > 0) 1831 vm_page_unreserve_pages(&reservation); 1832 if (reservedMemory > 0) 1833 vm_unreserve_memory(reservedMemory); 1834 1835 return status; 1836 } 1837 1838 1839 area_id 1840 vm_map_physical_memory(team_id team, const char* name, void** _address, 1841 uint32 addressSpec, addr_t size, uint32 protection, 1842 phys_addr_t physicalAddress, bool alreadyWired) 1843 { 1844 VMArea* area; 1845 VMCache* cache; 1846 addr_t mapOffset; 1847 1848 TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p" 1849 ", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %" 1850 B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address, 1851 addressSpec, size, protection, physicalAddress)); 1852 1853 if (!arch_vm_supports_protection(protection)) 1854 return B_NOT_SUPPORTED; 1855 1856 AddressSpaceWriteLocker locker(team); 1857 if (!locker.IsLocked()) 1858 return B_BAD_TEAM_ID; 1859 1860 // if the physical address is somewhat inside a page, 1861 // move the actual area down to align on a page boundary 1862 mapOffset = physicalAddress % B_PAGE_SIZE; 1863 size += mapOffset; 1864 physicalAddress -= mapOffset; 1865 1866 size = PAGE_ALIGN(size); 1867 1868 // create a device cache 1869 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1870 if (status != B_OK) 1871 return status; 1872 1873 cache->virtual_end = size; 1874 1875 cache->Lock(); 1876 1877 virtual_address_restrictions addressRestrictions = {}; 1878 addressRestrictions.address = *_address; 1879 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1880 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1881 B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, 1882 true, &area, _address); 1883 1884 if (status < B_OK) 1885 cache->ReleaseRefLocked(); 1886 1887 cache->Unlock(); 1888 1889 if (status == B_OK) { 1890 // set requested memory type -- use uncached, if not given 1891 uint32 memoryType = addressSpec & B_MTR_MASK; 1892 if (memoryType == 0) 1893 memoryType = B_MTR_UC; 1894 1895 area->SetMemoryType(memoryType); 1896 1897 status = arch_vm_set_memory_type(area, physicalAddress, memoryType); 1898 if (status != B_OK) 1899 delete_area(locker.AddressSpace(), area, false); 1900 } 1901 1902 if (status != B_OK) 1903 return status; 1904 1905 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1906 1907 if (alreadyWired) { 1908 // The area is already mapped, but possibly not with the right 1909 // memory type. 
1910 map->Lock(); 1911 map->ProtectArea(area, area->protection); 1912 map->Unlock(); 1913 } else { 1914 // Map the area completely. 1915 1916 // reserve pages needed for the mapping 1917 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1918 area->Base() + (size - 1)); 1919 vm_page_reservation reservation; 1920 vm_page_reserve_pages(&reservation, reservePages, 1921 team == VMAddressSpace::KernelID() 1922 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1923 1924 map->Lock(); 1925 1926 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1927 map->Map(area->Base() + offset, physicalAddress + offset, 1928 protection, area->MemoryType(), &reservation); 1929 } 1930 1931 map->Unlock(); 1932 1933 vm_page_unreserve_pages(&reservation); 1934 } 1935 1936 // modify the pointer returned to be offset back into the new area 1937 // the same way the physical address in was offset 1938 *_address = (void*)((addr_t)*_address + mapOffset); 1939 1940 area->cache_type = CACHE_TYPE_DEVICE; 1941 return area->id; 1942 } 1943 1944 1945 /*! Don't use! 1946 TODO: This function was introduced to map physical page vecs to 1947 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 1948 use a device cache and does not track vm_page::wired_count! 1949 */ 1950 area_id 1951 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 1952 uint32 addressSpec, addr_t* _size, uint32 protection, 1953 struct generic_io_vec* vecs, uint32 vecCount) 1954 { 1955 TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual " 1956 "= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", " 1957 "vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address, 1958 addressSpec, _size, protection, vecs, vecCount)); 1959 1960 if (!arch_vm_supports_protection(protection) 1961 || (addressSpec & B_MTR_MASK) != 0) { 1962 return B_NOT_SUPPORTED; 1963 } 1964 1965 AddressSpaceWriteLocker locker(team); 1966 if (!locker.IsLocked()) 1967 return B_BAD_TEAM_ID; 1968 1969 if (vecCount == 0) 1970 return B_BAD_VALUE; 1971 1972 addr_t size = 0; 1973 for (uint32 i = 0; i < vecCount; i++) { 1974 if (vecs[i].base % B_PAGE_SIZE != 0 1975 || vecs[i].length % B_PAGE_SIZE != 0) { 1976 return B_BAD_VALUE; 1977 } 1978 1979 size += vecs[i].length; 1980 } 1981 1982 // create a device cache 1983 VMCache* cache; 1984 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base); 1985 if (result != B_OK) 1986 return result; 1987 1988 cache->virtual_end = size; 1989 1990 cache->Lock(); 1991 1992 VMArea* area; 1993 virtual_address_restrictions addressRestrictions = {}; 1994 addressRestrictions.address = *_address; 1995 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1996 result = map_backing_store(locker.AddressSpace(), cache, 0, name, 1997 size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, 1998 &addressRestrictions, true, &area, _address); 1999 2000 if (result != B_OK) 2001 cache->ReleaseRefLocked(); 2002 2003 cache->Unlock(); 2004 2005 if (result != B_OK) 2006 return result; 2007 2008 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2009 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 2010 area->Base() + (size - 1)); 2011 2012 vm_page_reservation reservation; 2013 vm_page_reserve_pages(&reservation, reservePages, 2014 team == VMAddressSpace::KernelID() 2015 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2016 map->Lock(); 2017 2018 uint32 vecIndex = 0; 2019 size_t vecOffset = 0; 2020 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 2021 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) { 2022 vecOffset = 0; 2023 vecIndex++; 2024 } 2025 2026 if (vecIndex >= vecCount) 2027 break; 2028 2029 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 2030 protection, area->MemoryType(), &reservation); 2031 2032 vecOffset += B_PAGE_SIZE; 2033 } 2034 2035 map->Unlock(); 2036 vm_page_unreserve_pages(&reservation); 2037 2038 if (_size != NULL) 2039 *_size = size; 2040 2041 area->cache_type = CACHE_TYPE_DEVICE; 2042 return area->id; 2043 } 2044 2045 2046 area_id 2047 vm_create_null_area(team_id team, const char* name, void** address, 2048 uint32 addressSpec, addr_t size, uint32 flags) 2049 { 2050 size = PAGE_ALIGN(size); 2051 2052 // Lock the address space and, if B_EXACT_ADDRESS and 2053 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 2054 // is not wired. 2055 AddressSpaceWriteLocker locker; 2056 do { 2057 if (locker.SetTo(team) != B_OK) 2058 return B_BAD_TEAM_ID; 2059 } while (addressSpec == B_EXACT_ADDRESS 2060 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 2061 && wait_if_address_range_is_wired(locker.AddressSpace(), 2062 (addr_t)*address, size, &locker)); 2063 2064 // create a null cache 2065 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 2066 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 2067 VMCache* cache; 2068 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 2069 if (status != B_OK) 2070 return status; 2071 2072 cache->temporary = 1; 2073 cache->virtual_end = size; 2074 2075 cache->Lock(); 2076 2077 VMArea* area; 2078 virtual_address_restrictions addressRestrictions = {}; 2079 addressRestrictions.address = *address; 2080 addressRestrictions.address_specification = addressSpec; 2081 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 2082 B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA, 2083 REGION_NO_PRIVATE_MAP, flags, 2084 &addressRestrictions, true, &area, address); 2085 2086 if (status < B_OK) { 2087 cache->ReleaseRefAndUnlock(); 2088 return status; 2089 } 2090 2091 cache->Unlock(); 2092 2093 area->cache_type = CACHE_TYPE_NULL; 2094 return area->id; 2095 } 2096 2097 2098 /*! Creates the vnode cache for the specified \a vnode. 2099 The vnode has to be marked busy when calling this function. 2100 */ 2101 status_t 2102 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 2103 { 2104 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 2105 } 2106 2107 2108 /*! \a cache must be locked. The area's address space must be read-locked. 
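	Maps those pages of \a cache that are already resident and neither busy
	nor inactive into \a area, using \a reservation for whatever mapping
	structures have to be allocated.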
2109 */ 2110 static void 2111 pre_map_area_pages(VMArea* area, VMCache* cache, 2112 vm_page_reservation* reservation) 2113 { 2114 addr_t baseAddress = area->Base(); 2115 addr_t cacheOffset = area->cache_offset; 2116 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 2117 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE; 2118 2119 for (VMCachePagesTree::Iterator it 2120 = cache->pages.GetIterator(firstPage, true, true); 2121 vm_page* page = it.Next();) { 2122 if (page->cache_offset >= endPage) 2123 break; 2124 2125 // skip busy and inactive pages 2126 if (page->busy || page->usage_count == 0) 2127 continue; 2128 2129 DEBUG_PAGE_ACCESS_START(page); 2130 map_page(area, page, 2131 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 2132 B_READ_AREA | B_KERNEL_READ_AREA, reservation); 2133 DEBUG_PAGE_ACCESS_END(page); 2134 } 2135 } 2136 2137 2138 /*! Will map the file specified by \a fd to an area in memory. 2139 The file will be mirrored beginning at the specified \a offset. The 2140 \a offset and \a size arguments have to be page aligned. 2141 */ 2142 static area_id 2143 _vm_map_file(team_id team, const char* name, void** _address, 2144 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 2145 bool unmapAddressRange, int fd, off_t offset, bool kernel) 2146 { 2147 // TODO: for binary files, we want to make sure that they get the 2148 // copy of a file at a given time, ie. later changes should not 2149 // make it into the mapped copy -- this will need quite some changes 2150 // to be done in a nice way 2151 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping " 2152 "%" B_PRIu32 ")\n", fd, offset, size, mapping)); 2153 2154 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 2155 size = PAGE_ALIGN(size); 2156 2157 if (mapping == REGION_NO_PRIVATE_MAP) 2158 protection |= B_SHARED_AREA; 2159 if (addressSpec != B_EXACT_ADDRESS) 2160 unmapAddressRange = false; 2161 2162 if (fd < 0) { 2163 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0; 2164 virtual_address_restrictions virtualRestrictions = {}; 2165 virtualRestrictions.address = *_address; 2166 virtualRestrictions.address_specification = addressSpec; 2167 physical_address_restrictions physicalRestrictions = {}; 2168 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection, 2169 flags, 0, &virtualRestrictions, &physicalRestrictions, kernel, 2170 _address); 2171 } 2172 2173 // get the open flags of the FD 2174 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 2175 if (descriptor == NULL) 2176 return EBADF; 2177 int32 openMode = descriptor->open_mode; 2178 put_fd(descriptor); 2179 2180 // The FD must open for reading at any rate. For shared mapping with write 2181 // access, additionally the FD must be open for writing. 
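	// For example, an O_WRONLY descriptor can never be mapped, and an
	// O_RDONLY descriptor cannot back a shared mapping that requests write
	// access (B_WRITE_AREA/B_KERNEL_WRITE_AREA with REGION_NO_PRIVATE_MAP).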
2182 if ((openMode & O_ACCMODE) == O_WRONLY 2183 || (mapping == REGION_NO_PRIVATE_MAP 2184 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2185 && (openMode & O_ACCMODE) == O_RDONLY)) { 2186 return EACCES; 2187 } 2188 2189 uint32 protectionMax = 0; 2190 if (mapping != REGION_PRIVATE_MAP) { 2191 if ((openMode & O_ACCMODE) == O_RDWR) 2192 protectionMax = protection | B_USER_PROTECTION; 2193 else 2194 protectionMax = protection | (B_USER_PROTECTION & ~B_WRITE_AREA); 2195 } 2196 2197 // get the vnode for the object, this also grabs a ref to it 2198 struct vnode* vnode = NULL; 2199 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2200 if (status < B_OK) 2201 return status; 2202 VnodePutter vnodePutter(vnode); 2203 2204 // If we're going to pre-map pages, we need to reserve the pages needed by 2205 // the mapping backend upfront. 2206 page_num_t reservedPreMapPages = 0; 2207 vm_page_reservation reservation; 2208 if ((protection & B_READ_AREA) != 0) { 2209 AddressSpaceWriteLocker locker; 2210 status = locker.SetTo(team); 2211 if (status != B_OK) 2212 return status; 2213 2214 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2215 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 2216 2217 locker.Unlock(); 2218 2219 vm_page_reserve_pages(&reservation, reservedPreMapPages, 2220 team == VMAddressSpace::KernelID() 2221 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2222 } 2223 2224 struct PageUnreserver { 2225 PageUnreserver(vm_page_reservation* reservation) 2226 : 2227 fReservation(reservation) 2228 { 2229 } 2230 2231 ~PageUnreserver() 2232 { 2233 if (fReservation != NULL) 2234 vm_page_unreserve_pages(fReservation); 2235 } 2236 2237 vm_page_reservation* fReservation; 2238 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 2239 2240 // Lock the address space and, if the specified address range shall be 2241 // unmapped, ensure it is not wired. 2242 AddressSpaceWriteLocker locker; 2243 do { 2244 if (locker.SetTo(team) != B_OK) 2245 return B_BAD_TEAM_ID; 2246 } while (unmapAddressRange 2247 && wait_if_address_range_is_wired(locker.AddressSpace(), 2248 (addr_t)*_address, size, &locker)); 2249 2250 // TODO: this only works for file systems that use the file cache 2251 VMCache* cache; 2252 status = vfs_get_vnode_cache(vnode, &cache, false); 2253 if (status < B_OK) 2254 return status; 2255 2256 cache->Lock(); 2257 2258 VMArea* area; 2259 virtual_address_restrictions addressRestrictions = {}; 2260 addressRestrictions.address = *_address; 2261 addressRestrictions.address_specification = addressSpec; 2262 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 2263 0, protection, protectionMax, mapping, 2264 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, 2265 &addressRestrictions, kernel, &area, _address); 2266 2267 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2268 // map_backing_store() cannot know we no longer need the ref 2269 cache->ReleaseRefLocked(); 2270 } 2271 2272 if (status == B_OK && (protection & B_READ_AREA) != 0) 2273 pre_map_area_pages(area, cache, &reservation); 2274 2275 cache->Unlock(); 2276 2277 if (status == B_OK) { 2278 // TODO: this probably deserves a smarter solution, ie. don't always 2279 // prefetch stuff, and also, probably don't trigger it at this place. 
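		// min_c() clamps the read-ahead to the size of the mapping, so small
		// mappings do not trigger the full 10 MB of prefetching.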
2280 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 2281 // prefetches at max 10 MB starting from "offset" 2282 } 2283 2284 if (status != B_OK) 2285 return status; 2286 2287 area->cache_type = CACHE_TYPE_VNODE; 2288 return area->id; 2289 } 2290 2291 2292 area_id 2293 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2294 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2295 int fd, off_t offset) 2296 { 2297 if (!arch_vm_supports_protection(protection)) 2298 return B_NOT_SUPPORTED; 2299 2300 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2301 mapping, unmapAddressRange, fd, offset, true); 2302 } 2303 2304 2305 VMCache* 2306 vm_area_get_locked_cache(VMArea* area) 2307 { 2308 rw_lock_read_lock(&sAreaCacheLock); 2309 2310 while (true) { 2311 VMCache* cache = area->cache; 2312 2313 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2314 // cache has been deleted 2315 rw_lock_read_lock(&sAreaCacheLock); 2316 continue; 2317 } 2318 2319 rw_lock_read_lock(&sAreaCacheLock); 2320 2321 if (cache == area->cache) { 2322 cache->AcquireRefLocked(); 2323 rw_lock_read_unlock(&sAreaCacheLock); 2324 return cache; 2325 } 2326 2327 // the cache changed in the meantime 2328 cache->Unlock(); 2329 } 2330 } 2331 2332 2333 void 2334 vm_area_put_locked_cache(VMCache* cache) 2335 { 2336 cache->ReleaseRefAndUnlock(); 2337 } 2338 2339 2340 area_id 2341 vm_clone_area(team_id team, const char* name, void** address, 2342 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2343 bool kernel) 2344 { 2345 VMArea* newArea = NULL; 2346 VMArea* sourceArea; 2347 2348 // Check whether the source area exists and is cloneable. If so, mark it 2349 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2350 { 2351 AddressSpaceWriteLocker locker; 2352 status_t status = locker.SetFromArea(sourceID, sourceArea); 2353 if (status != B_OK) 2354 return status; 2355 2356 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2357 return B_NOT_ALLOWED; 2358 2359 sourceArea->protection |= B_SHARED_AREA; 2360 protection |= B_SHARED_AREA; 2361 } 2362 2363 // Now lock both address spaces and actually do the cloning. 
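	// Note that the source area is looked up and checked again below: it may
	// have been deleted or changed while no lock was held between marking it
	// B_SHARED_AREA above and locking both address spaces here.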
2364 2365 MultiAddressSpaceLocker locker; 2366 VMAddressSpace* sourceAddressSpace; 2367 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2368 if (status != B_OK) 2369 return status; 2370 2371 VMAddressSpace* targetAddressSpace; 2372 status = locker.AddTeam(team, true, &targetAddressSpace); 2373 if (status != B_OK) 2374 return status; 2375 2376 status = locker.Lock(); 2377 if (status != B_OK) 2378 return status; 2379 2380 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2381 if (sourceArea == NULL) 2382 return B_BAD_VALUE; 2383 2384 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2385 return B_NOT_ALLOWED; 2386 2387 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2388 2389 if (!kernel && sourceAddressSpace != targetAddressSpace 2390 && (sourceArea->protection & B_CLONEABLE_AREA) == 0) { 2391 #if KDEBUG 2392 Team* team = thread_get_current_thread()->team; 2393 dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%" 2394 B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID); 2395 #endif 2396 status = B_NOT_ALLOWED; 2397 } else if (sourceArea->cache_type == CACHE_TYPE_NULL) { 2398 status = B_NOT_ALLOWED; 2399 } else { 2400 virtual_address_restrictions addressRestrictions = {}; 2401 addressRestrictions.address = *address; 2402 addressRestrictions.address_specification = addressSpec; 2403 status = map_backing_store(targetAddressSpace, cache, 2404 sourceArea->cache_offset, name, sourceArea->Size(), 2405 sourceArea->wiring, protection, sourceArea->protection_max, 2406 mapping, 0, &addressRestrictions, 2407 kernel, &newArea, address); 2408 } 2409 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2410 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2411 // to create a new cache, and has therefore already acquired a reference 2412 // to the source cache - but otherwise it has no idea that we need 2413 // one. 2414 cache->AcquireRefLocked(); 2415 } 2416 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2417 // we need to map in everything at this point 2418 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2419 // we don't have actual pages to map but a physical area 2420 VMTranslationMap* map 2421 = sourceArea->address_space->TranslationMap(); 2422 map->Lock(); 2423 2424 phys_addr_t physicalAddress; 2425 uint32 oldProtection; 2426 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2427 2428 map->Unlock(); 2429 2430 map = targetAddressSpace->TranslationMap(); 2431 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2432 newArea->Base() + (newArea->Size() - 1)); 2433 2434 vm_page_reservation reservation; 2435 vm_page_reserve_pages(&reservation, reservePages, 2436 targetAddressSpace == VMAddressSpace::Kernel() 2437 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2438 map->Lock(); 2439 2440 for (addr_t offset = 0; offset < newArea->Size(); 2441 offset += B_PAGE_SIZE) { 2442 map->Map(newArea->Base() + offset, physicalAddress + offset, 2443 protection, newArea->MemoryType(), &reservation); 2444 } 2445 2446 map->Unlock(); 2447 vm_page_unreserve_pages(&reservation); 2448 } else { 2449 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2450 size_t reservePages = map->MaxPagesNeededToMap( 2451 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2452 vm_page_reservation reservation; 2453 vm_page_reserve_pages(&reservation, reservePages, 2454 targetAddressSpace == VMAddressSpace::Kernel() 2455 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2456 2457 // map in all pages from source 2458 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2459 vm_page* page = it.Next();) { 2460 if (!page->busy) { 2461 DEBUG_PAGE_ACCESS_START(page); 2462 map_page(newArea, page, 2463 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2464 - newArea->cache_offset), 2465 protection, &reservation); 2466 DEBUG_PAGE_ACCESS_END(page); 2467 } 2468 } 2469 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2470 // ensuring that! 2471 2472 vm_page_unreserve_pages(&reservation); 2473 } 2474 } 2475 if (status == B_OK) 2476 newArea->cache_type = sourceArea->cache_type; 2477 2478 vm_area_put_locked_cache(cache); 2479 2480 if (status < B_OK) 2481 return status; 2482 2483 return newArea->id; 2484 } 2485 2486 2487 /*! Deletes the specified area of the given address space. 2488 2489 The address space must be write-locked. 2490 The caller must ensure that the area does not have any wired ranges. 2491 2492 \param addressSpace The address space containing the area. 2493 \param area The area to be deleted. 2494 \param deletingAddressSpace \c true, if the address space is in the process 2495 of being deleted. 2496 */ 2497 static void 2498 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2499 bool deletingAddressSpace) 2500 { 2501 ASSERT(!area->IsWired()); 2502 2503 VMAreas::Remove(area); 2504 2505 // At this point the area is removed from the global hash table, but 2506 // still exists in the area list. 2507 2508 // Unmap the virtual address space the area occupied. 2509 { 2510 // We need to lock the complete cache chain. 2511 VMCache* topCache = vm_area_get_locked_cache(area); 2512 VMCacheChainLocker cacheChainLocker(topCache); 2513 cacheChainLocker.LockAllSourceCaches(); 2514 2515 // If the area's top cache is a temporary cache and the area is the only 2516 // one referencing it (besides us currently holding a second reference), 2517 // the unmapping code doesn't need to care about preserving the accessed 2518 // and dirty flags of the top cache page mappings. 2519 bool ignoreTopCachePageFlags 2520 = topCache->temporary && topCache->RefCount() == 2; 2521 2522 area->address_space->TranslationMap()->UnmapArea(area, 2523 deletingAddressSpace, ignoreTopCachePageFlags); 2524 } 2525 2526 if (!area->cache->temporary) 2527 area->cache->WriteModified(); 2528 2529 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2530 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2531 2532 arch_vm_unset_memory_type(area); 2533 addressSpace->RemoveArea(area, allocationFlags); 2534 addressSpace->Put(); 2535 2536 area->cache->RemoveArea(area); 2537 area->cache->ReleaseRef(); 2538 2539 addressSpace->DeleteArea(area, allocationFlags); 2540 } 2541 2542 2543 status_t 2544 vm_delete_area(team_id team, area_id id, bool kernel) 2545 { 2546 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n", 2547 team, id)); 2548 2549 // lock the address space and make sure the area isn't wired 2550 AddressSpaceWriteLocker locker; 2551 VMArea* area; 2552 AreaCacheLocker cacheLocker; 2553 2554 do { 2555 status_t status = locker.SetFromArea(team, id, area); 2556 if (status != B_OK) 2557 return status; 2558 2559 cacheLocker.SetTo(area); 2560 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2561 2562 cacheLocker.Unlock(); 2563 2564 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2565 return B_NOT_ALLOWED; 2566 2567 delete_area(locker.AddressSpace(), area, false); 2568 return B_OK; 2569 } 2570 2571 2572 /*! Creates a new cache on top of given cache, moves all areas from 2573 the old cache to the new one, and changes the protection of all affected 2574 areas' pages to read-only. If requested, wired pages are moved up to the 2575 new cache and copies are added to the old cache in their place. 2576 Preconditions: 2577 - The given cache must be locked. 2578 - All of the cache's areas' address spaces must be read locked. 2579 - Either the cache must not have any wired ranges or a page reservation for 2580 all wired pages must be provided, so they can be copied. 2581 2582 \param lowerCache The cache on top of which a new cache shall be created. 2583 \param wiredPagesReservation If \c NULL there must not be any wired pages 2584 in \a lowerCache. Otherwise as many pages must be reserved as the cache 2585 has wired page. The wired pages are copied in this case. 2586 */ 2587 static status_t 2588 vm_copy_on_write_area(VMCache* lowerCache, 2589 vm_page_reservation* wiredPagesReservation) 2590 { 2591 VMCache* upperCache; 2592 2593 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2594 2595 // We need to separate the cache from its areas. The cache goes one level 2596 // deeper and we create a new cache inbetween. 2597 2598 // create an anonymous cache 2599 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2600 lowerCache->GuardSize() / B_PAGE_SIZE, 2601 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL, 2602 VM_PRIORITY_USER); 2603 if (status != B_OK) 2604 return status; 2605 2606 upperCache->Lock(); 2607 2608 upperCache->temporary = 1; 2609 upperCache->virtual_base = lowerCache->virtual_base; 2610 upperCache->virtual_end = lowerCache->virtual_end; 2611 2612 // transfer the lower cache areas to the upper cache 2613 rw_lock_write_lock(&sAreaCacheLock); 2614 upperCache->TransferAreas(lowerCache); 2615 rw_lock_write_unlock(&sAreaCacheLock); 2616 2617 lowerCache->AddConsumer(upperCache); 2618 2619 // We now need to remap all pages from all of the cache's areas read-only, 2620 // so that a copy will be created on next write access. If there are wired 2621 // pages, we keep their protection, move them to the upper cache and create 2622 // copies for the lower cache. 2623 if (wiredPagesReservation != NULL) { 2624 // We need to handle wired pages -- iterate through the cache's pages. 
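		// Wired pages keep their current (possibly writable) protection: each
		// one is moved into the new upper cache and a freshly made copy takes
		// its place in the lower cache, so their existing mappings need not
		// be touched.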
2625 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2626 vm_page* page = it.Next();) { 2627 if (page->WiredCount() > 0) { 2628 // allocate a new page and copy the wired one 2629 vm_page* copiedPage = vm_page_allocate_page( 2630 wiredPagesReservation, PAGE_STATE_ACTIVE); 2631 2632 vm_memcpy_physical_page( 2633 copiedPage->physical_page_number * B_PAGE_SIZE, 2634 page->physical_page_number * B_PAGE_SIZE); 2635 2636 // move the wired page to the upper cache (note: removing is OK 2637 // with the SplayTree iterator) and insert the copy 2638 upperCache->MovePage(page); 2639 lowerCache->InsertPage(copiedPage, 2640 page->cache_offset * B_PAGE_SIZE); 2641 2642 DEBUG_PAGE_ACCESS_END(copiedPage); 2643 } else { 2644 // Change the protection of this page in all areas. 2645 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2646 tempArea = tempArea->cache_next) { 2647 if (!is_page_in_area(tempArea, page)) 2648 continue; 2649 2650 // The area must be readable in the same way it was 2651 // previously writable. 2652 addr_t address = virtual_page_address(tempArea, page); 2653 uint32 protection = 0; 2654 uint32 pageProtection = get_area_page_protection(tempArea, address); 2655 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2656 protection |= B_KERNEL_READ_AREA; 2657 if ((pageProtection & B_READ_AREA) != 0) 2658 protection |= B_READ_AREA; 2659 2660 VMTranslationMap* map 2661 = tempArea->address_space->TranslationMap(); 2662 map->Lock(); 2663 map->ProtectPage(tempArea, address, protection); 2664 map->Unlock(); 2665 } 2666 } 2667 } 2668 } else { 2669 ASSERT(lowerCache->WiredPagesCount() == 0); 2670 2671 // just change the protection of all areas 2672 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2673 tempArea = tempArea->cache_next) { 2674 if (tempArea->page_protections != NULL) { 2675 // Change the protection of all pages in this area. 2676 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2677 map->Lock(); 2678 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2679 vm_page* page = it.Next();) { 2680 if (!is_page_in_area(tempArea, page)) 2681 continue; 2682 2683 // The area must be readable in the same way it was 2684 // previously writable. 2685 addr_t address = virtual_page_address(tempArea, page); 2686 uint32 protection = 0; 2687 uint32 pageProtection = get_area_page_protection(tempArea, address); 2688 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2689 protection |= B_KERNEL_READ_AREA; 2690 if ((pageProtection & B_READ_AREA) != 0) 2691 protection |= B_READ_AREA; 2692 2693 map->ProtectPage(tempArea, address, protection); 2694 } 2695 map->Unlock(); 2696 continue; 2697 } 2698 // The area must be readable in the same way it was previously 2699 // writable. 2700 uint32 protection = 0; 2701 if ((tempArea->protection & B_KERNEL_READ_AREA) != 0) 2702 protection |= B_KERNEL_READ_AREA; 2703 if ((tempArea->protection & B_READ_AREA) != 0) 2704 protection |= B_READ_AREA; 2705 2706 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2707 map->Lock(); 2708 map->ProtectArea(tempArea, protection); 2709 map->Unlock(); 2710 } 2711 } 2712 2713 vm_area_put_locked_cache(upperCache); 2714 2715 return B_OK; 2716 } 2717 2718 2719 area_id 2720 vm_copy_area(team_id team, const char* name, void** _address, 2721 uint32 addressSpec, area_id sourceID) 2722 { 2723 // Do the locking: target address space, all address spaces associated with 2724 // the source cache, and the cache itself. 
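	// The wired-page reservation has to be made with all locks dropped, so
	// the locking is retried in a loop: if more pages got wired in the
	// meantime, the old reservation is released and a larger one is made.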
2725 MultiAddressSpaceLocker locker; 2726 VMAddressSpace* targetAddressSpace; 2727 VMCache* cache; 2728 VMArea* source; 2729 AreaCacheLocker cacheLocker; 2730 status_t status; 2731 bool sharedArea; 2732 2733 page_num_t wiredPages = 0; 2734 vm_page_reservation wiredPagesReservation; 2735 2736 bool restart; 2737 do { 2738 restart = false; 2739 2740 locker.Unset(); 2741 status = locker.AddTeam(team, true, &targetAddressSpace); 2742 if (status == B_OK) { 2743 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2744 &cache); 2745 } 2746 if (status != B_OK) 2747 return status; 2748 2749 cacheLocker.SetTo(cache, true); // already locked 2750 2751 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2752 2753 page_num_t oldWiredPages = wiredPages; 2754 wiredPages = 0; 2755 2756 // If the source area isn't shared, count the number of wired pages in 2757 // the cache and reserve as many pages. 2758 if (!sharedArea) { 2759 wiredPages = cache->WiredPagesCount(); 2760 2761 if (wiredPages > oldWiredPages) { 2762 cacheLocker.Unlock(); 2763 locker.Unlock(); 2764 2765 if (oldWiredPages > 0) 2766 vm_page_unreserve_pages(&wiredPagesReservation); 2767 2768 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2769 VM_PRIORITY_USER); 2770 2771 restart = true; 2772 } 2773 } else if (oldWiredPages > 0) 2774 vm_page_unreserve_pages(&wiredPagesReservation); 2775 } while (restart); 2776 2777 // unreserve pages later 2778 struct PagesUnreserver { 2779 PagesUnreserver(vm_page_reservation* reservation) 2780 : 2781 fReservation(reservation) 2782 { 2783 } 2784 2785 ~PagesUnreserver() 2786 { 2787 if (fReservation != NULL) 2788 vm_page_unreserve_pages(fReservation); 2789 } 2790 2791 private: 2792 vm_page_reservation* fReservation; 2793 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2794 2795 bool writableCopy 2796 = (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2797 uint8* targetPageProtections = NULL; 2798 2799 if (source->page_protections != NULL) { 2800 size_t bytes = area_page_protections_size(source->Size()); 2801 targetPageProtections = (uint8*)malloc_etc(bytes, 2802 (source->address_space == VMAddressSpace::Kernel() 2803 || targetAddressSpace == VMAddressSpace::Kernel()) 2804 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 2805 if (targetPageProtections == NULL) 2806 return B_NO_MEMORY; 2807 2808 memcpy(targetPageProtections, source->page_protections, bytes); 2809 2810 if (!writableCopy) { 2811 for (size_t i = 0; i < bytes; i++) { 2812 if ((targetPageProtections[i] 2813 & (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) { 2814 writableCopy = true; 2815 break; 2816 } 2817 } 2818 } 2819 } 2820 2821 if (addressSpec == B_CLONE_ADDRESS) { 2822 addressSpec = B_EXACT_ADDRESS; 2823 *_address = (void*)source->Base(); 2824 } 2825 2826 // First, create a cache on top of the source area, respectively use the 2827 // existing one, if this is a shared area. 2828 2829 VMArea* target; 2830 virtual_address_restrictions addressRestrictions = {}; 2831 addressRestrictions.address = *_address; 2832 addressRestrictions.address_specification = addressSpec; 2833 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2834 name, source->Size(), source->wiring, source->protection, 2835 source->protection_max, 2836 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2837 writableCopy ? 
0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2838 &addressRestrictions, true, &target, _address); 2839 if (status < B_OK) { 2840 free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE); 2841 return status; 2842 } 2843 2844 if (targetPageProtections != NULL) 2845 target->page_protections = targetPageProtections; 2846 2847 if (sharedArea) { 2848 // The new area uses the old area's cache, but map_backing_store() 2849 // hasn't acquired a ref. So we have to do that now. 2850 cache->AcquireRefLocked(); 2851 } 2852 2853 // If the source area is writable, we need to move it one layer up as well 2854 2855 if (!sharedArea) { 2856 if (writableCopy) { 2857 // TODO: do something more useful if this fails! 2858 if (vm_copy_on_write_area(cache, 2859 wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) { 2860 panic("vm_copy_on_write_area() failed!\n"); 2861 } 2862 } 2863 } 2864 2865 // we return the ID of the newly created area 2866 return target->id; 2867 } 2868 2869 2870 status_t 2871 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2872 bool kernel) 2873 { 2874 fix_protection(&newProtection); 2875 2876 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2877 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2878 2879 if (!arch_vm_supports_protection(newProtection)) 2880 return B_NOT_SUPPORTED; 2881 2882 bool becomesWritable 2883 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2884 2885 // lock address spaces and cache 2886 MultiAddressSpaceLocker locker; 2887 VMCache* cache; 2888 VMArea* area; 2889 status_t status; 2890 AreaCacheLocker cacheLocker; 2891 bool isWritable; 2892 2893 bool restart; 2894 do { 2895 restart = false; 2896 2897 locker.Unset(); 2898 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2899 if (status != B_OK) 2900 return status; 2901 2902 cacheLocker.SetTo(cache, true); // already locked 2903 2904 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 2905 || (area->protection & B_KERNEL_AREA) != 0)) { 2906 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2907 "set protection %#" B_PRIx32 " on kernel area %" B_PRId32 2908 " (%s)\n", team, newProtection, areaID, area->name); 2909 return B_NOT_ALLOWED; 2910 } 2911 if (!kernel && area->protection_max != 0 2912 && (newProtection & area->protection_max) 2913 != (newProtection & B_USER_PROTECTION)) { 2914 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2915 "set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel " 2916 "area %" B_PRId32 " (%s)\n", team, newProtection, 2917 area->protection_max, areaID, area->name); 2918 return B_NOT_ALLOWED; 2919 } 2920 2921 if (team != VMAddressSpace::KernelID() 2922 && area->address_space->ID() != team) { 2923 // unless you're the kernel, you are only allowed to set 2924 // the protection of your own areas 2925 return B_NOT_ALLOWED; 2926 } 2927 2928 if (area->protection == newProtection) 2929 return B_OK; 2930 2931 isWritable 2932 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2933 2934 // Make sure the area (respectively, if we're going to call 2935 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2936 // wired ranges. 
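		// If the area becomes writable and the cache has consumers,
		// vm_copy_on_write_area() will be called below, which affects all
		// areas of the cache -- so all of them must be free of wired ranges,
		// not just the area being reprotected.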
2937 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2938 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2939 otherArea = otherArea->cache_next) { 2940 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2941 restart = true; 2942 break; 2943 } 2944 } 2945 } else { 2946 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2947 restart = true; 2948 } 2949 } while (restart); 2950 2951 bool changePageProtection = true; 2952 bool changeTopCachePagesOnly = false; 2953 2954 if (isWritable && !becomesWritable) { 2955 // writable -> !writable 2956 2957 if (cache->source != NULL && cache->temporary) { 2958 if (cache->CountWritableAreas(area) == 0) { 2959 // Since this cache now lives from the pages in its source cache, 2960 // we can change the cache's commitment to take only those pages 2961 // into account that really are in this cache. 2962 2963 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2964 team == VMAddressSpace::KernelID() 2965 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2966 2967 // TODO: we may be able to join with our source cache, if 2968 // count == 0 2969 } 2970 } 2971 2972 // If only the writability changes, we can just remap the pages of the 2973 // top cache, since the pages of lower caches are mapped read-only 2974 // anyway. That's advantageous only, if the number of pages in the cache 2975 // is significantly smaller than the number of pages in the area, 2976 // though. 2977 if (newProtection 2978 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2979 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2980 changeTopCachePagesOnly = true; 2981 } 2982 } else if (!isWritable && becomesWritable) { 2983 // !writable -> writable 2984 2985 if (!cache->consumers.IsEmpty()) { 2986 // There are consumers -- we have to insert a new cache. Fortunately 2987 // vm_copy_on_write_area() does everything that's needed. 2988 changePageProtection = false; 2989 status = vm_copy_on_write_area(cache, NULL); 2990 } else { 2991 // No consumers, so we don't need to insert a new one. 2992 if (cache->source != NULL && cache->temporary) { 2993 // the cache's commitment must contain all possible pages 2994 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2995 team == VMAddressSpace::KernelID() 2996 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2997 } 2998 2999 if (status == B_OK && cache->source != NULL) { 3000 // There's a source cache, hence we can't just change all pages' 3001 // protection or we might allow writing into pages belonging to 3002 // a lower cache. 
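				// Only pages that are already present in this cache get
				// remapped writable; pages still provided by the source cache
				// stay read-only and are copied up on the next write fault.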
3003 changeTopCachePagesOnly = true; 3004 } 3005 } 3006 } else { 3007 // we don't have anything special to do in all other cases 3008 } 3009 3010 if (status == B_OK) { 3011 // remap existing pages in this cache 3012 if (changePageProtection) { 3013 VMTranslationMap* map = area->address_space->TranslationMap(); 3014 map->Lock(); 3015 3016 if (changeTopCachePagesOnly) { 3017 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 3018 page_num_t lastPageOffset 3019 = firstPageOffset + area->Size() / B_PAGE_SIZE; 3020 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 3021 vm_page* page = it.Next();) { 3022 if (page->cache_offset >= firstPageOffset 3023 && page->cache_offset <= lastPageOffset) { 3024 addr_t address = virtual_page_address(area, page); 3025 map->ProtectPage(area, address, newProtection); 3026 } 3027 } 3028 } else 3029 map->ProtectArea(area, newProtection); 3030 3031 map->Unlock(); 3032 } 3033 3034 area->protection = newProtection; 3035 } 3036 3037 return status; 3038 } 3039 3040 3041 status_t 3042 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 3043 { 3044 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 3045 if (addressSpace == NULL) 3046 return B_BAD_TEAM_ID; 3047 3048 VMTranslationMap* map = addressSpace->TranslationMap(); 3049 3050 map->Lock(); 3051 uint32 dummyFlags; 3052 status_t status = map->Query(vaddr, paddr, &dummyFlags); 3053 map->Unlock(); 3054 3055 addressSpace->Put(); 3056 return status; 3057 } 3058 3059 3060 /*! The page's cache must be locked. 3061 */ 3062 bool 3063 vm_test_map_modification(vm_page* page) 3064 { 3065 if (page->modified) 3066 return true; 3067 3068 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3069 vm_page_mapping* mapping; 3070 while ((mapping = iterator.Next()) != NULL) { 3071 VMArea* area = mapping->area; 3072 VMTranslationMap* map = area->address_space->TranslationMap(); 3073 3074 phys_addr_t physicalAddress; 3075 uint32 flags; 3076 map->Lock(); 3077 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 3078 map->Unlock(); 3079 3080 if ((flags & PAGE_MODIFIED) != 0) 3081 return true; 3082 } 3083 3084 return false; 3085 } 3086 3087 3088 /*! The page's cache must be locked. 3089 */ 3090 void 3091 vm_clear_map_flags(vm_page* page, uint32 flags) 3092 { 3093 if ((flags & PAGE_ACCESSED) != 0) 3094 page->accessed = false; 3095 if ((flags & PAGE_MODIFIED) != 0) 3096 page->modified = false; 3097 3098 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3099 vm_page_mapping* mapping; 3100 while ((mapping = iterator.Next()) != NULL) { 3101 VMArea* area = mapping->area; 3102 VMTranslationMap* map = area->address_space->TranslationMap(); 3103 3104 map->Lock(); 3105 map->ClearFlags(virtual_page_address(area, page), flags); 3106 map->Unlock(); 3107 } 3108 } 3109 3110 3111 /*! Removes all mappings from a page. 3112 After you've called this function, the page is unmapped from memory and 3113 the page's \c accessed and \c modified flags have been updated according 3114 to the state of the mappings. 3115 The page's cache must be locked. 
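	The page itself is neither freed nor removed from its cache; only the
	translation map entries referring to it are removed.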
3116 */ 3117 void 3118 vm_remove_all_page_mappings(vm_page* page) 3119 { 3120 while (vm_page_mapping* mapping = page->mappings.Head()) { 3121 VMArea* area = mapping->area; 3122 VMTranslationMap* map = area->address_space->TranslationMap(); 3123 addr_t address = virtual_page_address(area, page); 3124 map->UnmapPage(area, address, false); 3125 } 3126 } 3127 3128 3129 int32 3130 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 3131 { 3132 int32 count = 0; 3133 3134 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 3135 vm_page_mapping* mapping; 3136 while ((mapping = iterator.Next()) != NULL) { 3137 VMArea* area = mapping->area; 3138 VMTranslationMap* map = area->address_space->TranslationMap(); 3139 3140 bool modified; 3141 if (map->ClearAccessedAndModified(area, 3142 virtual_page_address(area, page), false, modified)) { 3143 count++; 3144 } 3145 3146 page->modified |= modified; 3147 } 3148 3149 3150 if (page->accessed) { 3151 count++; 3152 page->accessed = false; 3153 } 3154 3155 return count; 3156 } 3157 3158 3159 /*! Removes all mappings of a page and/or clears the accessed bits of the 3160 mappings. 3161 The function iterates through the page mappings and removes them until 3162 encountering one that has been accessed. From then on it will continue to 3163 iterate, but only clear the accessed flag of the mapping. The page's 3164 \c modified bit will be updated accordingly, the \c accessed bit will be 3165 cleared. 3166 \return The number of mapping accessed bits encountered, including the 3167 \c accessed bit of the page itself. If \c 0 is returned, all mappings 3168 of the page have been removed. 3169 */ 3170 int32 3171 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 3172 { 3173 ASSERT(page->WiredCount() == 0); 3174 3175 if (page->accessed) 3176 return vm_clear_page_mapping_accessed_flags(page); 3177 3178 while (vm_page_mapping* mapping = page->mappings.Head()) { 3179 VMArea* area = mapping->area; 3180 VMTranslationMap* map = area->address_space->TranslationMap(); 3181 addr_t address = virtual_page_address(area, page); 3182 bool modified = false; 3183 if (map->ClearAccessedAndModified(area, address, true, modified)) { 3184 page->accessed = true; 3185 page->modified |= modified; 3186 return vm_clear_page_mapping_accessed_flags(page); 3187 } 3188 page->modified |= modified; 3189 } 3190 3191 return 0; 3192 } 3193 3194 3195 static int 3196 display_mem(int argc, char** argv) 3197 { 3198 bool physical = false; 3199 addr_t copyAddress; 3200 int32 displayWidth; 3201 int32 itemSize; 3202 int32 num = -1; 3203 addr_t address; 3204 int i = 1, j; 3205 3206 if (argc > 1 && argv[1][0] == '-') { 3207 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3208 physical = true; 3209 i++; 3210 } else 3211 i = 99; 3212 } 3213 3214 if (argc < i + 1 || argc > i + 2) { 3215 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3216 "\tdl - 8 bytes\n" 3217 "\tdw - 4 bytes\n" 3218 "\tds - 2 bytes\n" 3219 "\tdb - 1 byte\n" 3220 "\tstring - a whole string\n" 3221 " -p or --physical only allows memory from a single page to be " 3222 "displayed.\n"); 3223 return 0; 3224 } 3225 3226 address = parse_expression(argv[i]); 3227 3228 if (argc > i + 1) 3229 num = parse_expression(argv[i + 1]); 3230 3231 // build the format string 3232 if (strcmp(argv[0], "db") == 0) { 3233 itemSize = 1; 3234 displayWidth = 16; 3235 } else if (strcmp(argv[0], "ds") == 0) { 3236 itemSize = 2; 3237 displayWidth = 8; 3238 } else if (strcmp(argv[0], "dw") == 0) { 3239 
itemSize = 4; 3240 displayWidth = 4; 3241 } else if (strcmp(argv[0], "dl") == 0) { 3242 itemSize = 8; 3243 displayWidth = 2; 3244 } else if (strcmp(argv[0], "string") == 0) { 3245 itemSize = 1; 3246 displayWidth = -1; 3247 } else { 3248 kprintf("display_mem called in an invalid way!\n"); 3249 return 0; 3250 } 3251 3252 if (num <= 0) 3253 num = displayWidth; 3254 3255 void* physicalPageHandle = NULL; 3256 3257 if (physical) { 3258 int32 offset = address & (B_PAGE_SIZE - 1); 3259 if (num * itemSize + offset > B_PAGE_SIZE) { 3260 num = (B_PAGE_SIZE - offset) / itemSize; 3261 kprintf("NOTE: number of bytes has been cut to page size\n"); 3262 } 3263 3264 address = ROUNDDOWN(address, B_PAGE_SIZE); 3265 3266 if (vm_get_physical_page_debug(address, ©Address, 3267 &physicalPageHandle) != B_OK) { 3268 kprintf("getting the hardware page failed."); 3269 return 0; 3270 } 3271 3272 address += offset; 3273 copyAddress += offset; 3274 } else 3275 copyAddress = address; 3276 3277 if (!strcmp(argv[0], "string")) { 3278 kprintf("%p \"", (char*)copyAddress); 3279 3280 // string mode 3281 for (i = 0; true; i++) { 3282 char c; 3283 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1) 3284 != B_OK 3285 || c == '\0') { 3286 break; 3287 } 3288 3289 if (c == '\n') 3290 kprintf("\\n"); 3291 else if (c == '\t') 3292 kprintf("\\t"); 3293 else { 3294 if (!isprint(c)) 3295 c = '.'; 3296 3297 kprintf("%c", c); 3298 } 3299 } 3300 3301 kprintf("\"\n"); 3302 } else { 3303 // number mode 3304 for (i = 0; i < num; i++) { 3305 uint64 value; 3306 3307 if ((i % displayWidth) == 0) { 3308 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3309 if (i != 0) 3310 kprintf("\n"); 3311 3312 kprintf("[0x%lx] ", address + i * itemSize); 3313 3314 for (j = 0; j < displayed; j++) { 3315 char c; 3316 if (debug_memcpy(B_CURRENT_TEAM, &c, 3317 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 3318 displayed = j; 3319 break; 3320 } 3321 if (!isprint(c)) 3322 c = '.'; 3323 3324 kprintf("%c", c); 3325 } 3326 if (num > displayWidth) { 3327 // make sure the spacing in the last line is correct 3328 for (j = displayed; j < displayWidth * itemSize; j++) 3329 kprintf(" "); 3330 } 3331 kprintf(" "); 3332 } 3333 3334 if (debug_memcpy(B_CURRENT_TEAM, &value, 3335 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 3336 kprintf("read fault"); 3337 break; 3338 } 3339 3340 switch (itemSize) { 3341 case 1: 3342 kprintf(" %02" B_PRIx8, *(uint8*)&value); 3343 break; 3344 case 2: 3345 kprintf(" %04" B_PRIx16, *(uint16*)&value); 3346 break; 3347 case 4: 3348 kprintf(" %08" B_PRIx32, *(uint32*)&value); 3349 break; 3350 case 8: 3351 kprintf(" %016" B_PRIx64, *(uint64*)&value); 3352 break; 3353 } 3354 } 3355 3356 kprintf("\n"); 3357 } 3358 3359 if (physical) { 3360 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3361 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3362 } 3363 return 0; 3364 } 3365 3366 3367 static void 3368 dump_cache_tree_recursively(VMCache* cache, int level, 3369 VMCache* highlightCache) 3370 { 3371 // print this cache 3372 for (int i = 0; i < level; i++) 3373 kprintf(" "); 3374 if (cache == highlightCache) 3375 kprintf("%p <--\n", cache); 3376 else 3377 kprintf("%p\n", cache); 3378 3379 // recursively print its consumers 3380 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3381 VMCache* consumer = it.Next();) { 3382 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3383 } 3384 } 3385 3386 3387 static int 3388 dump_cache_tree(int argc, char** argv) 3389 { 3390 
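	// Expects the address of a VMCache as its single argument. The chain is
	// first walked up to the root cache (the transitive source), which is
	// then printed recursively with the given cache highlighted.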
if (argc != 2 || !strcmp(argv[1], "--help")) { 3391 kprintf("usage: %s <address>\n", argv[0]); 3392 return 0; 3393 } 3394 3395 addr_t address = parse_expression(argv[1]); 3396 if (address == 0) 3397 return 0; 3398 3399 VMCache* cache = (VMCache*)address; 3400 VMCache* root = cache; 3401 3402 // find the root cache (the transitive source) 3403 while (root->source != NULL) 3404 root = root->source; 3405 3406 dump_cache_tree_recursively(root, 0, cache); 3407 3408 return 0; 3409 } 3410 3411 3412 const char* 3413 vm_cache_type_to_string(int32 type) 3414 { 3415 switch (type) { 3416 case CACHE_TYPE_RAM: 3417 return "RAM"; 3418 case CACHE_TYPE_DEVICE: 3419 return "device"; 3420 case CACHE_TYPE_VNODE: 3421 return "vnode"; 3422 case CACHE_TYPE_NULL: 3423 return "null"; 3424 3425 default: 3426 return "unknown"; 3427 } 3428 } 3429 3430 3431 #if DEBUG_CACHE_LIST 3432 3433 static void 3434 update_cache_info_recursively(VMCache* cache, cache_info& info) 3435 { 3436 info.page_count += cache->page_count; 3437 if (cache->type == CACHE_TYPE_RAM) 3438 info.committed += cache->committed_size; 3439 3440 // recurse 3441 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3442 VMCache* consumer = it.Next();) { 3443 update_cache_info_recursively(consumer, info); 3444 } 3445 } 3446 3447 3448 static int 3449 cache_info_compare_page_count(const void* _a, const void* _b) 3450 { 3451 const cache_info* a = (const cache_info*)_a; 3452 const cache_info* b = (const cache_info*)_b; 3453 if (a->page_count == b->page_count) 3454 return 0; 3455 return a->page_count < b->page_count ? 1 : -1; 3456 } 3457 3458 3459 static int 3460 cache_info_compare_committed(const void* _a, const void* _b) 3461 { 3462 const cache_info* a = (const cache_info*)_a; 3463 const cache_info* b = (const cache_info*)_b; 3464 if (a->committed == b->committed) 3465 return 0; 3466 return a->committed < b->committed ? 
1 : -1; 3467 } 3468 3469 3470 static void 3471 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3472 { 3473 for (int i = 0; i < level; i++) 3474 kprintf(" "); 3475 3476 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3477 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3478 cache->virtual_base, cache->virtual_end, cache->page_count); 3479 3480 if (level == 0) 3481 kprintf("/%lu", info.page_count); 3482 3483 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3484 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3485 3486 if (level == 0) 3487 kprintf("/%lu", info.committed); 3488 } 3489 3490 // areas 3491 if (cache->areas != NULL) { 3492 VMArea* area = cache->areas; 3493 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3494 area->name, area->address_space->ID()); 3495 3496 while (area->cache_next != NULL) { 3497 area = area->cache_next; 3498 kprintf(", %" B_PRId32, area->id); 3499 } 3500 } 3501 3502 kputs("\n"); 3503 3504 // recurse 3505 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3506 VMCache* consumer = it.Next();) { 3507 dump_caches_recursively(consumer, info, level + 1); 3508 } 3509 } 3510 3511 3512 static int 3513 dump_caches(int argc, char** argv) 3514 { 3515 if (sCacheInfoTable == NULL) { 3516 kprintf("No cache info table!\n"); 3517 return 0; 3518 } 3519 3520 bool sortByPageCount = true; 3521 3522 for (int32 i = 1; i < argc; i++) { 3523 if (strcmp(argv[i], "-c") == 0) { 3524 sortByPageCount = false; 3525 } else { 3526 print_debugger_command_usage(argv[0]); 3527 return 0; 3528 } 3529 } 3530 3531 uint32 totalCount = 0; 3532 uint32 rootCount = 0; 3533 off_t totalCommitted = 0; 3534 page_num_t totalPages = 0; 3535 3536 VMCache* cache = gDebugCacheList; 3537 while (cache) { 3538 totalCount++; 3539 if (cache->source == NULL) { 3540 cache_info stackInfo; 3541 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3542 ? sCacheInfoTable[rootCount] : stackInfo; 3543 rootCount++; 3544 info.cache = cache; 3545 info.page_count = 0; 3546 info.committed = 0; 3547 update_cache_info_recursively(cache, info); 3548 totalCommitted += info.committed; 3549 totalPages += info.page_count; 3550 } 3551 3552 cache = cache->debug_next; 3553 } 3554 3555 if (rootCount <= (uint32)kCacheInfoTableCount) { 3556 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3557 sortByPageCount 3558 ? &cache_info_compare_page_count 3559 : &cache_info_compare_committed); 3560 } 3561 3562 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3563 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3564 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3565 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3566 "page count" : "committed size"); 3567 3568 if (rootCount <= (uint32)kCacheInfoTableCount) { 3569 for (uint32 i = 0; i < rootCount; i++) { 3570 cache_info& info = sCacheInfoTable[i]; 3571 dump_caches_recursively(info.cache, info, 0); 3572 } 3573 } else 3574 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3575 3576 return 0; 3577 } 3578 3579 #endif // DEBUG_CACHE_LIST 3580 3581 3582 static int 3583 dump_cache(int argc, char** argv) 3584 { 3585 VMCache* cache; 3586 bool showPages = false; 3587 int i = 1; 3588 3589 if (argc < 2 || !strcmp(argv[1], "--help")) { 3590 kprintf("usage: %s [-ps] <address>\n" 3591 " if -p is specified, all pages are shown, if -s is used\n" 3592 " only the cache info is shown respectively.\n", argv[0]); 3593 return 0; 3594 } 3595 while (argv[i][0] == '-') { 3596 char* arg = argv[i] + 1; 3597 while (arg[0]) { 3598 if (arg[0] == 'p') 3599 showPages = true; 3600 arg++; 3601 } 3602 i++; 3603 } 3604 if (argv[i] == NULL) { 3605 kprintf("%s: invalid argument, pass address\n", argv[0]); 3606 return 0; 3607 } 3608 3609 addr_t address = parse_expression(argv[i]); 3610 if (address == 0) 3611 return 0; 3612 3613 cache = (VMCache*)address; 3614 3615 cache->Dump(showPages); 3616 3617 set_debug_variable("_sourceCache", (addr_t)cache->source); 3618 3619 return 0; 3620 } 3621 3622 3623 static void 3624 dump_area_struct(VMArea* area, bool mappings) 3625 { 3626 kprintf("AREA: %p\n", area); 3627 kprintf("name:\t\t'%s'\n", area->name); 3628 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3629 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3630 kprintf("base:\t\t0x%lx\n", area->Base()); 3631 kprintf("size:\t\t0x%lx\n", area->Size()); 3632 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3633 kprintf("page_protection:%p\n", area->page_protections); 3634 kprintf("wiring:\t\t0x%x\n", area->wiring); 3635 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3636 kprintf("cache:\t\t%p\n", area->cache); 3637 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3638 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3639 kprintf("cache_next:\t%p\n", area->cache_next); 3640 kprintf("cache_prev:\t%p\n", area->cache_prev); 3641 3642 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3643 if (mappings) { 3644 kprintf("page mappings:\n"); 3645 while (iterator.HasNext()) { 3646 vm_page_mapping* mapping = iterator.Next(); 3647 kprintf(" %p", mapping->page); 3648 } 3649 kprintf("\n"); 3650 } else { 3651 uint32 count = 0; 3652 while (iterator.Next() != NULL) { 3653 count++; 3654 } 3655 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3656 } 3657 } 3658 3659 3660 static int 3661 dump_area(int argc, char** argv) 3662 { 3663 bool mappings = false; 3664 bool found = false; 3665 int32 index = 1; 3666 VMArea* area; 3667 addr_t num; 3668 3669 if (argc < 2 || !strcmp(argv[1], "--help")) { 3670 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3671 "All areas matching either id/address/name are listed. 
You can\n" 3672 "force to check only a specific item by prefixing the specifier\n" 3673 "with the id/contains/address/name keywords.\n" 3674 "-m shows the area's mappings as well.\n"); 3675 return 0; 3676 } 3677 3678 if (!strcmp(argv[1], "-m")) { 3679 mappings = true; 3680 index++; 3681 } 3682 3683 int32 mode = 0xf; 3684 if (!strcmp(argv[index], "id")) 3685 mode = 1; 3686 else if (!strcmp(argv[index], "contains")) 3687 mode = 2; 3688 else if (!strcmp(argv[index], "name")) 3689 mode = 4; 3690 else if (!strcmp(argv[index], "address")) 3691 mode = 0; 3692 if (mode != 0xf) 3693 index++; 3694 3695 if (index >= argc) { 3696 kprintf("No area specifier given.\n"); 3697 return 0; 3698 } 3699 3700 num = parse_expression(argv[index]); 3701 3702 if (mode == 0) { 3703 dump_area_struct((struct VMArea*)num, mappings); 3704 } else { 3705 // walk through the area list, looking for the arguments as a name 3706 3707 VMAreasTree::Iterator it = VMAreas::GetIterator(); 3708 while ((area = it.Next()) != NULL) { 3709 if (((mode & 4) != 0 3710 && !strcmp(argv[index], area->name)) 3711 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3712 || (((mode & 2) != 0 && area->Base() <= num 3713 && area->Base() + area->Size() > num))))) { 3714 dump_area_struct(area, mappings); 3715 found = true; 3716 } 3717 } 3718 3719 if (!found) 3720 kprintf("could not find area %s (%ld)\n", argv[index], num); 3721 } 3722 3723 return 0; 3724 } 3725 3726 3727 static int 3728 dump_area_list(int argc, char** argv) 3729 { 3730 VMArea* area; 3731 const char* name = NULL; 3732 int32 id = 0; 3733 3734 if (argc > 1) { 3735 id = parse_expression(argv[1]); 3736 if (id == 0) 3737 name = argv[1]; 3738 } 3739 3740 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3741 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3742 B_PRINTF_POINTER_WIDTH, "size"); 3743 3744 VMAreasTree::Iterator it = VMAreas::GetIterator(); 3745 while ((area = it.Next()) != NULL) { 3746 if ((id != 0 && area->address_space->ID() != id) 3747 || (name != NULL && strstr(area->name, name) == NULL)) 3748 continue; 3749 3750 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3751 area->id, (void*)area->Base(), (void*)area->Size(), 3752 area->protection, area->wiring, area->name); 3753 } 3754 return 0; 3755 } 3756 3757 3758 static int 3759 dump_available_memory(int argc, char** argv) 3760 { 3761 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3762 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3763 return 0; 3764 } 3765 3766 3767 static int 3768 dump_mapping_info(int argc, char** argv) 3769 { 3770 bool reverseLookup = false; 3771 bool pageLookup = false; 3772 3773 int argi = 1; 3774 for (; argi < argc && argv[argi][0] == '-'; argi++) { 3775 const char* arg = argv[argi]; 3776 if (strcmp(arg, "-r") == 0) { 3777 reverseLookup = true; 3778 } else if (strcmp(arg, "-p") == 0) { 3779 reverseLookup = true; 3780 pageLookup = true; 3781 } else { 3782 print_debugger_command_usage(argv[0]); 3783 return 0; 3784 } 3785 } 3786 3787 // We need at least one argument, the address. Optionally a thread ID can be 3788 // specified. 
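	// The thread ID is only used to pick a team and thus an address space;
	// for a reverse lookup without it, all address spaces are searched.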
3789 if (argi >= argc || argi + 2 < argc) { 3790 print_debugger_command_usage(argv[0]); 3791 return 0; 3792 } 3793 3794 uint64 addressValue; 3795 if (!evaluate_debug_expression(argv[argi++], &addressValue, false)) 3796 return 0; 3797 3798 Team* team = NULL; 3799 if (argi < argc) { 3800 uint64 threadID; 3801 if (!evaluate_debug_expression(argv[argi++], &threadID, false)) 3802 return 0; 3803 3804 Thread* thread = Thread::GetDebug(threadID); 3805 if (thread == NULL) { 3806 kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]); 3807 return 0; 3808 } 3809 3810 team = thread->team; 3811 } 3812 3813 if (reverseLookup) { 3814 phys_addr_t physicalAddress; 3815 if (pageLookup) { 3816 vm_page* page = (vm_page*)(addr_t)addressValue; 3817 physicalAddress = page->physical_page_number * B_PAGE_SIZE; 3818 } else { 3819 physicalAddress = (phys_addr_t)addressValue; 3820 physicalAddress -= physicalAddress % B_PAGE_SIZE; 3821 } 3822 3823 kprintf(" Team Virtual Address Area\n"); 3824 kprintf("--------------------------------------\n"); 3825 3826 struct Callback : VMTranslationMap::ReverseMappingInfoCallback { 3827 Callback() 3828 : 3829 fAddressSpace(NULL) 3830 { 3831 } 3832 3833 void SetAddressSpace(VMAddressSpace* addressSpace) 3834 { 3835 fAddressSpace = addressSpace; 3836 } 3837 3838 virtual bool HandleVirtualAddress(addr_t virtualAddress) 3839 { 3840 kprintf("%8" B_PRId32 " %#18" B_PRIxADDR, fAddressSpace->ID(), 3841 virtualAddress); 3842 if (VMArea* area = fAddressSpace->LookupArea(virtualAddress)) 3843 kprintf(" %8" B_PRId32 " %s\n", area->id, area->name); 3844 else 3845 kprintf("\n"); 3846 return false; 3847 } 3848 3849 private: 3850 VMAddressSpace* fAddressSpace; 3851 } callback; 3852 3853 if (team != NULL) { 3854 // team specified -- get its address space 3855 VMAddressSpace* addressSpace = team->address_space; 3856 if (addressSpace == NULL) { 3857 kprintf("Failed to get address space!\n"); 3858 return 0; 3859 } 3860 3861 callback.SetAddressSpace(addressSpace); 3862 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3863 physicalAddress, callback); 3864 } else { 3865 // no team specified -- iterate through all address spaces 3866 for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 3867 addressSpace != NULL; 3868 addressSpace = VMAddressSpace::DebugNext(addressSpace)) { 3869 callback.SetAddressSpace(addressSpace); 3870 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3871 physicalAddress, callback); 3872 } 3873 } 3874 } else { 3875 // get the address space 3876 addr_t virtualAddress = (addr_t)addressValue; 3877 virtualAddress -= virtualAddress % B_PAGE_SIZE; 3878 VMAddressSpace* addressSpace; 3879 if (IS_KERNEL_ADDRESS(virtualAddress)) { 3880 addressSpace = VMAddressSpace::Kernel(); 3881 } else if (team != NULL) { 3882 addressSpace = team->address_space; 3883 } else { 3884 Thread* thread = debug_get_debugged_thread(); 3885 if (thread == NULL || thread->team == NULL) { 3886 kprintf("Failed to get team!\n"); 3887 return 0; 3888 } 3889 3890 addressSpace = thread->team->address_space; 3891 } 3892 3893 if (addressSpace == NULL) { 3894 kprintf("Failed to get address space!\n"); 3895 return 0; 3896 } 3897 3898 // let the translation map implementation do the job 3899 addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress); 3900 } 3901 3902 return 0; 3903 } 3904 3905 3906 /*! Deletes all areas and reserved regions in the given address space. 3907 3908 The caller must ensure that none of the areas has any wired ranges. 
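	This is asserted for each area before it is deleted.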
3909 3910 \param addressSpace The address space. 3911 \param deletingAddressSpace \c true, if the address space is in the process 3912 of being deleted. 3913 */ 3914 void 3915 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3916 { 3917 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3918 addressSpace->ID())); 3919 3920 addressSpace->WriteLock(); 3921 3922 // remove all reserved areas in this address space 3923 addressSpace->UnreserveAllAddressRanges(0); 3924 3925 // delete all the areas in this address space 3926 while (VMArea* area = addressSpace->FirstArea()) { 3927 ASSERT(!area->IsWired()); 3928 delete_area(addressSpace, area, deletingAddressSpace); 3929 } 3930 3931 addressSpace->WriteUnlock(); 3932 } 3933 3934 3935 static area_id 3936 vm_area_for(addr_t address, bool kernel) 3937 { 3938 team_id team; 3939 if (IS_USER_ADDRESS(address)) { 3940 // we try the user team address space, if any 3941 team = VMAddressSpace::CurrentID(); 3942 if (team < 0) 3943 return team; 3944 } else 3945 team = VMAddressSpace::KernelID(); 3946 3947 AddressSpaceReadLocker locker(team); 3948 if (!locker.IsLocked()) 3949 return B_BAD_TEAM_ID; 3950 3951 VMArea* area = locker.AddressSpace()->LookupArea(address); 3952 if (area != NULL) { 3953 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0 3954 && (area->protection & B_KERNEL_AREA) != 0) 3955 return B_ERROR; 3956 3957 return area->id; 3958 } 3959 3960 return B_ERROR; 3961 } 3962 3963 3964 /*! Frees physical pages that were used during the boot process. 3965 \a end is inclusive. 3966 */ 3967 static void 3968 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3969 { 3970 // free all physical pages in the specified range 3971 3972 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3973 phys_addr_t physicalAddress; 3974 uint32 flags; 3975 3976 if (map->Query(current, &physicalAddress, &flags) == B_OK 3977 && (flags & PAGE_PRESENT) != 0) { 3978 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3979 if (page != NULL && page->State() != PAGE_STATE_FREE 3980 && page->State() != PAGE_STATE_CLEAR 3981 && page->State() != PAGE_STATE_UNUSED) { 3982 DEBUG_PAGE_ACCESS_START(page); 3983 vm_page_set_state(page, PAGE_STATE_FREE); 3984 } 3985 } 3986 } 3987 3988 // unmap the memory 3989 map->Unmap(start, end); 3990 } 3991 3992 3993 void 3994 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3995 { 3996 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3997 addr_t end = start + (size - 1); 3998 addr_t lastEnd = start; 3999 4000 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 4001 (void*)start, (void*)end)); 4002 4003 // The areas are sorted in virtual address space order, so 4004 // we just have to find the holes between them that fall 4005 // into the area we should dispose 4006 4007 map->Lock(); 4008 4009 for (VMAddressSpace::AreaIterator it 4010 = VMAddressSpace::Kernel()->GetAreaIterator(); 4011 VMArea* area = it.Next();) { 4012 addr_t areaStart = area->Base(); 4013 addr_t areaEnd = areaStart + (area->Size() - 1); 4014 4015 if (areaEnd < start) 4016 continue; 4017 4018 if (areaStart > end) { 4019 // we are done, the area is already beyond of what we have to free 4020 break; 4021 } 4022 4023 if (areaStart > lastEnd) { 4024 // this is something we can free 4025 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 4026 (void*)areaStart)); 4027 unmap_and_free_physical_pages(map, lastEnd, 
areaStart - 1); 4028 } 4029 4030 if (areaEnd >= end) { 4031 lastEnd = areaEnd; 4032 // no +1 to prevent potential overflow 4033 break; 4034 } 4035 4036 lastEnd = areaEnd + 1; 4037 } 4038 4039 if (lastEnd < end) { 4040 // we can also get rid of some space at the end of the area 4041 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 4042 (void*)end)); 4043 unmap_and_free_physical_pages(map, lastEnd, end); 4044 } 4045 4046 map->Unlock(); 4047 } 4048 4049 4050 static void 4051 create_preloaded_image_areas(struct preloaded_image* _image) 4052 { 4053 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image); 4054 char name[B_OS_NAME_LENGTH]; 4055 void* address; 4056 int32 length; 4057 4058 // use file name to create a good area name 4059 char* fileName = strrchr(image->name, '/'); 4060 if (fileName == NULL) 4061 fileName = image->name; 4062 else 4063 fileName++; 4064 4065 length = strlen(fileName); 4066 // make sure there is enough space for the suffix 4067 if (length > 25) 4068 length = 25; 4069 4070 memcpy(name, fileName, length); 4071 strcpy(name + length, "_text"); 4072 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 4073 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 4074 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 4075 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4076 // this will later be remapped read-only/executable by the 4077 // ELF initialization code 4078 4079 strcpy(name + length, "_data"); 4080 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 4081 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 4082 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 4083 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4084 } 4085 4086 4087 /*! Frees all previously kernel arguments areas from the kernel_args structure. 4088 Any boot loader resources contained in that arguments must not be accessed 4089 anymore past this point. 4090 */ 4091 void 4092 vm_free_kernel_args(kernel_args* args) 4093 { 4094 uint32 i; 4095 4096 TRACE(("vm_free_kernel_args()\n")); 4097 4098 for (i = 0; i < args->num_kernel_args_ranges; i++) { 4099 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start); 4100 if (area >= B_OK) 4101 delete_area(area); 4102 } 4103 } 4104 4105 4106 static void 4107 allocate_kernel_args(kernel_args* args) 4108 { 4109 TRACE(("allocate_kernel_args()\n")); 4110 4111 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 4112 void* address = (void*)(addr_t)args->kernel_args_range[i].start; 4113 4114 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 4115 args->kernel_args_range[i].size, B_ALREADY_WIRED, 4116 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4117 } 4118 } 4119 4120 4121 static void 4122 unreserve_boot_loader_ranges(kernel_args* args) 4123 { 4124 TRACE(("unreserve_boot_loader_ranges()\n")); 4125 4126 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4127 vm_unreserve_address_range(VMAddressSpace::KernelID(), 4128 (void*)(addr_t)args->virtual_allocated_range[i].start, 4129 args->virtual_allocated_range[i].size); 4130 } 4131 } 4132 4133 4134 static void 4135 reserve_boot_loader_ranges(kernel_args* args) 4136 { 4137 TRACE(("reserve_boot_loader_ranges()\n")); 4138 4139 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 4140 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start; 4141 4142 // If the address is no kernel address, we just skip it. The 4143 // architecture specific code has to deal with it. 
4144 if (!IS_KERNEL_ADDRESS(address)) { 4145 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 4146 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 4147 continue; 4148 } 4149 4150 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 4151 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 4152 if (status < B_OK) 4153 panic("could not reserve boot loader ranges\n"); 4154 } 4155 } 4156 4157 4158 static addr_t 4159 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 4160 { 4161 size = PAGE_ALIGN(size); 4162 4163 // find a slot in the virtual allocation addr range 4164 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 4165 // check to see if the space between this one and the last is big enough 4166 addr_t rangeStart = args->virtual_allocated_range[i].start; 4167 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 4168 + args->virtual_allocated_range[i - 1].size; 4169 4170 addr_t base = alignment > 0 4171 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 4172 4173 if (base >= KERNEL_BASE && base < rangeStart 4174 && rangeStart - base >= size) { 4175 args->virtual_allocated_range[i - 1].size 4176 += base + size - previousRangeEnd; 4177 return base; 4178 } 4179 } 4180 4181 // we hadn't found one between allocation ranges. this is ok. 4182 // see if there's a gap after the last one 4183 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 4184 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 4185 + args->virtual_allocated_range[lastEntryIndex].size; 4186 addr_t base = alignment > 0 4187 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 4188 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 4189 args->virtual_allocated_range[lastEntryIndex].size 4190 += base + size - lastRangeEnd; 4191 return base; 4192 } 4193 4194 // see if there's a gap before the first one 4195 addr_t rangeStart = args->virtual_allocated_range[0].start; 4196 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 4197 base = rangeStart - size; 4198 if (alignment > 0) 4199 base = ROUNDDOWN(base, alignment); 4200 4201 if (base >= KERNEL_BASE) { 4202 args->virtual_allocated_range[0].start = base; 4203 args->virtual_allocated_range[0].size += rangeStart - base; 4204 return base; 4205 } 4206 } 4207 4208 return 0; 4209 } 4210 4211 4212 static bool 4213 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 4214 { 4215 // TODO: horrible brute-force method of determining if the page can be 4216 // allocated 4217 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4218 if (address >= args->physical_memory_range[i].start 4219 && address < args->physical_memory_range[i].start 4220 + args->physical_memory_range[i].size) 4221 return true; 4222 } 4223 return false; 4224 } 4225 4226 4227 page_num_t 4228 vm_allocate_early_physical_page(kernel_args* args) 4229 { 4230 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4231 phys_addr_t nextPage; 4232 4233 nextPage = args->physical_allocated_range[i].start 4234 + args->physical_allocated_range[i].size; 4235 // see if the page after the next allocated paddr run can be allocated 4236 if (i + 1 < args->num_physical_allocated_ranges 4237 && args->physical_allocated_range[i + 1].size != 0) { 4238 // see if the next page will collide with the next allocated range 4239 if (nextPage >= args->physical_allocated_range[i+1].start) 4240 continue; 4241 } 4242 // see if the next physical page 
fits in the memory block 4243 if (is_page_in_physical_memory_range(args, nextPage)) { 4244 // we got one! 4245 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4246 return nextPage / B_PAGE_SIZE; 4247 } 4248 } 4249 4250 // Expanding upwards didn't work, try going downwards. 4251 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4252 phys_addr_t nextPage; 4253 4254 nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE; 4255 // see if the page after the prev allocated paddr run can be allocated 4256 if (i > 0 && args->physical_allocated_range[i - 1].size != 0) { 4257 // see if the next page will collide with the next allocated range 4258 if (nextPage < args->physical_allocated_range[i-1].start 4259 + args->physical_allocated_range[i-1].size) 4260 continue; 4261 } 4262 // see if the next physical page fits in the memory block 4263 if (is_page_in_physical_memory_range(args, nextPage)) { 4264 // we got one! 4265 args->physical_allocated_range[i].start -= B_PAGE_SIZE; 4266 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4267 return nextPage / B_PAGE_SIZE; 4268 } 4269 } 4270 4271 return 0; 4272 // could not allocate a block 4273 } 4274 4275 4276 /*! This one uses the kernel_args' physical and virtual memory ranges to 4277 allocate some pages before the VM is completely up. 4278 */ 4279 addr_t 4280 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4281 uint32 attributes, addr_t alignment) 4282 { 4283 if (physicalSize > virtualSize) 4284 physicalSize = virtualSize; 4285 4286 // find the vaddr to allocate at 4287 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 4288 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 4289 if (virtualBase == 0) { 4290 panic("vm_allocate_early: could not allocate virtual address\n"); 4291 return 0; 4292 } 4293 4294 // map the pages 4295 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4296 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 4297 if (physicalAddress == 0) 4298 panic("error allocating early page!\n"); 4299 4300 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4301 4302 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4303 physicalAddress * B_PAGE_SIZE, attributes, 4304 &vm_allocate_early_physical_page); 4305 } 4306 4307 return virtualBase; 4308 } 4309 4310 4311 /*! The main entrance point to initialize the VM. 
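	It is called once during early boot, while the system is still single
	threaded: it sets up the translation map, the page and slab allocators,
	the kernel heap and the kernel address space, creates areas for everything
	the boot loader has already mapped, and registers the VM debugger commands.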
*/ 4312 status_t 4313 vm_init(kernel_args* args) 4314 { 4315 struct preloaded_image* image; 4316 void* address; 4317 status_t err = 0; 4318 uint32 i; 4319 4320 TRACE(("vm_init: entry\n")); 4321 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 4322 err = arch_vm_init(args); 4323 4324 // initialize some globals 4325 vm_page_init_num_pages(args); 4326 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 4327 4328 slab_init(args); 4329 4330 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4331 off_t heapSize = INITIAL_HEAP_SIZE; 4332 // try to accomodate low memory systems 4333 while (heapSize > sAvailableMemory / 8) 4334 heapSize /= 2; 4335 if (heapSize < 1024 * 1024) 4336 panic("vm_init: go buy some RAM please."); 4337 4338 // map in the new heap and initialize it 4339 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 4340 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 4341 TRACE(("heap at 0x%lx\n", heapBase)); 4342 heap_init(heapBase, heapSize); 4343 #endif 4344 4345 // initialize the free page list and physical page mapper 4346 vm_page_init(args); 4347 4348 // initialize the cache allocators 4349 vm_cache_init(args); 4350 4351 { 4352 status_t error = VMAreas::Init(); 4353 if (error != B_OK) 4354 panic("vm_init: error initializing areas map\n"); 4355 } 4356 4357 VMAddressSpace::Init(); 4358 reserve_boot_loader_ranges(args); 4359 4360 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4361 heap_init_post_area(); 4362 #endif 4363 4364 // Do any further initialization that the architecture dependant layers may 4365 // need now 4366 arch_vm_translation_map_init_post_area(args); 4367 arch_vm_init_post_area(args); 4368 vm_page_init_post_area(args); 4369 slab_init_post_area(); 4370 4371 // allocate areas to represent stuff that already exists 4372 4373 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4374 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 4375 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 4376 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4377 #endif 4378 4379 allocate_kernel_args(args); 4380 4381 create_preloaded_image_areas(args->kernel_image); 4382 4383 // allocate areas for preloaded images 4384 for (image = args->preloaded_images; image != NULL; image = image->next) 4385 create_preloaded_image_areas(image); 4386 4387 // allocate kernel stacks 4388 for (i = 0; i < args->num_cpus; i++) { 4389 char name[64]; 4390 4391 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 4392 address = (void*)args->cpu_kstack[i].start; 4393 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4394 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4395 } 4396 4397 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 4398 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 4399 4400 #if PARANOID_KERNEL_MALLOC 4401 vm_block_address_range("uninitialized heap memory", 4402 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4403 #endif 4404 #if PARANOID_KERNEL_FREE 4405 vm_block_address_range("freed heap memory", 4406 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4407 #endif 4408 4409 // create the object cache for the page mappings 4410 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 4411 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 4412 NULL, NULL); 4413 if (gPageMappingsObjectCache == NULL) 4414 panic("failed to create page mappings object cache"); 4415 4416 
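	// Keep a minimum number of mapping structures in reserve; page faults may
	// have to allocate them at times when waiting for memory is not an option
	// (see the object_cache_reserve() fallback in vm_soft_fault()).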
object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 4417 4418 #if DEBUG_CACHE_LIST 4419 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 4420 virtual_address_restrictions virtualRestrictions = {}; 4421 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 4422 physical_address_restrictions physicalRestrictions = {}; 4423 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 4424 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4425 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 4426 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions, 4427 &physicalRestrictions, (void**)&sCacheInfoTable); 4428 } 4429 #endif // DEBUG_CACHE_LIST 4430 4431 // add some debugger commands 4432 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4433 add_debugger_command("area", &dump_area, 4434 "Dump info about a particular area"); 4435 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 4436 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 4437 #if DEBUG_CACHE_LIST 4438 if (sCacheInfoTable != NULL) { 4439 add_debugger_command_etc("caches", &dump_caches, 4440 "List all VMCache trees", 4441 "[ \"-c\" ]\n" 4442 "All cache trees are listed sorted in decreasing order by number " 4443 "of\n" 4444 "used pages or, if \"-c\" is specified, by size of committed " 4445 "memory.\n", 4446 0); 4447 } 4448 #endif 4449 add_debugger_command("avail", &dump_available_memory, 4450 "Dump available memory"); 4451 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4452 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4453 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4454 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4455 add_debugger_command("string", &display_mem, "dump strings"); 4456 4457 add_debugger_command_etc("mapping", &dump_mapping_info, 4458 "Print address mapping information", 4459 "[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n" 4460 "Prints low-level page mapping information for a given address. If\n" 4461 "neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n" 4462 "address that is looked up in the translation map of the current\n" 4463 "team, respectively the team specified by thread ID <thread ID>. If\n" 4464 "\"-r\" is specified, <address> is a physical address that is\n" 4465 "searched in the translation map of all teams, respectively the team\n" 4466 "specified by thread ID <thread ID>. If \"-p\" is specified,\n" 4467 "<address> is the address of a vm_page structure. 
The behavior is\n" 4468 "equivalent to specifying \"-r\" with the physical address of that\n" 4469 "page.\n", 4470 0); 4471 4472 TRACE(("vm_init: exit\n")); 4473 4474 vm_cache_init_post_heap(); 4475 4476 return err; 4477 } 4478 4479 4480 status_t 4481 vm_init_post_sem(kernel_args* args) 4482 { 4483 // This frees all unused boot loader resources and makes its space available 4484 // again 4485 arch_vm_init_end(args); 4486 unreserve_boot_loader_ranges(args); 4487 4488 // fill in all of the semaphores that were not allocated before 4489 // since we're still single threaded and only the kernel address space 4490 // exists, it isn't that hard to find all of the ones we need to create 4491 4492 arch_vm_translation_map_init_post_sem(args); 4493 4494 slab_init_post_sem(); 4495 4496 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4497 heap_init_post_sem(); 4498 #endif 4499 4500 return B_OK; 4501 } 4502 4503 4504 status_t 4505 vm_init_post_thread(kernel_args* args) 4506 { 4507 vm_page_init_post_thread(args); 4508 slab_init_post_thread(); 4509 return heap_init_post_thread(); 4510 } 4511 4512 4513 status_t 4514 vm_init_post_modules(kernel_args* args) 4515 { 4516 return arch_vm_init_post_modules(args); 4517 } 4518 4519 4520 void 4521 permit_page_faults(void) 4522 { 4523 Thread* thread = thread_get_current_thread(); 4524 if (thread != NULL) 4525 atomic_add(&thread->page_faults_allowed, 1); 4526 } 4527 4528 4529 void 4530 forbid_page_faults(void) 4531 { 4532 Thread* thread = thread_get_current_thread(); 4533 if (thread != NULL) 4534 atomic_add(&thread->page_faults_allowed, -1); 4535 } 4536 4537 4538 status_t 4539 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute, 4540 bool isUser, addr_t* newIP) 4541 { 4542 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4543 faultAddress)); 4544 4545 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4546 4547 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4548 VMAddressSpace* addressSpace = NULL; 4549 4550 status_t status = B_OK; 4551 *newIP = 0; 4552 atomic_add((int32*)&sPageFaults, 1); 4553 4554 if (IS_KERNEL_ADDRESS(pageAddress)) { 4555 addressSpace = VMAddressSpace::GetKernel(); 4556 } else if (IS_USER_ADDRESS(pageAddress)) { 4557 addressSpace = VMAddressSpace::GetCurrent(); 4558 if (addressSpace == NULL) { 4559 if (!isUser) { 4560 dprintf("vm_page_fault: kernel thread accessing invalid user " 4561 "memory!\n"); 4562 status = B_BAD_ADDRESS; 4563 TPF(PageFaultError(-1, 4564 VMPageFaultTracing 4565 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4566 } else { 4567 // XXX weird state. 
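				// Presumably unreachable: a userland fault without a current
				// address space. Panic so the state can be examined instead
				// of silently failing the access.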
4568 panic("vm_page_fault: non kernel thread accessing user memory " 4569 "that doesn't exist!\n"); 4570 status = B_BAD_ADDRESS; 4571 } 4572 } 4573 } else { 4574 // the hit was probably in the 64k DMZ between kernel and user space 4575 // this keeps a user space thread from passing a buffer that crosses 4576 // into kernel space 4577 status = B_BAD_ADDRESS; 4578 TPF(PageFaultError(-1, 4579 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4580 } 4581 4582 if (status == B_OK) { 4583 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute, 4584 isUser, NULL); 4585 } 4586 4587 if (status < B_OK) { 4588 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4589 "0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n", 4590 strerror(status), address, faultAddress, isWrite, isUser, isExecute, 4591 thread_get_current_thread_id()); 4592 if (!isUser) { 4593 Thread* thread = thread_get_current_thread(); 4594 if (thread != NULL && thread->fault_handler != 0) { 4595 // this will cause the arch dependant page fault handler to 4596 // modify the IP on the interrupt frame or whatever to return 4597 // to this address 4598 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler); 4599 } else { 4600 // unhandled page fault in the kernel 4601 panic("vm_page_fault: unhandled page fault in kernel space at " 4602 "0x%lx, ip 0x%lx\n", address, faultAddress); 4603 } 4604 } else { 4605 Thread* thread = thread_get_current_thread(); 4606 4607 #ifdef TRACE_FAULTS 4608 VMArea* area = NULL; 4609 if (addressSpace != NULL) { 4610 addressSpace->ReadLock(); 4611 area = addressSpace->LookupArea(faultAddress); 4612 } 4613 4614 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4615 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4616 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4617 thread->team->Name(), thread->team->id, 4618 isWrite ? "write" : (isExecute ? "execute" : "read"), address, 4619 faultAddress, area ? area->name : "???", faultAddress - (area ? 4620 area->Base() : 0x0)); 4621 4622 if (addressSpace != NULL) 4623 addressSpace->ReadUnlock(); 4624 #endif 4625 4626 // If the thread has a signal handler for SIGSEGV, we simply 4627 // send it the signal. Otherwise we notify the user debugger 4628 // first. 4629 struct sigaction action; 4630 if ((sigaction(SIGSEGV, NULL, &action) == 0 4631 && action.sa_handler != SIG_DFL 4632 && action.sa_handler != SIG_IGN) 4633 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4634 SIGSEGV)) { 4635 Signal signal(SIGSEGV, 4636 status == B_PERMISSION_DENIED 4637 ? 
SEGV_ACCERR : SEGV_MAPERR, 4638 EFAULT, thread->team->id); 4639 signal.SetAddress((void*)address); 4640 send_signal_to_thread(thread, signal, 0); 4641 } 4642 } 4643 } 4644 4645 if (addressSpace != NULL) 4646 addressSpace->Put(); 4647 4648 return B_HANDLED_INTERRUPT; 4649 } 4650 4651 4652 struct PageFaultContext { 4653 AddressSpaceReadLocker addressSpaceLocker; 4654 VMCacheChainLocker cacheChainLocker; 4655 4656 VMTranslationMap* map; 4657 VMCache* topCache; 4658 off_t cacheOffset; 4659 vm_page_reservation reservation; 4660 bool isWrite; 4661 4662 // return values 4663 vm_page* page; 4664 bool restart; 4665 bool pageAllocated; 4666 4667 4668 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4669 : 4670 addressSpaceLocker(addressSpace, true), 4671 map(addressSpace->TranslationMap()), 4672 isWrite(isWrite) 4673 { 4674 } 4675 4676 ~PageFaultContext() 4677 { 4678 UnlockAll(); 4679 vm_page_unreserve_pages(&reservation); 4680 } 4681 4682 void Prepare(VMCache* topCache, off_t cacheOffset) 4683 { 4684 this->topCache = topCache; 4685 this->cacheOffset = cacheOffset; 4686 page = NULL; 4687 restart = false; 4688 pageAllocated = false; 4689 4690 cacheChainLocker.SetTo(topCache); 4691 } 4692 4693 void UnlockAll(VMCache* exceptCache = NULL) 4694 { 4695 topCache = NULL; 4696 addressSpaceLocker.Unlock(); 4697 cacheChainLocker.Unlock(exceptCache); 4698 } 4699 }; 4700 4701 4702 /*! Gets the page that should be mapped into the area. 4703 Returns an error code other than \c B_OK, if the page couldn't be found or 4704 paged in. The locking state of the address space and the caches is undefined 4705 in that case. 4706 Returns \c B_OK with \c context.restart set to \c true, if the functions 4707 had to unlock the address space and all caches and is supposed to be called 4708 again. 4709 Returns \c B_OK with \c context.restart set to \c false, if the page was 4710 found. It is returned in \c context.page. The address space will still be 4711 locked as well as all caches starting from the top cache to at least the 4712 cache the page lives in. 4713 */ 4714 static status_t 4715 fault_get_page(PageFaultContext& context) 4716 { 4717 VMCache* cache = context.topCache; 4718 VMCache* lastCache = NULL; 4719 vm_page* page = NULL; 4720 4721 while (cache != NULL) { 4722 // We already hold the lock of the cache at this point. 4723 4724 lastCache = cache; 4725 4726 page = cache->LookupPage(context.cacheOffset); 4727 if (page != NULL && page->busy) { 4728 // page must be busy -- wait for it to become unbusy 4729 context.UnlockAll(cache); 4730 cache->ReleaseRefLocked(); 4731 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4732 4733 // restart the whole process 4734 context.restart = true; 4735 return B_OK; 4736 } 4737 4738 if (page != NULL) 4739 break; 4740 4741 // The current cache does not contain the page we're looking for. 4742 4743 // see if the backing store has it 4744 if (cache->HasPage(context.cacheOffset)) { 4745 // insert a fresh page and mark it busy -- we're going to read it in 4746 page = vm_page_allocate_page(&context.reservation, 4747 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4748 cache->InsertPage(page, context.cacheOffset); 4749 4750 // We need to unlock all caches and the address space while reading 4751 // the page in. Keep a reference to the cache around. 
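			// Read() may block on disk I/O, and holding the cache chain and
			// address space locks across it would stall unrelated faults. The
			// busy page inserted above keeps other threads from trying to
			// read the same page concurrently in the meantime.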
4752 cache->AcquireRefLocked(); 4753 context.UnlockAll(); 4754 4755 // read the page in 4756 generic_io_vec vec; 4757 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4758 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4759 4760 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4761 B_PHYSICAL_IO_REQUEST, &bytesRead); 4762 4763 cache->Lock(); 4764 4765 if (status < B_OK) { 4766 // on error remove and free the page 4767 dprintf("reading page from cache %p returned: %s!\n", 4768 cache, strerror(status)); 4769 4770 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4771 cache->RemovePage(page); 4772 vm_page_set_state(page, PAGE_STATE_FREE); 4773 4774 cache->ReleaseRefAndUnlock(); 4775 return status; 4776 } 4777 4778 // mark the page unbusy again 4779 cache->MarkPageUnbusy(page); 4780 4781 DEBUG_PAGE_ACCESS_END(page); 4782 4783 // Since we needed to unlock everything temporarily, the area 4784 // situation might have changed. So we need to restart the whole 4785 // process. 4786 cache->ReleaseRefAndUnlock(); 4787 context.restart = true; 4788 return B_OK; 4789 } 4790 4791 cache = context.cacheChainLocker.LockSourceCache(); 4792 } 4793 4794 if (page == NULL) { 4795 // There was no adequate page, determine the cache for a clean one. 4796 // Read-only pages come in the deepest cache, only the top most cache 4797 // may have direct write access. 4798 cache = context.isWrite ? context.topCache : lastCache; 4799 4800 // allocate a clean page 4801 page = vm_page_allocate_page(&context.reservation, 4802 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4803 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4804 page->physical_page_number)); 4805 4806 // insert the new page into our cache 4807 cache->InsertPage(page, context.cacheOffset); 4808 context.pageAllocated = true; 4809 } else if (page->Cache() != context.topCache && context.isWrite) { 4810 // We have a page that has the data we want, but in the wrong cache 4811 // object so we need to copy it and stick it into the top cache. 4812 vm_page* sourcePage = page; 4813 4814 // TODO: If memory is low, it might be a good idea to steal the page 4815 // from our source cache -- if possible, that is. 4816 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4817 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4818 4819 // To not needlessly kill concurrency we unlock all caches but the top 4820 // one while copying the page. Lacking another mechanism to ensure that 4821 // the source page doesn't disappear, we mark it busy. 4822 sourcePage->busy = true; 4823 context.cacheChainLocker.UnlockKeepRefs(true); 4824 4825 // copy the page 4826 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4827 sourcePage->physical_page_number * B_PAGE_SIZE); 4828 4829 context.cacheChainLocker.RelockCaches(true); 4830 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4831 4832 // insert the new page into our cache 4833 context.topCache->InsertPage(page, context.cacheOffset); 4834 context.pageAllocated = true; 4835 } else 4836 DEBUG_PAGE_ACCESS_START(page); 4837 4838 context.page = page; 4839 return B_OK; 4840 } 4841 4842 4843 /*! Makes sure the address in the given address space is mapped. 4844 4845 \param addressSpace The address space. 4846 \param originalAddress The address. Doesn't need to be page aligned. 4847 \param isWrite If \c true the address shall be write-accessible. 4848 \param isUser If \c true the access is requested by a userland team. 
4849 \param wirePage On success, if non \c NULL, the wired count of the page 4850 mapped at the given address is incremented and the page is returned 4851 via this parameter. 4852 \return \c B_OK on success, another error code otherwise. 4853 */ 4854 static status_t 4855 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4856 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage) 4857 { 4858 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4859 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4860 originalAddress, isWrite, isUser)); 4861 4862 PageFaultContext context(addressSpace, isWrite); 4863 4864 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4865 status_t status = B_OK; 4866 4867 addressSpace->IncrementFaultCount(); 4868 4869 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4870 // the pages upfront makes sure we don't have any cache locked, so that the 4871 // page daemon/thief can do their job without problems. 4872 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4873 originalAddress); 4874 context.addressSpaceLocker.Unlock(); 4875 vm_page_reserve_pages(&context.reservation, reservePages, 4876 addressSpace == VMAddressSpace::Kernel() 4877 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4878 4879 while (true) { 4880 context.addressSpaceLocker.Lock(); 4881 4882 // get the area the fault was in 4883 VMArea* area = addressSpace->LookupArea(address); 4884 if (area == NULL) { 4885 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4886 "space\n", originalAddress); 4887 TPF(PageFaultError(-1, 4888 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4889 status = B_BAD_ADDRESS; 4890 break; 4891 } 4892 4893 // check permissions 4894 uint32 protection = get_area_page_protection(area, address); 4895 if (isUser && (protection & B_USER_PROTECTION) == 0 4896 && (area->protection & B_KERNEL_AREA) != 0) { 4897 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4898 area->id, (void*)originalAddress); 4899 TPF(PageFaultError(area->id, 4900 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4901 status = B_PERMISSION_DENIED; 4902 break; 4903 } 4904 if (isWrite && (protection 4905 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4906 dprintf("write access attempted on write-protected area 0x%" 4907 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4908 TPF(PageFaultError(area->id, 4909 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4910 status = B_PERMISSION_DENIED; 4911 break; 4912 } else if (isExecute && (protection 4913 & (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) { 4914 dprintf("instruction fetch attempted on execute-protected area 0x%" 4915 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4916 TPF(PageFaultError(area->id, 4917 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED)); 4918 status = B_PERMISSION_DENIED; 4919 break; 4920 } else if (!isWrite && !isExecute && (protection 4921 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4922 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4923 " at %p\n", area->id, (void*)originalAddress); 4924 TPF(PageFaultError(area->id, 4925 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4926 status = B_PERMISSION_DENIED; 4927 break; 4928 } 4929 4930 // We have the area, it was a valid access, so let's try to resolve the 4931 // page fault now. 4932 // At first, the top most cache from the area is investigated. 
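		// vm_area_get_locked_cache() returns the area's top cache locked and
		// with a reference; Prepare() hands it to the cache chain locker and
		// computes the offset of the faulting page within that cache.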
4933 4934 context.Prepare(vm_area_get_locked_cache(area), 4935 address - area->Base() + area->cache_offset); 4936 4937 // See if this cache has a fault handler -- this will do all the work 4938 // for us. 4939 { 4940 // Note, since the page fault is resolved with interrupts enabled, 4941 // the fault handler could be called more than once for the same 4942 // reason -- the store must take this into account. 4943 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4944 if (status != B_BAD_HANDLER) 4945 break; 4946 } 4947 4948 // The top most cache has no fault handler, so let's see if the cache or 4949 // its sources already have the page we're searching for (we're going 4950 // from top to bottom). 4951 status = fault_get_page(context); 4952 if (status != B_OK) { 4953 TPF(PageFaultError(area->id, status)); 4954 break; 4955 } 4956 4957 if (context.restart) 4958 continue; 4959 4960 // All went fine, all there is left to do is to map the page into the 4961 // address space. 4962 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4963 context.page)); 4964 4965 // If the page doesn't reside in the area's cache, we need to make sure 4966 // it's mapped in read-only, so that we cannot overwrite someone else's 4967 // data (copy-on-write) 4968 uint32 newProtection = protection; 4969 if (context.page->Cache() != context.topCache && !isWrite) 4970 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4971 4972 bool unmapPage = false; 4973 bool mapPage = true; 4974 4975 // check whether there's already a page mapped at the address 4976 context.map->Lock(); 4977 4978 phys_addr_t physicalAddress; 4979 uint32 flags; 4980 vm_page* mappedPage = NULL; 4981 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4982 && (flags & PAGE_PRESENT) != 0 4983 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4984 != NULL) { 4985 // Yep there's already a page. If it's ours, we can simply adjust 4986 // its protection. Otherwise we have to unmap it. 4987 if (mappedPage == context.page) { 4988 context.map->ProtectPage(area, address, newProtection); 4989 // Note: We assume that ProtectPage() is atomic (i.e. 4990 // the page isn't temporarily unmapped), otherwise we'd have 4991 // to make sure it isn't wired. 4992 mapPage = false; 4993 } else 4994 unmapPage = true; 4995 } 4996 4997 context.map->Unlock(); 4998 4999 if (unmapPage) { 5000 // If the page is wired, we can't unmap it. Wait until it is unwired 5001 // again and restart. Note that the page cannot be wired for 5002 // writing, since it it isn't in the topmost cache. So we can safely 5003 // ignore ranges wired for writing (our own and other concurrent 5004 // wiring attempts in progress) and in fact have to do that to avoid 5005 // a deadlock. 5006 VMAreaUnwiredWaiter waiter; 5007 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 5008 VMArea::IGNORE_WRITE_WIRED_RANGES)) { 5009 // unlock everything and wait 5010 if (context.pageAllocated) { 5011 // ... but since we allocated a page and inserted it into 5012 // the top cache, remove and free it first. Otherwise we'd 5013 // have a page from a lower cache mapped while an upper 5014 // cache has a page that would shadow it. 
5015 context.topCache->RemovePage(context.page); 5016 vm_page_free_etc(context.topCache, context.page, 5017 &context.reservation); 5018 } else 5019 DEBUG_PAGE_ACCESS_END(context.page); 5020 5021 context.UnlockAll(); 5022 waiter.waitEntry.Wait(); 5023 continue; 5024 } 5025 5026 // Note: The mapped page is a page of a lower cache. We are 5027 // guaranteed to have that cached locked, our new page is a copy of 5028 // that page, and the page is not busy. The logic for that guarantee 5029 // is as follows: Since the page is mapped, it must live in the top 5030 // cache (ruled out above) or any of its lower caches, and there is 5031 // (was before the new page was inserted) no other page in any 5032 // cache between the top cache and the page's cache (otherwise that 5033 // would be mapped instead). That in turn means that our algorithm 5034 // must have found it and therefore it cannot be busy either. 5035 DEBUG_PAGE_ACCESS_START(mappedPage); 5036 unmap_page(area, address); 5037 DEBUG_PAGE_ACCESS_END(mappedPage); 5038 } 5039 5040 if (mapPage) { 5041 if (map_page(area, context.page, address, newProtection, 5042 &context.reservation) != B_OK) { 5043 // Mapping can only fail, when the page mapping object couldn't 5044 // be allocated. Save for the missing mapping everything is 5045 // fine, though. If this was a regular page fault, we'll simply 5046 // leave and probably fault again. To make sure we'll have more 5047 // luck then, we ensure that the minimum object reserve is 5048 // available. 5049 DEBUG_PAGE_ACCESS_END(context.page); 5050 5051 context.UnlockAll(); 5052 5053 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 5054 != B_OK) { 5055 // Apparently the situation is serious. Let's get ourselves 5056 // killed. 5057 status = B_NO_MEMORY; 5058 } else if (wirePage != NULL) { 5059 // The caller expects us to wire the page. Since 5060 // object_cache_reserve() succeeded, we should now be able 5061 // to allocate a mapping structure. Restart. 
5062 continue; 5063 } 5064 5065 break; 5066 } 5067 } else if (context.page->State() == PAGE_STATE_INACTIVE) 5068 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 5069 5070 // also wire the page, if requested 5071 if (wirePage != NULL && status == B_OK) { 5072 increment_page_wired_count(context.page); 5073 *wirePage = context.page; 5074 } 5075 5076 DEBUG_PAGE_ACCESS_END(context.page); 5077 5078 break; 5079 } 5080 5081 return status; 5082 } 5083 5084 5085 status_t 5086 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 5087 { 5088 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 5089 } 5090 5091 status_t 5092 vm_put_physical_page(addr_t vaddr, void* handle) 5093 { 5094 return sPhysicalPageMapper->PutPage(vaddr, handle); 5095 } 5096 5097 5098 status_t 5099 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 5100 void** _handle) 5101 { 5102 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 5103 } 5104 5105 status_t 5106 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 5107 { 5108 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 5109 } 5110 5111 5112 status_t 5113 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 5114 { 5115 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 5116 } 5117 5118 status_t 5119 vm_put_physical_page_debug(addr_t vaddr, void* handle) 5120 { 5121 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 5122 } 5123 5124 5125 void 5126 vm_get_info(system_info* info) 5127 { 5128 swap_get_info(info); 5129 5130 MutexLocker locker(sAvailableMemoryLock); 5131 info->needed_memory = sNeededMemory; 5132 info->free_memory = sAvailableMemory; 5133 } 5134 5135 5136 uint32 5137 vm_num_page_faults(void) 5138 { 5139 return sPageFaults; 5140 } 5141 5142 5143 off_t 5144 vm_available_memory(void) 5145 { 5146 MutexLocker locker(sAvailableMemoryLock); 5147 return sAvailableMemory; 5148 } 5149 5150 5151 off_t 5152 vm_available_not_needed_memory(void) 5153 { 5154 MutexLocker locker(sAvailableMemoryLock); 5155 return sAvailableMemory - sNeededMemory; 5156 } 5157 5158 5159 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 5160 debugger. 
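	Unlike the regular version it does not acquire sAvailableMemoryLock, since
	that lock might already be held by the thread that entered the debugger.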
5161 */ 5162 off_t 5163 vm_available_not_needed_memory_debug(void) 5164 { 5165 return sAvailableMemory - sNeededMemory; 5166 } 5167 5168 5169 size_t 5170 vm_kernel_address_space_left(void) 5171 { 5172 return VMAddressSpace::Kernel()->FreeSpace(); 5173 } 5174 5175 5176 void 5177 vm_unreserve_memory(size_t amount) 5178 { 5179 mutex_lock(&sAvailableMemoryLock); 5180 5181 sAvailableMemory += amount; 5182 5183 mutex_unlock(&sAvailableMemoryLock); 5184 } 5185 5186 5187 status_t 5188 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 5189 { 5190 size_t reserve = kMemoryReserveForPriority[priority]; 5191 5192 MutexLocker locker(sAvailableMemoryLock); 5193 5194 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5195 5196 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5197 sAvailableMemory -= amount; 5198 return B_OK; 5199 } 5200 5201 if (timeout <= 0) 5202 return B_NO_MEMORY; 5203 5204 // turn timeout into an absolute timeout 5205 timeout += system_time(); 5206 5207 // loop until we've got the memory or the timeout occurs 5208 do { 5209 sNeededMemory += amount; 5210 5211 // call the low resource manager 5212 locker.Unlock(); 5213 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 5214 B_ABSOLUTE_TIMEOUT, timeout); 5215 locker.Lock(); 5216 5217 sNeededMemory -= amount; 5218 5219 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5220 sAvailableMemory -= amount; 5221 return B_OK; 5222 } 5223 } while (timeout > system_time()); 5224 5225 return B_NO_MEMORY; 5226 } 5227 5228 5229 status_t 5230 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 5231 { 5232 // NOTE: The caller is responsible for synchronizing calls to this function! 5233 5234 AddressSpaceReadLocker locker; 5235 VMArea* area; 5236 status_t status = locker.SetFromArea(id, area); 5237 if (status != B_OK) 5238 return status; 5239 5240 // nothing to do, if the type doesn't change 5241 uint32 oldType = area->MemoryType(); 5242 if (type == oldType) 5243 return B_OK; 5244 5245 // set the memory type of the area and the mapped pages 5246 VMTranslationMap* map = area->address_space->TranslationMap(); 5247 map->Lock(); 5248 area->SetMemoryType(type); 5249 map->ProtectArea(area, area->protection); 5250 map->Unlock(); 5251 5252 // set the physical memory type 5253 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 5254 if (error != B_OK) { 5255 // reset the memory type of the area and the mapped pages 5256 map->Lock(); 5257 area->SetMemoryType(oldType); 5258 map->ProtectArea(area, area->protection); 5259 map->Unlock(); 5260 return error; 5261 } 5262 5263 return B_OK; 5264 5265 } 5266 5267 5268 /*! 
This function enforces some protection properties: 5269 - kernel areas must be W^X (after kernel startup) 5270 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 5271 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 5272 */ 5273 static void 5274 fix_protection(uint32* protection) 5275 { 5276 if ((*protection & B_KERNEL_EXECUTE_AREA) != 0 5277 && ((*protection & B_KERNEL_WRITE_AREA) != 0 5278 || (*protection & B_WRITE_AREA) != 0) 5279 && !gKernelStartup) 5280 panic("kernel areas cannot be both writable and executable!"); 5281 5282 if ((*protection & B_KERNEL_PROTECTION) == 0) { 5283 if ((*protection & B_WRITE_AREA) != 0) 5284 *protection |= B_KERNEL_WRITE_AREA; 5285 if ((*protection & B_READ_AREA) != 0) 5286 *protection |= B_KERNEL_READ_AREA; 5287 } 5288 } 5289 5290 5291 static void 5292 fill_area_info(struct VMArea* area, area_info* info, size_t size) 5293 { 5294 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 5295 info->area = area->id; 5296 info->address = (void*)area->Base(); 5297 info->size = area->Size(); 5298 info->protection = area->protection; 5299 info->lock = area->wiring; 5300 info->team = area->address_space->ID(); 5301 info->copy_count = 0; 5302 info->in_count = 0; 5303 info->out_count = 0; 5304 // TODO: retrieve real values here! 5305 5306 VMCache* cache = vm_area_get_locked_cache(area); 5307 5308 // Note, this is a simplification; the cache could be larger than this area 5309 info->ram_size = cache->page_count * B_PAGE_SIZE; 5310 5311 vm_area_put_locked_cache(cache); 5312 } 5313 5314 5315 static status_t 5316 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 5317 { 5318 // is newSize a multiple of B_PAGE_SIZE? 5319 if (newSize & (B_PAGE_SIZE - 1)) 5320 return B_BAD_VALUE; 5321 5322 // lock all affected address spaces and the cache 5323 VMArea* area; 5324 VMCache* cache; 5325 5326 MultiAddressSpaceLocker locker; 5327 AreaCacheLocker cacheLocker; 5328 5329 status_t status; 5330 size_t oldSize; 5331 bool anyKernelArea; 5332 bool restart; 5333 5334 do { 5335 anyKernelArea = false; 5336 restart = false; 5337 5338 locker.Unset(); 5339 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 5340 if (status != B_OK) 5341 return status; 5342 cacheLocker.SetTo(cache, true); // already locked 5343 5344 // enforce restrictions 5345 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 5346 || (area->protection & B_KERNEL_AREA) != 0)) { 5347 dprintf("vm_resize_area: team %" B_PRId32 " tried to " 5348 "resize kernel area %" B_PRId32 " (%s)\n", 5349 team_get_current_team_id(), areaID, area->name); 5350 return B_NOT_ALLOWED; 5351 } 5352 // TODO: Enforce all restrictions (team, etc.)! 5353 5354 oldSize = area->Size(); 5355 if (newSize == oldSize) 5356 return B_OK; 5357 5358 if (cache->type != CACHE_TYPE_RAM) 5359 return B_NOT_ALLOWED; 5360 5361 if (oldSize < newSize) { 5362 // We need to check if all areas of this cache can be resized. 5363 for (VMArea* current = cache->areas; current != NULL; 5364 current = current->cache_next) { 5365 if (!current->address_space->CanResizeArea(current, newSize)) 5366 return B_ERROR; 5367 anyKernelArea 5368 |= current->address_space == VMAddressSpace::Kernel(); 5369 } 5370 } else { 5371 // We're shrinking the areas, so we must make sure the affected 5372 // ranges are not wired. 
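			// If wait_if_area_range_is_wired() had to wait, it temporarily
			// dropped the locks, so the whole lock-and-check sequence is
			// restarted (hence the enclosing do/while (restart) loop).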
5373 for (VMArea* current = cache->areas; current != NULL; 5374 current = current->cache_next) { 5375 anyKernelArea 5376 |= current->address_space == VMAddressSpace::Kernel(); 5377 5378 if (wait_if_area_range_is_wired(current, 5379 current->Base() + newSize, oldSize - newSize, &locker, 5380 &cacheLocker)) { 5381 restart = true; 5382 break; 5383 } 5384 } 5385 } 5386 } while (restart); 5387 5388 // Okay, looks good so far, so let's do it 5389 5390 int priority = kernel && anyKernelArea 5391 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 5392 uint32 allocationFlags = kernel && anyKernelArea 5393 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 5394 5395 if (oldSize < newSize) { 5396 // Growing the cache can fail, so we do it first. 5397 status = cache->Resize(cache->virtual_base + newSize, priority); 5398 if (status != B_OK) 5399 return status; 5400 } 5401 5402 for (VMArea* current = cache->areas; current != NULL; 5403 current = current->cache_next) { 5404 status = current->address_space->ResizeArea(current, newSize, 5405 allocationFlags); 5406 if (status != B_OK) 5407 break; 5408 5409 // We also need to unmap all pages beyond the new size, if the area has 5410 // shrunk 5411 if (newSize < oldSize) { 5412 VMCacheChainLocker cacheChainLocker(cache); 5413 cacheChainLocker.LockAllSourceCaches(); 5414 5415 unmap_pages(current, current->Base() + newSize, 5416 oldSize - newSize); 5417 5418 cacheChainLocker.Unlock(cache); 5419 } 5420 } 5421 5422 if (status == B_OK) { 5423 // Shrink or grow individual page protections if in use. 5424 if (area->page_protections != NULL) { 5425 size_t bytes = area_page_protections_size(newSize); 5426 uint8* newProtections 5427 = (uint8*)realloc(area->page_protections, bytes); 5428 if (newProtections == NULL) 5429 status = B_NO_MEMORY; 5430 else { 5431 area->page_protections = newProtections; 5432 5433 if (oldSize < newSize) { 5434 // init the additional page protections to that of the area 5435 uint32 offset = area_page_protections_size(oldSize); 5436 uint32 areaProtection = area->protection 5437 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 5438 memset(area->page_protections + offset, 5439 areaProtection | (areaProtection << 4), bytes - offset); 5440 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 5441 uint8& entry = area->page_protections[offset - 1]; 5442 entry = (entry & 0x0f) | (areaProtection << 4); 5443 } 5444 } 5445 } 5446 } 5447 } 5448 5449 // shrinking the cache can't fail, so we do it now 5450 if (status == B_OK && newSize < oldSize) 5451 status = cache->Resize(cache->virtual_base + newSize, priority); 5452 5453 if (status != B_OK) { 5454 // Something failed -- resize the areas back to their original size. 5455 // This can fail, too, in which case we're seriously screwed. 
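		// The cache is resized back to its original size below, after the
		// areas have been restored.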
5456 for (VMArea* current = cache->areas; current != NULL; 5457 current = current->cache_next) { 5458 if (current->address_space->ResizeArea(current, oldSize, 5459 allocationFlags) != B_OK) { 5460 panic("vm_resize_area(): Failed and not being able to restore " 5461 "original state."); 5462 } 5463 } 5464 5465 cache->Resize(cache->virtual_base + oldSize, priority); 5466 } 5467 5468 // TODO: we must honour the lock restrictions of this area 5469 return status; 5470 } 5471 5472 5473 status_t 5474 vm_memset_physical(phys_addr_t address, int value, phys_size_t length) 5475 { 5476 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 5477 } 5478 5479 5480 status_t 5481 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 5482 { 5483 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 5484 } 5485 5486 5487 status_t 5488 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 5489 bool user) 5490 { 5491 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 5492 } 5493 5494 5495 void 5496 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 5497 { 5498 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 5499 } 5500 5501 5502 /*! Copies a range of memory directly from/to a page that might not be mapped 5503 at the moment. 5504 5505 For \a unsafeMemory the current mapping (if any is ignored). The function 5506 walks through the respective area's cache chain to find the physical page 5507 and copies from/to it directly. 5508 The memory range starting at \a unsafeMemory with a length of \a size bytes 5509 must not cross a page boundary. 5510 5511 \param teamID The team ID identifying the address space \a unsafeMemory is 5512 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 5513 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 5514 is passed, the address space of the thread returned by 5515 debug_get_debugged_thread() is used. 5516 \param unsafeMemory The start of the unsafe memory range to be copied 5517 from/to. 5518 \param buffer A safely accessible kernel buffer to be copied from/to. 5519 \param size The number of bytes to be copied. 5520 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5521 \a unsafeMemory, the other way around otherwise. 
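	\return \c B_OK on success, \c B_BAD_VALUE if the range crosses a page
		boundary, \c B_BAD_ADDRESS if the address space or area cannot be
		found, \c B_UNSUPPORTED if no page currently backs the range (or,
		when copying to \a unsafeMemory, the page does not live in the area's
		top cache), or the error returned by the physical copy.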
5522 */ 5523 status_t 5524 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5525 size_t size, bool copyToUnsafe) 5526 { 5527 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5528 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5529 return B_BAD_VALUE; 5530 } 5531 5532 // get the address space for the debugged thread 5533 VMAddressSpace* addressSpace; 5534 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5535 addressSpace = VMAddressSpace::Kernel(); 5536 } else if (teamID == B_CURRENT_TEAM) { 5537 Thread* thread = debug_get_debugged_thread(); 5538 if (thread == NULL || thread->team == NULL) 5539 return B_BAD_ADDRESS; 5540 5541 addressSpace = thread->team->address_space; 5542 } else 5543 addressSpace = VMAddressSpace::DebugGet(teamID); 5544 5545 if (addressSpace == NULL) 5546 return B_BAD_ADDRESS; 5547 5548 // get the area 5549 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5550 if (area == NULL) 5551 return B_BAD_ADDRESS; 5552 5553 // search the page 5554 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5555 + area->cache_offset; 5556 VMCache* cache = area->cache; 5557 vm_page* page = NULL; 5558 while (cache != NULL) { 5559 page = cache->DebugLookupPage(cacheOffset); 5560 if (page != NULL) 5561 break; 5562 5563 // Page not found in this cache -- if it is paged out, we must not try 5564 // to get it from lower caches. 5565 if (cache->DebugHasPage(cacheOffset)) 5566 break; 5567 5568 cache = cache->source; 5569 } 5570 5571 if (page == NULL) 5572 return B_UNSUPPORTED; 5573 5574 // copy from/to physical memory 5575 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5576 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5577 5578 if (copyToUnsafe) { 5579 if (page->Cache() != area->cache) 5580 return B_UNSUPPORTED; 5581 5582 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5583 } 5584 5585 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5586 } 5587 5588 5589 /** Validate that a memory range is either fully in kernel space, or fully in 5590 * userspace */ 5591 static inline bool 5592 validate_memory_range(const void* addr, size_t size) 5593 { 5594 addr_t address = (addr_t)addr; 5595 5596 // Check for overflows on all addresses. 5597 if ((address + size) < address) 5598 return false; 5599 5600 // Validate that the address range does not cross the kernel/user boundary. 5601 return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1); 5602 } 5603 5604 5605 // #pragma mark - kernel public API 5606 5607 5608 status_t 5609 user_memcpy(void* to, const void* from, size_t size) 5610 { 5611 if (!validate_memory_range(to, size) || !validate_memory_range(from, size)) 5612 return B_BAD_ADDRESS; 5613 5614 if (arch_cpu_user_memcpy(to, from, size) < B_OK) 5615 return B_BAD_ADDRESS; 5616 5617 return B_OK; 5618 } 5619 5620 5621 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5622 the string in \a to, NULL-terminating the result. 5623 5624 \param to Pointer to the destination C-string. 5625 \param from Pointer to the source C-string. 5626 \param size Size in bytes of the string buffer pointed to by \a to. 5627 5628 \return strlen(\a from). 5629 */ 5630 ssize_t 5631 user_strlcpy(char* to, const char* from, size_t size) 5632 { 5633 if (to == NULL && size != 0) 5634 return B_BAD_VALUE; 5635 if (from == NULL) 5636 return B_BAD_ADDRESS; 5637 5638 // Protect the source address from overflows. 
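	// Clamp maxSize so the copy can neither wrap around the end of the
	// address space nor run from userland into kernel space; if the string
	// was not terminated within the clamped range, the boundary check below
	// reports B_BAD_ADDRESS.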
	size_t maxSize = size;
	if ((addr_t)from + maxSize < (addr_t)from)
		maxSize -= (addr_t)from + maxSize;
	if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize))
		maxSize = USER_TOP - (addr_t)from;

	if (!validate_memory_range(to, maxSize))
		return B_BAD_ADDRESS;

	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
	if (result < 0)
		return result;

	// If we hit the address overflow boundary, fail.
	if ((size_t)result >= maxSize && maxSize < size)
		return B_BAD_ADDRESS;

	return result;
}


status_t
user_memset(void* s, char c, size_t count)
{
	if (!validate_memory_range(s, count))
		return B_BAD_ADDRESS;

	if (arch_cpu_user_memset(s, c, count) < B_OK)
		return B_BAD_ADDRESS;

	return B_OK;
}


/*!	Wires a single page at the given address.

	\param team The team whose address space the address belongs to. Also
		supports \c B_CURRENT_TEAM. If the given address is a kernel address,
		the parameter is ignored.
	\param address The virtual address to wire down. Does not need to
		be page aligned.
	\param writable If \c true the page shall be writable.
	\param info On success the info is filled in, among other things
		containing the physical address the given virtual one translates to.
	\return \c B_OK when the page could be wired, another error code otherwise.
*/
status_t
vm_wire_page(team_id team, addr_t address, bool writable,
	VMPageWiringInfo* info)
{
	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);

	// compute the page protection that is required
	bool isUser = IS_USER_ADDRESS(address);
	uint32 requiredProtection = PAGE_PRESENT
		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
	if (writable)
		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);

	// get and read lock the address space
	VMAddressSpace* addressSpace = NULL;
	if (isUser) {
		if (team == B_CURRENT_TEAM)
			addressSpace = VMAddressSpace::GetCurrent();
		else
			addressSpace = VMAddressSpace::Get(team);
	} else
		addressSpace = VMAddressSpace::GetKernel();
	if (addressSpace == NULL)
		return B_ERROR;

	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);

	VMTranslationMap* map = addressSpace->TranslationMap();
	status_t error = B_OK;

	// get the area
	VMArea* area = addressSpace->LookupArea(pageAddress);
	if (area == NULL) {
		addressSpace->Put();
		return B_BAD_ADDRESS;
	}

	// Lock the area's top cache. This is a requirement for VMArea::Wire().
	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));

	// mark the area range wired
	area->Wire(&info->range);

	// Lock the area's cache chain and the translation map. Needed to look
	// up the page and play with its wired count.
	cacheChainLocker.LockAllSourceCaches();
	map->Lock();

	phys_addr_t physicalAddress;
	uint32 flags;
	vm_page* page;
	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
		&& (flags & requiredProtection) == requiredProtection
		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
			!= NULL) {
		// Already mapped with the correct permissions -- just increment
		// the page's wired count.
		increment_page_wired_count(page);

		map->Unlock();
		cacheChainLocker.Unlock();
		addressSpaceLocker.Unlock();
	} else {
		// Let vm_soft_fault() map the page for us, if possible. We need
		// to fully unlock to avoid deadlocks. Since we have already
		// wired the area itself, nothing disturbing will happen with it
		// in the meantime.
		map->Unlock();
		cacheChainLocker.Unlock();
		addressSpaceLocker.Unlock();

		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
			isUser, &page);

		if (error != B_OK) {
			// The page could not be mapped -- clean up.
			VMCache* cache = vm_area_get_locked_cache(area);
			area->Unwire(&info->range);
			cache->ReleaseRefAndUnlock();
			addressSpace->Put();
			return error;
		}
	}

	info->physicalAddress
		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
			+ address % B_PAGE_SIZE;
	info->page = page;

	return B_OK;
}


/*!	Unwires a single page previously wired via vm_wire_page().

	\param info The same object passed to vm_wire_page() before.
*/
void
vm_unwire_page(VMPageWiringInfo* info)
{
	// lock the address space
	VMArea* area = info->range.area;
	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
		// takes over our reference

	// lock the top cache
	VMCache* cache = vm_area_get_locked_cache(area);
	VMCacheChainLocker cacheChainLocker(cache);

	if (info->page->Cache() != cache) {
		// The page is not in the top cache, so we lock the whole cache chain
		// before touching the page's wired count.
		cacheChainLocker.LockAllSourceCaches();
	}

	decrement_page_wired_count(info->page);

	// remove the wired range from the area
	area->Unwire(&info->range);

	cacheChainLocker.Unlock();
}


/*!	Wires down the given address range in the specified team's address space.

	If successful, the function
	- acquires a reference to the specified team's address space,
	- adds respective wired ranges to all areas that intersect with the given
	  address range,
	- makes sure all pages in the given address range are mapped with the
	  requested access permissions and increments their wired count.

	It fails when \a team doesn't specify a valid address space, when any part
	of the specified address range is not covered by areas, when the concerned
	areas don't allow mapping with the requested permissions, or when mapping
	failed for another reason.

	When successful, the call must be balanced by an unlock_memory_etc() call
	with the exact same parameters.

	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
		is supported.
	\param address The start of the address range to be wired.
	\param numBytes The size of the address range to be wired.
	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
		requests that the range must be wired writable ("read from device
		into memory").
	\return \c B_OK on success, another error code otherwise.
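
	Illustrative sketch (editorial addition, not part of the original
	documentation): a caller typically wires a user buffer for the duration of
	an operation and balances the call afterwards; \c team, \c buffer and
	\c length are placeholders.

		status_t status = lock_memory_etc(team, buffer, length, 0);
		if (status != B_OK)
			return status;
		// ... operate on the wired range ...
		unlock_memory_etc(team, buffer, length, 0);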
5835 */ 5836 status_t 5837 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5838 { 5839 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5840 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5841 5842 // compute the page protection that is required 5843 bool isUser = IS_USER_ADDRESS(address); 5844 bool writable = (flags & B_READ_DEVICE) == 0; 5845 uint32 requiredProtection = PAGE_PRESENT 5846 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5847 if (writable) 5848 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5849 5850 uint32 mallocFlags = isUser 5851 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5852 5853 // get and read lock the address space 5854 VMAddressSpace* addressSpace = NULL; 5855 if (isUser) { 5856 if (team == B_CURRENT_TEAM) 5857 addressSpace = VMAddressSpace::GetCurrent(); 5858 else 5859 addressSpace = VMAddressSpace::Get(team); 5860 } else 5861 addressSpace = VMAddressSpace::GetKernel(); 5862 if (addressSpace == NULL) 5863 return B_ERROR; 5864 5865 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5866 // We get a new address space reference here. The one we got above will 5867 // be freed by unlock_memory_etc(). 5868 5869 VMTranslationMap* map = addressSpace->TranslationMap(); 5870 status_t error = B_OK; 5871 5872 // iterate through all concerned areas 5873 addr_t nextAddress = lockBaseAddress; 5874 while (nextAddress != lockEndAddress) { 5875 // get the next area 5876 VMArea* area = addressSpace->LookupArea(nextAddress); 5877 if (area == NULL) { 5878 error = B_BAD_ADDRESS; 5879 break; 5880 } 5881 5882 addr_t areaStart = nextAddress; 5883 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5884 5885 // allocate the wired range (do that before locking the cache to avoid 5886 // deadlocks) 5887 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5888 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5889 if (range == NULL) { 5890 error = B_NO_MEMORY; 5891 break; 5892 } 5893 5894 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5895 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5896 5897 // mark the area range wired 5898 area->Wire(range); 5899 5900 // Depending on the area cache type and the wiring, we may not need to 5901 // look at the individual pages. 5902 if (area->cache_type == CACHE_TYPE_NULL 5903 || area->cache_type == CACHE_TYPE_DEVICE 5904 || area->wiring == B_FULL_LOCK 5905 || area->wiring == B_CONTIGUOUS) { 5906 nextAddress = areaEnd; 5907 continue; 5908 } 5909 5910 // Lock the area's cache chain and the translation map. Needed to look 5911 // up pages and play with their wired count. 5912 cacheChainLocker.LockAllSourceCaches(); 5913 map->Lock(); 5914 5915 // iterate through the pages and wire them 5916 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5917 phys_addr_t physicalAddress; 5918 uint32 flags; 5919 5920 vm_page* page; 5921 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5922 && (flags & requiredProtection) == requiredProtection 5923 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5924 != NULL) { 5925 // Already mapped with the correct permissions -- just increment 5926 // the page's wired count. 5927 increment_page_wired_count(page); 5928 } else { 5929 // Let vm_soft_fault() map the page for us, if possible. We need 5930 // to fully unlock to avoid deadlocks. 
Since we have already 5931 // wired the area itself, nothing disturbing will happen with it 5932 // in the meantime. 5933 map->Unlock(); 5934 cacheChainLocker.Unlock(); 5935 addressSpaceLocker.Unlock(); 5936 5937 error = vm_soft_fault(addressSpace, nextAddress, writable, 5938 false, isUser, &page); 5939 5940 addressSpaceLocker.Lock(); 5941 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5942 cacheChainLocker.LockAllSourceCaches(); 5943 map->Lock(); 5944 } 5945 5946 if (error != B_OK) 5947 break; 5948 } 5949 5950 map->Unlock(); 5951 5952 if (error == B_OK) { 5953 cacheChainLocker.Unlock(); 5954 } else { 5955 // An error occurred, so abort right here. If the current address 5956 // is the first in this area, unwire the area, since we won't get 5957 // to it when reverting what we've done so far. 5958 if (nextAddress == areaStart) { 5959 area->Unwire(range); 5960 cacheChainLocker.Unlock(); 5961 range->~VMAreaWiredRange(); 5962 free_etc(range, mallocFlags); 5963 } else 5964 cacheChainLocker.Unlock(); 5965 5966 break; 5967 } 5968 } 5969 5970 if (error != B_OK) { 5971 // An error occurred, so unwire all that we've already wired. Note that 5972 // even if not a single page was wired, unlock_memory_etc() is called 5973 // to put the address space reference. 5974 addressSpaceLocker.Unlock(); 5975 unlock_memory_etc(team, (void*)lockBaseAddress, 5976 nextAddress - lockBaseAddress, flags); 5977 } 5978 5979 return error; 5980 } 5981 5982 5983 status_t 5984 lock_memory(void* address, size_t numBytes, uint32 flags) 5985 { 5986 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5987 } 5988 5989 5990 /*! Unwires an address range previously wired with lock_memory_etc(). 5991 5992 Note that a call to this function must balance a previous lock_memory_etc() 5993 call with exactly the same parameters. 5994 */ 5995 status_t 5996 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5997 { 5998 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5999 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 6000 6001 // compute the page protection that is required 6002 bool isUser = IS_USER_ADDRESS(address); 6003 bool writable = (flags & B_READ_DEVICE) == 0; 6004 uint32 requiredProtection = PAGE_PRESENT 6005 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 6006 if (writable) 6007 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 6008 6009 uint32 mallocFlags = isUser 6010 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 6011 6012 // get and read lock the address space 6013 VMAddressSpace* addressSpace = NULL; 6014 if (isUser) { 6015 if (team == B_CURRENT_TEAM) 6016 addressSpace = VMAddressSpace::GetCurrent(); 6017 else 6018 addressSpace = VMAddressSpace::Get(team); 6019 } else 6020 addressSpace = VMAddressSpace::GetKernel(); 6021 if (addressSpace == NULL) 6022 return B_ERROR; 6023 6024 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false); 6025 // Take over the address space reference. We don't unlock until we're 6026 // done. 
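
	// As in lock_memory_etc(), the loop below walks all areas covering the
	// range; for areas whose pages were wired individually it decrements each
	// page's wired count before removing the area's wired range again.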

	VMTranslationMap* map = addressSpace->TranslationMap();
	status_t error = B_OK;

	// iterate through all concerned areas
	addr_t nextAddress = lockBaseAddress;
	while (nextAddress != lockEndAddress) {
		// get the next area
		VMArea* area = addressSpace->LookupArea(nextAddress);
		if (area == NULL) {
			error = B_BAD_ADDRESS;
			break;
		}

		addr_t areaStart = nextAddress;
		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());

		// Lock the area's top cache. This is a requirement for
		// VMArea::Unwire().
		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));

		// Depending on the area cache type and the wiring, we may not need to
		// look at the individual pages.
		if (area->cache_type == CACHE_TYPE_NULL
			|| area->cache_type == CACHE_TYPE_DEVICE
			|| area->wiring == B_FULL_LOCK
			|| area->wiring == B_CONTIGUOUS) {
			// unwire the range (to avoid deadlocks we delete the range after
			// unlocking the cache)
			nextAddress = areaEnd;
			VMAreaWiredRange* range = area->Unwire(areaStart,
				areaEnd - areaStart, writable);
			cacheChainLocker.Unlock();
			if (range != NULL) {
				range->~VMAreaWiredRange();
				free_etc(range, mallocFlags);
			}
			continue;
		}

		// Lock the area's cache chain and the translation map. Needed to look
		// up pages and play with their wired count.
		cacheChainLocker.LockAllSourceCaches();
		map->Lock();

		// iterate through the pages and unwire them
		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
			phys_addr_t physicalAddress;
			uint32 flags;

			vm_page* page;
			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
				&& (flags & PAGE_PRESENT) != 0
				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
					!= NULL) {
				// The page is still mapped -- just decrement its wired count.
				decrement_page_wired_count(page);
			} else {
				panic("unlock_memory_etc(): Failed to unwire page: address "
					"space %p, address: %#" B_PRIxADDR, addressSpace,
					nextAddress);
				error = B_BAD_VALUE;
				break;
			}
		}

		map->Unlock();

		// All pages are unwired. Remove the area's wired range as well (to
		// avoid deadlocks we delete the range after unlocking the cache).
		VMAreaWiredRange* range = area->Unwire(areaStart,
			areaEnd - areaStart, writable);

		cacheChainLocker.Unlock();

		if (range != NULL) {
			range->~VMAreaWiredRange();
			free_etc(range, mallocFlags);
		}

		if (error != B_OK)
			break;
	}

	// get rid of the address space reference lock_memory_etc() acquired
	addressSpace->Put();

	return error;
}


status_t
unlock_memory(void* address, size_t numBytes, uint32 flags)
{
	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
}


/*!	Similar to get_memory_map(), but also allows specifying the address space
	for the memory in question and has saner semantics.
	Returns \c B_OK when the complete range could be translated or
	\c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
	case the actual number of entries is written to \c *_numEntries. Any other
	error case indicates complete failure; \c *_numEntries will be set to \c 0
	in this case.
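
	Illustrative sketch (editorial addition): translating a buffer into its
	physical runs; \c buffer and \c length are placeholders.

		physical_entry entries[8];
		uint32 count = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
			entries, &count);
		// On B_OK or B_BUFFER_OVERFLOW, "count" holds the number of valid
		// entries that were written to the table.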
*/
status_t
get_memory_map_etc(team_id team, const void* address, size_t numBytes,
	physical_entry* table, uint32* _numEntries)
{
	uint32 numEntries = *_numEntries;
	*_numEntries = 0;

	VMAddressSpace* addressSpace;
	addr_t virtualAddress = (addr_t)address;
	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
	phys_addr_t physicalAddress;
	status_t status = B_OK;
	int32 index = -1;
	addr_t offset = 0;
	bool interrupts = are_interrupts_enabled();

	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
		"entries)\n", team, address, numBytes, numEntries));

	if (numEntries == 0 || numBytes == 0)
		return B_BAD_VALUE;

	// in which address space is the address to be found?
	if (IS_USER_ADDRESS(virtualAddress)) {
		if (team == B_CURRENT_TEAM)
			addressSpace = VMAddressSpace::GetCurrent();
		else
			addressSpace = VMAddressSpace::Get(team);
	} else
		addressSpace = VMAddressSpace::GetKernel();

	if (addressSpace == NULL)
		return B_ERROR;

	VMTranslationMap* map = addressSpace->TranslationMap();

	if (interrupts)
		map->Lock();

	while (offset < numBytes) {
		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
		uint32 flags;

		if (interrupts) {
			status = map->Query((addr_t)address + offset, &physicalAddress,
				&flags);
		} else {
			status = map->QueryInterrupt((addr_t)address + offset,
				&physicalAddress, &flags);
		}
		if (status < B_OK)
			break;
		if ((flags & PAGE_PRESENT) == 0) {
			panic("get_memory_map() called on unmapped memory!");
			return B_BAD_ADDRESS;
		}

		if (index < 0 && pageOffset > 0) {
			physicalAddress += pageOffset;
			if (bytes > B_PAGE_SIZE - pageOffset)
				bytes = B_PAGE_SIZE - pageOffset;
		}

		// need to switch to the next physical_entry?
		if (index < 0 || table[index].address
				!= physicalAddress - table[index].size) {
			if ((uint32)++index + 1 > numEntries) {
				// table too small
				break;
			}
			table[index].address = physicalAddress;
			table[index].size = bytes;
		} else {
			// page fits in the current entry
			table[index].size += bytes;
		}

		offset += bytes;
	}

	if (interrupts)
		map->Unlock();

	if (status != B_OK)
		return status;

	if ((uint32)index + 1 > numEntries) {
		*_numEntries = index;
		return B_BUFFER_OVERFLOW;
	}

	*_numEntries = index + 1;
	return B_OK;
}


/*!	According to the BeBook, this function should always succeed.
	This is no longer the case.
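	It forwards to get_memory_map_etc() and, unless only a single entry was
	requested, appends a zero-sized entry to terminate the table; if there is
	no room left for that terminating entry, \c B_BUFFER_OVERFLOW is returned.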
6232 */ 6233 extern "C" int32 6234 __get_memory_map_haiku(const void* address, size_t numBytes, 6235 physical_entry* table, int32 numEntries) 6236 { 6237 uint32 entriesRead = numEntries; 6238 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 6239 table, &entriesRead); 6240 if (error != B_OK) 6241 return error; 6242 6243 // close the entry list 6244 6245 // if it's only one entry, we will silently accept the missing ending 6246 if (numEntries == 1) 6247 return B_OK; 6248 6249 if (entriesRead + 1 > (uint32)numEntries) 6250 return B_BUFFER_OVERFLOW; 6251 6252 table[entriesRead].address = 0; 6253 table[entriesRead].size = 0; 6254 6255 return B_OK; 6256 } 6257 6258 6259 area_id 6260 area_for(void* address) 6261 { 6262 return vm_area_for((addr_t)address, true); 6263 } 6264 6265 6266 area_id 6267 find_area(const char* name) 6268 { 6269 return VMAreas::Find(name); 6270 } 6271 6272 6273 status_t 6274 _get_area_info(area_id id, area_info* info, size_t size) 6275 { 6276 if (size != sizeof(area_info) || info == NULL) 6277 return B_BAD_VALUE; 6278 6279 AddressSpaceReadLocker locker; 6280 VMArea* area; 6281 status_t status = locker.SetFromArea(id, area); 6282 if (status != B_OK) 6283 return status; 6284 6285 fill_area_info(area, info, size); 6286 return B_OK; 6287 } 6288 6289 6290 status_t 6291 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 6292 { 6293 addr_t nextBase = *(addr_t*)cookie; 6294 6295 // we're already through the list 6296 if (nextBase == (addr_t)-1) 6297 return B_ENTRY_NOT_FOUND; 6298 6299 if (team == B_CURRENT_TEAM) 6300 team = team_get_current_team_id(); 6301 6302 AddressSpaceReadLocker locker(team); 6303 if (!locker.IsLocked()) 6304 return B_BAD_TEAM_ID; 6305 6306 VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false); 6307 if (area == NULL) { 6308 nextBase = (addr_t)-1; 6309 return B_ENTRY_NOT_FOUND; 6310 } 6311 6312 fill_area_info(area, info, size); 6313 *cookie = (ssize_t)(area->Base() + 1); 6314 6315 return B_OK; 6316 } 6317 6318 6319 status_t 6320 set_area_protection(area_id area, uint32 newProtection) 6321 { 6322 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 6323 newProtection, true); 6324 } 6325 6326 6327 status_t 6328 resize_area(area_id areaID, size_t newSize) 6329 { 6330 return vm_resize_area(areaID, newSize, true); 6331 } 6332 6333 6334 /*! Transfers the specified area to a new team. The caller must be the owner 6335 of the area. 6336 */ 6337 area_id 6338 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 6339 bool kernel) 6340 { 6341 area_info info; 6342 status_t status = get_area_info(id, &info); 6343 if (status != B_OK) 6344 return status; 6345 6346 if (info.team != thread_get_current_thread()->team->id) 6347 return B_PERMISSION_DENIED; 6348 6349 // We need to mark the area cloneable so the following operations work. 6350 status = set_area_protection(id, info.protection | B_CLONEABLE_AREA); 6351 if (status != B_OK) 6352 return status; 6353 6354 area_id clonedArea = vm_clone_area(target, info.name, _address, 6355 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 6356 if (clonedArea < 0) 6357 return clonedArea; 6358 6359 status = vm_delete_area(info.team, id, kernel); 6360 if (status != B_OK) { 6361 vm_delete_area(target, clonedArea, kernel); 6362 return status; 6363 } 6364 6365 // Now we can reset the protection to whatever it was before. 
6366 set_area_protection(clonedArea, info.protection); 6367 6368 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 6369 6370 return clonedArea; 6371 } 6372 6373 6374 extern "C" area_id 6375 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 6376 size_t numBytes, uint32 addressSpec, uint32 protection, 6377 void** _virtualAddress) 6378 { 6379 if (!arch_vm_supports_protection(protection)) 6380 return B_NOT_SUPPORTED; 6381 6382 fix_protection(&protection); 6383 6384 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 6385 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 6386 false); 6387 } 6388 6389 6390 area_id 6391 clone_area(const char* name, void** _address, uint32 addressSpec, 6392 uint32 protection, area_id source) 6393 { 6394 if ((protection & B_KERNEL_PROTECTION) == 0) 6395 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 6396 6397 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 6398 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 6399 } 6400 6401 6402 area_id 6403 create_area_etc(team_id team, const char* name, size_t size, uint32 lock, 6404 uint32 protection, uint32 flags, uint32 guardSize, 6405 const virtual_address_restrictions* virtualAddressRestrictions, 6406 const physical_address_restrictions* physicalAddressRestrictions, 6407 void** _address) 6408 { 6409 fix_protection(&protection); 6410 6411 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 6412 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 6413 true, _address); 6414 } 6415 6416 6417 extern "C" area_id 6418 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 6419 size_t size, uint32 lock, uint32 protection) 6420 { 6421 fix_protection(&protection); 6422 6423 virtual_address_restrictions virtualRestrictions = {}; 6424 virtualRestrictions.address = *_address; 6425 virtualRestrictions.address_specification = addressSpec; 6426 physical_address_restrictions physicalRestrictions = {}; 6427 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 6428 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 6429 true, _address); 6430 } 6431 6432 6433 status_t 6434 delete_area(area_id area) 6435 { 6436 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 6437 } 6438 6439 6440 // #pragma mark - Userland syscalls 6441 6442 6443 status_t 6444 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 6445 addr_t size) 6446 { 6447 // filter out some unavailable values (for userland) 6448 switch (addressSpec) { 6449 case B_ANY_KERNEL_ADDRESS: 6450 case B_ANY_KERNEL_BLOCK_ADDRESS: 6451 return B_BAD_VALUE; 6452 } 6453 6454 addr_t address; 6455 6456 if (!IS_USER_ADDRESS(userAddress) 6457 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 6458 return B_BAD_ADDRESS; 6459 6460 status_t status = vm_reserve_address_range( 6461 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 6462 RESERVED_AVOID_BASE); 6463 if (status != B_OK) 6464 return status; 6465 6466 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 6467 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6468 (void*)address, size); 6469 return B_BAD_ADDRESS; 6470 } 6471 6472 return B_OK; 6473 } 6474 6475 6476 status_t 6477 _user_unreserve_address_range(addr_t address, addr_t size) 6478 { 6479 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6480 (void*)address, size); 6481 } 6482 6483 6484 area_id 
6485 _user_area_for(void* address) 6486 { 6487 return vm_area_for((addr_t)address, false); 6488 } 6489 6490 6491 area_id 6492 _user_find_area(const char* userName) 6493 { 6494 char name[B_OS_NAME_LENGTH]; 6495 6496 if (!IS_USER_ADDRESS(userName) 6497 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 6498 return B_BAD_ADDRESS; 6499 6500 return find_area(name); 6501 } 6502 6503 6504 status_t 6505 _user_get_area_info(area_id area, area_info* userInfo) 6506 { 6507 if (!IS_USER_ADDRESS(userInfo)) 6508 return B_BAD_ADDRESS; 6509 6510 area_info info; 6511 status_t status = get_area_info(area, &info); 6512 if (status < B_OK) 6513 return status; 6514 6515 // TODO: do we want to prevent userland from seeing kernel protections? 6516 //info.protection &= B_USER_PROTECTION; 6517 6518 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6519 return B_BAD_ADDRESS; 6520 6521 return status; 6522 } 6523 6524 6525 status_t 6526 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 6527 { 6528 ssize_t cookie; 6529 6530 if (!IS_USER_ADDRESS(userCookie) 6531 || !IS_USER_ADDRESS(userInfo) 6532 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 6533 return B_BAD_ADDRESS; 6534 6535 area_info info; 6536 status_t status = _get_next_area_info(team, &cookie, &info, 6537 sizeof(area_info)); 6538 if (status != B_OK) 6539 return status; 6540 6541 //info.protection &= B_USER_PROTECTION; 6542 6543 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6544 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6545 return B_BAD_ADDRESS; 6546 6547 return status; 6548 } 6549 6550 6551 status_t 6552 _user_set_area_protection(area_id area, uint32 newProtection) 6553 { 6554 if ((newProtection & ~(B_USER_PROTECTION | B_CLONEABLE_AREA)) != 0) 6555 return B_BAD_VALUE; 6556 6557 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6558 newProtection, false); 6559 } 6560 6561 6562 status_t 6563 _user_resize_area(area_id area, size_t newSize) 6564 { 6565 // TODO: Since we restrict deleting of areas to those owned by the team, 6566 // we should also do that for resizing (check other functions, too). 
6567 return vm_resize_area(area, newSize, false); 6568 } 6569 6570 6571 area_id 6572 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6573 team_id target) 6574 { 6575 // filter out some unavailable values (for userland) 6576 switch (addressSpec) { 6577 case B_ANY_KERNEL_ADDRESS: 6578 case B_ANY_KERNEL_BLOCK_ADDRESS: 6579 return B_BAD_VALUE; 6580 } 6581 6582 void* address; 6583 if (!IS_USER_ADDRESS(userAddress) 6584 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6585 return B_BAD_ADDRESS; 6586 6587 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6588 if (newArea < B_OK) 6589 return newArea; 6590 6591 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6592 return B_BAD_ADDRESS; 6593 6594 return newArea; 6595 } 6596 6597 6598 area_id 6599 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6600 uint32 protection, area_id sourceArea) 6601 { 6602 char name[B_OS_NAME_LENGTH]; 6603 void* address; 6604 6605 // filter out some unavailable values (for userland) 6606 switch (addressSpec) { 6607 case B_ANY_KERNEL_ADDRESS: 6608 case B_ANY_KERNEL_BLOCK_ADDRESS: 6609 return B_BAD_VALUE; 6610 } 6611 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6612 return B_BAD_VALUE; 6613 6614 if (!IS_USER_ADDRESS(userName) 6615 || !IS_USER_ADDRESS(userAddress) 6616 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6617 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6618 return B_BAD_ADDRESS; 6619 6620 fix_protection(&protection); 6621 6622 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6623 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6624 false); 6625 if (clonedArea < B_OK) 6626 return clonedArea; 6627 6628 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6629 delete_area(clonedArea); 6630 return B_BAD_ADDRESS; 6631 } 6632 6633 return clonedArea; 6634 } 6635 6636 6637 area_id 6638 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6639 size_t size, uint32 lock, uint32 protection) 6640 { 6641 char name[B_OS_NAME_LENGTH]; 6642 void* address; 6643 6644 // filter out some unavailable values (for userland) 6645 switch (addressSpec) { 6646 case B_ANY_KERNEL_ADDRESS: 6647 case B_ANY_KERNEL_BLOCK_ADDRESS: 6648 return B_BAD_VALUE; 6649 } 6650 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6651 return B_BAD_VALUE; 6652 6653 if (!IS_USER_ADDRESS(userName) 6654 || !IS_USER_ADDRESS(userAddress) 6655 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6656 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6657 return B_BAD_ADDRESS; 6658 6659 if (addressSpec == B_EXACT_ADDRESS 6660 && IS_KERNEL_ADDRESS(address)) 6661 return B_BAD_VALUE; 6662 6663 if (addressSpec == B_ANY_ADDRESS) 6664 addressSpec = B_RANDOMIZED_ANY_ADDRESS; 6665 if (addressSpec == B_BASE_ADDRESS) 6666 addressSpec = B_RANDOMIZED_BASE_ADDRESS; 6667 6668 fix_protection(&protection); 6669 6670 virtual_address_restrictions virtualRestrictions = {}; 6671 virtualRestrictions.address = address; 6672 virtualRestrictions.address_specification = addressSpec; 6673 physical_address_restrictions physicalRestrictions = {}; 6674 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6675 size, lock, protection, 0, 0, &virtualRestrictions, 6676 &physicalRestrictions, false, &address); 6677 6678 if (area >= B_OK 6679 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6680 delete_area(area); 6681 return B_BAD_ADDRESS; 6682 
} 6683 6684 return area; 6685 } 6686 6687 6688 status_t 6689 _user_delete_area(area_id area) 6690 { 6691 // Unlike the BeOS implementation, you can now only delete areas 6692 // that you have created yourself from userland. 6693 // The documentation to delete_area() explicitly states that this 6694 // will be restricted in the future, and so it will. 6695 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6696 } 6697 6698 6699 // TODO: create a BeOS style call for this! 6700 6701 area_id 6702 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6703 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6704 int fd, off_t offset) 6705 { 6706 char name[B_OS_NAME_LENGTH]; 6707 void* address; 6708 area_id area; 6709 6710 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6711 return B_BAD_VALUE; 6712 6713 fix_protection(&protection); 6714 6715 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6716 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6717 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6718 return B_BAD_ADDRESS; 6719 6720 if (addressSpec == B_EXACT_ADDRESS) { 6721 if ((addr_t)address + size < (addr_t)address 6722 || (addr_t)address % B_PAGE_SIZE != 0) { 6723 return B_BAD_VALUE; 6724 } 6725 if (!IS_USER_ADDRESS(address) 6726 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6727 return B_BAD_ADDRESS; 6728 } 6729 } 6730 6731 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6732 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6733 false); 6734 if (area < B_OK) 6735 return area; 6736 6737 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6738 return B_BAD_ADDRESS; 6739 6740 return area; 6741 } 6742 6743 6744 status_t 6745 _user_unmap_memory(void* _address, size_t size) 6746 { 6747 addr_t address = (addr_t)_address; 6748 6749 // check params 6750 if (size == 0 || (addr_t)address + size < (addr_t)address 6751 || (addr_t)address % B_PAGE_SIZE != 0) { 6752 return B_BAD_VALUE; 6753 } 6754 6755 if (!IS_USER_ADDRESS(address) 6756 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6757 return B_BAD_ADDRESS; 6758 } 6759 6760 // Write lock the address space and ensure the address range is not wired. 6761 AddressSpaceWriteLocker locker; 6762 do { 6763 status_t status = locker.SetTo(team_get_current_team_id()); 6764 if (status != B_OK) 6765 return status; 6766 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6767 size, &locker)); 6768 6769 // unmap 6770 return unmap_address_range(locker.AddressSpace(), address, size, false); 6771 } 6772 6773 6774 status_t 6775 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6776 { 6777 // check address range 6778 addr_t address = (addr_t)_address; 6779 size = PAGE_ALIGN(size); 6780 6781 if ((address % B_PAGE_SIZE) != 0) 6782 return B_BAD_VALUE; 6783 if (!is_user_address_range(_address, size)) { 6784 // weird error code required by POSIX 6785 return ENOMEM; 6786 } 6787 6788 // extend and check protection 6789 if ((protection & ~B_USER_PROTECTION) != 0) 6790 return B_BAD_VALUE; 6791 6792 fix_protection(&protection); 6793 6794 // We need to write lock the address space, since we're going to play with 6795 // the areas. Also make sure that none of the areas is wired and that we're 6796 // actually allowed to change the protection. 
6797 AddressSpaceWriteLocker locker; 6798 6799 bool restart; 6800 do { 6801 restart = false; 6802 6803 status_t status = locker.SetTo(team_get_current_team_id()); 6804 if (status != B_OK) 6805 return status; 6806 6807 // First round: Check whether the whole range is covered by areas and we 6808 // are allowed to modify them. 6809 addr_t currentAddress = address; 6810 size_t sizeLeft = size; 6811 while (sizeLeft > 0) { 6812 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6813 if (area == NULL) 6814 return B_NO_MEMORY; 6815 6816 if ((area->protection & B_KERNEL_AREA) != 0) 6817 return B_NOT_ALLOWED; 6818 if (area->protection_max != 0 6819 && (protection & area->protection_max) != (protection & B_USER_PROTECTION)) { 6820 return B_NOT_ALLOWED; 6821 } 6822 6823 addr_t offset = currentAddress - area->Base(); 6824 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6825 6826 AreaCacheLocker cacheLocker(area); 6827 6828 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6829 &locker, &cacheLocker)) { 6830 restart = true; 6831 break; 6832 } 6833 6834 cacheLocker.Unlock(); 6835 6836 currentAddress += rangeSize; 6837 sizeLeft -= rangeSize; 6838 } 6839 } while (restart); 6840 6841 // Second round: If the protections differ from that of the area, create a 6842 // page protection array and re-map mapped pages. 6843 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6844 addr_t currentAddress = address; 6845 size_t sizeLeft = size; 6846 while (sizeLeft > 0) { 6847 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6848 if (area == NULL) 6849 return B_NO_MEMORY; 6850 6851 addr_t offset = currentAddress - area->Base(); 6852 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6853 6854 currentAddress += rangeSize; 6855 sizeLeft -= rangeSize; 6856 6857 if (area->page_protections == NULL) { 6858 if (area->protection == protection) 6859 continue; 6860 if (offset == 0 && rangeSize == area->Size()) { 6861 status_t status = vm_set_area_protection(area->address_space->ID(), 6862 area->id, protection, false); 6863 if (status != B_OK) 6864 return status; 6865 continue; 6866 } 6867 6868 status_t status = allocate_area_page_protections(area); 6869 if (status != B_OK) 6870 return status; 6871 } 6872 6873 // We need to lock the complete cache chain, since we potentially unmap 6874 // pages of lower caches. 6875 VMCache* topCache = vm_area_get_locked_cache(area); 6876 VMCacheChainLocker cacheChainLocker(topCache); 6877 cacheChainLocker.LockAllSourceCaches(); 6878 6879 for (addr_t pageAddress = area->Base() + offset; 6880 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6881 map->Lock(); 6882 6883 set_area_page_protection(area, pageAddress, protection); 6884 6885 phys_addr_t physicalAddress; 6886 uint32 flags; 6887 6888 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6889 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6890 map->Unlock(); 6891 continue; 6892 } 6893 6894 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6895 if (page == NULL) { 6896 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6897 "\n", area, physicalAddress); 6898 map->Unlock(); 6899 return B_ERROR; 6900 } 6901 6902 // If the page is not in the topmost cache and write access is 6903 // requested, we have to unmap it. Otherwise we can re-map it with 6904 // the new protection. 
6905 bool unmapPage = page->Cache() != topCache 6906 && (protection & B_WRITE_AREA) != 0; 6907 6908 if (!unmapPage) 6909 map->ProtectPage(area, pageAddress, protection); 6910 6911 map->Unlock(); 6912 6913 if (unmapPage) { 6914 DEBUG_PAGE_ACCESS_START(page); 6915 unmap_page(area, pageAddress); 6916 DEBUG_PAGE_ACCESS_END(page); 6917 } 6918 } 6919 } 6920 6921 return B_OK; 6922 } 6923 6924 6925 status_t 6926 _user_sync_memory(void* _address, size_t size, uint32 flags) 6927 { 6928 addr_t address = (addr_t)_address; 6929 size = PAGE_ALIGN(size); 6930 6931 // check params 6932 if ((address % B_PAGE_SIZE) != 0) 6933 return B_BAD_VALUE; 6934 if (!is_user_address_range(_address, size)) { 6935 // weird error code required by POSIX 6936 return ENOMEM; 6937 } 6938 6939 bool writeSync = (flags & MS_SYNC) != 0; 6940 bool writeAsync = (flags & MS_ASYNC) != 0; 6941 if (writeSync && writeAsync) 6942 return B_BAD_VALUE; 6943 6944 if (size == 0 || (!writeSync && !writeAsync)) 6945 return B_OK; 6946 6947 // iterate through the range and sync all concerned areas 6948 while (size > 0) { 6949 // read lock the address space 6950 AddressSpaceReadLocker locker; 6951 status_t error = locker.SetTo(team_get_current_team_id()); 6952 if (error != B_OK) 6953 return error; 6954 6955 // get the first area 6956 VMArea* area = locker.AddressSpace()->LookupArea(address); 6957 if (area == NULL) 6958 return B_NO_MEMORY; 6959 6960 uint32 offset = address - area->Base(); 6961 size_t rangeSize = min_c(area->Size() - offset, size); 6962 offset += area->cache_offset; 6963 6964 // lock the cache 6965 AreaCacheLocker cacheLocker(area); 6966 if (!cacheLocker) 6967 return B_BAD_VALUE; 6968 VMCache* cache = area->cache; 6969 6970 locker.Unlock(); 6971 6972 uint32 firstPage = offset >> PAGE_SHIFT; 6973 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6974 6975 // write the pages 6976 if (cache->type == CACHE_TYPE_VNODE) { 6977 if (writeSync) { 6978 // synchronous 6979 error = vm_page_write_modified_page_range(cache, firstPage, 6980 endPage); 6981 if (error != B_OK) 6982 return error; 6983 } else { 6984 // asynchronous 6985 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6986 // TODO: This is probably not quite what is supposed to happen. 6987 // Especially when a lot has to be written, it might take ages 6988 // until it really hits the disk. 6989 } 6990 } 6991 6992 address += rangeSize; 6993 size -= rangeSize; 6994 } 6995 6996 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6997 // synchronize multiple mappings of the same file. In our VM they never get 6998 // out of sync, though, so we don't have to do anything. 6999 7000 return B_OK; 7001 } 7002 7003 7004 status_t 7005 _user_memory_advice(void* _address, size_t size, uint32 advice) 7006 { 7007 addr_t address = (addr_t)_address; 7008 if ((address % B_PAGE_SIZE) != 0) 7009 return B_BAD_VALUE; 7010 7011 size = PAGE_ALIGN(size); 7012 if (!is_user_address_range(_address, size)) { 7013 // weird error code required by POSIX 7014 return B_NO_MEMORY; 7015 } 7016 7017 switch (advice) { 7018 case MADV_NORMAL: 7019 case MADV_SEQUENTIAL: 7020 case MADV_RANDOM: 7021 case MADV_WILLNEED: 7022 case MADV_DONTNEED: 7023 // TODO: Implement! 
			break;

		case MADV_FREE:
		{
			AddressSpaceWriteLocker locker;
			do {
				status_t status = locker.SetTo(team_get_current_team_id());
				if (status != B_OK)
					return status;
			} while (wait_if_address_range_is_wired(locker.AddressSpace(),
					address, size, &locker));

			discard_address_range(locker.AddressSpace(), address, size, false);
			break;
		}

		default:
			return B_BAD_VALUE;
	}

	return B_OK;
}


status_t
_user_get_memory_properties(team_id teamID, const void* address,
	uint32* _protected, uint32* _lock)
{
	if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock))
		return B_BAD_ADDRESS;

	AddressSpaceReadLocker locker;
	status_t error = locker.SetTo(teamID);
	if (error != B_OK)
		return error;

	VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address);
	if (area == NULL)
		return B_NO_MEMORY;

	uint32 protection = get_area_page_protection(area, (addr_t)address);
	uint32 wiring = area->wiring;

	locker.Unlock();

	error = user_memcpy(_protected, &protection, sizeof(protection));
	if (error != B_OK)
		return error;

	error = user_memcpy(_lock, &wiring, sizeof(wiring));

	return error;
}


static status_t
user_set_memory_swappable(const void* _address, size_t size, bool swappable)
{
#if ENABLE_SWAP_SUPPORT
	// check address range
	addr_t address = (addr_t)_address;
	size = PAGE_ALIGN(size);

	if ((address % B_PAGE_SIZE) != 0)
		return EINVAL;
	if (!is_user_address_range(_address, size))
		return EINVAL;

	const addr_t endAddress = address + size;

	AddressSpaceReadLocker addressSpaceLocker;
	status_t error = addressSpaceLocker.SetTo(team_get_current_team_id());
	if (error != B_OK)
		return error;
	VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace();

	// iterate through all concerned areas
	addr_t nextAddress = address;
	while (nextAddress != endAddress) {
		// get the next area
		VMArea* area = addressSpace->LookupArea(nextAddress);
		if (area == NULL) {
			error = B_BAD_ADDRESS;
			break;
		}

		const addr_t areaStart = nextAddress;
		const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size());
		nextAddress = areaEnd;

		error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
		if (error != B_OK) {
			// We don't need to unset or reset things on failure.
			break;
		}

		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
		VMAnonymousCache* anonCache = NULL;
		if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) {
			// This memory already can never be swapped. Nothing to do.
		} else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) {
			error = anonCache->SetCanSwapPages(areaStart - area->Base(),
				areaEnd - areaStart, swappable);
		} else {
			// Some other cache type? We cannot affect anything here.
			error = EINVAL;
		}

		cacheChainLocker.Unlock();

		unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0);
		if (error != B_OK)
			break;
	}

	return error;
#else
	// No swap support? Nothing to do.
	return B_OK;
#endif
}


status_t
_user_mlock(const void* _address, size_t size)
{
	return user_set_memory_swappable(_address, size, false);
}


status_t
_user_munlock(const void* _address, size_t size)
{
	// TODO: B_SHARED_AREAs need to be handled a bit differently:
	// if multiple clones of an area had mlock() called on them,
	// munlock() must also be called on all of them to actually unlock.
	// (At present, the first munlock() will unlock all.)
	// TODO: fork() should automatically unlock memory in the child.
	return user_set_memory_swappable(_address, size, true);
}


// #pragma mark -- compatibility


#if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32


struct physical_entry_beos {
	uint32	address;
	uint32	size;
};


/*!	The physical_entry structure has changed. We need to translate it to the
	old one.
*/
extern "C" int32
__get_memory_map_beos(const void* _address, size_t numBytes,
	physical_entry_beos* table, int32 numEntries)
{
	if (numEntries <= 0)
		return B_BAD_VALUE;

	const uint8* address = (const uint8*)_address;

	int32 count = 0;
	while (numBytes > 0 && count < numEntries) {
		physical_entry entry;
		status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1);
		if (result < 0) {
			if (result != B_BUFFER_OVERFLOW)
				return result;
		}

		if (entry.address >= (phys_addr_t)1 << 32) {
			panic("get_memory_map(): Address is greater than 4 GB!");
			return B_ERROR;
		}

		table[count].address = entry.address;
		table[count++].size = entry.size;

		address += entry.size;
		numBytes -= entry.size;
	}

	// null-terminate the table, if possible
	if (count < numEntries) {
		table[count].address = 0;
		table[count].size = 0;
	}

	return B_OK;
}


/*!	The type of the \a physicalAddress parameter has changed from void* to
	phys_addr_t.
*/
extern "C" area_id
__map_physical_memory_beos(const char* name, void* physicalAddress,
	size_t numBytes, uint32 addressSpec, uint32 protection,
	void** _virtualAddress)
{
	return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes,
		addressSpec, protection, _virtualAddress);
}


/*!	The caller might not be able to deal with physical addresses >= 4 GB, so
	we meddle with the \a lock parameter to force 32 bit.
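	\c B_FULL_LOCK and \c B_LAZY_LOCK are therefore mapped to
	\c B_32_BIT_FULL_LOCK, and \c B_CONTIGUOUS to \c B_32_BIT_CONTIGUOUS.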
7236 */ 7237 extern "C" area_id 7238 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 7239 size_t size, uint32 lock, uint32 protection) 7240 { 7241 switch (lock) { 7242 case B_NO_LOCK: 7243 break; 7244 case B_FULL_LOCK: 7245 case B_LAZY_LOCK: 7246 lock = B_32_BIT_FULL_LOCK; 7247 break; 7248 case B_CONTIGUOUS: 7249 lock = B_32_BIT_CONTIGUOUS; 7250 break; 7251 } 7252 7253 return __create_area_haiku(name, _address, addressSpec, size, lock, 7254 protection); 7255 } 7256 7257 7258 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 7259 "BASE"); 7260 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 7261 "map_physical_memory@", "BASE"); 7262 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 7263 "BASE"); 7264 7265 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7266 "get_memory_map@@", "1_ALPHA3"); 7267 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7268 "map_physical_memory@@", "1_ALPHA3"); 7269 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7270 "1_ALPHA3"); 7271 7272 7273 #else 7274 7275 7276 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7277 "get_memory_map@@", "BASE"); 7278 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7279 "map_physical_memory@@", "BASE"); 7280 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7281 "BASE"); 7282 7283 7284 #endif // defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7285