1 /* 2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de. 3 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de. 4 * Distributed under the terms of the MIT License. 5 * 6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved. 7 * Distributed under the terms of the NewOS License. 8 */ 9 10 11 #include <vm/vm.h> 12 13 #include <ctype.h> 14 #include <stdlib.h> 15 #include <stdio.h> 16 #include <string.h> 17 #include <sys/mman.h> 18 19 #include <algorithm> 20 21 #include <OS.h> 22 #include <KernelExport.h> 23 24 #include <AutoDeleterDrivers.h> 25 26 #include <symbol_versioning.h> 27 28 #include <arch/cpu.h> 29 #include <arch/vm.h> 30 #include <arch/user_memory.h> 31 #include <boot/elf.h> 32 #include <boot/stage2.h> 33 #include <condition_variable.h> 34 #include <console.h> 35 #include <debug.h> 36 #include <file_cache.h> 37 #include <fs/fd.h> 38 #include <heap.h> 39 #include <kernel.h> 40 #include <int.h> 41 #include <lock.h> 42 #include <low_resource_manager.h> 43 #include <slab/Slab.h> 44 #include <smp.h> 45 #include <system_info.h> 46 #include <thread.h> 47 #include <team.h> 48 #include <tracing.h> 49 #include <util/AutoLock.h> 50 #include <util/ThreadAutoLock.h> 51 #include <vm/vm_page.h> 52 #include <vm/vm_priv.h> 53 #include <vm/VMAddressSpace.h> 54 #include <vm/VMArea.h> 55 #include <vm/VMCache.h> 56 57 #include "VMAddressSpaceLocking.h" 58 #include "VMAnonymousCache.h" 59 #include "VMAnonymousNoSwapCache.h" 60 #include "IORequest.h" 61 62 63 //#define TRACE_VM 64 //#define TRACE_FAULTS 65 #ifdef TRACE_VM 66 # define TRACE(x) dprintf x 67 #else 68 # define TRACE(x) ; 69 #endif 70 #ifdef TRACE_FAULTS 71 # define FTRACE(x) dprintf x 72 #else 73 # define FTRACE(x) ; 74 #endif 75 76 77 namespace { 78 79 class AreaCacheLocking { 80 public: 81 inline bool Lock(VMCache* lockable) 82 { 83 return false; 84 } 85 86 inline void Unlock(VMCache* lockable) 87 { 88 vm_area_put_locked_cache(lockable); 89 } 90 }; 91 92 class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> { 93 public: 94 inline AreaCacheLocker(VMCache* cache = NULL) 95 : AutoLocker<VMCache, AreaCacheLocking>(cache, true) 96 { 97 } 98 99 inline AreaCacheLocker(VMArea* area) 100 : AutoLocker<VMCache, AreaCacheLocking>() 101 { 102 SetTo(area); 103 } 104 105 inline void SetTo(VMCache* cache, bool alreadyLocked) 106 { 107 AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked); 108 } 109 110 inline void SetTo(VMArea* area) 111 { 112 return AutoLocker<VMCache, AreaCacheLocking>::SetTo( 113 area != NULL ? 
vm_area_get_locked_cache(area) : NULL, true, true); 114 } 115 }; 116 117 118 class VMCacheChainLocker { 119 public: 120 VMCacheChainLocker() 121 : 122 fTopCache(NULL), 123 fBottomCache(NULL) 124 { 125 } 126 127 VMCacheChainLocker(VMCache* topCache) 128 : 129 fTopCache(topCache), 130 fBottomCache(topCache) 131 { 132 } 133 134 ~VMCacheChainLocker() 135 { 136 Unlock(); 137 } 138 139 void SetTo(VMCache* topCache) 140 { 141 fTopCache = topCache; 142 fBottomCache = topCache; 143 144 if (topCache != NULL) 145 topCache->SetUserData(NULL); 146 } 147 148 VMCache* LockSourceCache() 149 { 150 if (fBottomCache == NULL || fBottomCache->source == NULL) 151 return NULL; 152 153 VMCache* previousCache = fBottomCache; 154 155 fBottomCache = fBottomCache->source; 156 fBottomCache->Lock(); 157 fBottomCache->AcquireRefLocked(); 158 fBottomCache->SetUserData(previousCache); 159 160 return fBottomCache; 161 } 162 163 void LockAllSourceCaches() 164 { 165 while (LockSourceCache() != NULL) { 166 } 167 } 168 169 void Unlock(VMCache* exceptCache = NULL) 170 { 171 if (fTopCache == NULL) 172 return; 173 174 // Unlock caches in source -> consumer direction. This is important to 175 // avoid double-locking and a reversal of locking order in case a cache 176 // is eligable for merging. 177 VMCache* cache = fBottomCache; 178 while (cache != NULL) { 179 VMCache* nextCache = (VMCache*)cache->UserData(); 180 if (cache != exceptCache) 181 cache->ReleaseRefAndUnlock(cache != fTopCache); 182 183 if (cache == fTopCache) 184 break; 185 186 cache = nextCache; 187 } 188 189 fTopCache = NULL; 190 fBottomCache = NULL; 191 } 192 193 void UnlockKeepRefs(bool keepTopCacheLocked) 194 { 195 if (fTopCache == NULL) 196 return; 197 198 VMCache* nextCache = fBottomCache; 199 VMCache* cache = NULL; 200 201 while (keepTopCacheLocked 202 ? nextCache != fTopCache : cache != fTopCache) { 203 cache = nextCache; 204 nextCache = (VMCache*)cache->UserData(); 205 cache->Unlock(cache != fTopCache); 206 } 207 } 208 209 void RelockCaches(bool topCacheLocked) 210 { 211 if (fTopCache == NULL) 212 return; 213 214 VMCache* nextCache = fTopCache; 215 VMCache* cache = NULL; 216 if (topCacheLocked) { 217 cache = nextCache; 218 nextCache = cache->source; 219 } 220 221 while (cache != fBottomCache && nextCache != NULL) { 222 VMCache* consumer = cache; 223 cache = nextCache; 224 nextCache = cache->source; 225 cache->Lock(); 226 cache->SetUserData(consumer); 227 } 228 } 229 230 private: 231 VMCache* fTopCache; 232 VMCache* fBottomCache; 233 }; 234 235 } // namespace 236 237 238 // The memory reserve an allocation of the certain priority must not touch. 
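// (Illustrative sketch only, not code that exists elsewhere in this file:
// the table is indexed by the VM_PRIORITY_* constants, so a reservation
// check could look roughly like
//     bool ok = sAvailableMemory - (off_t)amount
//         >= (off_t)kMemoryReserveForPriority[priority];
// i.e. user allocations must leave VM_MEMORY_RESERVE_USER bytes untouched,
// system allocations VM_MEMORY_RESERVE_SYSTEM bytes, and VIP allocations
// may dip into everything. The name "amount" is hypothetical here.)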
239 static const size_t kMemoryReserveForPriority[] = { 240 VM_MEMORY_RESERVE_USER, // user 241 VM_MEMORY_RESERVE_SYSTEM, // system 242 0 // VIP 243 }; 244 245 246 ObjectCache* gPageMappingsObjectCache; 247 248 static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache"); 249 250 static off_t sAvailableMemory; 251 static off_t sNeededMemory; 252 static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock"); 253 static uint32 sPageFaults; 254 255 static VMPhysicalPageMapper* sPhysicalPageMapper; 256 257 #if DEBUG_CACHE_LIST 258 259 struct cache_info { 260 VMCache* cache; 261 addr_t page_count; 262 addr_t committed; 263 }; 264 265 static const int kCacheInfoTableCount = 100 * 1024; 266 static cache_info* sCacheInfoTable; 267 268 #endif // DEBUG_CACHE_LIST 269 270 271 // function declarations 272 static void delete_area(VMAddressSpace* addressSpace, VMArea* area, 273 bool addressSpaceCleanup); 274 static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address, 275 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage); 276 static status_t map_backing_store(VMAddressSpace* addressSpace, 277 VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring, 278 int protection, int protectionMax, int mapping, uint32 flags, 279 const virtual_address_restrictions* addressRestrictions, bool kernel, 280 VMArea** _area, void** _virtualAddress); 281 static void fix_protection(uint32* protection); 282 283 284 // #pragma mark - 285 286 287 #if VM_PAGE_FAULT_TRACING 288 289 namespace VMPageFaultTracing { 290 291 class PageFaultStart : public AbstractTraceEntry { 292 public: 293 PageFaultStart(addr_t address, bool write, bool user, addr_t pc) 294 : 295 fAddress(address), 296 fPC(pc), 297 fWrite(write), 298 fUser(user) 299 { 300 Initialized(); 301 } 302 303 virtual void AddDump(TraceOutput& out) 304 { 305 out.Print("page fault %#lx %s %s, pc: %#lx", fAddress, 306 fWrite ? "write" : "read", fUser ? 
"user" : "kernel", fPC); 307 } 308 309 private: 310 addr_t fAddress; 311 addr_t fPC; 312 bool fWrite; 313 bool fUser; 314 }; 315 316 317 // page fault errors 318 enum { 319 PAGE_FAULT_ERROR_NO_AREA = 0, 320 PAGE_FAULT_ERROR_KERNEL_ONLY, 321 PAGE_FAULT_ERROR_WRITE_PROTECTED, 322 PAGE_FAULT_ERROR_READ_PROTECTED, 323 PAGE_FAULT_ERROR_EXECUTE_PROTECTED, 324 PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY, 325 PAGE_FAULT_ERROR_NO_ADDRESS_SPACE 326 }; 327 328 329 class PageFaultError : public AbstractTraceEntry { 330 public: 331 PageFaultError(area_id area, status_t error) 332 : 333 fArea(area), 334 fError(error) 335 { 336 Initialized(); 337 } 338 339 virtual void AddDump(TraceOutput& out) 340 { 341 switch (fError) { 342 case PAGE_FAULT_ERROR_NO_AREA: 343 out.Print("page fault error: no area"); 344 break; 345 case PAGE_FAULT_ERROR_KERNEL_ONLY: 346 out.Print("page fault error: area: %ld, kernel only", fArea); 347 break; 348 case PAGE_FAULT_ERROR_WRITE_PROTECTED: 349 out.Print("page fault error: area: %ld, write protected", 350 fArea); 351 break; 352 case PAGE_FAULT_ERROR_READ_PROTECTED: 353 out.Print("page fault error: area: %ld, read protected", fArea); 354 break; 355 case PAGE_FAULT_ERROR_EXECUTE_PROTECTED: 356 out.Print("page fault error: area: %ld, execute protected", 357 fArea); 358 break; 359 case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY: 360 out.Print("page fault error: kernel touching bad user memory"); 361 break; 362 case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE: 363 out.Print("page fault error: no address space"); 364 break; 365 default: 366 out.Print("page fault error: area: %ld, error: %s", fArea, 367 strerror(fError)); 368 break; 369 } 370 } 371 372 private: 373 area_id fArea; 374 status_t fError; 375 }; 376 377 378 class PageFaultDone : public AbstractTraceEntry { 379 public: 380 PageFaultDone(area_id area, VMCache* topCache, VMCache* cache, 381 vm_page* page) 382 : 383 fArea(area), 384 fTopCache(topCache), 385 fCache(cache), 386 fPage(page) 387 { 388 Initialized(); 389 } 390 391 virtual void AddDump(TraceOutput& out) 392 { 393 out.Print("page fault done: area: %ld, top cache: %p, cache: %p, " 394 "page: %p", fArea, fTopCache, fCache, fPage); 395 } 396 397 private: 398 area_id fArea; 399 VMCache* fTopCache; 400 VMCache* fCache; 401 vm_page* fPage; 402 }; 403 404 } // namespace VMPageFaultTracing 405 406 # define TPF(x) new(std::nothrow) VMPageFaultTracing::x; 407 #else 408 # define TPF(x) ; 409 #endif // VM_PAGE_FAULT_TRACING 410 411 412 // #pragma mark - 413 414 415 /*! The page's cache must be locked. 416 */ 417 static inline void 418 increment_page_wired_count(vm_page* page) 419 { 420 if (!page->IsMapped()) 421 atomic_add(&gMappedPagesCount, 1); 422 page->IncrementWiredCount(); 423 } 424 425 426 /*! The page's cache must be locked. 427 */ 428 static inline void 429 decrement_page_wired_count(vm_page* page) 430 { 431 page->DecrementWiredCount(); 432 if (!page->IsMapped()) 433 atomic_add(&gMappedPagesCount, -1); 434 } 435 436 437 static inline addr_t 438 virtual_page_address(VMArea* area, vm_page* page) 439 { 440 return area->Base() 441 + ((page->cache_offset << PAGE_SHIFT) - area->cache_offset); 442 } 443 444 445 //! 
You need to have the address space locked when calling this function 446 static VMArea* 447 lookup_area(VMAddressSpace* addressSpace, area_id id) 448 { 449 VMAreaHash::ReadLock(); 450 451 VMArea* area = VMAreaHash::LookupLocked(id); 452 if (area != NULL && area->address_space != addressSpace) 453 area = NULL; 454 455 VMAreaHash::ReadUnlock(); 456 457 return area; 458 } 459 460 461 static status_t 462 allocate_area_page_protections(VMArea* area) 463 { 464 // In the page protections we store only the three user protections, 465 // so we use 4 bits per page. 466 size_t bytes = (area->Size() / B_PAGE_SIZE + 1) / 2; 467 area->page_protections = (uint8*)malloc_etc(bytes, 468 area->address_space == VMAddressSpace::Kernel() 469 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 470 if (area->page_protections == NULL) 471 return B_NO_MEMORY; 472 473 // init the page protections for all pages to that of the area 474 uint32 areaProtection = area->protection 475 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 476 memset(area->page_protections, areaProtection | (areaProtection << 4), 477 bytes); 478 return B_OK; 479 } 480 481 482 static inline void 483 set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection) 484 { 485 protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA; 486 addr_t pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 487 uint8& entry = area->page_protections[pageIndex / 2]; 488 if (pageIndex % 2 == 0) 489 entry = (entry & 0xf0) | protection; 490 else 491 entry = (entry & 0x0f) | (protection << 4); 492 } 493 494 495 static inline uint32 496 get_area_page_protection(VMArea* area, addr_t pageAddress) 497 { 498 if (area->page_protections == NULL) 499 return area->protection; 500 501 uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE; 502 uint32 protection = area->page_protections[pageIndex / 2]; 503 if (pageIndex % 2 == 0) 504 protection &= 0x0f; 505 else 506 protection >>= 4; 507 508 uint32 kernelProtection = 0; 509 if ((protection & B_READ_AREA) != 0) 510 kernelProtection |= B_KERNEL_READ_AREA; 511 if ((protection & B_WRITE_AREA) != 0) 512 kernelProtection |= B_KERNEL_WRITE_AREA; 513 514 // If this is a kernel area we return only the kernel flags. 515 if (area->address_space == VMAddressSpace::Kernel()) 516 return kernelProtection; 517 518 return protection | kernelProtection; 519 } 520 521 522 /*! The caller must have reserved enough pages the translation map 523 implementation might need to map this page. 524 The page's cache must be locked. 525 */ 526 static status_t 527 map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection, 528 vm_page_reservation* reservation) 529 { 530 VMTranslationMap* map = area->address_space->TranslationMap(); 531 532 bool wasMapped = page->IsMapped(); 533 534 if (area->wiring == B_NO_LOCK) { 535 DEBUG_PAGE_ACCESS_CHECK(page); 536 537 bool isKernelSpace = area->address_space == VMAddressSpace::Kernel(); 538 vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc( 539 gPageMappingsObjectCache, 540 CACHE_DONT_WAIT_FOR_MEMORY 541 | (isKernelSpace ? 
CACHE_DONT_LOCK_KERNEL_SPACE : 0)); 542 if (mapping == NULL) 543 return B_NO_MEMORY; 544 545 mapping->page = page; 546 mapping->area = area; 547 548 map->Lock(); 549 550 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 551 area->MemoryType(), reservation); 552 553 // insert mapping into lists 554 if (!page->IsMapped()) 555 atomic_add(&gMappedPagesCount, 1); 556 557 page->mappings.Add(mapping); 558 area->mappings.Add(mapping); 559 560 map->Unlock(); 561 } else { 562 DEBUG_PAGE_ACCESS_CHECK(page); 563 564 map->Lock(); 565 map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection, 566 area->MemoryType(), reservation); 567 map->Unlock(); 568 569 increment_page_wired_count(page); 570 } 571 572 if (!wasMapped) { 573 // The page is mapped now, so we must not remain in the cached queue. 574 // It also makes sense to move it from the inactive to the active, since 575 // otherwise the page daemon wouldn't come to keep track of it (in idle 576 // mode) -- if the page isn't touched, it will be deactivated after a 577 // full iteration through the queue at the latest. 578 if (page->State() == PAGE_STATE_CACHED 579 || page->State() == PAGE_STATE_INACTIVE) { 580 vm_page_set_state(page, PAGE_STATE_ACTIVE); 581 } 582 } 583 584 return B_OK; 585 } 586 587 588 /*! If \a preserveModified is \c true, the caller must hold the lock of the 589 page's cache. 590 */ 591 static inline bool 592 unmap_page(VMArea* area, addr_t virtualAddress) 593 { 594 return area->address_space->TranslationMap()->UnmapPage(area, 595 virtualAddress, true); 596 } 597 598 599 /*! If \a preserveModified is \c true, the caller must hold the lock of all 600 mapped pages' caches. 601 */ 602 static inline void 603 unmap_pages(VMArea* area, addr_t base, size_t size) 604 { 605 area->address_space->TranslationMap()->UnmapPages(area, base, size, true); 606 } 607 608 609 static inline bool 610 intersect_area(VMArea* area, addr_t& address, addr_t& size, addr_t& offset) 611 { 612 if (address < area->Base()) { 613 offset = area->Base() - address; 614 if (offset >= size) 615 return false; 616 617 address = area->Base(); 618 size -= offset; 619 offset = 0; 620 if (size > area->Size()) 621 size = area->Size(); 622 623 return true; 624 } 625 626 offset = address - area->Base(); 627 if (offset >= area->Size()) 628 return false; 629 630 if (size >= area->Size() - offset) 631 size = area->Size() - offset; 632 633 return true; 634 } 635 636 637 /*! Cuts a piece out of an area. If the given cut range covers the complete 638 area, it is deleted. If it covers the beginning or the end, the area is 639 resized accordingly. If the range covers some part in the middle of the 640 area, it is split in two; in this case the second area is returned via 641 \a _secondArea (the variable is left untouched in the other cases). 642 The address space must be write locked. 643 The caller must ensure that no part of the given range is wired. 644 */ 645 static status_t 646 cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address, 647 addr_t size, VMArea** _secondArea, bool kernel) 648 { 649 addr_t offset; 650 if (!intersect_area(area, address, size, offset)) 651 return B_OK; 652 653 // Is the area fully covered? 
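	// (Overview of the cases handled below, for an area [base, base + size)
	// and a cut range normalized by intersect_area(): full coverage deletes
	// the area; a cut at the end shrinks its tail; a cut at the beginning
	// shrinks its head and shifts cache_offset; anything else splits the
	// area in two.)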
654 if (address == area->Base() && size == area->Size()) { 655 delete_area(addressSpace, area, false); 656 return B_OK; 657 } 658 659 int priority; 660 uint32 allocationFlags; 661 if (addressSpace == VMAddressSpace::Kernel()) { 662 priority = VM_PRIORITY_SYSTEM; 663 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY 664 | HEAP_DONT_LOCK_KERNEL_SPACE; 665 } else { 666 priority = VM_PRIORITY_USER; 667 allocationFlags = 0; 668 } 669 670 VMCache* cache = vm_area_get_locked_cache(area); 671 VMCacheChainLocker cacheChainLocker(cache); 672 cacheChainLocker.LockAllSourceCaches(); 673 674 // If no one else uses the area's cache and it's an anonymous cache, we can 675 // resize or split it, too. 676 bool onlyCacheUser = cache->areas == area && area->cache_next == NULL 677 && cache->consumers.IsEmpty() && area->cache_type == CACHE_TYPE_RAM; 678 679 // Cut the end only? 680 if (offset > 0 && size == area->Size() - offset) { 681 status_t error = addressSpace->ShrinkAreaTail(area, offset, 682 allocationFlags); 683 if (error != B_OK) 684 return error; 685 686 // unmap pages 687 unmap_pages(area, address, size); 688 689 if (onlyCacheUser) { 690 // Since VMCache::Resize() can temporarily drop the lock, we must 691 // unlock all lower caches to prevent locking order inversion. 692 cacheChainLocker.Unlock(cache); 693 cache->Resize(cache->virtual_base + offset, priority); 694 cache->ReleaseRefAndUnlock(); 695 } 696 697 return B_OK; 698 } 699 700 // Cut the beginning only? 701 if (area->Base() == address) { 702 // resize the area 703 status_t error = addressSpace->ShrinkAreaHead(area, area->Size() - size, 704 allocationFlags); 705 if (error != B_OK) 706 return error; 707 708 // unmap pages 709 unmap_pages(area, address, size); 710 711 if (onlyCacheUser) { 712 // Since VMCache::Rebase() can temporarily drop the lock, we must 713 // unlock all lower caches to prevent locking order inversion. 714 cacheChainLocker.Unlock(cache); 715 cache->Rebase(cache->virtual_base + size, priority); 716 cache->ReleaseRefAndUnlock(); 717 } 718 area->cache_offset += size; 719 720 return B_OK; 721 } 722 723 // The tough part -- cut a piece out of the middle of the area. 724 // We do that by shrinking the area to the begin section and creating a 725 // new area for the end section. 726 addr_t firstNewSize = offset; 727 addr_t secondBase = address + size; 728 addr_t secondSize = area->Size() - offset - size; 729 730 // unmap pages 731 unmap_pages(area, address, area->Size() - firstNewSize); 732 733 // resize the area 734 addr_t oldSize = area->Size(); 735 status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize, 736 allocationFlags); 737 if (error != B_OK) 738 return error; 739 740 virtual_address_restrictions addressRestrictions = {}; 741 addressRestrictions.address = (void*)secondBase; 742 addressRestrictions.address_specification = B_EXACT_ADDRESS; 743 VMArea* secondArea; 744 745 if (onlyCacheUser) { 746 // Create a new cache for the second area. 747 VMCache* secondCache; 748 error = VMCacheFactory::CreateAnonymousCache(secondCache, false, 0, 0, 749 dynamic_cast<VMAnonymousNoSwapCache*>(cache) == NULL, priority); 750 if (error != B_OK) { 751 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 752 return error; 753 } 754 755 secondCache->Lock(); 756 secondCache->temporary = cache->temporary; 757 secondCache->virtual_base = area->cache_offset; 758 secondCache->virtual_end = area->cache_offset + secondSize; 759 760 // Transfer the concerned pages from the first cache. 
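		// (Hypothetical example of the arithmetic below: for an area based
		// at 0x10000 with cache_offset 0 whose cut range ends at 0x12000,
		// secondBase is 0x12000 and adoptOffset therefore 0x2000; the pages
		// from cache offset 0x2000 onwards are reinserted into the second
		// cache starting at its virtual_base, area->cache_offset.)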
761 off_t adoptOffset = area->cache_offset + secondBase - area->Base(); 762 error = secondCache->Adopt(cache, adoptOffset, secondSize, 763 area->cache_offset); 764 765 if (error == B_OK) { 766 // Since VMCache::Resize() can temporarily drop the lock, we must 767 // unlock all lower caches to prevent locking order inversion. 768 cacheChainLocker.Unlock(cache); 769 cache->Resize(cache->virtual_base + firstNewSize, priority); 770 // Don't unlock the cache yet because we might have to resize it 771 // back. 772 773 // Map the second area. 774 error = map_backing_store(addressSpace, secondCache, 775 area->cache_offset, area->name, secondSize, area->wiring, 776 area->protection, area->protection_max, REGION_NO_PRIVATE_MAP, 0, 777 &addressRestrictions, kernel, &secondArea, NULL); 778 } 779 780 if (error != B_OK) { 781 // Restore the original cache. 782 cache->Resize(cache->virtual_base + oldSize, priority); 783 784 // Move the pages back. 785 status_t readoptStatus = cache->Adopt(secondCache, 786 area->cache_offset, secondSize, adoptOffset); 787 if (readoptStatus != B_OK) { 788 // Some (swap) pages have not been moved back and will be lost 789 // once the second cache is deleted. 790 panic("failed to restore cache range: %s", 791 strerror(readoptStatus)); 792 793 // TODO: Handle out of memory cases by freeing memory and 794 // retrying. 795 } 796 797 cache->ReleaseRefAndUnlock(); 798 secondCache->ReleaseRefAndUnlock(); 799 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 800 return error; 801 } 802 803 // Now we can unlock it. 804 cache->ReleaseRefAndUnlock(); 805 secondCache->Unlock(); 806 } else { 807 error = map_backing_store(addressSpace, cache, area->cache_offset 808 + (secondBase - area->Base()), 809 area->name, secondSize, area->wiring, area->protection, 810 area->protection_max, REGION_NO_PRIVATE_MAP, 0, 811 &addressRestrictions, kernel, &secondArea, NULL); 812 if (error != B_OK) { 813 addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags); 814 return error; 815 } 816 // We need a cache reference for the new area. 817 cache->AcquireRefLocked(); 818 } 819 820 if (_secondArea != NULL) 821 *_secondArea = secondArea; 822 823 return B_OK; 824 } 825 826 827 /*! Deletes or cuts all areas in the given address range. 828 The address space must be write-locked. 829 The caller must ensure that no part of the given range is wired. 830 */ 831 static status_t 832 unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 833 bool kernel) 834 { 835 size = PAGE_ALIGN(size); 836 837 // Check, whether the caller is allowed to modify the concerned areas. 838 if (!kernel) { 839 for (VMAddressSpace::AreaRangeIterator it 840 = addressSpace->GetAreaRangeIterator(address, size); 841 VMArea* area = it.Next();) { 842 843 if ((area->protection & B_KERNEL_AREA) != 0) { 844 dprintf("unmap_address_range: team %" B_PRId32 " tried to " 845 "unmap range of kernel area %" B_PRId32 " (%s)\n", 846 team_get_current_team_id(), area->id, area->name); 847 return B_NOT_ALLOWED; 848 } 849 } 850 } 851 852 for (VMAddressSpace::AreaRangeIterator it 853 = addressSpace->GetAreaRangeIterator(address, size); 854 VMArea* area = it.Next();) { 855 856 status_t error = cut_area(addressSpace, area, address, size, NULL, 857 kernel); 858 if (error != B_OK) 859 return error; 860 // Failing after already messing with areas is ugly, but we 861 // can't do anything about it. 
862 } 863 864 return B_OK; 865 } 866 867 868 static status_t 869 discard_area_range(VMArea* area, addr_t address, addr_t size) 870 { 871 addr_t offset; 872 if (!intersect_area(area, address, size, offset)) 873 return B_OK; 874 875 // If someone else uses the area's cache or it's not an anonymous cache, we 876 // can't discard. 877 VMCache* cache = vm_area_get_locked_cache(area); 878 if (cache->areas != area || area->cache_next != NULL 879 || !cache->consumers.IsEmpty() || cache->type != CACHE_TYPE_RAM) { 880 return B_OK; 881 } 882 883 VMCacheChainLocker cacheChainLocker(cache); 884 cacheChainLocker.LockAllSourceCaches(); 885 886 unmap_pages(area, address, size); 887 888 // Since VMCache::Discard() can temporarily drop the lock, we must 889 // unlock all lower caches to prevent locking order inversion. 890 cacheChainLocker.Unlock(cache); 891 cache->Discard(cache->virtual_base + offset, size); 892 cache->ReleaseRefAndUnlock(); 893 894 return B_OK; 895 } 896 897 898 static status_t 899 discard_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size, 900 bool kernel) 901 { 902 for (VMAddressSpace::AreaRangeIterator it 903 = addressSpace->GetAreaRangeIterator(address, size); 904 VMArea* area = it.Next();) { 905 status_t error = discard_area_range(area, address, size); 906 if (error != B_OK) 907 return error; 908 } 909 910 return B_OK; 911 } 912 913 914 /*! You need to hold the lock of the cache and the write lock of the address 915 space when calling this function. 916 Note, that in case of error your cache will be temporarily unlocked. 917 If \a addressSpec is \c B_EXACT_ADDRESS and the 918 \c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure 919 that no part of the specified address range (base \c *_virtualAddress, size 920 \a size) is wired. 
*/
static status_t
map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
	const char* areaName, addr_t size, int wiring, int protection,
	int protectionMax, int mapping,
	uint32 flags, const virtual_address_restrictions* addressRestrictions,
	bool kernel, VMArea** _area, void** _virtualAddress)
{
	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
		", protection %d, protectionMax %d, area %p, areaName '%s'\n",
		addressSpace, cache, addressRestrictions->address, offset, size,
		addressRestrictions->address_specification, wiring, protection,
		protectionMax, _area, areaName));
	cache->AssertLocked();

	if (size == 0) {
#if KDEBUG
		panic("map_backing_store(): called with size=0 for area '%s'!",
			areaName);
#endif
		return B_BAD_VALUE;
	}

	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
	int priority;
	if (addressSpace != VMAddressSpace::Kernel()) {
		priority = VM_PRIORITY_USER;
	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
		priority = VM_PRIORITY_VIP;
		allocationFlags |= HEAP_PRIORITY_VIP;
	} else
		priority = VM_PRIORITY_SYSTEM;

	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
		allocationFlags);
	if (area == NULL)
		return B_NO_MEMORY;
	if (mapping != REGION_PRIVATE_MAP)
		area->protection_max = protectionMax & B_USER_PROTECTION;

	status_t status;

	// if this is a private map, we need to create a new cache
	// to handle the private copies of pages as they are written to
	VMCache* sourceCache = cache;
	if (mapping == REGION_PRIVATE_MAP) {
		VMCache* newCache;

		// create an anonymous cache
		status = VMCacheFactory::CreateAnonymousCache(newCache,
			(protection & B_STACK_AREA) != 0
				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
		if (status != B_OK)
			goto err1;

		newCache->Lock();
		newCache->temporary = 1;
		newCache->virtual_base = offset;
		newCache->virtual_end = offset + size;

		cache->AddConsumer(newCache);

		cache = newCache;
	}

	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
		status = cache->SetMinimalCommitment(size, priority);
		if (status != B_OK)
			goto err2;
	}

	// check to see if this address space has entered DELETE state
	if (addressSpace->IsBeingDeleted()) {
		// someone is deleting this address space right now, so we can't
		// insert the area and have to back out
		status = B_BAD_TEAM_ID;
		goto err2;
	}

	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
		status = unmap_address_range(addressSpace,
			(addr_t)addressRestrictions->address, size, kernel);
		if (status != B_OK)
			goto err2;
	}

	status = addressSpace->InsertArea(area, size, addressRestrictions,
		allocationFlags, _virtualAddress);
	if (status == B_NO_MEMORY
			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
		// Due to how many locks are held, we cannot wait here for space to be
		// freed up, but we can at least notify the low_resource handler.
1017 low_resource(B_KERNEL_RESOURCE_ADDRESS_SPACE, size, B_RELATIVE_TIMEOUT, 0); 1018 } 1019 if (status != B_OK) 1020 goto err2; 1021 1022 // attach the cache to the area 1023 area->cache = cache; 1024 area->cache_offset = offset; 1025 1026 // point the cache back to the area 1027 cache->InsertAreaLocked(area); 1028 if (mapping == REGION_PRIVATE_MAP) 1029 cache->Unlock(); 1030 1031 // insert the area in the global area hash table 1032 VMAreaHash::Insert(area); 1033 1034 // grab a ref to the address space (the area holds this) 1035 addressSpace->Get(); 1036 1037 // ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p", 1038 // cache, sourceCache, areaName, area); 1039 1040 *_area = area; 1041 return B_OK; 1042 1043 err2: 1044 if (mapping == REGION_PRIVATE_MAP) { 1045 // We created this cache, so we must delete it again. Note, that we 1046 // need to temporarily unlock the source cache or we'll otherwise 1047 // deadlock, since VMCache::_RemoveConsumer() will try to lock it, too. 1048 sourceCache->Unlock(); 1049 cache->ReleaseRefAndUnlock(); 1050 sourceCache->Lock(); 1051 } 1052 err1: 1053 addressSpace->DeleteArea(area, allocationFlags); 1054 return status; 1055 } 1056 1057 1058 /*! Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(), 1059 locker1, locker2). 1060 */ 1061 template<typename LockerType1, typename LockerType2> 1062 static inline bool 1063 wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2) 1064 { 1065 area->cache->AssertLocked(); 1066 1067 VMAreaUnwiredWaiter waiter; 1068 if (!area->AddWaiterIfWired(&waiter)) 1069 return false; 1070 1071 // unlock everything and wait 1072 if (locker1 != NULL) 1073 locker1->Unlock(); 1074 if (locker2 != NULL) 1075 locker2->Unlock(); 1076 1077 waiter.waitEntry.Wait(); 1078 1079 return true; 1080 } 1081 1082 1083 /*! Checks whether the given area has any wired ranges intersecting with the 1084 specified range and waits, if so. 1085 1086 When it has to wait, the function calls \c Unlock() on both \a locker1 1087 and \a locker2, if given. 1088 The area's top cache must be locked and must be unlocked as a side effect 1089 of calling \c Unlock() on either \a locker1 or \a locker2. 1090 1091 If the function does not have to wait it does not modify or unlock any 1092 object. 1093 1094 \param area The area to be checked. 1095 \param base The base address of the range to check. 1096 \param size The size of the address range to check. 1097 \param locker1 An object to be unlocked when before starting to wait (may 1098 be \c NULL). 1099 \param locker2 An object to be unlocked when before starting to wait (may 1100 be \c NULL). 1101 \return \c true, if the function had to wait, \c false otherwise. 1102 */ 1103 template<typename LockerType1, typename LockerType2> 1104 static inline bool 1105 wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size, 1106 LockerType1* locker1, LockerType2* locker2) 1107 { 1108 area->cache->AssertLocked(); 1109 1110 VMAreaUnwiredWaiter waiter; 1111 if (!area->AddWaiterIfWired(&waiter, base, size)) 1112 return false; 1113 1114 // unlock everything and wait 1115 if (locker1 != NULL) 1116 locker1->Unlock(); 1117 if (locker2 != NULL) 1118 locker2->Unlock(); 1119 1120 waiter.waitEntry.Wait(); 1121 1122 return true; 1123 } 1124 1125 1126 /*! Checks whether the given address space has any wired ranges intersecting 1127 with the specified range and waits, if so. 
1128 1129 Similar to wait_if_area_range_is_wired(), with the following differences: 1130 - All areas intersecting with the range are checked (respectively all until 1131 one is found that contains a wired range intersecting with the given 1132 range). 1133 - The given address space must at least be read-locked and must be unlocked 1134 when \c Unlock() is called on \a locker. 1135 - None of the areas' caches are allowed to be locked. 1136 */ 1137 template<typename LockerType> 1138 static inline bool 1139 wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base, 1140 size_t size, LockerType* locker) 1141 { 1142 for (VMAddressSpace::AreaRangeIterator it 1143 = addressSpace->GetAreaRangeIterator(base, size); 1144 VMArea* area = it.Next();) { 1145 1146 AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area)); 1147 1148 if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker)) 1149 return true; 1150 } 1151 1152 return false; 1153 } 1154 1155 1156 /*! Prepares an area to be used for vm_set_kernel_area_debug_protection(). 1157 It must be called in a situation where the kernel address space may be 1158 locked. 1159 */ 1160 status_t 1161 vm_prepare_kernel_area_debug_protection(area_id id, void** cookie) 1162 { 1163 AddressSpaceReadLocker locker; 1164 VMArea* area; 1165 status_t status = locker.SetFromArea(id, area); 1166 if (status != B_OK) 1167 return status; 1168 1169 if (area->page_protections == NULL) { 1170 status = allocate_area_page_protections(area); 1171 if (status != B_OK) 1172 return status; 1173 } 1174 1175 *cookie = (void*)area; 1176 return B_OK; 1177 } 1178 1179 1180 /*! This is a debug helper function that can only be used with very specific 1181 use cases. 1182 Sets protection for the given address range to the protection specified. 1183 If \a protection is 0 then the involved pages will be marked non-present 1184 in the translation map to cause a fault on access. The pages aren't 1185 actually unmapped however so that they can be marked present again with 1186 additional calls to this function. For this to work the area must be 1187 fully locked in memory so that the pages aren't otherwise touched. 1188 This function does not lock the kernel address space and needs to be 1189 supplied with a \a cookie retrieved from a successful call to 1190 vm_prepare_kernel_area_debug_protection(). 1191 */ 1192 status_t 1193 vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size, 1194 uint32 protection) 1195 { 1196 // check address range 1197 addr_t address = (addr_t)_address; 1198 size = PAGE_ALIGN(size); 1199 1200 if ((address % B_PAGE_SIZE) != 0 1201 || (addr_t)address + size < (addr_t)address 1202 || !IS_KERNEL_ADDRESS(address) 1203 || !IS_KERNEL_ADDRESS((addr_t)address + size)) { 1204 return B_BAD_VALUE; 1205 } 1206 1207 // Translate the kernel protection to user protection as we only store that. 
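	// (For example, a caller passing B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA
	// ends up with B_READ_AREA | B_WRITE_AREA stored in the per-page nibble;
	// see allocate_area_page_protections(), which keeps only the three user
	// protection bits, two pages per byte.)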
1208 if ((protection & B_KERNEL_READ_AREA) != 0) 1209 protection |= B_READ_AREA; 1210 if ((protection & B_KERNEL_WRITE_AREA) != 0) 1211 protection |= B_WRITE_AREA; 1212 1213 VMAddressSpace* addressSpace = VMAddressSpace::GetKernel(); 1214 VMTranslationMap* map = addressSpace->TranslationMap(); 1215 VMArea* area = (VMArea*)cookie; 1216 1217 addr_t offset = address - area->Base(); 1218 if (area->Size() - offset < size) { 1219 panic("protect range not fully within supplied area"); 1220 return B_BAD_VALUE; 1221 } 1222 1223 if (area->page_protections == NULL) { 1224 panic("area has no page protections"); 1225 return B_BAD_VALUE; 1226 } 1227 1228 // Invalidate the mapping entries so any access to them will fault or 1229 // restore the mapping entries unchanged so that lookup will success again. 1230 map->Lock(); 1231 map->DebugMarkRangePresent(address, address + size, protection != 0); 1232 map->Unlock(); 1233 1234 // And set the proper page protections so that the fault case will actually 1235 // fail and not simply try to map a new page. 1236 for (addr_t pageAddress = address; pageAddress < address + size; 1237 pageAddress += B_PAGE_SIZE) { 1238 set_area_page_protection(area, pageAddress, protection); 1239 } 1240 1241 return B_OK; 1242 } 1243 1244 1245 status_t 1246 vm_block_address_range(const char* name, void* address, addr_t size) 1247 { 1248 if (!arch_vm_supports_protection(0)) 1249 return B_NOT_SUPPORTED; 1250 1251 AddressSpaceWriteLocker locker; 1252 status_t status = locker.SetTo(VMAddressSpace::KernelID()); 1253 if (status != B_OK) 1254 return status; 1255 1256 VMAddressSpace* addressSpace = locker.AddressSpace(); 1257 1258 // create an anonymous cache 1259 VMCache* cache; 1260 status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false, 1261 VM_PRIORITY_SYSTEM); 1262 if (status != B_OK) 1263 return status; 1264 1265 cache->temporary = 1; 1266 cache->virtual_end = size; 1267 cache->Lock(); 1268 1269 VMArea* area; 1270 virtual_address_restrictions addressRestrictions = {}; 1271 addressRestrictions.address = address; 1272 addressRestrictions.address_specification = B_EXACT_ADDRESS; 1273 status = map_backing_store(addressSpace, cache, 0, name, size, 1274 B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, 0, &addressRestrictions, 1275 true, &area, NULL); 1276 if (status != B_OK) { 1277 cache->ReleaseRefAndUnlock(); 1278 return status; 1279 } 1280 1281 cache->Unlock(); 1282 area->cache_type = CACHE_TYPE_RAM; 1283 return area->id; 1284 } 1285 1286 1287 status_t 1288 vm_unreserve_address_range(team_id team, void* address, addr_t size) 1289 { 1290 AddressSpaceWriteLocker locker(team); 1291 if (!locker.IsLocked()) 1292 return B_BAD_TEAM_ID; 1293 1294 VMAddressSpace* addressSpace = locker.AddressSpace(); 1295 return addressSpace->UnreserveAddressRange((addr_t)address, size, 1296 addressSpace == VMAddressSpace::Kernel() 1297 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0); 1298 } 1299 1300 1301 status_t 1302 vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec, 1303 addr_t size, uint32 flags) 1304 { 1305 if (size == 0) 1306 return B_BAD_VALUE; 1307 1308 AddressSpaceWriteLocker locker(team); 1309 if (!locker.IsLocked()) 1310 return B_BAD_TEAM_ID; 1311 1312 virtual_address_restrictions addressRestrictions = {}; 1313 addressRestrictions.address = *_address; 1314 addressRestrictions.address_specification = addressSpec; 1315 VMAddressSpace* addressSpace = locker.AddressSpace(); 1316 return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags, 1317 addressSpace == VMAddressSpace::Kernel() 1318 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0, 1319 _address); 1320 } 1321 1322 1323 area_id 1324 vm_create_anonymous_area(team_id team, const char *name, addr_t size, 1325 uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize, 1326 const virtual_address_restrictions* virtualAddressRestrictions, 1327 const physical_address_restrictions* physicalAddressRestrictions, 1328 bool kernel, void** _address) 1329 { 1330 VMArea* area; 1331 VMCache* cache; 1332 vm_page* page = NULL; 1333 bool isStack = (protection & B_STACK_AREA) != 0; 1334 page_num_t guardPages; 1335 bool canOvercommit = false; 1336 uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0 1337 ? VM_PAGE_ALLOC_CLEAR : 0; 1338 1339 TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n", 1340 team, name, size)); 1341 1342 size = PAGE_ALIGN(size); 1343 guardSize = PAGE_ALIGN(guardSize); 1344 guardPages = guardSize / B_PAGE_SIZE; 1345 1346 if (size == 0 || size < guardSize) 1347 return B_BAD_VALUE; 1348 if (!arch_vm_supports_protection(protection)) 1349 return B_NOT_SUPPORTED; 1350 1351 if (team == B_CURRENT_TEAM) 1352 team = VMAddressSpace::CurrentID(); 1353 if (team < 0) 1354 return B_BAD_TEAM_ID; 1355 1356 if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0) 1357 canOvercommit = true; 1358 1359 #ifdef DEBUG_KERNEL_STACKS 1360 if ((protection & B_KERNEL_STACK_AREA) != 0) 1361 isStack = true; 1362 #endif 1363 1364 // check parameters 1365 switch (virtualAddressRestrictions->address_specification) { 1366 case B_ANY_ADDRESS: 1367 case B_EXACT_ADDRESS: 1368 case B_BASE_ADDRESS: 1369 case B_ANY_KERNEL_ADDRESS: 1370 case B_ANY_KERNEL_BLOCK_ADDRESS: 1371 case B_RANDOMIZED_ANY_ADDRESS: 1372 case B_RANDOMIZED_BASE_ADDRESS: 1373 break; 1374 1375 default: 1376 return B_BAD_VALUE; 1377 } 1378 1379 // If low or high physical address restrictions are given, we force 1380 // B_CONTIGUOUS wiring, since only then we'll use 1381 // vm_page_allocate_page_run() which deals with those restrictions. 
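	// (For instance, a hypothetical driver that needs its buffer entirely
	// below 16 MB would pass high_address = 16 * 1024 * 1024; the B_LOMEM
	// case in the switch below sets up exactly that restriction itself.)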
1382 if (physicalAddressRestrictions->low_address != 0 1383 || physicalAddressRestrictions->high_address != 0) { 1384 wiring = B_CONTIGUOUS; 1385 } 1386 1387 physical_address_restrictions stackPhysicalRestrictions; 1388 bool doReserveMemory = false; 1389 switch (wiring) { 1390 case B_NO_LOCK: 1391 break; 1392 case B_FULL_LOCK: 1393 case B_LAZY_LOCK: 1394 case B_CONTIGUOUS: 1395 doReserveMemory = true; 1396 break; 1397 case B_ALREADY_WIRED: 1398 break; 1399 case B_LOMEM: 1400 stackPhysicalRestrictions = *physicalAddressRestrictions; 1401 stackPhysicalRestrictions.high_address = 16 * 1024 * 1024; 1402 physicalAddressRestrictions = &stackPhysicalRestrictions; 1403 wiring = B_CONTIGUOUS; 1404 doReserveMemory = true; 1405 break; 1406 case B_32_BIT_FULL_LOCK: 1407 if (B_HAIKU_PHYSICAL_BITS <= 32 1408 || (uint64)vm_page_max_address() < (uint64)1 << 32) { 1409 wiring = B_FULL_LOCK; 1410 doReserveMemory = true; 1411 break; 1412 } 1413 // TODO: We don't really support this mode efficiently. Just fall 1414 // through for now ... 1415 case B_32_BIT_CONTIGUOUS: 1416 #if B_HAIKU_PHYSICAL_BITS > 32 1417 if (vm_page_max_address() >= (phys_addr_t)1 << 32) { 1418 stackPhysicalRestrictions = *physicalAddressRestrictions; 1419 stackPhysicalRestrictions.high_address 1420 = (phys_addr_t)1 << 32; 1421 physicalAddressRestrictions = &stackPhysicalRestrictions; 1422 } 1423 #endif 1424 wiring = B_CONTIGUOUS; 1425 doReserveMemory = true; 1426 break; 1427 default: 1428 return B_BAD_VALUE; 1429 } 1430 1431 // Optimization: For a single-page contiguous allocation without low/high 1432 // memory restriction B_FULL_LOCK wiring suffices. 1433 if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE 1434 && physicalAddressRestrictions->low_address == 0 1435 && physicalAddressRestrictions->high_address == 0) { 1436 wiring = B_FULL_LOCK; 1437 } 1438 1439 // For full lock or contiguous areas we're also going to map the pages and 1440 // thus need to reserve pages for the mapping backend upfront. 1441 addr_t reservedMapPages = 0; 1442 if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) { 1443 AddressSpaceWriteLocker locker; 1444 status_t status = locker.SetTo(team); 1445 if (status != B_OK) 1446 return status; 1447 1448 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1449 reservedMapPages = map->MaxPagesNeededToMap(0, size - 1); 1450 } 1451 1452 int priority; 1453 if (team != VMAddressSpace::KernelID()) 1454 priority = VM_PRIORITY_USER; 1455 else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) 1456 priority = VM_PRIORITY_VIP; 1457 else 1458 priority = VM_PRIORITY_SYSTEM; 1459 1460 // Reserve memory before acquiring the address space lock. This reduces the 1461 // chances of failure, since while holding the write lock to the address 1462 // space (if it is the kernel address space that is), the low memory handler 1463 // won't be able to free anything for us. 1464 addr_t reservedMemory = 0; 1465 if (doReserveMemory) { 1466 bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000; 1467 if (vm_try_reserve_memory(size, priority, timeout) != B_OK) 1468 return B_NO_MEMORY; 1469 reservedMemory = size; 1470 // TODO: We don't reserve the memory for the pages for the page 1471 // directories/tables. We actually need to do since we currently don't 1472 // reclaim them (and probably can't reclaim all of them anyway). Thus 1473 // there are actually less physical pages than there should be, which 1474 // can get the VM into trouble in low memory situations. 
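		// (Side note on the reservation above: with CREATE_AREA_DONT_WAIT
		// the timeout passed to vm_try_reserve_memory() is zero, so the call
		// fails immediately under memory pressure; otherwise it may wait up
		// to one second for memory to become available.)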
1475 } 1476 1477 AddressSpaceWriteLocker locker; 1478 VMAddressSpace* addressSpace; 1479 status_t status; 1480 1481 // For full lock areas reserve the pages before locking the address 1482 // space. E.g. block caches can't release their memory while we hold the 1483 // address space lock. 1484 page_num_t reservedPages = reservedMapPages; 1485 if (wiring == B_FULL_LOCK) 1486 reservedPages += size / B_PAGE_SIZE; 1487 1488 vm_page_reservation reservation; 1489 if (reservedPages > 0) { 1490 if ((flags & CREATE_AREA_DONT_WAIT) != 0) { 1491 if (!vm_page_try_reserve_pages(&reservation, reservedPages, 1492 priority)) { 1493 reservedPages = 0; 1494 status = B_WOULD_BLOCK; 1495 goto err0; 1496 } 1497 } else 1498 vm_page_reserve_pages(&reservation, reservedPages, priority); 1499 } 1500 1501 if (wiring == B_CONTIGUOUS) { 1502 // we try to allocate the page run here upfront as this may easily 1503 // fail for obvious reasons 1504 page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags, 1505 size / B_PAGE_SIZE, physicalAddressRestrictions, priority); 1506 if (page == NULL) { 1507 status = B_NO_MEMORY; 1508 goto err0; 1509 } 1510 } 1511 1512 // Lock the address space and, if B_EXACT_ADDRESS and 1513 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1514 // is not wired. 1515 do { 1516 status = locker.SetTo(team); 1517 if (status != B_OK) 1518 goto err1; 1519 1520 addressSpace = locker.AddressSpace(); 1521 } while (virtualAddressRestrictions->address_specification 1522 == B_EXACT_ADDRESS 1523 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1524 && wait_if_address_range_is_wired(addressSpace, 1525 (addr_t)virtualAddressRestrictions->address, size, &locker)); 1526 1527 // create an anonymous cache 1528 // if it's a stack, make sure that two pages are available at least 1529 status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit, 1530 isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages, 1531 wiring == B_NO_LOCK, priority); 1532 if (status != B_OK) 1533 goto err1; 1534 1535 cache->temporary = 1; 1536 cache->virtual_end = size; 1537 cache->committed_size = reservedMemory; 1538 // TODO: This should be done via a method. 
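	// (Assigning committed_size above hands the reserved memory over to the
	// cache; clearing reservedMemory next keeps the err0 path further down
	// from unreserving the same amount a second time.)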
1539 reservedMemory = 0; 1540 1541 cache->Lock(); 1542 1543 status = map_backing_store(addressSpace, cache, 0, name, size, wiring, 1544 protection, 0, REGION_NO_PRIVATE_MAP, flags, 1545 virtualAddressRestrictions, kernel, &area, _address); 1546 1547 if (status != B_OK) { 1548 cache->ReleaseRefAndUnlock(); 1549 goto err1; 1550 } 1551 1552 locker.DegradeToReadLock(); 1553 1554 switch (wiring) { 1555 case B_NO_LOCK: 1556 case B_LAZY_LOCK: 1557 // do nothing - the pages are mapped in as needed 1558 break; 1559 1560 case B_FULL_LOCK: 1561 { 1562 // Allocate and map all pages for this area 1563 1564 off_t offset = 0; 1565 for (addr_t address = area->Base(); 1566 address < area->Base() + (area->Size() - 1); 1567 address += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1568 #ifdef DEBUG_KERNEL_STACKS 1569 # ifdef STACK_GROWS_DOWNWARDS 1570 if (isStack && address < area->Base() 1571 + KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1572 # else 1573 if (isStack && address >= area->Base() + area->Size() 1574 - KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE) 1575 # endif 1576 continue; 1577 #endif 1578 vm_page* page = vm_page_allocate_page(&reservation, 1579 PAGE_STATE_WIRED | pageAllocFlags); 1580 cache->InsertPage(page, offset); 1581 map_page(area, page, address, protection, &reservation); 1582 1583 DEBUG_PAGE_ACCESS_END(page); 1584 } 1585 1586 break; 1587 } 1588 1589 case B_ALREADY_WIRED: 1590 { 1591 // The pages should already be mapped. This is only really useful 1592 // during boot time. Find the appropriate vm_page objects and stick 1593 // them in the cache object. 1594 VMTranslationMap* map = addressSpace->TranslationMap(); 1595 off_t offset = 0; 1596 1597 if (!gKernelStartup) 1598 panic("ALREADY_WIRED flag used outside kernel startup\n"); 1599 1600 map->Lock(); 1601 1602 for (addr_t virtualAddress = area->Base(); 1603 virtualAddress < area->Base() + (area->Size() - 1); 1604 virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) { 1605 phys_addr_t physicalAddress; 1606 uint32 flags; 1607 status = map->Query(virtualAddress, &physicalAddress, &flags); 1608 if (status < B_OK) { 1609 panic("looking up mapping failed for va 0x%lx\n", 1610 virtualAddress); 1611 } 1612 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1613 if (page == NULL) { 1614 panic("looking up page failed for pa %#" B_PRIxPHYSADDR 1615 "\n", physicalAddress); 1616 } 1617 1618 DEBUG_PAGE_ACCESS_START(page); 1619 1620 cache->InsertPage(page, offset); 1621 increment_page_wired_count(page); 1622 vm_page_set_state(page, PAGE_STATE_WIRED); 1623 page->busy = false; 1624 1625 DEBUG_PAGE_ACCESS_END(page); 1626 } 1627 1628 map->Unlock(); 1629 break; 1630 } 1631 1632 case B_CONTIGUOUS: 1633 { 1634 // We have already allocated our continuous pages run, so we can now 1635 // just map them in the address space 1636 VMTranslationMap* map = addressSpace->TranslationMap(); 1637 phys_addr_t physicalAddress 1638 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 1639 addr_t virtualAddress = area->Base(); 1640 off_t offset = 0; 1641 1642 map->Lock(); 1643 1644 for (virtualAddress = area->Base(); virtualAddress < area->Base() 1645 + (area->Size() - 1); virtualAddress += B_PAGE_SIZE, 1646 offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) { 1647 page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 1648 if (page == NULL) 1649 panic("couldn't lookup physical page just allocated\n"); 1650 1651 status = map->Map(virtualAddress, physicalAddress, protection, 1652 area->MemoryType(), &reservation); 1653 if (status < B_OK) 1654 panic("couldn't map physical page in 
page run\n"); 1655 1656 cache->InsertPage(page, offset); 1657 increment_page_wired_count(page); 1658 1659 DEBUG_PAGE_ACCESS_END(page); 1660 } 1661 1662 map->Unlock(); 1663 break; 1664 } 1665 1666 default: 1667 break; 1668 } 1669 1670 cache->Unlock(); 1671 1672 if (reservedPages > 0) 1673 vm_page_unreserve_pages(&reservation); 1674 1675 TRACE(("vm_create_anonymous_area: done\n")); 1676 1677 area->cache_type = CACHE_TYPE_RAM; 1678 return area->id; 1679 1680 err1: 1681 if (wiring == B_CONTIGUOUS) { 1682 // we had reserved the area space upfront... 1683 phys_addr_t pageNumber = page->physical_page_number; 1684 int32 i; 1685 for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) { 1686 page = vm_lookup_page(pageNumber); 1687 if (page == NULL) 1688 panic("couldn't lookup physical page just allocated\n"); 1689 1690 vm_page_set_state(page, PAGE_STATE_FREE); 1691 } 1692 } 1693 1694 err0: 1695 if (reservedPages > 0) 1696 vm_page_unreserve_pages(&reservation); 1697 if (reservedMemory > 0) 1698 vm_unreserve_memory(reservedMemory); 1699 1700 return status; 1701 } 1702 1703 1704 area_id 1705 vm_map_physical_memory(team_id team, const char* name, void** _address, 1706 uint32 addressSpec, addr_t size, uint32 protection, 1707 phys_addr_t physicalAddress, bool alreadyWired) 1708 { 1709 VMArea* area; 1710 VMCache* cache; 1711 addr_t mapOffset; 1712 1713 TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p" 1714 ", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %" 1715 B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address, 1716 addressSpec, size, protection, physicalAddress)); 1717 1718 if (!arch_vm_supports_protection(protection)) 1719 return B_NOT_SUPPORTED; 1720 1721 AddressSpaceWriteLocker locker(team); 1722 if (!locker.IsLocked()) 1723 return B_BAD_TEAM_ID; 1724 1725 // if the physical address is somewhat inside a page, 1726 // move the actual area down to align on a page boundary 1727 mapOffset = physicalAddress % B_PAGE_SIZE; 1728 size += mapOffset; 1729 physicalAddress -= mapOffset; 1730 1731 size = PAGE_ALIGN(size); 1732 1733 // create a device cache 1734 status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress); 1735 if (status != B_OK) 1736 return status; 1737 1738 cache->virtual_end = size; 1739 1740 cache->Lock(); 1741 1742 virtual_address_restrictions addressRestrictions = {}; 1743 addressRestrictions.address = *_address; 1744 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1745 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1746 B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions, 1747 true, &area, _address); 1748 1749 if (status < B_OK) 1750 cache->ReleaseRefLocked(); 1751 1752 cache->Unlock(); 1753 1754 if (status == B_OK) { 1755 // set requested memory type -- use uncached, if not given 1756 uint32 memoryType = addressSpec & B_MTR_MASK; 1757 if (memoryType == 0) 1758 memoryType = B_MTR_UC; 1759 1760 area->SetMemoryType(memoryType); 1761 1762 status = arch_vm_set_memory_type(area, physicalAddress, memoryType); 1763 if (status != B_OK) 1764 delete_area(locker.AddressSpace(), area, false); 1765 } 1766 1767 if (status != B_OK) 1768 return status; 1769 1770 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1771 1772 if (alreadyWired) { 1773 // The area is already mapped, but possibly not with the right 1774 // memory type. 
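		// (Presumably the ProtectArea() call below rewrites the existing
		// mapping entries so that the memory type set just above via
		// area->SetMemoryType() actually takes effect.)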
1775 map->Lock(); 1776 map->ProtectArea(area, area->protection); 1777 map->Unlock(); 1778 } else { 1779 // Map the area completely. 1780 1781 // reserve pages needed for the mapping 1782 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1783 area->Base() + (size - 1)); 1784 vm_page_reservation reservation; 1785 vm_page_reserve_pages(&reservation, reservePages, 1786 team == VMAddressSpace::KernelID() 1787 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1788 1789 map->Lock(); 1790 1791 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1792 map->Map(area->Base() + offset, physicalAddress + offset, 1793 protection, area->MemoryType(), &reservation); 1794 } 1795 1796 map->Unlock(); 1797 1798 vm_page_unreserve_pages(&reservation); 1799 } 1800 1801 // modify the pointer returned to be offset back into the new area 1802 // the same way the physical address in was offset 1803 *_address = (void*)((addr_t)*_address + mapOffset); 1804 1805 area->cache_type = CACHE_TYPE_DEVICE; 1806 return area->id; 1807 } 1808 1809 1810 /*! Don't use! 1811 TODO: This function was introduced to map physical page vecs to 1812 contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does 1813 use a device cache and does not track vm_page::wired_count! 1814 */ 1815 area_id 1816 vm_map_physical_memory_vecs(team_id team, const char* name, void** _address, 1817 uint32 addressSpec, addr_t* _size, uint32 protection, 1818 struct generic_io_vec* vecs, uint32 vecCount) 1819 { 1820 TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual " 1821 "= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", " 1822 "vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address, 1823 addressSpec, _size, protection, vecs, vecCount)); 1824 1825 if (!arch_vm_supports_protection(protection) 1826 || (addressSpec & B_MTR_MASK) != 0) { 1827 return B_NOT_SUPPORTED; 1828 } 1829 1830 AddressSpaceWriteLocker locker(team); 1831 if (!locker.IsLocked()) 1832 return B_BAD_TEAM_ID; 1833 1834 if (vecCount == 0) 1835 return B_BAD_VALUE; 1836 1837 addr_t size = 0; 1838 for (uint32 i = 0; i < vecCount; i++) { 1839 if (vecs[i].base % B_PAGE_SIZE != 0 1840 || vecs[i].length % B_PAGE_SIZE != 0) { 1841 return B_BAD_VALUE; 1842 } 1843 1844 size += vecs[i].length; 1845 } 1846 1847 // create a device cache 1848 VMCache* cache; 1849 status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base); 1850 if (result != B_OK) 1851 return result; 1852 1853 cache->virtual_end = size; 1854 1855 cache->Lock(); 1856 1857 VMArea* area; 1858 virtual_address_restrictions addressRestrictions = {}; 1859 addressRestrictions.address = *_address; 1860 addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK; 1861 result = map_backing_store(locker.AddressSpace(), cache, 0, name, 1862 size, B_FULL_LOCK, protection, 0, REGION_NO_PRIVATE_MAP, 0, 1863 &addressRestrictions, true, &area, _address); 1864 1865 if (result != B_OK) 1866 cache->ReleaseRefLocked(); 1867 1868 cache->Unlock(); 1869 1870 if (result != B_OK) 1871 return result; 1872 1873 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 1874 size_t reservePages = map->MaxPagesNeededToMap(area->Base(), 1875 area->Base() + (size - 1)); 1876 1877 vm_page_reservation reservation; 1878 vm_page_reserve_pages(&reservation, reservePages, 1879 team == VMAddressSpace::KernelID() 1880 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 1881 map->Lock(); 1882 1883 uint32 vecIndex = 0; 1884 size_t vecOffset = 0; 1885 for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) { 1886 while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) { 1887 vecOffset = 0; 1888 vecIndex++; 1889 } 1890 1891 if (vecIndex >= vecCount) 1892 break; 1893 1894 map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset, 1895 protection, area->MemoryType(), &reservation); 1896 1897 vecOffset += B_PAGE_SIZE; 1898 } 1899 1900 map->Unlock(); 1901 vm_page_unreserve_pages(&reservation); 1902 1903 if (_size != NULL) 1904 *_size = size; 1905 1906 area->cache_type = CACHE_TYPE_DEVICE; 1907 return area->id; 1908 } 1909 1910 1911 area_id 1912 vm_create_null_area(team_id team, const char* name, void** address, 1913 uint32 addressSpec, addr_t size, uint32 flags) 1914 { 1915 size = PAGE_ALIGN(size); 1916 1917 // Lock the address space and, if B_EXACT_ADDRESS and 1918 // CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range 1919 // is not wired. 1920 AddressSpaceWriteLocker locker; 1921 do { 1922 if (locker.SetTo(team) != B_OK) 1923 return B_BAD_TEAM_ID; 1924 } while (addressSpec == B_EXACT_ADDRESS 1925 && (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0 1926 && wait_if_address_range_is_wired(locker.AddressSpace(), 1927 (addr_t)*address, size, &locker)); 1928 1929 // create a null cache 1930 int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0 1931 ? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM; 1932 VMCache* cache; 1933 status_t status = VMCacheFactory::CreateNullCache(priority, cache); 1934 if (status != B_OK) 1935 return status; 1936 1937 cache->temporary = 1; 1938 cache->virtual_end = size; 1939 1940 cache->Lock(); 1941 1942 VMArea* area; 1943 virtual_address_restrictions addressRestrictions = {}; 1944 addressRestrictions.address = *address; 1945 addressRestrictions.address_specification = addressSpec; 1946 status = map_backing_store(locker.AddressSpace(), cache, 0, name, size, 1947 B_LAZY_LOCK, B_KERNEL_READ_AREA, B_KERNEL_READ_AREA, 1948 REGION_NO_PRIVATE_MAP, flags, 1949 &addressRestrictions, true, &area, address); 1950 1951 if (status < B_OK) { 1952 cache->ReleaseRefAndUnlock(); 1953 return status; 1954 } 1955 1956 cache->Unlock(); 1957 1958 area->cache_type = CACHE_TYPE_NULL; 1959 return area->id; 1960 } 1961 1962 1963 /*! Creates the vnode cache for the specified \a vnode. 1964 The vnode has to be marked busy when calling this function. 1965 */ 1966 status_t 1967 vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache) 1968 { 1969 return VMCacheFactory::CreateVnodeCache(*cache, vnode); 1970 } 1971 1972 1973 /*! \a cache must be locked. The area's address space must be read-locked. 
1974 */ 1975 static void 1976 pre_map_area_pages(VMArea* area, VMCache* cache, 1977 vm_page_reservation* reservation) 1978 { 1979 addr_t baseAddress = area->Base(); 1980 addr_t cacheOffset = area->cache_offset; 1981 page_num_t firstPage = cacheOffset / B_PAGE_SIZE; 1982 page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE; 1983 1984 for (VMCachePagesTree::Iterator it 1985 = cache->pages.GetIterator(firstPage, true, true); 1986 vm_page* page = it.Next();) { 1987 if (page->cache_offset >= endPage) 1988 break; 1989 1990 // skip busy and inactive pages 1991 if (page->busy || page->usage_count == 0) 1992 continue; 1993 1994 DEBUG_PAGE_ACCESS_START(page); 1995 map_page(area, page, 1996 baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset), 1997 B_READ_AREA | B_KERNEL_READ_AREA, reservation); 1998 DEBUG_PAGE_ACCESS_END(page); 1999 } 2000 } 2001 2002 2003 /*! Will map the file specified by \a fd to an area in memory. 2004 The file will be mirrored beginning at the specified \a offset. The 2005 \a offset and \a size arguments have to be page aligned. 2006 */ 2007 static area_id 2008 _vm_map_file(team_id team, const char* name, void** _address, 2009 uint32 addressSpec, size_t size, uint32 protection, uint32 mapping, 2010 bool unmapAddressRange, int fd, off_t offset, bool kernel) 2011 { 2012 // TODO: for binary files, we want to make sure that they get the 2013 // copy of a file at a given time, ie. later changes should not 2014 // make it into the mapped copy -- this will need quite some changes 2015 // to be done in a nice way 2016 TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping " 2017 "%" B_PRIu32 ")\n", fd, offset, size, mapping)); 2018 2019 offset = ROUNDDOWN(offset, B_PAGE_SIZE); 2020 size = PAGE_ALIGN(size); 2021 2022 if (mapping == REGION_NO_PRIVATE_MAP) 2023 protection |= B_SHARED_AREA; 2024 if (addressSpec != B_EXACT_ADDRESS) 2025 unmapAddressRange = false; 2026 2027 if (fd < 0) { 2028 uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0; 2029 virtual_address_restrictions virtualRestrictions = {}; 2030 virtualRestrictions.address = *_address; 2031 virtualRestrictions.address_specification = addressSpec; 2032 physical_address_restrictions physicalRestrictions = {}; 2033 return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection, 2034 flags, 0, &virtualRestrictions, &physicalRestrictions, kernel, 2035 _address); 2036 } 2037 2038 // get the open flags of the FD 2039 file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd); 2040 if (descriptor == NULL) 2041 return EBADF; 2042 int32 openMode = descriptor->open_mode; 2043 put_fd(descriptor); 2044 2045 // The FD must open for reading at any rate. For shared mapping with write 2046 // access, additionally the FD must be open for writing. 
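	// The same rule in isolation, as a hypothetical helper (not used by this
	// file), mirroring POSIX mmap() semantics:
	//
	//	static bool
	//	fd_allows_mapping(int openMode, bool shared, bool wantsWrite)
	//	{
	//		int access = openMode & O_ACCMODE;
	//		if (access == O_WRONLY)
	//			return false;		// a write-only FD can never be mapped
	//		if (shared && wantsWrite && access == O_RDONLY)
	//			return false;		// shared writable mappings need O_RDWR
	//		return true;
	//	}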
2047 if ((openMode & O_ACCMODE) == O_WRONLY 2048 || (mapping == REGION_NO_PRIVATE_MAP 2049 && (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0 2050 && (openMode & O_ACCMODE) == O_RDONLY)) { 2051 return EACCES; 2052 } 2053 2054 uint32 protectionMax = 0; 2055 if (mapping != REGION_PRIVATE_MAP) { 2056 protectionMax = protection | B_READ_AREA; 2057 if ((openMode & O_ACCMODE) == O_RDWR) 2058 protectionMax |= B_WRITE_AREA; 2059 } 2060 2061 // get the vnode for the object, this also grabs a ref to it 2062 struct vnode* vnode = NULL; 2063 status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode); 2064 if (status < B_OK) 2065 return status; 2066 VnodePutter vnodePutter(vnode); 2067 2068 // If we're going to pre-map pages, we need to reserve the pages needed by 2069 // the mapping backend upfront. 2070 page_num_t reservedPreMapPages = 0; 2071 vm_page_reservation reservation; 2072 if ((protection & B_READ_AREA) != 0) { 2073 AddressSpaceWriteLocker locker; 2074 status = locker.SetTo(team); 2075 if (status != B_OK) 2076 return status; 2077 2078 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 2079 reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1); 2080 2081 locker.Unlock(); 2082 2083 vm_page_reserve_pages(&reservation, reservedPreMapPages, 2084 team == VMAddressSpace::KernelID() 2085 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2086 } 2087 2088 struct PageUnreserver { 2089 PageUnreserver(vm_page_reservation* reservation) 2090 : 2091 fReservation(reservation) 2092 { 2093 } 2094 2095 ~PageUnreserver() 2096 { 2097 if (fReservation != NULL) 2098 vm_page_unreserve_pages(fReservation); 2099 } 2100 2101 vm_page_reservation* fReservation; 2102 } pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL); 2103 2104 // Lock the address space and, if the specified address range shall be 2105 // unmapped, ensure it is not wired. 2106 AddressSpaceWriteLocker locker; 2107 do { 2108 if (locker.SetTo(team) != B_OK) 2109 return B_BAD_TEAM_ID; 2110 } while (unmapAddressRange 2111 && wait_if_address_range_is_wired(locker.AddressSpace(), 2112 (addr_t)*_address, size, &locker)); 2113 2114 // TODO: this only works for file systems that use the file cache 2115 VMCache* cache; 2116 status = vfs_get_vnode_cache(vnode, &cache, false); 2117 if (status < B_OK) 2118 return status; 2119 2120 cache->Lock(); 2121 2122 VMArea* area; 2123 virtual_address_restrictions addressRestrictions = {}; 2124 addressRestrictions.address = *_address; 2125 addressRestrictions.address_specification = addressSpec; 2126 status = map_backing_store(locker.AddressSpace(), cache, offset, name, size, 2127 0, protection, protectionMax, mapping, 2128 unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0, 2129 &addressRestrictions, kernel, &area, _address); 2130 2131 if (status != B_OK || mapping == REGION_PRIVATE_MAP) { 2132 // map_backing_store() cannot know we no longer need the ref 2133 cache->ReleaseRefLocked(); 2134 } 2135 2136 if (status == B_OK && (protection & B_READ_AREA) != 0) 2137 pre_map_area_pages(area, cache, &reservation); 2138 2139 cache->Unlock(); 2140 2141 if (status == B_OK) { 2142 // TODO: this probably deserves a smarter solution, ie. don't always 2143 // prefetch stuff, and also, probably don't trigger it at this place. 
2144 cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024)); 2145 // prefetches at max 10 MB starting from "offset" 2146 } 2147 2148 if (status != B_OK) 2149 return status; 2150 2151 area->cache_type = CACHE_TYPE_VNODE; 2152 return area->id; 2153 } 2154 2155 2156 area_id 2157 vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec, 2158 addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 2159 int fd, off_t offset) 2160 { 2161 if (!arch_vm_supports_protection(protection)) 2162 return B_NOT_SUPPORTED; 2163 2164 return _vm_map_file(aid, name, address, addressSpec, size, protection, 2165 mapping, unmapAddressRange, fd, offset, true); 2166 } 2167 2168 2169 VMCache* 2170 vm_area_get_locked_cache(VMArea* area) 2171 { 2172 rw_lock_read_lock(&sAreaCacheLock); 2173 2174 while (true) { 2175 VMCache* cache = area->cache; 2176 2177 if (!cache->SwitchFromReadLock(&sAreaCacheLock)) { 2178 // cache has been deleted 2179 rw_lock_read_lock(&sAreaCacheLock); 2180 continue; 2181 } 2182 2183 rw_lock_read_lock(&sAreaCacheLock); 2184 2185 if (cache == area->cache) { 2186 cache->AcquireRefLocked(); 2187 rw_lock_read_unlock(&sAreaCacheLock); 2188 return cache; 2189 } 2190 2191 // the cache changed in the meantime 2192 cache->Unlock(); 2193 } 2194 } 2195 2196 2197 void 2198 vm_area_put_locked_cache(VMCache* cache) 2199 { 2200 cache->ReleaseRefAndUnlock(); 2201 } 2202 2203 2204 area_id 2205 vm_clone_area(team_id team, const char* name, void** address, 2206 uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID, 2207 bool kernel) 2208 { 2209 VMArea* newArea = NULL; 2210 VMArea* sourceArea; 2211 2212 // Check whether the source area exists and is cloneable. If so, mark it 2213 // B_SHARED_AREA, so that we don't get problems with copy-on-write. 2214 { 2215 AddressSpaceWriteLocker locker; 2216 status_t status = locker.SetFromArea(sourceID, sourceArea); 2217 if (status != B_OK) 2218 return status; 2219 2220 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2221 return B_NOT_ALLOWED; 2222 2223 sourceArea->protection |= B_SHARED_AREA; 2224 protection |= B_SHARED_AREA; 2225 } 2226 2227 // Now lock both address spaces and actually do the cloning. 
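	// The write lock on the source address space was given up above, so the
	// source area has to be looked up again under the new locks and may
	// legitimately have disappeared in the meantime.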
2228 2229 MultiAddressSpaceLocker locker; 2230 VMAddressSpace* sourceAddressSpace; 2231 status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace); 2232 if (status != B_OK) 2233 return status; 2234 2235 VMAddressSpace* targetAddressSpace; 2236 status = locker.AddTeam(team, true, &targetAddressSpace); 2237 if (status != B_OK) 2238 return status; 2239 2240 status = locker.Lock(); 2241 if (status != B_OK) 2242 return status; 2243 2244 sourceArea = lookup_area(sourceAddressSpace, sourceID); 2245 if (sourceArea == NULL) 2246 return B_BAD_VALUE; 2247 2248 if (!kernel && (sourceArea->protection & B_KERNEL_AREA) != 0) 2249 return B_NOT_ALLOWED; 2250 2251 VMCache* cache = vm_area_get_locked_cache(sourceArea); 2252 2253 if (!kernel && sourceAddressSpace != targetAddressSpace 2254 && (sourceArea->protection & B_CLONEABLE_AREA) == 0) { 2255 #if KDEBUG 2256 Team* team = thread_get_current_thread()->team; 2257 dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%" 2258 B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID); 2259 #endif 2260 status = B_NOT_ALLOWED; 2261 } else if (sourceArea->cache_type == CACHE_TYPE_NULL) { 2262 status = B_NOT_ALLOWED; 2263 } else { 2264 virtual_address_restrictions addressRestrictions = {}; 2265 addressRestrictions.address = *address; 2266 addressRestrictions.address_specification = addressSpec; 2267 status = map_backing_store(targetAddressSpace, cache, 2268 sourceArea->cache_offset, name, sourceArea->Size(), 2269 sourceArea->wiring, protection, sourceArea->protection_max, 2270 mapping, 0, &addressRestrictions, 2271 kernel, &newArea, address); 2272 } 2273 if (status == B_OK && mapping != REGION_PRIVATE_MAP) { 2274 // If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed 2275 // to create a new cache, and has therefore already acquired a reference 2276 // to the source cache - but otherwise it has no idea that we need 2277 // one. 2278 cache->AcquireRefLocked(); 2279 } 2280 if (status == B_OK && newArea->wiring == B_FULL_LOCK) { 2281 // we need to map in everything at this point 2282 if (sourceArea->cache_type == CACHE_TYPE_DEVICE) { 2283 // we don't have actual pages to map but a physical area 2284 VMTranslationMap* map 2285 = sourceArea->address_space->TranslationMap(); 2286 map->Lock(); 2287 2288 phys_addr_t physicalAddress; 2289 uint32 oldProtection; 2290 map->Query(sourceArea->Base(), &physicalAddress, &oldProtection); 2291 2292 map->Unlock(); 2293 2294 map = targetAddressSpace->TranslationMap(); 2295 size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(), 2296 newArea->Base() + (newArea->Size() - 1)); 2297 2298 vm_page_reservation reservation; 2299 vm_page_reserve_pages(&reservation, reservePages, 2300 targetAddressSpace == VMAddressSpace::Kernel() 2301 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2302 map->Lock(); 2303 2304 for (addr_t offset = 0; offset < newArea->Size(); 2305 offset += B_PAGE_SIZE) { 2306 map->Map(newArea->Base() + offset, physicalAddress + offset, 2307 protection, newArea->MemoryType(), &reservation); 2308 } 2309 2310 map->Unlock(); 2311 vm_page_unreserve_pages(&reservation); 2312 } else { 2313 VMTranslationMap* map = targetAddressSpace->TranslationMap(); 2314 size_t reservePages = map->MaxPagesNeededToMap( 2315 newArea->Base(), newArea->Base() + (newArea->Size() - 1)); 2316 vm_page_reservation reservation; 2317 vm_page_reserve_pages(&reservation, reservePages, 2318 targetAddressSpace == VMAddressSpace::Kernel() 2319 ? 
VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2320 2321 // map in all pages from source 2322 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2323 vm_page* page = it.Next();) { 2324 if (!page->busy) { 2325 DEBUG_PAGE_ACCESS_START(page); 2326 map_page(newArea, page, 2327 newArea->Base() + ((page->cache_offset << PAGE_SHIFT) 2328 - newArea->cache_offset), 2329 protection, &reservation); 2330 DEBUG_PAGE_ACCESS_END(page); 2331 } 2332 } 2333 // TODO: B_FULL_LOCK means that all pages are locked. We are not 2334 // ensuring that! 2335 2336 vm_page_unreserve_pages(&reservation); 2337 } 2338 } 2339 if (status == B_OK) 2340 newArea->cache_type = sourceArea->cache_type; 2341 2342 vm_area_put_locked_cache(cache); 2343 2344 if (status < B_OK) 2345 return status; 2346 2347 return newArea->id; 2348 } 2349 2350 2351 /*! Deletes the specified area of the given address space. 2352 2353 The address space must be write-locked. 2354 The caller must ensure that the area does not have any wired ranges. 2355 2356 \param addressSpace The address space containing the area. 2357 \param area The area to be deleted. 2358 \param deletingAddressSpace \c true, if the address space is in the process 2359 of being deleted. 2360 */ 2361 static void 2362 delete_area(VMAddressSpace* addressSpace, VMArea* area, 2363 bool deletingAddressSpace) 2364 { 2365 ASSERT(!area->IsWired()); 2366 2367 VMAreaHash::Remove(area); 2368 2369 // At this point the area is removed from the global hash table, but 2370 // still exists in the area list. 2371 2372 // Unmap the virtual address space the area occupied. 2373 { 2374 // We need to lock the complete cache chain. 2375 VMCache* topCache = vm_area_get_locked_cache(area); 2376 VMCacheChainLocker cacheChainLocker(topCache); 2377 cacheChainLocker.LockAllSourceCaches(); 2378 2379 // If the area's top cache is a temporary cache and the area is the only 2380 // one referencing it (besides us currently holding a second reference), 2381 // the unmapping code doesn't need to care about preserving the accessed 2382 // and dirty flags of the top cache page mappings. 2383 bool ignoreTopCachePageFlags 2384 = topCache->temporary && topCache->RefCount() == 2; 2385 2386 area->address_space->TranslationMap()->UnmapArea(area, 2387 deletingAddressSpace, ignoreTopCachePageFlags); 2388 } 2389 2390 if (!area->cache->temporary) 2391 area->cache->WriteModified(); 2392 2393 uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel() 2394 ? 
HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 2395 2396 arch_vm_unset_memory_type(area); 2397 addressSpace->RemoveArea(area, allocationFlags); 2398 addressSpace->Put(); 2399 2400 area->cache->RemoveArea(area); 2401 area->cache->ReleaseRef(); 2402 2403 addressSpace->DeleteArea(area, allocationFlags); 2404 } 2405 2406 2407 status_t 2408 vm_delete_area(team_id team, area_id id, bool kernel) 2409 { 2410 TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n", 2411 team, id)); 2412 2413 // lock the address space and make sure the area isn't wired 2414 AddressSpaceWriteLocker locker; 2415 VMArea* area; 2416 AreaCacheLocker cacheLocker; 2417 2418 do { 2419 status_t status = locker.SetFromArea(team, id, area); 2420 if (status != B_OK) 2421 return status; 2422 2423 cacheLocker.SetTo(area); 2424 } while (wait_if_area_is_wired(area, &locker, &cacheLocker)); 2425 2426 cacheLocker.Unlock(); 2427 2428 if (!kernel && (area->protection & B_KERNEL_AREA) != 0) 2429 return B_NOT_ALLOWED; 2430 2431 delete_area(locker.AddressSpace(), area, false); 2432 return B_OK; 2433 } 2434 2435 2436 /*! Creates a new cache on top of given cache, moves all areas from 2437 the old cache to the new one, and changes the protection of all affected 2438 areas' pages to read-only. If requested, wired pages are moved up to the 2439 new cache and copies are added to the old cache in their place. 2440 Preconditions: 2441 - The given cache must be locked. 2442 - All of the cache's areas' address spaces must be read locked. 2443 - Either the cache must not have any wired ranges or a page reservation for 2444 all wired pages must be provided, so they can be copied. 2445 2446 \param lowerCache The cache on top of which a new cache shall be created. 2447 \param wiredPagesReservation If \c NULL there must not be any wired pages 2448 in \a lowerCache. Otherwise as many pages must be reserved as the cache 2449 has wired page. The wired pages are copied in this case. 2450 */ 2451 static status_t 2452 vm_copy_on_write_area(VMCache* lowerCache, 2453 vm_page_reservation* wiredPagesReservation) 2454 { 2455 VMCache* upperCache; 2456 2457 TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache)); 2458 2459 // We need to separate the cache from its areas. The cache goes one level 2460 // deeper and we create a new cache inbetween. 2461 2462 // create an anonymous cache 2463 status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0, 2464 lowerCache->GuardSize() / B_PAGE_SIZE, 2465 dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL, 2466 VM_PRIORITY_USER); 2467 if (status != B_OK) 2468 return status; 2469 2470 upperCache->Lock(); 2471 2472 upperCache->temporary = 1; 2473 upperCache->virtual_base = lowerCache->virtual_base; 2474 upperCache->virtual_end = lowerCache->virtual_end; 2475 2476 // transfer the lower cache areas to the upper cache 2477 rw_lock_write_lock(&sAreaCacheLock); 2478 upperCache->TransferAreas(lowerCache); 2479 rw_lock_write_unlock(&sAreaCacheLock); 2480 2481 lowerCache->AddConsumer(upperCache); 2482 2483 // We now need to remap all pages from all of the cache's areas read-only, 2484 // so that a copy will be created on next write access. If there are wired 2485 // pages, we keep their protection, move them to the upper cache and create 2486 // copies for the lower cache. 2487 if (wiredPagesReservation != NULL) { 2488 // We need to handle wired pages -- iterate through the cache's pages. 
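		// A wired page may be pinned in its current mappings (e.g. as the
		// target of in-flight I/O), so it cannot simply be write-protected.
		// The original page therefore stays with the areas -- which are now
		// attached to the upper cache -- while the lower cache receives a
		// freshly allocated copy of its contents.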
2489 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2490 vm_page* page = it.Next();) { 2491 if (page->WiredCount() > 0) { 2492 // allocate a new page and copy the wired one 2493 vm_page* copiedPage = vm_page_allocate_page( 2494 wiredPagesReservation, PAGE_STATE_ACTIVE); 2495 2496 vm_memcpy_physical_page( 2497 copiedPage->physical_page_number * B_PAGE_SIZE, 2498 page->physical_page_number * B_PAGE_SIZE); 2499 2500 // move the wired page to the upper cache (note: removing is OK 2501 // with the SplayTree iterator) and insert the copy 2502 upperCache->MovePage(page); 2503 lowerCache->InsertPage(copiedPage, 2504 page->cache_offset * B_PAGE_SIZE); 2505 2506 DEBUG_PAGE_ACCESS_END(copiedPage); 2507 } else { 2508 // Change the protection of this page in all areas. 2509 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2510 tempArea = tempArea->cache_next) { 2511 // The area must be readable in the same way it was 2512 // previously writable. 2513 addr_t address = virtual_page_address(tempArea, page); 2514 uint32 protection = 0; 2515 uint32 pageProtection = get_area_page_protection(tempArea, address); 2516 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2517 protection |= B_KERNEL_READ_AREA; 2518 if ((pageProtection & B_READ_AREA) != 0) 2519 protection |= B_READ_AREA; 2520 2521 VMTranslationMap* map 2522 = tempArea->address_space->TranslationMap(); 2523 map->Lock(); 2524 map->ProtectPage(tempArea, address, protection); 2525 map->Unlock(); 2526 } 2527 } 2528 } 2529 } else { 2530 ASSERT(lowerCache->WiredPagesCount() == 0); 2531 2532 // just change the protection of all areas 2533 for (VMArea* tempArea = upperCache->areas; tempArea != NULL; 2534 tempArea = tempArea->cache_next) { 2535 if (tempArea->page_protections != NULL) { 2536 // Change the protection of all pages in this area. 2537 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2538 map->Lock(); 2539 for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator(); 2540 vm_page* page = it.Next();) { 2541 // The area must be readable in the same way it was 2542 // previously writable. 2543 addr_t address = virtual_page_address(tempArea, page); 2544 uint32 protection = 0; 2545 uint32 pageProtection = get_area_page_protection(tempArea, address); 2546 if ((pageProtection & B_KERNEL_READ_AREA) != 0) 2547 protection |= B_KERNEL_READ_AREA; 2548 if ((pageProtection & B_READ_AREA) != 0) 2549 protection |= B_READ_AREA; 2550 2551 map->ProtectPage(tempArea, address, protection); 2552 } 2553 map->Unlock(); 2554 continue; 2555 } 2556 // The area must be readable in the same way it was previously 2557 // writable. 2558 uint32 protection = 0; 2559 if ((tempArea->protection & B_KERNEL_READ_AREA) != 0) 2560 protection |= B_KERNEL_READ_AREA; 2561 if ((tempArea->protection & B_READ_AREA) != 0) 2562 protection |= B_READ_AREA; 2563 2564 VMTranslationMap* map = tempArea->address_space->TranslationMap(); 2565 map->Lock(); 2566 map->ProtectArea(tempArea, protection); 2567 map->Unlock(); 2568 } 2569 } 2570 2571 vm_area_put_locked_cache(upperCache); 2572 2573 return B_OK; 2574 } 2575 2576 2577 area_id 2578 vm_copy_area(team_id team, const char* name, void** _address, 2579 uint32 addressSpec, area_id sourceID) 2580 { 2581 // Do the locking: target address space, all address spaces associated with 2582 // the source cache, and the cache itself. 
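	// The number of wired pages can only be determined with the cache locked,
	// but reserving pages may block and thus must not happen while the locks
	// are held. Hence the loop below: count, drop the locks, reserve, then
	// re-lock and re-check until the reservation is large enough.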
2583 MultiAddressSpaceLocker locker; 2584 VMAddressSpace* targetAddressSpace; 2585 VMCache* cache; 2586 VMArea* source; 2587 AreaCacheLocker cacheLocker; 2588 status_t status; 2589 bool sharedArea; 2590 2591 page_num_t wiredPages = 0; 2592 vm_page_reservation wiredPagesReservation; 2593 2594 bool restart; 2595 do { 2596 restart = false; 2597 2598 locker.Unset(); 2599 status = locker.AddTeam(team, true, &targetAddressSpace); 2600 if (status == B_OK) { 2601 status = locker.AddAreaCacheAndLock(sourceID, false, false, source, 2602 &cache); 2603 } 2604 if (status != B_OK) 2605 return status; 2606 2607 cacheLocker.SetTo(cache, true); // already locked 2608 2609 sharedArea = (source->protection & B_SHARED_AREA) != 0; 2610 2611 page_num_t oldWiredPages = wiredPages; 2612 wiredPages = 0; 2613 2614 // If the source area isn't shared, count the number of wired pages in 2615 // the cache and reserve as many pages. 2616 if (!sharedArea) { 2617 wiredPages = cache->WiredPagesCount(); 2618 2619 if (wiredPages > oldWiredPages) { 2620 cacheLocker.Unlock(); 2621 locker.Unlock(); 2622 2623 if (oldWiredPages > 0) 2624 vm_page_unreserve_pages(&wiredPagesReservation); 2625 2626 vm_page_reserve_pages(&wiredPagesReservation, wiredPages, 2627 VM_PRIORITY_USER); 2628 2629 restart = true; 2630 } 2631 } else if (oldWiredPages > 0) 2632 vm_page_unreserve_pages(&wiredPagesReservation); 2633 } while (restart); 2634 2635 // unreserve pages later 2636 struct PagesUnreserver { 2637 PagesUnreserver(vm_page_reservation* reservation) 2638 : 2639 fReservation(reservation) 2640 { 2641 } 2642 2643 ~PagesUnreserver() 2644 { 2645 if (fReservation != NULL) 2646 vm_page_unreserve_pages(fReservation); 2647 } 2648 2649 private: 2650 vm_page_reservation* fReservation; 2651 } pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL); 2652 2653 bool writableCopy 2654 = (source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0; 2655 uint8* targetPageProtections = NULL; 2656 2657 if (source->page_protections != NULL) { 2658 size_t bytes = (source->Size() / B_PAGE_SIZE + 1) / 2; 2659 targetPageProtections = (uint8*)malloc_etc(bytes, 2660 (source->address_space == VMAddressSpace::Kernel() 2661 || targetAddressSpace == VMAddressSpace::Kernel()) 2662 ? HEAP_DONT_LOCK_KERNEL_SPACE : 0); 2663 if (targetPageProtections == NULL) 2664 return B_NO_MEMORY; 2665 2666 memcpy(targetPageProtections, source->page_protections, bytes); 2667 2668 if (!writableCopy) { 2669 for (size_t i = 0; i < bytes; i++) { 2670 if ((targetPageProtections[i] 2671 & (B_WRITE_AREA | B_WRITE_AREA << 4)) != 0) { 2672 writableCopy = true; 2673 break; 2674 } 2675 } 2676 } 2677 } 2678 2679 if (addressSpec == B_CLONE_ADDRESS) { 2680 addressSpec = B_EXACT_ADDRESS; 2681 *_address = (void*)source->Base(); 2682 } 2683 2684 // First, create a cache on top of the source area, respectively use the 2685 // existing one, if this is a shared area. 2686 2687 VMArea* target; 2688 virtual_address_restrictions addressRestrictions = {}; 2689 addressRestrictions.address = *_address; 2690 addressRestrictions.address_specification = addressSpec; 2691 status = map_backing_store(targetAddressSpace, cache, source->cache_offset, 2692 name, source->Size(), source->wiring, source->protection, 2693 source->protection_max, 2694 sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP, 2695 writableCopy ? 
0 : CREATE_AREA_DONT_COMMIT_MEMORY, 2696 &addressRestrictions, true, &target, _address); 2697 if (status < B_OK) { 2698 free_etc(targetPageProtections, HEAP_DONT_LOCK_KERNEL_SPACE); 2699 return status; 2700 } 2701 2702 if (targetPageProtections != NULL) 2703 target->page_protections = targetPageProtections; 2704 2705 if (sharedArea) { 2706 // The new area uses the old area's cache, but map_backing_store() 2707 // hasn't acquired a ref. So we have to do that now. 2708 cache->AcquireRefLocked(); 2709 } 2710 2711 // If the source area is writable, we need to move it one layer up as well 2712 2713 if (!sharedArea) { 2714 if (writableCopy) { 2715 // TODO: do something more useful if this fails! 2716 if (vm_copy_on_write_area(cache, 2717 wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) { 2718 panic("vm_copy_on_write_area() failed!\n"); 2719 } 2720 } 2721 } 2722 2723 // we return the ID of the newly created area 2724 return target->id; 2725 } 2726 2727 2728 status_t 2729 vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection, 2730 bool kernel) 2731 { 2732 fix_protection(&newProtection); 2733 2734 TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32 2735 ", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection)); 2736 2737 if (!arch_vm_supports_protection(newProtection)) 2738 return B_NOT_SUPPORTED; 2739 2740 bool becomesWritable 2741 = (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2742 2743 // lock address spaces and cache 2744 MultiAddressSpaceLocker locker; 2745 VMCache* cache; 2746 VMArea* area; 2747 status_t status; 2748 AreaCacheLocker cacheLocker; 2749 bool isWritable; 2750 2751 bool restart; 2752 do { 2753 restart = false; 2754 2755 locker.Unset(); 2756 status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache); 2757 if (status != B_OK) 2758 return status; 2759 2760 cacheLocker.SetTo(cache, true); // already locked 2761 2762 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 2763 || (area->protection & B_KERNEL_AREA) != 0)) { 2764 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2765 "set protection %#" B_PRIx32 " on kernel area %" B_PRId32 2766 " (%s)\n", team, newProtection, areaID, area->name); 2767 return B_NOT_ALLOWED; 2768 } 2769 if (!kernel && area->protection_max != 0 2770 && (newProtection & area->protection_max) 2771 != (newProtection & B_USER_PROTECTION)) { 2772 dprintf("vm_set_area_protection: team %" B_PRId32 " tried to " 2773 "set protection %#" B_PRIx32 " (max %#" B_PRIx32 ") on kernel " 2774 "area %" B_PRId32 " (%s)\n", team, newProtection, 2775 area->protection_max, areaID, area->name); 2776 return B_NOT_ALLOWED; 2777 } 2778 2779 if (area->protection == newProtection) 2780 return B_OK; 2781 2782 if (team != VMAddressSpace::KernelID() 2783 && area->address_space->ID() != team) { 2784 // unless you're the kernel, you are only allowed to set 2785 // the protection of your own areas 2786 return B_NOT_ALLOWED; 2787 } 2788 2789 isWritable 2790 = (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0; 2791 2792 // Make sure the area (respectively, if we're going to call 2793 // vm_copy_on_write_area(), all areas of the cache) doesn't have any 2794 // wired ranges. 
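	// wait_if_area_is_wired() temporarily drops the locks while waiting, so
	// whenever it returns true the whole locking sequence is restarted from
	// the top of the surrounding do/while loop.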
2795 if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) { 2796 for (VMArea* otherArea = cache->areas; otherArea != NULL; 2797 otherArea = otherArea->cache_next) { 2798 if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) { 2799 restart = true; 2800 break; 2801 } 2802 } 2803 } else { 2804 if (wait_if_area_is_wired(area, &locker, &cacheLocker)) 2805 restart = true; 2806 } 2807 } while (restart); 2808 2809 bool changePageProtection = true; 2810 bool changeTopCachePagesOnly = false; 2811 2812 if (isWritable && !becomesWritable) { 2813 // writable -> !writable 2814 2815 if (cache->source != NULL && cache->temporary) { 2816 if (cache->CountWritableAreas(area) == 0) { 2817 // Since this cache now lives from the pages in its source cache, 2818 // we can change the cache's commitment to take only those pages 2819 // into account that really are in this cache. 2820 2821 status = cache->Commit(cache->page_count * B_PAGE_SIZE, 2822 team == VMAddressSpace::KernelID() 2823 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2824 2825 // TODO: we may be able to join with our source cache, if 2826 // count == 0 2827 } 2828 } 2829 2830 // If only the writability changes, we can just remap the pages of the 2831 // top cache, since the pages of lower caches are mapped read-only 2832 // anyway. That's advantageous only, if the number of pages in the cache 2833 // is significantly smaller than the number of pages in the area, 2834 // though. 2835 if (newProtection 2836 == (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA)) 2837 && cache->page_count * 2 < area->Size() / B_PAGE_SIZE) { 2838 changeTopCachePagesOnly = true; 2839 } 2840 } else if (!isWritable && becomesWritable) { 2841 // !writable -> writable 2842 2843 if (!cache->consumers.IsEmpty()) { 2844 // There are consumers -- we have to insert a new cache. Fortunately 2845 // vm_copy_on_write_area() does everything that's needed. 2846 changePageProtection = false; 2847 status = vm_copy_on_write_area(cache, NULL); 2848 } else { 2849 // No consumers, so we don't need to insert a new one. 2850 if (cache->source != NULL && cache->temporary) { 2851 // the cache's commitment must contain all possible pages 2852 status = cache->Commit(cache->virtual_end - cache->virtual_base, 2853 team == VMAddressSpace::KernelID() 2854 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 2855 } 2856 2857 if (status == B_OK && cache->source != NULL) { 2858 // There's a source cache, hence we can't just change all pages' 2859 // protection or we might allow writing into pages belonging to 2860 // a lower cache. 
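				// Example: a page that currently exists only in the source
				// cache is mapped from there; making that mapping writable
				// would let stores modify the shared lower-cache page in
				// place instead of first faulting a private copy into this
				// cache.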
2861 changeTopCachePagesOnly = true; 2862 } 2863 } 2864 } else { 2865 // we don't have anything special to do in all other cases 2866 } 2867 2868 if (status == B_OK) { 2869 // remap existing pages in this cache 2870 if (changePageProtection) { 2871 VMTranslationMap* map = area->address_space->TranslationMap(); 2872 map->Lock(); 2873 2874 if (changeTopCachePagesOnly) { 2875 page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE; 2876 page_num_t lastPageOffset 2877 = firstPageOffset + area->Size() / B_PAGE_SIZE; 2878 for (VMCachePagesTree::Iterator it = cache->pages.GetIterator(); 2879 vm_page* page = it.Next();) { 2880 if (page->cache_offset >= firstPageOffset 2881 && page->cache_offset <= lastPageOffset) { 2882 addr_t address = virtual_page_address(area, page); 2883 map->ProtectPage(area, address, newProtection); 2884 } 2885 } 2886 } else 2887 map->ProtectArea(area, newProtection); 2888 2889 map->Unlock(); 2890 } 2891 2892 area->protection = newProtection; 2893 } 2894 2895 return status; 2896 } 2897 2898 2899 status_t 2900 vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr) 2901 { 2902 VMAddressSpace* addressSpace = VMAddressSpace::Get(team); 2903 if (addressSpace == NULL) 2904 return B_BAD_TEAM_ID; 2905 2906 VMTranslationMap* map = addressSpace->TranslationMap(); 2907 2908 map->Lock(); 2909 uint32 dummyFlags; 2910 status_t status = map->Query(vaddr, paddr, &dummyFlags); 2911 map->Unlock(); 2912 2913 addressSpace->Put(); 2914 return status; 2915 } 2916 2917 2918 /*! The page's cache must be locked. 2919 */ 2920 bool 2921 vm_test_map_modification(vm_page* page) 2922 { 2923 if (page->modified) 2924 return true; 2925 2926 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2927 vm_page_mapping* mapping; 2928 while ((mapping = iterator.Next()) != NULL) { 2929 VMArea* area = mapping->area; 2930 VMTranslationMap* map = area->address_space->TranslationMap(); 2931 2932 phys_addr_t physicalAddress; 2933 uint32 flags; 2934 map->Lock(); 2935 map->Query(virtual_page_address(area, page), &physicalAddress, &flags); 2936 map->Unlock(); 2937 2938 if ((flags & PAGE_MODIFIED) != 0) 2939 return true; 2940 } 2941 2942 return false; 2943 } 2944 2945 2946 /*! The page's cache must be locked. 2947 */ 2948 void 2949 vm_clear_map_flags(vm_page* page, uint32 flags) 2950 { 2951 if ((flags & PAGE_ACCESSED) != 0) 2952 page->accessed = false; 2953 if ((flags & PAGE_MODIFIED) != 0) 2954 page->modified = false; 2955 2956 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2957 vm_page_mapping* mapping; 2958 while ((mapping = iterator.Next()) != NULL) { 2959 VMArea* area = mapping->area; 2960 VMTranslationMap* map = area->address_space->TranslationMap(); 2961 2962 map->Lock(); 2963 map->ClearFlags(virtual_page_address(area, page), flags); 2964 map->Unlock(); 2965 } 2966 } 2967 2968 2969 /*! Removes all mappings from a page. 2970 After you've called this function, the page is unmapped from memory and 2971 the page's \c accessed and \c modified flags have been updated according 2972 to the state of the mappings. 2973 The page's cache must be locked. 
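	A minimal caller sketch (hypothetical; locking the cache is the caller's
	responsibility):

		cache->Lock();
		vm_remove_all_page_mappings(page);
		cache->Unlock();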
2974 */ 2975 void 2976 vm_remove_all_page_mappings(vm_page* page) 2977 { 2978 while (vm_page_mapping* mapping = page->mappings.Head()) { 2979 VMArea* area = mapping->area; 2980 VMTranslationMap* map = area->address_space->TranslationMap(); 2981 addr_t address = virtual_page_address(area, page); 2982 map->UnmapPage(area, address, false); 2983 } 2984 } 2985 2986 2987 int32 2988 vm_clear_page_mapping_accessed_flags(struct vm_page *page) 2989 { 2990 int32 count = 0; 2991 2992 vm_page_mappings::Iterator iterator = page->mappings.GetIterator(); 2993 vm_page_mapping* mapping; 2994 while ((mapping = iterator.Next()) != NULL) { 2995 VMArea* area = mapping->area; 2996 VMTranslationMap* map = area->address_space->TranslationMap(); 2997 2998 bool modified; 2999 if (map->ClearAccessedAndModified(area, 3000 virtual_page_address(area, page), false, modified)) { 3001 count++; 3002 } 3003 3004 page->modified |= modified; 3005 } 3006 3007 3008 if (page->accessed) { 3009 count++; 3010 page->accessed = false; 3011 } 3012 3013 return count; 3014 } 3015 3016 3017 /*! Removes all mappings of a page and/or clears the accessed bits of the 3018 mappings. 3019 The function iterates through the page mappings and removes them until 3020 encountering one that has been accessed. From then on it will continue to 3021 iterate, but only clear the accessed flag of the mapping. The page's 3022 \c modified bit will be updated accordingly, the \c accessed bit will be 3023 cleared. 3024 \return The number of mapping accessed bits encountered, including the 3025 \c accessed bit of the page itself. If \c 0 is returned, all mappings 3026 of the page have been removed. 3027 */ 3028 int32 3029 vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page) 3030 { 3031 ASSERT(page->WiredCount() == 0); 3032 3033 if (page->accessed) 3034 return vm_clear_page_mapping_accessed_flags(page); 3035 3036 while (vm_page_mapping* mapping = page->mappings.Head()) { 3037 VMArea* area = mapping->area; 3038 VMTranslationMap* map = area->address_space->TranslationMap(); 3039 addr_t address = virtual_page_address(area, page); 3040 bool modified = false; 3041 if (map->ClearAccessedAndModified(area, address, true, modified)) { 3042 page->accessed = true; 3043 page->modified |= modified; 3044 return vm_clear_page_mapping_accessed_flags(page); 3045 } 3046 page->modified |= modified; 3047 } 3048 3049 return 0; 3050 } 3051 3052 3053 static int 3054 display_mem(int argc, char** argv) 3055 { 3056 bool physical = false; 3057 addr_t copyAddress; 3058 int32 displayWidth; 3059 int32 itemSize; 3060 int32 num = -1; 3061 addr_t address; 3062 int i = 1, j; 3063 3064 if (argc > 1 && argv[1][0] == '-') { 3065 if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) { 3066 physical = true; 3067 i++; 3068 } else 3069 i = 99; 3070 } 3071 3072 if (argc < i + 1 || argc > i + 2) { 3073 kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n" 3074 "\tdl - 8 bytes\n" 3075 "\tdw - 4 bytes\n" 3076 "\tds - 2 bytes\n" 3077 "\tdb - 1 byte\n" 3078 "\tstring - a whole string\n" 3079 " -p or --physical only allows memory from a single page to be " 3080 "displayed.\n"); 3081 return 0; 3082 } 3083 3084 address = parse_expression(argv[i]); 3085 3086 if (argc > i + 1) 3087 num = parse_expression(argv[i + 1]); 3088 3089 // build the format string 3090 if (strcmp(argv[0], "db") == 0) { 3091 itemSize = 1; 3092 displayWidth = 16; 3093 } else if (strcmp(argv[0], "ds") == 0) { 3094 itemSize = 2; 3095 displayWidth = 8; 3096 } else if (strcmp(argv[0], "dw") == 0) { 3097 
itemSize = 4; 3098 displayWidth = 4; 3099 } else if (strcmp(argv[0], "dl") == 0) { 3100 itemSize = 8; 3101 displayWidth = 2; 3102 } else if (strcmp(argv[0], "string") == 0) { 3103 itemSize = 1; 3104 displayWidth = -1; 3105 } else { 3106 kprintf("display_mem called in an invalid way!\n"); 3107 return 0; 3108 } 3109 3110 if (num <= 0) 3111 num = displayWidth; 3112 3113 void* physicalPageHandle = NULL; 3114 3115 if (physical) { 3116 int32 offset = address & (B_PAGE_SIZE - 1); 3117 if (num * itemSize + offset > B_PAGE_SIZE) { 3118 num = (B_PAGE_SIZE - offset) / itemSize; 3119 kprintf("NOTE: number of bytes has been cut to page size\n"); 3120 } 3121 3122 address = ROUNDDOWN(address, B_PAGE_SIZE); 3123 3124 if (vm_get_physical_page_debug(address, &copyAddress, 3125 &physicalPageHandle) != B_OK) { 3126 kprintf("getting the hardware page failed."); 3127 return 0; 3128 } 3129 3130 address += offset; 3131 copyAddress += offset; 3132 } else 3133 copyAddress = address; 3134 3135 if (!strcmp(argv[0], "string")) { 3136 kprintf("%p \"", (char*)copyAddress); 3137 3138 // string mode 3139 for (i = 0; true; i++) { 3140 char c; 3141 if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1) 3142 != B_OK 3143 || c == '\0') { 3144 break; 3145 } 3146 3147 if (c == '\n') 3148 kprintf("\\n"); 3149 else if (c == '\t') 3150 kprintf("\\t"); 3151 else { 3152 if (!isprint(c)) 3153 c = '.'; 3154 3155 kprintf("%c", c); 3156 } 3157 } 3158 3159 kprintf("\"\n"); 3160 } else { 3161 // number mode 3162 for (i = 0; i < num; i++) { 3163 uint64 value; 3164 3165 if ((i % displayWidth) == 0) { 3166 int32 displayed = min_c(displayWidth, (num-i)) * itemSize; 3167 if (i != 0) 3168 kprintf("\n"); 3169 3170 kprintf("[0x%lx] ", address + i * itemSize); 3171 3172 for (j = 0; j < displayed; j++) { 3173 char c; 3174 if (debug_memcpy(B_CURRENT_TEAM, &c, 3175 (char*)copyAddress + i * itemSize + j, 1) != B_OK) { 3176 displayed = j; 3177 break; 3178 } 3179 if (!isprint(c)) 3180 c = '.'; 3181 3182 kprintf("%c", c); 3183 } 3184 if (num > displayWidth) { 3185 // make sure the spacing in the last line is correct 3186 for (j = displayed; j < displayWidth * itemSize; j++) 3187 kprintf(" "); 3188 } 3189 kprintf(" "); 3190 } 3191 3192 if (debug_memcpy(B_CURRENT_TEAM, &value, 3193 (uint8*)copyAddress + i * itemSize, itemSize) != B_OK) { 3194 kprintf("read fault"); 3195 break; 3196 } 3197 3198 switch (itemSize) { 3199 case 1: 3200 kprintf(" %02" B_PRIx8, *(uint8*)&value); 3201 break; 3202 case 2: 3203 kprintf(" %04" B_PRIx16, *(uint16*)&value); 3204 break; 3205 case 4: 3206 kprintf(" %08" B_PRIx32, *(uint32*)&value); 3207 break; 3208 case 8: 3209 kprintf(" %016" B_PRIx64, *(uint64*)&value); 3210 break; 3211 } 3212 } 3213 3214 kprintf("\n"); 3215 } 3216 3217 if (physical) { 3218 copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE); 3219 vm_put_physical_page_debug(copyAddress, physicalPageHandle); 3220 } 3221 return 0; 3222 } 3223 3224 3225 static void 3226 dump_cache_tree_recursively(VMCache* cache, int level, 3227 VMCache* highlightCache) 3228 { 3229 // print this cache 3230 for (int i = 0; i < level; i++) 3231 kprintf(" "); 3232 if (cache == highlightCache) 3233 kprintf("%p <--\n", cache); 3234 else 3235 kprintf("%p\n", cache); 3236 3237 // recursively print its consumers 3238 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3239 VMCache* consumer = it.Next();) { 3240 dump_cache_tree_recursively(consumer, level + 1, highlightCache); 3241 } 3242 } 3243 3244 3245 static int 3246 dump_cache_tree(int argc, char** argv) 3247 { 3248
if (argc != 2 || !strcmp(argv[1], "--help")) { 3249 kprintf("usage: %s <address>\n", argv[0]); 3250 return 0; 3251 } 3252 3253 addr_t address = parse_expression(argv[1]); 3254 if (address == 0) 3255 return 0; 3256 3257 VMCache* cache = (VMCache*)address; 3258 VMCache* root = cache; 3259 3260 // find the root cache (the transitive source) 3261 while (root->source != NULL) 3262 root = root->source; 3263 3264 dump_cache_tree_recursively(root, 0, cache); 3265 3266 return 0; 3267 } 3268 3269 3270 const char* 3271 vm_cache_type_to_string(int32 type) 3272 { 3273 switch (type) { 3274 case CACHE_TYPE_RAM: 3275 return "RAM"; 3276 case CACHE_TYPE_DEVICE: 3277 return "device"; 3278 case CACHE_TYPE_VNODE: 3279 return "vnode"; 3280 case CACHE_TYPE_NULL: 3281 return "null"; 3282 3283 default: 3284 return "unknown"; 3285 } 3286 } 3287 3288 3289 #if DEBUG_CACHE_LIST 3290 3291 static void 3292 update_cache_info_recursively(VMCache* cache, cache_info& info) 3293 { 3294 info.page_count += cache->page_count; 3295 if (cache->type == CACHE_TYPE_RAM) 3296 info.committed += cache->committed_size; 3297 3298 // recurse 3299 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3300 VMCache* consumer = it.Next();) { 3301 update_cache_info_recursively(consumer, info); 3302 } 3303 } 3304 3305 3306 static int 3307 cache_info_compare_page_count(const void* _a, const void* _b) 3308 { 3309 const cache_info* a = (const cache_info*)_a; 3310 const cache_info* b = (const cache_info*)_b; 3311 if (a->page_count == b->page_count) 3312 return 0; 3313 return a->page_count < b->page_count ? 1 : -1; 3314 } 3315 3316 3317 static int 3318 cache_info_compare_committed(const void* _a, const void* _b) 3319 { 3320 const cache_info* a = (const cache_info*)_a; 3321 const cache_info* b = (const cache_info*)_b; 3322 if (a->committed == b->committed) 3323 return 0; 3324 return a->committed < b->committed ? 
1 : -1; 3325 } 3326 3327 3328 static void 3329 dump_caches_recursively(VMCache* cache, cache_info& info, int level) 3330 { 3331 for (int i = 0; i < level; i++) 3332 kprintf(" "); 3333 3334 kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", " 3335 "pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type), 3336 cache->virtual_base, cache->virtual_end, cache->page_count); 3337 3338 if (level == 0) 3339 kprintf("/%lu", info.page_count); 3340 3341 if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) { 3342 kprintf(", committed: %" B_PRIdOFF, cache->committed_size); 3343 3344 if (level == 0) 3345 kprintf("/%lu", info.committed); 3346 } 3347 3348 // areas 3349 if (cache->areas != NULL) { 3350 VMArea* area = cache->areas; 3351 kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id, 3352 area->name, area->address_space->ID()); 3353 3354 while (area->cache_next != NULL) { 3355 area = area->cache_next; 3356 kprintf(", %" B_PRId32, area->id); 3357 } 3358 } 3359 3360 kputs("\n"); 3361 3362 // recurse 3363 for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator(); 3364 VMCache* consumer = it.Next();) { 3365 dump_caches_recursively(consumer, info, level + 1); 3366 } 3367 } 3368 3369 3370 static int 3371 dump_caches(int argc, char** argv) 3372 { 3373 if (sCacheInfoTable == NULL) { 3374 kprintf("No cache info table!\n"); 3375 return 0; 3376 } 3377 3378 bool sortByPageCount = true; 3379 3380 for (int32 i = 1; i < argc; i++) { 3381 if (strcmp(argv[i], "-c") == 0) { 3382 sortByPageCount = false; 3383 } else { 3384 print_debugger_command_usage(argv[0]); 3385 return 0; 3386 } 3387 } 3388 3389 uint32 totalCount = 0; 3390 uint32 rootCount = 0; 3391 off_t totalCommitted = 0; 3392 page_num_t totalPages = 0; 3393 3394 VMCache* cache = gDebugCacheList; 3395 while (cache) { 3396 totalCount++; 3397 if (cache->source == NULL) { 3398 cache_info stackInfo; 3399 cache_info& info = rootCount < (uint32)kCacheInfoTableCount 3400 ? sCacheInfoTable[rootCount] : stackInfo; 3401 rootCount++; 3402 info.cache = cache; 3403 info.page_count = 0; 3404 info.committed = 0; 3405 update_cache_info_recursively(cache, info); 3406 totalCommitted += info.committed; 3407 totalPages += info.page_count; 3408 } 3409 3410 cache = cache->debug_next; 3411 } 3412 3413 if (rootCount <= (uint32)kCacheInfoTableCount) { 3414 qsort(sCacheInfoTable, rootCount, sizeof(cache_info), 3415 sortByPageCount 3416 ? &cache_info_compare_page_count 3417 : &cache_info_compare_committed); 3418 } 3419 3420 kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %" 3421 B_PRIuPHYSADDR "\n", totalCommitted, totalPages); 3422 kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s " 3423 "per cache tree...\n\n", totalCount, rootCount, sortByPageCount ? 3424 "page count" : "committed size"); 3425 3426 if (rootCount <= (uint32)kCacheInfoTableCount) { 3427 for (uint32 i = 0; i < rootCount; i++) { 3428 cache_info& info = sCacheInfoTable[i]; 3429 dump_caches_recursively(info.cache, info, 0); 3430 } 3431 } else 3432 kprintf("Cache info table too small! 
Can't sort and print caches!\n"); 3433 3434 return 0; 3435 } 3436 3437 #endif // DEBUG_CACHE_LIST 3438 3439 3440 static int 3441 dump_cache(int argc, char** argv) 3442 { 3443 VMCache* cache; 3444 bool showPages = false; 3445 int i = 1; 3446 3447 if (argc < 2 || !strcmp(argv[1], "--help")) { 3448 kprintf("usage: %s [-ps] <address>\n" 3449 " if -p is specified, all pages are shown, if -s is used\n" 3450 " only the cache info is shown respectively.\n", argv[0]); 3451 return 0; 3452 } 3453 while (argv[i][0] == '-') { 3454 char* arg = argv[i] + 1; 3455 while (arg[0]) { 3456 if (arg[0] == 'p') 3457 showPages = true; 3458 arg++; 3459 } 3460 i++; 3461 } 3462 if (argv[i] == NULL) { 3463 kprintf("%s: invalid argument, pass address\n", argv[0]); 3464 return 0; 3465 } 3466 3467 addr_t address = parse_expression(argv[i]); 3468 if (address == 0) 3469 return 0; 3470 3471 cache = (VMCache*)address; 3472 3473 cache->Dump(showPages); 3474 3475 set_debug_variable("_sourceCache", (addr_t)cache->source); 3476 3477 return 0; 3478 } 3479 3480 3481 static void 3482 dump_area_struct(VMArea* area, bool mappings) 3483 { 3484 kprintf("AREA: %p\n", area); 3485 kprintf("name:\t\t'%s'\n", area->name); 3486 kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID()); 3487 kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id); 3488 kprintf("base:\t\t0x%lx\n", area->Base()); 3489 kprintf("size:\t\t0x%lx\n", area->Size()); 3490 kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection); 3491 kprintf("page_protection:%p\n", area->page_protections); 3492 kprintf("wiring:\t\t0x%x\n", area->wiring); 3493 kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType()); 3494 kprintf("cache:\t\t%p\n", area->cache); 3495 kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type)); 3496 kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset); 3497 kprintf("cache_next:\t%p\n", area->cache_next); 3498 kprintf("cache_prev:\t%p\n", area->cache_prev); 3499 3500 VMAreaMappings::Iterator iterator = area->mappings.GetIterator(); 3501 if (mappings) { 3502 kprintf("page mappings:\n"); 3503 while (iterator.HasNext()) { 3504 vm_page_mapping* mapping = iterator.Next(); 3505 kprintf(" %p", mapping->page); 3506 } 3507 kprintf("\n"); 3508 } else { 3509 uint32 count = 0; 3510 while (iterator.Next() != NULL) { 3511 count++; 3512 } 3513 kprintf("page mappings:\t%" B_PRIu32 "\n", count); 3514 } 3515 } 3516 3517 3518 static int 3519 dump_area(int argc, char** argv) 3520 { 3521 bool mappings = false; 3522 bool found = false; 3523 int32 index = 1; 3524 VMArea* area; 3525 addr_t num; 3526 3527 if (argc < 2 || !strcmp(argv[1], "--help")) { 3528 kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n" 3529 "All areas matching either id/address/name are listed. 
You can\n" 3530 "force to check only a specific item by prefixing the specifier\n" 3531 "with the id/contains/address/name keywords.\n" 3532 "-m shows the area's mappings as well.\n"); 3533 return 0; 3534 } 3535 3536 if (!strcmp(argv[1], "-m")) { 3537 mappings = true; 3538 index++; 3539 } 3540 3541 int32 mode = 0xf; 3542 if (!strcmp(argv[index], "id")) 3543 mode = 1; 3544 else if (!strcmp(argv[index], "contains")) 3545 mode = 2; 3546 else if (!strcmp(argv[index], "name")) 3547 mode = 4; 3548 else if (!strcmp(argv[index], "address")) 3549 mode = 0; 3550 if (mode != 0xf) 3551 index++; 3552 3553 if (index >= argc) { 3554 kprintf("No area specifier given.\n"); 3555 return 0; 3556 } 3557 3558 num = parse_expression(argv[index]); 3559 3560 if (mode == 0) { 3561 dump_area_struct((struct VMArea*)num, mappings); 3562 } else { 3563 // walk through the area list, looking for the arguments as a name 3564 3565 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3566 while ((area = it.Next()) != NULL) { 3567 if (((mode & 4) != 0 3568 && !strcmp(argv[index], area->name)) 3569 || (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num) 3570 || (((mode & 2) != 0 && area->Base() <= num 3571 && area->Base() + area->Size() > num))))) { 3572 dump_area_struct(area, mappings); 3573 found = true; 3574 } 3575 } 3576 3577 if (!found) 3578 kprintf("could not find area %s (%ld)\n", argv[index], num); 3579 } 3580 3581 return 0; 3582 } 3583 3584 3585 static int 3586 dump_area_list(int argc, char** argv) 3587 { 3588 VMArea* area; 3589 const char* name = NULL; 3590 int32 id = 0; 3591 3592 if (argc > 1) { 3593 id = parse_expression(argv[1]); 3594 if (id == 0) 3595 name = argv[1]; 3596 } 3597 3598 kprintf("%-*s id %-*s %-*sprotect lock name\n", 3599 B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base", 3600 B_PRINTF_POINTER_WIDTH, "size"); 3601 3602 VMAreaHashTable::Iterator it = VMAreaHash::GetIterator(); 3603 while ((area = it.Next()) != NULL) { 3604 if ((id != 0 && area->address_space->ID() != id) 3605 || (name != NULL && strstr(area->name, name) == NULL)) 3606 continue; 3607 3608 kprintf("%p %5" B_PRIx32 " %p %p %4" B_PRIx32 " %4d %s\n", area, 3609 area->id, (void*)area->Base(), (void*)area->Size(), 3610 area->protection, area->wiring, area->name); 3611 } 3612 return 0; 3613 } 3614 3615 3616 static int 3617 dump_available_memory(int argc, char** argv) 3618 { 3619 kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n", 3620 sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE); 3621 return 0; 3622 } 3623 3624 3625 static int 3626 dump_mapping_info(int argc, char** argv) 3627 { 3628 bool reverseLookup = false; 3629 bool pageLookup = false; 3630 3631 int argi = 1; 3632 for (; argi < argc && argv[argi][0] == '-'; argi++) { 3633 const char* arg = argv[argi]; 3634 if (strcmp(arg, "-r") == 0) { 3635 reverseLookup = true; 3636 } else if (strcmp(arg, "-p") == 0) { 3637 reverseLookup = true; 3638 pageLookup = true; 3639 } else { 3640 print_debugger_command_usage(argv[0]); 3641 return 0; 3642 } 3643 } 3644 3645 // We need at least one argument, the address. Optionally a thread ID can be 3646 // specified. 
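	// Typical invocations (the actual command name depends on how this
	// function is registered with the kernel debugger):
	//	<command> <virtualAddress> [thread]	- print the mapping of a virtual
	//										  address
	//	<command> -r <physicalAddress>		- reverse lookup: list all virtual
	//										  mappings of a physical address
	//	<command> -p <vm_page address>		- like -r, but the physical address
	//										  is taken from a vm_page object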
3647 if (argi >= argc || argi + 2 < argc) { 3648 print_debugger_command_usage(argv[0]); 3649 return 0; 3650 } 3651 3652 uint64 addressValue; 3653 if (!evaluate_debug_expression(argv[argi++], &addressValue, false)) 3654 return 0; 3655 3656 Team* team = NULL; 3657 if (argi < argc) { 3658 uint64 threadID; 3659 if (!evaluate_debug_expression(argv[argi++], &threadID, false)) 3660 return 0; 3661 3662 Thread* thread = Thread::GetDebug(threadID); 3663 if (thread == NULL) { 3664 kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]); 3665 return 0; 3666 } 3667 3668 team = thread->team; 3669 } 3670 3671 if (reverseLookup) { 3672 phys_addr_t physicalAddress; 3673 if (pageLookup) { 3674 vm_page* page = (vm_page*)(addr_t)addressValue; 3675 physicalAddress = page->physical_page_number * B_PAGE_SIZE; 3676 } else { 3677 physicalAddress = (phys_addr_t)addressValue; 3678 physicalAddress -= physicalAddress % B_PAGE_SIZE; 3679 } 3680 3681 kprintf(" Team Virtual Address Area\n"); 3682 kprintf("--------------------------------------\n"); 3683 3684 struct Callback : VMTranslationMap::ReverseMappingInfoCallback { 3685 Callback() 3686 : 3687 fAddressSpace(NULL) 3688 { 3689 } 3690 3691 void SetAddressSpace(VMAddressSpace* addressSpace) 3692 { 3693 fAddressSpace = addressSpace; 3694 } 3695 3696 virtual bool HandleVirtualAddress(addr_t virtualAddress) 3697 { 3698 kprintf("%8" B_PRId32 " %#18" B_PRIxADDR, fAddressSpace->ID(), 3699 virtualAddress); 3700 if (VMArea* area = fAddressSpace->LookupArea(virtualAddress)) 3701 kprintf(" %8" B_PRId32 " %s\n", area->id, area->name); 3702 else 3703 kprintf("\n"); 3704 return false; 3705 } 3706 3707 private: 3708 VMAddressSpace* fAddressSpace; 3709 } callback; 3710 3711 if (team != NULL) { 3712 // team specified -- get its address space 3713 VMAddressSpace* addressSpace = team->address_space; 3714 if (addressSpace == NULL) { 3715 kprintf("Failed to get address space!\n"); 3716 return 0; 3717 } 3718 3719 callback.SetAddressSpace(addressSpace); 3720 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3721 physicalAddress, callback); 3722 } else { 3723 // no team specified -- iterate through all address spaces 3724 for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst(); 3725 addressSpace != NULL; 3726 addressSpace = VMAddressSpace::DebugNext(addressSpace)) { 3727 callback.SetAddressSpace(addressSpace); 3728 addressSpace->TranslationMap()->DebugGetReverseMappingInfo( 3729 physicalAddress, callback); 3730 } 3731 } 3732 } else { 3733 // get the address space 3734 addr_t virtualAddress = (addr_t)addressValue; 3735 virtualAddress -= virtualAddress % B_PAGE_SIZE; 3736 VMAddressSpace* addressSpace; 3737 if (IS_KERNEL_ADDRESS(virtualAddress)) { 3738 addressSpace = VMAddressSpace::Kernel(); 3739 } else if (team != NULL) { 3740 addressSpace = team->address_space; 3741 } else { 3742 Thread* thread = debug_get_debugged_thread(); 3743 if (thread == NULL || thread->team == NULL) { 3744 kprintf("Failed to get team!\n"); 3745 return 0; 3746 } 3747 3748 addressSpace = thread->team->address_space; 3749 } 3750 3751 if (addressSpace == NULL) { 3752 kprintf("Failed to get address space!\n"); 3753 return 0; 3754 } 3755 3756 // let the translation map implementation do the job 3757 addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress); 3758 } 3759 3760 return 0; 3761 } 3762 3763 3764 /*! Deletes all areas and reserved regions in the given address space. 3765 3766 The caller must ensure that none of the areas has any wired ranges. 
3767 3768 \param addressSpace The address space. 3769 \param deletingAddressSpace \c true, if the address space is in the process 3770 of being deleted. 3771 */ 3772 void 3773 vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace) 3774 { 3775 TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n", 3776 addressSpace->ID())); 3777 3778 addressSpace->WriteLock(); 3779 3780 // remove all reserved areas in this address space 3781 addressSpace->UnreserveAllAddressRanges(0); 3782 3783 // delete all the areas in this address space 3784 while (VMArea* area = addressSpace->FirstArea()) { 3785 ASSERT(!area->IsWired()); 3786 delete_area(addressSpace, area, deletingAddressSpace); 3787 } 3788 3789 addressSpace->WriteUnlock(); 3790 } 3791 3792 3793 static area_id 3794 vm_area_for(addr_t address, bool kernel) 3795 { 3796 team_id team; 3797 if (IS_USER_ADDRESS(address)) { 3798 // we try the user team address space, if any 3799 team = VMAddressSpace::CurrentID(); 3800 if (team < 0) 3801 return team; 3802 } else 3803 team = VMAddressSpace::KernelID(); 3804 3805 AddressSpaceReadLocker locker(team); 3806 if (!locker.IsLocked()) 3807 return B_BAD_TEAM_ID; 3808 3809 VMArea* area = locker.AddressSpace()->LookupArea(address); 3810 if (area != NULL) { 3811 if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0) 3812 return B_ERROR; 3813 3814 return area->id; 3815 } 3816 3817 return B_ERROR; 3818 } 3819 3820 3821 /*! Frees physical pages that were used during the boot process. 3822 \a end is inclusive. 3823 */ 3824 static void 3825 unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end) 3826 { 3827 // free all physical pages in the specified range 3828 3829 for (addr_t current = start; current < end; current += B_PAGE_SIZE) { 3830 phys_addr_t physicalAddress; 3831 uint32 flags; 3832 3833 if (map->Query(current, &physicalAddress, &flags) == B_OK 3834 && (flags & PAGE_PRESENT) != 0) { 3835 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 3836 if (page != NULL && page->State() != PAGE_STATE_FREE 3837 && page->State() != PAGE_STATE_CLEAR 3838 && page->State() != PAGE_STATE_UNUSED) { 3839 DEBUG_PAGE_ACCESS_START(page); 3840 vm_page_set_state(page, PAGE_STATE_FREE); 3841 } 3842 } 3843 } 3844 3845 // unmap the memory 3846 map->Unmap(start, end); 3847 } 3848 3849 3850 void 3851 vm_free_unused_boot_loader_range(addr_t start, addr_t size) 3852 { 3853 VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap(); 3854 addr_t end = start + (size - 1); 3855 addr_t lastEnd = start; 3856 3857 TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n", 3858 (void*)start, (void*)end)); 3859 3860 // The areas are sorted in virtual address space order, so 3861 // we just have to find the holes between them that fall 3862 // into the area we should dispose 3863 3864 map->Lock(); 3865 3866 for (VMAddressSpace::AreaIterator it 3867 = VMAddressSpace::Kernel()->GetAreaIterator(); 3868 VMArea* area = it.Next();) { 3869 addr_t areaStart = area->Base(); 3870 addr_t areaEnd = areaStart + (area->Size() - 1); 3871 3872 if (areaEnd < start) 3873 continue; 3874 3875 if (areaStart > end) { 3876 // we are done, the area is already beyond of what we have to free 3877 break; 3878 } 3879 3880 if (areaStart > lastEnd) { 3881 // this is something we can free 3882 TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd, 3883 (void*)areaStart)); 3884 unmap_and_free_physical_pages(map, lastEnd, areaStart - 1); 3885 } 3886 3887 if (areaEnd >= end) 
{ 3888 lastEnd = areaEnd; 3889 // no +1 to prevent potential overflow 3890 break; 3891 } 3892 3893 lastEnd = areaEnd + 1; 3894 } 3895 3896 if (lastEnd < end) { 3897 // we can also get rid of some space at the end of the area 3898 TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd, 3899 (void*)end)); 3900 unmap_and_free_physical_pages(map, lastEnd, end); 3901 } 3902 3903 map->Unlock(); 3904 } 3905 3906 3907 static void 3908 create_preloaded_image_areas(struct preloaded_image* _image) 3909 { 3910 preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image); 3911 char name[B_OS_NAME_LENGTH]; 3912 void* address; 3913 int32 length; 3914 3915 // use file name to create a good area name 3916 char* fileName = strrchr(image->name, '/'); 3917 if (fileName == NULL) 3918 fileName = image->name; 3919 else 3920 fileName++; 3921 3922 length = strlen(fileName); 3923 // make sure there is enough space for the suffix 3924 if (length > 25) 3925 length = 25; 3926 3927 memcpy(name, fileName, length); 3928 strcpy(name + length, "_text"); 3929 address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE); 3930 image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3931 PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED, 3932 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3933 // this will later be remapped read-only/executable by the 3934 // ELF initialization code 3935 3936 strcpy(name + length, "_data"); 3937 address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE); 3938 image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS, 3939 PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED, 3940 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3941 } 3942 3943 3944 /*! Frees all kernel arguments areas previously created from the kernel_args structure. 3945 Any boot loader resources contained in those arguments must not be accessed 3946 anymore past this point. 3947 */ 3948 void 3949 vm_free_kernel_args(kernel_args* args) 3950 { 3951 uint32 i; 3952 3953 TRACE(("vm_free_kernel_args()\n")); 3954 3955 for (i = 0; i < args->num_kernel_args_ranges; i++) { 3956 area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start); 3957 if (area >= B_OK) 3958 delete_area(area); 3959 } 3960 } 3961 3962 3963 static void 3964 allocate_kernel_args(kernel_args* args) 3965 { 3966 TRACE(("allocate_kernel_args()\n")); 3967 3968 for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) { 3969 void* address = (void*)(addr_t)args->kernel_args_range[i].start; 3970 3971 create_area("_kernel args_", &address, B_EXACT_ADDRESS, 3972 args->kernel_args_range[i].size, B_ALREADY_WIRED, 3973 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 3974 } 3975 } 3976 3977 3978 static void 3979 unreserve_boot_loader_ranges(kernel_args* args) 3980 { 3981 TRACE(("unreserve_boot_loader_ranges()\n")); 3982 3983 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3984 vm_unreserve_address_range(VMAddressSpace::KernelID(), 3985 (void*)(addr_t)args->virtual_allocated_range[i].start, 3986 args->virtual_allocated_range[i].size); 3987 } 3988 } 3989 3990 3991 static void 3992 reserve_boot_loader_ranges(kernel_args* args) 3993 { 3994 TRACE(("reserve_boot_loader_ranges()\n")); 3995 3996 for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) { 3997 void* address = (void*)(addr_t)args->virtual_allocated_range[i].start; 3998 3999 // If the address is not a kernel address, we just skip it. The 4000 // architecture specific code has to deal with it.
4001 if (!IS_KERNEL_ADDRESS(address)) { 4002 dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %" 4003 B_PRIu64 "\n", address, args->virtual_allocated_range[i].size); 4004 continue; 4005 } 4006 4007 status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(), 4008 &address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0); 4009 if (status < B_OK) 4010 panic("could not reserve boot loader ranges\n"); 4011 } 4012 } 4013 4014 4015 static addr_t 4016 allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment) 4017 { 4018 size = PAGE_ALIGN(size); 4019 4020 // find a slot in the virtual allocation addr range 4021 for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) { 4022 // check to see if the space between this one and the last is big enough 4023 addr_t rangeStart = args->virtual_allocated_range[i].start; 4024 addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start 4025 + args->virtual_allocated_range[i - 1].size; 4026 4027 addr_t base = alignment > 0 4028 ? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd; 4029 4030 if (base >= KERNEL_BASE && base < rangeStart 4031 && rangeStart - base >= size) { 4032 args->virtual_allocated_range[i - 1].size 4033 += base + size - previousRangeEnd; 4034 return base; 4035 } 4036 } 4037 4038 // we hadn't found one between allocation ranges. this is ok. 4039 // see if there's a gap after the last one 4040 int lastEntryIndex = args->num_virtual_allocated_ranges - 1; 4041 addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start 4042 + args->virtual_allocated_range[lastEntryIndex].size; 4043 addr_t base = alignment > 0 4044 ? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd; 4045 if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) { 4046 args->virtual_allocated_range[lastEntryIndex].size 4047 += base + size - lastRangeEnd; 4048 return base; 4049 } 4050 4051 // see if there's a gap before the first one 4052 addr_t rangeStart = args->virtual_allocated_range[0].start; 4053 if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) { 4054 base = rangeStart - size; 4055 if (alignment > 0) 4056 base = ROUNDDOWN(base, alignment); 4057 4058 if (base >= KERNEL_BASE) { 4059 args->virtual_allocated_range[0].start = base; 4060 args->virtual_allocated_range[0].size += rangeStart - base; 4061 return base; 4062 } 4063 } 4064 4065 return 0; 4066 } 4067 4068 4069 static bool 4070 is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address) 4071 { 4072 // TODO: horrible brute-force method of determining if the page can be 4073 // allocated 4074 for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) { 4075 if (address >= args->physical_memory_range[i].start 4076 && address < args->physical_memory_range[i].start 4077 + args->physical_memory_range[i].size) 4078 return true; 4079 } 4080 return false; 4081 } 4082 4083 4084 page_num_t 4085 vm_allocate_early_physical_page(kernel_args* args) 4086 { 4087 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4088 phys_addr_t nextPage; 4089 4090 nextPage = args->physical_allocated_range[i].start 4091 + args->physical_allocated_range[i].size; 4092 // see if the page after the next allocated paddr run can be allocated 4093 if (i + 1 < args->num_physical_allocated_ranges 4094 && args->physical_allocated_range[i + 1].size != 0) { 4095 // see if the next page will collide with the next allocated range 4096 if (nextPage >= args->physical_allocated_range[i+1].start) 4097 continue; 4098 } 4099 // see if the next physical page 
fits in the memory block 4100 if (is_page_in_physical_memory_range(args, nextPage)) { 4101 // we got one! 4102 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4103 return nextPage / B_PAGE_SIZE; 4104 } 4105 } 4106 4107 // Expanding upwards didn't work, try going downwards. 4108 for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) { 4109 phys_addr_t nextPage; 4110 4111 nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE; 4112 // see if the page before this allocated paddr run can be allocated 4113 if (i > 0 && args->physical_allocated_range[i - 1].size != 0) { 4114 // see if that page will collide with the previous allocated range 4115 if (nextPage < args->physical_allocated_range[i-1].start 4116 + args->physical_allocated_range[i-1].size) 4117 continue; 4118 } 4119 // see if the next physical page fits in the memory block 4120 if (is_page_in_physical_memory_range(args, nextPage)) { 4121 // we got one! 4122 args->physical_allocated_range[i].start -= B_PAGE_SIZE; 4123 args->physical_allocated_range[i].size += B_PAGE_SIZE; 4124 return nextPage / B_PAGE_SIZE; 4125 } 4126 } 4127 4128 return 0; 4129 // could not allocate a block 4130 } 4131 4132 4133 /*! This one uses the kernel_args' physical and virtual memory ranges to 4134 allocate some pages before the VM is completely up. 4135 */ 4136 addr_t 4137 vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize, 4138 uint32 attributes, addr_t alignment) 4139 { 4140 if (physicalSize > virtualSize) 4141 physicalSize = virtualSize; 4142 4143 // find the vaddr to allocate at 4144 addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment); 4145 //dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase); 4146 if (virtualBase == 0) { 4147 panic("vm_allocate_early: could not allocate virtual address\n"); 4148 return 0; 4149 } 4150 4151 // map the pages 4152 for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) { 4153 page_num_t physicalAddress = vm_allocate_early_physical_page(args); 4154 if (physicalAddress == 0) 4155 panic("error allocating early page!\n"); 4156 4157 //dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress); 4158 4159 arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE, 4160 physicalAddress * B_PAGE_SIZE, attributes, 4161 &vm_allocate_early_physical_page); 4162 } 4163 4164 return virtualBase; 4165 } 4166 4167 4168 /*! The main entry point to initialize the VM.
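Further initialization stages follow once more kernel infrastructure is available; see vm_init_post_sem(), vm_init_post_thread(), and vm_init_post_modules() below.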
*/ 4169 status_t 4170 vm_init(kernel_args* args) 4171 { 4172 struct preloaded_image* image; 4173 void* address; 4174 status_t err = 0; 4175 uint32 i; 4176 4177 TRACE(("vm_init: entry\n")); 4178 err = arch_vm_translation_map_init(args, &sPhysicalPageMapper); 4179 err = arch_vm_init(args); 4180 4181 // initialize some globals 4182 vm_page_init_num_pages(args); 4183 sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE; 4184 4185 slab_init(args); 4186 4187 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4188 off_t heapSize = INITIAL_HEAP_SIZE; 4189 // try to accommodate low memory systems 4190 while (heapSize > sAvailableMemory / 8) 4191 heapSize /= 2; 4192 if (heapSize < 1024 * 1024) 4193 panic("vm_init: go buy some RAM please."); 4194 4195 // map in the new heap and initialize it 4196 addr_t heapBase = vm_allocate_early(args, heapSize, heapSize, 4197 B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0); 4198 TRACE(("heap at 0x%lx\n", heapBase)); 4199 heap_init(heapBase, heapSize); 4200 #endif 4201 4202 // initialize the free page list and physical page mapper 4203 vm_page_init(args); 4204 4205 // initialize the cache allocators 4206 vm_cache_init(args); 4207 4208 { 4209 status_t error = VMAreaHash::Init(); 4210 if (error != B_OK) 4211 panic("vm_init: error initializing area hash table\n"); 4212 } 4213 4214 VMAddressSpace::Init(); 4215 reserve_boot_loader_ranges(args); 4216 4217 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4218 heap_init_post_area(); 4219 #endif 4220 4221 // Do any further initialization that the architecture dependent layers may 4222 // need now 4223 arch_vm_translation_map_init_post_area(args); 4224 arch_vm_init_post_area(args); 4225 vm_page_init_post_area(args); 4226 slab_init_post_area(); 4227 4228 // allocate areas to represent stuff that already exists 4229 4230 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4231 address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE); 4232 create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize, 4233 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4234 #endif 4235 4236 allocate_kernel_args(args); 4237 4238 create_preloaded_image_areas(args->kernel_image); 4239 4240 // allocate areas for preloaded images 4241 for (image = args->preloaded_images; image != NULL; image = image->next) 4242 create_preloaded_image_areas(image); 4243 4244 // allocate kernel stacks 4245 for (i = 0; i < args->num_cpus; i++) { 4246 char name[64]; 4247 4248 sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1); 4249 address = (void*)args->cpu_kstack[i].start; 4250 create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size, 4251 B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA); 4252 } 4253 4254 void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE); 4255 vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE); 4256 4257 #if PARANOID_KERNEL_MALLOC 4258 vm_block_address_range("uninitialized heap memory", 4259 (void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4260 #endif 4261 #if PARANOID_KERNEL_FREE 4262 vm_block_address_range("freed heap memory", 4263 (void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64); 4264 #endif 4265 4266 // create the object cache for the page mappings 4267 gPageMappingsObjectCache = create_object_cache_etc("page mappings", 4268 sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL, 4269 NULL, NULL); 4270 if (gPageMappingsObjectCache == NULL) 4271 panic("failed to create page mappings object cache"); 4272 4273
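// Keep a minimum reserve of page mapping objects around, so that the page
// fault path can still allocate mappings when memory is tight (compare the
// object_cache_reserve() fallback in vm_soft_fault()).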
object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024); 4274 4275 #if DEBUG_CACHE_LIST 4276 if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) { 4277 virtual_address_restrictions virtualRestrictions = {}; 4278 virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS; 4279 physical_address_restrictions physicalRestrictions = {}; 4280 create_area_etc(VMAddressSpace::KernelID(), "cache info table", 4281 ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE), 4282 B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 4283 CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions, 4284 &physicalRestrictions, (void**)&sCacheInfoTable); 4285 } 4286 #endif // DEBUG_CACHE_LIST 4287 4288 // add some debugger commands 4289 add_debugger_command("areas", &dump_area_list, "Dump a list of all areas"); 4290 add_debugger_command("area", &dump_area, 4291 "Dump info about a particular area"); 4292 add_debugger_command("cache", &dump_cache, "Dump VMCache"); 4293 add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree"); 4294 #if DEBUG_CACHE_LIST 4295 if (sCacheInfoTable != NULL) { 4296 add_debugger_command_etc("caches", &dump_caches, 4297 "List all VMCache trees", 4298 "[ \"-c\" ]\n" 4299 "All cache trees are listed sorted in decreasing order by number " 4300 "of\n" 4301 "used pages or, if \"-c\" is specified, by size of committed " 4302 "memory.\n", 4303 0); 4304 } 4305 #endif 4306 add_debugger_command("avail", &dump_available_memory, 4307 "Dump available memory"); 4308 add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)"); 4309 add_debugger_command("dw", &display_mem, "dump memory words (32-bit)"); 4310 add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)"); 4311 add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)"); 4312 add_debugger_command("string", &display_mem, "dump strings"); 4313 4314 add_debugger_command_etc("mapping", &dump_mapping_info, 4315 "Print address mapping information", 4316 "[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n" 4317 "Prints low-level page mapping information for a given address. If\n" 4318 "neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n" 4319 "address that is looked up in the translation map of the current\n" 4320 "team, respectively the team specified by thread ID <thread ID>. If\n" 4321 "\"-r\" is specified, <address> is a physical address that is\n" 4322 "searched in the translation map of all teams, respectively the team\n" 4323 "specified by thread ID <thread ID>. If \"-p\" is specified,\n" 4324 "<address> is the address of a vm_page structure. 
The behavior is\n" 4325 "equivalent to specifying \"-r\" with the physical address of that\n" 4326 "page.\n", 4327 0); 4328 4329 TRACE(("vm_init: exit\n")); 4330 4331 vm_cache_init_post_heap(); 4332 4333 return err; 4334 } 4335 4336 4337 status_t 4338 vm_init_post_sem(kernel_args* args) 4339 { 4340 // This frees all unused boot loader resources and makes its space available 4341 // again 4342 arch_vm_init_end(args); 4343 unreserve_boot_loader_ranges(args); 4344 4345 // fill in all of the semaphores that were not allocated before 4346 // since we're still single threaded and only the kernel address space 4347 // exists, it isn't that hard to find all of the ones we need to create 4348 4349 arch_vm_translation_map_init_post_sem(args); 4350 4351 slab_init_post_sem(); 4352 4353 #if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC 4354 heap_init_post_sem(); 4355 #endif 4356 4357 return B_OK; 4358 } 4359 4360 4361 status_t 4362 vm_init_post_thread(kernel_args* args) 4363 { 4364 vm_page_init_post_thread(args); 4365 slab_init_post_thread(); 4366 return heap_init_post_thread(); 4367 } 4368 4369 4370 status_t 4371 vm_init_post_modules(kernel_args* args) 4372 { 4373 return arch_vm_init_post_modules(args); 4374 } 4375 4376 4377 void 4378 permit_page_faults(void) 4379 { 4380 Thread* thread = thread_get_current_thread(); 4381 if (thread != NULL) 4382 atomic_add(&thread->page_faults_allowed, 1); 4383 } 4384 4385 4386 void 4387 forbid_page_faults(void) 4388 { 4389 Thread* thread = thread_get_current_thread(); 4390 if (thread != NULL) 4391 atomic_add(&thread->page_faults_allowed, -1); 4392 } 4393 4394 4395 status_t 4396 vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute, 4397 bool isUser, addr_t* newIP) 4398 { 4399 FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address, 4400 faultAddress)); 4401 4402 TPF(PageFaultStart(address, isWrite, isUser, faultAddress)); 4403 4404 addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE); 4405 VMAddressSpace* addressSpace = NULL; 4406 4407 status_t status = B_OK; 4408 *newIP = 0; 4409 atomic_add((int32*)&sPageFaults, 1); 4410 4411 if (IS_KERNEL_ADDRESS(pageAddress)) { 4412 addressSpace = VMAddressSpace::GetKernel(); 4413 } else if (IS_USER_ADDRESS(pageAddress)) { 4414 addressSpace = VMAddressSpace::GetCurrent(); 4415 if (addressSpace == NULL) { 4416 if (!isUser) { 4417 dprintf("vm_page_fault: kernel thread accessing invalid user " 4418 "memory!\n"); 4419 status = B_BAD_ADDRESS; 4420 TPF(PageFaultError(-1, 4421 VMPageFaultTracing 4422 ::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY)); 4423 } else { 4424 // XXX weird state. 
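// A userland thread faulted on a user address although its team has no
// address space anymore -- this should not happen.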
4425 panic("vm_page_fault: non kernel thread accessing user memory " 4426 "that doesn't exist!\n"); 4427 status = B_BAD_ADDRESS; 4428 } 4429 } 4430 } else { 4431 // the hit was probably in the 64k DMZ between kernel and user space 4432 // this keeps a user space thread from passing a buffer that crosses 4433 // into kernel space 4434 status = B_BAD_ADDRESS; 4435 TPF(PageFaultError(-1, 4436 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE)); 4437 } 4438 4439 if (status == B_OK) { 4440 status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute, 4441 isUser, NULL); 4442 } 4443 4444 if (status < B_OK) { 4445 dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at " 4446 "0x%lx, ip 0x%lx, write %d, user %d, exec %d, thread 0x%" B_PRIx32 "\n", 4447 strerror(status), address, faultAddress, isWrite, isUser, isExecute, 4448 thread_get_current_thread_id()); 4449 if (!isUser) { 4450 Thread* thread = thread_get_current_thread(); 4451 if (thread != NULL && thread->fault_handler != 0) { 4452 // this will cause the arch dependent page fault handler to 4453 // modify the IP on the interrupt frame or whatever to return 4454 // to this address 4455 *newIP = reinterpret_cast<uintptr_t>(thread->fault_handler); 4456 } else { 4457 // unhandled page fault in the kernel 4458 panic("vm_page_fault: unhandled page fault in kernel space at " 4459 "0x%lx, ip 0x%lx\n", address, faultAddress); 4460 } 4461 } else { 4462 Thread* thread = thread_get_current_thread(); 4463 4464 #ifdef TRACE_FAULTS 4465 VMArea* area = NULL; 4466 if (addressSpace != NULL) { 4467 addressSpace->ReadLock(); 4468 area = addressSpace->LookupArea(faultAddress); 4469 } 4470 4471 dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team " 4472 "\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx " 4473 "(\"%s\" +%#lx)\n", thread->name, thread->id, 4474 thread->team->Name(), thread->team->id, 4475 isWrite ? "write" : (isExecute ? "execute" : "read"), address, 4476 faultAddress, area ? area->name : "???", faultAddress - (area ? 4477 area->Base() : 0x0)); 4478 4479 if (addressSpace != NULL) 4480 addressSpace->ReadUnlock(); 4481 #endif 4482 4483 // If the thread has a signal handler for SIGSEGV, we simply 4484 // send it the signal. Otherwise we notify the user debugger 4485 // first. 4486 struct sigaction action; 4487 if ((sigaction(SIGSEGV, NULL, &action) == 0 4488 && action.sa_handler != SIG_DFL 4489 && action.sa_handler != SIG_IGN) 4490 || user_debug_exception_occurred(B_SEGMENT_VIOLATION, 4491 SIGSEGV)) { 4492 Signal signal(SIGSEGV, 4493 status == B_PERMISSION_DENIED 4494 ? 
SEGV_ACCERR : SEGV_MAPERR, 4495 EFAULT, thread->team->id); 4496 signal.SetAddress((void*)address); 4497 send_signal_to_thread(thread, signal, 0); 4498 } 4499 } 4500 } 4501 4502 if (addressSpace != NULL) 4503 addressSpace->Put(); 4504 4505 return B_HANDLED_INTERRUPT; 4506 } 4507 4508 4509 struct PageFaultContext { 4510 AddressSpaceReadLocker addressSpaceLocker; 4511 VMCacheChainLocker cacheChainLocker; 4512 4513 VMTranslationMap* map; 4514 VMCache* topCache; 4515 off_t cacheOffset; 4516 vm_page_reservation reservation; 4517 bool isWrite; 4518 4519 // return values 4520 vm_page* page; 4521 bool restart; 4522 bool pageAllocated; 4523 4524 4525 PageFaultContext(VMAddressSpace* addressSpace, bool isWrite) 4526 : 4527 addressSpaceLocker(addressSpace, true), 4528 map(addressSpace->TranslationMap()), 4529 isWrite(isWrite) 4530 { 4531 } 4532 4533 ~PageFaultContext() 4534 { 4535 UnlockAll(); 4536 vm_page_unreserve_pages(&reservation); 4537 } 4538 4539 void Prepare(VMCache* topCache, off_t cacheOffset) 4540 { 4541 this->topCache = topCache; 4542 this->cacheOffset = cacheOffset; 4543 page = NULL; 4544 restart = false; 4545 pageAllocated = false; 4546 4547 cacheChainLocker.SetTo(topCache); 4548 } 4549 4550 void UnlockAll(VMCache* exceptCache = NULL) 4551 { 4552 topCache = NULL; 4553 addressSpaceLocker.Unlock(); 4554 cacheChainLocker.Unlock(exceptCache); 4555 } 4556 }; 4557 4558 4559 /*! Gets the page that should be mapped into the area. 4560 Returns an error code other than \c B_OK, if the page couldn't be found or 4561 paged in. The locking state of the address space and the caches is undefined 4562 in that case. 4563 Returns \c B_OK with \c context.restart set to \c true, if the function 4564 had to unlock the address space and all caches and is supposed to be called 4565 again. 4566 Returns \c B_OK with \c context.restart set to \c false, if the page was 4567 found. It is returned in \c context.page. The address space will still be 4568 locked as well as all caches starting from the top cache to at least the 4569 cache the page lives in. 4570 */ 4571 static status_t 4572 fault_get_page(PageFaultContext& context) 4573 { 4574 VMCache* cache = context.topCache; 4575 VMCache* lastCache = NULL; 4576 vm_page* page = NULL; 4577 4578 while (cache != NULL) { 4579 // We already hold the lock of the cache at this point. 4580 4581 lastCache = cache; 4582 4583 page = cache->LookupPage(context.cacheOffset); 4584 if (page != NULL && page->busy) { 4585 // the page is busy -- wait for it to become unbusy 4586 context.UnlockAll(cache); 4587 cache->ReleaseRefLocked(); 4588 cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false); 4589 4590 // restart the whole process 4591 context.restart = true; 4592 return B_OK; 4593 } 4594 4595 if (page != NULL) 4596 break; 4597 4598 // The current cache does not contain the page we're looking for. 4599 4600 // see if the backing store has it 4601 if (cache->HasPage(context.cacheOffset)) { 4602 // insert a fresh page and mark it busy -- we're going to read it in 4603 page = vm_page_allocate_page(&context.reservation, 4604 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY); 4605 cache->InsertPage(page, context.cacheOffset); 4606 4607 // We need to unlock all caches and the address space while reading 4608 // the page in. Keep a reference to the cache around.
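// (The reference keeps the cache alive while everything is unlocked.)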
4609 cache->AcquireRefLocked(); 4610 context.UnlockAll(); 4611 4612 // read the page in 4613 generic_io_vec vec; 4614 vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE; 4615 generic_size_t bytesRead = vec.length = B_PAGE_SIZE; 4616 4617 status_t status = cache->Read(context.cacheOffset, &vec, 1, 4618 B_PHYSICAL_IO_REQUEST, &bytesRead); 4619 4620 cache->Lock(); 4621 4622 if (status < B_OK) { 4623 // on error remove and free the page 4624 dprintf("reading page from cache %p returned: %s!\n", 4625 cache, strerror(status)); 4626 4627 cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY); 4628 cache->RemovePage(page); 4629 vm_page_set_state(page, PAGE_STATE_FREE); 4630 4631 cache->ReleaseRefAndUnlock(); 4632 return status; 4633 } 4634 4635 // mark the page unbusy again 4636 cache->MarkPageUnbusy(page); 4637 4638 DEBUG_PAGE_ACCESS_END(page); 4639 4640 // Since we needed to unlock everything temporarily, the area 4641 // situation might have changed. So we need to restart the whole 4642 // process. 4643 cache->ReleaseRefAndUnlock(); 4644 context.restart = true; 4645 return B_OK; 4646 } 4647 4648 cache = context.cacheChainLocker.LockSourceCache(); 4649 } 4650 4651 if (page == NULL) { 4652 // There was no adequate page, determine the cache for a clean one. 4653 // Read-only pages come in the deepest cache, only the top most cache 4654 // may have direct write access. 4655 cache = context.isWrite ? context.topCache : lastCache; 4656 4657 // allocate a clean page 4658 page = vm_page_allocate_page(&context.reservation, 4659 PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR); 4660 FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n", 4661 page->physical_page_number)); 4662 4663 // insert the new page into our cache 4664 cache->InsertPage(page, context.cacheOffset); 4665 context.pageAllocated = true; 4666 } else if (page->Cache() != context.topCache && context.isWrite) { 4667 // We have a page that has the data we want, but in the wrong cache 4668 // object so we need to copy it and stick it into the top cache. 4669 vm_page* sourcePage = page; 4670 4671 // TODO: If memory is low, it might be a good idea to steal the page 4672 // from our source cache -- if possible, that is. 4673 FTRACE(("get new page, copy it, and put it into the topmost cache\n")); 4674 page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE); 4675 4676 // To not needlessly kill concurrency we unlock all caches but the top 4677 // one while copying the page. Lacking another mechanism to ensure that 4678 // the source page doesn't disappear, we mark it busy. 4679 sourcePage->busy = true; 4680 context.cacheChainLocker.UnlockKeepRefs(true); 4681 4682 // copy the page 4683 vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE, 4684 sourcePage->physical_page_number * B_PAGE_SIZE); 4685 4686 context.cacheChainLocker.RelockCaches(true); 4687 sourcePage->Cache()->MarkPageUnbusy(sourcePage); 4688 4689 // insert the new page into our cache 4690 context.topCache->InsertPage(page, context.cacheOffset); 4691 context.pageAllocated = true; 4692 } else 4693 DEBUG_PAGE_ACCESS_START(page); 4694 4695 context.page = page; 4696 return B_OK; 4697 } 4698 4699 4700 /*! Makes sure the address in the given address space is mapped. 4701 4702 \param addressSpace The address space. 4703 \param originalAddress The address. Doesn't need to be page aligned. 4704 \param isWrite If \c true the address shall be write-accessible. 4705 \param isUser If \c true the access is requested by a userland team. 
4706 \param wirePage On success, if non \c NULL, the wired count of the page 4707 mapped at the given address is incremented and the page is returned 4708 via this parameter. 4709 \return \c B_OK on success, another error code otherwise. 4710 */ 4711 static status_t 4712 vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress, 4713 bool isWrite, bool isExecute, bool isUser, vm_page** wirePage) 4714 { 4715 FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", " 4716 "isWrite %d, isUser %d\n", thread_get_current_thread_id(), 4717 originalAddress, isWrite, isUser)); 4718 4719 PageFaultContext context(addressSpace, isWrite); 4720 4721 addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE); 4722 status_t status = B_OK; 4723 4724 addressSpace->IncrementFaultCount(); 4725 4726 // We may need up to 2 pages plus pages needed for mapping them -- reserving 4727 // the pages upfront makes sure we don't have any cache locked, so that the 4728 // page daemon/thief can do their job without problems. 4729 size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress, 4730 originalAddress); 4731 context.addressSpaceLocker.Unlock(); 4732 vm_page_reserve_pages(&context.reservation, reservePages, 4733 addressSpace == VMAddressSpace::Kernel() 4734 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER); 4735 4736 while (true) { 4737 context.addressSpaceLocker.Lock(); 4738 4739 // get the area the fault was in 4740 VMArea* area = addressSpace->LookupArea(address); 4741 if (area == NULL) { 4742 dprintf("vm_soft_fault: va 0x%lx not covered by area in address " 4743 "space\n", originalAddress); 4744 TPF(PageFaultError(-1, 4745 VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA)); 4746 status = B_BAD_ADDRESS; 4747 break; 4748 } 4749 4750 // check permissions 4751 uint32 protection = get_area_page_protection(area, address); 4752 if (isUser && (protection & B_USER_PROTECTION) == 0 4753 && (area->protection & B_KERNEL_AREA) != 0) { 4754 dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n", 4755 area->id, (void*)originalAddress); 4756 TPF(PageFaultError(area->id, 4757 VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY)); 4758 status = B_PERMISSION_DENIED; 4759 break; 4760 } 4761 if (isWrite && (protection 4762 & (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) { 4763 dprintf("write access attempted on write-protected area 0x%" 4764 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4765 TPF(PageFaultError(area->id, 4766 VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED)); 4767 status = B_PERMISSION_DENIED; 4768 break; 4769 } else if (isExecute && (protection 4770 & (B_EXECUTE_AREA | (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) { 4771 dprintf("instruction fetch attempted on execute-protected area 0x%" 4772 B_PRIx32 " at %p\n", area->id, (void*)originalAddress); 4773 TPF(PageFaultError(area->id, 4774 VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED)); 4775 status = B_PERMISSION_DENIED; 4776 break; 4777 } else if (!isWrite && !isExecute && (protection 4778 & (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) { 4779 dprintf("read access attempted on read-protected area 0x%" B_PRIx32 4780 " at %p\n", area->id, (void*)originalAddress); 4781 TPF(PageFaultError(area->id, 4782 VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED)); 4783 status = B_PERMISSION_DENIED; 4784 break; 4785 } 4786 4787 // We have the area, it was a valid access, so let's try to resolve the 4788 // page fault now. 4789 // At first, the top most cache from the area is investigated. 
4790 4791 context.Prepare(vm_area_get_locked_cache(area), 4792 address - area->Base() + area->cache_offset); 4793 4794 // See if this cache has a fault handler -- this will do all the work 4795 // for us. 4796 { 4797 // Note, since the page fault is resolved with interrupts enabled, 4798 // the fault handler could be called more than once for the same 4799 // reason -- the store must take this into account. 4800 status = context.topCache->Fault(addressSpace, context.cacheOffset); 4801 if (status != B_BAD_HANDLER) 4802 break; 4803 } 4804 4805 // The top most cache has no fault handler, so let's see if the cache or 4806 // its sources already have the page we're searching for (we're going 4807 // from top to bottom). 4808 status = fault_get_page(context); 4809 if (status != B_OK) { 4810 TPF(PageFaultError(area->id, status)); 4811 break; 4812 } 4813 4814 if (context.restart) 4815 continue; 4816 4817 // All went fine, all there is left to do is to map the page into the 4818 // address space. 4819 TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(), 4820 context.page)); 4821 4822 // If the page doesn't reside in the area's cache, we need to make sure 4823 // it's mapped read-only, so that we cannot overwrite someone else's 4824 // data (copy-on-write) 4825 uint32 newProtection = protection; 4826 if (context.page->Cache() != context.topCache && !isWrite) 4827 newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA); 4828 4829 bool unmapPage = false; 4830 bool mapPage = true; 4831 4832 // check whether there's already a page mapped at the address 4833 context.map->Lock(); 4834 4835 phys_addr_t physicalAddress; 4836 uint32 flags; 4837 vm_page* mappedPage = NULL; 4838 if (context.map->Query(address, &physicalAddress, &flags) == B_OK 4839 && (flags & PAGE_PRESENT) != 0 4840 && (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 4841 != NULL) { 4842 // Yep there's already a page. If it's ours, we can simply adjust 4843 // its protection. Otherwise we have to unmap it. 4844 if (mappedPage == context.page) { 4845 context.map->ProtectPage(area, address, newProtection); 4846 // Note: We assume that ProtectPage() is atomic (i.e. 4847 // the page isn't temporarily unmapped), otherwise we'd have 4848 // to make sure it isn't wired. 4849 mapPage = false; 4850 } else 4851 unmapPage = true; 4852 } 4853 4854 context.map->Unlock(); 4855 4856 if (unmapPage) { 4857 // If the page is wired, we can't unmap it. Wait until it is unwired 4858 // again and restart. Note that the page cannot be wired for 4859 // writing, since it isn't in the topmost cache. So we can safely 4860 // ignore ranges wired for writing (our own and other concurrent 4861 // wiring attempts in progress) and in fact have to do that to avoid 4862 // a deadlock. 4863 VMAreaUnwiredWaiter waiter; 4864 if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE, 4865 VMArea::IGNORE_WRITE_WIRED_RANGES)) { 4866 // unlock everything and wait 4867 if (context.pageAllocated) { 4868 // ... but since we allocated a page and inserted it into 4869 // the top cache, remove and free it first. Otherwise we'd 4870 // have a page from a lower cache mapped while an upper 4871 // cache has a page that would shadow it.
context.topCache->RemovePage(context.page); 4873 vm_page_free_etc(context.topCache, context.page, 4874 &context.reservation); 4875 } else 4876 DEBUG_PAGE_ACCESS_END(context.page); 4877 4878 context.UnlockAll(); 4879 waiter.waitEntry.Wait(); 4880 continue; 4881 } 4882 4883 // Note: The mapped page is a page of a lower cache. We are 4884 // guaranteed to have that cache locked, our new page is a copy of 4885 // that page, and the page is not busy. The logic for that guarantee 4886 // is as follows: Since the page is mapped, it must live in the top 4887 // cache (ruled out above) or any of its lower caches, and there is 4888 // (was before the new page was inserted) no other page in any 4889 // cache between the top cache and the page's cache (otherwise that 4890 // would be mapped instead). That in turn means that our algorithm 4891 // must have found it and therefore it cannot be busy either. 4892 DEBUG_PAGE_ACCESS_START(mappedPage); 4893 unmap_page(area, address); 4894 DEBUG_PAGE_ACCESS_END(mappedPage); 4895 } 4896 4897 if (mapPage) { 4898 if (map_page(area, context.page, address, newProtection, 4899 &context.reservation) != B_OK) { 4900 // Mapping can only fail when the page mapping object couldn't 4901 // be allocated. Save for the missing mapping, everything is 4902 // fine, though. If this was a regular page fault, we'll simply 4903 // leave and probably fault again. To make sure we'll have more 4904 // luck then, we ensure that the minimum object reserve is 4905 // available. 4906 DEBUG_PAGE_ACCESS_END(context.page); 4907 4908 context.UnlockAll(); 4909 4910 if (object_cache_reserve(gPageMappingsObjectCache, 1, 0) 4911 != B_OK) { 4912 // Apparently the situation is serious. Let's get ourselves 4913 // killed. 4914 status = B_NO_MEMORY; 4915 } else if (wirePage != NULL) { 4916 // The caller expects us to wire the page. Since 4917 // object_cache_reserve() succeeded, we should now be able 4918 // to allocate a mapping structure. Restart.
4919 continue; 4920 } 4921 4922 break; 4923 } 4924 } else if (context.page->State() == PAGE_STATE_INACTIVE) 4925 vm_page_set_state(context.page, PAGE_STATE_ACTIVE); 4926 4927 // also wire the page, if requested 4928 if (wirePage != NULL && status == B_OK) { 4929 increment_page_wired_count(context.page); 4930 *wirePage = context.page; 4931 } 4932 4933 DEBUG_PAGE_ACCESS_END(context.page); 4934 4935 break; 4936 } 4937 4938 return status; 4939 } 4940 4941 4942 status_t 4943 vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4944 { 4945 return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle); 4946 } 4947 4948 status_t 4949 vm_put_physical_page(addr_t vaddr, void* handle) 4950 { 4951 return sPhysicalPageMapper->PutPage(vaddr, handle); 4952 } 4953 4954 4955 status_t 4956 vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr, 4957 void** _handle) 4958 { 4959 return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle); 4960 } 4961 4962 status_t 4963 vm_put_physical_page_current_cpu(addr_t vaddr, void* handle) 4964 { 4965 return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle); 4966 } 4967 4968 4969 status_t 4970 vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle) 4971 { 4972 return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle); 4973 } 4974 4975 status_t 4976 vm_put_physical_page_debug(addr_t vaddr, void* handle) 4977 { 4978 return sPhysicalPageMapper->PutPageDebug(vaddr, handle); 4979 } 4980 4981 4982 void 4983 vm_get_info(system_info* info) 4984 { 4985 swap_get_info(info); 4986 4987 MutexLocker locker(sAvailableMemoryLock); 4988 info->needed_memory = sNeededMemory; 4989 info->free_memory = sAvailableMemory; 4990 } 4991 4992 4993 uint32 4994 vm_num_page_faults(void) 4995 { 4996 return sPageFaults; 4997 } 4998 4999 5000 off_t 5001 vm_available_memory(void) 5002 { 5003 MutexLocker locker(sAvailableMemoryLock); 5004 return sAvailableMemory; 5005 } 5006 5007 5008 off_t 5009 vm_available_not_needed_memory(void) 5010 { 5011 MutexLocker locker(sAvailableMemoryLock); 5012 return sAvailableMemory - sNeededMemory; 5013 } 5014 5015 5016 /*! Like vm_available_not_needed_memory(), but only for use in the kernel 5017 debugger. 
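Unlike that function it does not acquire \c sAvailableMemoryLock, so the returned value may be inconsistent.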
5018 */ 5019 off_t 5020 vm_available_not_needed_memory_debug(void) 5021 { 5022 return sAvailableMemory - sNeededMemory; 5023 } 5024 5025 5026 size_t 5027 vm_kernel_address_space_left(void) 5028 { 5029 return VMAddressSpace::Kernel()->FreeSpace(); 5030 } 5031 5032 5033 void 5034 vm_unreserve_memory(size_t amount) 5035 { 5036 mutex_lock(&sAvailableMemoryLock); 5037 5038 sAvailableMemory += amount; 5039 5040 mutex_unlock(&sAvailableMemoryLock); 5041 } 5042 5043 5044 status_t 5045 vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout) 5046 { 5047 size_t reserve = kMemoryReserveForPriority[priority]; 5048 5049 MutexLocker locker(sAvailableMemoryLock); 5050 5051 //dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory); 5052 5053 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5054 sAvailableMemory -= amount; 5055 return B_OK; 5056 } 5057 5058 if (timeout <= 0) 5059 return B_NO_MEMORY; 5060 5061 // turn timeout into an absolute timeout 5062 timeout += system_time(); 5063 5064 // loop until we've got the memory or the timeout occurs 5065 do { 5066 sNeededMemory += amount; 5067 5068 // call the low resource manager 5069 locker.Unlock(); 5070 low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory, 5071 B_ABSOLUTE_TIMEOUT, timeout); 5072 locker.Lock(); 5073 5074 sNeededMemory -= amount; 5075 5076 if (sAvailableMemory >= (off_t)(amount + reserve)) { 5077 sAvailableMemory -= amount; 5078 return B_OK; 5079 } 5080 } while (timeout > system_time()); 5081 5082 return B_NO_MEMORY; 5083 } 5084 5085 5086 status_t 5087 vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type) 5088 { 5089 // NOTE: The caller is responsible for synchronizing calls to this function! 5090 5091 AddressSpaceReadLocker locker; 5092 VMArea* area; 5093 status_t status = locker.SetFromArea(id, area); 5094 if (status != B_OK) 5095 return status; 5096 5097 // nothing to do, if the type doesn't change 5098 uint32 oldType = area->MemoryType(); 5099 if (type == oldType) 5100 return B_OK; 5101 5102 // set the memory type of the area and the mapped pages 5103 VMTranslationMap* map = area->address_space->TranslationMap(); 5104 map->Lock(); 5105 area->SetMemoryType(type); 5106 map->ProtectArea(area, area->protection); 5107 map->Unlock(); 5108 5109 // set the physical memory type 5110 status_t error = arch_vm_set_memory_type(area, physicalBase, type); 5111 if (error != B_OK) { 5112 // reset the memory type of the area and the mapped pages 5113 map->Lock(); 5114 area->SetMemoryType(oldType); 5115 map->ProtectArea(area, area->protection); 5116 map->Unlock(); 5117 return error; 5118 } 5119 5120 return B_OK; 5121 5122 } 5123 5124 5125 /*! 
This function enforces some protection properties: 5126 - kernel areas must be W^X (after kernel startup) 5127 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well 5128 - if B_READ_AREA has been set, B_KERNEL_READ_AREA is also set 5129 */ 5130 static void 5131 fix_protection(uint32* protection) 5132 { 5133 if ((*protection & B_KERNEL_EXECUTE_AREA) != 0 5134 && ((*protection & B_KERNEL_WRITE_AREA) != 0 5135 || (*protection & B_WRITE_AREA) != 0) 5136 && !gKernelStartup) 5137 panic("kernel areas cannot be both writable and executable!"); 5138 5139 if ((*protection & B_KERNEL_PROTECTION) == 0) { 5140 if ((*protection & B_WRITE_AREA) != 0) 5141 *protection |= B_KERNEL_WRITE_AREA; 5142 if ((*protection & B_READ_AREA) != 0) 5143 *protection |= B_KERNEL_READ_AREA; 5144 } 5145 } 5146 5147 5148 static void 5149 fill_area_info(struct VMArea* area, area_info* info, size_t size) 5150 { 5151 strlcpy(info->name, area->name, B_OS_NAME_LENGTH); 5152 info->area = area->id; 5153 info->address = (void*)area->Base(); 5154 info->size = area->Size(); 5155 info->protection = area->protection; 5156 info->lock = area->wiring; 5157 info->team = area->address_space->ID(); 5158 info->copy_count = 0; 5159 info->in_count = 0; 5160 info->out_count = 0; 5161 // TODO: retrieve real values here! 5162 5163 VMCache* cache = vm_area_get_locked_cache(area); 5164 5165 // Note, this is a simplification; the cache could be larger than this area 5166 info->ram_size = cache->page_count * B_PAGE_SIZE; 5167 5168 vm_area_put_locked_cache(cache); 5169 } 5170 5171 5172 static status_t 5173 vm_resize_area(area_id areaID, size_t newSize, bool kernel) 5174 { 5175 // is newSize a multiple of B_PAGE_SIZE? 5176 if (newSize & (B_PAGE_SIZE - 1)) 5177 return B_BAD_VALUE; 5178 5179 // lock all affected address spaces and the cache 5180 VMArea* area; 5181 VMCache* cache; 5182 5183 MultiAddressSpaceLocker locker; 5184 AreaCacheLocker cacheLocker; 5185 5186 status_t status; 5187 size_t oldSize; 5188 bool anyKernelArea; 5189 bool restart; 5190 5191 do { 5192 anyKernelArea = false; 5193 restart = false; 5194 5195 locker.Unset(); 5196 status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache); 5197 if (status != B_OK) 5198 return status; 5199 cacheLocker.SetTo(cache, true); // already locked 5200 5201 // enforce restrictions 5202 if (!kernel && (area->address_space == VMAddressSpace::Kernel() 5203 || (area->protection & B_KERNEL_AREA) != 0)) { 5204 dprintf("vm_resize_area: team %" B_PRId32 " tried to " 5205 "resize kernel area %" B_PRId32 " (%s)\n", 5206 team_get_current_team_id(), areaID, area->name); 5207 return B_NOT_ALLOWED; 5208 } 5209 // TODO: Enforce all restrictions (team, etc.)! 5210 5211 oldSize = area->Size(); 5212 if (newSize == oldSize) 5213 return B_OK; 5214 5215 if (cache->type != CACHE_TYPE_RAM) 5216 return B_NOT_ALLOWED; 5217 5218 if (oldSize < newSize) { 5219 // We need to check if all areas of this cache can be resized. 5220 for (VMArea* current = cache->areas; current != NULL; 5221 current = current->cache_next) { 5222 if (!current->address_space->CanResizeArea(current, newSize)) 5223 return B_ERROR; 5224 anyKernelArea 5225 |= current->address_space == VMAddressSpace::Kernel(); 5226 } 5227 } else { 5228 // We're shrinking the areas, so we must make sure the affected 5229 // ranges are not wired. 
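// (Waiting for a wired range may temporarily unlock the address spaces
// and the cache, in which case the whole check is restarted.)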
5230 for (VMArea* current = cache->areas; current != NULL; 5231 current = current->cache_next) { 5232 anyKernelArea 5233 |= current->address_space == VMAddressSpace::Kernel(); 5234 5235 if (wait_if_area_range_is_wired(current, 5236 current->Base() + newSize, oldSize - newSize, &locker, 5237 &cacheLocker)) { 5238 restart = true; 5239 break; 5240 } 5241 } 5242 } 5243 } while (restart); 5244 5245 // Okay, looks good so far, so let's do it 5246 5247 int priority = kernel && anyKernelArea 5248 ? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER; 5249 uint32 allocationFlags = kernel && anyKernelArea 5250 ? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0; 5251 5252 if (oldSize < newSize) { 5253 // Growing the cache can fail, so we do it first. 5254 status = cache->Resize(cache->virtual_base + newSize, priority); 5255 if (status != B_OK) 5256 return status; 5257 } 5258 5259 for (VMArea* current = cache->areas; current != NULL; 5260 current = current->cache_next) { 5261 status = current->address_space->ResizeArea(current, newSize, 5262 allocationFlags); 5263 if (status != B_OK) 5264 break; 5265 5266 // We also need to unmap all pages beyond the new size, if the area has 5267 // shrunk 5268 if (newSize < oldSize) { 5269 VMCacheChainLocker cacheChainLocker(cache); 5270 cacheChainLocker.LockAllSourceCaches(); 5271 5272 unmap_pages(current, current->Base() + newSize, 5273 oldSize - newSize); 5274 5275 cacheChainLocker.Unlock(cache); 5276 } 5277 } 5278 5279 if (status == B_OK) { 5280 // Shrink or grow individual page protections if in use. 5281 if (area->page_protections != NULL) { 5282 size_t bytes = (newSize / B_PAGE_SIZE + 1) / 2; 5283 uint8* newProtections 5284 = (uint8*)realloc(area->page_protections, bytes); 5285 if (newProtections == NULL) 5286 status = B_NO_MEMORY; 5287 else { 5288 area->page_protections = newProtections; 5289 5290 if (oldSize < newSize) { 5291 // init the additional page protections to that of the area 5292 uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2; 5293 uint32 areaProtection = area->protection 5294 & (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA); 5295 memset(area->page_protections + offset, 5296 areaProtection | (areaProtection << 4), bytes - offset); 5297 if ((oldSize / B_PAGE_SIZE) % 2 != 0) { 5298 uint8& entry = area->page_protections[offset - 1]; 5299 entry = (entry & 0x0f) | (areaProtection << 4); 5300 } 5301 } 5302 } 5303 } 5304 } 5305 5306 // shrinking the cache can't fail, so we do it now 5307 if (status == B_OK && newSize < oldSize) 5308 status = cache->Resize(cache->virtual_base + newSize, priority); 5309 5310 if (status != B_OK) { 5311 // Something failed -- resize the areas back to their original size. 5312 // This can fail, too, in which case we're seriously screwed. 
for (VMArea* current = cache->areas; current != NULL; 5314 current = current->cache_next) { 5315 if (current->address_space->ResizeArea(current, oldSize, 5316 allocationFlags) != B_OK) { 5317 panic("vm_resize_area(): Failed and not being able to restore " 5318 "original state."); 5319 } 5320 } 5321 5322 cache->Resize(cache->virtual_base + oldSize, priority); 5323 } 5324 5325 // TODO: we must honour the lock restrictions of this area 5326 return status; 5327 } 5328 5329 5330 status_t 5331 vm_memset_physical(phys_addr_t address, int value, phys_size_t length) 5332 { 5333 return sPhysicalPageMapper->MemsetPhysical(address, value, length); 5334 } 5335 5336 5337 status_t 5338 vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user) 5339 { 5340 return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user); 5341 } 5342 5343 5344 status_t 5345 vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length, 5346 bool user) 5347 { 5348 return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user); 5349 } 5350 5351 5352 void 5353 vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from) 5354 { 5355 return sPhysicalPageMapper->MemcpyPhysicalPage(to, from); 5356 } 5357 5358 5359 /*! Copies a range of memory directly from/to a page that might not be mapped 5360 at the moment. 5361 5362 For \a unsafeMemory the current mapping (if any) is ignored. The function 5363 walks through the respective area's cache chain to find the physical page 5364 and copies from/to it directly. 5365 The memory range starting at \a unsafeMemory with a length of \a size bytes 5366 must not cross a page boundary. 5367 5368 \param teamID The team ID identifying the address space \a unsafeMemory is 5369 to be interpreted in. Ignored, if \a unsafeMemory is a kernel address 5370 (the kernel address space is assumed in this case). If \c B_CURRENT_TEAM 5371 is passed, the address space of the thread returned by 5372 debug_get_debugged_thread() is used. 5373 \param unsafeMemory The start of the unsafe memory range to be copied 5374 from/to. 5375 \param buffer A safely accessible kernel buffer to be copied from/to. 5376 \param size The number of bytes to be copied. 5377 \param copyToUnsafe If \c true, memory is copied from \a buffer to 5378 \a unsafeMemory, the other way around otherwise.
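\return \c B_OK on success, \c B_BAD_VALUE if the range crosses a page boundary, \c B_BAD_ADDRESS if the address space or area cannot be found, \c B_UNSUPPORTED if the page is not resident (or, when copying to \a unsafeMemory, not in the area's own cache), or the error returned by the physical copy.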
5379 */ 5380 status_t 5381 vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer, 5382 size_t size, bool copyToUnsafe) 5383 { 5384 if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE) 5385 != ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) { 5386 return B_BAD_VALUE; 5387 } 5388 5389 // get the address space for the debugged thread 5390 VMAddressSpace* addressSpace; 5391 if (IS_KERNEL_ADDRESS(unsafeMemory)) { 5392 addressSpace = VMAddressSpace::Kernel(); 5393 } else if (teamID == B_CURRENT_TEAM) { 5394 Thread* thread = debug_get_debugged_thread(); 5395 if (thread == NULL || thread->team == NULL) 5396 return B_BAD_ADDRESS; 5397 5398 addressSpace = thread->team->address_space; 5399 } else 5400 addressSpace = VMAddressSpace::DebugGet(teamID); 5401 5402 if (addressSpace == NULL) 5403 return B_BAD_ADDRESS; 5404 5405 // get the area 5406 VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory); 5407 if (area == NULL) 5408 return B_BAD_ADDRESS; 5409 5410 // search the page 5411 off_t cacheOffset = (addr_t)unsafeMemory - area->Base() 5412 + area->cache_offset; 5413 VMCache* cache = area->cache; 5414 vm_page* page = NULL; 5415 while (cache != NULL) { 5416 page = cache->DebugLookupPage(cacheOffset); 5417 if (page != NULL) 5418 break; 5419 5420 // Page not found in this cache -- if it is paged out, we must not try 5421 // to get it from lower caches. 5422 if (cache->DebugHasPage(cacheOffset)) 5423 break; 5424 5425 cache = cache->source; 5426 } 5427 5428 if (page == NULL) 5429 return B_UNSUPPORTED; 5430 5431 // copy from/to physical memory 5432 phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE 5433 + (addr_t)unsafeMemory % B_PAGE_SIZE; 5434 5435 if (copyToUnsafe) { 5436 if (page->Cache() != area->cache) 5437 return B_UNSUPPORTED; 5438 5439 return vm_memcpy_to_physical(physicalAddress, buffer, size, false); 5440 } 5441 5442 return vm_memcpy_from_physical(buffer, physicalAddress, size, false); 5443 } 5444 5445 5446 /** Validate that a memory range is either fully in kernel space, or fully in 5447 * userspace */ 5448 static inline bool 5449 validate_memory_range(const void* addr, size_t size) 5450 { 5451 addr_t address = (addr_t)addr; 5452 5453 // Check for overflows on all addresses. 5454 if ((address + size) < address) 5455 return false; 5456 5457 // Validate that the address range does not cross the kernel/user boundary. 5458 return IS_USER_ADDRESS(address) == IS_USER_ADDRESS(address + size - 1); 5459 } 5460 5461 5462 // #pragma mark - kernel public API 5463 5464 5465 status_t 5466 user_memcpy(void* to, const void* from, size_t size) 5467 { 5468 if (!validate_memory_range(to, size) || !validate_memory_range(from, size)) 5469 return B_BAD_ADDRESS; 5470 5471 if (arch_cpu_user_memcpy(to, from, size) < B_OK) 5472 return B_BAD_ADDRESS; 5473 5474 return B_OK; 5475 } 5476 5477 5478 /*! \brief Copies at most (\a size - 1) characters from the string in \a from to 5479 the string in \a to, NULL-terminating the result. 5480 5481 \param to Pointer to the destination C-string. 5482 \param from Pointer to the source C-string. 5483 \param size Size in bytes of the string buffer pointed to by \a to. 5484 5485 \return strlen(\a from). 5486 */ 5487 ssize_t 5488 user_strlcpy(char* to, const char* from, size_t size) 5489 { 5490 if (to == NULL && size != 0) 5491 return B_BAD_VALUE; 5492 if (from == NULL) 5493 return B_BAD_ADDRESS; 5494 5495 // Protect the source address from overflows. 
5496 size_t maxSize = size; 5497 if ((addr_t)from + maxSize < (addr_t)from) 5498 maxSize -= (addr_t)from + maxSize; 5499 if (IS_USER_ADDRESS(from) && !IS_USER_ADDRESS((addr_t)from + maxSize)) 5500 maxSize = USER_TOP - (addr_t)from; 5501 5502 if (!validate_memory_range(to, maxSize)) 5503 return B_BAD_ADDRESS; 5504 5505 ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize); 5506 if (result < 0) 5507 return result; 5508 5509 // If we hit the address overflow boundary, fail. 5510 if ((size_t)result >= maxSize && maxSize < size) 5511 return B_BAD_ADDRESS; 5512 5513 return result; 5514 } 5515 5516 5517 status_t 5518 user_memset(void* s, char c, size_t count) 5519 { 5520 if (!validate_memory_range(s, count)) 5521 return B_BAD_ADDRESS; 5522 5523 if (arch_cpu_user_memset(s, c, count) < B_OK) 5524 return B_BAD_ADDRESS; 5525 5526 return B_OK; 5527 } 5528 5529 5530 /*! Wires a single page at the given address. 5531 5532 \param team The team whose address space the address belongs to. Supports 5533 also \c B_CURRENT_TEAM. If the given address is a kernel address, the 5534 parameter is ignored. 5535 \param address The virtual address to wire down. Does not need to 5536 be page aligned. 5537 \param writable If \c true the page shall be writable. 5538 \param info On success the info is filled in, among other things 5539 containing the physical address the given virtual one translates to. 5540 \return \c B_OK, when the page could be wired, another error code otherwise. 5541 */ 5542 status_t 5543 vm_wire_page(team_id team, addr_t address, bool writable, 5544 VMPageWiringInfo* info) 5545 { 5546 addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5547 info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false); 5548 5549 // compute the page protection that is required 5550 bool isUser = IS_USER_ADDRESS(address); 5551 uint32 requiredProtection = PAGE_PRESENT 5552 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5553 if (writable) 5554 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5555 5556 // get and read lock the address space 5557 VMAddressSpace* addressSpace = NULL; 5558 if (isUser) { 5559 if (team == B_CURRENT_TEAM) 5560 addressSpace = VMAddressSpace::GetCurrent(); 5561 else 5562 addressSpace = VMAddressSpace::Get(team); 5563 } else 5564 addressSpace = VMAddressSpace::GetKernel(); 5565 if (addressSpace == NULL) 5566 return B_ERROR; 5567 5568 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5569 5570 VMTranslationMap* map = addressSpace->TranslationMap(); 5571 status_t error = B_OK; 5572 5573 // get the area 5574 VMArea* area = addressSpace->LookupArea(pageAddress); 5575 if (area == NULL) { 5576 addressSpace->Put(); 5577 return B_BAD_ADDRESS; 5578 } 5579 5580 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5581 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5582 5583 // mark the area range wired 5584 area->Wire(&info->range); 5585 5586 // Lock the area's cache chain and the translation map. Needed to look 5587 // up the page and play with its wired count. 5588 cacheChainLocker.LockAllSourceCaches(); 5589 map->Lock(); 5590 5591 phys_addr_t physicalAddress; 5592 uint32 flags; 5593 vm_page* page; 5594 if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK 5595 && (flags & requiredProtection) == requiredProtection 5596 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5597 != NULL) { 5598 // Already mapped with the correct permissions -- just increment 5599 // the page's wired count.
5600 increment_page_wired_count(page);
5601
5602 map->Unlock();
5603 cacheChainLocker.Unlock();
5604 addressSpaceLocker.Unlock();
5605 } else {
5606 // Let vm_soft_fault() map the page for us, if possible. We need
5607 // to fully unlock to avoid deadlocks. Since we have already
5608 // wired the area itself, nothing disturbing will happen with it
5609 // in the meantime.
5610 map->Unlock();
5611 cacheChainLocker.Unlock();
5612 addressSpaceLocker.Unlock();
5613
5614 error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5615 isUser, &page);
5616
5617 if (error != B_OK) {
5618 // The page could not be mapped -- clean up.
5619 VMCache* cache = vm_area_get_locked_cache(area);
5620 area->Unwire(&info->range);
5621 cache->ReleaseRefAndUnlock();
5622 addressSpace->Put();
5623 return error;
5624 }
5625 }
5626
5627 info->physicalAddress
5628 = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5629 + address % B_PAGE_SIZE;
5630 info->page = page;
5631
5632 return B_OK;
5633 }
5634
5635
5636 /*! Unwires a single page previously wired via vm_wire_page().
5637
5638 \param info The same object passed to vm_wire_page() before.
5639 */
5640 void
5641 vm_unwire_page(VMPageWiringInfo* info)
5642 {
5643 // lock the address space
5644 VMArea* area = info->range.area;
5645 AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5646 // takes over our reference
5647
5648 // lock the top cache
5649 VMCache* cache = vm_area_get_locked_cache(area);
5650 VMCacheChainLocker cacheChainLocker(cache);
5651
5652 if (info->page->Cache() != cache) {
5653 // The page is not in the top cache, so we lock the whole cache chain
5654 // before touching the page's wired count.
5655 cacheChainLocker.LockAllSourceCaches();
5656 }
5657
5658 decrement_page_wired_count(info->page);
5659
5660 // remove the wired range from the area
5661 area->Unwire(&info->range);
5662
5663 cacheChainLocker.Unlock();
5664 }
5665
5666
5667 /*! Wires down the given address range in the specified team's address space.
5668
5669 If successful the function
5670 - acquires a reference to the specified team's address space,
5671 - adds respective wired ranges to all areas that intersect with the given
5672 address range,
5673 - makes sure all pages in the given address range are mapped with the
5674 requested access permissions and increments their wired count.
5675
5676 It fails when \a team doesn't specify a valid address space, when any part
5677 of the specified address range is not covered by areas, when the concerned
5678 areas don't allow mapping with the requested permissions, or when mapping
5679 failed for another reason.
5680
5681 When successful the call must be balanced by an unlock_memory_etc() call
5682 with the exact same parameters.
5683
5684 \param team Identifies the address space (via team ID). \c B_CURRENT_TEAM
5685 is supported.
5686 \param address The start of the address range to be wired.
5687 \param numBytes The size of the address range to be wired.
5688 \param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5689 requests that the range must be wired writable ("read from device
5690 into memory").
5691 \return \c B_OK on success, another error code otherwise.
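	A minimal usage sketch (illustrative only; \c team, \c buffer and
	\c length are placeholder names, not part of this interface):

	\code
	// Wire the buffer writable before a device-to-memory transfer.
	status_t status = lock_memory_etc(team, buffer, length, B_READ_DEVICE);
	if (status != B_OK)
		return status;

	// ... translate and use the range, e.g. via get_memory_map_etc() ...

	// Balance the call with the exact same parameters.
	unlock_memory_etc(team, buffer, length, B_READ_DEVICE);
	\endcode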
5692 */ 5693 status_t 5694 lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5695 { 5696 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5697 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5698 5699 // compute the page protection that is required 5700 bool isUser = IS_USER_ADDRESS(address); 5701 bool writable = (flags & B_READ_DEVICE) == 0; 5702 uint32 requiredProtection = PAGE_PRESENT 5703 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5704 if (writable) 5705 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5706 5707 uint32 mallocFlags = isUser 5708 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5709 5710 // get and read lock the address space 5711 VMAddressSpace* addressSpace = NULL; 5712 if (isUser) { 5713 if (team == B_CURRENT_TEAM) 5714 addressSpace = VMAddressSpace::GetCurrent(); 5715 else 5716 addressSpace = VMAddressSpace::Get(team); 5717 } else 5718 addressSpace = VMAddressSpace::GetKernel(); 5719 if (addressSpace == NULL) 5720 return B_ERROR; 5721 5722 AddressSpaceReadLocker addressSpaceLocker(addressSpace, true); 5723 // We get a new address space reference here. The one we got above will 5724 // be freed by unlock_memory_etc(). 5725 5726 VMTranslationMap* map = addressSpace->TranslationMap(); 5727 status_t error = B_OK; 5728 5729 // iterate through all concerned areas 5730 addr_t nextAddress = lockBaseAddress; 5731 while (nextAddress != lockEndAddress) { 5732 // get the next area 5733 VMArea* area = addressSpace->LookupArea(nextAddress); 5734 if (area == NULL) { 5735 error = B_BAD_ADDRESS; 5736 break; 5737 } 5738 5739 addr_t areaStart = nextAddress; 5740 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size()); 5741 5742 // allocate the wired range (do that before locking the cache to avoid 5743 // deadlocks) 5744 VMAreaWiredRange* range = new(malloc_flags(mallocFlags)) 5745 VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true); 5746 if (range == NULL) { 5747 error = B_NO_MEMORY; 5748 break; 5749 } 5750 5751 // Lock the area's top cache. This is a requirement for VMArea::Wire(). 5752 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 5753 5754 // mark the area range wired 5755 area->Wire(range); 5756 5757 // Depending on the area cache type and the wiring, we may not need to 5758 // look at the individual pages. 5759 if (area->cache_type == CACHE_TYPE_NULL 5760 || area->cache_type == CACHE_TYPE_DEVICE 5761 || area->wiring == B_FULL_LOCK 5762 || area->wiring == B_CONTIGUOUS) { 5763 nextAddress = areaEnd; 5764 continue; 5765 } 5766 5767 // Lock the area's cache chain and the translation map. Needed to look 5768 // up pages and play with their wired count. 5769 cacheChainLocker.LockAllSourceCaches(); 5770 map->Lock(); 5771 5772 // iterate through the pages and wire them 5773 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) { 5774 phys_addr_t physicalAddress; 5775 uint32 flags; 5776 5777 vm_page* page; 5778 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK 5779 && (flags & requiredProtection) == requiredProtection 5780 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE)) 5781 != NULL) { 5782 // Already mapped with the correct permissions -- just increment 5783 // the page's wired count. 5784 increment_page_wired_count(page); 5785 } else { 5786 // Let vm_soft_fault() map the page for us, if possible. We need 5787 // to fully unlock to avoid deadlocks. 
Since we have already 5788 // wired the area itself, nothing disturbing will happen with it 5789 // in the meantime. 5790 map->Unlock(); 5791 cacheChainLocker.Unlock(); 5792 addressSpaceLocker.Unlock(); 5793 5794 error = vm_soft_fault(addressSpace, nextAddress, writable, 5795 false, isUser, &page); 5796 5797 addressSpaceLocker.Lock(); 5798 cacheChainLocker.SetTo(vm_area_get_locked_cache(area)); 5799 cacheChainLocker.LockAllSourceCaches(); 5800 map->Lock(); 5801 } 5802 5803 if (error != B_OK) 5804 break; 5805 } 5806 5807 map->Unlock(); 5808 5809 if (error == B_OK) { 5810 cacheChainLocker.Unlock(); 5811 } else { 5812 // An error occurred, so abort right here. If the current address 5813 // is the first in this area, unwire the area, since we won't get 5814 // to it when reverting what we've done so far. 5815 if (nextAddress == areaStart) { 5816 area->Unwire(range); 5817 cacheChainLocker.Unlock(); 5818 range->~VMAreaWiredRange(); 5819 free_etc(range, mallocFlags); 5820 } else 5821 cacheChainLocker.Unlock(); 5822 5823 break; 5824 } 5825 } 5826 5827 if (error != B_OK) { 5828 // An error occurred, so unwire all that we've already wired. Note that 5829 // even if not a single page was wired, unlock_memory_etc() is called 5830 // to put the address space reference. 5831 addressSpaceLocker.Unlock(); 5832 unlock_memory_etc(team, (void*)lockBaseAddress, 5833 nextAddress - lockBaseAddress, flags); 5834 } 5835 5836 return error; 5837 } 5838 5839 5840 status_t 5841 lock_memory(void* address, size_t numBytes, uint32 flags) 5842 { 5843 return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags); 5844 } 5845 5846 5847 /*! Unwires an address range previously wired with lock_memory_etc(). 5848 5849 Note that a call to this function must balance a previous lock_memory_etc() 5850 call with exactly the same parameters. 5851 */ 5852 status_t 5853 unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags) 5854 { 5855 addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE); 5856 addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE); 5857 5858 // compute the page protection that is required 5859 bool isUser = IS_USER_ADDRESS(address); 5860 bool writable = (flags & B_READ_DEVICE) == 0; 5861 uint32 requiredProtection = PAGE_PRESENT 5862 | B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0); 5863 if (writable) 5864 requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0); 5865 5866 uint32 mallocFlags = isUser 5867 ? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE; 5868 5869 // get and read lock the address space 5870 VMAddressSpace* addressSpace = NULL; 5871 if (isUser) { 5872 if (team == B_CURRENT_TEAM) 5873 addressSpace = VMAddressSpace::GetCurrent(); 5874 else 5875 addressSpace = VMAddressSpace::Get(team); 5876 } else 5877 addressSpace = VMAddressSpace::GetKernel(); 5878 if (addressSpace == NULL) 5879 return B_ERROR; 5880 5881 AddressSpaceReadLocker addressSpaceLocker(addressSpace, false); 5882 // Take over the address space reference. We don't unlock until we're 5883 // done. 
5884
5885 VMTranslationMap* map = addressSpace->TranslationMap();
5886 status_t error = B_OK;
5887
5888 // iterate through all concerned areas
5889 addr_t nextAddress = lockBaseAddress;
5890 while (nextAddress != lockEndAddress) {
5891 // get the next area
5892 VMArea* area = addressSpace->LookupArea(nextAddress);
5893 if (area == NULL) {
5894 error = B_BAD_ADDRESS;
5895 break;
5896 }
5897
5898 addr_t areaStart = nextAddress;
5899 addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5900
5901 // Lock the area's top cache. This is a requirement for
5902 // VMArea::Unwire().
5903 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5904
5905 // Depending on the area cache type and the wiring, we may not need to
5906 // look at the individual pages.
5907 if (area->cache_type == CACHE_TYPE_NULL
5908 || area->cache_type == CACHE_TYPE_DEVICE
5909 || area->wiring == B_FULL_LOCK
5910 || area->wiring == B_CONTIGUOUS) {
5911 // unwire the range (to avoid deadlocks we delete the range after
5912 // unlocking the cache)
5913 nextAddress = areaEnd;
5914 VMAreaWiredRange* range = area->Unwire(areaStart,
5915 areaEnd - areaStart, writable);
5916 cacheChainLocker.Unlock();
5917 if (range != NULL) {
5918 range->~VMAreaWiredRange();
5919 free_etc(range, mallocFlags);
5920 }
5921 continue;
5922 }
5923
5924 // Lock the area's cache chain and the translation map. Needed to look
5925 // up pages and play with their wired count.
5926 cacheChainLocker.LockAllSourceCaches();
5927 map->Lock();
5928
5929 // iterate through the pages and unwire them
5930 for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5931 phys_addr_t physicalAddress;
5932 uint32 flags;
5933
5934 vm_page* page;
5935 if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5936 && (flags & PAGE_PRESENT) != 0
5937 && (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5938 != NULL) {
5939 // The page is still mapped -- just decrement its wired
5940 // count.
5941 decrement_page_wired_count(page);
5942 } else {
5943 panic("unlock_memory_etc(): Failed to unwire page: address "
5944 "space %p, address: %#" B_PRIxADDR, addressSpace,
5945 nextAddress);
5946 error = B_BAD_VALUE;
5947 break;
5948 }
5949 }
5950
5951 map->Unlock();
5952
5953 // All pages are unwired. Remove the area's wired range as well (to
5954 // avoid deadlocks we delete the range after unlocking the cache).
5955 VMAreaWiredRange* range = area->Unwire(areaStart,
5956 areaEnd - areaStart, writable);
5957
5958 cacheChainLocker.Unlock();
5959
5960 if (range != NULL) {
5961 range->~VMAreaWiredRange();
5962 free_etc(range, mallocFlags);
5963 }
5964
5965 if (error != B_OK)
5966 break;
5967 }
5968
5969 // get rid of the address space reference lock_memory_etc() acquired
5970 addressSpace->Put();
5971
5972 return error;
5973 }
5974
5975
5976 status_t
5977 unlock_memory(void* address, size_t numBytes, uint32 flags)
5978 {
5979 return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5980 }
5981
5982
5983 /*! Similar to get_memory_map(), but also allows specifying the address
5984 space for the memory in question and has saner semantics.
5985 Returns \c B_OK when the complete range could be translated or
5986 \c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5987 case the actual number of entries is written to \c *_numEntries. Any other
5988 error case indicates complete failure; \c *_numEntries will be set to \c 0
5989 in this case.
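	A minimal usage sketch (illustrative only; \c buffer and \c bufferSize
	are placeholder names):

	\code
	physical_entry entries[8];
	uint32 numEntries = 8;
	status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, bufferSize,
		entries, &numEntries);
	if (status == B_OK || status == B_BUFFER_OVERFLOW) {
		// numEntries holds the number of valid entries; on
		// B_BUFFER_OVERFLOW only part of the range was translated.
	}
	\endcode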
5990 */
5991 status_t
5992 get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5993 physical_entry* table, uint32* _numEntries)
5994 {
5995 uint32 numEntries = *_numEntries;
5996 *_numEntries = 0;
5997
5998 VMAddressSpace* addressSpace;
5999 addr_t virtualAddress = (addr_t)address;
6000 addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
6001 phys_addr_t physicalAddress;
6002 status_t status = B_OK;
6003 int32 index = -1;
6004 addr_t offset = 0;
6005 bool interrupts = are_interrupts_enabled();
6006
6007 TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
6008 "entries)\n", team, address, numBytes, numEntries));
6009
6010 if (numEntries == 0 || numBytes == 0)
6011 return B_BAD_VALUE;
6012
6013 // in which address space is the address to be found?
6014 if (IS_USER_ADDRESS(virtualAddress)) {
6015 if (team == B_CURRENT_TEAM)
6016 addressSpace = VMAddressSpace::GetCurrent();
6017 else
6018 addressSpace = VMAddressSpace::Get(team);
6019 } else
6020 addressSpace = VMAddressSpace::GetKernel();
6021
6022 if (addressSpace == NULL)
6023 return B_ERROR;
6024
6025 VMTranslationMap* map = addressSpace->TranslationMap();
6026
6027 if (interrupts)
6028 map->Lock();
6029
6030 while (offset < numBytes) {
6031 addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
6032 uint32 flags;
6033
6034 if (interrupts) {
6035 status = map->Query((addr_t)address + offset, &physicalAddress,
6036 &flags);
6037 } else {
6038 status = map->QueryInterrupt((addr_t)address + offset,
6039 &physicalAddress, &flags);
6040 }
6041 if (status < B_OK)
6042 break;
6043 if ((flags & PAGE_PRESENT) == 0) {
6044 panic("get_memory_map() called on unmapped memory!");
6045 return B_BAD_ADDRESS;
6046 }
6047
6048 if (index < 0 && pageOffset > 0) {
6049 physicalAddress += pageOffset;
6050 if (bytes > B_PAGE_SIZE - pageOffset)
6051 bytes = B_PAGE_SIZE - pageOffset;
6052 }
6053
6054 // need to switch to the next physical_entry?
6055 if (index < 0 || table[index].address
6056 != physicalAddress - table[index].size) {
6057 if ((uint32)++index + 1 > numEntries) {
6058 // table too small
6059 break;
6060 }
6061 table[index].address = physicalAddress;
6062 table[index].size = bytes;
6063 } else {
6064 // page fits into the current entry
6065 table[index].size += bytes;
6066 }
6067
6068 offset += bytes;
6069 }
6070
6071 if (interrupts)
6072 map->Unlock();
6073
6074 if (status != B_OK)
6075 return status;
6076
6077 if ((uint32)index + 1 > numEntries) {
6078 *_numEntries = index;
6079 return B_BUFFER_OVERFLOW;
6080 }
6081
6082 *_numEntries = index + 1;
6083 return B_OK;
6084 }
6085
6086
6087 /*! According to the BeBook, this function should always succeed.
6088 This is no longer the case.
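	A minimal usage sketch (illustrative only; \c buffer and \c bufferSize
	are placeholder names):

	\code
	physical_entry entry;
	if (get_memory_map(buffer, bufferSize, &entry, 1) == B_OK) {
		// entry.address and entry.size describe the first physical run
		// backing the buffer. With a table of only one entry no
		// terminating (0, 0) entry is appended.
	}
	\endcode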
6089 */ 6090 extern "C" int32 6091 __get_memory_map_haiku(const void* address, size_t numBytes, 6092 physical_entry* table, int32 numEntries) 6093 { 6094 uint32 entriesRead = numEntries; 6095 status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes, 6096 table, &entriesRead); 6097 if (error != B_OK) 6098 return error; 6099 6100 // close the entry list 6101 6102 // if it's only one entry, we will silently accept the missing ending 6103 if (numEntries == 1) 6104 return B_OK; 6105 6106 if (entriesRead + 1 > (uint32)numEntries) 6107 return B_BUFFER_OVERFLOW; 6108 6109 table[entriesRead].address = 0; 6110 table[entriesRead].size = 0; 6111 6112 return B_OK; 6113 } 6114 6115 6116 area_id 6117 area_for(void* address) 6118 { 6119 return vm_area_for((addr_t)address, true); 6120 } 6121 6122 6123 area_id 6124 find_area(const char* name) 6125 { 6126 return VMAreaHash::Find(name); 6127 } 6128 6129 6130 status_t 6131 _get_area_info(area_id id, area_info* info, size_t size) 6132 { 6133 if (size != sizeof(area_info) || info == NULL) 6134 return B_BAD_VALUE; 6135 6136 AddressSpaceReadLocker locker; 6137 VMArea* area; 6138 status_t status = locker.SetFromArea(id, area); 6139 if (status != B_OK) 6140 return status; 6141 6142 fill_area_info(area, info, size); 6143 return B_OK; 6144 } 6145 6146 6147 status_t 6148 _get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size) 6149 { 6150 addr_t nextBase = *(addr_t*)cookie; 6151 6152 // we're already through the list 6153 if (nextBase == (addr_t)-1) 6154 return B_ENTRY_NOT_FOUND; 6155 6156 if (team == B_CURRENT_TEAM) 6157 team = team_get_current_team_id(); 6158 6159 AddressSpaceReadLocker locker(team); 6160 if (!locker.IsLocked()) 6161 return B_BAD_TEAM_ID; 6162 6163 VMArea* area = locker.AddressSpace()->FindClosestArea(nextBase, false); 6164 if (area == NULL) { 6165 nextBase = (addr_t)-1; 6166 return B_ENTRY_NOT_FOUND; 6167 } 6168 6169 fill_area_info(area, info, size); 6170 *cookie = (ssize_t)(area->Base() + 1); 6171 6172 return B_OK; 6173 } 6174 6175 6176 status_t 6177 set_area_protection(area_id area, uint32 newProtection) 6178 { 6179 return vm_set_area_protection(VMAddressSpace::KernelID(), area, 6180 newProtection, true); 6181 } 6182 6183 6184 status_t 6185 resize_area(area_id areaID, size_t newSize) 6186 { 6187 return vm_resize_area(areaID, newSize, true); 6188 } 6189 6190 6191 /*! Transfers the specified area to a new team. The caller must be the owner 6192 of the area. 6193 */ 6194 area_id 6195 transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target, 6196 bool kernel) 6197 { 6198 area_info info; 6199 status_t status = get_area_info(id, &info); 6200 if (status != B_OK) 6201 return status; 6202 6203 if (info.team != thread_get_current_thread()->team->id) 6204 return B_PERMISSION_DENIED; 6205 6206 // We need to mark the area cloneable so the following operations work. 6207 status = set_area_protection(id, info.protection | B_CLONEABLE_AREA); 6208 if (status != B_OK) 6209 return status; 6210 6211 area_id clonedArea = vm_clone_area(target, info.name, _address, 6212 addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel); 6213 if (clonedArea < 0) 6214 return clonedArea; 6215 6216 status = vm_delete_area(info.team, id, kernel); 6217 if (status != B_OK) { 6218 vm_delete_area(target, clonedArea, kernel); 6219 return status; 6220 } 6221 6222 // Now we can reset the protection to whatever it was before. 
6223 set_area_protection(clonedArea, info.protection); 6224 6225 // TODO: The clonedArea is B_SHARED_AREA, which is not really desired. 6226 6227 return clonedArea; 6228 } 6229 6230 6231 extern "C" area_id 6232 __map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress, 6233 size_t numBytes, uint32 addressSpec, uint32 protection, 6234 void** _virtualAddress) 6235 { 6236 if (!arch_vm_supports_protection(protection)) 6237 return B_NOT_SUPPORTED; 6238 6239 fix_protection(&protection); 6240 6241 return vm_map_physical_memory(VMAddressSpace::KernelID(), name, 6242 _virtualAddress, addressSpec, numBytes, protection, physicalAddress, 6243 false); 6244 } 6245 6246 6247 area_id 6248 clone_area(const char* name, void** _address, uint32 addressSpec, 6249 uint32 protection, area_id source) 6250 { 6251 if ((protection & B_KERNEL_PROTECTION) == 0) 6252 protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA; 6253 6254 return vm_clone_area(VMAddressSpace::KernelID(), name, _address, 6255 addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true); 6256 } 6257 6258 6259 area_id 6260 create_area_etc(team_id team, const char* name, size_t size, uint32 lock, 6261 uint32 protection, uint32 flags, uint32 guardSize, 6262 const virtual_address_restrictions* virtualAddressRestrictions, 6263 const physical_address_restrictions* physicalAddressRestrictions, 6264 void** _address) 6265 { 6266 fix_protection(&protection); 6267 6268 return vm_create_anonymous_area(team, name, size, lock, protection, flags, 6269 guardSize, virtualAddressRestrictions, physicalAddressRestrictions, 6270 true, _address); 6271 } 6272 6273 6274 extern "C" area_id 6275 __create_area_haiku(const char* name, void** _address, uint32 addressSpec, 6276 size_t size, uint32 lock, uint32 protection) 6277 { 6278 fix_protection(&protection); 6279 6280 virtual_address_restrictions virtualRestrictions = {}; 6281 virtualRestrictions.address = *_address; 6282 virtualRestrictions.address_specification = addressSpec; 6283 physical_address_restrictions physicalRestrictions = {}; 6284 return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size, 6285 lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions, 6286 true, _address); 6287 } 6288 6289 6290 status_t 6291 delete_area(area_id area) 6292 { 6293 return vm_delete_area(VMAddressSpace::KernelID(), area, true); 6294 } 6295 6296 6297 // #pragma mark - Userland syscalls 6298 6299 6300 status_t 6301 _user_reserve_address_range(addr_t* userAddress, uint32 addressSpec, 6302 addr_t size) 6303 { 6304 // filter out some unavailable values (for userland) 6305 switch (addressSpec) { 6306 case B_ANY_KERNEL_ADDRESS: 6307 case B_ANY_KERNEL_BLOCK_ADDRESS: 6308 return B_BAD_VALUE; 6309 } 6310 6311 addr_t address; 6312 6313 if (!IS_USER_ADDRESS(userAddress) 6314 || user_memcpy(&address, userAddress, sizeof(address)) != B_OK) 6315 return B_BAD_ADDRESS; 6316 6317 status_t status = vm_reserve_address_range( 6318 VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size, 6319 RESERVED_AVOID_BASE); 6320 if (status != B_OK) 6321 return status; 6322 6323 if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) { 6324 vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6325 (void*)address, size); 6326 return B_BAD_ADDRESS; 6327 } 6328 6329 return B_OK; 6330 } 6331 6332 6333 status_t 6334 _user_unreserve_address_range(addr_t address, addr_t size) 6335 { 6336 return vm_unreserve_address_range(VMAddressSpace::CurrentID(), 6337 (void*)address, size); 6338 } 6339 6340 6341 area_id 
6342 _user_area_for(void* address) 6343 { 6344 return vm_area_for((addr_t)address, false); 6345 } 6346 6347 6348 area_id 6349 _user_find_area(const char* userName) 6350 { 6351 char name[B_OS_NAME_LENGTH]; 6352 6353 if (!IS_USER_ADDRESS(userName) 6354 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK) 6355 return B_BAD_ADDRESS; 6356 6357 return find_area(name); 6358 } 6359 6360 6361 status_t 6362 _user_get_area_info(area_id area, area_info* userInfo) 6363 { 6364 if (!IS_USER_ADDRESS(userInfo)) 6365 return B_BAD_ADDRESS; 6366 6367 area_info info; 6368 status_t status = get_area_info(area, &info); 6369 if (status < B_OK) 6370 return status; 6371 6372 // TODO: do we want to prevent userland from seeing kernel protections? 6373 //info.protection &= B_USER_PROTECTION; 6374 6375 if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6376 return B_BAD_ADDRESS; 6377 6378 return status; 6379 } 6380 6381 6382 status_t 6383 _user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo) 6384 { 6385 ssize_t cookie; 6386 6387 if (!IS_USER_ADDRESS(userCookie) 6388 || !IS_USER_ADDRESS(userInfo) 6389 || user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK) 6390 return B_BAD_ADDRESS; 6391 6392 area_info info; 6393 status_t status = _get_next_area_info(team, &cookie, &info, 6394 sizeof(area_info)); 6395 if (status != B_OK) 6396 return status; 6397 6398 //info.protection &= B_USER_PROTECTION; 6399 6400 if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK 6401 || user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK) 6402 return B_BAD_ADDRESS; 6403 6404 return status; 6405 } 6406 6407 6408 status_t 6409 _user_set_area_protection(area_id area, uint32 newProtection) 6410 { 6411 if ((newProtection & ~B_USER_PROTECTION) != 0) 6412 return B_BAD_VALUE; 6413 6414 return vm_set_area_protection(VMAddressSpace::CurrentID(), area, 6415 newProtection, false); 6416 } 6417 6418 6419 status_t 6420 _user_resize_area(area_id area, size_t newSize) 6421 { 6422 // TODO: Since we restrict deleting of areas to those owned by the team, 6423 // we should also do that for resizing (check other functions, too). 
6424 return vm_resize_area(area, newSize, false); 6425 } 6426 6427 6428 area_id 6429 _user_transfer_area(area_id area, void** userAddress, uint32 addressSpec, 6430 team_id target) 6431 { 6432 // filter out some unavailable values (for userland) 6433 switch (addressSpec) { 6434 case B_ANY_KERNEL_ADDRESS: 6435 case B_ANY_KERNEL_BLOCK_ADDRESS: 6436 return B_BAD_VALUE; 6437 } 6438 6439 void* address; 6440 if (!IS_USER_ADDRESS(userAddress) 6441 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6442 return B_BAD_ADDRESS; 6443 6444 area_id newArea = transfer_area(area, &address, addressSpec, target, false); 6445 if (newArea < B_OK) 6446 return newArea; 6447 6448 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6449 return B_BAD_ADDRESS; 6450 6451 return newArea; 6452 } 6453 6454 6455 area_id 6456 _user_clone_area(const char* userName, void** userAddress, uint32 addressSpec, 6457 uint32 protection, area_id sourceArea) 6458 { 6459 char name[B_OS_NAME_LENGTH]; 6460 void* address; 6461 6462 // filter out some unavailable values (for userland) 6463 switch (addressSpec) { 6464 case B_ANY_KERNEL_ADDRESS: 6465 case B_ANY_KERNEL_BLOCK_ADDRESS: 6466 return B_BAD_VALUE; 6467 } 6468 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6469 return B_BAD_VALUE; 6470 6471 if (!IS_USER_ADDRESS(userName) 6472 || !IS_USER_ADDRESS(userAddress) 6473 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6474 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6475 return B_BAD_ADDRESS; 6476 6477 fix_protection(&protection); 6478 6479 area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name, 6480 &address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea, 6481 false); 6482 if (clonedArea < B_OK) 6483 return clonedArea; 6484 6485 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6486 delete_area(clonedArea); 6487 return B_BAD_ADDRESS; 6488 } 6489 6490 return clonedArea; 6491 } 6492 6493 6494 area_id 6495 _user_create_area(const char* userName, void** userAddress, uint32 addressSpec, 6496 size_t size, uint32 lock, uint32 protection) 6497 { 6498 char name[B_OS_NAME_LENGTH]; 6499 void* address; 6500 6501 // filter out some unavailable values (for userland) 6502 switch (addressSpec) { 6503 case B_ANY_KERNEL_ADDRESS: 6504 case B_ANY_KERNEL_BLOCK_ADDRESS: 6505 return B_BAD_VALUE; 6506 } 6507 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6508 return B_BAD_VALUE; 6509 6510 if (!IS_USER_ADDRESS(userName) 6511 || !IS_USER_ADDRESS(userAddress) 6512 || user_strlcpy(name, userName, sizeof(name)) < B_OK 6513 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6514 return B_BAD_ADDRESS; 6515 6516 if (addressSpec == B_EXACT_ADDRESS 6517 && IS_KERNEL_ADDRESS(address)) 6518 return B_BAD_VALUE; 6519 6520 if (addressSpec == B_ANY_ADDRESS) 6521 addressSpec = B_RANDOMIZED_ANY_ADDRESS; 6522 if (addressSpec == B_BASE_ADDRESS) 6523 addressSpec = B_RANDOMIZED_BASE_ADDRESS; 6524 6525 fix_protection(&protection); 6526 6527 virtual_address_restrictions virtualRestrictions = {}; 6528 virtualRestrictions.address = address; 6529 virtualRestrictions.address_specification = addressSpec; 6530 physical_address_restrictions physicalRestrictions = {}; 6531 area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name, 6532 size, lock, protection, 0, 0, &virtualRestrictions, 6533 &physicalRestrictions, false, &address); 6534 6535 if (area >= B_OK 6536 && user_memcpy(userAddress, &address, sizeof(address)) < B_OK) { 6537 delete_area(area); 6538 return B_BAD_ADDRESS; 6539 
} 6540 6541 return area; 6542 } 6543 6544 6545 status_t 6546 _user_delete_area(area_id area) 6547 { 6548 // Unlike the BeOS implementation, you can now only delete areas 6549 // that you have created yourself from userland. 6550 // The documentation to delete_area() explicitly states that this 6551 // will be restricted in the future, and so it will. 6552 return vm_delete_area(VMAddressSpace::CurrentID(), area, false); 6553 } 6554 6555 6556 // TODO: create a BeOS style call for this! 6557 6558 area_id 6559 _user_map_file(const char* userName, void** userAddress, uint32 addressSpec, 6560 size_t size, uint32 protection, uint32 mapping, bool unmapAddressRange, 6561 int fd, off_t offset) 6562 { 6563 char name[B_OS_NAME_LENGTH]; 6564 void* address; 6565 area_id area; 6566 6567 if ((protection & ~B_USER_AREA_FLAGS) != 0) 6568 return B_BAD_VALUE; 6569 6570 fix_protection(&protection); 6571 6572 if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userAddress) 6573 || user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK 6574 || user_memcpy(&address, userAddress, sizeof(address)) < B_OK) 6575 return B_BAD_ADDRESS; 6576 6577 if (addressSpec == B_EXACT_ADDRESS) { 6578 if ((addr_t)address + size < (addr_t)address 6579 || (addr_t)address % B_PAGE_SIZE != 0) { 6580 return B_BAD_VALUE; 6581 } 6582 if (!IS_USER_ADDRESS(address) 6583 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6584 return B_BAD_ADDRESS; 6585 } 6586 } 6587 6588 area = _vm_map_file(VMAddressSpace::CurrentID(), name, &address, 6589 addressSpec, size, protection, mapping, unmapAddressRange, fd, offset, 6590 false); 6591 if (area < B_OK) 6592 return area; 6593 6594 if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) 6595 return B_BAD_ADDRESS; 6596 6597 return area; 6598 } 6599 6600 6601 status_t 6602 _user_unmap_memory(void* _address, size_t size) 6603 { 6604 addr_t address = (addr_t)_address; 6605 6606 // check params 6607 if (size == 0 || (addr_t)address + size < (addr_t)address 6608 || (addr_t)address % B_PAGE_SIZE != 0) { 6609 return B_BAD_VALUE; 6610 } 6611 6612 if (!IS_USER_ADDRESS(address) 6613 || !IS_USER_ADDRESS((addr_t)address + size - 1)) { 6614 return B_BAD_ADDRESS; 6615 } 6616 6617 // Write lock the address space and ensure the address range is not wired. 6618 AddressSpaceWriteLocker locker; 6619 do { 6620 status_t status = locker.SetTo(team_get_current_team_id()); 6621 if (status != B_OK) 6622 return status; 6623 } while (wait_if_address_range_is_wired(locker.AddressSpace(), address, 6624 size, &locker)); 6625 6626 // unmap 6627 return unmap_address_range(locker.AddressSpace(), address, size, false); 6628 } 6629 6630 6631 status_t 6632 _user_set_memory_protection(void* _address, size_t size, uint32 protection) 6633 { 6634 // check address range 6635 addr_t address = (addr_t)_address; 6636 size = PAGE_ALIGN(size); 6637 6638 if ((address % B_PAGE_SIZE) != 0) 6639 return B_BAD_VALUE; 6640 if (!is_user_address_range(_address, size)) { 6641 // weird error code required by POSIX 6642 return ENOMEM; 6643 } 6644 6645 // extend and check protection 6646 if ((protection & ~B_USER_PROTECTION) != 0) 6647 return B_BAD_VALUE; 6648 6649 fix_protection(&protection); 6650 6651 // We need to write lock the address space, since we're going to play with 6652 // the areas. Also make sure that none of the areas is wired and that we're 6653 // actually allowed to change the protection. 
6654 AddressSpaceWriteLocker locker; 6655 6656 bool restart; 6657 do { 6658 restart = false; 6659 6660 status_t status = locker.SetTo(team_get_current_team_id()); 6661 if (status != B_OK) 6662 return status; 6663 6664 // First round: Check whether the whole range is covered by areas and we 6665 // are allowed to modify them. 6666 addr_t currentAddress = address; 6667 size_t sizeLeft = size; 6668 while (sizeLeft > 0) { 6669 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6670 if (area == NULL) 6671 return B_NO_MEMORY; 6672 6673 if ((area->protection & B_KERNEL_AREA) != 0) 6674 return B_NOT_ALLOWED; 6675 if (area->protection_max != 0 6676 && (protection & area->protection_max) != (protection & B_USER_PROTECTION)) { 6677 return B_NOT_ALLOWED; 6678 } 6679 6680 addr_t offset = currentAddress - area->Base(); 6681 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6682 6683 AreaCacheLocker cacheLocker(area); 6684 6685 if (wait_if_area_range_is_wired(area, currentAddress, rangeSize, 6686 &locker, &cacheLocker)) { 6687 restart = true; 6688 break; 6689 } 6690 6691 cacheLocker.Unlock(); 6692 6693 currentAddress += rangeSize; 6694 sizeLeft -= rangeSize; 6695 } 6696 } while (restart); 6697 6698 // Second round: If the protections differ from that of the area, create a 6699 // page protection array and re-map mapped pages. 6700 VMTranslationMap* map = locker.AddressSpace()->TranslationMap(); 6701 addr_t currentAddress = address; 6702 size_t sizeLeft = size; 6703 while (sizeLeft > 0) { 6704 VMArea* area = locker.AddressSpace()->LookupArea(currentAddress); 6705 if (area == NULL) 6706 return B_NO_MEMORY; 6707 6708 addr_t offset = currentAddress - area->Base(); 6709 size_t rangeSize = min_c(area->Size() - offset, sizeLeft); 6710 6711 currentAddress += rangeSize; 6712 sizeLeft -= rangeSize; 6713 6714 if (area->page_protections == NULL) { 6715 if (area->protection == protection) 6716 continue; 6717 if (offset == 0 && rangeSize == area->Size()) { 6718 status_t status = vm_set_area_protection(area->address_space->ID(), 6719 area->id, protection, false); 6720 if (status != B_OK) 6721 return status; 6722 continue; 6723 } 6724 6725 status_t status = allocate_area_page_protections(area); 6726 if (status != B_OK) 6727 return status; 6728 } 6729 6730 // We need to lock the complete cache chain, since we potentially unmap 6731 // pages of lower caches. 6732 VMCache* topCache = vm_area_get_locked_cache(area); 6733 VMCacheChainLocker cacheChainLocker(topCache); 6734 cacheChainLocker.LockAllSourceCaches(); 6735 6736 for (addr_t pageAddress = area->Base() + offset; 6737 pageAddress < currentAddress; pageAddress += B_PAGE_SIZE) { 6738 map->Lock(); 6739 6740 set_area_page_protection(area, pageAddress, protection); 6741 6742 phys_addr_t physicalAddress; 6743 uint32 flags; 6744 6745 status_t error = map->Query(pageAddress, &physicalAddress, &flags); 6746 if (error != B_OK || (flags & PAGE_PRESENT) == 0) { 6747 map->Unlock(); 6748 continue; 6749 } 6750 6751 vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE); 6752 if (page == NULL) { 6753 panic("area %p looking up page failed for pa %#" B_PRIxPHYSADDR 6754 "\n", area, physicalAddress); 6755 map->Unlock(); 6756 return B_ERROR; 6757 } 6758 6759 // If the page is not in the topmost cache and write access is 6760 // requested, we have to unmap it. Otherwise we can re-map it with 6761 // the new protection. 
6762 bool unmapPage = page->Cache() != topCache 6763 && (protection & B_WRITE_AREA) != 0; 6764 6765 if (!unmapPage) 6766 map->ProtectPage(area, pageAddress, protection); 6767 6768 map->Unlock(); 6769 6770 if (unmapPage) { 6771 DEBUG_PAGE_ACCESS_START(page); 6772 unmap_page(area, pageAddress); 6773 DEBUG_PAGE_ACCESS_END(page); 6774 } 6775 } 6776 } 6777 6778 return B_OK; 6779 } 6780 6781 6782 status_t 6783 _user_sync_memory(void* _address, size_t size, uint32 flags) 6784 { 6785 addr_t address = (addr_t)_address; 6786 size = PAGE_ALIGN(size); 6787 6788 // check params 6789 if ((address % B_PAGE_SIZE) != 0) 6790 return B_BAD_VALUE; 6791 if (!is_user_address_range(_address, size)) { 6792 // weird error code required by POSIX 6793 return ENOMEM; 6794 } 6795 6796 bool writeSync = (flags & MS_SYNC) != 0; 6797 bool writeAsync = (flags & MS_ASYNC) != 0; 6798 if (writeSync && writeAsync) 6799 return B_BAD_VALUE; 6800 6801 if (size == 0 || (!writeSync && !writeAsync)) 6802 return B_OK; 6803 6804 // iterate through the range and sync all concerned areas 6805 while (size > 0) { 6806 // read lock the address space 6807 AddressSpaceReadLocker locker; 6808 status_t error = locker.SetTo(team_get_current_team_id()); 6809 if (error != B_OK) 6810 return error; 6811 6812 // get the first area 6813 VMArea* area = locker.AddressSpace()->LookupArea(address); 6814 if (area == NULL) 6815 return B_NO_MEMORY; 6816 6817 uint32 offset = address - area->Base(); 6818 size_t rangeSize = min_c(area->Size() - offset, size); 6819 offset += area->cache_offset; 6820 6821 // lock the cache 6822 AreaCacheLocker cacheLocker(area); 6823 if (!cacheLocker) 6824 return B_BAD_VALUE; 6825 VMCache* cache = area->cache; 6826 6827 locker.Unlock(); 6828 6829 uint32 firstPage = offset >> PAGE_SHIFT; 6830 uint32 endPage = firstPage + (rangeSize >> PAGE_SHIFT); 6831 6832 // write the pages 6833 if (cache->type == CACHE_TYPE_VNODE) { 6834 if (writeSync) { 6835 // synchronous 6836 error = vm_page_write_modified_page_range(cache, firstPage, 6837 endPage); 6838 if (error != B_OK) 6839 return error; 6840 } else { 6841 // asynchronous 6842 vm_page_schedule_write_page_range(cache, firstPage, endPage); 6843 // TODO: This is probably not quite what is supposed to happen. 6844 // Especially when a lot has to be written, it might take ages 6845 // until it really hits the disk. 6846 } 6847 } 6848 6849 address += rangeSize; 6850 size -= rangeSize; 6851 } 6852 6853 // NOTE: If I understand it correctly the purpose of MS_INVALIDATE is to 6854 // synchronize multiple mappings of the same file. In our VM they never get 6855 // out of sync, though, so we don't have to do anything. 6856 6857 return B_OK; 6858 } 6859 6860 6861 status_t 6862 _user_memory_advice(void* _address, size_t size, uint32 advice) 6863 { 6864 addr_t address = (addr_t)_address; 6865 if ((address % B_PAGE_SIZE) != 0) 6866 return B_BAD_VALUE; 6867 6868 size = PAGE_ALIGN(size); 6869 if (!is_user_address_range(_address, size)) { 6870 // weird error code required by POSIX 6871 return B_NO_MEMORY; 6872 } 6873 6874 switch (advice) { 6875 case MADV_NORMAL: 6876 case MADV_SEQUENTIAL: 6877 case MADV_RANDOM: 6878 case MADV_WILLNEED: 6879 case MADV_DONTNEED: 6880 // TODO: Implement! 
6881 break; 6882 6883 case MADV_FREE: 6884 { 6885 AddressSpaceWriteLocker locker; 6886 do { 6887 status_t status = locker.SetTo(team_get_current_team_id()); 6888 if (status != B_OK) 6889 return status; 6890 } while (wait_if_address_range_is_wired(locker.AddressSpace(), 6891 address, size, &locker)); 6892 6893 discard_address_range(locker.AddressSpace(), address, size, false); 6894 break; 6895 } 6896 6897 default: 6898 return B_BAD_VALUE; 6899 } 6900 6901 return B_OK; 6902 } 6903 6904 6905 status_t 6906 _user_get_memory_properties(team_id teamID, const void* address, 6907 uint32* _protected, uint32* _lock) 6908 { 6909 if (!IS_USER_ADDRESS(_protected) || !IS_USER_ADDRESS(_lock)) 6910 return B_BAD_ADDRESS; 6911 6912 AddressSpaceReadLocker locker; 6913 status_t error = locker.SetTo(teamID); 6914 if (error != B_OK) 6915 return error; 6916 6917 VMArea* area = locker.AddressSpace()->LookupArea((addr_t)address); 6918 if (area == NULL) 6919 return B_NO_MEMORY; 6920 6921 uint32 protection = get_area_page_protection(area, (addr_t)address); 6922 uint32 wiring = area->wiring; 6923 6924 locker.Unlock(); 6925 6926 error = user_memcpy(_protected, &protection, sizeof(protection)); 6927 if (error != B_OK) 6928 return error; 6929 6930 error = user_memcpy(_lock, &wiring, sizeof(wiring)); 6931 6932 return error; 6933 } 6934 6935 6936 static status_t 6937 user_set_memory_swappable(const void* _address, size_t size, bool swappable) 6938 { 6939 #if ENABLE_SWAP_SUPPORT 6940 // check address range 6941 addr_t address = (addr_t)_address; 6942 size = PAGE_ALIGN(size); 6943 6944 if ((address % B_PAGE_SIZE) != 0) 6945 return EINVAL; 6946 if (!is_user_address_range(_address, size)) 6947 return EINVAL; 6948 6949 const addr_t endAddress = address + size; 6950 6951 AddressSpaceReadLocker addressSpaceLocker; 6952 status_t error = addressSpaceLocker.SetTo(team_get_current_team_id()); 6953 if (error != B_OK) 6954 return error; 6955 VMAddressSpace* addressSpace = addressSpaceLocker.AddressSpace(); 6956 6957 // iterate through all concerned areas 6958 addr_t nextAddress = address; 6959 while (nextAddress != endAddress) { 6960 // get the next area 6961 VMArea* area = addressSpace->LookupArea(nextAddress); 6962 if (area == NULL) { 6963 error = B_BAD_ADDRESS; 6964 break; 6965 } 6966 6967 const addr_t areaStart = nextAddress; 6968 const addr_t areaEnd = std::min(endAddress, area->Base() + area->Size()); 6969 nextAddress = areaEnd; 6970 6971 error = lock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 6972 if (error != B_OK) { 6973 // We don't need to unset or reset things on failure. 6974 break; 6975 } 6976 6977 VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area)); 6978 VMAnonymousCache* anonCache = NULL; 6979 if (dynamic_cast<VMAnonymousNoSwapCache*>(area->cache) != NULL) { 6980 // This memory will aready never be swapped. Nothing to do. 6981 } else if ((anonCache = dynamic_cast<VMAnonymousCache*>(area->cache)) != NULL) { 6982 error = anonCache->SetCanSwapPages(areaStart - area->Base(), 6983 areaEnd - areaStart, swappable); 6984 } else { 6985 // Some other cache type? We cannot affect anything here. 6986 error = EINVAL; 6987 } 6988 6989 cacheChainLocker.Unlock(); 6990 6991 unlock_memory_etc(addressSpace->ID(), (void*)areaStart, areaEnd - areaStart, 0); 6992 if (error != B_OK) 6993 break; 6994 } 6995 6996 return error; 6997 #else 6998 // No swap support? Nothing to do. 
6999 return B_OK; 7000 #endif 7001 } 7002 7003 7004 status_t 7005 _user_mlock(const void* _address, size_t size) 7006 { 7007 return user_set_memory_swappable(_address, size, false); 7008 } 7009 7010 7011 status_t 7012 _user_munlock(const void* _address, size_t size) 7013 { 7014 // TODO: B_SHARED_AREAs need to be handled a bit differently: 7015 // if multiple clones of an area had mlock() called on them, 7016 // munlock() must also be called on all of them to actually unlock. 7017 // (At present, the first munlock() will unlock all.) 7018 // TODO: fork() should automatically unlock memory in the child. 7019 return user_set_memory_swappable(_address, size, true); 7020 } 7021 7022 7023 // #pragma mark -- compatibility 7024 7025 7026 #if defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7027 7028 7029 struct physical_entry_beos { 7030 uint32 address; 7031 uint32 size; 7032 }; 7033 7034 7035 /*! The physical_entry structure has changed. We need to translate it to the 7036 old one. 7037 */ 7038 extern "C" int32 7039 __get_memory_map_beos(const void* _address, size_t numBytes, 7040 physical_entry_beos* table, int32 numEntries) 7041 { 7042 if (numEntries <= 0) 7043 return B_BAD_VALUE; 7044 7045 const uint8* address = (const uint8*)_address; 7046 7047 int32 count = 0; 7048 while (numBytes > 0 && count < numEntries) { 7049 physical_entry entry; 7050 status_t result = __get_memory_map_haiku(address, numBytes, &entry, 1); 7051 if (result < 0) { 7052 if (result != B_BUFFER_OVERFLOW) 7053 return result; 7054 } 7055 7056 if (entry.address >= (phys_addr_t)1 << 32) { 7057 panic("get_memory_map(): Address is greater 4 GB!"); 7058 return B_ERROR; 7059 } 7060 7061 table[count].address = entry.address; 7062 table[count++].size = entry.size; 7063 7064 address += entry.size; 7065 numBytes -= entry.size; 7066 } 7067 7068 // null-terminate the table, if possible 7069 if (count < numEntries) { 7070 table[count].address = 0; 7071 table[count].size = 0; 7072 } 7073 7074 return B_OK; 7075 } 7076 7077 7078 /*! The type of the \a physicalAddress parameter has changed from void* to 7079 phys_addr_t. 7080 */ 7081 extern "C" area_id 7082 __map_physical_memory_beos(const char* name, void* physicalAddress, 7083 size_t numBytes, uint32 addressSpec, uint32 protection, 7084 void** _virtualAddress) 7085 { 7086 return __map_physical_memory_haiku(name, (addr_t)physicalAddress, numBytes, 7087 addressSpec, protection, _virtualAddress); 7088 } 7089 7090 7091 /*! The caller might not be able to deal with physical addresses >= 4 GB, so 7092 we meddle with the \a lock parameter to force 32 bit. 
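	For example (sketch only; \c address is a placeholder), a legacy caller
	requesting a fully locked area

	\code
	area_id area = create_area("dma buffer", &address, B_ANY_ADDRESS,
		B_PAGE_SIZE, B_FULL_LOCK, B_READ_AREA | B_WRITE_AREA);
	\endcode

	is silently given \c B_32_BIT_FULL_LOCK (\c B_LAZY_LOCK likewise, and
	\c B_CONTIGUOUS becomes \c B_32_BIT_CONTIGUOUS).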
7093 */ 7094 extern "C" area_id 7095 __create_area_beos(const char* name, void** _address, uint32 addressSpec, 7096 size_t size, uint32 lock, uint32 protection) 7097 { 7098 switch (lock) { 7099 case B_NO_LOCK: 7100 break; 7101 case B_FULL_LOCK: 7102 case B_LAZY_LOCK: 7103 lock = B_32_BIT_FULL_LOCK; 7104 break; 7105 case B_CONTIGUOUS: 7106 lock = B_32_BIT_CONTIGUOUS; 7107 break; 7108 } 7109 7110 return __create_area_haiku(name, _address, addressSpec, size, lock, 7111 protection); 7112 } 7113 7114 7115 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_beos", "get_memory_map@", 7116 "BASE"); 7117 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_beos", 7118 "map_physical_memory@", "BASE"); 7119 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_beos", "create_area@", 7120 "BASE"); 7121 7122 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7123 "get_memory_map@@", "1_ALPHA3"); 7124 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7125 "map_physical_memory@@", "1_ALPHA3"); 7126 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7127 "1_ALPHA3"); 7128 7129 7130 #else 7131 7132 7133 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__get_memory_map_haiku", 7134 "get_memory_map@@", "BASE"); 7135 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__map_physical_memory_haiku", 7136 "map_physical_memory@@", "BASE"); 7137 DEFINE_LIBROOT_KERNEL_SYMBOL_VERSION("__create_area_haiku", "create_area@@", 7138 "BASE"); 7139 7140 7141 #endif // defined(__i386__) && B_HAIKU_PHYSICAL_BITS > 32 7142
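// Illustrative sketch (comment only, not compiled): pinning a single user
// page from kernel code with vm_wire_page()/vm_unwire_page(). "team" and
// "userAddress" are placeholders, not defined in this file.
//
//	VMPageWiringInfo info;
//	status_t status = vm_wire_page(team, userAddress, true, &info);
//	if (status == B_OK) {
//		// info.physicalAddress translates userAddress; the page's wired
//		// count keeps it mapped until the matching vm_unwire_page().
//		vm_unwire_page(&info);
//	}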